[med-svn] [gadgetron] 02/09: Imported Upstream version 3.1.0

Fri Feb 20 20:30:39 UTC 2015

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository gadgetron.

commit ccc611d0545c2681941e1eb2d6a8f0d2b25b026e
Author: Ghislain Antony Vaillant <ghisvail at gmail.com>
Date:   Fri Feb 20 20:15:03 2015 +0000

    Imported Upstream version 3.1.0
---
 .gitignore                                         |     3 +
 CMakeLists.txt                                     |   283 +-
 README                                             |     2 +-
 apps/CMakeLists.txt                                |     9 +-
 apps/clients/CMakeLists.txt                        |    14 +-
 .../gadgetron_ismrmrd_client/CMakeLists.txt        |    24 +
 .../gadgetron_ismrmrd_client.cpp                   |  1279 +++
 apps/clients/mriclient/BlobFileWriter.h            |    91 -
 apps/clients/mriclient/CMakeLists.txt              |    52 -
 apps/clients/mriclient/HDF5ImageWriter.h           |    76 -
 apps/clients/mriclient/ImageWriter.h               |   113 -
 apps/clients/mriclient/gt_alive.cpp                |    61 -
 apps/clients/mriclient/main.cpp                    |   230 -
 apps/clients/utilities/CMakeLists.txt              |    47 +
 apps/clients/utilities/DependencyQueryReader.h     |   100 +
 apps/clients/utilities/gt_alive.cpp                |    69 +
 apps/clients/utilities/gt_query.cpp                |    98 +
 apps/clients/utilities/gtquery.xml                 |    33 +
 apps/clients/{mriclient => utilities}/isalive.xml  |     0
 apps/clients/utilities/main.cpp                    |   275 +
 apps/gadgetron/CMakeLists.txt                      |   101 +-
 apps/gadgetron/Gadget.cpp                          |    38 +
 apps/gadgetron/Gadget.h                            |    50 +-
 apps/gadgetron/GadgetContainerMessage.h            |    42 +-
 apps/gadgetron/GadgetMessageInterface.h            |    21 +-
 apps/gadgetron/GadgetServerAcceptor.cpp            |     5 +-
 apps/gadgetron/GadgetServerAcceptor.h              |     5 +
 apps/gadgetron/GadgetStreamController.cpp          |   307 +-
 apps/gadgetron/GadgetStreamController.h            |    19 +-
 apps/gadgetron/GadgetronExport.h                   |    12 +-
 apps/gadgetron/gadgetbase_export.h                 |    16 +
 apps/gadgetron/gadgetron.xml.example               |     7 +-
 apps/gadgetron/gadgetron_config.in                 |    12 +
 apps/gadgetron/gadgetron_info.cpp                  |    88 +
 apps/gadgetron/gadgetron_paths.h                   |    76 +
 apps/gadgetron/gadgetron_xml.cpp                   |    95 +
 apps/gadgetron/gadgetron_xml.h                     |   122 +
 apps/gadgetron/main.cpp                            |   246 +-
 apps/gadgetron/pugiconfig.hpp                      |    72 +
 apps/gadgetron/pugixml.cpp                         | 10639 +++++++++++++++++++
 apps/gadgetron/pugixml.hpp                         |  1332 +++
 apps/gadgetron/schema/gadgetron.xsd                |   100 +-
 .../templates/CMakeLists_GadgetLibraryExample.txt  |    55 -
 .../templates/gadgetronEXAMPLELIB_export.h         |    21 -
 apps/gadgetron/webapp/CMakeLists.txt               |    20 +
 apps/gadgetron/webapp/gadgetron_web.conf.in        |    16 +
 apps/gadgetron/webapp/gadgetron_web_app.cfg        |     2 +-
 apps/gadgetron/webapp/gadgetron_web_app.in         |     8 +
 apps/gadgetron/webapp/gadgetron_web_app.py         |     8 +-
 apps/gadgetron/webapp/gadgetron_web_ld.conf.in     |     2 +
 apps/gadgetron/webapp/main.cpp                     |     8 +
 apps/standalone/CMakeLists.txt                     |     6 +
 apps/standalone/cpu/CMakeLists.txt                 |    18 +-
 apps/standalone/cpu/denoising/2d/CMakeLists.txt    |    33 +-
 apps/standalone/cpu/gtplus/CMakeLists.txt          |    33 +-
 .../cpu/gtplus/Matlab_gt_read_analyze.cpp          |   261 +
 .../cpu/gtplus/Matlab_gt_write_analyze.cpp         |   247 +
 apps/standalone/cpu/registration/2d/CMakeLists.txt |    28 +-
 apps/standalone/cpu/registration/3d/CMakeLists.txt |    10 +-
 apps/standalone/cpu/registration/CMakeLists.txt    |     3 +
 apps/standalone/gpu/CMakeLists.txt                 |    29 +-
 apps/standalone/gpu/ct/CMakeLists.txt              |     3 +
 .../gpu/ct/xray/CBCT_forwards_projection.cpp       |   252 +
 .../standalone/gpu/ct/xray/CBCT_reconstruct_CG.cpp |   204 +
 .../gpu/ct/xray/CBCT_reconstruct_FDK_3d.cpp        |   143 +
 .../gpu/ct/xray/CBCT_reconstruct_FDK_4d.cpp        |   157 +
 .../gpu/ct/xray/CBCT_reconstruct_NLCG.cpp          |   194 +
 .../standalone/gpu/ct/xray/CBCT_reconstruct_SB.cpp |   281 +
 apps/standalone/gpu/ct/xray/CMakeLists.txt         |    64 +
 apps/standalone/gpu/deblurring/2d/CMakeLists.txt   |     8 +-
 apps/standalone/gpu/deblurring/3d/CMakeLists.txt   |     8 +-
 apps/standalone/gpu/denoising/2d/CMakeLists.txt    |     4 +-
 apps/standalone/gpu/mri/nfft/2d/CMakeLists.txt     |    18 +-
 apps/standalone/gpu/mri/nfft/2d/main_cg.cpp        |     3 +
 apps/standalone/gpu/mri/nfft/2d/main_nfft.cpp      |     8 +-
 apps/standalone/gpu/mri/nfft/2d/main_sb.cpp        |     5 +
 apps/standalone/gpu/mri/nfft/ms2d/CMakeLists.txt   |    12 +-
 apps/standalone/gpu/mri/nfft/ms2d/nfft_main.cpp    |     2 +-
 .../standalone/gpu/mri/nfft/ms2d/nffth_generic.cpp |   161 +
 apps/standalone/gpu/mri/nfft/ms2d/nffth_main.cpp   |     6 +-
 .../gpu/mri/sense/noncartesian/CMakeLists.txt      |     8 +-
 .../gpu/mri/sense/noncartesian/generic_cg.cpp      |   200 +
 .../radial/2d_golden_ratio/CMakeLists.txt          |    20 +-
 .../radial/2d_golden_ratio/main_cg.cpp             |     8 +-
 .../radial/2d_golden_ratio/main_gpbb.cpp           |     8 +-
 .../radial/2d_golden_ratio/main_nlcg.cpp           |   341 +
 .../radial/2d_golden_ratio/main_sbc.cpp            |     5 +-
 .../radial/2d_golden_ratio_gui/CMakeLists.txt      |    10 +-
 .../radial/2d_golden_ratio_kt/CMakeLists.txt       |     6 +-
 .../mri/sense/noncartesian/radial/CMakeLists.txt   |     2 +-
 apps/standalone/gpu/registration/2d/CMakeLists.txt |    54 +-
 apps/standalone/gpu/registration/3d/CMakeLists.txt |    12 +-
 chroot/CMakeLists.txt                              |    76 +
 chroot/README.rst                                  |    35 +
 chroot/chroot-manual.txt                           |   156 +
 chroot/copy-cuda-lib.sh.in                         |    33 +
 chroot/copy_file_and_dependencies                  |    25 +
 chroot/create_chroot.sh                            |   147 +
 chroot/enter-chroot-env.sh.in                      |     3 +
 chroot/gadgetron-dependency-query.sh.in            |    15 +
 chroot/gadgetron_chroot.conf                       |    27 +
 chroot/gadgetron_ismrmrd_client.sh.in              |    16 +
 chroot/generate_gadgetron_root                     |    45 +
 chroot/get_dependencies_for_binary                 |    12 +
 chroot/install_chroot_image.sh                     |    91 +
 chroot/make_list_of_dependencies                   |    25 +
 chroot/mount.sh                                    |    35 +
 chroot/run-gadgetron-dependency-query.sh           |    31 +
 chroot/run-gadgetron_ismrmrd_client.sh             |    32 +
 chroot/run-siemens_to_ismrmrd.sh                   |    31 +
 chroot/run-webapp.sh.in                            |    11 +
 chroot/siemens_to_ismrmrd.sh.in                    |    15 +
 chroot/start-env.sh                                |    18 +
 chroot/start-gadgetron-from-image.sh               |    29 +
 chroot/start-gadgetron.sh.in                       |    10 +
 chroot/start-webapp.sh                             |    18 +
 chroot/start.sh                                    |    23 +
 chroot/stop.sh                                     |    26 +
 chroot/umount_image.sh                             |    28 +
 chroot/unique_lines_in_file                        |    14 +
 chroot/upstart-instructions.txt                    |    10 +
 cmake/CMakeLists.txt                               |    18 +-
 cmake/FindACE.cmake                                |     4 +-
 cmake/FindArmadillo.cmake                          |   100 +
 cmake/FindCUDA/cuda_compute_capability.c           |    42 +
 cmake/FindCUDA_advanced.cmake                      |    38 +
 cmake/FindCULA.cmake                               |    63 -
 cmake/FindFFTW3.cmake                              |    41 +-
 cmake/FindIsmrmrd.cmake                            |    29 +-
 cmake/FindMKL.cmake                                |    35 +-
 cmake/FindXSD.cmake                                |    68 -
 cmake/FindXalanC.cmake                             |    35 -
 cmake/FindXercesC.cmake                            |    37 -
 cmake/InstallLinuxDependencies.cmake               |    24 +
 cmake/InstallWinDependencies.cmake                 |   137 +
 cmake/InstallWinGadgetron.bat                      |     6 +
 cmake/cpack_options.cmake.in                       |    41 +
 cmake/cpack_options_dependency.cmake.in            |    37 +
 cmake/cpack_options_web.cmake.in                   |    38 +
 cmake/debian/postinst                              |    18 +
 cmake/debian/prerm                                 |    13 +
 cmake/debian_web/postinst                          |     7 +
 cmake/debian_web/prerm                             |     7 +
 cmake/gadgetron_cpack.cmake                        |    33 +
 cmake/gadgetron_web_cpack.cmake                    |    32 +
 doc/CMakeLists.txt                                 |    24 -
 doc/doxygen/Doxyfile.in                            |     2 +-
 doc/manual/CMakeLists.txt                          |    36 -
 doc/manual/figs/Gadgetron.png                      |   Bin 156640 -> 0 bytes
 doc/manual/figs/Gadgetron.svg                      |  1736 ---
 doc/manual/figs/architecture.png                   |   Bin 185001 -> 0 bytes
 doc/manual/figs/architecture.svg                   |   748 --
 doc/manual/figs/arrayfileformat.png                |   Bin 80429 -> 0 bytes
 doc/manual/figs/arrayfileformat.svg                |   247 -
 doc/manual/figs/cgsense.png                        |   Bin 106261 -> 0 bytes
 doc/manual/figs/cgsense.svg                        |   671 --
 doc/manual/figs/examplecgsenseresult.png           |   Bin 131868 -> 0 bytes
 doc/manual/figs/examplegrapparesult.png            |   Bin 42909 -> 0 bytes
 doc/manual/figs/examplelibresult.png               |   Bin 18132 -> 0 bytes
 doc/manual/figs/gadget.png                         |   Bin 85585 -> 0 bytes
 doc/manual/figs/gadget.svg                         |   573 -
 doc/manual/figs/grappa.png                         |   Bin 193964 -> 0 bytes
 doc/manual/figs/grappa.svg                         |   594 --
 doc/manual/figs/hdfview_image_view.png             |   Bin 64541 -> 0 bytes
 doc/manual/figs/hdfview_image_view_setting.png     |   Bin 47890 -> 0 bytes
 doc/manual/figs/hdfview_mri_testdata.png           |   Bin 88694 -> 0 bytes
 doc/manual/figs/math/HOWTO.txt                     |     5 -
 doc/manual/figs/math/lls.jpg                       |   Bin 3155 -> 0 bytes
 doc/manual/figs/math/lls.tex                       |    11 -
 doc/manual/figs/math/lls_form.jpg                  |   Bin 12749 -> 0 bytes
 doc/manual/figs/math/lls_form.tex                  |    11 -
 doc/manual/figs/math/sb.jpg                        |   Bin 4591 -> 0 bytes
 doc/manual/figs/math/sb.tex                        |    14 -
 doc/manual/figs/python.png                         |   Bin 86776 -> 0 bytes
 doc/manual/figs/python.svg                         |   635 --
 doc/manual/figs/sense_cg.png                       |   Bin 24654 -> 0 bytes
 doc/manual/figs/sense_sbc.png                      |   Bin 23004 -> 0 bytes
 doc/manual/figs/shepp.png                          |   Bin 1243 -> 0 bytes
 doc/manual/figs/shepp_blurred.png                  |   Bin 10894 -> 0 bytes
 doc/manual/figs/shepp_deblurred_cg.png             |   Bin 21952 -> 0 bytes
 doc/manual/figs/shepp_deblurred_sb.png             |   Bin 9121 -> 0 bytes
 doc/manual/figs/shepp_denoised.png                 |   Bin 5646 -> 0 bytes
 doc/manual/figs/shepp_iteration.png                |   Bin 50249 -> 0 bytes
 doc/manual/figs/shepp_noisy.png                    |   Bin 40779 -> 0 bytes
 doc/manual/figs/simple2dft.png                     |   Bin 56594 -> 0 bytes
 doc/manual/figs/simple2dft.svg                     |   355 -
 doc/manual/gadgetron_manual.xml                    |  6472 -----------
 gadgets/CMakeLists.txt                             |    53 +-
 gadgets/cartesian/CMakeLists.txt                   |    24 +-
 gadgets/cartesian/CartesianToGenericGadget.cpp     |    36 +-
 gadgets/cartesian/CartesianToGenericGadget.h       |     3 +-
 gadgets/dicom/CMakeLists.txt                       |   105 +-
 gadgets/dicom/DicomFinishAttribGadget.cpp          |   897 ++
 gadgets/dicom/DicomFinishAttribGadget.h            |    76 +
 gadgets/dicom/DicomFinishGadget.cpp                |   457 +-
 gadgets/dicom/DicomFinishGadget.h                  |    11 +-
 gadgets/dicom/DicomImageWriter.cpp                 |   162 +-
 gadgets/dicom/DicomImageWriter.h                   |    10 +-
 gadgets/dicom/dicom.xml                            |    61 +-
 gadgets/epi/CMakeLists.txt                         |    49 +
 gadgets/epi/CutXGadget.cpp                         |    91 +
 gadgets/epi/CutXGadget.h                           |    33 +
 gadgets/epi/EPICorrGadget.cpp                      |   197 +
 gadgets/epi/EPICorrGadget.h                        |    48 +
 gadgets/epi/EPIReconXGadget.cpp                    |   133 +
 gadgets/epi/EPIReconXGadget.h                      |    38 +
 gadgets/epi/FFTXGadget.cpp                         |    27 +
 gadgets/epi/FFTXGadget.h                           |    25 +
 gadgets/epi/epi.xml                                |   138 +
 gadgets/epi/epi_gtplus_grappa.xml                  |   525 +
 gadgets/epi/gadgetron_epi_export.h                 |    14 +
 gadgets/grappa/CMakeLists.txt                      |    54 +-
 gadgets/grappa/GrappaCalibrationBuffer.cpp         |    17 +-
 gadgets/grappa/GrappaCalibrationBuffer.h           |     5 +-
 gadgets/grappa/GrappaGadget.cpp                    |   129 +-
 gadgets/grappa/GrappaGadget.h                      |     8 +-
 gadgets/grappa/GrappaUnmixingGadget.cpp            |     3 +
 gadgets/grappa/GrappaUnmixingGadget.h              |     2 +-
 gadgets/grappa/GrappaWeightsCalculator.cpp         |    16 +-
 gadgets/grappa/config/CMakeLists.txt               |     5 +-
 gadgets/gtPlus/CMakeLists.txt                      |   170 +-
 gadgets/gtPlus/GadgetCloudJobMessageReadWrite.h    |     4 +-
 gadgets/gtPlus/GadgetMRIHeadersExt.h               |   231 -
 gadgets/gtPlus/GadgetronMrReconCommon.h            |    90 -
 gadgets/gtPlus/GtPlusAccumulatorGadget.cpp         |  1168 --
 gadgets/gtPlus/GtPlusAccumulatorGadget.h           |   198 -
 gadgets/gtPlus/GtPlusAccumulatorIRT2DGadget.cpp    |   615 --
 gadgets/gtPlus/GtPlusAccumulatorIRT2DGadget.h      |    42 -
 .../gtPlus/GtPlusAccumulatorImageTriggerGadget.cpp |   746 ++
 .../gtPlus/GtPlusAccumulatorImageTriggerGadget.h   |   150 +
 gadgets/gtPlus/GtPlusAccumulatorPerfAIFGadget.cpp  |    55 -
 gadgets/gtPlus/GtPlusAccumulatorPerfAIFGadget.h    |    32 -
 gadgets/gtPlus/GtPlusAccumulatorSLCGadget.cpp      |   403 -
 gadgets/gtPlus/GtPlusAccumulatorSLCGadget.h        |    40 -
 .../GtPlusAccumulatorWorkOrderTriggerGadget.cpp    |   786 +-
 .../GtPlusAccumulatorWorkOrderTriggerGadget.h      |    63 +-
 gadgets/gtPlus/GtPlusGadgetImageArray.cpp          |   237 +-
 gadgets/gtPlus/GtPlusGadgetImageArray.h            |    24 +-
 gadgets/gtPlus/GtPlusGadgetOpenMP.cpp              |    32 +-
 gadgets/gtPlus/GtPlusGadgetOpenMP.h                |     5 +-
 gadgets/gtPlus/GtPlusImageReconGadget.cpp          |   711 ++
 gadgets/gtPlus/GtPlusImageReconGadget.h            |   135 +
 gadgets/gtPlus/GtPlusRecon2DTCloudPackage.h        |    98 +-
 gadgets/gtPlus/GtPlusRecon2DTGadget.cpp            |   155 +-
 gadgets/gtPlus/GtPlusRecon2DTGadgetCloud.cpp       |   284 +-
 gadgets/gtPlus/GtPlusRecon2DTGadgetCloud.h         |     4 +-
 gadgets/gtPlus/GtPlusRecon3DTGadget.cpp            |    95 +-
 gadgets/gtPlus/GtPlusReconGadget.cpp               |  2772 +++--
 gadgets/gtPlus/GtPlusReconGadget.h                 |    66 +-
 gadgets/gtPlus/GtPlusReconGadgetUtil.cpp           |   710 ++
 gadgets/gtPlus/GtPlusReconGadgetUtil.h             |    76 +
 gadgets/gtPlus/GtPlusReconJob2DTGadget.cpp         |    30 +-
 gadgets/gtPlus/GtPlusReconJob2DTGadget.h           |    17 +-
 gadgets/gtPlus/GtPlusReconJob2DTGadgetCloud.cpp    |   130 +-
 gadgets/gtPlus/GtPlusReconJob2DTGadgetCloud.h      |    17 +-
 gadgets/gtPlus/GtPlusReconJob3DTGadget.cpp         |    32 +-
 gadgets/gtPlus/GtPlusReconJob3DTGadget.h           |    17 +-
 gadgets/gtPlus/config/GT_2DT_Cartesian.xml         |   850 ++
 .../gtPlus/config/GT_2DT_Cartesian_CloudNode.xml   |    77 +
 gadgets/gtPlus/config/GT_2DT_Cartesian_Dicom.xml   |   857 ++
 ...GT_2DT_Cartesian_DualLayer_Gateway_L1SPIRIT.xml |   851 ++
 .../GT_2DT_Cartesian_DualLayer_Gateway_SPIRIT.xml  |   845 ++
 .../GT_2DT_Cartesian_FirstLayer_CloudNode.xml      |   279 +
 gadgets/gtPlus/config/GT_2DT_Cartesian_GFactor.xml |   872 ++
 .../config/GT_2DT_Cartesian_ImageTrigger_Dicom.xml |   879 ++
 .../gtPlus/config/GT_2DT_Cartesian_L1SPIRIT.xml    |   836 ++
 ...GT_2DT_Cartesian_PseudoReplica_SNRUnitRecon.xml |   810 ++
 gadgets/gtPlus/config/GT_2DT_Cartesian_SPIRIT.xml  |   836 ++
 .../GT_2DT_Cartesian_SingleLayer_CloudNode.xml     |   279 +
 gadgets/gtPlus/config/GT_2DT_FatWater.xml          |   696 ++
 gadgets/gtPlus/config/GT_2DT_HASTE.xml             |   815 ++
 gadgets/gtPlus/config/GT_2DT_HASTE_MOCO_AVE.xml    |  1085 ++
 gadgets/gtPlus/config/GT_2DT_LGE.xml               |   701 ++
 gadgets/gtPlus/config/GT_2DT_MOLLI.xml             |   696 ++
 gadgets/gtPlus/config/GT_2DT_MOLLI_Offline.xml     |   699 ++
 gadgets/gtPlus/config/GT_2DT_Perfusion.xml         |   702 ++
 ...T_2DT_PseudoReplica_SNRUnitRecon_DataExport.xml |    74 +
 .../config/GT_2DT_RTCine_L1SPIRIT_PhysioInterp.xml |   866 ++
 ...ine_L1SPIRIT_PhysioInterp_DualLayer_Gateway.xml |   875 ++
 gadgets/gtPlus/config/GT_2DT_RealTimeCine.xml      |   783 ++
 gadgets/gtPlus/config/GT_2DT_RealTimeFlow.xml      |   765 ++
 gadgets/gtPlus/config/GT_2DT_T2W.xml               |   701 ++
 gadgets/gtPlus/config/GT_3DT_Cartesian.xml         |   849 ++
 .../gtPlus/config/GT_3DT_Cartesian_CloudNode.xml   |    82 +
 gadgets/gtPlus/config/GT_3DT_Cartesian_GFactor.xml |   735 ++
 .../gtPlus/config/GT_3DT_Cartesian_L1SPIRIT.xml    |   853 ++
 gadgets/gtPlus/config/GT_3DT_Cartesian_SPIRIT.xml  |   844 ++
 .../GT_3DT_Cartesian_SingleLayer_L1SPIRIT.xml      |   853 ++
 gadgets/gtPlus/config/gtCloud/myCloud_2DT.txt      |     8 +
 .../config/gtCloud/myCloud_2DT_DualLayer.txt       |     8 +
 .../gtCloud/myCloud_2DT_DualLayer_FirstLayer.txt   |     8 +
 gadgets/gtPlus/config/gtCloud/myCloud_3DT.txt      |    12 +
 gadgets/interventional_mri/CMakeLists.txt          |    54 +
 .../DeviceChannelSplitterGadget.cpp                |    95 +
 .../DeviceChannelSplitterGadget.h                  |    45 +
 .../gadgetron_interventional_mri_export.h          |    14 +
 gadgets/interventional_mri/grappa_device.xml       |   147 +
 gadgets/matlab/BaseGadget.m                        |     4 +-
 gadgets/matlab/CMakeLists.txt                      |    19 +-
 gadgets/matlab/MatlabGadget.cpp                    |    20 +-
 gadgets/matlab/MatlabGadget.h                      |    11 +-
 gadgets/matlab/accumulate_and_recon.m              |    18 +-
 gadgets/matlab/scale.m                             |     1 -
 gadgets/moco/CMakeLists.txt                        |    48 +-
 gadgets/moco/RegistrationAveragingGadget.h         |     2 +-
 gadgets/moco/RegistrationScatteringGadget.h        |     2 +-
 gadgets/moco/config/CMakeLists.txt                 |     4 +-
 gadgets/moco/config/cpureg_cartesian_averaging.xml |     4 +-
 gadgets/moco/config/gpureg_cartesian_averaging.xml |     4 +-
 gadgets/moco/cpuRegistrationAveragingGadget.h      |     5 +-
 gadgets/moco/gpuRegistrationAveragingGadget.h      |     3 +-
 gadgets/moco/gpuRegistrationScatteringGadget.h     |     2 -
 gadgets/mri_core/AccumulatorGadget.cpp             |    67 +-
 gadgets/mri_core/AccumulatorGadget.h               |     2 +-
 .../AcquisitionAccumulateTriggerGadget.cpp         |   403 +
 .../mri_core/AcquisitionAccumulateTriggerGadget.h  |    47 +
 gadgets/mri_core/AcquisitionFinishGadget.h         |     2 +-
 gadgets/mri_core/AcquisitionPassthroughGadget.h    |     2 +-
 gadgets/mri_core/AsymmetricEchoAdjustROGadget.cpp  |   142 +
 gadgets/mri_core/AsymmetricEchoAdjustROGadget.h    |    32 +
 gadgets/mri_core/AutoScaleGadget.cpp               |     6 +-
 gadgets/mri_core/AutoScaleGadget.h                 |     2 +-
 gadgets/mri_core/BucketToBufferGadget.cpp          |   628 ++
 gadgets/mri_core/BucketToBufferGadget.h            |    52 +
 gadgets/mri_core/CMakeLists.txt                    |   212 +-
 gadgets/mri_core/CoilReductionGadget.cpp           |    92 +-
 gadgets/mri_core/CoilReductionGadget.h             |     2 +-
 gadgets/mri_core/CombineGadget.cpp                 |    69 +
 gadgets/mri_core/CombineGadget.h                   |    27 +
 gadgets/mri_core/ComplexToFloatAttribGadget.cpp    |    88 +
 gadgets/mri_core/ComplexToFloatAttribGadget.h      |    34 +
 gadgets/mri_core/CplxDumpGadget.cpp                |     2 +-
 gadgets/mri_core/CplxDumpGadget.h                  |     3 +-
 gadgets/mri_core/CropAndCombineGadget.cpp          |    18 +-
 gadgets/mri_core/CropAndCombineGadget.h            |     2 +-
 gadgets/mri_core/DependencyQueryGadget.cpp         |   200 +
 gadgets/mri_core/DependencyQueryGadget.h           |    53 +
 gadgets/mri_core/DependencyQueryWriter.cpp         |    76 +
 gadgets/mri_core/DependencyQueryWriter.h           |    28 +
 gadgets/mri_core/ExtractGadget.cpp                 |    10 +-
 gadgets/mri_core/ExtractGadget.h                   |     2 +-
 gadgets/mri_core/FFTGadget.cpp                     |   111 +-
 gadgets/mri_core/FFTGadget.h                       |    14 +-
 gadgets/mri_core/FloatToUShortAttribGadget.cpp     |   130 +
 gadgets/mri_core/FloatToUShortAttribGadget.h       |    43 +
 gadgets/mri_core/FloatToUShortGadget.cpp           |    12 +-
 gadgets/mri_core/FloatToUShortGadget.h             |     2 +-
 gadgets/mri_core/FlowPhaseSubtractionGadget.cpp    |    56 +-
 gadgets/mri_core/FlowPhaseSubtractionGadget.h      |     2 +-
 gadgets/mri_core/GadgetIsmrmrdReadWrite.h          |    41 +-
 gadgets/mri_core/GadgetMRIHeaders.h                |    37 +-
 gadgets/mri_core/ImageFinishAttribGadget.cpp       |    53 +
 gadgets/mri_core/ImageFinishAttribGadget.h         |    49 +
 gadgets/mri_core/ImageFinishGadget.cpp             |     6 +-
 gadgets/mri_core/ImageFinishGadget.h               |     2 +-
 gadgets/mri_core/ImageWriterGadget.h               |     2 +-
 gadgets/mri_core/IsmrmrdDumpGadget.cpp             |   250 +-
 gadgets/mri_core/IsmrmrdDumpGadget.h               |     6 +-
 gadgets/mri_core/MRIImageAttribWriter.cpp          |   154 +
 gadgets/mri_core/MRIImageAttribWriter.h            |    43 +
 gadgets/mri_core/MRIImageWriter.cpp                |   114 +-
 gadgets/mri_core/MRIImageWriter.h                  |    42 +-
 gadgets/mri_core/MaxwellCorrectionGadget.cpp       |   246 +-
 gadgets/mri_core/MaxwellCorrectionGadget.h         |     2 +-
 gadgets/mri_core/NoiseAdjustGadget.cpp             |   709 +-
 gadgets/mri_core/NoiseAdjustGadget.h               |    47 +-
 gadgets/mri_core/NoiseAdjustGadget_unoptimized.cpp |    24 +-
 gadgets/mri_core/NoiseAdjustGadget_unoptimized.h   |     2 +-
 gadgets/mri_core/PCACoilGadget.cpp                 |   133 +-
 gadgets/mri_core/PCACoilGadget.h                   |     7 +-
 gadgets/mri_core/PartialFourierAdjustROGadget.cpp  |    37 +-
 gadgets/mri_core/PartialFourierAdjustROGadget.h    |     4 +-
 gadgets/mri_core/PhysioInterpolationGadget.cpp     |   607 +-
 gadgets/mri_core/PhysioInterpolationGadget.h       |    40 +-
 gadgets/mri_core/RemoveROOversamplingGadget.cpp    |   201 +-
 gadgets/mri_core/RemoveROOversamplingGadget.h      |    40 +-
 gadgets/mri_core/WhiteNoiseInjectorGadget.cpp      |   197 +
 gadgets/mri_core/WhiteNoiseInjectorGadget.h        |    62 +
 gadgets/mri_core/default.xml                       |    52 +-
 .../mri_core/default_measurement_dependencies.xml  |    40 +
 gadgets/mri_core/default_optimized.xml             |    56 +-
 gadgets/mri_core/default_short.xml                 |    48 +-
 gadgets/octave/CMakeLists.txt                      |    43 -
 .../octave/GadgetronReturnIsmrmrdAcquisition.cpp   |   136 -
 gadgets/octave/GadgetronReturnIsmrmrdImage.cpp     |   108 -
 gadgets/octave/OctaveCommunicator.cpp              |    68 -
 gadgets/octave/OctaveCommunicator.h                |    40 -
 gadgets/octave/OctaveGadget.cpp                    |   232 -
 gadgets/octave/OctaveGadget.h                      |    95 -
 gadgets/octave/XMLGetXPath.cpp                     |    35 -
 gadgets/octave/gadgetron_octave_export.h           |    23 -
 .../octave/gadgetron_octavecommunicator_export.h   |    23 -
 gadgets/octave/octave.xml                          |    84 -
 gadgets/octave/octave/accumulator.m                |    29 -
 gadgets/octave/octave/configure_accumulator.m      |    18 -
 gadgets/octave/octave/configure_downsample_2x.m    |     3 -
 gadgets/octave/octave/downsample_2x.m              |    11 -
 .../octave/octave/gadget_reference_accumulator.m   |     7 -
 .../octave/octave/gadget_reference_downsample_2x.m |     7 -
 .../octave/ismrm_transform_image_to_kspace.m       |    34 -
 .../octave/ismrm_transform_kspace_to_image.m       |    35 -
 gadgets/octave/octave/my_config_function.m         |     3 -
 gadgets/octave/octave/my_gadget_reference.m        |     7 -
 gadgets/octave/octave/my_recon_function.m          |     8 -
 gadgets/octave/pugiconfig.hpp                      |    69 -
 gadgets/octave/pugixml.cpp                         | 10250 ------------------
 gadgets/octave/pugixml.hpp                         |  1265 ---
 gadgets/pmri/CMakeLists.txt                        |    62 +
 gadgets/pmri/GenericReconJob.h                     |    27 +
 gadgets/pmri/config/CMakeLists.txt                 |    16 +
 .../pmri/config/generic_gpu_ktsense_singleshot.xml |   115 +
 gadgets/pmri/config/generic_gpusense_cg.xml        |   113 +
 .../pmri/config/generic_gpusense_cg_singleshot.xml |   115 +
 .../config/generic_gpusense_nlcg_singleshot.xml    |   116 +
 .../pmri/config/generic_gpusense_sb_singleshot.xml |   119 +
 gadgets/pmri/gadgetron_gpupmri_export.h            |    14 +
 gadgets/pmri/gpuCgKtSenseGadget.cpp                |   375 +
 gadgets/pmri/gpuCgKtSenseGadget.h                  |    71 +
 gadgets/pmri/gpuCgSenseGadget.cpp                  |   347 +
 gadgets/pmri/gpuCgSenseGadget.h                    |    71 +
 gadgets/pmri/gpuCgSpiritGadget.cpp                 |   357 +
 gadgets/pmri/gpuCgSpiritGadget.h                   |    75 +
 gadgets/pmri/gpuGenericSensePrepGadget.cpp         |   941 ++
 gadgets/pmri/gpuGenericSensePrepGadget.h           |   127 +
 gadgets/pmri/gpuNlcgSenseGadget.cpp                |   387 +
 gadgets/pmri/gpuNlcgSenseGadget.h                  |    82 +
 gadgets/pmri/gpuSbSenseGadget.cpp                  |   435 +
 gadgets/pmri/gpuSbSenseGadget.h                    |    85 +
 gadgets/python/CMakeLists.txt                      |    23 +-
 gadgets/python/GadgetReference.cpp                 |     2 +-
 gadgets/python/GadgetReference.h                   |     2 +-
 gadgets/python/GadgetronPythonMRI.cpp              |    22 +-
 gadgets/python/PythonCommunicator.cpp              |    13 +-
 gadgets/python/PythonCommunicator.h                |     3 +
 gadgets/python/PythonGadget.h                      |    15 +-
 gadgets/radial/CMakeLists.txt                      |    59 +-
 gadgets/radial/RadialPhaseCorrectionGadget.cpp     |   314 +
 gadgets/radial/RadialPhaseCorrectionGadget.h       |    44 +
 gadgets/radial/config/CMakeLists.txt               |    10 +-
 .../config/fixed_radial_mode0_gpu_ktsense.xml      |     6 +-
 .../config/fixed_radial_mode0_gpusense_cg.xml      |     6 +-
 .../fixed_radial_mode0_gpusense_cg_unoptimized.xml |     6 +-
 .../config/fixed_radial_mode0_gpusense_sb.xml      |     6 +-
 .../fixed_radial_mode0_gpusense_sb_unoptimized.xml |     6 +-
 .../radial/config/fixed_radial_mode0_realtime.xml  |     6 +-
 .../config/fixed_radial_mode1_gpu_ktsense.xml      |     6 +-
 .../config/fixed_radial_mode1_gpusense_cg.xml      |     6 +-
 .../fixed_radial_mode1_gpusense_cg_unoptimized.xml |     6 +-
 .../config/fixed_radial_mode1_gpusense_sb.xml      |     6 +-
 .../fixed_radial_mode1_gpusense_sb_unoptimized.xml |     6 +-
 .../radial/config/fixed_radial_mode1_realtime.xml  |     6 +-
 .../config/golden_radial_mode2_gpu_ktsense.xml     |     6 +-
 .../config/golden_radial_mode2_gpusense_cg.xml     |     6 +-
 ...golden_radial_mode2_gpusense_cg_unoptimized.xml |     6 +-
 .../config/golden_radial_mode2_gpusense_nlcg.xml   |   158 +
 ...lden_radial_mode2_gpusense_nlcg_unoptimized.xml |   146 +
 .../config/golden_radial_mode2_gpusense_sb.xml     |     6 +-
 ...golden_radial_mode2_gpusense_sb_unoptimized.xml |     6 +-
 .../radial/config/golden_radial_mode2_realtime.xml |     6 +-
 .../config/golden_radial_mode3_gpusense_cg.xml     |   155 +
 .../config/golden_radial_mode3_gpusense_sb.xml     |     6 +-
 gadgets/radial/config/spirit.xml                   |   106 +
 gadgets/radial/gpuRadialPrepGadget.cpp             |   952 ++
 gadgets/radial/gpuRadialPrepGadget.h               |   207 +
 gadgets/radial/gpuRadialSensePrepGadget.cpp        |  1017 +-
 gadgets/radial/gpuRadialSensePrepGadget.h          |   190 +-
 gadgets/radial/gpuRadialSpiritPrepGadget.cpp       |    98 +
 gadgets/radial/gpuRadialSpiritPrepGadget.h         |    33 +
 gadgets/radial/gpuRetroGatedSensePrepGadget.cpp    |   890 ++
 gadgets/radial/gpuRetroGatedSensePrepGadget.h      |   133 +
 gadgets/sense/CMakeLists.txt                       |    43 -
 gadgets/sense/SenseJob.h                           |    27 -
 gadgets/sense/config/CMakeLists.txt                |    15 -
 .../config/generic_gpu_ktsense_singleshot.xml      |   115 -
 gadgets/sense/config/generic_gpusense_cg.xml       |   113 -
 .../config/generic_gpusense_cg_singleshot.xml      |   115 -
 .../config/generic_gpusense_sb_singleshot.xml      |   119 -
 gadgets/sense/gadgetron_gpusense_export.h          |    14 -
 gadgets/sense/gpuCgKtSenseGadget.cpp               |   370 -
 gadgets/sense/gpuCgKtSenseGadget.h                 |    71 -
 gadgets/sense/gpuCgSenseGadget.cpp                 |   321 -
 gadgets/sense/gpuCgSenseGadget.h                   |    71 -
 gadgets/sense/gpuGenericSensePrepGadget.cpp        |   948 --
 gadgets/sense/gpuGenericSensePrepGadget.h          |   127 -
 gadgets/sense/gpuSbSenseGadget.cpp                 |   426 -
 gadgets/sense/gpuSbSenseGadget.h                   |    85 -
 gadgets/spiral/CMakeLists.txt                      |    27 +-
 gadgets/spiral/SpiralToGenericGadget.cpp           |   173 +-
 gadgets/spiral/SpiralToGenericGadget.h             |     2 +-
 gadgets/spiral/config/CMakeLists.txt               |     4 +-
 .../config/spiral_flow_generic_gpusense_cg.xml     |     6 +-
 .../config/spiral_flow_generic_gpusense_sb.xml     |     6 +-
 gadgets/spiral/config/spiral_flow_gpusense_cg.xml  |     4 +-
 .../spiral/config/spiral_flow_gpusense_cg_ecg.xml  |     4 +-
 .../config/spiral_flow_gpusense_cg_unoptimized.xml |     4 +-
 gadgets/spiral/config/spiral_flow_gpusense_sb.xml  |     4 +-
 .../config/spiral_flow_gpusense_sb_unoptimized.xml |     4 +-
 gadgets/spiral/config/spiral_interactive.xml       |     4 +-
 gadgets/spiral/gpuSpiralSensePrepGadget.cpp        |   123 +-
 gadgets/spiral/gpuSpiralSensePrepGadget.h          |     2 +-
 gadgets/util/CMakeLists.txt                        |    27 +
 gadgets/util/ParameterRelayGadget.cpp              |    20 +
 gadgets/util/ParameterRelayGadget.h                |    18 +
 gadgets/util/gadgetron_util_gadgets_export.h       |    14 +
 test/CMakeLists.txt                                |    31 +-
 test/hoNDArray_blas_test.cpp                       |     2 +-
 test/hoNDArray_elemwise_test.cpp                   |   239 +
 test/hoNDArray_operators_test.cpp                  |     1 -
 test/integration/.gitignore                        |     8 +
 test/integration/CMakeLists.txt                    |    47 +
 test/integration/cases/always/simple_gre.cfg       |    24 +
 test/integration/cases/always/simple_gre_3d.cfg    |    26 +
 .../cases/gpu/gpu_fixed_radial_mode1_cg.cfg        |    24 +
 .../cases/gpu/gpu_fixed_radial_mode1_ktsense.cfg   |    24 +
 .../cases/gpu/gpu_fixed_radial_mode1_realtime.cfg  |    24 +
 .../cases/gpu/gpu_golden_radial_mode2_cg.cfg       |    24 +
 .../cases/gpu/gpu_golden_radial_mode2_ktsense.cfg  |    24 +
 .../cases/gpu/gpu_golden_radial_mode2_realtime.cfg |    24 +
 test/integration/cases/gpu/gpu_grappa_simple.cfg   |    25 +
 test/integration/cases/gpu/gpu_spiral.cfg          |    24 +
 test/integration/cases/gpu/gpu_spiral_sb.cfg       |    24 +
 .../linux_mac_only/gpu_golden_radial_mode2_sb.cfg  |    24 +
 test/integration/cases/mkl/gtplus_3D_head.cfg      |    24 +
 test/integration/cases/mkl/gtplus_FatWater.cfg     |    25 +
 test/integration/cases/mkl/gtplus_FetalHASTE.cfg   |    24 +
 test/integration/cases/mkl/gtplus_LGE.cfg          |    24 +
 test/integration/cases/mkl/gtplus_Perfusion.cfg    |    24 +
 test/integration/cases/mkl/gtplus_T2W.cfg          |    24 +
 test/integration/cases/mkl/gtplus_localizer.cfg    |    24 +
 test/integration/cases/mkl/gtplus_molli.cfg        |    24 +
 .../cases/mkl/gtplus_real_time_cine.cfg            |    24 +
 .../cases/mkl/gtplus_real_time_cine_9slices.cfg    |    24 +
 test/integration/cases/mkl/gtplus_sasha.cfg        |    24 +
 .../mkl/gtplus_snr_unit_recon_builtin_noise.cfg    |    24 +
 .../cases/mkl/gtplus_snr_unit_recon_ipat4.cfg      |    24 +
 .../mkl/gtplus_snr_unit_recon_prospective_cine.cfg |    24 +
 .../mkl/gtplus_snr_unit_recon_spat2_asym_pf.cfg    |    24 +
 .../cases/mkl/gtplus_snr_unit_recon_spat3.cfg      |    24 +
 .../cases/mkl/gtplus_snr_unit_recon_tpat3.cfg      |    24 +
 .../integration/cases/python/simple_gre_python.cfg |    24 +
 test/integration/data.txt                          |    62 +
 test/integration/get_data.py                       |    68 +
 test/integration/run_all_tests.py                  |    71 +
 test/integration/run_gadgetron_test.py             |   328 +
 test/tests.cpp                                     |     1 +
 test/unit/run_unit_tests.py                        |    62 +
 toolboxes/CMakeLists.txt                           |    44 +-
 toolboxes/cloudbus/CMakeLists.txt                  |    31 +
 toolboxes/cloudbus/CloudBus.cpp                    |   220 +
 toolboxes/cloudbus/CloudBus.h                      |   117 +
 toolboxes/cloudbus/cloudbus_export.h               |    14 +
 toolboxes/cloudbus/cloudbus_main.cpp               |    35 +
 toolboxes/core/CMakeLists.txt                      |     6 +-
 toolboxes/core/GadgetronCommon.h                   |    39 +-
 toolboxes/core/GadgetronTimer.h                    |     5 +-
 toolboxes/core/Gadgetron_enable_types.h            |    12 +
 toolboxes/core/NDArray.h                           |   264 +-
 toolboxes/core/SerializableObject.h                |    27 -
 toolboxes/core/complext.h                          |    10 +-
 toolboxes/core/cpu/CMakeLists.txt                  |   143 +-
 toolboxes/core/cpu/algorithm/hoNDBSpline.h         |   191 +
 toolboxes/core/cpu/algorithm/hoNDBSpline.hxx       |  2133 ++++
 toolboxes/core/cpu/arma_math/CMakeLists.txt        |    48 -
 toolboxes/core/cpu/arma_math/cpucore_math_export.h |    22 -
 toolboxes/core/cpu/arma_math/hoArmadillo.h         |    84 -
 toolboxes/core/cpu/arma_math/hoNDArray_blas.cpp    |   648 --
 toolboxes/core/cpu/arma_math/hoNDArray_blas.h      |   181 -
 .../core/cpu/arma_math/hoNDArray_elemwise.cpp      |  4810 ---------
 toolboxes/core/cpu/arma_math/hoNDArray_elemwise.h  |   400 -
 toolboxes/core/cpu/arma_math/hoNDArray_math.h      |     6 -
 .../core/cpu/arma_math/hoNDArray_operators.cpp     |   457 -
 toolboxes/core/cpu/arma_math/hoNDArray_operators.h |   239 -
 .../core/cpu/arma_math/hoNDArray_reductions.cpp    |    41 -
 .../core/cpu/arma_math/hoNDArray_reductions.h      |    12 -
 toolboxes/core/cpu/dummy.cpp                       |    18 +
 toolboxes/core/cpu/gadgetronmath.h                 |    26 +
 toolboxes/core/cpu/ho2DArray.h                     |    34 +-
 toolboxes/core/cpu/ho3DArray.h                     |     2 +-
 toolboxes/core/cpu/ho4DArray.h                     |     2 +-
 toolboxes/core/cpu/ho5DArray.h                     |     2 +-
 toolboxes/core/cpu/ho5DArray.hxx                   |     2 +-
 toolboxes/core/cpu/ho6DArray.h                     |     2 +-
 toolboxes/core/cpu/ho6DArray.hxx                   |     2 +-
 toolboxes/core/cpu/ho7DArray.h                     |     2 +-
 toolboxes/core/cpu/ho7DArray.hxx                   |     2 +-
 toolboxes/core/cpu/hoMatrix.cpp                    |  1327 +--
 toolboxes/core/cpu/hoMatrix.h                      |    61 +-
 toolboxes/core/cpu/hoMatrix.hxx                    |   901 +-
 toolboxes/core/cpu/hoNDArray.h                     |    34 +-
 toolboxes/core/cpu/hoNDArray.hxx                   |  1915 ++--
 toolboxes/core/cpu/hoNDArray_utils.h               |   208 +-
 toolboxes/core/cpu/hoNDBoundaryHandler.h           |   276 +
 toolboxes/core/cpu/hoNDBoundaryHandler.hxx         |   470 +
 toolboxes/core/cpu/hoNDFFT.cpp                     |  1713 ---
 toolboxes/core/cpu/hoNDFFT.h                       |   222 -
 toolboxes/core/cpu/hoNDInterpolator.h              |   307 +
 toolboxes/core/cpu/hoNDInterpolatorBSpline.hxx     |   339 +
 toolboxes/core/cpu/hoNDInterpolatorLinear.hxx      |   874 ++
 .../core/cpu/hoNDInterpolatorNearestNeighbor.hxx   |    94 +
 toolboxes/core/cpu/hoNDObjectArray.h               |   200 +
 toolboxes/core/cpu/hoNDPoint.h                     |   338 +
 toolboxes/core/cpu/hostutils/CMakeLists.txt        |    10 +-
 toolboxes/core/cpu/image/hoNDImage.h               |   517 +
 toolboxes/core/cpu/image/hoNDImage.hxx             |  2980 ++++++
 toolboxes/core/cpu/image/hoNDImageAttrib.h         |   329 +
 toolboxes/core/cpu/image/hoNDImageContainer2D.h    |  1223 +++
 toolboxes/core/cpu/math/CMakeLists.txt             |    76 +
 toolboxes/core/cpu/math/cpucore_math_export.h      |    22 +
 toolboxes/core/cpu/math/hoArmadillo.h              |    89 +
 toolboxes/core/cpu/math/hoNDArray_elemwise.cpp     |  3158 ++++++
 toolboxes/core/cpu/math/hoNDArray_elemwise.h       |   641 ++
 toolboxes/core/cpu/math/hoNDArray_linalg.cpp       |  1949 ++++
 toolboxes/core/cpu/math/hoNDArray_linalg.h         |    90 +
 toolboxes/core/cpu/math/hoNDArray_math.h           |     4 +
 toolboxes/core/cpu/math/hoNDArray_math_util.cpp    |  2178 ++++
 toolboxes/core/cpu/math/hoNDArray_math_util.h      |    27 +
 toolboxes/core/cpu/math/hoNDArray_reductions.cpp   |   933 ++
 toolboxes/core/cpu/math/hoNDArray_reductions.h     |   203 +
 toolboxes/core/cpu/math/hoNDImage_util.cpp         |   877 ++
 toolboxes/core/cpu/math/hoNDImage_util.h           |    76 +
 toolboxes/core/cpu/math/hoNDImage_util.hxx         |  1020 ++
 .../core/cpu/math/hoNDImage_util_instantiate.hxx   |    15 +
 toolboxes/core/gpu/CMakeLists.txt                  |    23 +-
 toolboxes/core/gpu/CUBLASContextProvider.cpp       |    16 +-
 toolboxes/core/gpu/GPUTimer.h                      |    88 +-
 toolboxes/core/gpu/cuNDArray.h                     |  1114 +-
 toolboxes/core/gpu/cuNDArray_blas.h                |     1 +
 toolboxes/core/gpu/cuNDArray_elemwise.cu           |    46 +
 toolboxes/core/gpu/cuNDArray_elemwise.h            |    16 +-
 toolboxes/core/gpu/cuNDArray_fileio.h              |     9 +
 toolboxes/core/gpu/cuNDArray_math.h                |     1 +
 toolboxes/core/gpu/cuNDArray_operators.cu          |    34 +-
 toolboxes/core/gpu/cuNDArray_operators.h           |    33 +-
 toolboxes/core/gpu/cuNDArray_utils.cu              |    20 +-
 toolboxes/core/gpu/cuNDArray_utils.h               |   170 +-
 toolboxes/core/gpu/cuNDFFT.cpp                     |   156 -
 toolboxes/core/gpu/cuNDFFT.h                       |    49 -
 toolboxes/core/gpu/hoCuNDArray_math.h              |     1 -
 toolboxes/core/gpu/hoCuNDArray_operators.h         |     9 -
 toolboxes/core/gpu/radial_utilities.cu             |     8 +-
 toolboxes/core/vector_td.h                         |    10 +-
 toolboxes/core/vector_td_utilities.h               |     5 +-
 toolboxes/ct/CMakeLists.txt                        |     3 +
 toolboxes/ct/xray/CMakeLists.txt                   |     3 +
 toolboxes/ct/xray/gpu/CBCT_acquisition.h           |   298 +
 toolboxes/ct/xray/gpu/CBCT_binning.h               |   166 +
 toolboxes/ct/xray/gpu/CMakeLists.txt               |    49 +
 toolboxes/ct/xray/gpu/conebeam_projection.cu       |  1151 ++
 toolboxes/ct/xray/gpu/conebeam_projection.h        |    76 +
 toolboxes/ct/xray/gpu/float3x3.h                   |    66 +
 toolboxes/ct/xray/gpu/gpuxray_export.h             |    19 +
 .../ct/xray/gpu/hoCuConebeamProjectionOperator.cpp |   261 +
 .../ct/xray/gpu/hoCuConebeamProjectionOperator.h   |   150 +
 toolboxes/fft/CMakeLists.txt                       |    11 +
 toolboxes/fft/cpu/CMakeLists.txt                   |    45 +
 toolboxes/fft/cpu/cpufft_export.h                  |    22 +
 toolboxes/fft/cpu/hoNDFFT.cpp                      |  1693 +++
 toolboxes/fft/cpu/hoNDFFT.h                        |   245 +
 toolboxes/fft/gpu/CMakeLists.txt                   |    33 +
 toolboxes/fft/gpu/cuNDFFT.cpp                      |   157 +
 toolboxes/fft/gpu/cuNDFFT.h                        |    44 +
 toolboxes/fft/gpu/gpufft_export.h                  |    18 +
 toolboxes/gadgettools/CMakeLists.txt               |    93 +-
 toolboxes/gadgettools/GadgetCloudController.h      |   126 +-
 toolboxes/gadgettools/GadgetImageMessageReader.h   |    71 -
 toolboxes/gadgettools/GadgetServerAcceptor.cpp     |    58 -
 toolboxes/gadgettools/GadgetServerAcceptor.h       |    27 -
 toolboxes/gadgettools/GadgetStreamController.cpp   |   459 -
 toolboxes/gadgettools/GadgetStreamController.h     |   559 -
 toolboxes/gadgettools/GadgetronCloudConnector.h    |    22 +-
 toolboxes/gadgettools/GadgetronConnector.cpp       |    17 +-
 toolboxes/gadgettools/GadgetronConnector.h         |    23 +-
 toolboxes/gadgettools/GadgetronOSUtil.cpp          |    50 +
 toolboxes/gadgettools/GadgetronOSUtil.h            |    21 +
 toolboxes/gadgettools/GadgetronSlotContainer.h     |     7 -
 toolboxes/gadgettools/gadgettools_export.h         |     2 +-
 toolboxes/gadgettools/ismrmrd/CMakeLists.txt       |     4 +
 .../gadgettools/ismrmrd/GadgetImageMessageReader.h |   196 +
 .../{ => ismrmrd}/GadgetImageMessageWriter.h       |     0
 toolboxes/gadgettools/schema/gadgetron.xsd         |    89 +-
 toolboxes/gtplus/CMakeLists.txt                    |   318 +-
 toolboxes/gtplus/GtPlusDefinition.h                |   222 +
 toolboxes/gtplus/GtPlusIOExport.h                  |    20 +
 .../FreeFormDeformation/gtplusBSplineFFD.h         |   820 ++
 .../FreeFormDeformation/gtplusBSplineFFD2D.h       |   597 ++
 .../FreeFormDeformation/gtplusBSplineFFD3D.h       |   740 ++
 .../FreeFormDeformation/gtplusBSplineFFD4D.h       |   905 ++
 .../algorithm/FreeFormDeformation/gtplusFFDBase.h  |  1976 ++++
 .../algorithm/FreeFormDeformation/gtplusMLFFD.h    |   436 +
 toolboxes/gtplus/algorithm/gtPlusAlgorithmBase.h   |     6 +-
 .../gtplus/algorithm/gtPlusDataFidelityOperator.h  |    18 +-
 toolboxes/gtplus/algorithm/gtPlusGRAPPA.h          |   501 +-
 toolboxes/gtplus/algorithm/gtPlusOperator.h        |    32 +-
 toolboxes/gtplus/algorithm/gtPlusSPIRIT.h          |   816 +-
 .../gtplus/algorithm/gtPlusSPIRIT2DOperator.h      |    38 +-
 .../gtplus/algorithm/gtPlusSPIRIT2DTOperator.h     |    48 +-
 .../algorithm/gtPlusSPIRITNoNullSpace2DTOperator.h |     4 +-
 .../algorithm/gtPlusSPIRITNoNullSpaceOperator.h    |     4 +-
 toolboxes/gtplus/algorithm/gtPlusSPIRITOperator.h  |    22 +-
 .../gtplus/algorithm/gtPlusWavelet2DOperator.h     |    65 +-
 .../gtplus/algorithm/gtPlusWavelet3DOperator.h     |   504 +-
 .../algorithm/gtPlusWaveletNoNullSpace2DOperator.h |     6 +-
 .../algorithm/gtPlusWaveletNoNullSpace3DOperator.h |     7 +-
 toolboxes/gtplus/algorithm/gtPlusWaveletOperator.h |   144 +-
 .../GadgetronProgram_gtPlus_2DT_Cartesian.xml      |   798 --
 ...etronProgram_gtPlus_2DT_Cartesian_CloudNode.xml |    67 -
 ...us_2DT_Cartesian_DualLayer_Gateway_L1SPIRIT.xml |   808 --
 ...Plus_2DT_Cartesian_DualLayer_Gateway_SPIRIT.xml |   808 --
 ...m_gtPlus_2DT_Cartesian_FirstLayer_CloudNode.xml |   269 -
 ...getronProgram_gtPlus_2DT_Cartesian_L1SPIRIT.xml |   799 --
 ...adgetronProgram_gtPlus_2DT_Cartesian_SPIRIT.xml |   799 --
 .../GadgetronProgram_gtPlus_2DT_FatWater.xml       |   654 --
 .../config/GadgetronProgram_gtPlus_2DT_LGE.xml     |   654 --
 .../config/GadgetronProgram_gtPlus_2DT_MOLLI.xml   |   654 --
 .../GadgetronProgram_gtPlus_2DT_Perfusion.xml      |   655 --
 .../GadgetronProgram_gtPlus_2DT_RealTimeCine.xml   |   741 --
 .../GadgetronProgram_gtPlus_2DT_RealTimeFlow.xml   |   689 --
 .../config/GadgetronProgram_gtPlus_2DT_T2W.xml     |   654 --
 .../GadgetronProgram_gtPlus_3DT_Cartesian.xml      |   787 --
 ...etronProgram_gtPlus_3DT_Cartesian_CloudNode.xml |    72 -
 ...getronProgram_gtPlus_3DT_Cartesian_L1SPIRIT.xml |   816 --
 ...adgetronProgram_gtPlus_3DT_Cartesian_SPIRIT.xml |   795 --
 toolboxes/gtplus/config/gtCloud/myCloud_2DT.txt    |     8 -
 .../config/gtCloud/myCloud_2DT_DualLayer.txt       |     8 -
 .../gtCloud/myCloud_2DT_DualLayer_FirstLayer.txt   |     8 -
 toolboxes/gtplus/config/gtCloud/myCloud_3DT.txt    |    12 -
 toolboxes/gtplus/matlab/CMakeLists.txt             |    10 -
 toolboxes/gtplus/matlab/FtkMatlabConverterBase.h   |   569 -
 toolboxes/gtplus/matlab/gtMatlab.h                 |    51 +
 toolboxes/gtplus/matlab/gtMatlabConverter.h        |    65 +-
 toolboxes/gtplus/matlab/gtMatlabConverterComplex.h |    49 +-
 toolboxes/gtplus/matlab/gtMatlabImage.h            |   255 +
 toolboxes/gtplus/solver/gtPlusLSQRSolver.h         |    44 +-
 toolboxes/gtplus/solver/gtPlusLinearSolver.h       |     3 +-
 toolboxes/gtplus/solver/gtPlusNCGSolver.h          |    57 +-
 toolboxes/gtplus/solver/gtPlusNonLinearSolver.h    |     1 +
 toolboxes/gtplus/solver/gtPlusSolver.h             |     2 +-
 toolboxes/gtplus/ut/CMakeLists.txt                 |    56 +-
 toolboxes/gtplus/ut/grappa_test.cpp                |    59 +-
 toolboxes/gtplus/ut/spirit_test.cpp                |   425 -
 toolboxes/gtplus/ut/util_test.cpp                  |  1195 ---
 toolboxes/gtplus/util/gtPlusIOAnalyze.cpp          |   176 +-
 toolboxes/gtplus/util/gtPlusIOAnalyze.h            |   837 +-
 toolboxes/gtplus/util/gtPlusIOBase.cpp             |   146 +-
 toolboxes/gtplus/util/gtPlusIOBase.h               |   798 +-
 toolboxes/gtplus/util/gtPlusMemoryManager.cpp      |    17 +-
 toolboxes/gtplus/util/gtPlusMemoryManager.h        |     2 -
 toolboxes/gtplus/util/gtPlusUtil.h                 |    96 +
 toolboxes/gtplus/util/gtPlusUtil.hxx               |   149 +
 toolboxes/gtplus/workflow/gtPlusCloudScheduler.cpp |     2 +-
 .../workflow/gtPlusISMRMRDReconCoilMapEstimation.h |   137 +
 .../gtplus/workflow/gtPlusISMRMRDReconUtil.cpp     |  2550 ++++-
 toolboxes/gtplus/workflow/gtPlusISMRMRDReconUtil.h |   383 +-
 .../gtplus/workflow/gtPlusISMRMRDReconUtil.hxx     |  2036 ++--
 .../gtplus/workflow/gtPlusISMRMRDReconWorkFlow.h   |   109 +-
 .../workflow/gtPlusISMRMRDReconWorkFlowCartesian.h |  1155 +-
 .../gtPlusISMRMRDReconWorkFlowCartesian2DT.h       |    21 +-
 .../gtPlusISMRMRDReconWorkFlowCartesian3DT.h       |    19 +-
 .../gtplus/workflow/gtPlusISMRMRDReconWorkOrder.h  |   880 +-
 .../workflow/gtPlusISMRMRDReconWorkOrder2DT.h      |    27 +
 .../workflow/gtPlusISMRMRDReconWorkOrder3DT.h      |    25 +
 .../gtplus/workflow/gtPlusISMRMRDReconWorker.h     |    50 +-
 .../gtplus/workflow/gtPlusISMRMRDReconWorker2DT.h  |   500 +-
 .../workflow/gtPlusISMRMRDReconWorker2DTGRAPPA.h   |   149 +-
 .../gtPlusISMRMRDReconWorker2DTL1SPIRITNCG.h       |    56 +-
 .../gtPlusISMRMRDReconWorker2DTNoAcceleration.h    |    50 +-
 .../workflow/gtPlusISMRMRDReconWorker2DTSPIRIT.h   |   107 +-
 .../gtplus/workflow/gtPlusISMRMRDReconWorker3DT.h  |   431 +-
 .../workflow/gtPlusISMRMRDReconWorker3DTGRAPPA.h   |    52 +-
 .../gtPlusISMRMRDReconWorker3DTL1SPIRITNCG.h       |    89 +-
 .../gtPlusISMRMRDReconWorker3DTNoAcceleration.h    |    44 +-
 .../workflow/gtPlusISMRMRDReconWorker3DTSPIRIT.h   |    53 +-
 toolboxes/linalg/CMakeLists.txt                    |    37 +
 toolboxes/linalg/generate_test_data.m              |    32 +
 toolboxes/linalg/linalg_export.h                   |    25 +
 .../linalg/linalg_ground_truth_comparison.cpp      |   418 +
 toolboxes/linalg/linalg_test.cpp                   |   580 +
 toolboxes/linalg/matrix_decomposition.cpp          |   364 +
 toolboxes/linalg/matrix_decomposition.h            |    42 +
 toolboxes/linalg/matrix_vector_op.cpp              |   313 +
 toolboxes/linalg/matrix_vector_op.h                |    52 +
 toolboxes/linalg/write_mr_raw.m                    |    18 +
 toolboxes/mri/CMakeLists.txt                       |     5 +
 toolboxes/mri/epi/CMakeLists.txt                   |    63 +
 toolboxes/mri/epi/EPIExport.h                      |    20 +
 toolboxes/mri/epi/EPIReconXObject.h                |    73 +
 toolboxes/mri/epi/EPIReconXObjectFlat.h            |   189 +
 toolboxes/mri/epi/EPIReconXObjectTrapezoid.h       |   236 +
 toolboxes/mri/pmri/CMakeLists.txt                  |     4 +-
 toolboxes/mri/pmri/gpu/CMakeLists.txt              |    46 +-
 toolboxes/mri/pmri/gpu/b1_map.cu                   |    64 +-
 toolboxes/mri/pmri/gpu/b1_map_NIH_Souheil.cu       |    14 +-
 toolboxes/mri/pmri/gpu/cuBuffer.cpp                |   197 +
 toolboxes/mri/pmri/gpu/cuBuffer.h                  |    61 +
 toolboxes/mri/pmri/gpu/cuCartesianSenseOperator.cu |     4 +-
 .../mri/pmri/gpu/cuNonCartesianSenseOperator.cu    |    20 -
 toolboxes/mri/pmri/gpu/cuSenseBuffer.cpp           |   204 +-
 toolboxes/mri/pmri/gpu/cuSenseBuffer.h             |    57 +-
 toolboxes/mri/pmri/gpu/cuSenseBufferCg.h           |     2 +-
 toolboxes/mri/pmri/gpu/cuSpiritBuffer.cpp          |    89 +
 toolboxes/mri/pmri/gpu/cuSpiritBuffer.h            |    43 +
 toolboxes/mri/pmri/gpu/cuSpiritOperator.h          |   130 +
 toolboxes/mri/pmri/gpu/htgrappa.cpp                |    48 +
 toolboxes/mri/pmri/gpu/htgrappa.cu                 |   387 +-
 toolboxes/mri/pmri/gpu/htgrappa.h                  |    35 +-
 toolboxes/mri/pmri/gpu/sense_utilities.cu          |     4 +-
 toolboxes/mri/pmri/gpu/spirit_calibration.cu       |   363 +
 toolboxes/mri/pmri/gpu/spirit_calibration.h        |    22 +
 toolboxes/mri_core/CMakeLists.txt                  |     3 +
 toolboxes/mri_core/mri_core_data.h                 |   262 +
 toolboxes/nfft/gpu/CMakeLists.txt                  |    26 +-
 toolboxes/nfft/gpu/NFFT_C2NC_conv_kernel.cu        |    16 +-
 toolboxes/nfft/gpu/NFFT_NC2C_atomic_conv_kernel.cu |    14 +-
 toolboxes/nfft/gpu/NFFT_NC2C_conv_kernel.cu        |    10 +-
 toolboxes/nfft/gpu/NFFT_preprocess_kernel.cu       |    12 +-
 toolboxes/nfft/gpu/cuNFFT.cu                       |    47 +-
 toolboxes/nfft/gpu/cuNFFT.h                        |    82 +-
 toolboxes/operators/CMakeLists.txt                 |     3 +-
 toolboxes/operators/FFTOperator.h                  |   116 +-
 toolboxes/operators/cpu/CMakeLists.txt             |     4 +-
 toolboxes/operators/cpu/hoDiagonalOperator.h       |    20 +
 toolboxes/operators/cpu/hoDiagonalSumOperator.h    |    20 +
 toolboxes/operators/diagonalOperator.h             |    52 +-
 toolboxes/operators/diagonalSumOperator.h          |    95 +
 toolboxes/operators/encodedImageOperator.h         |     2 +-
 toolboxes/operators/generalOperator.h              |    25 +-
 toolboxes/operators/gpu/CMakeLists.txt             |    30 +-
 toolboxes/operators/gpu/cuConvolutionOperator.cu   |     2 +-
 toolboxes/operators/gpu/cuConvolutionOperator.h    |     3 +-
 toolboxes/operators/gpu/cuDiagonalOperator.h       |     2 +-
 toolboxes/operators/gpu/cuDiagonalSumOperator.h    |    20 +
 toolboxes/operators/gpu/cuLaplaceOperator.cu       |     6 +-
 toolboxes/operators/gpu/cuLaplaceOperator.h        |     3 +-
 .../operators/gpu/cuPartialDerivativeOperator.cu   |     8 +-
 .../operators/gpu/cuPartialDerivativeOperator.h    |     3 +-
 toolboxes/operators/gpu/cuTv1dOperator.cu          |     4 +-
 toolboxes/operators/gpu/cuTv1dOperator.h           |     2 +-
 toolboxes/operators/gpu/cuTvOperator.cu            |   167 +-
 toolboxes/operators/gpu/cuTvOperator.h             |     4 +-
 toolboxes/operators/gpu/gpuoperators_export.h      |     2 +-
 toolboxes/operators/gpu/hoCuDiagonalOperator.h     |    20 +
 toolboxes/operators/gpu/hoCuIdentityOperator.h     |    28 +
 .../operators/gpu/hoCuPartialDerivativeOperator.h  |    94 +
 toolboxes/operators/gpu/hoCuTvOperator.h           |     2 +-
 toolboxes/registration/optical_flow/CMakeLists.txt |     7 +-
 .../registration/optical_flow/cpu/CMakeLists.txt   |   166 +-
 .../hoImageRegContainer2DRegistration.h            |  1449 +++
 .../cpu/dissimilarity/hoImageRegDissimilarity.h    |   251 +
 .../hoImageRegDissimilarityHistogramBased.h        |   226 +
 .../hoImageRegDissimilarityLocalCCR.h              |   412 +
 .../hoImageRegDissimilarityMutualInformation.h     |   295 +
 ...geRegDissimilarityNormalizedMutualInformation.h |   173 +
 .../cpu/dissimilarity/hoImageRegDissimilaritySSD.h |   108 +
 .../optical_flow/cpu/hoOpticalFlowSolver.h         |     4 +-
 .../optical_flow/cpu/hoRegistration_utils.cpp      |   233 -
 .../optical_flow/cpu/hoRegistration_utils.h        |    13 -
 ...ImageRegDeformationFieldBidirectionalRegister.h |   501 +
 .../register/hoImageRegDeformationFieldRegister.h  |   527 +
 .../cpu/register/hoImageRegNonParametricRegister.h |   148 +
 .../cpu/register/hoImageRegParametricRegister.h    |   408 +
 .../optical_flow/cpu/register/hoImageRegRegister.h |   651 ++
 ...hoImageRegDeformationFieldBidirectionalSolver.h |   602 ++
 .../cpu/solver/hoImageRegDeformationFieldSolver.h  |   673 ++
 .../cpu/solver/hoImageRegNonParametricSolver.h     |   162 +
 .../solver/hoImageRegParametricDownHillSolver.h    |   166 +
 .../hoImageRegParametricGradientDescentSolver.h    |   146 +
 .../cpu/solver/hoImageRegParametricSolver.h        |   326 +
 .../optical_flow/cpu/solver/hoImageRegSolver.h     |   210 +
 .../transformation/hoImageRegDeformationField.h    |   964 ++
 .../hoImageRegHomogenousTransformation.h           |   475 +
 .../hoImageRegNonParametricTransformation.h        |    82 +
 .../hoImageRegParametricTransformation.h           |   227 +
 .../hoImageRegRigid2DTransformation.h              |   380 +
 .../hoImageRegRigid3DTransformation.h              |   491 +
 .../cpu/transformation/hoImageRegTransformation.h  |   408 +
 .../optical_flow/cpu/warper/hoImageRegWarper.h     |   529 +
 .../registration/optical_flow/gpu/CMakeLists.txt   |    12 +-
 .../optical_flow/gpu/cuCKOpticalFlowSolver.cu      |     8 +-
 .../optical_flow/gpu/cuHSOpticalFlowSolver.cu      |     8 +-
 .../optical_flow/gpu/cuLinearResampleOperator.cu   |     6 +-
 .../optical_flow/gpu/cuOpticalFlowSolver.cu        |     8 +-
 toolboxes/solvers/CMakeLists.txt                   |     5 +-
 toolboxes/solvers/cgPreconditioner.h               |     6 +-
 toolboxes/solvers/cpu/CMakeLists.txt               |     5 +-
 toolboxes/solvers/cpu/hoCgPreconditioner.h         |     2 +-
 toolboxes/solvers/cpu/hoCgSolver.h                 |     4 +-
 toolboxes/solvers/cpu/hoGpBbSolver.h               |    21 +-
 toolboxes/solvers/cpu/hoSolverUtils.h              |    26 +
 toolboxes/solvers/gpBbSolver.h                     |     1 -
 toolboxes/solvers/gpSolver.h                       |     2 +-
 toolboxes/solvers/gpu/CMakeLists.txt               |    31 +-
 toolboxes/solvers/gpu/cuGpBbSolver.cu              |    40 -
 toolboxes/solvers/gpu/cuGpBbSolver.h               |    14 +-
 toolboxes/solvers/gpu/cuLbfgsSolver.h              |    36 +
 toolboxes/solvers/gpu/cuNlcgSolver.h               |    24 +
 toolboxes/solvers/gpu/cuSolverUtils.cu             |    38 +
 toolboxes/solvers/gpu/cuSolverUtils.h              |    11 +
 toolboxes/solvers/gpu/hoCuCgSolver.h               |    34 +
 toolboxes/solvers/gpu/hoCuGpBbSolver.h             |    19 +-
 toolboxes/solvers/gpu/hoCuNlcgSolver.h             |    35 +
 toolboxes/solvers/gpu/hoCuSbcCgSolver.h            |    16 +
 toolboxes/solvers/lbfgsSolver.h                    |   825 ++
 toolboxes/solvers/lsqrSolver.h                     |   173 +
 toolboxes/solvers/nlcgSolver.h                     |   776 ++
 toolboxes/solvers/sbSolver.h                       |    14 +-
 toolboxes/solvers/solver.h                         |     4 -
 906 files changed, 129453 insertions(+), 69810 deletions(-)

diff --git a/.gitignore b/.gitignore
index fd1fe60..d2ad758 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,7 +16,10 @@ lib/*.pyc
 .DS_Store
 *.swp
 build/
+build_debug/
 *.pyc
 toolboxes/core/core_defines.h
 prod/
 external/
+test/integration/test_cases.txt
+*.h5
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f43f7a2..5b8f54c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,15 +1,50 @@
 cmake_minimum_required(VERSION 2.8)
 project(GADGETRON)
 
+#VERSIONING AND INSTALL PATHS
+set(GADGETRON_VERSION_MAJOR 3)
+set(GADGETRON_VERSION_MINOR 1)
+set(GADGETRON_VERSION_PATCH 0) 
+set(GADGETRON_VERSION_STRING ${GADGETRON_VERSION_MAJOR}.${GADGETRON_VERSION_MINOR}.${GADGETRON_VERSION_PATCH})
+set(GADGETRON_SOVERSION ${GADGETRON_VERSION_MAJOR}.${GADGETRON_VERSION_MINOR})
+find_package(Git)
+if (GIT_FOUND)
+  execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse HEAD WORKING_DIRECTORY
+      ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE GADGETRON_GIT_SHA1 ERROR_VARIABLE GADGETRON_GIT_STDERR)
+  string(STRIP "${GADGETRON_GIT_SHA1}" GADGETRON_GIT_SHA1)
+  string(LENGTH "${GADGETRON_GIT_SHA1}" GADGETRON_GIT_SHA1_LEN)
+  if(${GADGETRON_GIT_SHA1_LEN} LESS 40)
+    message(WARNING "Could not determine SHA-1 hash: ${GADGETRON_GIT_STDERR}")
+    set(GADGETRON_GIT_SHA1 "NA")
+  endif(${GADGETRON_GIT_SHA1_LEN} LESS 40)
+else()
+  set(GADGETRON_GIT_SHA1 "NA")
+endif()
+set(GADGETRON_INSTALL_CONFIG_PATH config)
+set(GADGETRON_INSTALL_MATLAB_PATH share/gadgetron/matlab)
+set(GADGETRON_INSTALL_PYTHON_MODULE_PATH share/gadgetron/python)
+set(CMAKE_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}/gadgetron)
+list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)
+
+#Set the build type to Release if not specified
+IF(NOT CMAKE_BUILD_TYPE)
+  SET(CMAKE_BUILD_TYPE Release CACHE STRING
+      "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel."
+      FORCE)
+ENDIF(NOT CMAKE_BUILD_TYPE)
+
 # build options for 64 bits system
 if( CMAKE_SIZEOF_VOID_P EQUAL 8 )
-  message(" 64bit system is found  ... ")
+  message("64bit system is found")
   set( HAS_64_BIT On CACHE BOOL "64bit build")
 else( CMAKE_SIZEOF_VOID_P EQUAL 8 )
-  message(" 32bit system is found  ... ")
+  message("32bit system is found")
   set( HAS_64_BIT Off CACHE BOOL "64bit build")
 endif( CMAKE_SIZEOF_VOID_P EQUAL 8 )
 
+# whether to install dependencies
+OPTION(GADGETRON_INSTALL_DEPENDENCIES "Install gadgetron dependencies" Off)
+
 # build options for OpenMP support
 find_package(OpenMP)
 OPTION(USE_OPENMP "Use OpenMP" On)
@@ -28,13 +63,23 @@ endif (OPENMP_FOUND)
 
 if (WIN32)
     ADD_DEFINITIONS(-DWIN32 -D_WIN32 -D_WINDOWS)
-    ADD_DEFINITIONS(-DUNICODE -D_UNICODE)
+#    ADD_DEFINITIONS(-DUNICODE -D_UNICODE)
     ADD_DEFINITIONS(-D_CRT_SECURE_NO_WARNINGS)
+    ADD_DEFINITIONS(-D_VARIADIC_MAX=10) #to fix compiler limitations in Visual Studio Express
     if ( HAS_64_BIT )
         ADD_DEFINITIONS(-DWIN64 -D_WIN64)
     endif ( HAS_64_BIT )
     SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /MP")
     SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W3")
+    SET (CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} /INCREMENTAL:NO")
+    SET (CMAKE_C_LINK_FLAGS "${CMAKE_C_LINK_FLAGS} /INCREMENTAL:NO")
+    SET (CMAKE_EXE_LINKER_FLAGS_DEBUG "/debug /INCREMENTAL:NO")
+    SET (CMAKE_SHARED_LINKER_FLAGS_DEBUG "/debug /INCREMENTAL:NO")
+    SET (CMAKE_STATIC_LINKER_FLAGS_DEBUG "/debug /INCREMENTAL:NO")
+    SET (CMAKE_MODULE_LINKER_FLAGS_DEBUG "/debug /INCREMENTAL:NO")
+    # The two flags below are to fix Windows problems in relation to multiply defined operators new/delete and some constructors that are defined in our headers
+    #SET (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /FORCE:MULTIPLE") 
+    #SET (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /FORCE:MULTIPLE") 
 else (WIN32)
     if (UNIX)
         if (APPLE)
@@ -42,16 +87,9 @@ else (WIN32)
             SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
             SET (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libgfortran")
         endif (APPLE)
-  endif (UNIX)
+    endif (UNIX)
 endif (WIN32)
 
-set(CMAKE_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}/gadgetron)
-list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)
-
-# uncomment these if all compiled targets are to be stored in the same directory
-#SET(EXECUTABLE_OUTPUT_PATH ${CMAKE_SOURCE_DIR}/bin CACHE STRING "Where to put the executables")
-#SET(LIBRARY_OUTPUT_PATH ${CMAKE_SOURCE_DIR}/bin CACHE STRING "Where to put the libraries")
-
 # whether to suppress compilation warnings
 OPTION(BUILD_SUPPRESS_WARNINGS "Build package while suppressing warnings" Off)
 if (BUILD_SUPPRESS_WARNINGS)
@@ -66,41 +104,71 @@ endif (BUILD_SUPPRESS_WARNINGS)
 OPTION(BUILD_TOOLBOX_STATIC "Build static library for toolboxes" Off)
 
 if ( BUILD_TOOLBOX_STATIC )
-    message("Build static toolbox libray ... ")
+    message("Building static toolbox libray")
     ADD_DEFINITIONS(-DBUILD_TOOLBOX_STATIC)
     set(LIBTYPE STATIC)
     if ( CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX )
         SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpic")
     endif ( CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX )
 else ( BUILD_TOOLBOX_STATIC )
-    message("Build dynamic toolbox libray ... ")
+    message("Building dynamic toolbox libray")
     set(LIBTYPE SHARED)
 endif ( BUILD_TOOLBOX_STATIC )
 
+set(Boost_USE_STATIC_LIBS OFF)
 set(Boost_USE_MULTITHREADED ON)
 set(Boost_USE_STATIC_RUNTIME OFF)
+# necessary for Windows and RHEL <=6 systems
 set(Boost_NO_BOOST_CMAKE ON)
 
-# We actually only use system and thread explicitly, but they require linking in date_time and chrono...
+if(WIN32)
+  add_definitions( -DBOOST_ALL_NO_LIB )
+  add_definitions( -DBOOST_ALL_DYN_LINK )
+endif(WIN32)
+
+if(WIN32)
+  add_definitions( -DBOOST_ALL_NO_LIB )
+  add_definitions( -DBOOST_ALL_DYN_LINK )
+endif(WIN32)
+
+# We actually only use system and thread explicitly, but they require linking in date_time and chrono
 if (WIN32)
-  find_package(Boost COMPONENTS system thread date_time chrono REQUIRED)
+  find_package(Boost COMPONENTS system thread date_time chrono program_options filesystem REQUIRED)
 else(WIN32)
-  find_package(Boost COMPONENTS system thread REQUIRED)
+  find_package(Boost COMPONENTS system thread program_options filesystem REQUIRED)
 endif(WIN32)
 
-find_package(FFTW3 COMPONENTS single double REQUIRED)
-
 find_package(ACE)
-if(ACE_FOUND)
-  MESSAGE("ACE found, the streaming framework will be compiled.")
-else(ACE_FOUND)
-  MESSAGE("ACE not found. Only toolboxes and standalone applications are compiled. The streaming framework will not be compiled.")
-endif(ACE_FOUND)
+if(NOT ACE_FOUND)
+  MESSAGE("ACE not found. Only toolboxes and standalone applications are compiled. The streaming framework will NOT be compiled.")
+endif(NOT ACE_FOUND)
+
+find_package(CUDA_advanced)
+
+if (CUDA_FOUND)
+  ADD_DEFINITIONS(-DUSE_CUDA)
+  SET( GADGETRON_CUDA_FOUND_BOOL 1 )
+  include_directories( ${CUDA_INCLUDE_DIRS} )
+  #set(CUDA_VERBOSE_BUILD ON)
 
-find_package(CUDA 4.1)
-if ( CUDA_FOUND )
-    ADD_DEFINITIONS(-DUSE_CUDA)
-endif ( CUDA_FOUND )
+  # Compile kernels for compute models 1.0 and 2.0 as default for Cuda 4.1
+  # Support compute model 3.0 from Cuda 4.2 and up
+  # Support compute model 3.5 from Cuda 5 and up
+
+  OPTION(GADGETRON_CUDA_ALL_COMPUTE_MODEL "Build CUDA components for all computing models" Off)
+  if (GADGETRON_CUDA_ALL_COMPUTE_MODEL)
+    MESSAGE("Compiling CUDA components to support compute model 2.0, 3.0 and 3.5") 
+    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-gencode arch=compute_20,code=sm_20")
+    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-gencode arch=compute_30,code=sm_30")
+    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-gencode arch=compute_35,code=sm_35")
+  endif (GADGETRON_CUDA_ALL_COMPUTE_MODEL)
+
+  MESSAGE("Compiling with ${CUDA_NVCC_FLAGS}")
+
+else (CUDA_FOUND)
+  MESSAGE("CUDA not found. CUDA components will not be compiled.")
+  SET( GADGETRON_CUDA_FOUND_BOOL 0 )
+endif (CUDA_FOUND)
 
 find_package(GTest)
 #Add support for the default ubuntu package of gtest (which is not compiled
@@ -119,86 +187,133 @@ endif (NOT GTEST_FOUND)
 find_package(Armadillo)
 # check whether ILP64 MKL should is used
 if(ARMADILLO_FOUND)
+    ADD_DEFINITIONS(-DUSE_ARMADILLO)
     set(ARMADILLO_BLAS_LONG_LONG FALSE)
     if(EXISTS "${ARMADILLO_INCLUDE_DIR}/armadillo_bits/config.hpp")
         # Read and parse armadillo config.hpp to find out whether BLAS uses long long
-        file(STRINGS "${ARMADILLO_INCLUDE_DIR}/armadillo_bits/config.hpp" _armadillo_blas_long_long REGEX "// #define ARMA_BLAS_LONG_LON")
+        file(STRINGS "${ARMADILLO_INCLUDE_DIR}/armadillo_bits/config.hpp" _armadillo_blas_long_long REGEX "// #define ARMA_BLAS_LONG_LONG")
         if ( NOT _armadillo_blas_long_long )
             set(ARMADILLO_BLAS_LONG_LONG TRUE)
-            MESSAGE("Armadillo is found to use long long for BLAS calls ... ")
+            MESSAGE("Armadillo is found to use long long for BLAS calls")
         else ( NOT _armadillo_blas_long_long )
-            MESSAGE("Armadillo is found NOT to use long long for BLAS calls ... ")
-            MESSAGE("Note the ARMADILLO_BLAS_LONG_LONG must be defined in the ${ARMADILLO_INCLUDE_DIR}/armadillo_bits/config.hpp to link against MKL ILP64 interface ... ")
+            MESSAGE("Armadillo found. Note that ARMADILLO_BLAS_LONG_LONG _must_ be defined in ${ARMADILLO_INCLUDE_DIR}/armadillo_bits/config.hpp to link against the MKL ILP64 interface.")
         endif ( NOT _armadillo_blas_long_long )
         unset(_armadillo_blas_long_long)
     endif()
-endif ()
+else()
+  message("Armadillo not found. This will disable many toolboxes and gadgets.")
+endif()
 
+
+find_package(HDF5 1.8 COMPONENTS C CXX HL)
+if (HDF5_FOUND)
+message("HDF5 Found")
+else()
+message("HDF5 not found")
+endif()
+find_package(FFTW3 COMPONENTS single double)
+if (FFTW3_FOUND)
+message("FFTW3 Found")
+else()
+message("FFTW3 not found")
+endif()
+find_package(Ismrmrd)
+if (ISMRMRD_FOUND)
+message("ISMRMRD Found")
+else()
+message("ISMRMRD not found")
+endif()
 find_package(MKL)
+find_package(BLAS)
+find_package(LAPACK)
+if (LAPACK_FOUND)
+    message("LAPACK Found")
+    ADD_DEFINITIONS(-DUSE_LAPACK)
+endif (LAPACK_FOUND)
+find_package(PythonLibs)
+find_package(NumPy)
+find_package(GMatlab)
+if (MATLAB_FOUND)
+    ADD_DEFINITIONS(-DUSE_MATLAB)
+endif (MATLAB_FOUND)
 
-if (CUDA_FOUND)
-  MESSAGE("CUDA found, GPU components will be compiled.")
-  SET( GADGETRON_CUDA_FOUND_BOOL 1 )
-  include_directories( ${CUDA_INCLUDE_DIRS} )
-  #set(CUDA_VERBOSE_BUILD ON)
+message("Searching for OpenGL, GLEW, GLUT, and Qt. These libraries are only used in a single standalone application and are thus non-essential.")
+if(WIN32)
+  message("For Windows users in particular, for ease of installation we do not reccomend installing these libraries.")
+endif(WIN32)
 
-  # Compile kernels for compute models 1.0 and 2.0 as default for Cuda 4.1
-  # Support compute model 3.0 from Cuda 4.2 and up
-  # Support compute model 3.5 from Cuda 5 and up
+find_package(OpenGL)
+find_package(GLEW)
+find_package(GLUT)
+find_package(Qt4 4.6)
 
-  set(CUDA_NVCC_FLAGS1 "-gencode arch=compute_10,code=sm_10")
-  set(CUDA_NVCC_FLAGS2 "-gencode arch=compute_20,code=sm_20")
-  set(CUDA_NVCC_FLAGS3 "-gencode arch=compute_30,code=sm_30") 
-  set(CUDA_NVCC_FLAGS4 "-gencode arch=compute_35,code=sm_35")   
+add_subdirectory(toolboxes)
+add_subdirectory(apps)
 
-  if(${CUDA_VERSION} VERSION_GREATER "4.99")
-    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS1} ${CUDA_NVCC_FLAGS2} ${CUDA_NVCC_FLAGS3} ${CUDA_NVCC_FLAGS4})
-  else(${CUDA_VERSION} VERSION_GREATER "4.99")    
+if (ACE_FOUND AND ISMRMRD_FOUND AND FFTW3_FOUND AND HDF5_FOUND)
+  add_subdirectory(gadgets)
+else()
+  message("Required dependencies for gadget compilation not found (ACE, ISMRMRD, FFTW3, HDF5).")
+endif()
 
-    if(${CUDA_VERSION} VERSION_GREATER "4.1")
-      set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS1} ${CUDA_NVCC_FLAGS2} ${CUDA_NVCC_FLAGS3})
-    else(${CUDA_VERSION} VERSION_GREATER "4.1")      
+add_subdirectory(test)
+add_subdirectory(cmake)
+add_subdirectory(doc)
+add_subdirectory(chroot)
+# install dependencies for WIN32
+if (WIN32)
+    if (GADGETRON_INSTALL_DEPENDENCIES)
+        include(${CMAKE_SOURCE_DIR}/cmake/InstallWinDependencies.cmake)
+    endif (GADGETRON_INSTALL_DEPENDENCIES)
+endif (WIN32)
 
-      set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS1} ${CUDA_NVCC_FLAGS2})          
+if (UNIX)
+    if (NOT APPLE)
+        if (GADGETRON_INSTALL_DEPENDENCIES)
+            include(${CMAKE_SOURCE_DIR}/cmake/InstallLinuxDependencies.cmake)
+        endif (GADGETRON_INSTALL_DEPENDENCIES)
+    endif (NOT APPLE)
+endif (UNIX)
 
-    endif(${CUDA_VERSION} VERSION_GREATER "4.1")
-  endif(${CUDA_VERSION} VERSION_GREATER "4.99")
+#  ---   Main Library  (end) ----
 
-else (CUDA_FOUND)
-  MESSAGE("CUDA not found. CUDA components will not be compiled.")
-  SET( GADGETRON_CUDA_FOUND_BOOL 0 )
-endif (CUDA_FOUND)
+# whether to install dependencies
+OPTION(GADGETRON_PERFORM_PACKAGING "Perform gadgetron packaging" Off)
 
-find_package(Qt4 4.6)
-find_package(PythonLibs)
-find_package(NumPy)
-find_package(GLEW)
-find_package(OpenGL)
-find_package(GLUT)
-find_package(HDF5 1.8 COMPONENTS C CXX)
+if (GADGETRON_PERFORM_PACKAGING)
+    if (NOT GADGETRON_COMPONENT)
+        set(GADGETRON_COMPONENT main CACHE STRING "gadgetron component")
+    endif (NOT GADGETRON_COMPONENT)
 
-find_package(Ismrmrd)
-if(ISMRMRD_FOUND)
-  message("ISMRMRD found")
-  find_package(XSD REQUIRED)
-  find_package(XercesC REQUIRED)
-else(ISMRMRD_FOUND)
-  message("ISMRMRD not found. Only compiling toolboxes and standalone applications.")
-endif(ISMRMRD_FOUND)
+        if (GADGETRON_COMPONENT STREQUAL main)
+            message("Packaging gadgetron  ${PROJECT_NAME} ")
+        else (GADGETRON_COMPONENT STREQUAL main)
+            set(PROJECT_NAME ${PROJECT_NAME}_${GADGETRON_COMPONENT})
+            message("Packaging gadgetron component ${GADGETRON_COMPONENT} ... ")
+        endif (GADGETRON_COMPONENT STREQUAL main)
 
-find_package(GMatlab)
+    # Create package
+    string(TOLOWER ${PROJECT_NAME} PROJECT_NAME_LOWER)
 
-include_directories( ${CMAKE_SOURCE_DIR} )
+        if (GADGETRON_COMPONENT STREQUAL main)
+        include(${CMAKE_SOURCE_DIR}/cmake/gadgetron_cpack.cmake)
+        if(CPACK_GENERATOR)
+          message(STATUS "Found CPack generators: ${CPACK_GENERATOR}")
+          configure_file("${CMAKE_SOURCE_DIR}/cmake/cpack_options.cmake.in" ${GADGETRON_CPACK_CFG_FILE} @ONLY)
+          set(CPACK_PROJECT_CONFIG_FILE ${GADGETRON_CPACK_CFG_FILE})
+        endif(CPACK_GENERATOR)
+        endif (GADGETRON_COMPONENT STREQUAL main)
 
-add_subdirectory(toolboxes)
-add_subdirectory(apps)
-if (ACE_FOUND AND ISMRMRD_FOUND)
-  add_subdirectory(gadgets)
-endif (ACE_FOUND AND ISMRMRD_FOUND)
+        if (GADGETRON_COMPONENT STREQUAL web)
+        include(${CMAKE_SOURCE_DIR}/cmake/gadgetron_web_cpack.cmake)
+        if(CPACK_GENERATOR)
+          message(STATUS "Found CPack generators: ${CPACK_GENERATOR}")
+          configure_file("${CMAKE_SOURCE_DIR}/cmake/cpack_options_web.cmake.in" ${GADGETRON_WEB_CPACK_CFG_FILE} @ONLY)
+          set(CPACK_PROJECT_CONFIG_FILE ${GADGETRON_WEB_CPACK_CFG_FILE})
+        endif(CPACK_GENERATOR)
+        endif (GADGETRON_COMPONENT STREQUAL web)
 
-add_subdirectory(cmake)
-add_subdirectory(doc)
+        set(CPACK_COMPONENTS_ALL ${GADGETRON_COMPONENT})
+        include (CPack)    
+endif (GADGETRON_PERFORM_PACKAGING)
 
-if (GTEST_FOUND AND ARMADILLO_FOUND)
-  add_subdirectory(test)
-endif (GTEST_FOUND AND ARMADILLO_FOUND)
diff --git a/README b/README
index 860f423..d3b8af6 100644
--- a/README
+++ b/README
@@ -4,7 +4,7 @@ Please read LICENSE file for licensing details.
 
 Detailed installation instructions and manual is available at:
 
-http://gadgetron.sourceforge.net
+http://gadgetron.github.io
 
 -------------------------------------
 General Building Instructions (on Unix platforms)
diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt
index ca6900c..71946d5 100644
--- a/apps/CMakeLists.txt
+++ b/apps/CMakeLists.txt
@@ -1,7 +1,6 @@
-IF (ACE_FOUND AND XSD_FOUND)
+IF (ACE_FOUND )
   add_subdirectory(gadgetron)
-  IF (ISMRMRD_FOUND)
-    add_subdirectory(clients)
-  ENDIF (ISMRMRD_FOUND)	 
-ENDIF (ACE_FOUND AND XSD_FOUND)
+ENDIF (ACE_FOUND )
+
+add_subdirectory(clients)
 add_subdirectory(standalone)
diff --git a/apps/clients/CMakeLists.txt b/apps/clients/CMakeLists.txt
index 00bf884..c61222b 100644
--- a/apps/clients/CMakeLists.txt
+++ b/apps/clients/CMakeLists.txt
@@ -1,7 +1,7 @@
-if(WIN32)
-  add_definitions(-DTIXML_USE_STL)
-endif(WIN32)
- 
-if(ISMRMRD_FOUND AND HDF5_FOUND)
-  add_subdirectory(mriclient)
-endif(ISMRMRD_FOUND AND HDF5_FOUND)
+IF(ACE_FOUND)
+  add_subdirectory(utilities)
+ENDIF()
+
+IF(ISMRMRD_FOUND AND ACE_FOUND AND HDF5_FOUND)
+  add_subdirectory(gadgetron_ismrmrd_client)
+ENDIF()
\ No newline at end of file
diff --git a/apps/clients/gadgetron_ismrmrd_client/CMakeLists.txt b/apps/clients/gadgetron_ismrmrd_client/CMakeLists.txt
new file mode 100644
index 0000000..d55d060
--- /dev/null
+++ b/apps/clients/gadgetron_ismrmrd_client/CMakeLists.txt
@@ -0,0 +1,24 @@
+# Build rules for the standalone gadgetron_ismrmrd_client executable.
+
+# ISMRMRD is mandatory for this target: configuration fails if it is not found.
+find_package(Ismrmrd REQUIRED)
+
+# NOTE(review): presumably makes find_package(Boost) use CMake's own FindBoost
+# module instead of a Boost-provided package configuration file -- confirm
+# against the FindBoost documentation for the CMake versions in use.
+set(Boost_NO_BOOST_CMAKE ON)
+
+# On Windows the date_time and chrono components are requested in addition to
+# the components needed everywhere.
+if(WIN32)
+  find_package(Boost COMPONENTS program_options thread system date_time chrono REQUIRED)
+else(WIN32)
+  find_package(Boost COMPONENTS program_options thread system REQUIRED)
+endif(WIN32)
+
+# Only Windows builds add the Boost library directory to the linker search path.
+if(WIN32)
+  link_directories(${Boost_LIBRARY_DIRS})
+endif(WIN32)
+
+include_directories(
+  ${Boost_INCLUDE_DIR} 
+  ${ISMRMRD_INCLUDE_DIR}
+  )
+
+# Single-source executable.
+add_executable(gadgetron_ismrmrd_client gadgetron_ismrmrd_client.cpp)
+
+target_link_libraries(gadgetron_ismrmrd_client ${ISMRMRD_LIBRARIES} ${Boost_LIBRARIES})
+
+# Installed into bin/ as part of the "main" install component.
+install(TARGETS gadgetron_ismrmrd_client DESTINATION bin COMPONENT main)
diff --git a/apps/clients/gadgetron_ismrmrd_client/gadgetron_ismrmrd_client.cpp b/apps/clients/gadgetron_ismrmrd_client/gadgetron_ismrmrd_client.cpp
new file mode 100644
index 0000000..1f07de8
--- /dev/null
+++ b/apps/clients/gadgetron_ismrmrd_client/gadgetron_ismrmrd_client.cpp
@@ -0,0 +1,1279 @@
+/*****************************************
+*  Standalone ISMRMRD Gadgetron Client  
+*
+* Author: Michael S. Hansen
+* 
+* Dependencies: ISMRMRD and Boost
+*
+*****************************************/
+
+//TODO:
+// -Blobs (for DICOM image support)
+//  - First implementation is in, but testing needed
+// -NIFTI and Analyze output
+// -Check on potential threading problem with asio socket 
+//    - having a reading and a writing thread is supposedly not safe, but seems to work here
+// -Add command line switch for controlling verbosity of output
+// -Static linking for standalone executable. 
+
+#include <boost/program_options.hpp>
+#include <boost/asio.hpp>
+#include <boost/thread/thread.hpp>
+#include <boost/thread/mutex.hpp>
+#include <boost/shared_ptr.hpp>
+
+#include <ismrmrd/ismrmrd.h>
+#include <ismrmrd/dataset.h>
+#include <ismrmrd/meta.h>
+
+#include <fstream>
+#include <streambuf>
+#include <time.h>
+#include <iomanip>
+#include <sstream>
+#include <iostream>
+#include <exception>
+#include <map>
+#include <complex>
+#include <cstring>
+#include <string>
+#include <typeinfo>
+#include <vector>
+
+
+/**
+   Returns the current local time as "<year>-MM-DD HH:MM:SS".
+   Month, day, hour, minute and second are zero-padded to two digits;
+   the year is written without padding.
+*/
+std::string get_date_time_string()
+{
+    time_t now;
+    time(&now);
+    const struct tm* local = localtime(&now);
+
+    std::stringstream out;
+    out << (local->tm_year + 1900);
+
+    // The fill character is sticky, so it is set once; the field width is
+    // reset after every insertion and therefore re-applied for each field.
+    out << std::setfill('0');
+    out << "-" << std::setw(2) << (local->tm_mon + 1);
+    out << "-" << std::setw(2) << local->tm_mday;
+    out << " " << std::setw(2) << local->tm_hour;
+    out << ":" << std::setw(2) << local->tm_min;
+    out << ":" << std::setw(2) << local->tm_sec;
+
+    return out.str();
+}
+
+
+namespace po = boost::program_options;
+using boost::asio::ip::tcp;
+
+
+/**
+   Numeric message identifiers.
+
+   The values are split into two ranges by the *_ID_MIN / *_ID_MAX markers:
+   0..999 (INT) and 1000..4096 (EXT). Entries tagged DEPRECATED are still
+   declared with their original values.
+
+   NOTE(review): presumably these are the ids carried in
+   GadgetMessageIdentifier::id and must match the values used by the
+   Gadgetron server -- confirm before renumbering anything.
+*/
+enum GadgetronMessageID {
+    GADGET_MESSAGE_INT_ID_MIN                             =   0,
+    GADGET_MESSAGE_CONFIG_FILE                            =   1,
+    GADGET_MESSAGE_CONFIG_SCRIPT                          =   2,
+    GADGET_MESSAGE_PARAMETER_SCRIPT                       =   3,
+    GADGET_MESSAGE_CLOSE                                  =   4,
+    GADGET_MESSAGE_INT_ID_MAX                             = 999,
+    GADGET_MESSAGE_EXT_ID_MIN                             = 1000,
+    GADGET_MESSAGE_ACQUISITION                            = 1001, /**< DEPRECATED */
+    GADGET_MESSAGE_NEW_MEASUREMENT                        = 1002, /**< DEPRECATED */
+    GADGET_MESSAGE_END_OF_SCAN                            = 1003, /**< DEPRECATED */
+    GADGET_MESSAGE_IMAGE_CPLX_FLOAT                       = 1004, /**< DEPRECATED */
+    GADGET_MESSAGE_IMAGE_REAL_FLOAT                       = 1005, /**< DEPRECATED */
+    GADGET_MESSAGE_IMAGE_REAL_USHORT                      = 1006, /**< DEPRECATED */
+    GADGET_MESSAGE_EMPTY                                  = 1007, /**< DEPRECATED */
+    GADGET_MESSAGE_ISMRMRD_ACQUISITION                    = 1008,
+    GADGET_MESSAGE_ISMRMRD_IMAGE_CPLX_FLOAT               = 1009,
+    GADGET_MESSAGE_ISMRMRD_IMAGE_REAL_FLOAT               = 1010,
+    GADGET_MESSAGE_ISMRMRD_IMAGE_REAL_USHORT              = 1011,
+    GADGET_MESSAGE_DICOM                                  = 1012,
+    GADGET_MESSAGE_CLOUD_JOB                              = 1013,
+    GADGET_MESSAGE_GADGETCLOUD_JOB                        = 1014,
+    GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_CPLX_FLOAT     = 1015,
+    GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_REAL_FLOAT     = 1016,
+    GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_REAL_USHORT    = 1017,
+    GADGET_MESSAGE_DICOM_WITHNAME                         = 1018,
+    GADGET_MESSAGE_DEPENDENCY_QUERY                       = 1019,
+    GADGET_MESSAGE_EXT_ID_MAX                             = 4096
+};
+
+// File-scope mutex. The image readers in this file hold it while they create
+// their ISMRMRD::Dataset and while they append an image to it.
+boost::mutex mtx;
+
+/**
+   Holds a single 16-bit message id.
+   NOTE(review): presumably one of the GadgetronMessageID values, exchanged
+   as raw bytes ahead of each message -- confirm against the send/receive code.
+*/
+struct GadgetMessageIdentifier
+{
+    uint16_t id;
+};
+
+/**
+   Fixed-size (1024 byte) character buffer for a configuration file name.
+   NOTE(review): presumably a null-terminated name sent to the server as raw
+   bytes -- confirm; the struct size is then part of the wire format.
+*/
+struct GadgetMessageConfigurationFile
+{
+    char configuration_file[1024];
+};
+
+/**
+   Holds the length of a script as a 32-bit unsigned integer.
+   NOTE(review): presumably sent immediately before the script text itself;
+   whether the length includes a terminating null is not visible here -- confirm.
+*/
+struct GadgetMessageScript
+{
+    uint32_t script_length;
+};
+
+/**
+   Exception type used by this client. It stores a message string and
+   returns it from what().
+*/
+class GadgetronClientException : public std::exception
+{
+public:
+    /** Stores a copy of msg as the text reported by what(). */
+    GadgetronClientException(std::string msg) : msg_(msg) {}
+
+    virtual ~GadgetronClientException() throw() {}
+
+    /** Returns the stored message; the pointer refers to storage owned by this object. */
+    virtual const char* what() const throw() { return msg_.c_str(); }
+
+protected:
+    std::string msg_;
+};
+
+/**
+   Abstract interface for objects that read one kind of message from a socket.
+   Concrete readers in this file derive from it and implement read().
+*/
+class GadgetronClientMessageReader
+{
+public:
+    virtual ~GadgetronClientMessageReader() {}
+
+    /** Implemented by each concrete reader to read its specific message from s. */
+    virtual void read(tcp::socket* s) = 0;
+};
+
+
+/**
+   Reads an ISMRMRD image (an ImageHeader followed by pixel data of type T)
+   from the socket and appends it to an ISMRMRD dataset.
+
+   The dataset is created on the first image that is read. Creating the
+   dataset and appending to it happen while the file-scope mutex mtx is held.
+*/
+template <typename T> class GadgetronClientImageMessageReader 
+    : public GadgetronClientMessageReader
+{
+
+public:
+    /**
+       filename and groupname are forwarded to the ISMRMRD::Dataset constructor
+       when the first image arrives.
+    */
+    GadgetronClientImageMessageReader(std::string filename, std::string groupname)
+        : group_name_(groupname)   // listed in declaration order, which is the order
+        , file_name_(filename)     // in which members are actually initialized
+    {
+
+    }
+
+    ~GadgetronClientImageMessageReader() {
+    }
+
+    /**
+       Reads one image from stream and appends it to the dataset under the
+       variable name "image_<image_series_index>".
+    */
+    virtual void read(tcp::socket* stream)
+    {
+        //Read the image from the socket
+        ISMRMRD::ImageHeader h;
+        boost::asio::read(*stream, boost::asio::buffer(&h,sizeof(ISMRMRD::ImageHeader)));
+
+        // TODO check the datatype!
+        ISMRMRD::Image<T> im;
+        im.setHead(h);
+        boost::asio::read(*stream, boost::asio::buffer(im.getDataPtr(), im.getDataSize()));
+
+        std::stringstream st1;
+        st1 << "image_" << h.image_series_index;
+        std::string image_varname = st1.str();
+
+        // The scoped lock releases mtx on every exit path. With manual
+        // lock()/unlock() an exception from the Dataset constructor or from
+        // appendImage would leave mtx locked and block every other user of it.
+        boost::mutex::scoped_lock lock(mtx);
+
+        if (!dataset_) {
+            dataset_ = boost::shared_ptr<ISMRMRD::Dataset>(new ISMRMRD::Dataset(file_name_.c_str(), group_name_.c_str(), true)); // create if necessary
+        }
+
+        dataset_->appendImage(image_varname, im);
+    }
+
+protected:
+    std::string group_name_;
+    std::string file_name_;
+    boost::shared_ptr<ISMRMRD::Dataset> dataset_;
+};
+
+/**
+   Reads an ISMRMRD image with meta attributes from the socket and appends it
+   to an ISMRMRD dataset.
+
+   The message is read in this order: ImageHeader, an unsigned long long
+   giving the attribute string length, the attribute string, the pixel data
+   of type T.
+
+   The dataset is created on the first image that is read. Creating the
+   dataset and appending to it happen while the file-scope mutex mtx is held.
+*/
+template <typename T> class GadgetronClientAttribImageMessageReader 
+    : public GadgetronClientMessageReader
+{
+
+public:
+    /**
+       filename and groupname are forwarded to the ISMRMRD::Dataset constructor
+       when the first image arrives.
+    */
+    GadgetronClientAttribImageMessageReader(std::string filename, std::string groupname)
+        : group_name_(groupname)   // listed in declaration order, which is the order
+        , file_name_(filename)     // in which members are actually initialized
+    {
+
+    }
+
+    ~GadgetronClientAttribImageMessageReader() {
+    }
+
+    /**
+       Reads one image plus its attribute string from stream and appends it to
+       the dataset under the variable name "image_<image_series_index>".
+    */
+    virtual void read(tcp::socket* stream)
+    {
+        //Read the image header from the socket
+        ISMRMRD::ImageHeader h;
+        boost::asio::read(*stream, boost::asio::buffer(&h,sizeof(ISMRMRD::ImageHeader)));
+        ISMRMRD::Image<T> im;
+        im.setHead(h);
+
+        typedef unsigned long long size_t_type;
+
+        //Read meta attributes
+        size_t_type meta_attrib_length;
+        boost::asio::read(*stream, boost::asio::buffer(&meta_attrib_length, sizeof(size_t_type)));
+
+        std::string meta_attrib(meta_attrib_length,0);
+        if (!meta_attrib.empty()) {
+            // Receive directly into the string's own storage. &meta_attrib[0]
+            // is a writable pointer, whereas the result of c_str() must not
+            // be written through.
+            boost::asio::read(*stream, boost::asio::buffer(&meta_attrib[0], meta_attrib.size()));
+        }
+        im.setAttributeString(meta_attrib);
+
+        //Read image data
+        boost::asio::read(*stream, boost::asio::buffer(im.getDataPtr(), im.getDataSize()));
+
+        std::stringstream st1;
+        st1 << "image_" << h.image_series_index;
+        std::string image_varname = st1.str();
+
+        // The scoped lock releases mtx on every exit path. With manual
+        // lock()/unlock() an exception from the Dataset constructor or from
+        // appendImage would leave mtx locked and block every other user of it.
+        boost::mutex::scoped_lock lock(mtx);
+
+        if (!dataset_) {
+            dataset_ = boost::shared_ptr<ISMRMRD::Dataset>(new ISMRMRD::Dataset(file_name_.c_str(), group_name_.c_str(), true)); // create if necessary
+        }
+
+        dataset_->appendImage(image_varname, im);
+    }
+
+protected:
+    std::string group_name_;
+    std::string file_name_;
+    boost::shared_ptr<ISMRMRD::Dataset> dataset_;
+};
+
+// ----------------------------------------------------------------
+// for the analyze image format
+// Remove any preprocessor macro called DT_UNKNOWN so that the enumerator of
+// the same name can be declared below.
+// NOTE(review): presumably such a macro comes from a system header on some
+// platforms -- which one is not visible in this file.
+#ifdef DT_UNKNOWN
+    #undef DT_UNKNOWN
+#endif // DT_UNKNOWN
+
+/**
+   Data type codes stored in the "datatype" field of the Analyze header
+   (see image_dimension and IOAnalyze::array2Header).
+
+   Several enumerators are deliberate aliases with the same value:
+   DT_ANA_UNKNOWN, DT_NONE and DT_UNKNOWN are all 0, and the second group
+   (DT_UINT8 ... DT_RGB24) repeats values of the first group under other names.
+*/
+enum AnalyzeDataType
+{
+    DT_ANA_UNKNOWN=0,
+
+    DT_NONE                    =0,
+    DT_UNKNOWN                 =0,     /* what it says, dude           */
+    DT_BINARY                  =1,     /* binary (1 bit/voxel)         */
+    DT_UNSIGNED_CHAR           =2,     /* unsigned char (8 bits/voxel) */
+    DT_SIGNED_SHORT            =4,     /* signed short (16 bits/voxel) */
+    DT_UNSIGNED_SHORT          =5,
+    DT_SIGNED_INT              =8,     /* signed int (32 bits/voxel)   */
+    DT_UNSIGNED_INT            =9,
+    DT_FLOAT                  =16,     /* float (32 bits/voxel)        */
+    DT_COMPLEX                =32,     /* complex (64 bits/voxel)      */
+    DT_DOUBLE                 =64,     /* double (64 bits/voxel)       */
+    DT_RGB                   =128,     /* RGB triple (24 bits/voxel)   */
+    DT_ALL                   =255,     /* not very useful (?)          */
+
+                                /*----- another set of names for the same ---*/
+    DT_UINT8                   =2,
+    DT_INT16                   =4,
+    DT_INT32                   =8,
+    DT_FLOAT32                =16,
+    DT_COMPLEX64              =32,
+    DT_FLOAT64                =64,
+    DT_RGB24                 =128,
+
+                                /*------------------- new codes for NIFTI ---*/
+    DT_INT8                  =256,     /* signed char (8 bits)         */
+    DT_UINT16                =512,     /* unsigned short (16 bits)     */
+    DT_UINT32                =768,     /* unsigned int (32 bits)       */
+    DT_INT64                =1024,     /* long long (64 bits)          */
+    DT_UINT64               =1280,     /* unsigned long long (64 bits) */
+    DT_FLOAT128             =1536,     /* long double (128 bits)       */
+    DT_COMPLEX128           =1792,     /* double pair (128 bits)       */
+    DT_COMPLEX256           =2048,     /* long double pair (256 bits)  */
+    DT_RGBA32               =2304,     /* 4 byte RGBA (32 bits/voxel)  */
+};
+
+/**
+   Maps a type name, as returned by typeid(X).name(), to the Analyze data
+   type code used for X. Returns DT_ANA_UNKNOWN for any other string.
+
+   The candidates are tested from the last table entry to the first and the
+   first hit is returned, so if two names ever compared equal the later
+   table entry would win.
+*/
+AnalyzeDataType getDataTypeFromRTTI(const std::string& name)
+{
+    if ( name == typeid(std::complex<long double>).name() ) return DT_COMPLEX256;
+    if ( name == typeid(std::complex<double>).name() )      return DT_COMPLEX128;
+    if ( name == typeid(std::complex<float>).name() )       return DT_COMPLEX;
+    if ( name == typeid(long double).name() )               return DT_FLOAT128;
+    if ( name == typeid(double).name() )                    return DT_DOUBLE;
+    if ( name == typeid(float).name() )                     return DT_FLOAT;
+    if ( name == typeid(unsigned int).name() )              return DT_UINT32;
+    if ( name == typeid(int).name() )                       return DT_SIGNED_INT;
+    if ( name == typeid(unsigned short).name() )            return DT_UINT16;
+    if ( name == typeid(short).name() )                     return DT_SIGNED_SHORT;
+    if ( name == typeid(unsigned char).name() )             return DT_UNSIGNED_CHAR;
+
+    return DT_ANA_UNKNOWN;
+}
+
+/**
+   First section of the Analyze header (member "hk" of dsr).
+   IOAnalyze::array2Header sets sizeof_hdr to 348, extents to 16384 and
+   regular to 'r'; the remaining fields are left zero.
+   The member order and types define the layout that is written to the .hdr
+   file, so they must not be changed.
+*/
+struct header_key
+{
+    int sizeof_hdr;
+    char data_type[10];
+    char db_name[18];
+    int extents;
+    short int session_error;
+    char regular;
+    char hkey_un0;
+};
+
+/**
+   Second section of the Analyze header (member "dime" of dsr).
+   As filled in by IOAnalyze::array2Header:
+     - dim[0] is the number of dimensions, dim[1..7] the extents;
+     - unused8..unused10 receive the extents of dimensions 8..10;
+     - datatype is an AnalyzeDataType code and bitpix is 8*sizeof(T);
+     - pixdim[1..7] carry the pixel sizes, pixdim[0] is 0;
+     - every other field is zero.
+   The member order and types define the layout that is written to the .hdr
+   file, so they must not be changed.
+*/
+struct image_dimension
+{
+    short int dim[8];
+    short int unused8;
+    short int unused9;
+    short int unused10;
+    short int unused11;
+    short int unused12;
+    short int unused13;
+    short int unused14;
+    short int datatype;
+    short int bitpix;
+    short int dim_un0;
+    float pixdim[8];
+    float vox_offset;
+    float funused1;
+    float funused2;
+    float funused3;
+    float cal_max;
+    float cal_min;
+    float compressed;
+    float verified;
+    int glmax,glmin;
+};
+
+/**
+   Third section of the Analyze header (member "hist" of dsr).
+   IOAnalyze::array2Header leaves every field of this section zero.
+   The member order and types define the layout that is written to the .hdr
+   file, so they must not be changed.
+*/
+struct data_history
+{
+    char descrip[80];
+    char aux_file[24];
+    char orient;
+    char originator[10];
+    char generated[10];
+    char scannum[10];
+    char patient_id[10];
+    char exp_date[10];
+    char exp_time[10];
+    char hist_un0[3];
+    int views;
+    int vols_added;
+    int start_field;
+    int field_skip;
+    int omax, omin;
+    int smax, smin;
+};
+
+// Analyze75 header has 348 bytes
+// The three sections add up to 40 + 108 + 200 = 348 bytes when int and float
+// are 4 bytes, short is 2 bytes and the compiler inserts no padding
+// (NOTE(review): not enforced by a static check in this file).
+// The readers in this file write this struct to the .hdr file as raw bytes,
+// sizeof(dsr) at a time.
+struct dsr
+{
+    struct header_key hk;
+    struct image_dimension dime;
+    struct data_history hist;
+};
+
+/**
+   Helper that fills in an Analyze header (dsr) describing an array.
+*/
+class IOAnalyze
+{
+public:
+
+    typedef dsr HeaderType;
+
+    IOAnalyze() {}
+    virtual ~IOAnalyze() {}
+
+    /**
+       Fills header for an array with element type T.
+
+       dim:       array extents. dim[k] is stored in header.dime.dim[k+1] for
+                  k < 7 and in unused8..unused10 for k = 7..9; extents that are
+                  not supplied are written as 1. header.dime.dim[0] receives
+                  dim.size().
+       pixelSize: pixelSize[k] is stored in header.dime.pixdim[k+1] for k < 7;
+                  entries that are not supplied stay 0.
+       header:    output, completely overwritten.
+
+       Throws GadgetronClientException if anything inside throws.
+    */
+    template <typename T> void array2Header(const std::vector<size_t>& dim, const std::vector<float>& pixelSize, HeaderType& header)
+    {
+        try
+        {
+            // Zero the whole header first. Every field that is not assigned
+            // below (name strings, reserved fields, calibration values and
+            // the complete data_history section) therefore stays zero.
+            std::memset(&header, 0, sizeof(dsr));
+
+            // header_key
+            header.hk.sizeof_hdr = 348;
+            header.hk.extents = 16384;
+            header.hk.regular = 'r';
+
+            // image_dimension
+            const size_t NDim = dim.size();
+            header.dime.dim[0] = static_cast<short>(NDim);
+
+            // dim[1..7] hold the first seven extents. Checking d < NDim for
+            // every slot, including the first, keeps an empty dim vector from
+            // being indexed.
+            for ( size_t d=0; d<7; d++ )
+            {
+                header.dime.dim[d+1] = ( d < NDim ) ? static_cast<short>(dim[d]) : static_cast<short>(1);
+            }
+
+            // Extents beyond the seventh are parked in the first "unused" fields.
+            header.dime.unused8  = ( NDim > 7 ) ? static_cast<short>(dim[7]) : static_cast<short>(1);
+            header.dime.unused9  = ( NDim > 8 ) ? static_cast<short>(dim[8]) : static_cast<short>(1);
+            header.dime.unused10 = ( NDim > 9 ) ? static_cast<short>(dim[9]) : static_cast<short>(1);
+
+            const std::string rttiID = std::string(typeid(T).name());
+            header.dime.datatype = static_cast<short>(getDataTypeFromRTTI(rttiID));
+            header.dime.bitpix = static_cast<short>(8*sizeof(T));
+
+            // pixdim[0] stays 0; pixdim[k+1] takes pixelSize[k]. Every supplied
+            // entry is copied: the guard is on the index that is actually read,
+            // so the last pixel size is no longer dropped.
+            const size_t numPixelSizes = ( pixelSize.size() < 7 ) ? pixelSize.size() : 7;
+            for ( size_t p=0; p<numPixelSizes; p++ )
+            {
+                header.dime.pixdim[p+1] = pixelSize[p];
+            }
+        }
+        catch(...)
+        {
+            throw GadgetronClientException("Errors in IOAnalyze::array2Header(dim, pixelSize, header) ... ");
+        }
+    }
+};
+
+/**
+   Reads an ISMRMRD image (an ImageHeader followed by pixel data of type T)
+   from the socket and stores it as a pair of files:
+   <prefix>_<image_series_index>_<image_index>.hdr (Analyze header) and
+   <prefix>_<image_series_index>_<image_index>.img (raw pixel data).
+*/
+template <typename T> class GadgetronClientAnalyzeImageMessageReader 
+    : public GadgetronClientMessageReader
+{
+
+public:
+    /** prefix is the leading part of every file name written by read(). */
+    GadgetronClientAnalyzeImageMessageReader(const std::string& prefix=std::string("Image")) : prefix_(prefix)
+    {
+    }
+
+    ~GadgetronClientAnalyzeImageMessageReader() {
+    }
+
+    /** Reads one image from stream and writes its .hdr and .img files. */
+    virtual void read(tcp::socket* stream)
+    {
+        using namespace ISMRMRD;
+
+        // Header first, then the pixel data sized by the image object.
+        ISMRMRD::ImageHeader h;
+        boost::asio::read(*stream, boost::asio::buffer(&h, sizeof(ISMRMRD::ImageHeader)));
+        ISMRMRD::Image<T> im;
+        im.setHead(h);
+        boost::asio::read(*stream, boost::asio::buffer(im.getDataPtr(), im.getDataSize()));
+
+        std::cout << "Receiving image : " << h.image_series_index << " - " << h.image_index << std::endl;
+
+        // Both output files share the same stem and differ only in extension.
+        std::stringstream stem;
+        stem << prefix_ << "_" << h.image_series_index << "_" << h.image_index;
+        const std::string head_varname = stem.str() + ".hdr";
+        const std::string img_varname = stem.str() + ".img";
+
+        // Matrix extents and pixel size (field of view divided by matrix size)
+        // for the three image axes.
+        std::vector<size_t> dim;
+        std::vector<float> pixelSize;
+        for ( int axis=0; axis<3; axis++ )
+        {
+            dim.push_back(h.matrix_size[axis]);
+            pixelSize.push_back(h.field_of_view[axis]/h.matrix_size[axis]);
+        }
+
+        // analyze header
+        dsr header;
+        IOAnalyze hdr;
+        hdr.array2Header<T>(dim, pixelSize, header);
+
+        std::ofstream outfileHeader(head_varname.c_str(), std::ios::out|std::ios::binary);
+        outfileHeader.write(reinterpret_cast<const char*>(&header), sizeof(dsr));
+        outfileHeader.close();
+
+        // data
+        const size_t numBytes = sizeof(T)*dim[0]*dim[1]*dim[2];
+        std::ofstream outfileData(img_varname.c_str(), std::ios::out|std::ios::binary);
+        outfileData.write(reinterpret_cast<const char*>(im.getDataPtr()), numBytes);
+        outfileData.close();
+    }
+
+protected:
+
+    std::string prefix_;
+};
+
+template <typename T> class GadgetronClientAttribAnalyzeImageMessageReader 
+    : public GadgetronClientMessageReader
+{
+    // Receives one image with meta attributes and writes <name>.hdr, <name>.img and <name>.attrib.
+public:
+    GadgetronClientAttribAnalyzeImageMessageReader(const std::string& prefix=std::string("Image")) : prefix_(prefix)
+    {
+        // prefix_ is prepended to every output file name unless it is empty.
+    }
+
+    ~GadgetronClientAttribAnalyzeImageMessageReader() {
+    } 
+    // Socket read order: ImageHeader, attribute length, attribute bytes, image data.
+    virtual void read(tcp::socket* stream) 
+    {
+        //Read the image header from the socket
+        ISMRMRD::ImageHeader h;
+        boost::asio::read(*stream, boost::asio::buffer(&h,sizeof(ISMRMRD::ImageHeader)));
+        ISMRMRD::Image<T> im; 
+        im.setHead(h);
+
+        std::cout << "Receiving image with attributes : " << h.image_series_index << " - " << h.image_index << std::endl;
+
+        typedef unsigned long long size_t_type;
+
+        //Read meta attributes (length prefix of type size_t_type, then that many bytes)
+        size_t_type meta_attrib_length;
+        boost::asio::read(*stream, boost::asio::buffer(&meta_attrib_length, sizeof(size_t_type)));
+
+        std::string meta_attrib(meta_attrib_length,0);
+        boost::asio::read(*stream, boost::asio::buffer(const_cast<char*>(meta_attrib.c_str()), meta_attrib_length));
+
+        //Read image data
+        boost::asio::read(*stream, boost::asio::buffer(im.getDataPtr(), im.getDataSize()));
+        {
+            // deserialize the meta attribute
+            ISMRMRD::MetaContainer imgAttrib;
+            ISMRMRD::deserialize(meta_attrib.c_str(), imgAttrib);
+
+            size_t n;
+            size_t num = imgAttrib.length("GT_DataRole");
+
+            std::vector<std::string> dataRole;
+            if ( num == 0 )
+            {
+                dataRole.push_back("GT_Image");
+            }
+            else
+            {
+                dataRole.resize(num);
+                for ( n=0; n<num; n++ )
+                {
+                    dataRole[n] = std::string( imgAttrib.as_str("GT_DataRole", n) );
+                }
+            }
+            // NOTE(review): imageNumber is not used anywhere below in this function.
+            long imageNumber = imgAttrib.as_long("GT_ImageNumber", 0);
+
+            long cha, slc, e2, con, phs, rep, set, ave;
+            cha = imgAttrib.as_long("CHA",          0);
+            slc = imgAttrib.as_long("SLC",          0);
+            e2  = imgAttrib.as_long("E2",           0);
+            con = imgAttrib.as_long("CON",          0);
+            phs = imgAttrib.as_long("PHS",          0);
+            rep = imgAttrib.as_long("REP",          0);
+            set = imgAttrib.as_long("SET",          0);
+            ave = imgAttrib.as_long("AVE",          0);
+            // Base file name: [prefix_]<role>_..._SLC#_E2#_CON#_PHS#_REP#_SET#_AVE#_CHA#_ImageSeries#
+            std::ostringstream ostr;
+
+            if ( !prefix_.empty() )
+            {
+                ostr << prefix_ << "_";
+            }
+
+            for ( n=0; n<dataRole.size(); n++ )
+            {
+                ostr << dataRole[n] << "_";
+            }
+
+            ostr << "SLC" << slc << "_"
+                 << "E2"  << e2  << "_"
+                 << "CON" << con << "_"
+                 << "PHS" << phs << "_"
+                 << "REP" << rep << "_"
+                 << "SET" << set << "_"
+                 << "AVE" << ave << "_"
+                 << "CHA" << cha << "_" 
+                 << "ImageSeries" << h.image_series_index;
+
+            std::string filename = ostr.str();
+
+            // analyze header (.hdr), built from the matrix size and pixel size (field of view / matrix size)
+            std::stringstream st1;
+            st1 << filename << ".hdr";
+            std::string head_varname = st1.str();
+
+            std::vector<size_t> dim(3);
+            dim[0] = h.matrix_size[0];
+            dim[1] = h.matrix_size[1];
+            dim[2] = h.matrix_size[2];
+
+            std::vector<float> pixelSize(3);
+            pixelSize[0] = h.field_of_view[0]/h.matrix_size[0];
+            pixelSize[1] = h.field_of_view[1]/h.matrix_size[1];
+            pixelSize[2] = h.field_of_view[2]/h.matrix_size[2];
+
+            IOAnalyze hdr;
+            dsr header;
+            hdr.array2Header<T>(dim, pixelSize, header);
+            // NOTE(review): none of the output streams is checked after open() or write().
+            std::ofstream outfileHeader;
+            outfileHeader.open (head_varname.c_str(), std::ios::out|std::ios::binary);
+            outfileHeader.write(reinterpret_cast<const char*>(&header), sizeof(dsr));
+            outfileHeader.close();
+
+            // data (.img): sizeof(T)*dim[0]*dim[1]*dim[2] bytes of the image buffer
+            std::stringstream st2;
+            st2 << filename << ".img";
+            std::string img_varname = st2.str();
+
+            std::ofstream outfileData;
+            outfileData.open (img_varname.c_str(), std::ios::out|std::ios::binary);
+            outfileData.write(reinterpret_cast<const char*>(im.getDataPtr()), sizeof(T)*dim[0]*dim[1]*dim[2]);
+            outfileData.close();
+
+            // attribute (.attrib): the meta attribute bytes exactly as received
+            std::stringstream st3;
+            st3 << filename << ".attrib";
+            std::string meta_varname = st3.str();
+
+            std::ofstream outfile;
+            outfile.open (meta_varname.c_str(), std::ios::out|std::ios::binary);
+            outfile.write(meta_attrib.c_str(), meta_attrib_length);
+            outfile.close();
+        }
+    }
+
+protected:
+
+    std::string prefix_;
+};
+
+// ----------------------------------------------------------------
+
+#define MAX_BLOBS_LOG_10    6   // field width of the zero-padded counter in blob file names
+
+// Reads length-prefixed binary blobs from the socket and stores each one in
+// its own file named <prefix>_<zero-padded call counter>.<suffix>.
+class GadgetronClientBlobMessageReader
+    : public GadgetronClientMessageReader
+{
+public:
+    GadgetronClientBlobMessageReader(std::string fileprefix, std::string filesuffix)
+        : number_of_calls_(0)
+        , file_prefix(fileprefix)
+        , file_suffix(filesuffix)
+    {
+    }
+
+    virtual ~GadgetronClientBlobMessageReader() {}
+
+    // Reads one blob (32-bit byte count, then the payload) and writes it to disk.
+    // Throws GadgetronClientException when the output file cannot be opened.
+    virtual void read(tcp::socket* socket)
+    {
+        // MUST READ 32-bits
+        uint32_t nbytes;
+        boost::asio::read(*socket, boost::asio::buffer(&nbytes,sizeof(uint32_t)));
+
+        // buffer(vector) copes with an empty vector; &data[0] would be
+        // undefined behaviour when the peer announces a zero-length blob.
+        std::vector<char> data(nbytes,0);
+        boost::asio::read(*socket, boost::asio::buffer(data));
+
+        // Create the filename: (prefix_%06.suffix)
+        std::stringstream filename;
+        filename << file_prefix << "_";
+        filename << std::setfill('0') << std::setw(MAX_BLOBS_LOG_10) << number_of_calls_;
+        filename << "." << file_suffix;
+
+        std::ofstream outfile;
+        outfile.open (filename.str().c_str(), std::ios::out|std::ios::binary);
+
+        std::cout << "Writing image " << filename.str() << std::endl;
+
+        if (!outfile.good()) {
+            throw GadgetronClientException("Unable to write blob to output file\n");
+        }
+        if (!data.empty()) {
+            outfile.write(&data[0], nbytes);
+        }
+        outfile.close();
+        number_of_calls_++;
+    }
+
+protected:
+    size_t number_of_calls_;   // blobs written so far; used in the file name
+    std::string file_prefix;
+    std::string file_suffix;
+};
+
+// Reads a blob plus its file name and meta attributes; writes [prefix_]<name>.<suffix> and [prefix_]<name>_attrib.xml.
+class GadgetronClientBlobAttribMessageReader
+    : public GadgetronClientMessageReader
+{
+public:
+    GadgetronClientBlobAttribMessageReader(std::string fileprefix, std::string filesuffix)
+        : number_of_calls_(0)
+        , file_prefix(fileprefix)
+        , file_suffix(filesuffix)
+    {
+    }
+
+    virtual ~GadgetronClientBlobAttribMessageReader() {}
+
+    // Socket read order: 32-bit blob size, blob, file name length, file name,
+    // attribute length, attributes. Throws GadgetronClientException when the
+    // attribute length is too small or the image file cannot be opened.
+    virtual void read(tcp::socket* socket)
+    {
+        typedef unsigned long long size_t_type;
+
+        // MUST READ 32-bits
+        uint32_t nbytes;
+        boost::asio::read(*socket, boost::asio::buffer(&nbytes,sizeof(uint32_t)));
+
+        // buffer(vector) copes with an empty vector; &data[0] would be undefined for a zero-length blob.
+        std::vector<char> data(nbytes,0);
+        boost::asio::read(*socket, boost::asio::buffer(data));
+
+        size_t_type fileNameLen;
+        boost::asio::read(*socket, boost::asio::buffer(&fileNameLen,sizeof(size_t_type)));
+
+        // Receive into a vector rather than writing through string::c_str().
+        std::vector<char> filenameRaw(fileNameLen,0);
+        boost::asio::read(*socket, boost::asio::buffer(filenameRaw));
+        const std::string filenameBuf(filenameRaw.begin(), filenameRaw.end());
+
+        size_t_type meta_attrib_length;
+        boost::asio::read(*socket, boost::asio::buffer(&meta_attrib_length, sizeof(size_t_type)));
+
+        // sizeof(size_t_type) is subtracted below; a smaller length would wrap around to a huge size.
+        if (meta_attrib_length < sizeof(size_t_type)) {
+            throw GadgetronClientException("Invalid meta attribute length\n");
+        }
+
+        std::vector<char> attribRaw(meta_attrib_length-sizeof(size_t_type),0);
+        boost::asio::read(*socket, boost::asio::buffer(attribRaw));
+        const std::string meta_attrib(attribRaw.begin(), attribRaw.end());
+
+        // Create the filenames: ([prefix_]name.suffix and [prefix_]name_attrib.xml)
+        std::string filename_image, filename_attrib;
+        if ( file_prefix.empty() )
+        {
+            filename_image =  filenameBuf + "." + file_suffix;
+            filename_attrib =  filenameBuf + "_attrib.xml";
+        }
+        else
+        {
+            filename_image = file_prefix + "_" + filenameBuf + "." + file_suffix;
+            filename_attrib = file_prefix + "_" + filenameBuf + "_attrib.xml";
+        }
+
+        std::cout << "Writing image " << filename_image.c_str() << std::endl;
+
+        std::ofstream outfile;
+        outfile.open (filename_image.c_str(), std::ios::out|std::ios::binary);
+
+        std::ofstream outfile_attrib;
+        outfile_attrib.open (filename_attrib.c_str(), std::ios::out|std::ios::binary);
+
+        if (!outfile.good()) {
+            throw GadgetronClientException("Unable to write blob to output file\n");
+        }
+        if (!data.empty()) {
+            outfile.write(&data[0], nbytes);
+        }
+        outfile.close();
+        outfile_attrib.write(meta_attrib.c_str(), meta_attrib.length());
+        outfile_attrib.close();
+        number_of_calls_++;
+    }
+
+protected:
+    size_t number_of_calls_;
+    std::string file_prefix;
+    std::string file_suffix;
+};
+
+// Client-side connection to a Gadgetron server: sends configuration, parameters and
+// acquisitions, and hands each incoming message to the reader registered for its id.
+class GadgetronClientConnector
+{
+public:
+    GadgetronClientConnector()
+        : socket_(0)
+    {
+    }
+
+    virtual ~GadgetronClientConnector()
+    {
+        if (socket_) {
+            socket_->close();
+            delete socket_;
+        }
+    }
+
+    // Receives messages until the socket closes or GADGET_MESSAGE_CLOSE arrives; throws on an unregistered id.
+    void read_task()
+    {
+        if (!socket_) {
+            throw GadgetronClientException("Unable to create socket.");
+        }
+
+        GadgetMessageIdentifier id;
+        while (socket_->is_open()) {
+            boost::asio::read(*socket_, boost::asio::buffer(&id,sizeof(GadgetMessageIdentifier)));
+            if (id.id == GADGET_MESSAGE_CLOSE) {
+                break;
+            }
+
+            GadgetronClientMessageReader* r = find_reader(id.id);
+            if (!r) {
+                std::cout << "Message received with ID: " << id.id << std::endl;
+                throw GadgetronClientException("Unknown Message ID");
+            } else {
+                r->read(socket_);
+            }
+        }
+    }
+
+    // Blocks until the reader thread has finished.
+    void wait() {
+        reader_thread_.join();
+    }
+
+    // Resolves hostname/port (IPv4), connects, and starts the reader thread.
+    void connect(std::string hostname, std::string port)
+    {
+        tcp::resolver resolver(io_service);
+        tcp::resolver::query query(tcp::v4(), hostname.c_str(), port.c_str());
+        tcp::resolver::iterator endpoint_iterator = resolver.resolve(query);
+        tcp::resolver::iterator end;
+
+        socket_ = new tcp::socket(io_service);
+
+        //TODO:
+        //For newer versions of Boost, we should use
+        //   boost::asio::connect(*socket_, iterator);
+
+        boost::system::error_code error = boost::asio::error::host_not_found;
+        while (error && endpoint_iterator != end) {
+            socket_->close();
+            socket_->connect(*endpoint_iterator++, error);
+        }
+        if (error)
+            throw GadgetronClientException("Error connecting using socket.");
+
+        // An exception escaping a boost::thread function terminates the process, hence the wrapper.
+        reader_thread_ = boost::thread(boost::bind(&GadgetronClientConnector::guarded_read_task, this));
+    }
+
+    void send_gadgetron_close() {
+        if (!socket_) {
+            throw GadgetronClientException("Invalid socket.");
+        }
+        GadgetMessageIdentifier id;
+        id.id = GADGET_MESSAGE_CLOSE;
+        boost::asio::write(*socket_, boost::asio::buffer(&id, sizeof(GadgetMessageIdentifier)));
+    }
+
+    // Sends a configuration file name; throws when it does not fit into the message field.
+    void send_gadgetron_configuration_file(std::string config_xml_name) {
+        if (!socket_) {
+            throw GadgetronClientException("Invalid socket.");
+        }
+
+        GadgetMessageIdentifier id;
+        id.id = GADGET_MESSAGE_CONFIG_FILE;
+
+        GadgetMessageConfigurationFile ini;
+        memset(&ini,0,sizeof(GadgetMessageConfigurationFile));
+        // Copying size() bytes unchecked would overrun the field; keep one zero byte after the name.
+        if (config_xml_name.size() >= sizeof(ini.configuration_file)) {
+            throw GadgetronClientException("Configuration file name is too long.");
+        }
+        strncpy(ini.configuration_file, config_xml_name.c_str(),config_xml_name.size());
+
+        boost::asio::write(*socket_, boost::asio::buffer(&id, sizeof(GadgetMessageIdentifier)));
+        boost::asio::write(*socket_, boost::asio::buffer(&ini, sizeof(GadgetMessageConfigurationFile)));
+    }
+
+    // Sends an XML configuration as a length-prefixed, zero-terminated string.
+    void send_gadgetron_configuration_script(std::string xml_string)
+    {
+        if (!socket_) {
+            throw GadgetronClientException("Invalid socket.");
+        }
+
+        GadgetMessageIdentifier id;
+        id.id = GADGET_MESSAGE_CONFIG_SCRIPT;
+
+        GadgetMessageScript conf;
+        conf.script_length = (uint32_t)xml_string.size()+1;
+
+        boost::asio::write(*socket_, boost::asio::buffer(&id, sizeof(GadgetMessageIdentifier)));
+        boost::asio::write(*socket_, boost::asio::buffer(&conf, sizeof(GadgetMessageScript)));
+        boost::asio::write(*socket_, boost::asio::buffer(xml_string.c_str(), conf.script_length));
+    }
+
+    // Sends the parameter XML as a length-prefixed, zero-terminated string.
+    void  send_gadgetron_parameters(std::string xml_string)
+    {
+        if (!socket_) {
+            throw GadgetronClientException("Invalid socket.");
+        }
+
+        GadgetMessageIdentifier id;
+        id.id = GADGET_MESSAGE_PARAMETER_SCRIPT;
+
+        GadgetMessageScript conf;
+        conf.script_length = (uint32_t)xml_string.size()+1;
+
+        boost::asio::write(*socket_, boost::asio::buffer(&id, sizeof(GadgetMessageIdentifier)));
+        boost::asio::write(*socket_, boost::asio::buffer(&conf, sizeof(GadgetMessageScript)));
+        boost::asio::write(*socket_, boost::asio::buffer(xml_string.c_str(), conf.script_length));
+    }
+
+    // Sends one acquisition: header, then trajectory and data when present.
+    void send_ismrmrd_acquisition(ISMRMRD::Acquisition& acq)
+    {
+        if (!socket_) {
+            throw GadgetronClientException("Invalid socket.");
+        }
+
+        GadgetMessageIdentifier id;
+        id.id = GADGET_MESSAGE_ISMRMRD_ACQUISITION;
+
+        boost::asio::write(*socket_, boost::asio::buffer(&id, sizeof(GadgetMessageIdentifier)));
+        boost::asio::write(*socket_, boost::asio::buffer(&acq.getHead(), sizeof(ISMRMRD::AcquisitionHeader)));
+
+        unsigned long trajectory_elements = acq.getHead().trajectory_dimensions*acq.getHead().number_of_samples;
+        unsigned long data_elements = acq.getHead().active_channels*acq.getHead().number_of_samples;
+
+        if (trajectory_elements) {
+            boost::asio::write(*socket_, boost::asio::buffer(&acq.getTrajPtr()[0], sizeof(float)*trajectory_elements));
+        }
+        if (data_elements) {
+            boost::asio::write(*socket_, boost::asio::buffer(&acq.getDataPtr()[0], 2*sizeof(float)*data_elements));
+        }
+    }
+
+    // Registers the reader that handles messages with id `slot`.
+    void register_reader(unsigned short slot, boost::shared_ptr<GadgetronClientMessageReader> r) {
+        readers_[slot] = r;
+    }
+
+protected:
+    typedef std::map<unsigned short, boost::shared_ptr<GadgetronClientMessageReader> > maptype;
+
+    // Thread entry point: reports exceptions from read_task() instead of letting them escape.
+    void guarded_read_task()
+    {
+        try {
+            read_task();
+        } catch (std::exception& ex) {
+            std::cout << "Error caught: " << ex.what() << std::endl;
+        }
+    }
+
+    // Returns the reader registered for id r, or 0 when there is none.
+    GadgetronClientMessageReader* find_reader(unsigned short r)
+    {
+        maptype::iterator it = readers_.find(r);
+        return (it != readers_.end()) ? it->second.get() : 0;
+    }
+
+    boost::asio::io_service io_service;
+    tcp::socket* socket_;
+    boost::thread reader_thread_;
+    maptype readers_;
+};
+
+
+// Gadgetron ISMRMRD client: streams the acquisitions of an ISMRMRD file to a
+// Gadgetron server and stores the images it sends back (HDF5 or Analyze).
+// Returns 0 on success and -1 on invalid arguments or a communication error.
+int main(int argc, char **argv)
+{
+
+    std::string host_name;
+    std::string port;
+    std::string in_filename;
+    std::string out_filename;
+    std::string hdf5_in_group;
+    std::string hdf5_out_group;
+    std::string config_file;
+    std::string config_file_local;
+    std::string config_xml_local;
+    unsigned int loops;
+    std::string out_fileformat;
+
+    po::options_description desc("Allowed options");
+
+    desc.add_options()
+        ("help,h", "produce help message")
+        ("port,p", po::value<std::string>(&port)->default_value("9002"), "Port")
+        ("address,a", po::value<std::string>(&host_name)->default_value("localhost"), "Address (hostname) of Gadgetron host")
+        ("filename,f", po::value<std::string>(&in_filename), "Input file")
+        ("outfile,o", po::value<std::string>(&out_filename)->default_value("out.h5"), "Output file")
+        ("in-group,g", po::value<std::string>(&hdf5_in_group)->default_value("/dataset"), "Input data group")
+        ("out-group,G", po::value<std::string>(&hdf5_out_group)->default_value(get_date_time_string()), "Output group name")
+        ("config,c", po::value<std::string>(&config_file)->default_value("default.xml"), "Configuration file (remote)")
+        ("config-local,C", po::value<std::string>(&config_file_local), "Configuration file (local)")
+        ("loops,l", po::value<unsigned int>(&loops)->default_value(1), "Loops")
+        ("outformat,F", po::value<std::string>(&out_fileformat)->default_value("h5"), "Out format, h5 for hdf5 and hdr for analyze image")
+        ;
+
+    po::variables_map vm;
+    po::store(po::parse_command_line(argc, argv, desc), vm);
+    po::notify(vm);
+
+    if (vm.count("help")) {
+        std::cout << desc << std::endl;
+        return 0;
+    }
+
+    if (!vm.count("filename")) {
+        std::cout << std::endl << std::endl << "\tYou must supply a filename" << std::endl << std::endl;
+        std::cout << desc << std::endl;
+        return -1;
+    }
+
+    if (vm.count("config-local")) {
+        std::ifstream t(config_file_local.c_str());
+        if (t) {
+            //Read in the file.
+            config_xml_local = std::string((std::istreambuf_iterator<char>(t)),
+                std::istreambuf_iterator<char>());
+        } else {
+            std::cout << "Unable to read local xml configuration: " << config_file_local  << std::endl;
+            return -1;
+        }
+    }
+
+    std::cout << "Gadgetron ISMRMRD client" << std::endl;
+
+    //TODO:
+    // Add check to see if input file exists
+
+    //Let's open the input file
+    ISMRMRD::Dataset ismrmrd_dataset(in_filename.c_str(), hdf5_in_group.c_str(), false);
+    // Read the header
+    std::string xml_config;
+    ismrmrd_dataset.readHeader(xml_config);
+
+
+    std::cout << "  -- host            :      " << host_name << std::endl;
+    std::cout << "  -- port            :      " << port << std::endl;
+    std::cout << "  -- hdf5 file  in   :      " << in_filename << std::endl;
+    std::cout << "  -- hdf5 group in   :      " << hdf5_in_group << std::endl;
+    std::cout << "  -- conf            :      " << config_file << std::endl;
+    std::cout << "  -- loop            :      " << loops << std::endl;
+    std::cout << "  -- hdf5 file out   :      " << out_filename << std::endl;
+    std::cout << "  -- hdf5 group out  :      " << hdf5_out_group << std::endl;
+
+
+    GadgetronClientConnector con;
+
+    if ( out_fileformat == "hdr" )
+    {
+        con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGE_REAL_USHORT, boost::shared_ptr<GadgetronClientMessageReader>(new GadgetronClientAnalyzeImageMessageReader<uint16_t>(hdf5_out_group)));
+        con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGE_REAL_FLOAT, boost::shared_ptr<GadgetronClientMessageReader>(new GadgetronClientAnalyzeImageMessageReader<float>(hdf5_out_group)));
+        con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGE_CPLX_FLOAT, boost::shared_ptr<GadgetronClientMessageReader>(new GadgetronClientAnalyzeImageMessageReader< std::complex<float> >(hdf5_out_group)));
+
+        //Image with attributes 
+        con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_REAL_USHORT, boost::shared_ptr<GadgetronClientMessageReader>(new GadgetronClientAttribAnalyzeImageMessageReader<uint16_t>(hdf5_out_group)));
+        con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_REAL_FLOAT, boost::shared_ptr<GadgetronClientMessageReader>(new GadgetronClientAttribAnalyzeImageMessageReader<float>(hdf5_out_group)));
+        con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_CPLX_FLOAT, boost::shared_ptr<GadgetronClientMessageReader>(new GadgetronClientAttribAnalyzeImageMessageReader< std::complex<float> >(hdf5_out_group)));
+    }
+    else
+    {
+        con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGE_REAL_USHORT, boost::shared_ptr<GadgetronClientMessageReader>(new GadgetronClientImageMessageReader<uint16_t>(out_filename, hdf5_out_group)));
+        con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGE_REAL_FLOAT, boost::shared_ptr<GadgetronClientMessageReader>(new GadgetronClientImageMessageReader<float>(out_filename, hdf5_out_group)));
+        con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGE_CPLX_FLOAT, boost::shared_ptr<GadgetronClientMessageReader>(new GadgetronClientImageMessageReader< std::complex<float> >(out_filename, hdf5_out_group)));
+
+        //Image with attributes 
+        con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_REAL_USHORT, boost::shared_ptr<GadgetronClientMessageReader>(new GadgetronClientAttribImageMessageReader<uint16_t>(out_filename, hdf5_out_group)));
+        con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_REAL_FLOAT, boost::shared_ptr<GadgetronClientMessageReader>(new GadgetronClientAttribImageMessageReader<float>(out_filename, hdf5_out_group)));
+        con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_CPLX_FLOAT, boost::shared_ptr<GadgetronClientMessageReader>(new GadgetronClientAttribImageMessageReader< std::complex<float> >(out_filename, hdf5_out_group)));
+    }
+
+    con.register_reader(GADGET_MESSAGE_DICOM, boost::shared_ptr<GadgetronClientMessageReader>(new GadgetronClientBlobMessageReader(std::string(hdf5_out_group), std::string("dcm"))));
+    con.register_reader(GADGET_MESSAGE_DICOM_WITHNAME, boost::shared_ptr<GadgetronClientMessageReader>(new GadgetronClientBlobAttribMessageReader(std::string(), std::string("dcm"))));
+
+    // Exceptions raised while talking to the server are reported and turned into a non-zero exit status.
+    try {
+        con.connect(host_name,port);
+        if (vm.count("config-local")) {
+            con.send_gadgetron_configuration_script(config_xml_local);
+        } else {
+            con.send_gadgetron_configuration_file(config_file);
+        }
+        con.send_gadgetron_parameters(xml_config);
+
+        uint32_t acquisitions = 0;
+        // Hold mtx through a scoped lock so it is released even if the call throws.
+        {
+            boost::mutex::scoped_lock scoped_lock(mtx);
+            acquisitions = ismrmrd_dataset.getNumberOfAcquisitions();
+        }
+
+        ISMRMRD::Acquisition acq_tmp;
+        for (uint32_t i = 0; i < acquisitions; i++) {
+            {
+                {
+                    boost::mutex::scoped_lock scoped_lock(mtx);
+                    ismrmrd_dataset.readAcquisition(i, acq_tmp);
+                }
+                con.send_ismrmrd_acquisition(acq_tmp);
+            }
+        }
+
+        con.send_gadgetron_close();
+        con.wait();
+
+    } catch (std::exception& ex) {
+        std::cout << "Error caught: " << ex.what() << std::endl;
+        return -1;
+    }
+
+    return 0;
+}
diff --git a/apps/clients/mriclient/BlobFileWriter.h b/apps/clients/mriclient/BlobFileWriter.h
deleted file mode 100644
index 5c79a3f..0000000
--- a/apps/clients/mriclient/BlobFileWriter.h
+++ /dev/null
@@ -1,91 +0,0 @@
-#ifndef BLOB_FILE_WRITER_H
-#define BLOB_FILE_WRITER_H
-
-#include <fstream>
-#include <iomanip>
-
-#include "GadgetMessageInterface.h"
-
-namespace Gadgetron {
-
-#define MAX_BLOBS_LOG_10    6
-
-class BlobFileWriter : public GadgetMessageReader
-{
-
-    public:
-        BlobFileWriter(std::string fileprefix, std::string filesuffix)
-            : number_of_calls_(0)
-            , file_prefix(fileprefix)
-            , file_suffix(filesuffix)
-        {
-        }
-
-        virtual ~BlobFileWriter() {};
-
-        virtual ACE_Message_Block* read(ACE_SOCK_Stream* socket)
-        {
-            ssize_t recv_count = 0;
-
-            // MUST READ 32-bits
-            uint32_t nbytes;
-            if ((recv_count = socket->recv_n(&nbytes, sizeof(nbytes))) <= 0) {
-                ACE_DEBUG( (LM_ERROR, ACE_TEXT("%P, %l, BlobFileWriter, failed to read Blob Header\n")) );
-                return 0;
-            }
-
-            char *data = new char[nbytes];
-            if ((recv_count = socket->recv_n(data, nbytes)) <= 0) {
-                ACE_DEBUG( (LM_ERROR, ACE_TEXT("%P, %l, BlobFileWriter, failed to read blob from socket\n")) );
-                return 0;
-            }
-
-            if (this->process_image(nbytes, data) < 0) {
-                GADGET_DEBUG1("Failed to process image\n");
-                return 0;
-            }
-
-            delete[] data;
-
-            // The GadgetronConnector expects an ACE_Message_Block* (NOT NULL)
-            ACE_Message_Block *mb = new ACE_Message_Block();
-
-            return mb;
-        }
-
-        virtual int process_image(const unsigned int bytes, const char* data)
-        {
-            std::stringstream filename;
-
-            // Create the filename: (prefix_%06.suffix)
-            filename << file_prefix << "_";
-            filename << std::setfill('0') << std::setw(MAX_BLOBS_LOG_10) << number_of_calls_;
-            filename << "." << file_suffix;
-
-            std::ofstream outfile;
-            outfile.open (filename.str().c_str(), std::ios::out|std::ios::binary);
-
-            ACE_DEBUG( (LM_DEBUG, ACE_TEXT("Writing image %s\n"), filename.str().c_str()) );
-
-            if (outfile.good()) {
-                /* write 'size' bytes starting at 'data's pointer */
-                outfile.write(data, bytes);
-                outfile.close();
-                number_of_calls_++;
-            } else {
-                GADGET_DEBUG1("File is not good for writing\n");
-                return GADGET_FAIL;
-            }
-
-            return GADGET_OK;
-        }
-
-    protected:
-        size_t number_of_calls_;
-        std::string file_prefix;
-        std::string file_suffix;
-};
-
-} // namespace Gadgetron
-
-#endif //BLOB_FILE_WRITER_H
diff --git a/apps/clients/mriclient/CMakeLists.txt b/apps/clients/mriclient/CMakeLists.txt
deleted file mode 100644
index 0be2e20..0000000
--- a/apps/clients/mriclient/CMakeLists.txt
+++ /dev/null
@@ -1,52 +0,0 @@
-find_package(Ismrmrd REQUIRED)
-find_package(HDF5 1.8 COMPONENTS C CXX REQUIRED)
-
-set(Boost_NO_BOOST_CMAKE ON)
-
-if(WIN32)
-  find_package(Boost COMPONENTS thread system date_time chrono REQUIRED)
-else(WIN32)
-  find_package(Boost COMPONENTS thread system REQUIRED)
-endif(WIN32)
-
-if(WIN32)
-  link_directories(${Boost_LIBRARY_DIRS})
-endif(WIN32)
-
-include_directories(      
-  ${CMAKE_SOURCE_DIR}/gadgets/mri_core 
-  ${CMAKE_SOURCE_DIR}/apps/gadgetron
-  ${CMAKE_SOURCE_DIR}/toolboxes/core
-  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
-  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/hostutils
-  ${CMAKE_SOURCE_DIR}/toolboxes/gadgettools
-  ${ISMRMRD_SCHEMA_DIR}
-  ${ISMRMRD_XSD_INCLUDE_DIR}
-  ${ACE_INCLUDE_DIR} 
-  ${Boost_INCLUDE_DIR} 
-  ${HDF5_CXX_INCLUDE_DIR} 
-  ${HDF5_C_INCLUDE_DIR} 
-  ${ISMRMRD_INCLUDE_DIR}
-  ${XSD_INCLUDE_DIR}
-  ${XERCESC_INCLUDE_DIR}
-  )
-
-add_executable(mriclient main.cpp)
-add_executable(gt_alive gt_alive.cpp)
-
-target_link_libraries(mriclient cpucore ${MKL_LIBRARIES})
-target_link_libraries(gt_alive cpucore gadgettools optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY}  ${Boost_LIBRARIES} ${ISMRMRD_LIBRARIES} ${MKL_LIBRARIES})
-
-IF(WIN32)
-    target_link_libraries(mriclient optimized ${HDF5_hdf5_LIBRARY_RELEASE} ${HDF5_hdf5_cpp_LIBRARY_RELEASE})
-    target_link_libraries(mriclient debug ${HDF5_hdf5_LIBRARY_DEBUG} ${HDF5_hdf5_cpp_LIBRARY_DEBUG})
-
-    target_link_libraries(mriclient gadgetron_mricore gadgettools optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY} ${ISMRMRD_LIBRARIES} ${Boost_LIBRARIES})
-ELSE (WIN32)
-    target_link_libraries(mriclient gadgettools  ${HDF5_LIBRARIES} optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY} ${ISMRMRD_LIBRARIES} ${Boost_LIBRARIES})
-ENDIF(WIN32)
-
-install(TARGETS mriclient gt_alive DESTINATION bin)
-install(FILES ImageWriter.h HDF5ImageWriter.h BlobFileWriter.h DESTINATION include)
-install(FILES ${ISMRMRD_LIBRARIES} DESTINATION lib)
-install(FILES isalive.xml DESTINATION config)
diff --git a/apps/clients/mriclient/HDF5ImageWriter.h b/apps/clients/mriclient/HDF5ImageWriter.h
deleted file mode 100644
index f3327de..0000000
--- a/apps/clients/mriclient/HDF5ImageWriter.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * HDF5ImageWriter.h
- *
- *  Created on: Jan 25, 2012
- *      Author: Michael S. Hansen
- */
-
-#ifndef HDF5IMAGEWRITER_H_
-#define HDF5IMAGEWRITER_H_
-
-#include "ImageWriter.h"
-
-#include <ismrmrd_hdf5.h>
-#include <sstream>
-
-namespace Gadgetron{
-template <typename T> class HDF5ImageWriter : public ImageWriter<T>
-{
-
-public:
-	HDF5ImageWriter(std::string filename, std::string groupname)
-	: ImageWriter<T>()
-	, file_name_(filename)
-	, group_name_(groupname)
-	, dataset_(filename.c_str(), groupname.c_str())
-	{
-
-	}
-
-	virtual int process_image(ISMRMRD::ImageHeader* img_head,
-			hoNDArray< T >* data)
-	{
-		try {
-			ISMRMRD::HDF5Exclusive lock; //This will ensure threadsafe access to HDF5
-			std::stringstream st1;
-			st1 << "image_" << img_head->image_series_index << ".head";
-			std::string head_varname = st1.str();
-
-			std::stringstream st2;
-			st2 << "image_" << img_head->image_series_index << ".img";
-			std::string img_varname = st2.str();
-
-			if (dataset_.appendImageHeader(*img_head, head_varname.c_str()) < 0) {
-				GADGET_DEBUG1("Failed to write image header\n");
-				return GADGET_FAIL;
-			}
-
-            std::vector<size_t> dim = *data->get_dimensions();
-            std::vector<unsigned int> dim2(dim.size());
-
-            size_t ii;
-            for ( ii=0; ii<dim.size(); ii++ )
-            {
-                dim2[ii] = dim[ii];
-            }
-
-			if (dataset_.appendArray(dim2,data->get_data_ptr(), img_varname.c_str())  < 0) {
-				GADGET_DEBUG1("Failed to write image data\n");
-				return GADGET_FAIL;
-			};
-		} catch (...) {
-			GADGET_DEBUG1("Error attempting to append images to HDF5 file\n");
-			return GADGET_FAIL;
-		}
-
-		return GADGET_OK;
-	}
-
-protected:
-	std::string group_name_;
-	std::string file_name_;
-	ISMRMRD::IsmrmrdDataset dataset_;
-};
-
-}
-#endif /* HDF5IMAGEWRITER_H_ */
diff --git a/apps/clients/mriclient/ImageWriter.h b/apps/clients/mriclient/ImageWriter.h
deleted file mode 100644
index 8341e0f..0000000
--- a/apps/clients/mriclient/ImageWriter.h
+++ /dev/null
@@ -1,113 +0,0 @@
-#ifndef IMAGEWRITER_H
-#define IMAGEWRITER_H
-
-#include <fstream>
-
-#include "GadgetImageMessageReader.h"
-
-namespace Gadgetron
-{
-
-template <typename T> class ImageWriter : public GadgetImageMessageReader<T>
-{
-
-public:
-	ImageWriter()
-	: number_of_calls_(0)
-	{}
-
-	virtual ~ImageWriter() {};
-
-	virtual ACE_Message_Block* read(ACE_SOCK_Stream* socket) 
-	{
-		// Invoke parent's read
-		ACE_Message_Block* mb = GadgetImageMessageReader<T>::read(socket);
-
-		if (!mb) {
-			GADGET_DEBUG1("Read failed in parent\n");
-			return 0;
-		}
-
-		GadgetContainerMessage<ISMRMRD::ImageHeader> * img_head_mb =
-				dynamic_cast<GadgetContainerMessage<ISMRMRD::ImageHeader> *>(mb);
-
-		if (!img_head_mb) {
-			GADGET_DEBUG1("Failed in dynamic cast\n");
-			mb->release();
-			return 0;
-		}
-
-		//GADGET_DEBUG2("Received image with %d channels\n", img_head_mb->getObjectPtr()->channels);
-
-		GadgetContainerMessage<hoNDArray< T > > * img_data_mb =
-				dynamic_cast<GadgetContainerMessage<hoNDArray< T > > *>(img_head_mb->cont());
-
-		if (!img_data_mb) {
-			GADGET_DEBUG1("Failed in dynamic cast\n");
-			mb->release();
-			return 0;
-		}
-
-		if (this->process_image(img_head_mb->getObjectPtr(), img_data_mb->getObjectPtr()) < 0) {
-			GADGET_DEBUG1("Failed to process image\n");
-			mb->release();
-			return 0;
-		}
-
-		return mb;
-	}
-
-	virtual int process_image(ISMRMRD::ImageHeader* img_head,
-			hoNDArray< T >* data)
-	{
-		ACE_DEBUG( (LM_DEBUG, ACE_TEXT("Image Writer writing image\n")) );
-
-		char filename[1024];
-
-		switch (sizeof(T)) {
-
-		case (8): //Complex float
-    			sprintf(filename, "out_%05d.cplx", (int)number_of_calls_);
-		break;
-		case (4): //Real floats
-				sprintf(filename, "out_%05d.real", (int)number_of_calls_);
-		break;
-		case (2): //Unsigned short
-				sprintf(filename, "out_%05d.short", (int)number_of_calls_);
-		break;
-		default:
-			sprintf(filename, "out_%05d.cplx", (int)number_of_calls_);
-			break;
-		}
-
-		std::ofstream outfile;
-		outfile.open (filename, std::ios::out|std::ios::binary);
-
-		if (outfile.good()) {
-			int ndim = 4;
-			int dims[4];
-			size_t elements = 1;
-			dims[0] = img_head->matrix_size[0]; elements*=dims[0];
-			dims[1] = img_head->matrix_size[1]; elements*=dims[1];
-			dims[2] = img_head->matrix_size[2]; elements*=dims[2];
-			dims[3] = img_head->channels; elements*=dims[3];
-
-			outfile.write((char*)&ndim,sizeof(int));
-			outfile.write((char*)dims,sizeof(int)*4);
-			outfile.write((char*)data->get_data_ptr(),sizeof(T)*elements);
-			outfile.close();
-			number_of_calls_++;
-		} else {
-			GADGET_DEBUG1("File is not good for writing\n");
-			return GADGET_FAIL;
-		}
-
-		return GADGET_OK;
-	}
-
-protected:
-	size_t number_of_calls_;
-};
-
-}
-#endif //IMAGE_WRITER
diff --git a/apps/clients/mriclient/gt_alive.cpp b/apps/clients/mriclient/gt_alive.cpp
deleted file mode 100644
index 145980f..0000000
--- a/apps/clients/mriclient/gt_alive.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-#include "GadgetronConnector.h"
-#include "GadgetMRIHeaders.h"
-#include "GadgetContainerMessage.h"
-#include "hoNDArray.h"
-#include "ImageWriter.h"
-#include "HDF5ImageWriter.h"
-#include "FileInfo.h"
-#include "ismrmrd_hdf5.h"
-#include "GadgetIsmrmrdReadWrite.h"
-
-#include <ace/Log_Msg.h>
-#include <ace/Get_Opt.h>
-#include <ace/OS_NS_string.h>
-
-#include <fstream>
-#include <time.h>
-#include <iomanip>
-
-using namespace Gadgetron;
-
-int ACE_TMAIN(int argc, ACE_TCHAR *argv[] )
-{
-	GadgetronConnector con;
-
-	std::string host("localhost");
-	std::string port("9002");
-
-	if (argc > 1) {
-		host = std::string(argv[1]);
-	}
-
-	if (argc > 2) {
-		port = std::string(argv[2]);
-	}
-
-	if (con.open(host,port) != 0) {
-		ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to connect to the Gadgetron host")));
-		return -1;
-	}
-
-	//Tell Gadgetron which XML configuration to run.
-	if (con.send_gadgetron_configuration_file(std::string("isalive.xml")) != 0) {
-		ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to send XML configuration to the Gadgetron host")));
-		return -1;
-	}
-
-
-	GadgetContainerMessage<GadgetMessageIdentifier>* m1 =
-			new GadgetContainerMessage<GadgetMessageIdentifier>();
-
-	m1->getObjectPtr()->id = GADGET_MESSAGE_CLOSE;
-
-	if (con.putq(m1) == -1) {
-		ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to put CLOSE package on queue")));
-		return -1;
-	}
-
-	con.wait();
-
-	return 0;
-}
diff --git a/apps/clients/mriclient/main.cpp b/apps/clients/mriclient/main.cpp
deleted file mode 100644
index 3d3b0c8..0000000
--- a/apps/clients/mriclient/main.cpp
+++ /dev/null
@@ -1,230 +0,0 @@
-#include "ace/Log_Msg.h"
-#include "ace/Get_Opt.h"
-#include "ace/OS_NS_string.h"
-
-#include "GadgetronConnector.h"
-#include "GadgetMRIHeaders.h"
-#include "GadgetContainerMessage.h"
-#include "hoNDArray.h"
-#include "ImageWriter.h"
-#include "HDF5ImageWriter.h"
-#include "FileInfo.h"
-#include "ismrmrd_hdf5.h"
-#include "GadgetIsmrmrdReadWrite.h"
-#include "BlobFileWriter.h"
-
-#include <fstream>
-#include <time.h>
-#include <iomanip>
-
-using namespace Gadgetron;
-void print_usage()
-{
-	ACE_DEBUG((LM_INFO, ACE_TEXT("Usage: \n") ));
-	ACE_DEBUG((LM_INFO, ACE_TEXT("mriclient -p <PORT>                      (default 9002)\n") ));
-	ACE_DEBUG((LM_INFO, ACE_TEXT("          -h <HOST>                      (default localhost)\n") ));
-	ACE_DEBUG((LM_INFO, ACE_TEXT("          -d <HDF5 DATA FILE>            (default ./data.h5)\n") ));
-	ACE_DEBUG((LM_INFO, ACE_TEXT("          -g <HDF5 DATA GROUP>           (default /dataset)\n") ));
-	ACE_DEBUG((LM_INFO, ACE_TEXT("          -c <GADGETRON CONFIG>          (default default.xml)\n") ));
-	ACE_DEBUG((LM_INFO, ACE_TEXT("          -l <LOOPS>                     (default 1)\n") ));
-	ACE_DEBUG((LM_INFO, ACE_TEXT("          -o <HDF5 OUT FILE>             (out.h5)\n") ));
-	ACE_DEBUG((LM_INFO, ACE_TEXT("          -G <HDF5 OUT GROUP>            (default date and time)\n") ));
-}
-
-
-std::string get_date_time_string()
-{
-	time_t rawtime;
-	struct tm * timeinfo;
-	time ( &rawtime );
-	timeinfo = localtime ( &rawtime );
-
-
-	std::stringstream str;
-	str << timeinfo->tm_year+1900 << "-"
-			<< std::setw(2) << std::setfill('0') << timeinfo->tm_mon+1
-			<< "-"
-			<< std::setw(2) << std::setfill('0') << timeinfo->tm_mday
-			<< " "
-			<< std::setw(2) << std::setfill('0') << timeinfo->tm_hour
-			<< ":"
-			<< std::setw(2) << std::setfill('0') << timeinfo->tm_min
-			<< ":"
-			<< std::setw(2) << std::setfill('0') << timeinfo->tm_sec;
-
-	std::string ret = str.str();
-
-	return ret;
-}
-
-int ACE_TMAIN(int argc, ACE_TCHAR *argv[] )
-{
-	static const ACE_TCHAR options[] = ACE_TEXT(":p:h:d:x:c:l:o:g:G:");
-
-	ACE_Get_Opt cmd_opts(argc, argv, options);
-
-	ACE_TCHAR port_no[1024];
-	ACE_OS_String::strncpy(port_no, "9002", 1024);
-
-	ACE_TCHAR hostname[1024];
-	ACE_OS_String::strncpy(hostname, "localhost", 1024);
-
-	ACE_TCHAR hdf5_in_data_file[4096];
-	ACE_OS_String::strncpy(hdf5_in_data_file, "./data.h5", 4096);
-
-	ACE_TCHAR hdf5_in_group[4096];
-	ACE_OS_String::strncpy(hdf5_in_group, "/dataset", 4096);
-
-	ACE_TCHAR config_file[1024];
-	ACE_OS_String::strncpy(config_file, "default.xml", 1024);
-
-	bool save_hdf5 = false;
-
-	ACE_TCHAR hdf5_out_file[1024];
-	ACE_OS_String::strncpy(hdf5_out_file, "./out.h5", 1024);
-
-	ACE_TCHAR hdf5_out_group[1024];
-
-	std::string date_time = get_date_time_string();
-
-	ACE_OS_String::strncpy(hdf5_out_group, date_time.c_str(), 1024);
-
-	int repetition_loops = 1;
-
-	int option;
-	while ((option = cmd_opts()) != EOF) {
-		switch (option) {
-		case 'p':
-			ACE_OS_String::strncpy(port_no, cmd_opts.opt_arg(), 1024);
-			break;
-		case 'h':
-			ACE_OS_String::strncpy(hostname, cmd_opts.opt_arg(), 1024);
-			break;
-		case 'd':
-			ACE_OS_String::strncpy(hdf5_in_data_file, cmd_opts.opt_arg(), 4096);
-			break;
-		case 'g':
-			ACE_OS_String::strncpy(hdf5_in_group, cmd_opts.opt_arg(), 4096);
-			break;
-		case 'c':
-			ACE_OS_String::strncpy(config_file, cmd_opts.opt_arg(), 1024);
-			break;
-		case 'l':
-			repetition_loops = ACE_OS::atoi(cmd_opts.opt_arg());
-			break;
-		case 'o':
-			ACE_OS_String::strncpy(hdf5_out_file, cmd_opts.opt_arg(), 1024);
-			break;
-		case 'G':
-			ACE_OS_String::strncpy(hdf5_out_group, cmd_opts.opt_arg(), 1024);
-			break;
-		case ':':
-			print_usage();
-			ACE_ERROR_RETURN((LM_ERROR, ACE_TEXT("-%c requires an argument.\n"), cmd_opts.opt_opt()),-1);
-			break;
-		default:
-			print_usage();
-			ACE_ERROR_RETURN( (LM_ERROR, ACE_TEXT("Command line parse error\n")), -1);
-			break;
-		}
-	}
-
-	ACE_DEBUG(( LM_INFO, ACE_TEXT("Gadgetron MRI Data Sender\n") ));
-
-	//Let's check if the files exist:
-	std::string hdf5_xml_varname = std::string(hdf5_in_group) + std::string("/xml");
-	std::string hdf5_data_varname = std::string(hdf5_in_group) + std::string("/data");
-
-	if (!FileInfo(std::string(hdf5_in_data_file)).exists()) {
-		ACE_DEBUG((LM_INFO, ACE_TEXT("Data file %s does not exist.\n"), hdf5_in_data_file));
-		print_usage();
-		return -1;
-	}
-
-	boost::shared_ptr<ISMRMRD::IsmrmrdDataset> ismrmrd_dataset(new ISMRMRD::IsmrmrdDataset(hdf5_in_data_file,hdf5_in_group));
-	boost::shared_ptr<std::string> xml_config = ismrmrd_dataset->readHeader();
-
-	if (repetition_loops < 1) {
-		ACE_DEBUG((LM_INFO, ACE_TEXT("Invalid number of repetition loops (%d).\n"), repetition_loops));
-		print_usage();
-		return -1;
-	}
-
-	ACE_DEBUG((LM_INFO, ACE_TEXT("  -- host            :      %s\n"), hostname));
-	ACE_DEBUG((LM_INFO, ACE_TEXT("  -- port            :      %s\n"), port_no));
-	ACE_DEBUG((LM_INFO, ACE_TEXT("  -- hdf5 file  in   :      %s\n"), hdf5_in_data_file));
-	ACE_DEBUG((LM_INFO, ACE_TEXT("  -- hdf5 group in   :      %s\n"), hdf5_in_group));
-	ACE_DEBUG((LM_INFO, ACE_TEXT("  -- conf            :      %s\n"), config_file));
-	ACE_DEBUG((LM_INFO, ACE_TEXT("  -- loop            :      %d\n"), repetition_loops));
-	ACE_DEBUG((LM_INFO, ACE_TEXT("  -- hdf5 file out   :      %s\n"), hdf5_out_file));
-	ACE_DEBUG((LM_INFO, ACE_TEXT("  -- hdf5 group out  :      %s\n"), hdf5_out_group));
-
-	for (int i = 0; i < repetition_loops; i++) {
-
-		GadgetronConnector con;
-
-		//con.register_writer(GADGET_MESSAGE_ACQUISITION, new GadgetAcquisitionMessageWriter());
-		con.register_writer(GADGET_MESSAGE_ISMRMRD_ACQUISITION, new GadgetIsmrmrdAcquisitionMessageWriter());
-		con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGE_REAL_USHORT, new HDF5ImageWriter<ACE_UINT16>(std::string(hdf5_out_file), std::string(hdf5_out_group)));
-		con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGE_REAL_FLOAT, new HDF5ImageWriter<float>(std::string(hdf5_out_file), std::string(hdf5_out_group)));
-		con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGE_CPLX_FLOAT, new HDF5ImageWriter< std::complex<float> >(std::string(hdf5_out_file), std::string(hdf5_out_group)));
-
-		con.register_reader(GADGET_MESSAGE_DICOM, new BlobFileWriter(std::string(hdf5_out_file), std::string("sdcopen")));
-
-		//Open a connection with the gadgetron
-		if (con.open(std::string(hostname),std::string(port_no)) != 0) {
-			ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to connect to the Gadgetron host")));
-			return -1;
-		}
-
-		//Tell Gadgetron which XML configuration to run.
-		if (con.send_gadgetron_configuration_file(std::string(config_file)) != 0) {
-			ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to send XML configuration to the Gadgetron host")));
-			return -1;
-		}
-
-		if (con.send_gadgetron_parameters(*xml_config) != 0) {
-			ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to send XML parameters to the Gadgetron host")));
-			return -1;
-		}
-
-		unsigned long acquisitions = ismrmrd_dataset->getNumberOfAcquisitions();//HDF5GetLengthOfFirstDimension(hdf5_in_data_file, hdf5_data_varname.c_str());
-
-		for (unsigned long int i = 0; i < acquisitions; i++) {
-			GadgetContainerMessage<ISMRMRD::Acquisition>* acq = new GadgetContainerMessage<ISMRMRD::Acquisition>();
-			{
-				ISMRMRD::HDF5Exclusive lock; //This will ensure thread-safe access to HDF5
-				boost::shared_ptr<ISMRMRD::Acquisition> acq_tmp = ismrmrd_dataset->readAcquisition(i);
-				*(acq->getObjectPtr()) = *acq_tmp; //We are copying the data into the container message
-
-			}
-
-
-			GadgetContainerMessage<GadgetMessageIdentifier>* m1 =
-					new GadgetContainerMessage<GadgetMessageIdentifier>();
-
-			m1->getObjectPtr()->id = GADGET_MESSAGE_ISMRMRD_ACQUISITION;
-
-			m1->cont(acq);
-
-			if (con.putq(m1) == -1) {
-				ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to put data package on queue")));
-				return -1;
-			}
-		}
-
-		GadgetContainerMessage<GadgetMessageIdentifier>* m1 =
-				new GadgetContainerMessage<GadgetMessageIdentifier>();
-
-		m1->getObjectPtr()->id = GADGET_MESSAGE_CLOSE;
-
-		if (con.putq(m1) == -1) {
-			ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to put CLOSE package on queue")));
-			return -1;
-		}
-
-		con.wait();
-	}
-
-	return 0;
-}
diff --git a/apps/clients/utilities/CMakeLists.txt b/apps/clients/utilities/CMakeLists.txt
new file mode 100644
index 0000000..011359b
--- /dev/null
+++ b/apps/clients/utilities/CMakeLists.txt
@@ -0,0 +1,47 @@
+set(Boost_NO_BOOST_CMAKE ON)
+
+if(WIN32)
+  find_package(Boost COMPONENTS thread system date_time chrono REQUIRED)
+else(WIN32)
+  find_package(Boost COMPONENTS thread system REQUIRED)
+endif(WIN32)
+
+if(WIN32)
+  link_directories(${Boost_LIBRARY_DIRS})
+endif(WIN32)
+
+include_directories(
+    ${CMAKE_SOURCE_DIR}/apps/gadgetron
+    ${CMAKE_BINARY_DIR}/apps/gadgetron
+    ${CMAKE_SOURCE_DIR}/gadgets/mri_core
+    ${CMAKE_SOURCE_DIR}/toolboxes/core
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/hostutils
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/algorithm
+    ${CMAKE_SOURCE_DIR}/toolboxes/gadgettools
+    ${ACE_INCLUDE_DIR} 
+    ${Boost_INCLUDE_DIR} 
+    ${ISMRMRD_INCLUDE_DIR}
+    )
+
+add_executable(gt_alive gt_alive.cpp)
+add_executable(gtdependencyquery gt_query.cpp DependencyQueryReader.h gtquery.xml)
+
+target_link_libraries(gt_alive gadgetron_toolbox_cpucore 
+                               gadgetron_toolbox_gadgettools 
+                               optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY} 
+                               ${Boost_LIBRARIES} 
+                               ${ISMRMRD_LIBRARIES} )
+
+target_link_libraries(gtdependencyquery gadgetron_toolbox_cpucore 
+                                        gadgetron_toolbox_gadgettools 
+                                        optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY} 
+                                        ${Boost_LIBRARIES} 
+                                        ${ISMRMRD_LIBRARIES} )
+
+install(TARGETS gt_alive gtdependencyquery DESTINATION bin COMPONENT main)
+install(FILES DependencyQueryReader.h DESTINATION include COMPONENT main)
+install(FILES isalive.xml gtquery.xml DESTINATION ${GADGETRON_INSTALL_CONFIG_PATH} COMPONENT main)
diff --git a/apps/clients/utilities/DependencyQueryReader.h b/apps/clients/utilities/DependencyQueryReader.h
new file mode 100644
index 0000000..02464d2
--- /dev/null
+++ b/apps/clients/utilities/DependencyQueryReader.h
@@ -0,0 +1,100 @@
+
+/** \file   DependencyQueryReader.h
+    \brief  Implement the reader that writes the dependency query results into a file
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include <fstream>
+#include <iomanip>
+
+#include "GadgetMessageInterface.h"
+#include "ismrmrd/meta.h"
+
+namespace Gadgetron
+{
+
+/// Reader for dependency query results: pulls a length-prefixed payload off
+/// the socket and stores the payload bytes in the file named at construction.
+class DependencyQueryReader : public GadgetMessageReader
+{
+    public:
+
+        /// \param filename path of the file every received payload is written to
+        DependencyQueryReader(std::string filename) : number_of_calls_(0) , filename_(filename)
+        {
+        }
+
+        virtual ~DependencyQueryReader()
+        {
+        }
+
+        /// Read one query result from \a socket and write it to the output file.
+        /// \return a new, empty ACE_Message_Block on success, 0 on any failure
+        virtual ACE_Message_Block* read(ACE_SOCK_Stream* socket)
+        {
+            ssize_t recv_count = 0;
+
+            typedef unsigned long long size_t_type;
+
+            // The payload is preceded by its size in bytes.
+            size_t_type len(0);
+            if ( ( recv_count = socket->recv_n( &len, sizeof(size_t_type)) ) <= 0 )
+            {
+                ACE_DEBUG( (LM_ERROR, ACE_TEXT("%P, %l, DependencyQueryReader, failed to read query results length\n")) );
+                return 0;
+            }
+
+            char* buf = NULL;
+            try
+            {
+                // new[] reports failure by throwing, never by returning NULL. The length
+                // is not copied into buf: buf may be shorter than sizeof(len).
+                buf = new char[len];
+            }
+            catch (const std::exception &err)
+            {
+                GADGET_DEBUG_EXCEPTION(err,"DependencyQueryReader, failed to allocate buffer\n");
+                return 0;
+            }
+
+            if ( ( recv_count = socket->recv_n( buf, len) ) <= 0 )
+            {
+                ACE_DEBUG( (LM_ERROR, ACE_TEXT("%P, %l, DependencyQueryReader, failed to read query results\n")) );
+                delete [] buf;
+                return 0;
+            }
+
+            std::ofstream outfile;
+            outfile.open (filename_.c_str(), std::ios::out|std::ios::binary);
+
+            if (outfile.good())
+            {
+                outfile.write(buf, len);
+                outfile.close();
+                number_of_calls_++;
+            }
+            else
+            {
+                delete[] buf;
+
+                GADGET_ERROR_MSG("File " << filename_ << " is not good for writing\n");
+                return 0;
+            }
+
+            delete[] buf;
+
+            // The GadgetronConnector expects an ACE_Message_Block* (NOT NULL)
+            ACE_Message_Block *mb = new ACE_Message_Block();
+
+            return mb;
+        }
+
+    protected:
+
+        size_t number_of_calls_;
+        std::string filename_;
+};
+
+} // namespace Gadgetron
diff --git a/apps/clients/utilities/gt_alive.cpp b/apps/clients/utilities/gt_alive.cpp
new file mode 100644
index 0000000..134e924
--- /dev/null
+++ b/apps/clients/utilities/gt_alive.cpp
@@ -0,0 +1,69 @@
+#include "GadgetronConnector.h"
+#include "GadgetMRIHeaders.h"
+#include "GadgetContainerMessage.h"
+#include "FileInfo.h"
+
+#include <ace/SOCK_Acceptor.h>
+#include <ace/Addr.h>
+#include <ace/INET_Addr.h>
+#include <ace/Log_Msg.h>
+#include <ace/Get_Opt.h>
+#include <ace/OS_NS_string.h>
+
+#include <fstream>
+#include <time.h>
+#include <iomanip>
+#include <iostream>
+
+using namespace Gadgetron;
+
+int ACE_TMAIN(int argc, ACE_TCHAR *argv[] )
+{
+	// Connects to a Gadgetron server (argv[1] = host, argv[2] = port), sends the isalive.xml
+	// configuration followed by a CLOSE message and waits. Returns 0 on success, -1 on failure.
+	GadgetronConnector con;
+	std::string host("localhost");
+	std::string port("9002");
+
+	ACE_TCHAR hostname[1024];
+	//We will do a little trick to figure out what the hostname would be according to ACE
+	ACE_SOCK_Acceptor listener (ACE_Addr::sap_any);
+	ACE_INET_Addr addr;
+	listener.get_local_addr (addr);
+	ACE_OS_String::strncpy(hostname, addr.get_host_name(), 1024);
+	//ACE socket wrappers do not close their handle on destruction; release the probe socket now
+	listener.close();
+
+	host = std::string(hostname);
+	if (argc > 1) {
+		host = std::string(argv[1]);
+	}
+
+	if (argc > 2) {
+		port = std::string(argv[2]);
+	}
+
+	if (con.open(host,port) != 0) {
+		ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to connect to the Gadgetron host")));
+		return -1;
+	}
+
+	//Tell Gadgetron which XML configuration to run.
+	if (con.send_gadgetron_configuration_file(std::string("isalive.xml")) != 0) {
+		ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to send XML configuration to the Gadgetron host")));
+		return -1;
+	}
+
+	GadgetContainerMessage<GadgetMessageIdentifier>* m1 =
+			new GadgetContainerMessage<GadgetMessageIdentifier>();
+	m1->getObjectPtr()->id = GADGET_MESSAGE_CLOSE;
+
+	if (con.putq(m1) == -1) {
+		ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to put CLOSE package on queue")));
+		return -1;
+	}
+
+	con.wait();
+
+	return 0;
+}
diff --git a/apps/clients/utilities/gt_query.cpp b/apps/clients/utilities/gt_query.cpp
new file mode 100644
index 0000000..1d6ad72
--- /dev/null
+++ b/apps/clients/utilities/gt_query.cpp
@@ -0,0 +1,98 @@
+#include "GadgetronConnector.h"
+#include "GadgetMRIHeaders.h"
+#include "GadgetContainerMessage.h"
+#include "GadgetronCommon.h"
+#include "DependencyQueryReader.h"
+
+#include <ace/Log_Msg.h>
+#include <ace/Get_Opt.h>
+#include <ace/OS_NS_string.h>
+
+#include <fstream>
+#include <time.h>
+#include <iomanip>
+#include <sstream>
+
+using namespace Gadgetron;
+
+static void usage()
+{
+    // Print the gtdependencyquery command-line synopsis to standard output.
+    std::ostringstream outs;
+
+    outs << "Query the gadgetron server for the stored dependency measurements" << std::endl;
+    outs << "gtdependencyquery   -p <PORT>                      (default 9002)" << std::endl;
+    outs << "                    -h <HOST>                      (default localhost)" << std::endl;
+    outs << "                    -o <Query out file>            (default dependency.xml)" << std::endl;
+    // No std::ends here: it would append a NUL byte that ends up on stdout.
+
+    std::cout << outs.str();
+}
+
+int ACE_TMAIN(int argc, ACE_TCHAR *argv[] )
+{
+    // Entry point of gtdependencyquery: has the server run gtquery.xml and lets the
+    // registered DependencyQueryReader store the reply. Returns 0 on success, -1 on error.
+    GadgetronConnector connector;
+
+    // Defaults, overridable with -h, -p and -o respectively.
+    std::string server_host("localhost");
+    std::string server_port("9002");
+    std::string result_file("dependency.xml");
+
+    static const ACE_TCHAR option_spec[] = ACE_TEXT(":p:h:o:");
+    ACE_Get_Opt parser(argc, argv, option_spec);
+
+    for (int opt = parser(); opt != EOF; opt = parser())
+    {
+        switch (opt) {
+        case 'h':
+            server_host = std::string(parser.opt_arg());
+            break;
+        case 'p':
+            server_port = std::string(parser.opt_arg());
+            break;
+        case 'o':
+            result_file = std::string(parser.opt_arg());
+            break;
+        case ':':
+            usage();
+            ACE_ERROR_RETURN((LM_ERROR, ACE_TEXT("-%c requires an argument.\n"), parser.opt_opt()),-1);
+            break;
+        default:
+            usage();
+            ACE_ERROR_RETURN( (LM_ERROR, ACE_TEXT("Command line parse error\n")), -1);
+            break;
+        }
+    }
+
+    if (0 != connector.open(server_host,server_port))
+    {
+        ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to connect to the Gadgetron host")));
+        return -1;
+    }
+
+    // The reader receives the query result and writes it to result_file.
+    connector.register_reader(GADGET_MESSAGE_DEPENDENCY_QUERY, new DependencyQueryReader(result_file));
+
+    // Select the server-side configuration to run.
+    if (0 != connector.send_gadgetron_configuration_file(std::string("gtquery.xml")))
+    {
+        ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to send XML configuration to the Gadgetron host")));
+        return -1;
+    }
+
+    // A CLOSE message is the only thing queued for sending.
+    GadgetContainerMessage<GadgetMessageIdentifier>* close_msg =
+            new GadgetContainerMessage<GadgetMessageIdentifier>();
+    close_msg->getObjectPtr()->id = GADGET_MESSAGE_CLOSE;
+
+    if (-1 == connector.putq(close_msg))
+    {
+        ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to put CLOSE package on queue")));
+        return -1;
+    }
+
+    connector.wait();
+    return 0;
+}
diff --git a/apps/clients/utilities/gtquery.xml b/apps/clients/utilities/gtquery.xml
new file mode 100644
index 0000000..86e6a3d
--- /dev/null
+++ b/apps/clients/utilities/gtquery.xml
@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <writer>
+        <slot>1019</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>DependencyQueryWriter</classname>
+    </writer>
+
+    <gadget>
+        <name>DependencyQuery</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>DependencyQueryGadget</classname>
+
+        <!-- If set to true, check the file creation time and delete old stored files -->
+        <property>
+            <name>clean_storage_while_query</name>
+            <value>true</value>
+        </property>
+
+        <!-- In the unit of hours, the maximal time duration allowed for a file stored
+             If clean_storage_while_query == true, this time limit will be used to 
+             determine which files to be deleted
+         -->
+        <property>
+            <name>time_limit_in_storage</name>
+            <value>24.0</value>
+        </property>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/apps/clients/mriclient/isalive.xml b/apps/clients/utilities/isalive.xml
similarity index 100%
rename from apps/clients/mriclient/isalive.xml
rename to apps/clients/utilities/isalive.xml
diff --git a/apps/clients/utilities/main.cpp b/apps/clients/utilities/main.cpp
new file mode 100644
index 0000000..3ce1dec
--- /dev/null
+++ b/apps/clients/utilities/main.cpp
@@ -0,0 +1,275 @@
+#include <ace/SOCK_Acceptor.h>
+#include <ace/Addr.h>
+#include <ace/INET_Addr.h>
+#include <ace/Log_Msg.h>
+#include <ace/Get_Opt.h>
+#include <ace/OS_NS_string.h>
+#include "GadgetronConnector.h"
+#include "GadgetMRIHeaders.h"
+#include "GadgetContainerMessage.h"
+#include "hoNDArray.h"
+#include "ImageWriter.h"
+#include "HDF5ImageWriter.h"
+#include "ImageAttribWriter.h"
+#include "HDF5ImageAttribWriter.h"
+#include "FileInfo.h"
+#include "ismrmrd/dataset.h"
+#include "GadgetIsmrmrdReadWrite.h"
+#include "BlobFileWriter.h"
+#include "BlobFileWithAttribWriter.h"
+
+#include <fstream>
+#include <time.h>
+#include <iomanip>
+
+using namespace Gadgetron;
+void print_usage()
+{   // Logs the command-line synopsis; the -F values must match the format check in ACE_TMAIN.
+    ACE_DEBUG((LM_INFO, ACE_TEXT("Usage: \n") ));
+    ACE_DEBUG((LM_INFO, ACE_TEXT("mriclient -p <PORT>                                                                    (default 9002)\n") ));
+    ACE_DEBUG((LM_INFO, ACE_TEXT("          -h <HOST>                                                                    (default localhost)\n") ));
+    ACE_DEBUG((LM_INFO, ACE_TEXT("          -d <HDF5 DATA FILE>                                                          (default ./data.h5)\n") ));
+    ACE_DEBUG((LM_INFO, ACE_TEXT("          -g <HDF5 DATA GROUP>                                                         (default /dataset)\n") ));
+    ACE_DEBUG((LM_INFO, ACE_TEXT("          -c <GADGETRON CONFIG>                                                        (default default.xml)\n") ));
+    ACE_DEBUG((LM_INFO, ACE_TEXT("          -l <LOOPS>                                                                   (default 1)\n") ));
+    ACE_DEBUG((LM_INFO, ACE_TEXT("          -o <HDF5 OUT FILE>                                                           (out.h5)\n") ));
+    ACE_DEBUG((LM_INFO, ACE_TEXT("          -G <HDF5 OUT GROUP>                                                          (default date and time)\n") ));
+    ACE_DEBUG((LM_INFO, ACE_TEXT("          -F <OUT FILE FORMAT, 'h5 or hdf5' or 'hdr or analyze' >                      (default 'h5' format)\n") ));
+}
+
+
+std::string get_date_time_string()
+{
+    // Returns the current local time formatted as "<year>-MM-DD HH:MM:SS"
+    // (month, day, hour, minute and second zero-padded to two digits).
+
+    time_t now;
+    time ( &now );
+    const struct tm * local = localtime ( &now );
+
+    std::stringstream text;
+
+    // The fill character is sticky, so it is set once after the year;
+    // the field width is not, so it is repeated for every field.
+    text << local->tm_year+1900;
+    text << std::setfill('0');
+    text << "-" << std::setw(2) << local->tm_mon+1;
+    text << "-" << std::setw(2) << local->tm_mday;
+    text << " " << std::setw(2) << local->tm_hour;
+    text << ":" << std::setw(2) << local->tm_min;
+    text << ":" << std::setw(2) << local->tm_sec;
+
+    return text.str();
+}
+
+ACE_Thread_Mutex mtx;
+
+int ACE_TMAIN(int argc, ACE_TCHAR *argv[] )
+{
+    // Client entry point: parses the command line, opens the ISMRMRD dataset and, once per
+    // repetition loop, queues every acquisition for sending to the Gadgetron server while the
+    // registered readers handle the replies. Returns 0 on success and -1 on any failure.
+    static const ACE_TCHAR options[] = ACE_TEXT(":p:h:d:x:c:l:o:g:G:F:");
+
+    ACE_Get_Opt cmd_opts(argc, argv, options);
+
+    ACE_TCHAR port_no[1024];
+    ACE_OS_String::strncpy(port_no, "9002", 1024);
+
+    ACE_TCHAR hostname[1024];
+    //We will do a little trick to figure out what the hostname would be according to ACE
+    ACE_SOCK_Acceptor listener (ACE_Addr::sap_any);
+    ACE_INET_Addr addr;
+    listener.get_local_addr (addr);
+    ACE_OS_String::strncpy(hostname, addr.get_host_name(), 1024);
+    listener.close(); //ACE socket wrappers do not close their handle on destruction
+
+    ACE_TCHAR hdf5_in_data_file[4096];
+    ACE_OS_String::strncpy(hdf5_in_data_file, "./data.h5", 4096);
+
+    ACE_TCHAR hdf5_in_group[4096];
+    ACE_OS_String::strncpy(hdf5_in_group, "/dataset", 4096);
+
+    ACE_TCHAR config_file[1024];
+    ACE_OS_String::strncpy(config_file, "default.xml", 1024);
+
+    ACE_TCHAR hdf5_out_file[1024];
+    ACE_OS_String::strncpy(hdf5_out_file, "./out.h5", 1024);
+
+    ACE_TCHAR hdf5_out_group[1024];
+
+    std::string date_time = get_date_time_string();
+
+    ACE_OS_String::strncpy(hdf5_out_group, date_time.c_str(), 1024);
+
+    ACE_TCHAR out_format[128];
+    ACE_OS_String::strncpy(out_format, "h5", 128);
+
+    int repetition_loops = 1;
+
+    // The default copies above NUL-padded every buffer, so copying at most size-1 characters keeps them terminated.
+    int option;
+    while ((option = cmd_opts()) != EOF) {
+        switch (option) {
+        case 'p':
+            ACE_OS_String::strncpy(port_no, cmd_opts.opt_arg(), 1023);
+            break;
+        case 'h':
+            ACE_OS_String::strncpy(hostname, cmd_opts.opt_arg(), 1023);
+            break;
+        case 'd':
+            ACE_OS_String::strncpy(hdf5_in_data_file, cmd_opts.opt_arg(), 4095);
+            break;
+        case 'g':
+            ACE_OS_String::strncpy(hdf5_in_group, cmd_opts.opt_arg(), 4095);
+            break;
+        case 'c':
+            ACE_OS_String::strncpy(config_file, cmd_opts.opt_arg(), 1023);
+            break;
+        case 'l':
+            repetition_loops = ACE_OS::atoi(cmd_opts.opt_arg());
+            break;
+        case 'o':
+            ACE_OS_String::strncpy(hdf5_out_file, cmd_opts.opt_arg(), 1023);
+            break;
+        case 'G':
+            ACE_OS_String::strncpy(hdf5_out_group, cmd_opts.opt_arg(), 1023);
+            break;
+        case 'F':
+            ACE_OS_String::strncpy(out_format, cmd_opts.opt_arg(), 127);
+            break;
+        case ':':
+            print_usage();
+            ACE_ERROR_RETURN((LM_ERROR, ACE_TEXT("-%c requires an argument.\n"), cmd_opts.opt_opt()),-1);
+            break;
+        default:
+            print_usage();
+            ACE_ERROR_RETURN( (LM_ERROR, ACE_TEXT("Command line parse error\n")), -1);
+            break;
+        }
+    }
+
+    ACE_DEBUG(( LM_INFO, ACE_TEXT("Gadgetron MRI Data Sender\n") ));
+
+    //Let's check if the files exist:
+    if (!FileInfo(std::string(hdf5_in_data_file)).exists()) {
+        ACE_DEBUG((LM_INFO, ACE_TEXT("Data file %s does not exist.\n"), hdf5_in_data_file));
+        print_usage();
+        return -1;
+    }
+
+    boost::shared_ptr<ISMRMRD::Dataset> ismrmrd_dataset(new ISMRMRD::Dataset(hdf5_in_data_file,hdf5_in_group));
+    std::string xml_config;
+    ismrmrd_dataset->readHeader(xml_config);
+
+    if (repetition_loops < 1) {
+        ACE_DEBUG((LM_INFO, ACE_TEXT("Invalid number of repetition loops (%d).\n"), repetition_loops));
+        print_usage();
+        return -1;
+    }
+
+    ACE_DEBUG((LM_INFO, ACE_TEXT("  -- host            :      %s\n"), hostname));
+    ACE_DEBUG((LM_INFO, ACE_TEXT("  -- port            :      %s\n"), port_no));
+    ACE_DEBUG((LM_INFO, ACE_TEXT("  -- hdf5 file  in   :      %s\n"), hdf5_in_data_file));
+    ACE_DEBUG((LM_INFO, ACE_TEXT("  -- hdf5 group in   :      %s\n"), hdf5_in_group));
+    ACE_DEBUG((LM_INFO, ACE_TEXT("  -- conf            :      %s\n"), config_file));
+    ACE_DEBUG((LM_INFO, ACE_TEXT("  -- loop            :      %d\n"), repetition_loops));
+    ACE_DEBUG((LM_INFO, ACE_TEXT("  -- hdf5 file out   :      %s\n"), hdf5_out_file));
+    ACE_DEBUG((LM_INFO, ACE_TEXT("  -- hdf5 group out  :      %s\n"), hdf5_out_group));
+    ACE_DEBUG((LM_INFO, ACE_TEXT("  -- out format      :      %s\n"), out_format));
+
+    std::string prefix;
+    std::string out_format_str(out_format);
+
+    for (int i = 0; i < repetition_loops; i++)
+    {
+        if ( repetition_loops > 1 )
+        {
+            std::ostringstream ostr;
+            ostr << "MriClient_Run" << i;
+            prefix = ostr.str();
+        }
+
+        GadgetronConnector con;
+
+        //con.register_writer(GADGET_MESSAGE_ACQUISITION, new GadgetAcquisitionMessageWriter());
+        con.register_writer(GADGET_MESSAGE_ISMRMRD_ACQUISITION, new GadgetIsmrmrdAcquisitionMessageWriter());
+        con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGE_REAL_USHORT, new HDF5ImageWriter<ACE_UINT16>(std::string(hdf5_out_file), std::string(hdf5_out_group), mtx));
+        con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGE_REAL_FLOAT, new HDF5ImageWriter<float>(std::string(hdf5_out_file), std::string(hdf5_out_group), mtx));
+        con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGE_CPLX_FLOAT, new HDF5ImageWriter< std::complex<float> >(std::string(hdf5_out_file), std::string(hdf5_out_group), mtx));
+
+        if ( (out_format_str == "analyze") || (out_format_str == "hdr") )
+        {
+            con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_REAL_USHORT, new AnalyzeImageAttribWriter<ACE_UINT16>(prefix));
+            con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_REAL_FLOAT, new AnalyzeImageAttribWriter<float>(prefix));
+            con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_CPLX_FLOAT, new AnalyzeComplexImageAttribWriter< std::complex<float> >(prefix));
+        }
+        else
+        {
+            con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_REAL_USHORT, new HDF5ImageAttribWriter<ACE_UINT16>(std::string(hdf5_out_file), std::string(hdf5_out_group), mtx, prefix));
+            con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_REAL_FLOAT, new HDF5ImageAttribWriter<float>(std::string(hdf5_out_file), std::string(hdf5_out_group), mtx, prefix));
+            con.register_reader(GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_CPLX_FLOAT, new HDF5ImageAttribWriter< std::complex<float> >(std::string(hdf5_out_file), std::string(hdf5_out_group), mtx, prefix));
+        }
+
+        con.register_reader(GADGET_MESSAGE_DICOM, new BlobFileWriter(std::string(hdf5_out_group), std::string("dcm")));
+        con.register_reader(GADGET_MESSAGE_DICOM_WITHNAME, new BlobFileWithAttribWriter(std::string(), std::string("dcm")));
+
+        //Open a connection with the gadgetron
+        if (con.open(std::string(hostname),std::string(port_no)) != 0) {
+            ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to connect to the Gadgetron host")));
+            return -1;
+        }
+
+        //Tell Gadgetron which XML configuration to run.
+        if (con.send_gadgetron_configuration_file(std::string(config_file)) != 0) {
+            ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to send XML configuration to the Gadgetron host")));
+            return -1;
+        }
+
+        if (con.send_gadgetron_parameters(xml_config) != 0) {
+            ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to send XML parameters to the Gadgetron host")));
+            return -1;
+        }
+
+        unsigned long acquisitions = 0;
+        {
+            ACE_GUARD_RETURN(ACE_Thread_Mutex, guard, mtx, -1);
+            acquisitions = ismrmrd_dataset->getNumberOfAcquisitions();
+        }
+
+        for (unsigned long int acq_index = 0; acq_index < acquisitions; acq_index++) {
+            GadgetContainerMessage<ISMRMRD::Acquisition>* acq = new GadgetContainerMessage<ISMRMRD::Acquisition>();
+
+            {
+                ACE_GUARD_RETURN(ACE_Thread_Mutex, guard, mtx, -1);
+                ismrmrd_dataset->readAcquisition(acq_index, *acq->getObjectPtr());
+            }
+
+            GadgetContainerMessage<GadgetMessageIdentifier>* m1 =
+                    new GadgetContainerMessage<GadgetMessageIdentifier>();
+
+            m1->getObjectPtr()->id = GADGET_MESSAGE_ISMRMRD_ACQUISITION;
+
+            m1->cont(acq);
+
+            if (con.putq(m1) == -1) {
+                ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to put data package on queue")));
+                return -1;
+            }
+        }
+
+        GadgetContainerMessage<GadgetMessageIdentifier>* m1 =
+                new GadgetContainerMessage<GadgetMessageIdentifier>();
+
+        m1->getObjectPtr()->id = GADGET_MESSAGE_CLOSE;
+
+        if (con.putq(m1) == -1) {
+            ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to put CLOSE package on queue")));
+            return -1;
+        }
+
+        con.wait();
+    }
+
+    return 0;
+}
diff --git a/apps/gadgetron/CMakeLists.txt b/apps/gadgetron/CMakeLists.txt
index 2f3381e..8afaf36 100644
--- a/apps/gadgetron/CMakeLists.txt
+++ b/apps/gadgetron/CMakeLists.txt
@@ -1,60 +1,85 @@
-IF (WIN32)
-ADD_DEFINITIONS(-DTIXML_USE_STL)
-ENDIF (WIN32)
+configure_file(gadgetron_config.in gadgetron_config.h)
 
 include_directories(
-  ${Boost_INCLUDE_DIR}
-  ${ACE_INCLUDE_DIR} 
-  ${XSD_INCLUDE_DIR}
+  ${CMAKE_CURRENT_BINARY_DIR}
   ${CMAKE_SOURCE_DIR}/apps/gadgetron
+  ${CMAKE_SOURCE_DIR}/toolboxes/cloudbus
   ${CMAKE_SOURCE_DIR}/toolboxes/gadgettools
   ${CMAKE_SOURCE_DIR}/toolboxes/core
-  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/hostutils)
-
-#Process the XSD files
-SET(XSDS schema/gadgetron.xsd)
-SET(XSD_ARGS cxx-tree --generate-serialization)
-WRAP_XSD(XSDS_SOURCES XSD_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}/schema ${XSDS} OPTIONS ${XSD_ARGS})
-INCLUDE_DIRECTORIES(${XSD_INCLUDES} ${XERCESC_INCLUDE_DIR})
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/hostutils
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/algorithm
+  ${Boost_INCLUDE_DIR}
+  ${ACE_INCLUDE_DIR}
+  )
 
 add_executable(gadgetron 
-  ${XSDS_SOURCES} 
   main.cpp 
-  #GadgetStreamController.cpp 
-  #GadgetServerAcceptor.cpp 
-  EndGadget.h
-  Gadget.h
-  GadgetContainerMessage.h
-  GadgetMessageInterface.h
-  Gadgetron.h
-  GadgetronExport.h
-  #GadgetServerAcceptor.h
-  #GadgetStreamController.h 
+  GadgetServerAcceptor.h
+  GadgetServerAcceptor.cpp 
+  GadgetStreamController.h
+  EndGadget.h 
+  Gadget.h 
+  GadgetContainerMessage.h 
+  GadgetMessageInterface.h 
+  Gadgetron.h 
+  GadgetronExport.h 
+  gadgetron_xml.h
   )
 
 target_link_libraries(gadgetron 
-  gadgettools 
+  gadgetron_gadgetbase
+  gadgetron_toolbox_gadgettools gadgetron_toolbox_cloudbus 
   optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY} 
-  ${XERCESC_LIBRARIES}
  )
 
-install(TARGETS gadgetron DESTINATION bin)
+add_executable(gadgetron_info
+  gadgetron_info.cpp
+)
 
-install(FILES 	
-  GadgetContainerMessage.h
+target_link_libraries(gadgetron_info 
+  gadgetron_gadgetbase
+  optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY} 
+ )
+
+add_library(gadgetron_gadgetbase SHARED
+  Gadget.cpp
+  GadgetStreamController.cpp
+  gadgetron_xml.cpp
+  pugixml.cpp  
+)
+
+target_link_libraries(gadgetron_gadgetbase
+  optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY}
+  gadgetron_toolbox_gadgettools
+)
+
+set_target_properties (gadgetron_gadgetbase PROPERTIES COMPILE_DEFINITIONS "__BUILD_GADGETRON_GADGETBASE__")
+
+install(TARGETS gadgetron gadgetron_info DESTINATION bin COMPONENT main)
+install(TARGETS gadgetron_gadgetbase DESTINATION lib COMPONENT main)
+
+install(FILES
+  gadgetbase_export.h
+  EndGadget.h
   Gadget.h
-  #GadgetServerAcceptor.h
-  #GadgetStreamController.h
-  Gadgetron.h
+  GadgetContainerMessage.h
   GadgetMessageInterface.h
-  EndGadget.h
+  Gadgetron.h
   GadgetronExport.h
-  DESTINATION include) 
+  gadgetron_paths.h
+  gadgetron_xml.h
+  GadgetServerAcceptor.h
+  GadgetStreamController.h
+  ${CMAKE_CURRENT_BINARY_DIR}/gadgetron_config.h
+  DESTINATION include COMPONENT main) 
 
-install(FILES 	
+install(FILES 
   gadgetron.xml.example
-  DESTINATION config)
+  DESTINATION ${GADGETRON_INSTALL_CONFIG_PATH} COMPONENT main)
 
-install(FILES 	
+install(FILES 
   schema/gadgetron.xsd
-  DESTINATION schema)
+  DESTINATION schema COMPONENT main)
+
+add_subdirectory(webapp)
diff --git a/apps/gadgetron/Gadget.cpp b/apps/gadgetron/Gadget.cpp
new file mode 100644
index 0000000..91b1476
--- /dev/null
+++ b/apps/gadgetron/Gadget.cpp
@@ -0,0 +1,60 @@
+#include "Gadget.h"
+#include "GadgetStreamController.h"
+
+namespace Gadgetron
+{
+  /**
+   *  Look up the string value of parameter `name` on this Gadget.
+   *
+   *  A stored value of the form "<parameter>@<gadget>" is a reference: the
+   *  lookup is forwarded to the gadget of that name, obtained from the stream
+   *  controller, which is asked for <parameter>.
+   *
+   *  @param name       Name of the parameter to look up.
+   *  @param recursive  Number of references followed so far; lookups deeper
+   *                    than the internal limit (10) are abandoned.
+   *  @return The resolved value, or an empty string when the parameter is not
+   *          set, the reference cannot be resolved, or the limit is exceeded.
+   */
+  boost::shared_ptr<std::string> Gadget::get_string_value(const char* name, unsigned int recursive) {
+    const unsigned int recursive_limit = 10;
+    if (recursive > recursive_limit) {
+      GADGET_DEBUG2("Recursive level %d exceeds maimum limit (%d) in Gadget::get_string_value(...)\n", recursive, recursive_limit);
+      return boost::shared_ptr<std::string>(new std::string(""));
+    }
+
+    //Copy the value out while the mutex is held: set_parameter() writes
+    //parameters_ under the same mutex, so the iterator must not be used
+    //after the mutex has been released.
+    const std::string key(name);
+    std::string value;
+    bool found = false;
+    parameter_mutex_.acquire();
+    std::map<std::string,std::string>::iterator it = parameters_.find(key);
+    if (it != parameters_.end()) {
+      value = it->second;
+      found = true;
+    }
+    parameter_mutex_.release();
+
+    if (found) {
+      //If string contains an @ sign, we should look for this parameter on another gadget
+      size_t at_pos = value.find('@');
+      if (at_pos == std::string::npos) {
+        return boost::shared_ptr<std::string>(new std::string(value));
+      }
+
+      std::string parm = value.substr(0,at_pos);
+      std::string gadget = value.substr(at_pos+1);
+
+      //controller_ is initialised to 0 in the Gadget constructor, so guard
+      //against a gadget that has no controller before dereferencing it.
+      Gadget* ref_gadget = this->controller_ ? this->controller_->find_gadget(gadget.c_str()) : 0;
+      if (ref_gadget) {
+        return ref_gadget->get_string_value(parm.c_str(), recursive + 1);
+      }
+    }
+
+    return boost::shared_ptr<std::string>(new std::string(""));
+  }
+}
diff --git a/apps/gadgetron/Gadget.h b/apps/gadgetron/Gadget.h
index acbd028..75b797d 100644
--- a/apps/gadgetron/Gadget.h
+++ b/apps/gadgetron/Gadget.h
@@ -14,16 +14,20 @@
 #include <string>
 #include <boost/shared_ptr.hpp>
 
+#include "gadgetbase_export.h"
 #include "GadgetContainerMessage.h"
 #include "GadgetronExport.h"
 #include "Gadgetron.h"
+#include "gadgetron_config.h"
+
 #include <stdexcept>
 
 namespace Gadgetron{
 
+    //Forward declarations
     class GadgetStreamController;
 
-    class Gadget : public ACE_Task<ACE_MT_SYNCH>
+    class EXPORTGADGETBASE Gadget : public ACE_Task<ACE_MT_SYNCH>
     {
 
     public:
@@ -39,13 +43,19 @@ namespace Gadgetron{
             , desired_threads_(1)
             , pass_on_undesired_data_(false)
             , controller_(0)
+	    , parameter_mutex_("GadgetParameterMutex")
         {
-            ACE_TRACE(( ACE_TEXT("Gadget::Gadget") ));
+	  gadgetron_version_ = std::string(GADGETRON_VERSION_STRING) + std::string(" (") + 
+	    std::string(GADGETRON_GIT_SHA1_HASH) + std::string(")");
+
+	  ACE_TRACE(( ACE_TEXT("Gadget::Gadget") ));
         }
 
         virtual ~Gadget()
         {
+	  if (this->module()) {
             GADGET_DEBUG2("Shutting down Gadget (%s)\n", this->module()->name());
+	  }
         }
 
 
@@ -192,12 +202,14 @@ namespace Gadgetron{
         }
 
         int set_parameter(const char* name, const char* val, bool trigger = true) {
-            boost::shared_ptr<std::string> old_value = get_string_value(name);
+	  boost::shared_ptr<std::string> old_value = get_string_value(name);
 
+	    parameter_mutex_.acquire();
             parameters_[std::string(name)] = std::string(val);
+	    parameter_mutex_.release();
 
             if (trigger) {
-                return parameter_changed(std::string(name), std::string(val), *old_value);
+	      return parameter_changed(std::string(name), std::string(val), *old_value);
             }
 
             return 0;
@@ -215,17 +227,7 @@ namespace Gadgetron{
             return ACE_OS::atof(get_string_value(name)->c_str());
         }
 
-        boost::shared_ptr<std::string> get_string_value(const char* name) {
-            std::map<std::string,std::string>::iterator it;
-
-            it = parameters_.find(std::string(name));
-
-            if (it != parameters_.end()) {
-                return boost::shared_ptr<std::string>(new std::string(it->second));
-            }
-
-            return boost::shared_ptr<std::string>(new std::string(""));
-        }
+	boost::shared_ptr<std::string> get_string_value(const char* name, unsigned int recursive = 0);
 
         /**
         *  This trigger function is called whenever set_parameter is called with the trigger = true;
@@ -235,6 +237,10 @@ namespace Gadgetron{
             return GADGET_OK;
         }
 
+	const char* get_gadgetron_version() {
+	  return gadgetron_version_.c_str();
+	}
+
     protected:
         virtual int next_step(ACE_Message_Block *m)
         {
@@ -250,11 +256,13 @@ namespace Gadgetron{
         unsigned int desired_threads_;
         bool pass_on_undesired_data_;
         GadgetStreamController* controller_;
-
+	ACE_Thread_Mutex parameter_mutex_;
     private:
         std::map<std::string, std::string> parameters_;
+	std::string gadgetron_version_;
     };
 
+
     template <class P1> class Gadget1 : public Gadget
     {
 
@@ -370,13 +378,13 @@ namespace Gadgetron{
 
     };
 
+/* Macros for handling dynamic linking */
+// #define GADGET_DECLARE(GADGET) GADGETRON_LOADABLE_DECLARE(GADGET)
+// #define GADGET_FACTORY_DECLARE(GADGET) GADGETRON_LOADABLE_FACTORY_DECLARE(Gadget,GADGET)
 
-    /* Macros for handling dyamic linking */
-#define GADGET_DECLARE(GADGET)			\
-    GADGETRON_LOADABLE_DECLARE(GADGET)
+#define GADGET_DECLARE(GADGET) 
+#define GADGET_FACTORY_DECLARE(GADGET) GADGETRON_LOADABLE_FACTORY_DECLARE(Gadget,GADGET)
 
-#define GADGET_FACTORY_DECLARE(GADGET)			\
-    GADGETRON_LOADABLE_FACTORY_DECLARE(Gadget,GADGET)
 }
 
 #endif //GADGET_H
diff --git a/apps/gadgetron/GadgetContainerMessage.h b/apps/gadgetron/GadgetContainerMessage.h
index 2cef40e..0aaf578 100644
--- a/apps/gadgetron/GadgetContainerMessage.h
+++ b/apps/gadgetron/GadgetContainerMessage.h
@@ -4,6 +4,7 @@
 
 #include <ace/Message_Block.h>
 #include <string>
+#include "Gadgetron.h"
 
 namespace Gadgetron{
 /**
@@ -26,6 +27,13 @@ class GadgetContainerMessageBase : public ACE_Message_Block
     set_flags(CONTAINER_MESSAGE_BLOCK); //Mark this message block as a container, so that we know it is safe to type cast it.
   }
 
+  GadgetContainerMessageBase(ACE_Data_Block* d)
+    : base(d)
+  {
+    set_flags(CONTAINER_MESSAGE_BLOCK);
+  }
+  
+
 #ifdef WIN32
   std::string getTypeID() { return type_magic_id_; }
   template <class T> static std::string magic_number_for_type() { return std::string(typeid(T).name()); } 
@@ -74,19 +82,47 @@ public:
     type_magic_id_ = magic_number_for_type<T>(); 
   }
 
-  virtual ~GadgetContainerMessage() 
+  GadgetContainerMessage(ACE_Data_Block* d)
+    : base(d)
   {
-    //In case the object contained in this object has allocated memory on the heap, it must be destroyed
-    if (content_) content_->~T();
+    type_magic_id_ = magic_number_for_type<T>();
+    content_ = reinterpret_cast<T*>(this->rd_ptr());
+  }
 
+  virtual ~GadgetContainerMessage() 
+  {
     //ACE_Message_Block will take care of deallocating space for the object itself;
   }
 
+  virtual ACE_Message_Block* release() // Tears down this container message; returns whatever ACE_Message_Block::release() returns
+  {    
+    //In case the object contained in this object has allocated memory on the heap, it must be destroyed
+    if (this->reference_count() <= 1) { // payload destructor runs only when reference_count() reports 1 or less
+      if (content_) content_->~T(); // explicit destructor call; nothing is deallocated on this line
+    } 
+    if (cont_) {
+      cont_->release(); // the continuation is released explicitly here...
+      cont_ = 0; // ...and cleared; NOTE(review): presumably so the base-class release() does not touch it again - confirm
+    }
+    return ACE_Message_Block::release();
+  }
+
   T* getObjectPtr() 
   {
     return content_;
   }
 
+  virtual GadgetContainerMessage<T>* duplicate() // Returns a new message built on data_block()->duplicate() of this message
+  {
+    GadgetContainerMessage<T>* nb = new GadgetContainerMessage<T>(this->data_block()->duplicate());
+    nb->rd_ptr (this->rd_ptr_); // hand this message's rd_ptr_ to the new message
+    nb->wr_ptr (this->wr_ptr_); // hand this message's wr_ptr_ to the new message
+    if (this->cont_) {
+      nb->cont_ = this->cont_->duplicate(); // the continuation, when present, is duplicated as well
+    }
+    return nb;
+  }
+
 protected:
   T* content_;
 }; 
diff --git a/apps/gadgetron/GadgetMessageInterface.h b/apps/gadgetron/GadgetMessageInterface.h
index 39c8276..9d7d591 100644
--- a/apps/gadgetron/GadgetMessageInterface.h
+++ b/apps/gadgetron/GadgetMessageInterface.h
@@ -1,16 +1,15 @@
 #ifndef GADGETMESSAGEINTERFACE_H
 #define GADGETMESSAGEINTERFACE_H
 
-#include "ace/SOCK_Stream.h"
-#include <ace/Basic_Types.h>
-
-#include <map>
-
 #include "GadgetContainerMessage.h"
 #include "Gadgetron.h"
 #include "GadgetronExport.h"
 #include "Gadget.h"
 
+#include <ace/SOCK_Stream.h>
+#include <ace/Basic_Types.h>
+#include <map>
+
 namespace Gadgetron
 {
 
@@ -218,14 +217,18 @@ class GadgetMessageScriptReader : public GadgetMessageReader
 
 /* Macros for handling dyamic linking */
 
-#define GADGETRON_READER_DECLARE(READER) \
-  GADGETRON_LOADABLE_DECLARE(READER)
+//#define GADGETRON_READER_DECLARE(READER) \
+//  GADGETRON_LOADABLE_DECLARE(READER)
+
+#define GADGETRON_READER_DECLARE(READER) 
 
 #define GADGETRON_READER_FACTORY_DECLARE(READER)	\
   GADGETRON_LOADABLE_FACTORY_DECLARE(GadgetMessageReader, READER)
 
-#define GADGETRON_WRITER_DECLARE(WRITER) \
-  GADGETRON_LOADABLE_DECLARE(WRITER)
+//#define GADGETRON_WRITER_DECLARE(WRITER) \
+//  GADGETRON_LOADABLE_DECLARE(WRITER)
+
+#define GADGETRON_WRITER_DECLARE(WRITER) 
 
 #define GADGETRON_WRITER_FACTORY_DECLARE(WRITER)	\
   GADGETRON_LOADABLE_FACTORY_DECLARE(GadgetMessageWriter, WRITER)
diff --git a/apps/gadgetron/GadgetServerAcceptor.cpp b/apps/gadgetron/GadgetServerAcceptor.cpp
index 48270b9..a5ba8ea 100644
--- a/apps/gadgetron/GadgetServerAcceptor.cpp
+++ b/apps/gadgetron/GadgetServerAcceptor.cpp
@@ -19,15 +19,14 @@ int GadgetServerAcceptor::open (const ACE_INET_Addr &listen_addr)
     (this, ACE_Event_Handler::ACCEPT_MASK);
 }
 
-
-
-
 int GadgetServerAcceptor::handle_input (ACE_HANDLE)
 {
   GadgetStreamController *controller;
   ACE_NEW_RETURN (controller, GadgetStreamController, -1);
   auto_ptr<GadgetStreamController> p (controller);
 
+  controller->set_global_gadget_parameters(global_gadget_parameters_);
+
   if (this->acceptor_.accept (controller->peer ()) == -1)
     ACE_ERROR_RETURN ((LM_ERROR,
                        ACE_TEXT ("(%P|%t) %p\n"),
diff --git a/apps/gadgetron/GadgetServerAcceptor.h b/apps/gadgetron/GadgetServerAcceptor.h
index 4b74ea5..3df5bca 100644
--- a/apps/gadgetron/GadgetServerAcceptor.h
+++ b/apps/gadgetron/GadgetServerAcceptor.h
@@ -3,6 +3,8 @@
 
 #include "ace/SOCK_Acceptor.h"
 #include "ace/Reactor.h"
+#include <string>
+#include <map>
 
 namespace Gadgetron{
 class GadgetServerAcceptor : public ACE_Event_Handler
@@ -19,6 +21,9 @@ public:
 
   virtual int handle_close (ACE_HANDLE handle,
                             ACE_Reactor_Mask close_mask);
+
+  std::map<std::string, std::string> global_gadget_parameters_;
+
 protected:
   ACE_SOCK_Acceptor acceptor_;
 };
diff --git a/apps/gadgetron/GadgetStreamController.cpp b/apps/gadgetron/GadgetStreamController.cpp
index f194b45..87e8c74 100644
--- a/apps/gadgetron/GadgetStreamController.cpp
+++ b/apps/gadgetron/GadgetStreamController.cpp
@@ -7,16 +7,40 @@
 
 #include "GadgetStreamController.h"
 #include "GadgetContainerMessage.h"
+#include "GadgetMessageInterface.h"
+#include "GadgetronConnector.h"
 #include "Gadget.h"
 #include "EndGadget.h"
+#include "gadgetron_config.h"
 
-#include "gadgetron.hxx" //Auto generated class representation of gadgetron XML configuration
+#include "gadgetron_xml.h"
 #include "url_encode.h"
 
 #include <complex>
 #include <fstream>
 
 using namespace Gadgetron;
+
+/*
+namespace Gadgetron {
+//This function is needed to avoid some linking problems.
+  extern "C" {
+    Gadget* find_gadget_in_controller(GadgetStreamController* c, const char* g)
+    {
+      return c->find_gadget(g);
+    }
+  }
+}
+*/
+
+GadgetStreamController::GadgetStreamController()
+  : stream_configured_(false)
+  , notifier_ (0, this, ACE_Event_Handler::WRITE_MASK)
+  , writer_task_(&this->peer())
+{
+  gadgetron_home_ = get_gadgetron_home();
+}
+
 int GadgetStreamController::open (void)
 {
 	//We will set up the controllers message queue such that when a packet is enqueued write will be triggered.
@@ -208,12 +232,15 @@ Gadget* GadgetStreamController::find_gadget(std::string gadget_name)
 	return 0;
 }
 
-int GadgetStreamController::configure_from_file(std::string config_xml_filename)
+void GadgetStreamController::set_global_gadget_parameters(const std::map<std::string, std::string>& globalGadgetPara)
 {
+    global_gadget_parameters_ = globalGadgetPara;
+}
 
-	char * gadgetron_home = ACE_OS::getenv("GADGETRON_HOME");
+int GadgetStreamController::configure_from_file(std::string config_xml_filename)
+{
 	ACE_TCHAR config_file_name[4096];
-	ACE_OS::sprintf(config_file_name, "%s/config/%s", gadgetron_home, config_xml_filename.c_str());
+	ACE_OS::sprintf(config_file_name, "%s/%s/%s", gadgetron_home_.c_str(), GADGETRON_CONFIG_PATH, config_xml_filename.c_str());
 
 	GADGET_DEBUG2("Running configuration: %s\n", config_file_name);
 
@@ -245,142 +272,148 @@ int GadgetStreamController::configure_from_file(std::string config_xml_filename)
 int GadgetStreamController::configure(std::string config_xml_string)
 {
 
-	char * gadgetron_home = ACE_OS::getenv("GADGETRON_HOME");
-	ACE_TCHAR schema_file_name[4096];
-	ACE_OS::sprintf(schema_file_name, "%s/schema/gadgetron.xsd", gadgetron_home);
-
-	std::string tmp(schema_file_name);
-	tmp = url_encode(tmp);
-	ACE_OS_String::strncpy(schema_file_name,tmp.c_str(), 4096);
-
-
-	xml_schema::properties props;
-	props.schema_location (
-	  "http://gadgetron.sf.net/gadgetron",
-	  std::string (schema_file_name));
-
-	std::istringstream str_stream(config_xml_string, std::stringstream::in);
-	std::auto_ptr<gadgetron::gadgetronStreamConfiguration> cfg;
-
-	ACE_TCHAR port_no[1024];
-	try {
-		cfg = std::auto_ptr<gadgetron::gadgetronStreamConfiguration>(gadgetron::gadgetronStreamConfiguration_(str_stream,0,props));
-		//cfg = std::auto_ptr<gadgetron::gadgetronStreamConfiguration>(gadgetron::gadgetronStreamConfiguration_(std::string(config_file_name)));
-	}  catch (const xml_schema::exception& e) {
-		GADGET_DEBUG2("Failed to parse Gadget Stream Configuration: %s\n", e.what());
-		return GADGET_FAIL;
-	}
-
-	GADGET_DEBUG2("Found %d readers\n", cfg->reader().size());
-	GADGET_DEBUG2("Found %d writers\n", cfg->writer().size());
-	GADGET_DEBUG2("Found %d gadgets\n", cfg->gadget().size());
-
-	for (gadgetron::gadgetronStreamConfiguration::reader_sequence::iterator i (cfg->reader().begin ()); i != cfg->reader().end(); ++i) {
-		long slot = 0;
-		std::string dllname("");
-		std::string classname("");
-
-		slot = i->slot();
-		dllname = i->dll();
-		classname = i->classname();
-
-		GADGET_DEBUG1("--Found reader declaration\n");
-		GADGET_DEBUG2("  Reader dll: %s\n", dllname.c_str());
-		GADGET_DEBUG2("  Reader class: %s\n", classname.c_str());
-		GADGET_DEBUG2("  Reader slot: %d\n", slot);
-
-		GadgetMessageReader* r =
-				load_dll_component<GadgetMessageReader>(dllname.c_str(),
+  GadgetronXML::GadgetStreamConfiguration cfg;
+  try {
+    deserialize(config_xml_string.c_str(), cfg);  
+  }  catch (const std::runtime_error& e) {
+    GADGET_DEBUG2("Failed to parse Gadget Stream Configuration: %s\n", e.what());
+    return GADGET_FAIL;
+  }
+
+  GADGET_DEBUG2("Found %d readers\n", cfg.reader.size());
+  GADGET_DEBUG2("Found %d writers\n", cfg.writer.size());
+  GADGET_DEBUG2("Found %d gadgets\n", cfg.gadget.size());
+  
+  //Configuration of readers
+  for (std::vector<GadgetronXML::Reader>::iterator i = cfg.reader.begin();
+       i != cfg.reader.end();
+       ++i) 
+    {
+
+      long slot = 0;
+      std::string dllname("");
+      std::string classname("");
+
+      slot = i->slot;
+      dllname = i->dll;
+      classname = i->classname;
+
+      GADGET_DEBUG1("--Found reader declaration\n");
+      GADGET_DEBUG2("  Reader dll: %s\n", dllname.c_str());
+      GADGET_DEBUG2("  Reader class: %s\n", classname.c_str());
+      GADGET_DEBUG2("  Reader slot: %d\n", slot);
+
+      GadgetMessageReader* r =
+	load_dll_component<GadgetMessageReader>(dllname.c_str(),
 						classname.c_str());
-
-		if (!r) {
-			GADGET_DEBUG1("Failed to load GadgetMessageReader from DLL\n");
-			return GADGET_FAIL;
-		}
-
-		readers_.insert(slot, r);
-
-	}
-	//Configuration of readers end
-
-
-	//Configuration of writers
-	for (gadgetron::gadgetronStreamConfiguration::writer_sequence::iterator i (cfg->writer().begin ()); i != cfg->writer().end(); ++i) {
-		long slot = 0;
-		std::string dllname("");
-		std::string classname("");
-
-		slot = i->slot();
-		dllname = i->dll();
-		classname = i->classname();
-
-		GADGET_DEBUG1("--Found writer declaration\n");
-		GADGET_DEBUG2("  Reader dll: %s\n", dllname.c_str());
-		GADGET_DEBUG2("  Reader class: %s\n", classname.c_str());
-		GADGET_DEBUG2("  Reader slot: %d\n", slot);
-
-		GadgetMessageWriter* w =
-				load_dll_component<GadgetMessageWriter>(dllname.c_str(),
+      
+      if (!r) {
+	GADGET_DEBUG1("Failed to load GadgetMessageReader from DLL\n");
+	return GADGET_FAIL;
+      }
+      
+      readers_.insert(slot, r);
+      
+    }	
+  //Configuration of readers end
+
+
+  //Configuration of writers
+  for (std::vector<GadgetronXML::Writer>::iterator i = cfg.writer.begin();
+       i != cfg.writer.end();
+       ++i) 
+    {
+      long slot = 0;
+      std::string dllname("");
+      std::string classname("");
+      
+      slot = i->slot;
+      dllname = i->dll;
+      classname = i->classname;
+
+      GADGET_DEBUG1("--Found writer declaration\n");
+      GADGET_DEBUG2("  Writer dll: %s\n", dllname.c_str()); //This is the writer loop, so label the log lines as Writer
+      GADGET_DEBUG2("  Writer class: %s\n", classname.c_str());
+      GADGET_DEBUG2("  Writer slot: %d\n", slot);
+      
+      GadgetMessageWriter* w =
+	load_dll_component<GadgetMessageWriter>(dllname.c_str(),
 						classname.c_str());
-
-		if (!w) {
-			GADGET_DEBUG1("Failed to load GadgetMessageWriter from DLL\n");
-			return GADGET_FAIL;
-		}
-
-		writer_task_.register_writer(slot, w);
-	}
-	//Configuration of writers end
-
-	//Let's configure the stream
-	GADGET_DEBUG2("Processing %d gadgets in reverse order\n",cfg->gadget().size());
-	for (gadgetron::gadgetronStreamConfiguration::gadget_sequence::reverse_iterator i (cfg->gadget().rbegin ()); i != cfg->gadget().rend(); ++i) {
-		std::string gadgetname("");
-		std::string dllname("");
-		std::string classname("");
-
-		gadgetname = i->name();
-		dllname = i->dll();
-		classname = i->classname();
-
-		GADGET_DEBUG1("--Found gadget declaration\n");
-		GADGET_DEBUG2("  Gadget Name: %s\n", gadgetname.c_str());
-		GADGET_DEBUG2("  Gadget dll: %s\n", dllname.c_str());
-		GADGET_DEBUG2("  Gadget class: %s\n", classname.c_str());
-
-		GadgetModule* m = create_gadget_module(dllname.c_str(),
-				classname.c_str(),
-				gadgetname.c_str());
-
-		if (!m) {
-			GADGET_DEBUG2("Failed to create GadgetModule from %s:%s\n",
-					classname.c_str(),
-					dllname.c_str());
-			return GADGET_FAIL;
-		}
-
-		Gadget* g = dynamic_cast<Gadget*>(m->writer());//Get the gadget out of the module
-
-		GADGET_DEBUG2("  Gadget parameters: %d\n", i->property().size());
-		for (gadgetron::gadget::property_sequence::iterator p (i->property().begin()); p != i->property().end(); ++p) {
-			std::string pname(p->name());
-			std::string pval(p->value());
-			GADGET_DEBUG2("Setting parameter %s = %s\n", pname.c_str(),pval.c_str());
-			g->set_parameter(pname.c_str(),pval.c_str(),false);
-		}
-
-		if (stream_.push(m) < 0) {
-			GADGET_DEBUG2("Failed to push Gadget %s onto stream\n", gadgetname.c_str());
-			delete m;
-			return GADGET_FAIL;
-		}
-
+      
+      if (!w) {
+	GADGET_DEBUG1("Failed to load GadgetMessageWriter from DLL\n");
+	return GADGET_FAIL;
+      }
+      
+      writer_task_.register_writer(slot, w);
+    }
+  //Configuration of writers end
+
+  //Let's configure the stream
+  GADGET_DEBUG2("Processing %d gadgets in reverse order\n",cfg.gadget.size());
+
+  for (std::vector<GadgetronXML::Gadget>::reverse_iterator i = cfg.gadget.rbegin();
+       i != cfg.gadget.rend();
+       ++i) 
+    {
+      std::string gadgetname("");
+      std::string dllname("");
+      std::string classname("");
+
+      gadgetname = i->name;
+      dllname = i->dll;
+      classname = i->classname;
+
+      GADGET_DEBUG1("--Found gadget declaration\n");
+      GADGET_DEBUG2("  Gadget Name: %s\n", gadgetname.c_str());
+      GADGET_DEBUG2("  Gadget dll: %s\n", dllname.c_str());
+      GADGET_DEBUG2("  Gadget class: %s\n", classname.c_str());
+
+      GadgetModule* m = create_gadget_module(dllname.c_str(),
+					     classname.c_str(),
+					     gadgetname.c_str());
+      
+      if (!m) {
+	GADGET_DEBUG2("Failed to create GadgetModule from %s:%s\n",
+		      classname.c_str(),
+		      dllname.c_str());
+	return GADGET_FAIL;
+      }
+      
+      Gadget* g = dynamic_cast<Gadget*>(m->writer());//Get the gadget out of the module
+      
+      GADGET_DEBUG2("  Gadget parameters: %d\n", i->property.size());
+      for (std::vector<GadgetronXML::GadgetronParameter>::iterator p = i->property.begin();
+	   p != i->property.end();
+	   ++p)
+	{
+	  std::string pname(p->name);
+	  std::string pval(p->value);
+	  GADGET_DEBUG2("Setting parameter %s = %s\n", pname.c_str(),pval.c_str());
+	  g->set_parameter(pname.c_str(),pval.c_str(),false);
 	}
-
-	GADGET_DEBUG1("Gadget Stream configured\n");
-	stream_configured_ = true;
-
-	return GADGET_OK;
+      
+        // set the global gadget parameters for every gadget
+      std::map<std::string, std::string>::const_iterator iter;
+      for ( iter=global_gadget_parameters_.begin(); iter!=global_gadget_parameters_.end(); iter++ )
+        {
+	  std::string key = iter->first;
+	  std::string value = iter->second;
+	  g->set_parameter(key.c_str(), value.c_str(), false);
+        }
+
+      if (stream_.push(m) < 0) {
+	GADGET_DEBUG2("Failed to push Gadget %s onto stream\n", gadgetname.c_str());
+	delete m;
+	return GADGET_FAIL;
+      }
+      
+    }
+
+  GADGET_DEBUG1("Gadget Stream configured\n");
+  stream_configured_ = true;
+
+  return GADGET_OK;
 }
 
 GadgetModule * GadgetStreamController::create_gadget_module(const char* DLL, 
diff --git a/apps/gadgetron/GadgetStreamController.h b/apps/gadgetron/GadgetStreamController.h
index 473e732..3850091 100644
--- a/apps/gadgetron/GadgetStreamController.h
+++ b/apps/gadgetron/GadgetStreamController.h
@@ -12,25 +12,20 @@
 #include <complex>
 #include <vector>
 
+#include "gadgetbase_export.h"
 #include "Gadgetron.h"
-#include "Gadget.h"
-#include "GadgetMessageInterface.h"
+#include "gadgetron_paths.h"
 #include "GadgetronConnector.h"
 
 typedef ACE_Module<ACE_MT_SYNCH> GadgetModule;
 
 namespace Gadgetron{
 
-
-class GadgetStreamController 
+class EXPORTGADGETBASE GadgetStreamController 
 : public ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_MT_SYNCH>
 {
 public:
-  GadgetStreamController()
-    : stream_configured_(false)
-    , notifier_ (0, this, ACE_Event_Handler::WRITE_MASK)
-  	, writer_task_(&this->peer())
-    { }
+  GadgetStreamController();
 
   virtual ~GadgetStreamController()
     { 
@@ -56,6 +51,8 @@ public:
 
   virtual Gadget* find_gadget(std::string gadget_name);
 
+  void set_global_gadget_parameters(const std::map<std::string, std::string>& globalGadgetPara);
+
 private:
   ACE_Stream<ACE_MT_SYNCH> stream_;
   bool stream_configured_;
@@ -67,12 +64,16 @@ private:
   
   std::vector<ACE_DLL_Handle*> dll_handles_;
 
+  std::map<std::string, std::string> global_gadget_parameters_;
+
   virtual int configure(std::string config_xml_string);
   virtual int configure_from_file(std::string config_xml_filename);
 
   virtual GadgetModule * create_gadget_module(const char* DLL, const char* gadget, const char* gadget_module_name);
 
   template <class T>  T* load_dll_component(const char* DLL, const char* component_name);
+  
+  std::string gadgetron_home_;
 
 };
 }
diff --git a/apps/gadgetron/GadgetronExport.h b/apps/gadgetron/GadgetronExport.h
index d2c1c44..f9b2ff0 100644
--- a/apps/gadgetron/GadgetronExport.h
+++ b/apps/gadgetron/GadgetronExport.h
@@ -13,10 +13,10 @@
 #endif
 
 //In header file add this macro
-#define GADGETRON_LOADABLE_DECLARE(COMPONENT)                   \
-  void *operator new (size_t bytes);                            \
-  void operator delete (void *ptr);                             \
-  void *operator new(size_t s, void * p) { return p; }
+//#define GADGETRON_LOADABLE_DECLARE(COMPONENT)                   \
+//  void *operator new (size_t bytes);                            \
+//  void operator delete (void *ptr);                             \
+//  void *operator new(size_t s, void * p) { return p; }
 
 //In CPP file add this macro add the end
 #define GADGETRON_LOADABLE_FACTORY_DECLARE(CLASS, COMPONENT)	\
@@ -25,14 +25,14 @@ CLASS * make_##COMPONENT (void)       				\
 {							       	\
   return new COMPONENT;                                         \
 }                                                               \
-void * COMPONENT ::operator new (size_t bytes)                  \
+/*void * COMPONENT ::operator new (size_t bytes)                  \
 {                                                               \
   return ::new char[bytes];                                     \
 }                                                               \
 void COMPONENT ::operator delete (void *ptr)                    \
 {                                                               \
   delete [] static_cast <char *> (ptr);                         \
-} 
+}*/ 
 
 
 #endif
diff --git a/apps/gadgetron/gadgetbase_export.h b/apps/gadgetron/gadgetbase_export.h
new file mode 100644
index 0000000..d90ecb9
--- /dev/null
+++ b/apps/gadgetron/gadgetbase_export.h
@@ -0,0 +1,16 @@
+#ifndef GADGETBASE_EXPORT_H_
+#define GADGETBASE_EXPORT_H_
+
+
+#if defined (WIN32)
+#if defined (__BUILD_GADGETRON_GADGETBASE__) || defined (gadgetron_gadgetbase_EXPORTS)
+#define EXPORTGADGETBASE __declspec(dllexport)
+#else
+#define EXPORTGADGETBASE __declspec(dllimport)
+#endif
+#else
+#define EXPORTGADGETBASE
+#endif
+
+
+#endif /* GADGETBASE_EXPORT_H_ */
diff --git a/apps/gadgetron/gadgetron.xml.example b/apps/gadgetron/gadgetron.xml.example
index 6b6f5f5..fdc20c1 100644
--- a/apps/gadgetron/gadgetron.xml.example
+++ b/apps/gadgetron/gadgetron.xml.example
@@ -4,6 +4,11 @@
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
         
   <port>9002</port>
+
+  <cloudBus>
+    <multiCastAddress>224.2.2.9</multiCastAddress>
+    <port>4148</port>
+  </cloudBus>
   
 </gadgetronConfiguration>
-  
\ No newline at end of file
+  
diff --git a/apps/gadgetron/gadgetron_config.in b/apps/gadgetron/gadgetron_config.in
new file mode 100644
index 0000000..b288f3d
--- /dev/null
+++ b/apps/gadgetron/gadgetron_config.in
@@ -0,0 +1,12 @@
+#ifndef GADGETRON_CONFIG_H
+#define GADGETRON_CONFIG_H
+
+#define GADGETRON_VERSION_MAJOR @GADGETRON_VERSION_MAJOR@
+#define GADGETRON_VERSION_MINOR @GADGETRON_VERSION_MINOR@
+#define GADGETRON_VERSION_PATCH @GADGETRON_VERSION_PATCH@
+#define GADGETRON_VERSION_STRING "@GADGETRON_VERSION_STRING@"
+#define GADGETRON_CONFIG_PATH "@GADGETRON_INSTALL_CONFIG_PATH@"
+#define GADGETRON_PYTHON_PATH "@GADGETRON_INSTALL_PYTHON_MODULE_PATH@"
+#define GADGETRON_GIT_SHA1_HASH "@GADGETRON_GIT_SHA1@"
+
+#endif //GADGETRON_CONFIG_H
diff --git a/apps/gadgetron/gadgetron_info.cpp b/apps/gadgetron/gadgetron_info.cpp
new file mode 100644
index 0000000..325d5a0
--- /dev/null
+++ b/apps/gadgetron/gadgetron_info.cpp
@@ -0,0 +1,99 @@
+//#include "ace/OS_NS_stdlib.h"
+#include "ace/OS_NS_string.h"
+//#include "ace/OS_NS_stdio.h"
+#include "ace/DLL.h"
+#include "ace/DLL_Manager.h"
+//#include "ace/OS_NS_netdb.h"
+
+#include "gadgetron_config.h"
+#include "Gadget.h"
+
+#include <iostream>
+#include <string>
+
+
+using namespace Gadgetron;
+
+/**
+ *  Print the Gadgetron version this tool was built with and, when a shared
+ *  library and a gadget class name are given, the version string reported by
+ *  that gadget (Gadget::get_gadgetron_version()).
+ *
+ *  Usage: gadgetron_info [<SHARED LIB> <GADGET_INFO>]
+ *
+ *  @return 0 on success; -1 on a wrong argument count, when the library or
+ *          its make_<GADGET_INFO> factory cannot be loaded, or when the
+ *          factory returns no object.
+ */
+int main(int argc, char** argv)
+{
+  std::cout << "Gadgetron Version Info" << std::endl;
+  std::cout << "  -- Version  : " << GADGETRON_VERSION_STRING << std::endl;
+  std::cout << "  -- Git SHA1 : " << GADGETRON_GIT_SHA1_HASH << std::endl << std::endl;
+
+  if (argc == 1) {
+    return 0;
+  }
+
+  //Anything other than exactly two extra arguments is a usage error
+  if (argc != 3) {
+    std::cout << "Invalid number of arguments (" << argc -1 << ")." << std::endl;
+    std::cout << "Usage (gadget library info):" << std::endl;
+    std::cout << " -- gadgetron_info <SHARED LIB> <GADGET_INFO>" << std::endl;
+    return -1;
+  }
+
+  const char* DLL = argv[1];
+  const char* component_name = argv[2];
+
+  //We must be investigating a certain gadget
+  std::cout << "Examining Gadget (SHARED LIB): " << component_name << " (" << DLL << ")" << std::endl;
+
+  //Both names come straight from the command line, so build them as
+  //std::string instead of formatting into fixed-size buffers.
+#if defined(WIN32) && defined(_DEBUG)
+  const std::string dllname = std::string(ACE_DLL_PREFIX) + DLL + "d";
+#else
+  const std::string dllname = std::string(ACE_DLL_PREFIX) + DLL;
+#endif
+  const std::string factoryname = std::string("make_") + component_name;
+
+  //Attempt to load Gadget
+  ACE_DLL_Handle dll;
+  ACE_SHLIB_HANDLE dll_handle = 0;
+
+  if (dll.open(dllname.c_str(), ACE_DEFAULT_SHLIB_MODE, dll_handle )) {
+    std::cout << "Failed to load DLL (" << DLL << "), Possible reasons:" << std::endl;
+    std::cout << "   - Name of DLL is wrong" << std::endl;
+    std::cout << "   - Path of DLL is not in your DLL search path (LD_LIBRARY_PATH on Unix)" << std::endl;
+    std::cout << "   - Path of other DLLs that this DLL depends on is not in the search path" << std::endl;
+    std::cout << "" << std::endl;
+    std::cout << "Set environment variable ACE_DEBUG=1 to get more information" << std::endl << std::endl;
+    return -1;
+  }
+
+  //Function pointer
+  typedef Gadget* (*ComponentCreator) (void);
+
+  //The symbol address is converted to the factory type by way of ptrdiff_t
+  void *void_ptr = dll.symbol (factoryname.c_str());
+  ptrdiff_t tmp = reinterpret_cast<ptrdiff_t> (void_ptr);
+  ComponentCreator cc = reinterpret_cast<ComponentCreator> (tmp);
+
+  if (cc == 0) {
+    std::cout << "Failed to load factory (" << factoryname << ") from DLL (" << dllname << ")" << std::endl;
+    return -1;
+  }
+
+  Gadget* g = cc();
+  if (!g) {
+    std::cout << "Failed to create component using factory" << std::endl;
+    return -1;
+  }
+
+  std::cout << "  -- Gadget compiled against Gadgetron version " << g->get_gadgetron_version() << std::endl;
+
+  delete g;
+
+  return 0;
+}
diff --git a/apps/gadgetron/gadgetron_paths.h b/apps/gadgetron/gadgetron_paths.h
new file mode 100644
index 0000000..7f5a4cb
--- /dev/null
+++ b/apps/gadgetron/gadgetron_paths.h
@@ -0,0 +1,76 @@
+#ifndef GADGETRON_PATHS_H
+#define GADGETRON_PATHS_H
+
+#include <limits.h>
+#include <string>
+#include <iostream>
+
+#ifdef _WIN32
+#include <windows.h>
+#include <Shlwapi.h>
+#pragma comment(lib, "shlwapi.lib")
+#else
+#include <unistd.h>
+#endif // _WIN32
+
+#ifdef __APPLE__
+#include <mach-o/dyld.h>/* _NSGetExecutablePath */
+#endif
+
+#define MAX_GADGETRON_HOME_LENGTH 1024
+
+namespace Gadgetron
+{
+  /**
+   * Derive the Gadgetron home folder from the location of the running executable.
+   *
+   * The result is the directory containing the executable followed by "/../",
+   * i.e. the parent of that directory.
+   *
+   * @return the home path, or an empty string (after printing a diagnostic to
+   *         std::cout) when the executable path cannot be determined.
+   */
+  inline std::string get_gadgetron_home()
+  {
+#if defined(__APPLE__)
+    char path[PATH_MAX];
+    uint32_t size = sizeof(path);
+    char resolved[PATH_MAX];
+    if ((_NSGetExecutablePath(path, &size) == 0) && (realpath(path, resolved) != NULL)) {
+      std::string s1(resolved);
+      return s1.substr(0, s1.find_last_of("\\/")) + std::string("/../");
+    } else {
+      std::cout << "Unable to determine GADGETRON_HOME" << std::endl;
+      return std::string("");
+    }
+#elif defined(_WIN32) || defined(_WIN64)
+    // Full path to the executable (including the executable file)
+    char fullPath[MAX_GADGETRON_HOME_LENGTH];
+
+    // When passing NULL to GetModuleHandle, it returns handle of exe itself
+    HMODULE hModule = GetModuleHandle(NULL);
+    if (hModule == NULL)
+      {
+        std::cout << "The path to the executable is NULL" << std::endl;
+        return std::string("");
+      }
+
+    // The ANSI ("A") entry points are named explicitly because the buffer is a
+    // char array, which would not match the wide variants picked by UNICODE builds.
+    // A result of 0 means failure; a result equal to the buffer size means the
+    // path was truncated. Neither leaves a usable path in the buffer.
+    const DWORD length = GetModuleFileNameA(hModule, fullPath, static_cast<DWORD>(sizeof(fullPath)));
+    if (length == 0 || length >= sizeof(fullPath))
+      {
+        std::cout << "Unable to determine GADGETRON_HOME" << std::endl;
+        return std::string("");
+      }
+
+    // Drop the executable file name, keeping only its directory
+    PathRemoveFileSpecA(fullPath);
+
+    // Use forward slashes throughout
+    std::string s1(fullPath);
+    for (std::string::size_type i = 0; i < s1.size(); ++i)
+      if (s1[i] == '\\') s1[i] = '/';
+
+    return s1 + std::string("/../");
+#else //Probably some NIX where readlink should work
+    char buff[MAX_GADGETRON_HOME_LENGTH];
+    // readlink does not NUL-terminate, hence the reserved last byte
+    ssize_t len = ::readlink("/proc/self/exe", buff, sizeof(buff)-1);
+    if (len != -1) {
+      buff[len] = '\0';
+      std::string s1(buff);
+      return s1.substr(0, s1.find_last_of("\\/")) + std::string("/../");
+    } else {
+      std::cout << "Unable to determine GADGETRON_HOME" << std::endl;
+      return std::string("");
+    }
+#endif
+  }
+}
+
+#endif //GADGETRON_PATHS_H
diff --git a/apps/gadgetron/gadgetron_xml.cpp b/apps/gadgetron/gadgetron_xml.cpp
new file mode 100644
index 0000000..839ba1c
--- /dev/null
+++ b/apps/gadgetron/gadgetron_xml.cpp
@@ -0,0 +1,95 @@
+#include "gadgetron_xml.h"
+#include "pugixml.hpp"
+#include <stdexcept>
+#include <cstdlib>
+
+namespace GadgetronXML
+{
+
+  /**
+   * Parse the Gadgetron server configuration from an XML string.
+   *
+   * Reads the mandatory <port> element, every <globalGadgetParameter>
+   * (name/value pair) and the optional <cloudBus> element found under the
+   * <gadgetronConfiguration> root.
+   *
+   * @param xml_config NUL-terminated XML text
+   * @param h          configuration object to fill; parameters are appended
+   * @throws std::runtime_error if the text is null, is not well-formed XML,
+   *         or lacks the root or port element
+   */
+  void deserialize(const char* xml_config, GadgetronConfiguration& h)
+  {
+    if (!xml_config) {
+      throw std::runtime_error("No Gadgetron configuration text supplied");
+    }
+
+    pugi::xml_document doc;
+    pugi::xml_parse_result result = doc.load(xml_config);
+
+    // Report malformed XML as such instead of as a missing element
+    if (!result) {
+      throw std::runtime_error(std::string("Unable to parse Gadgetron configuration: ") + result.description());
+    }
+
+    pugi::xml_node root = doc.child("gadgetronConfiguration");
+    if (!root) {
+      throw std::runtime_error("gadgetronConfiguration element not found in configuration file");
+    }
+    
+    pugi::xml_node port = root.child("port");
+    if (!port) {
+      throw std::runtime_error("Port not found in Gadgetron configuration");
+    }
+
+    h.port = port.child_value();
+
+    for (pugi::xml_node p = root.child("globalGadgetParameter"); p; p = p.next_sibling("globalGadgetParameter")) {
+      GadgetronParameter pp;
+      pp.name = p.child_value("name");
+      pp.value = p.child_value("value");
+      h.globalGadgetParameter.push_back(pp);
+    }
+
+    pugi::xml_node b = root.child("cloudBus");
+    if (b) {
+      CloudBus cb;
+      cb.multiCastAddress = b.child_value("multiCastAddress");
+      // atoi yields 0 for a missing or non-numeric port
+      cb.port = static_cast<unsigned int>(std::atoi(b.child_value("port")));
+      h.cloudBus = cb;
+    }
+    
+  }
+
+  /**
+   * Parse a gadget stream configuration from an XML string.
+   *
+   * Collects every <reader>, <writer> and <gadget> element (with the gadget's
+   * <property> name/value pairs) found under the
+   * <gadgetronStreamConfiguration> root, in document order.
+   *
+   * @param xml_config NUL-terminated XML text
+   * @param cfg        configuration object to fill; entries are appended
+   * @throws std::runtime_error if the text is null, is not well-formed XML,
+   *         or lacks the root element
+   */
+  void deserialize(const char* xml_config, GadgetStreamConfiguration& cfg)
+  {
+    if (!xml_config) {
+      throw std::runtime_error("No Gadgetron stream configuration text supplied");
+    }
+
+    pugi::xml_document doc;
+    pugi::xml_parse_result result = doc.load(xml_config);
+
+    // Report malformed XML as such instead of as a missing element
+    if (!result) {
+      throw std::runtime_error(std::string("Unable to parse Gadgetron stream configuration: ") + result.description());
+    }
+
+    pugi::xml_node root = doc.child("gadgetronStreamConfiguration");
+    if (!root) {
+      throw std::runtime_error("gadgetronStreamConfiguration element not found in configuration file");
+    }
+
+    for (pugi::xml_node reader = root.child("reader"); reader; reader = reader.next_sibling("reader")) {
+      Reader r;
+      // atoi yields 0 for a missing or non-numeric slot
+      r.slot = static_cast<unsigned short>(std::atoi(reader.child_value("slot")));
+      r.dll = reader.child_value("dll");
+      r.classname = reader.child_value("classname");
+      cfg.reader.push_back(r);
+    }
+    
+    for (pugi::xml_node writer = root.child("writer"); writer; writer = writer.next_sibling("writer")) {
+      Writer w;
+      w.slot = static_cast<unsigned short>(std::atoi(writer.child_value("slot")));
+      w.dll = writer.child_value("dll");
+      w.classname = writer.child_value("classname");
+      cfg.writer.push_back(w);
+    }
+
+    for (pugi::xml_node gadget = root.child("gadget"); gadget; gadget = gadget.next_sibling("gadget")) {
+      Gadget g;
+      g.name = gadget.child_value("name");
+      g.dll = gadget.child_value("dll");
+      g.classname = gadget.child_value("classname");
+      
+      for (pugi::xml_node property = gadget.child("property"); property; property = property.next_sibling("property")) {
+        GadgetronParameter p;
+        p.name = property.child_value("name");
+        p.value = property.child_value("value");
+        g.property.push_back(p);
+      }
+
+      cfg.gadget.push_back(g);
+    }
+  }
+}
diff --git a/apps/gadgetron/gadgetron_xml.h b/apps/gadgetron/gadgetron_xml.h
new file mode 100644
index 0000000..896bed2
--- /dev/null
+++ b/apps/gadgetron/gadgetron_xml.h
@@ -0,0 +1,122 @@
+#ifndef GADGETRON_XML_H
+#define GADGETRON_XML_H
+
+#include <string>
+#include <vector>
+#include <stdexcept>
+#include "gadgetbase_export.h"
+
+namespace GadgetronXML
+{
+  // Minimal "value that may be absent" holder.
+  //
+  // T must be default-constructible and copyable. An empty Optional still
+  // stores a value-initialised T, so operator* and operator-> never read an
+  // indeterminate object; only get() / operator() check for presence.
+  template <typename T> class Optional
+  {
+  public:
+    // Creates an empty holder. value_ is value-initialised so that copying an
+    // empty Optional of a POD-like T does not read indeterminate members.
+    Optional()
+      : present_(false)
+      , value_()
+    {
+
+    }
+
+    // Creates a holder containing a copy of v (intentionally implicit).
+    Optional(const T&v)
+      : present_(true)
+      , value_(v)
+    {
+
+    }
+
+    // Stores a copy of v and marks the holder as set.
+    const Optional& operator=(const T& v) {
+      present_ = true;
+      value_ = v;
+      return *this;
+    }
+
+    // Unchecked member access; test the holder first.
+    const T* operator->() const {
+      return &value_;
+    }
+
+    // Unchecked access to the stored value; test the holder first.
+    const T& operator*() const {
+      return value_;
+    }
+
+    // True when a value has been set.
+    operator bool() const {
+      return present_;
+    }
+
+    // True when a value has been set.
+    bool is_present() const {
+      return present_;
+    }
+
+    // Checked access: throws std::runtime_error when no value has been set.
+    T& get() {
+      if (!present_) {
+        throw std::runtime_error("Access optional value, which has not been set");
+      }
+      return value_;
+    }
+    
+    // Shorthand for get().
+    T& operator()() {
+      return get();
+    }
+
+    // Stores a copy of v and marks the holder as set.
+    void set(const T& v) {
+      present_ = true;
+      value_ = v;
+    }
+
+  protected:
+    bool present_;  // whether value_ has been set
+    T value_;       // the stored value (value-initialised while empty)
+
+  }; 
+
+
+  // A single name/value pair. Used both for the global gadget parameters of
+  // the server configuration and for the properties of an individual gadget.
+  struct GadgetronParameter
+  {
+    std::string name;
+    std::string value;
+  };
+
+  // Settings of the optional <cloudBus> configuration element.
+  struct CloudBus
+  {
+    std::string multiCastAddress;
+    unsigned int port;  // not initialised by default; set by deserialize()
+  };
+
+
+  // In-memory form of the <gadgetronConfiguration> document.
+  struct GadgetronConfiguration
+  {
+    std::string port;  // kept as text, exactly as written in the XML
+    std::vector<GadgetronParameter> globalGadgetParameter;
+    Optional<CloudBus> cloudBus;    
+  };
+
+  void EXPORTGADGETBASE deserialize(const char* xml_config, GadgetronConfiguration& h);
+  
+  // One <reader> entry of a stream configuration (the same layout is reused
+  // for <writer> entries through a typedef).
+  struct Reader
+  {
+    unsigned short slot;  // not initialised by default; set by deserialize()
+    std::string dll;
+    std::string classname;
+  };
+
+  typedef Reader Writer;
+  
+  // One <gadget> entry of a stream configuration together with its
+  // <property> name/value pairs.
+  struct Gadget
+  {
+    std::string name;
+    std::string dll;
+    std::string classname;
+    std::vector<GadgetronParameter> property;
+  };
+
+  // In-memory form of the <gadgetronStreamConfiguration> document; each
+  // vector keeps its entries in document order.
+  struct GadgetStreamConfiguration
+  {
+    std::vector<Reader> reader;
+    std::vector<Writer> writer;
+    std::vector<Gadget> gadget;
+  };
+
+  void EXPORTGADGETBASE deserialize(const char* xml, GadgetStreamConfiguration& cfg);
+
+};
+
+#endif //GADGETRON_XML_H
+
+
diff --git a/apps/gadgetron/main.cpp b/apps/gadgetron/main.cpp
index 50542ef..48764ea 100644
--- a/apps/gadgetron/main.cpp
+++ b/apps/gadgetron/main.cpp
@@ -1,7 +1,10 @@
 #include "GadgetServerAcceptor.h"
 #include "FileInfo.h"
 #include "url_encode.h"
-#include "gadgetron.hxx" //Generated header file for XML configuration
+#include "gadgetron_xml.h"
+#include "gadgetron_config.h"
+#include "gadgetron_paths.h"
+#include "CloudBus.h"
 
 #include <ace/Log_Msg.h>
 #include <ace/Service_Config.h>
@@ -9,86 +12,181 @@
 #include <ace/Get_Opt.h>
 #include <ace/OS_NS_string.h>
 #include <iostream>
+#include <string>
+#include <fstream>
+#include <streambuf>
+
+
+#ifdef _WIN32
+#include <windows.h>
+#include <Shlwapi.h>
+#pragma comment(lib, "shlwapi.lib")
+#else
+#include <sys/types.h>
+#include <sys/stat.h>
+#endif // _WIN32
+
+#include <boost/filesystem.hpp>
+using namespace boost::filesystem;
 
 using namespace Gadgetron;
 
+#define GT_WORKING_DIRECTORY "workingDirectory"
+
+namespace Gadgetron {
+
+
+  /**
+   * Make sure the working directory exists, creating it (and any missing
+   * parent directories) with read/write/execute permission for everybody.
+   *
+   * An already existing path is left untouched, permissions included.
+   *
+   * @param workingdirectory path of the directory to create
+   * @return true if the path already existed or was created and its
+   *         permissions were set; false (after logging) otherwise
+   */
+  bool create_folder_with_all_permissions(const std::string& workingdirectory)
+  {
+    boost::filesystem::path workingPath(workingdirectory);
+
+    try
+      {
+        if ( boost::filesystem::exists(workingPath) )
+          {
+            return true;
+          }
+
+        // create_directories also builds missing parents. Its return value is
+        // deliberately ignored: false only means that nothing had to be created
+        // (e.g. another process got there first); real failures throw.
+        boost::filesystem::create_directories(workingPath);
+      }
+    catch (const std::exception& e)
+      {
+        ACE_ERROR_RETURN((LM_ERROR, ACE_TEXT("Error creating the working directory: %s\n"), e.what()), false);
+      }
+
+    // set the permission for the folder
+#ifdef _WIN32
+    try
+      {
+        boost::filesystem::permissions(workingPath, boost::filesystem::all_all);
+      }
+    catch(...)
+      {
+        ACE_ERROR_RETURN((LM_ERROR, ACE_TEXT("Error changing the permission of the working directory.\n")), false);
+      }
+#else
+    // in case an older version of boost is used in non-win system
+    // the system call is used
+    int res = chmod(workingPath.string().c_str(), S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IWGRP|S_IXGRP|S_IROTH|S_IWOTH|S_IXOTH);
+    if ( res != 0 )
+      {
+        ACE_ERROR_RETURN((LM_ERROR, ACE_TEXT("Error changing the permission of the working directory.\n")), false);
+      }
+#endif // _WIN32
+
+    return true;
+  }
+
+}
+
 void print_usage()
 {
-	ACE_DEBUG((LM_INFO, ACE_TEXT("Usage: \n") ));
-	ACE_DEBUG((LM_INFO, ACE_TEXT("gadgetron   -p <PORT>                      (default 9002)       \n") ));
+  // Command-line synopsis, emitted at LM_INFO priority. Only the -p option
+  // exists. NOTE(review): the "default 9002" text is fixed here, while main
+  // takes the actual default port from the configuration file.
+  ACE_DEBUG((LM_INFO, ACE_TEXT("Usage: \n") ));
+  ACE_DEBUG((LM_INFO, ACE_TEXT("gadgetron   -p <PORT>                      (default 9002)       \n") ));
 }
 
 int ACE_TMAIN(int argc, ACE_TCHAR *argv[])
 {
-	ACE_TRACE(( ACE_TEXT("main") ));
-	
-	ACE_LOG_MSG->priority_mask( LM_INFO | LM_NOTICE | LM_ERROR| LM_DEBUG,
-			ACE_Log_Msg::PROCESS);
-
-	char * gadgetron_home = ACE_OS::getenv("GADGETRON_HOME");
-
-	if (std::string(gadgetron_home).size() == 0) {
-		ACE_ERROR_RETURN((LM_ERROR, ACE_TEXT("GADGETRON_HOME variable not set.\n")),-1);
-	}
-
-	std::string gcfg = std::string(gadgetron_home) + std::string("/config/gadgetron.xml");
-
-	if (!FileInfo(gcfg).exists()) {
-		ACE_ERROR_RETURN((LM_ERROR, ACE_TEXT("Gadgetron configuration file %s not found.\n"), gcfg.c_str()),-1);
-	}
-
-	ACE_TCHAR schema_file_name[4096];
-	ACE_OS::sprintf(schema_file_name, "%s/schema/gadgetron.xsd", gadgetron_home);
-
-	std::string tmp(schema_file_name);
-	tmp = url_encode(tmp);
-	ACE_OS_String::strncpy(schema_file_name,tmp.c_str(), 4096);
-
-	xml_schema::properties props;
-	props.schema_location (
-	  "http://gadgetron.sf.net/gadgetron",
-	  std::string (schema_file_name));
-
-	ACE_TCHAR port_no[1024];
-	try {
-		std::auto_ptr<gadgetron::gadgetronConfiguration> cfg(gadgetron::gadgetronConfiguration_(gcfg,0,props));
-		ACE_OS_String::strncpy(port_no, cfg->port().c_str(), 1024);
-	}  catch (const xml_schema::exception& e) {
-		std::cerr << e << std::endl;
-		ACE_DEBUG(( LM_DEBUG, ACE_TEXT("XML Parse Error: %s\n"), e.what() ));
-		ACE_ERROR_RETURN((LM_ERROR, ACE_TEXT("Error parsing configuration file %s.\n"), gcfg.c_str()),-1);
-	}
-
-	static const ACE_TCHAR options[] = ACE_TEXT(":p:");
-	ACE_Get_Opt cmd_opts(argc, argv, options);
-
-	int option;
-	while ((option = cmd_opts()) != EOF) {
-		switch (option) {
-		case 'p':
-			ACE_OS_String::strncpy(port_no, cmd_opts.opt_arg(), 1024);
-			break;
-		case ':':
-			print_usage();
-			ACE_ERROR_RETURN((LM_ERROR, ACE_TEXT("-%c requires an argument.\n"), cmd_opts.opt_opt()),-1);
-			break;
-		default:
-			print_usage();
-			ACE_ERROR_RETURN( (LM_ERROR, ACE_TEXT("Command line parse error\n")), -1);
-			break;
-		}
-	}
-
-
-	ACE_DEBUG(( LM_DEBUG, ACE_TEXT("%IConfiguring services, Running on port %s\n"), port_no ));
-
-	ACE_INET_Addr port_to_listen (port_no);
-	GadgetServerAcceptor acceptor;
-	acceptor.reactor (ACE_Reactor::instance ());
-	if (acceptor.open (port_to_listen) == -1)
-		return 1;
-
-	ACE_Reactor::instance()->run_reactor_event_loop ();
-
-	return 0;
+  // Server entry point: locate and parse gadgetron.xml, apply the -p override,
+  // optionally start the cloud bus, make sure the working directory exists and
+  // then run the ACE reactor loop on the configured port.
+  ACE_TRACE(( ACE_TEXT("main") ));
+    
+  ACE_LOG_MSG->priority_mask( LM_INFO | LM_NOTICE | LM_ERROR| LM_DEBUG,
+                              ACE_Log_Msg::PROCESS);
+
+  // The home folder is derived from the location of the executable
+  std::string  gadgetron_home = get_gadgetron_home();
+
+  if (gadgetron_home.size() == 0) {
+    ACE_ERROR_RETURN((LM_ERROR, ACE_TEXT("Unable to determine GADGETRON_HOME from the executable path.\n")),-1);
+  }
+
+  std::string gcfg = gadgetron_home + std::string("/") + std::string(GADGETRON_CONFIG_PATH) + std::string("/gadgetron.xml");
+  if (!FileInfo(gcfg).exists()) {
+    ACE_ERROR_RETURN((LM_ERROR, ACE_TEXT("Gadgetron configuration file %s not found.\n"), gcfg.c_str()),-1);
+  }
+
+
+  // Zero-initialised and always copied with one element to spare, so the port
+  // text stays NUL-terminated even when the source string is too long.
+  ACE_TCHAR port_no[1024] = {0};
+  const size_t port_no_max = sizeof(port_no)/sizeof(port_no[0]) - 1;
+  std::map<std::string, std::string> gadget_parameters;
+
+  // the working directory of gadgetron should always be set
+  bool workingDirectorySet = false;
+
+  GadgetronXML::GadgetronConfiguration c;
+  try
+    {
+      std::ifstream t(gcfg.c_str());
+      if (!t) {
+        ACE_ERROR_RETURN((LM_ERROR, ACE_TEXT("Unable to open configuration file %s.\n"), gcfg.c_str()),-1);
+      }
+
+      // Slurp the whole file into memory
+      std::string gcfg_text((std::istreambuf_iterator<char>(t)),
+                            std::istreambuf_iterator<char>());
+      
+      GadgetronXML::deserialize(gcfg_text.c_str(), c);
+      ACE_OS_String::strncpy(port_no, c.port.c_str(), port_no_max);
+
+      for (std::vector<GadgetronXML::GadgetronParameter>::iterator it = c.globalGadgetParameter.begin();
+           it != c.globalGadgetParameter.end();
+           ++it)
+        {
+          std::string key = it->name;
+          std::string value = it->value;
+      
+          // A later entry with the same name replaces an earlier one
+          gadget_parameters[key] = value;
+          
+          if ( key == std::string(GT_WORKING_DIRECTORY) ) workingDirectorySet = true;
+        }
+    }  catch (const std::runtime_error& e) {
+    ACE_DEBUG(( LM_DEBUG, ACE_TEXT("XML Parse Error: %s\n"), e.what() ));
+    ACE_ERROR_RETURN((LM_ERROR, ACE_TEXT("Error parsing configuration file %s.\n"), gcfg.c_str()),-1);
+  }
+
+  static const ACE_TCHAR options[] = ACE_TEXT(":p:");
+  ACE_Get_Opt cmd_opts(argc, argv, options);
+
+  // A -p argument overrides the port read from the configuration file
+  int option;
+  while ((option = cmd_opts()) != EOF) {
+    switch (option) {
+    case 'p':
+      ACE_OS_String::strncpy(port_no, cmd_opts.opt_arg(), port_no_max);
+      break;
+    case ':':
+      print_usage();
+      ACE_ERROR_RETURN((LM_ERROR, ACE_TEXT("-%c requires an argument.\n"), cmd_opts.opt_opt()),-1);
+      break;
+    default:
+      print_usage();
+      ACE_ERROR_RETURN( (LM_ERROR, ACE_TEXT("Command line parse error\n")), -1);
+      break;
+    }
+  }
+
+  if (c.cloudBus) {
+    ACE_DEBUG(( LM_DEBUG, ACE_TEXT("Starting cloudBus: %s:%d\n"), 
+                c.cloudBus->multiCastAddress.c_str(), c.cloudBus->port ));
+    Gadgetron::CloudBus::set_mcast_address(c.cloudBus->multiCastAddress.c_str());
+    Gadgetron::CloudBus::set_mcast_port(c.cloudBus->port);
+    Gadgetron::CloudBus::set_gadgetron_port(std::atoi(port_no));
+    Gadgetron::CloudBus::instance();//This actually starts the bus.
+    gadget_parameters["using_cloudbus"] = std::string("true"); //This is our message to the Gadgets that we have activated the bus
+  }
+
+
+  // if the working directory is not set, use the default path
+  if ( !workingDirectorySet )
+    {
+#ifdef _WIN32
+      gadget_parameters[std::string(GT_WORKING_DIRECTORY)] = std::string("c:\\temp\\gadgetron\\");
+#else
+      gadget_parameters[std::string(GT_WORKING_DIRECTORY)] = std::string("/tmp/gadgetron/");
+#endif // _WIN32
+    }
+
+  // check and create workingdirectory
+  std::string workingDirectory = gadget_parameters[std::string(GT_WORKING_DIRECTORY)];
+  if ( !Gadgetron::create_folder_with_all_permissions(workingDirectory) )
+    {
+      ACE_ERROR_RETURN((LM_ERROR, ACE_TEXT("Gadgetron creating working directory %s failed ... \n"), workingDirectory.c_str()),-1);
+    }
+
+  ACE_DEBUG(( LM_DEBUG, ACE_TEXT("%IConfiguring services, Running on port %s\n"), port_no ));
+
+  ACE_INET_Addr port_to_listen (port_no);
+  GadgetServerAcceptor acceptor;
+  acceptor.global_gadget_parameters_ = gadget_parameters;
+  acceptor.reactor (ACE_Reactor::instance ());
+  if (acceptor.open (port_to_listen) == -1)
+    return 1;
+
+  // Blocks until the reactor event loop ends
+  ACE_Reactor::instance()->run_reactor_event_loop ();
+
+  return 0;
 }
diff --git a/apps/gadgetron/pugiconfig.hpp b/apps/gadgetron/pugiconfig.hpp
new file mode 100644
index 0000000..56f1d22
--- /dev/null
+++ b/apps/gadgetron/pugiconfig.hpp
@@ -0,0 +1,72 @@
+/**
+ * pugixml parser - version 1.4
+ * --------------------------------------------------------
+ * Copyright (C) 2006-2014, by Arseny Kapoulkine (arseny.kapoulkine at gmail.com)
+ * Report bugs and download new versions at http://pugixml.org/
+ *
+ * This library is distributed under the MIT License. See notice at the end
+ * of this file.
+ *
+ * This work is based on the pugxml parser, which is:
+ * Copyright (C) 2003, by Kristen Wegner (kristen at tima.net)
+ */
+
+#ifndef HEADER_PUGICONFIG_HPP
+#define HEADER_PUGICONFIG_HPP
+
+// Uncomment this to enable wchar_t mode
+// #define PUGIXML_WCHAR_MODE
+
+// Uncomment this to disable XPath
+// #define PUGIXML_NO_XPATH
+
+// Uncomment this to disable STL
+// #define PUGIXML_NO_STL
+
+// Uncomment this to disable exceptions
+// #define PUGIXML_NO_EXCEPTIONS
+
+// Set this to control attributes for public classes/functions, i.e.:
+// #define PUGIXML_API __declspec(dllexport) // to export all public symbols from DLL
+// #define PUGIXML_CLASS __declspec(dllimport) // to import all classes from DLL
+// #define PUGIXML_FUNCTION __fastcall // to set calling conventions to all public functions to fastcall
+// In absence of PUGIXML_CLASS/PUGIXML_FUNCTION definitions PUGIXML_API is used instead
+
+// Tune these constants to adjust memory-related behavior
+// #define PUGIXML_MEMORY_PAGE_SIZE 32768
+// #define PUGIXML_MEMORY_OUTPUT_STACK 10240
+// #define PUGIXML_MEMORY_XPATH_PAGE_SIZE 4096
+
+// Uncomment this to switch to header-only version
+// #define PUGIXML_HEADER_ONLY
+// #include "pugixml.cpp"
+
+// Uncomment this to enable long long support
+// #define PUGIXML_HAS_LONG_LONG
+
+#endif
+
+/**
+ * Copyright (c) 2006-2014 Arseny Kapoulkine
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
diff --git a/apps/gadgetron/pugixml.cpp b/apps/gadgetron/pugixml.cpp
new file mode 100644
index 0000000..754f92f
--- /dev/null
+++ b/apps/gadgetron/pugixml.cpp
@@ -0,0 +1,10639 @@
+/**
+ * pugixml parser - version 1.4
+ * --------------------------------------------------------
+ * Copyright (C) 2006-2014, by Arseny Kapoulkine (arseny.kapoulkine at gmail.com)
+ * Report bugs and download new versions at http://pugixml.org/
+ *
+ * This library is distributed under the MIT License. See notice at the end
+ * of this file.
+ *
+ * This work is based on the pugxml parser, which is:
+ * Copyright (C) 2003, by Kristen Wegner (kristen at tima.net)
+ */
+
+#ifndef SOURCE_PUGIXML_CPP
+#define SOURCE_PUGIXML_CPP
+
+#include "pugixml.hpp"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#ifdef PUGIXML_WCHAR_MODE
+#	include <wchar.h>
+#endif
+
+#ifndef PUGIXML_NO_XPATH
+#	include <math.h>
+#	include <float.h>
+#	ifdef PUGIXML_NO_EXCEPTIONS
+#		include <setjmp.h>
+#	endif
+#endif
+
+#ifndef PUGIXML_NO_STL
+#	include <istream>
+#	include <ostream>
+#	include <string>
+#endif
+
+// For placement new
+#include <new>
+
+#ifdef _MSC_VER
+#	pragma warning(push)
+#	pragma warning(disable: 4127) // conditional expression is constant
+#	pragma warning(disable: 4324) // structure was padded due to __declspec(align())
+#	pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
+#	pragma warning(disable: 4702) // unreachable code
+#	pragma warning(disable: 4996) // this function or variable may be unsafe
+#	pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
+#endif
+
+#ifdef __INTEL_COMPILER
+#	pragma warning(disable: 177) // function was declared but never referenced 
+#	pragma warning(disable: 279) // controlling expression is constant
+#	pragma warning(disable: 1478 1786) // function was declared "deprecated"
+#	pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
+#endif
+
+#if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
+#	pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
+#endif
+
+#ifdef __BORLANDC__
+#	pragma option push
+#	pragma warn -8008 // condition is always false
+#	pragma warn -8066 // unreachable code
+#endif
+
+#ifdef __SNC__
+// Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
+#	pragma diag_suppress=178 // function was declared but never referenced
+#	pragma diag_suppress=237 // controlling expression is constant
+#endif
+
+// Inlining controls
+#if defined(_MSC_VER) && _MSC_VER >= 1300
+#	define PUGI__NO_INLINE __declspec(noinline)
+#elif defined(__GNUC__)
+#	define PUGI__NO_INLINE __attribute__((noinline))
+#else
+#	define PUGI__NO_INLINE 
+#endif
+
+// Simple static assertion
+#define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
+
+// Digital Mars C++ bug workaround for passing char loaded from memory via stack
+#ifdef __DMC__
+#	define PUGI__DMC_VOLATILE volatile
+#else
+#	define PUGI__DMC_VOLATILE
+#endif
+
+// Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
+#if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
+using std::memcpy;
+using std::memmove;
+#endif
+
+// In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
+#if defined(_MSC_VER) && !defined(__S3E__)
+#	define PUGI__MSVC_CRT_VERSION _MSC_VER
+#endif
+
+#ifdef PUGIXML_HEADER_ONLY
+#	define PUGI__NS_BEGIN namespace pugi { namespace impl {
+#	define PUGI__NS_END } }
+#	define PUGI__FN inline
+#	define PUGI__FN_NO_INLINE inline
+#else
+#	if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
+#		define PUGI__NS_BEGIN namespace pugi { namespace impl {
+#		define PUGI__NS_END } }
+#	else
+#		define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
+#		define PUGI__NS_END } } }
+#	endif
+#	define PUGI__FN
+#	define PUGI__FN_NO_INLINE PUGI__NO_INLINE
+#endif
+
+// uintptr_t
+#if !defined(_MSC_VER) || _MSC_VER >= 1600
+#	include <stdint.h>
+#else
+#	ifndef _UINTPTR_T_DEFINED
+// No native uintptr_t in MSVC6 and in some WinCE versions
+typedef size_t uintptr_t;
+#define _UINTPTR_T_DEFINED
+#	endif
+PUGI__NS_BEGIN
+	typedef unsigned __int8 uint8_t;
+	typedef unsigned __int16 uint16_t;
+	typedef unsigned __int32 uint32_t;
+PUGI__NS_END
+#endif
+
+// Memory allocation
+PUGI__NS_BEGIN
+	// Default allocation hook: hands the request straight to malloc
+	PUGI__FN void* default_allocate(size_t size)
+	{
+		void* block = malloc(size);
+		return block;
+	}
+
+	// Default deallocation hook: the counterpart of default_allocate, releases via free
+	PUGI__FN void default_deallocate(void* ptr)
+	{
+		::free(ptr);
+	}
+
+	// Holder for the currently installed allocation/deallocation functions.
+	// The template parameter is not used by any member; only the <int>
+	// instantiation is named (as xml_memory, below).
+	template <typename T>
+	struct xml_memory_management_function_storage
+	{
+		static allocation_function allocate;
+		static deallocation_function deallocate;
+	};
+
+	// Both hooks start out pointing at the malloc/free based defaults
+	template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
+	template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
+
+	typedef xml_memory_management_function_storage<int> xml_memory;
+PUGI__NS_END
+
+// String utilities
+PUGI__NS_BEGIN
+	// Length of a NUL-terminated string as reported by wcslen (wchar_t mode)
+	// or strlen (otherwise); s must not be null
+	PUGI__FN size_t strlength(const char_t* s)
+	{
+		assert(s);
+
+	#ifndef PUGIXML_WCHAR_MODE
+		const size_t length = strlen(s);
+	#else
+		const size_t length = wcslen(s);
+	#endif
+
+		return length;
+	}
+
+	// Exact equality of two NUL-terminated strings; neither may be null
+	PUGI__FN bool strequal(const char_t* src, const char_t* dst)
+	{
+		assert(src && dst);
+
+	#ifndef PUGIXML_WCHAR_MODE
+		const int order = strcmp(src, dst);
+	#else
+		const int order = wcscmp(src, dst);
+	#endif
+
+		return order == 0;
+	}
+
+	// True when the NUL-terminated lhs consists of exactly the first count
+	// characters of rhs (rhs itself need not be terminated at that point)
+	PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
+	{
+		const char_t* left = lhs;
+		const char_t* right = rhs;
+		const char_t* right_end = rhs + count;
+
+		while (right != right_end)
+		{
+			if (*left != *right) return false;
+
+			++left;
+			++right;
+		}
+
+		// lhs must end exactly where the range ends
+		return *left == 0;
+	}
+
+	// Number of wchar_t elements before the terminating zero; outside wchar_t
+	// mode the count is done by hand so that no wide-character CRT function is needed
+	PUGI__FN size_t strlength_wide(const wchar_t* s)
+	{
+		assert(s);
+
+	#ifdef PUGIXML_WCHAR_MODE
+		return wcslen(s);
+	#else
+		size_t length = 0;
+
+		while (s[length] != 0) ++length;
+
+		return length;
+	#endif
+	}
+
+#ifdef PUGIXML_WCHAR_MODE
+	// Copy a NUL-terminated narrow string into dest one character at a time,
+	// widening each char to wchar_t, and terminate the result. The caller must
+	// supply room for strlen(source) + 1 wide characters; input is assumed to be ASCII
+	PUGI__FN void widen_ascii(wchar_t* dest, const char* source)
+	{
+		wchar_t* out = dest;
+		const char* in = source;
+
+		while (*in)
+		{
+			*out = *in;
+			++out;
+			++in;
+		}
+
+		*out = 0;
+	}
+#endif
+PUGI__NS_END
+
+#if !defined(PUGIXML_NO_STL) || !defined(PUGIXML_NO_XPATH)
+// auto_ptr-like buffer holder for exception recovery
+PUGI__NS_BEGIN
+	// Scope guard for a raw buffer: the destructor passes the buffer to the
+	// deleter unless ownership was taken back with release() first
+	struct buffer_holder
+	{
+		void* data;
+		void (*deleter)(void*);
+
+		buffer_holder(void* data_, void (*deleter_)(void*)): data(data_), deleter(deleter_)
+		{
+		}
+
+		~buffer_holder()
+		{
+			// nothing is held after release() or when constructed with a null buffer
+			if (!data) return;
+
+			deleter(data);
+		}
+
+		// Give the buffer back to the caller; the destructor will then do nothing
+		void* release()
+		{
+			void* const held = data;
+
+			data = 0;
+
+			return held;
+		}
+	};
+PUGI__NS_END
+#endif
+
+PUGI__NS_BEGIN
+	// Page size threshold used by the allocator below; can be overridden at
+	// build time through PUGIXML_MEMORY_PAGE_SIZE
+	static const size_t xml_memory_page_size =
+	#ifdef PUGIXML_MEMORY_PAGE_SIZE
+		PUGIXML_MEMORY_PAGE_SIZE
+	#else
+		32768
+	#endif
+		;
+
+	// Pages are aligned to 32 bytes, so the low five bits of a page address are
+	// zero. The pointer mask clears exactly those bits and the remaining three
+	// masks (16, 8 and 7) partition them.
+	// NOTE(review): presumably these select flag/type bits packed next to the
+	// page pointer in the node/attribute 'header' word - confirm against the users.
+	static const uintptr_t xml_memory_page_alignment = 32;
+	static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1);
+	static const uintptr_t xml_memory_page_name_allocated_mask = 16;
+	static const uintptr_t xml_memory_page_value_allocated_mask = 8;
+	static const uintptr_t xml_memory_page_type_mask = 7;
+
+	struct xml_allocator;
+
+	// Header placed at the start of every allocator page; the usable bytes
+	// follow it, starting at 'data'. Pages form a doubly linked list.
+	struct xml_memory_page
+	{
+		// Treat 'memory' as a page header and zero all of its bookkeeping
+		// fields. Returns null when given null.
+		static xml_memory_page* construct(void* memory)
+		{
+			if (!memory) return 0; //$ redundant, left for performance
+
+			xml_memory_page* result = static_cast<xml_memory_page*>(memory);
+
+			result->allocator = 0;
+			result->memory = 0;
+			result->prev = 0;
+			result->next = 0;
+			result->busy_size = 0;
+			result->freed_size = 0;
+
+			return result;
+		}
+
+		xml_allocator* allocator;
+
+		// Unaligned pointer originally returned by the allocation function;
+		// this is what gets passed back to the deallocation function
+		void* memory;
+
+		xml_memory_page* prev;
+		xml_memory_page* next;
+
+		// Bytes handed out from 'data' and bytes given back so far; the page
+		// is considered empty once the two are equal
+		size_t busy_size;
+		size_t freed_size;
+
+		// First byte of the payload area (the page is allocated larger than
+		// sizeof(xml_memory_page))
+		char data[1];
+	};
+
+	// Bookkeeping stored immediately in front of every string handed out by
+	// xml_allocator::allocate_string; used to find the owning page on release
+	struct xml_memory_string_header
+	{
+		uint16_t page_offset; // offset from page->data
+		uint16_t full_size; // 0 if string occupies whole page
+	};
+
+	// Page-based bump allocator. Small requests are carved sequentially out of
+	// the current page (_root); memory is only returned to the system a whole
+	// page at a time, once everything allocated from that page has been freed.
+	struct xml_allocator
+	{
+		// Start allocating from 'root', continuing after whatever it already holds
+		xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
+		{
+		}
+
+		// Obtain a fresh page with room for data_size payload bytes.
+		// Returns null if the allocation function fails.
+		xml_memory_page* allocate_page(size_t data_size)
+		{
+			size_t size = offsetof(xml_memory_page, data) + data_size;
+
+			// allocate block with some alignment, leaving memory for worst-case padding
+			void* memory = xml_memory::allocate(size + xml_memory_page_alignment);
+			if (!memory) return 0;
+
+			// align upwards to page boundary
+			void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1));
+
+			// prepare page structure
+			xml_memory_page* page = xml_memory_page::construct(page_memory);
+			assert(page);
+
+			// remember the unaligned pointer: that is what must be freed later
+			page->memory = memory;
+			page->allocator = _root->allocator;
+
+			return page;
+		}
+
+		// Return a page's underlying block to the deallocation function
+		static void deallocate_page(xml_memory_page* page)
+		{
+			xml_memory::deallocate(page->memory);
+		}
+
+		// Slow path of allocate_memory, taken when the current page is full
+		void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
+
+		// Allocate 'size' bytes; out_page receives the page the bytes live in.
+		// Fast path: bump the cursor of the current page.
+		void* allocate_memory(size_t size, xml_memory_page*& out_page)
+		{
+			if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page);
+
+			void* buf = _root->data + _busy_size;
+
+			_busy_size += size;
+
+			out_page = _root;
+
+			return buf;
+		}
+
+		// Record that 'size' bytes at 'ptr' inside 'page' are no longer used.
+		// The page is recycled or released once all of its bytes are freed.
+		void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
+		{
+			// the current page's busy_size is only kept in _busy_size; sync it first
+			if (page == _root) page->busy_size = _busy_size;
+
+			assert(ptr >= page->data && ptr < page->data + page->busy_size);
+			(void)!ptr;
+
+			page->freed_size += size;
+			assert(page->freed_size <= page->busy_size);
+
+			if (page->freed_size == page->busy_size)
+			{
+				if (page->next == 0)
+				{
+					assert(_root == page);
+
+					// top page freed, just reset sizes
+					page->busy_size = page->freed_size = 0;
+					_busy_size = 0;
+				}
+				else
+				{
+					assert(_root != page);
+					assert(page->prev);
+
+					// remove from the list
+					page->prev->next = page->next;
+					page->next->prev = page->prev;
+
+					// deallocate
+					deallocate_page(page);
+				}
+			}
+		}
+
+		// Allocate storage for 'length' characters, preceded by a hidden
+		// xml_memory_string_header. Returns null on allocation failure.
+		char_t* allocate_string(size_t length)
+		{
+			// allocate memory for string and header block
+			size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
+			
+			// round size up to pointer alignment boundary
+			size_t full_size = (size + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1);
+
+			xml_memory_page* page;
+			xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
+
+			if (!header) return 0;
+
+			// setup header
+			ptrdiff_t page_offset = reinterpret_cast<char*>(header) - page->data;
+
+			assert(page_offset >= 0 && page_offset < (1 << 16));
+			header->page_offset = static_cast<uint16_t>(page_offset);
+
+			// full_size == 0 for large strings that occupy the whole page
+			assert(full_size < (1 << 16) || (page->busy_size == full_size && page_offset == 0));
+			header->full_size = static_cast<uint16_t>(full_size < (1 << 16) ? full_size : 0);
+
+			// round-trip through void* to avoid 'cast increases required alignment of target type' warning
+			// header is guaranteed a pointer-sized alignment, which should be enough for char_t
+			return static_cast<char_t*>(static_cast<void*>(header + 1));
+		}
+
+		// Release a string obtained from allocate_string
+		void deallocate_string(char_t* string)
+		{
+			// this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
+			// we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
+
+			// get header
+			xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
+
+			// deallocate
+			// (the stored offset leads back from the header to the start of the owning page)
+			size_t page_offset = offsetof(xml_memory_page, data) + header->page_offset;
+			xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
+
+			// if full_size == 0 then this string occupies the whole page
+			size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size;
+
+			deallocate_memory(header, full_size, page);
+		}
+
+		// Page currently being filled (the last page of the list) and the
+		// number of bytes already handed out from it
+		xml_memory_page* _root;
+		size_t _busy_size;
+	};
+
+	PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) // allocates 'size' bytes at the start of a newly allocated page; returns 0 (and sets out_page to 0) if no page could be obtained
+	{
+		const size_t large_allocation_threshold = xml_memory_page_size / 4; // requests above a quarter page get a page of their own
+
+		xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
+		out_page = page;
+
+		if (!page) return 0;
+
+		if (size <= large_allocation_threshold)
+		{
+			_root->busy_size = _busy_size; // store the current fill level in the old root page before switching pages
+
+			// insert page at the end of linked list; it becomes the new root
+			page->prev = _root;
+			_root->next = page;
+			_root = page;
+
+			_busy_size = size; // the new root starts out holding just this request
+		}
+		else
+		{
+			// insert page before the end of linked list, so that it is deleted as soon as possible
+			// the last page is not deleted even if it's empty (see deallocate_memory)
+			assert(_root->prev);
+
+			page->prev = _root->prev;
+			page->next = _root;
+
+			_root->prev->next = page;
+			_root->prev = page;
+		}
+
+		// allocate inside page: the request always starts at the beginning of the new page's data
+		page->busy_size = size;
+
+		return page->data;
+	}
+PUGI__NS_END
+
+namespace pugi
+{
+	/// A 'name=value' XML attribute structure.
+	struct xml_attribute_struct
+	{
+		/// Default ctor: stores the owning page address in the header and zeroes every pointer
+		xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0)
+		{
+		}
+
+		uintptr_t header;	///< Address of the owning memory page; destroy_attribute also tests name/value 'allocated' flag bits in it.
+
+		char_t* name;	///< Pointer to attribute name.
+		char_t*	value;	///< Pointer to attribute value.
+
+		xml_attribute_struct* prev_attribute_c;	///< Previous attribute (cyclic list): the first attribute points back to the last one
+		xml_attribute_struct* next_attribute;	///< Next attribute
+	};
+
+	/// An XML document tree node.
+	struct xml_node_struct
+	{
+		/// Default ctor: packs the owning page address together with (type - 1) into the header and zeroes every pointer
+		/// \param type - node type
+		xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
+		{
+		}
+
+		uintptr_t header;	///< Owning page address combined with (type - 1); destroy_node also tests name/value 'allocated' flag bits in it.
+
+		xml_node_struct*		parent;					///< Pointer to parent
+
+		char_t*					name;					///< Pointer to element name.
+		char_t*					value;					///< Pointer to any associated string data.
+
+		xml_node_struct*		first_child;			///< First child
+		
+		xml_node_struct*		prev_sibling_c;			///< Left brother (cyclic list): the first child points back to the last one
+		xml_node_struct*		next_sibling;			///< Right brother
+		
+		xml_attribute_struct*	first_attribute;		///< First attribute
+	};
+}
+
+PUGI__NS_BEGIN
+	struct xml_extra_buffer // node of a singly linked list of additional character buffers (see xml_document_struct::extra_buffers)
+	{
+		char_t* buffer; // the buffer referenced by this list entry
+		xml_extra_buffer* next; // following list entry
+	};
+
+	struct xml_document_struct: public xml_node_struct, public xml_allocator // document root: a node_document node that is also the allocator constructed on the same page
+	{
+		xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
+		{
+		}
+
+		const char_t* buffer; // starts out as 0; NOTE(review): presumably the parsed document text, confirm against the loader
+
+		xml_extra_buffer* extra_buffers; // head of the xml_extra_buffer list, initially empty
+	};
+
+	// Returns the allocator recorded on the memory page that holds the given node.
+	// The page address is stored in the node header and is extracted with the page pointer mask.
+	inline xml_allocator& get_allocator(const xml_node_struct* node)
+	{
+		assert(node);
+
+		const uintptr_t page_address = node->header & xml_memory_page_pointer_mask;
+		xml_memory_page* owner = reinterpret_cast<xml_memory_page*>(page_address);
+
+		return *owner->allocator;
+	}
+PUGI__NS_END
+
+// Low-level DOM operations
+PUGI__NS_BEGIN
+	// Allocates storage for one attribute from the given allocator and constructs it in place.
+	// Returns 0 when the allocator cannot supply memory; callers test the result for null.
+	inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
+	{
+		xml_memory_page* page;
+		void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page);
+
+		// allocate_memory reports failure with a null pointer; constructing an object there is undefined behavior
+		if (!memory) return 0;
+
+		return new (memory) xml_attribute_struct(page);
+	}
+
+	// Allocates storage for one node of the requested type from the given allocator and constructs it in place.
+	// Returns 0 when the allocator cannot supply memory; callers test the result for null.
+	inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
+	{
+		xml_memory_page* page;
+		void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page);
+
+		// allocate_memory reports failure with a null pointer; constructing an object there is undefined behavior
+		if (!memory) return 0;
+
+		return new (memory) xml_node_struct(page, type);
+	}
+
+	// Releases an attribute: first the name/value strings that the header flags mark as allocated,
+	// then the attribute's own storage on the page recorded in its header.
+	inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
+	{
+		const uintptr_t flags = a->header;
+
+		if (flags & impl::xml_memory_page_name_allocated_mask)
+			alloc.deallocate_string(a->name);
+
+		if (flags & impl::xml_memory_page_value_allocated_mask)
+			alloc.deallocate_string(a->value);
+
+		xml_memory_page* owner = reinterpret_cast<xml_memory_page*>(flags & xml_memory_page_pointer_mask);
+
+		alloc.deallocate_memory(a, sizeof(xml_attribute_struct), owner);
+	}
+
+	// Releases a node together with everything hanging off it: its allocated name/value strings,
+	// all of its attributes, all of its children (recursively) and finally the node storage itself.
+	inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
+	{
+		const uintptr_t flags = n->header;
+
+		if (flags & impl::xml_memory_page_name_allocated_mask)
+			alloc.deallocate_string(n->name);
+
+		if (flags & impl::xml_memory_page_value_allocated_mask)
+			alloc.deallocate_string(n->value);
+
+		// the successor is fetched before each element is destroyed, since destruction frees its links
+		xml_attribute_struct* attr = n->first_attribute;
+
+		while (attr)
+		{
+			xml_attribute_struct* following = attr->next_attribute;
+
+			destroy_attribute(attr, alloc);
+			attr = following;
+		}
+
+		xml_node_struct* child = n->first_child;
+
+		while (child)
+		{
+			xml_node_struct* following = child->next_sibling;
+
+			destroy_node(child, alloc);
+			child = following;
+		}
+
+		xml_memory_page* owner = reinterpret_cast<xml_memory_page*>(flags & xml_memory_page_pointer_mask);
+
+		alloc.deallocate_memory(n, sizeof(xml_node_struct), owner);
+	}
+
+	// Creates a node of the given type and links it in as the last child of 'node'.
+	// Returns the new child, or 0 if it could not be allocated.
+	PUGI__FN_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
+	{
+		xml_node_struct* child = allocate_node(alloc, type);
+		if (!child) return 0;
+
+		child->parent = node;
+
+		xml_node_struct* head = node->first_child;
+
+		if (!head)
+		{
+			// only child: it is both the first element and its own cyclic predecessor
+			node->first_child = child;
+			child->prev_sibling_c = child;
+
+			return child;
+		}
+
+		// the first child's cyclic 'previous' link always designates the current last child
+		xml_node_struct* tail = head->prev_sibling_c;
+
+		tail->next_sibling = child;
+		child->prev_sibling_c = tail;
+		head->prev_sibling_c = child;
+
+		return child;
+	}
+
+	// Creates an attribute and links it in as the last attribute of 'node'.
+	// Returns the new attribute, or 0 if it could not be allocated.
+	PUGI__FN_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc)
+	{
+		xml_attribute_struct* a = allocate_attribute(alloc);
+		if (!a) return 0;
+
+		xml_attribute_struct* head = node->first_attribute;
+
+		if (!head)
+		{
+			// only attribute: it is both the first element and its own cyclic predecessor
+			node->first_attribute = a;
+			a->prev_attribute_c = a;
+
+			return a;
+		}
+
+		// the first attribute's cyclic 'previous' link always designates the current last attribute
+		xml_attribute_struct* tail = head->prev_attribute_c;
+
+		tail->next_attribute = a;
+		a->prev_attribute_c = tail;
+		head->prev_attribute_c = a;
+
+		return a;
+	}
+PUGI__NS_END
+
+// Helper classes for code generation
+PUGI__NS_BEGIN
+	struct opt_false // compile-time 'false' tag; utf_decoder reads ::value to decide whether to byte-swap its input
+	{
+		enum { value = 0 };
+	};
+
+	struct opt_true // compile-time 'true' tag; utf_decoder reads ::value to decide whether to byte-swap its input
+	{
+		enum { value = 1 };
+	};
+PUGI__NS_END
+
+// Unicode utilities
+PUGI__NS_BEGIN
+	// Exchanges the two bytes of a 16-bit value.
+	inline uint16_t endian_swap(uint16_t value)
+	{
+		const unsigned int low_byte = value & 0xff;
+		const unsigned int high_byte = value >> 8;
+
+		return static_cast<uint16_t>((low_byte << 8) | high_byte);
+	}
+
+	// Reverses the order of the four bytes of a 32-bit value.
+	inline uint32_t endian_swap(uint32_t value)
+	{
+		const uint32_t byte0 = value & 0xff;
+		const uint32_t byte1 = (value >> 8) & 0xff;
+		const uint32_t byte2 = (value >> 16) & 0xff;
+		const uint32_t byte3 = value >> 24;
+
+		return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
+	}
+
+	// Decoder trait that only measures output: adds the number of UTF-8 bytes a code point needs to a running total.
+	struct utf8_counter
+	{
+		typedef size_t value_type;
+
+		static value_type low(value_type result, uint32_t ch)
+		{
+			// 1 byte for U+0000..U+007F, 2 bytes for U+0080..U+07FF, 3 bytes for U+0800..U+FFFF
+			const value_type units = (ch < 0x80) ? 1 : (ch < 0x800) ? 2 : 3;
+
+			return result + units;
+		}
+
+		static value_type high(value_type result, uint32_t)
+		{
+			// every code point in U+10000..U+10FFFF takes 4 bytes
+			const value_type units = 4;
+
+			return result + units;
+		}
+	};
+
+	// Decoder trait that emits UTF-8: writes the encoding of a code point at 'result' and returns the position after it.
+	struct utf8_writer
+	{
+		typedef uint8_t* value_type;
+
+		static value_type low(value_type result, uint32_t ch)
+		{
+			// U+0000..U+007F: single byte
+			if (ch < 0x80)
+			{
+				*result++ = static_cast<uint8_t>(ch);
+
+				return result;
+			}
+
+			// U+0080..U+07FF: lead byte plus one continuation byte
+			if (ch < 0x800)
+			{
+				*result++ = static_cast<uint8_t>(0xC0 | (ch >> 6));
+				*result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
+
+				return result;
+			}
+
+			// U+0800..U+FFFF: lead byte plus two continuation bytes
+			*result++ = static_cast<uint8_t>(0xE0 | (ch >> 12));
+			*result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
+			*result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
+
+			return result;
+		}
+
+		static value_type high(value_type result, uint32_t ch)
+		{
+			// U+10000..U+10FFFF: lead byte plus three continuation bytes
+			*result++ = static_cast<uint8_t>(0xF0 | (ch >> 18));
+			*result++ = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
+			*result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
+			*result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
+
+			return result;
+		}
+
+		static value_type any(value_type result, uint32_t ch)
+		{
+			// code points below U+10000 use the 1-3 byte forms, everything else the 4 byte form
+			if (ch < 0x10000) return low(result, ch);
+
+			return high(result, ch);
+		}
+	};
+
+	// Decoder trait that only measures output: adds the number of UTF-16 units a code point needs to a running total.
+	struct utf16_counter
+	{
+		typedef size_t value_type;
+
+		static value_type low(value_type result, uint32_t)
+		{
+			// one 16-bit unit
+			const value_type units = 1;
+
+			return result + units;
+		}
+
+		static value_type high(value_type result, uint32_t)
+		{
+			// a surrogate pair: two 16-bit units
+			const value_type units = 2;
+
+			return result + units;
+		}
+	};
+
+	// Decoder trait that emits UTF-16: writes the encoding of a code point at 'result' and returns the position after it.
+	struct utf16_writer
+	{
+		typedef uint16_t* value_type;
+
+		static value_type low(value_type result, uint32_t ch)
+		{
+			// stored as a single unit
+			*result++ = static_cast<uint16_t>(ch);
+
+			return result;
+		}
+
+		static value_type high(value_type result, uint32_t ch)
+		{
+			// split the offset above U+10000 into the upper bits and the lower 10 bits of a surrogate pair
+			const uint32_t offset = static_cast<uint32_t>(ch - 0x10000);
+
+			*result++ = static_cast<uint16_t>(0xD800 + (offset >> 10));
+			*result++ = static_cast<uint16_t>(0xDC00 + (offset & 0x3ff));
+
+			return result;
+		}
+
+		static value_type any(value_type result, uint32_t ch)
+		{
+			if (ch < 0x10000) return low(result, ch);
+
+			return high(result, ch);
+		}
+	};
+
+	// Decoder trait that only measures output: in UTF-32 every code point adds exactly one unit to the running total.
+	struct utf32_counter
+	{
+		typedef size_t value_type;
+
+		static value_type low(value_type result, uint32_t)
+		{
+			const value_type units = 1;
+
+			return result + units;
+		}
+
+		static value_type high(value_type result, uint32_t)
+		{
+			const value_type units = 1;
+
+			return result + units;
+		}
+	};
+
+	// Decoder trait that emits UTF-32: stores the code point unchanged at 'result' and returns the position after it.
+	struct utf32_writer
+	{
+		typedef uint32_t* value_type;
+
+		static value_type low(value_type result, uint32_t ch)
+		{
+			*result++ = ch;
+
+			return result;
+		}
+
+		static value_type high(value_type result, uint32_t ch)
+		{
+			*result++ = ch;
+
+			return result;
+		}
+
+		static value_type any(value_type result, uint32_t ch)
+		{
+			*result++ = ch;
+
+			return result;
+		}
+	};
+
+	// Decoder trait that emits Latin-1: code points above 255 cannot be represented and are written as '?'.
+	struct latin1_writer
+	{
+		typedef uint8_t* value_type;
+
+		static value_type low(value_type result, uint32_t ch)
+		{
+			if (ch > 255) ch = '?';
+
+			*result++ = static_cast<uint8_t>(ch);
+
+			return result;
+		}
+
+		static value_type high(value_type result, uint32_t ch)
+		{
+			// nothing in this range fits into one byte, so the value itself is ignored
+			(void)ch;
+
+			*result++ = '?';
+
+			return result;
+		}
+	};
+
+	template <size_t size> struct wchar_selector; // maps sizeof(wchar_t) to a same-sized integer type and the matching counter/writer traits; only sizes 2 and 4 are defined
+
+	template <> struct wchar_selector<2> // 2-byte wchar_t: handled with the UTF-16 traits
+	{
+		typedef uint16_t type;
+		typedef utf16_counter counter;
+		typedef utf16_writer writer;
+	};
+
+	template <> struct wchar_selector<4> // 4-byte wchar_t: handled with the UTF-32 traits
+	{
+		typedef uint32_t type;
+		typedef utf32_counter counter;
+		typedef utf32_writer writer;
+	};
+
+	typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter; // counter trait for this platform's wchar_t
+	typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer; // writer trait for this platform's wchar_t
+
+	template <typename Traits, typename opt_swap = opt_false> struct utf_decoder // decodes input blocks into code points and feeds each one to Traits::low (below U+10000) or Traits::high; opt_swap::value selects byte-swapping of 16/32-bit input units
+	{
+		static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result) // 'size' is in bytes; malformed or truncated sequences are skipped one byte at a time
+		{
+			const uint8_t utf8_byte_mask = 0x3f;
+
+			while (size)
+			{
+				uint8_t lead = *data;
+
+				// 0xxxxxxx -> U+0000..U+007F
+				if (lead < 0x80)
+				{
+					result = Traits::low(result, lead);
+					data += 1;
+					size -= 1;
+
+					// process aligned single-byte (ascii) blocks, four bytes per test, while none of them has the high bit set
+					if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
+					{
+						// round-trip through void* to silence 'cast increases required alignment of target type' warnings
+						while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
+						{
+							result = Traits::low(result, data[0]);
+							result = Traits::low(result, data[1]);
+							result = Traits::low(result, data[2]);
+							result = Traits::low(result, data[3]);
+							data += 4;
+							size -= 4;
+						}
+					}
+				}
+				// 110xxxxx -> U+0080..U+07FF (requires one continuation byte of the form 10xxxxxx)
+				else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
+				{
+					result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
+					data += 2;
+					size -= 2;
+				}
+				// 1110xxxx -> U+0800-U+FFFF (requires two continuation bytes)
+				else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
+				{
+					result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
+					data += 3;
+					size -= 3;
+				}
+				// 11110xxx -> U+10000..U+10FFFF (requires three continuation bytes)
+				else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
+				{
+					result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
+					data += 4;
+					size -= 4;
+				}
+				// 10xxxxxx or 11111xxx -> invalid; also reached for truncated or malformed multi-byte sequences, one byte is dropped
+				else
+				{
+					data += 1;
+					size -= 1;
+				}
+			}
+
+			return result;
+		}
+
+		static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result) // 'size' is in 16-bit units; unpaired surrogates are skipped
+		{
+			const uint16_t* end = data + size;
+
+			while (data < end)
+			{
+				unsigned int lead = opt_swap::value ? endian_swap(*data) : *data;
+
+				// U+0000..U+D7FF
+				if (lead < 0xD800)
+				{
+					result = Traits::low(result, lead);
+					data += 1;
+				}
+				// U+E000..U+FFFF
+				else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
+				{
+					result = Traits::low(result, lead);
+					data += 1;
+				}
+				// surrogate pair lead (only considered when a following unit exists)
+				else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && data + 1 < end)
+				{
+					uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
+
+					if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
+					{
+						result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
+						data += 2;
+					}
+					else
+					{
+						data += 1; // lead without a trailing surrogate: drop the lead only
+					}
+				}
+				else
+				{
+					data += 1; // stray trailing surrogate, or a lead surrogate at the very end of the block
+				}
+			}
+
+			return result;
+		}
+
+		static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result) // 'size' is in 32-bit units; values are forwarded without range validation
+		{
+			const uint32_t* end = data + size;
+
+			while (data < end)
+			{
+				uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
+
+				// U+0000..U+FFFF
+				if (lead < 0x10000)
+				{
+					result = Traits::low(result, lead);
+					data += 1;
+				}
+				// U+10000..U+10FFFF
+				else
+				{
+					result = Traits::high(result, lead);
+					data += 1;
+				}
+			}
+
+			return result;
+		}
+
+		static inline typename Traits::value_type decode_latin1_block(const uint8_t* data, size_t size, typename Traits::value_type result) // every input byte is forwarded as the code point of the same value
+		{
+			for (size_t i = 0; i < size; ++i)
+			{
+				result = Traits::low(result, data[i]);
+			}
+
+			return result;
+		}
+
+		static inline typename Traits::value_type decode_wchar_block_impl(const uint16_t* data, size_t size, typename Traits::value_type result) // overload chosen when wchar_t is 2 bytes wide
+		{
+			return decode_utf16_block(data, size, result);
+		}
+
+		static inline typename Traits::value_type decode_wchar_block_impl(const uint32_t* data, size_t size, typename Traits::value_type result) // overload chosen when wchar_t is 4 bytes wide
+		{
+			return decode_utf32_block(data, size, result);
+		}
+
+		static inline typename Traits::value_type decode_wchar_block(const wchar_t* data, size_t size, typename Traits::value_type result) // reinterprets wchar_t data as the same-sized integer type and dispatches to the matching overload above
+		{
+			return decode_wchar_block_impl(reinterpret_cast<const wchar_selector<sizeof(wchar_t)>::type*>(data), size, result);
+		}
+	};
+
+	// Writes the byte-swapped form of each of the first 'length' elements of 'data' to 'result', in order.
+	template <typename T> PUGI__FN void convert_utf_endian_swap(T* result, const T* data, size_t length)
+	{
+		size_t index = 0;
+
+		while (index < length)
+		{
+			result[index] = endian_swap(data[index]);
+			++index;
+		}
+	}
+
+#ifdef PUGIXML_WCHAR_MODE
+	// Writes the byte-swapped form of each of the first 'length' wide characters of 'data' to 'result', in order.
+	// Each character is swapped as the unsigned integer type of the same size as wchar_t.
+	PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
+	{
+		typedef wchar_selector<sizeof(wchar_t)>::type unit_type;
+
+		size_t index = 0;
+
+		while (index < length)
+		{
+			const unit_type swapped = endian_swap(static_cast<unit_type>(data[index]));
+
+			result[index] = static_cast<wchar_t>(swapped);
+			++index;
+		}
+	}
+#endif
+PUGI__NS_END
+
+PUGI__NS_BEGIN
+	enum chartype_t // bit flags stored per character in chartype_table; each comment lists the characters that carry the flag
+	{
+		ct_parse_pcdata = 1,	// \0, &, \r, <
+		ct_parse_attr = 2,		// \0, &, \r, ', "
+		ct_parse_attr_ws = 4,	// \0, &, \r, ', ", \n, tab
+		ct_space = 8,			// \r, \n, space, tab
+		ct_parse_cdata = 16,	// \0, ], >, \r
+		ct_parse_comment = 32,	// \0, -, >, \r
+		ct_symbol = 64,			// Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
+		ct_start_symbol = 128	// Any symbol > 127, a-z, A-Z, _, :
+	};
+
+	static const unsigned char chartype_table[256] = // one entry per character code; each entry is the OR of the chartype_t flags that apply to that character
+	{
+		55,  0,   0,   0,   0,   0,   0,   0,      0,   12,  12,  0,   0,   63,  0,   0,   // 0-15
+		0,   0,   0,   0,   0,   0,   0,   0,      0,   0,   0,   0,   0,   0,   0,   0,   // 16-31
+		8,   0,   6,   0,   0,   0,   7,   6,      0,   0,   0,   0,   0,   96,  64,  0,   // 32-47
+		64,  64,  64,  64,  64,  64,  64,  64,     64,  64,  192, 0,   1,   0,   48,  0,   // 48-63
+		0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 64-79
+		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0,   0,   16,  0,   192, // 80-95
+		0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 96-111
+		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0, 0, 0, 0, 0,           // 112-127
+
+		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 128+ (all treated as ct_symbol | ct_start_symbol)
+		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
+		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
+		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
+		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
+		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
+		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
+		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192
+	};
+
+	enum chartypex_t // bit flags stored per character in chartypex_table; each comment lists the characters that carry the flag
+	{
+		ctx_special_pcdata = 1,   // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
+		ctx_special_attr = 2,     // Any symbol >= 0 and < 32 (except \t), &, <, >, "
+		ctx_start_symbol = 4,	  // Any symbol > 127, a-z, A-Z, _
+		ctx_digit = 8,			  // 0-9
+		ctx_symbol = 16			  // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
+	};
+	
+	static const unsigned char chartypex_table[256] = // one entry per character code; each entry is the OR of the chartypex_t flags that apply to that character
+	{
+		3,  3,  3,  3,  3,  3,  3,  3,     3,  0,  2,  3,  3,  2,  3,  3,     // 0-15
+		3,  3,  3,  3,  3,  3,  3,  3,     3,  3,  3,  3,  3,  3,  3,  3,     // 16-31
+		0,  0,  2,  0,  0,  0,  3,  0,     0,  0,  0,  0,  0, 16, 16,  0,     // 32-47
+		24, 24, 24, 24, 24, 24, 24, 24,    24, 24, 0,  0,  3,  0,  3,  0,     // 48-63
+
+		0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 64-79
+		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  20,    // 80-95
+		0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 96-111
+		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  0,     // 112-127
+
+		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 128+ (all treated as ctx_start_symbol | ctx_symbol)
+		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
+		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
+		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
+		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
+		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
+		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
+		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20
+	};
+	
+#ifdef PUGIXML_WCHAR_MODE // wide characters: codes below 128 index the table directly, every other code shares entry 128
+	#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
+#else // narrow characters: the character is converted to unsigned char and indexes the 256-entry table directly
+	#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
+#endif
+	// both wrappers yield a non-zero value when character c carries any of the flags in ct
+	#define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
+	#define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
+
+	// Reports whether the lowest-addressed byte of an unsigned int holding 1 is 1,
+	// i.e. whether the least significant byte is stored first.
+	PUGI__FN bool is_little_endian()
+	{
+		unsigned int probe = 1;
+		unsigned char* first_byte = reinterpret_cast<unsigned char*>(&probe);
+
+		return first_byte[0] == 1;
+	}
+
+	// Returns the concrete encoding that corresponds to wchar_t on this platform:
+	// UTF-16 for a 2-byte wchar_t, UTF-32 otherwise, in the byte order reported by is_little_endian().
+	PUGI__FN xml_encoding get_wchar_encoding()
+	{
+		PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
+
+		const bool little = is_little_endian();
+
+		if (sizeof(wchar_t) == 2)
+		{
+			return little ? encoding_utf16_le : encoding_utf16_be;
+		}
+
+		return little ? encoding_utf32_le : encoding_utf32_be;
+	}
+
+	PUGI__FN xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3) // guesses the encoding from the first four bytes of a buffer; checks run most-specific first, so their order matters
+	{
+		// look for BOM in first few bytes (the 4-byte utf32 patterns are tested before the 2-byte utf16 ones that they begin with)
+		if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
+		if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
+		if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
+		if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
+		if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
+
+		// look for <, <? or <?xm in various encodings (0x3c is '<', 0x3f is '?', 0x78 is 'x', 0x6d is 'm')
+		if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
+		if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
+		if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
+		if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
+		if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return encoding_utf8;
+
+		// look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
+		if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
+		if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
+
+		// no known BOM or markup signature detected, assume utf8
+		return encoding_utf8;
+	}
+
+	// Resolves the requested encoding to a concrete one: the wchar/utf16/utf32 placeholders are mapped to
+	// platform-specific variants, explicit encodings are returned unchanged, and encoding_auto is resolved
+	// by inspecting the first four bytes of the buffer (utf8 is assumed when fewer than four are available).
+	PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
+	{
+		// wchar maps onto the utf flavour that matches wchar_t
+		if (encoding == encoding_wchar) return get_wchar_encoding();
+
+		// endian-neutral utf16/utf32 requests take the platform byte order
+		if (encoding == encoding_utf16 || encoding == encoding_utf32)
+		{
+			const bool little = is_little_endian();
+
+			if (encoding == encoding_utf16) return little ? encoding_utf16_le : encoding_utf16_be;
+
+			return little ? encoding_utf32_le : encoding_utf32_be;
+		}
+
+		// anything else that was requested explicitly is honoured as is
+		if (encoding != encoding_auto) return encoding;
+
+		// autodetection needs four bytes to look at
+		if (size < 4) return encoding_utf8;
+
+		// guess from the leading bytes (based on XML specification, Appendix F.1)
+		const uint8_t* bytes = static_cast<const uint8_t*>(contents);
+
+		PUGI__DMC_VOLATILE uint8_t b0 = bytes[0], b1 = bytes[1], b2 = bytes[2], b3 = bytes[3];
+
+		return guess_buffer_encoding(b0, b1, b2, b3);
+	}
+
+	// Produces a writable char_t view of 'contents' ('size' is in bytes).
+	// When is_mutable is set the caller's memory is used directly and out_length is the number of char_t units in it.
+	// Otherwise the data is copied into a newly allocated buffer with a zero terminator appended, and out_length
+	// includes that terminator. Returns false only if the allocation fails.
+	PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+	{
+		size_t length = size / sizeof(char_t);
+
+		if (is_mutable)
+		{
+			out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
+			out_length = length;
+		}
+		else
+		{
+			char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
+			if (!buffer) return false;
+
+			// memcpy requires valid pointers even for a zero-byte copy, and an empty input may come with a null 'contents'
+			if (length > 0) memcpy(buffer, contents, length * sizeof(char_t));
+
+			buffer[length] = 0;
+
+			out_buffer = buffer;
+			out_length = length + 1;
+		}
+
+		return true;
+	}
+
+#ifdef PUGIXML_WCHAR_MODE
+	// Tells whether the two encodings are the same UTF width (16 or 32) but differ in byte order,
+	// so that converting between them is a pure byte swap.
+	PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
+	{
+		const bool utf16_opposites =
+			(le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be);
+
+		const bool utf32_opposites =
+			(le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
+
+		return utf16_opposites || utf32_opposites;
+	}
+
+	// Byte-swaps a buffer of wide characters ('size' is in bytes).
+	// A mutable input is swapped in place and out_length is its length in characters; otherwise the swapped
+	// text goes into a newly allocated, zero-terminated buffer and out_length includes the terminator.
+	// Returns false only if the allocation fails.
+	PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+	{
+		const char_t* source = static_cast<const char_t*>(contents);
+		size_t unit_count = size / sizeof(char_t);
+
+		if (is_mutable)
+		{
+			// source and destination are the same memory
+			char_t* in_place = const_cast<char_t*>(source);
+
+			convert_wchar_endian_swap(in_place, source, unit_count);
+
+			out_buffer = in_place;
+			out_length = unit_count;
+
+			return true;
+		}
+
+		char_t* storage = static_cast<char_t*>(xml_memory::allocate((unit_count + 1) * sizeof(char_t)));
+		if (!storage) return false;
+
+		convert_wchar_endian_swap(storage, source, unit_count);
+		storage[unit_count] = 0;
+
+		out_buffer = storage;
+		out_length = unit_count + 1;
+
+		return true;
+	}
+
+	// Decodes a UTF-8 buffer ('size' is in bytes) into a newly allocated, zero-terminated wchar_t buffer.
+	// out_length includes the terminator. Returns false only if the allocation fails.
+	PUGI__FN bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
+	{
+		const uint8_t* source = static_cast<const uint8_t*>(contents);
+		size_t source_units = size;
+
+		// measuring pass: how many wchar_t units the decoded text occupies
+		size_t unit_count = utf_decoder<wchar_counter>::decode_utf8_block(source, source_units, 0);
+
+		// room for the decoded text plus the terminating zero
+		char_t* storage = static_cast<char_t*>(xml_memory::allocate((unit_count + 1) * sizeof(char_t)));
+		if (!storage) return false;
+
+		// writing pass: decode straight into the new buffer
+		wchar_writer::value_type first = reinterpret_cast<wchar_writer::value_type>(storage);
+		wchar_writer::value_type last = utf_decoder<wchar_writer>::decode_utf8_block(source, source_units, first);
+
+		assert(last == first + unit_count);
+		*last = 0;
+
+		out_buffer = storage;
+		out_length = unit_count + 1;
+
+		return true;
+	}
+
+	// Decodes a UTF-16 buffer ('size' is in bytes) into a newly allocated, zero-terminated wchar_t buffer.
+	// The opt_swap tag selects whether input units are byte-swapped while decoding.
+	// out_length includes the terminator. Returns false only if the allocation fails.
+	template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+	{
+		const uint16_t* source = static_cast<const uint16_t*>(contents);
+		size_t source_units = size / sizeof(uint16_t);
+
+		// measuring pass: how many wchar_t units the decoded text occupies
+		size_t unit_count = utf_decoder<wchar_counter, opt_swap>::decode_utf16_block(source, source_units, 0);
+
+		// room for the decoded text plus the terminating zero
+		char_t* storage = static_cast<char_t*>(xml_memory::allocate((unit_count + 1) * sizeof(char_t)));
+		if (!storage) return false;
+
+		// writing pass: decode straight into the new buffer
+		wchar_writer::value_type first = reinterpret_cast<wchar_writer::value_type>(storage);
+		wchar_writer::value_type last = utf_decoder<wchar_writer, opt_swap>::decode_utf16_block(source, source_units, first);
+
+		assert(last == first + unit_count);
+		*last = 0;
+
+		out_buffer = storage;
+		out_length = unit_count + 1;
+
+		return true;
+	}
+
+	// Decodes a UTF-32 buffer ('size' is in bytes) into a newly allocated, zero-terminated wchar_t buffer.
+	// The opt_swap tag selects whether input units are byte-swapped while decoding.
+	// out_length includes the terminator. Returns false only if the allocation fails.
+	template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+	{
+		const uint32_t* source = static_cast<const uint32_t*>(contents);
+		size_t source_units = size / sizeof(uint32_t);
+
+		// measuring pass: how many wchar_t units the decoded text occupies
+		size_t unit_count = utf_decoder<wchar_counter, opt_swap>::decode_utf32_block(source, source_units, 0);
+
+		// room for the decoded text plus the terminating zero
+		char_t* storage = static_cast<char_t*>(xml_memory::allocate((unit_count + 1) * sizeof(char_t)));
+		if (!storage) return false;
+
+		// writing pass: decode straight into the new buffer
+		wchar_writer::value_type first = reinterpret_cast<wchar_writer::value_type>(storage);
+		wchar_writer::value_type last = utf_decoder<wchar_writer, opt_swap>::decode_utf32_block(source, source_units, first);
+
+		assert(last == first + unit_count);
+		*last = 0;
+
+		out_buffer = storage;
+		out_length = unit_count + 1;
+
+		return true;
+	}
+
+	// Converts a Latin-1 buffer ('size' is in bytes) into a newly allocated, zero-terminated wchar_t buffer.
+	// Every input byte yields exactly one output unit, so no measuring pass is needed.
+	// out_length includes the terminator. Returns false only if the allocation fails.
+	PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
+	{
+		const uint8_t* source = static_cast<const uint8_t*>(contents);
+		size_t source_units = size;
+
+		// output length in wchar_t units equals the input length in bytes
+		size_t unit_count = source_units;
+
+		// room for the converted text plus the terminating zero
+		char_t* storage = static_cast<char_t*>(xml_memory::allocate((unit_count + 1) * sizeof(char_t)));
+		if (!storage) return false;
+
+		// widen each byte into the new buffer
+		wchar_writer::value_type first = reinterpret_cast<wchar_writer::value_type>(storage);
+		wchar_writer::value_type last = utf_decoder<wchar_writer>::decode_latin1_block(source, source_units, first);
+
+		assert(last == first + unit_count);
+		*last = 0;
+
+		out_buffer = storage;
+		out_length = unit_count + 1;
+
+		return true;
+	}
+
+	// Wide-character build: converts 'contents' ('size' is in bytes) from the given encoding into a wchar_t buffer
+	// by dispatching to the matching converter. Returns that converter's result; an encoding that matches none
+	// of the cases trips the assertion and yields false.
+	PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
+	{
+		// the encoding wchar_t has on this platform
+		xml_encoding wchar_encoding = get_wchar_encoding();
+
+		// input is already in the native encoding: no conversion at all
+		if (encoding == wchar_encoding)
+		{
+			return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+		}
+
+		// same width, opposite byte order: a byte swap is enough
+		if (need_endian_swap_utf(encoding, wchar_encoding))
+		{
+			return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
+		}
+
+		// utf8 input
+		if (encoding == encoding_utf8)
+		{
+			return convert_buffer_utf8(out_buffer, out_length, contents, size);
+		}
+
+		// utf16 input: swap units while decoding unless the input is in platform byte order
+		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
+		{
+			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+			if (native_encoding == encoding)
+			{
+				return convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false());
+			}
+
+			return convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
+		}
+
+		// utf32 input: swap units while decoding unless the input is in platform byte order
+		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
+		{
+			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+			if (native_encoding == encoding)
+			{
+				return convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false());
+			}
+
+			return convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
+		}
+
+		// latin1 input
+		if (encoding == encoding_latin1)
+		{
+			return convert_buffer_latin1(out_buffer, out_length, contents, size);
+		}
+
+		assert(!"Invalid encoding");
+		return false;
+	}
+#else
+	// Transcodes a UTF-16 buffer ('size' is in bytes) into a newly allocated, zero-terminated UTF-8 buffer.
+	// The opt_swap tag selects whether input units are byte-swapped while decoding.
+	// out_length includes the terminator. Returns false only if the allocation fails.
+	template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+	{
+		const uint16_t* source = static_cast<const uint16_t*>(contents);
+		size_t source_units = size / sizeof(uint16_t);
+
+		// measuring pass: how many utf8 bytes the text occupies
+		size_t byte_count = utf_decoder<utf8_counter, opt_swap>::decode_utf16_block(source, source_units, 0);
+
+		// room for the transcoded text plus the terminating zero
+		char_t* storage = static_cast<char_t*>(xml_memory::allocate((byte_count + 1) * sizeof(char_t)));
+		if (!storage) return false;
+
+		// writing pass: transcode straight into the new buffer
+		uint8_t* first = reinterpret_cast<uint8_t*>(storage);
+		uint8_t* last = utf_decoder<utf8_writer, opt_swap>::decode_utf16_block(source, source_units, first);
+
+		assert(last == first + byte_count);
+		*last = 0;
+
+		out_buffer = storage;
+		out_length = byte_count + 1;
+
+		return true;
+	}
+
+	// Transcodes a UTF-32 buffer ('size' is in bytes) into a newly allocated, zero-terminated UTF-8 buffer.
+	// The opt_swap tag selects whether input units are byte-swapped while decoding.
+	// out_length includes the terminator. Returns false only if the allocation fails.
+	template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+	{
+		const uint32_t* source = static_cast<const uint32_t*>(contents);
+		size_t source_units = size / sizeof(uint32_t);
+
+		// measuring pass: how many utf8 bytes the text occupies
+		size_t byte_count = utf_decoder<utf8_counter, opt_swap>::decode_utf32_block(source, source_units, 0);
+
+		// room for the transcoded text plus the terminating zero
+		char_t* storage = static_cast<char_t*>(xml_memory::allocate((byte_count + 1) * sizeof(char_t)));
+		if (!storage) return false;
+
+		// writing pass: transcode straight into the new buffer
+		uint8_t* first = reinterpret_cast<uint8_t*>(storage);
+		uint8_t* last = utf_decoder<utf8_writer, opt_swap>::decode_utf32_block(source, source_units, first);
+
+		assert(last == first + byte_count);
+		*last = 0;
+
+		out_buffer = storage;
+		out_length = byte_count + 1;
+
+		return true;
+	}
+
+	// Returns the number of leading bytes of 'data' whose value does not exceed 127;
+	// the result equals 'size' when no byte above 127 is present.
+	PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
+	{
+		size_t prefix = 0;
+
+		while (prefix < size && data[prefix] <= 127)
+		{
+			++prefix;
+		}
+
+		return prefix;
+	}
+
+	// Converts a Latin-1 buffer ('size' is in bytes) to UTF-8.
+	// Input made up only of bytes <= 127 needs no conversion and is handed to get_mutable_buffer unchanged.
+	// Otherwise the 7-bit prefix is copied verbatim and the remainder is transcoded into a newly allocated,
+	// zero-terminated buffer; out_length then includes the terminator. Returns false if an allocation fails.
+	PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+	{
+		const uint8_t* source = static_cast<const uint8_t*>(contents);
+		size_t source_units = size;
+
+		// leading part that is identical in latin1 and utf8
+		size_t ascii_units = get_latin1_7bit_prefix_length(source, source_units);
+		assert(ascii_units <= source_units);
+
+		const uint8_t* rest = source + ascii_units;
+		size_t rest_units = source_units - ascii_units;
+
+		// nothing beyond the 7-bit prefix: the original bytes already are the result
+		if (rest_units == 0)
+		{
+			return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+		}
+
+		// measuring pass: utf8 bytes needed for prefix plus transcoded remainder
+		size_t byte_count = ascii_units + utf_decoder<utf8_counter>::decode_latin1_block(rest, rest_units, 0);
+
+		// room for the converted text plus the terminating zero
+		char_t* storage = static_cast<char_t*>(xml_memory::allocate((byte_count + 1) * sizeof(char_t)));
+		if (!storage) return false;
+
+		// writing pass: prefix is copied as is, the remainder is transcoded behind it
+		memcpy(storage, source, ascii_units);
+
+		uint8_t* first = reinterpret_cast<uint8_t*>(storage);
+		uint8_t* last = utf_decoder<utf8_writer>::decode_latin1_block(rest, rest_units, first + ascii_units);
+
+		assert(last == first + byte_count);
+		*last = 0;
+
+		out_buffer = storage;
+		out_length = byte_count + 1;
+
+		return true;
+	}
+
+	// Dispatches to the converter that turns `contents` (in the given source encoding) into UTF-8.
+	// UTF-8 input takes the get_mutable_buffer path without transcoding; UTF-16/UTF-32 input is decoded
+	// with byte swapping enabled when the source byte order differs from the native one.
+	// Any other encoding value triggers an assertion and yields false.
+	PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
+	{
+		// fast path: input is already utf8
+		if (encoding == encoding_utf8)
+			return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+
+		// utf16 input
+		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
+		{
+			const xml_encoding native_utf16 = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+			if (native_utf16 == encoding)
+				return convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false());
+
+			return convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
+		}
+
+		// utf32 input
+		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
+		{
+			const xml_encoding native_utf32 = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+			if (native_utf32 == encoding)
+				return convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false());
+
+			return convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
+		}
+
+		// latin1 input
+		if (encoding == encoding_latin1)
+			return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
+
+		assert(!"Invalid encoding");
+		return false;
+	}
+#endif
+
+	// First conversion pass: returns the utf8_counter result for the wide string [str, str + length),
+	// i.e. the number of UTF-8 units the conversion will produce.
+	PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
+	{
+		const size_t utf8_units = utf_decoder<utf8_counter>::decode_wchar_block(str, length, 0);
+
+		return utf8_units;
+	}
+
+	// Second conversion pass: writes the UTF-8 form of [str, str + length) into buffer and stores
+	// a zero terminator at buffer[size]. size must be the value measured for the same input.
+	PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
+	{
+		uint8_t* const out_first = reinterpret_cast<uint8_t*>(buffer);
+		uint8_t* const out_last = utf_decoder<utf8_writer>::decode_wchar_block(str, length, out_first);
+
+		// the writer must emit exactly the measured number of bytes
+		assert(out_first + size == out_last);
+		(void)!out_last;
+
+		// zero-terminate
+		buffer[size] = 0;
+	}
+	
+#ifndef PUGIXML_NO_STL
+	// Converts the wide string [str, str + length) to a UTF-8 std::string.
+	PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
+	{
+		// pass 1: measure
+		const size_t utf8_size = as_utf8_begin(str, length);
+
+		// string of the measured size, filled in place below
+		std::string converted(utf8_size, '\0');
+
+		// pass 2: transcode (skipped for empty output, where &converted[0] is not needed)
+		if (utf8_size != 0)
+			as_utf8_end(&converted[0], utf8_size, str, length);
+
+		return converted;
+	}
+
+	// Converts the UTF-8 data [str, str + size) to a wide string.
+	PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
+	{
+		const uint8_t* utf8 = reinterpret_cast<const uint8_t*>(str);
+
+		// pass 1: measure the output in wchar_t units
+		const size_t wide_length = utf_decoder<wchar_counter>::decode_utf8_block(utf8, size, 0);
+
+		// string of the measured length, filled in place below
+		std::basic_string<wchar_t> converted(wide_length, L'\0');
+
+		if (wide_length == 0) return converted;
+
+		// pass 2: transcode
+		wchar_writer::value_type out_first = reinterpret_cast<wchar_writer::value_type>(&converted[0]);
+		wchar_writer::value_type out_last = utf_decoder<wchar_writer>::decode_utf8_block(utf8, size, out_first);
+
+		assert(out_first + wide_length == out_last);
+		(void)!out_last;
+
+		return converted;
+	}
+#endif
+
+	// Decides whether a string of `length` characters may be written over the existing string `target`.
+	// `allocated` is non-zero when target is marked as separately allocated by the caller's header flag.
+	inline bool strcpy_insitu_allow(size_t length, uintptr_t allocated, char_t* target)
+	{
+		assert(target);
+
+		const size_t capacity = strlength(target);
+
+		// the new contents must fit into the old string
+		if (capacity < length) return false;
+
+		// non-allocated (document buffer) memory is always reused when it fits
+		if (!allocated) return true;
+
+		// short heap strings are always reused
+		const size_t reuse_threshold = 32;
+		if (capacity < reuse_threshold) return true;
+
+		// longer heap strings are reused only if less than half would be wasted
+		const size_t waste = capacity - length;
+
+		return waste < capacity / 2;
+	}
+
+	// Assigns `source` to the string slot `dest`.
+	// `header & header_mask` tells whether dest currently points to allocator-owned memory; the flag
+	// is updated to match the new state. An empty source releases the old string and sets dest to null.
+	// Returns false only when a new buffer is required and cannot be allocated.
+	PUGI__FN bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source)
+	{
+		assert(header);
+
+		const size_t length = strlength(source);
+
+		if (length == 0)
+		{
+			// empty string and null pointer are equivalent, so only the old memory has to go
+			xml_allocator* allocator = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
+
+			if (header & header_mask) allocator->deallocate_string(dest);
+
+			// the slot no longer owns allocated memory
+			dest = 0;
+			header &= ~header_mask;
+
+			return true;
+		}
+
+		// size of the text including its zero terminator
+		const size_t bytes = (length + 1) * sizeof(char_t);
+
+		if (dest && strcpy_insitu_allow(length, header & header_mask, dest))
+		{
+			// old buffer is good enough: overwrite it
+			memcpy(dest, source, bytes);
+
+			return true;
+		}
+
+		xml_allocator* allocator = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
+
+		char_t* fresh = allocator->allocate_string(length + 1);
+		if (!fresh) return false;
+
+		memcpy(fresh, source, bytes);
+
+		// release the old buffer only now, so overlapping source memory and allocation failures are harmless
+		if (header & header_mask) allocator->deallocate_string(dest);
+
+		// the slot now owns allocated memory
+		dest = fresh;
+		header |= header_mask;
+
+		return true;
+	}
+
+	// Tracks a region of characters that has been removed from a string being edited in place.
+	// `end` is the position right after the removed region and `size` its accumulated length;
+	// text between gaps is shifted left lazily, when the next gap is pushed or on flush.
+	struct gap
+	{
+		char_t* end;
+		size_t size;
+
+		gap(): end(0), size(0)
+		{
+		}
+
+		// Registers `count` characters starting at s as removed and advances s past them.
+		// A previously registered gap is closed first, so the two merge into one.
+		void push(char_t*& s, size_t count)
+		{
+			if (end) close_up(s);
+
+			s += count;
+
+			end = s;
+			size += count;
+		}
+
+		// Closes the pending gap (if any) and returns the past-the-end pointer of the compacted text.
+		char_t* flush(char_t* s)
+		{
+			if (!end) return s;
+
+			close_up(s);
+
+			return s - size;
+		}
+
+	private:
+		// Moves [end, s) left by `size` characters, over the removed region.
+		void close_up(char_t* s)
+		{
+			assert(s >= end);
+			memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
+		}
+	};
+	
+	// Decodes one character/entity reference in place.
+	// The character after s selects the reference kind (callers in this file invoke it with *s == '&').
+	// Handled forms: &#NNN; (decimal), &#xHHH; (hex), &amp; &apos; &gt; &lt; &quot;.
+	// On success the decoded character is written at s, the leftover characters of the reference are
+	// registered in g as a gap, and the position right after ';' is returned.
+	// Otherwise the text is left as is and the position where matching stopped is returned.
+	PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
+	{
+		char_t* stre = s + 1;
+
+		switch (*stre)
+		{
+			case '#':	// &#...
+			{
+				// accumulated code point value
+				unsigned int ucsc = 0;
+
+				if (stre[1] == 'x') // &#x... (hex code)
+				{
+					stre += 2;
+
+					char_t ch = *stre;
+
+					// "&#x;" has no digits: not a reference
+					if (ch == ';') return stre;
+
+					for (;;)
+					{
+						if (static_cast<unsigned int>(ch - '0') <= 9)
+							ucsc = 16 * ucsc + (ch - '0');
+						else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5) // (ch | ' ') folds 'A'-'F' to 'a'-'f'
+							ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
+						else if (ch == ';')
+							break;
+						else // cancel
+							return stre;
+
+						ch = *++stre;
+					}
+					
+					++stre;
+				}
+				else	// &#... (dec code)
+				{
+					char_t ch = *++stre;
+
+					// "&#;" has no digits: not a reference
+					if (ch == ';') return stre;
+
+					for (;;)
+					{
+						if (static_cast<unsigned int>(static_cast<unsigned int>(ch) - '0') <= 9)
+							ucsc = 10 * ucsc + (ch - '0');
+						else if (ch == ';')
+							break;
+						else // cancel
+							return stre;
+
+						ch = *++stre;
+					}
+					
+					++stre;
+				}
+
+				// write the encoded code point at s; s ends up right after the written units
+			#ifdef PUGIXML_WCHAR_MODE
+				s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
+			#else
+				s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
+			#endif
+					
+				// the rest of the reference text becomes a gap
+				g.push(s, stre - s);
+				return stre;
+			}
+
+			case 'a':	// &a
+			{
+				++stre;
+
+				if (*stre == 'm') // &am
+				{
+					if (*++stre == 'p' && *++stre == ';') // &amp;
+					{
+						*s++ = '&';
+						++stre;
+							
+						g.push(s, stre - s);
+						return stre;
+					}
+				}
+				else if (*stre == 'p') // &ap
+				{
+					if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
+					{
+						*s++ = '\'';
+						++stre;
+
+						g.push(s, stre - s);
+						return stre;
+					}
+				}
+				break;
+			}
+
+			case 'g': // &g
+			{
+				if (*++stre == 't' && *++stre == ';') // &gt;
+				{
+					*s++ = '>';
+					++stre;
+					
+					g.push(s, stre - s);
+					return stre;
+				}
+				break;
+			}
+
+			case 'l': // &l
+			{
+				if (*++stre == 't' && *++stre == ';') // &lt;
+				{
+					*s++ = '<';
+					++stre;
+						
+					g.push(s, stre - s);
+					return stre;
+				}
+				break;
+			}
+
+			case 'q': // &q
+			{
+				if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
+				{
+					*s++ = '"';
+					++stre;
+					
+					g.push(s, stre - s);
+					return stre;
+				}
+				break;
+			}
+
+			default:
+				break;
+		}
+		
+		// not a recognized reference: resume scanning where matching stopped
+		return stre;
+	}
+
+	// Utility macro for last character handling
+	#define ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
+
+	// Normalizes line endings inside a comment body in place and zero-terminates it at the closing "--".
+	// endch is the original last character of the buffer (consulted by ENDSWITH when the zero terminator is reached).
+	// Returns the position after the comment terminator, or 0 if the end of the data is reached first.
+	PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
+	{
+		gap g;
+
+		for (;;)
+		{
+			// run over characters that need no handling
+			while (!PUGI__IS_CHARTYPE(*s, ct_parse_comment)) ++s;
+
+			if (*s == '\r')
+			{
+				// a lone 0x0d or a 0x0d 0x0a pair both become a single 0x0a
+				*s++ = '\n';
+
+				if (*s == '\n') g.push(s, 1);
+
+				continue;
+			}
+
+			if (s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>'))
+			{
+				// comment ends here
+				*g.flush(s) = 0;
+
+				return s + (s[2] == '>' ? 3 : 2);
+			}
+
+			if (*s == 0) return 0;
+
+			++s;
+		}
+	}
+
+	// Normalizes line endings inside a CDATA body in place and zero-terminates it at the closing "]]".
+	// endch is the original last character of the buffer (consulted by ENDSWITH when the zero terminator is reached).
+	// Returns the position one past the first ']' of the terminator, or 0 if the end of the data is reached first.
+	PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
+	{
+		gap g;
+
+		for (;;)
+		{
+			// run over characters that need no handling
+			while (!PUGI__IS_CHARTYPE(*s, ct_parse_cdata)) ++s;
+
+			if (*s == '\r')
+			{
+				// a lone 0x0d or a 0x0d 0x0a pair both become a single 0x0a
+				*s++ = '\n';
+
+				if (*s == '\n') g.push(s, 1);
+
+				continue;
+			}
+
+			if (s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'))
+			{
+				// CDATA ends here
+				*g.flush(s) = 0;
+
+				return s + 1;
+			}
+
+			if (*s == 0) return 0;
+
+			++s;
+		}
+	}
+	
+	typedef char_t* (*strconv_pcdata_t)(char_t*);
+		
+	// In-place PCDATA converter, specialized at compile time by three flags:
+	// opt_trim (strip trailing whitespace), opt_eol (normalize line endings), opt_escape (decode references).
+	template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
+	{
+		// Closes pending gaps, optionally drops trailing whitespace (never going before begin)
+		// and zero-terminates the resulting text.
+		static void finish(gap& g, char_t* s, char_t* begin)
+		{
+			char_t* last = g.flush(s);
+
+			if (opt_trim::value)
+			{
+				while (last > begin && PUGI__IS_CHARTYPE(last[-1], ct_space))
+					--last;
+			}
+
+			*last = 0;
+		}
+
+		// Converts the text starting at s up to the next '<' or the end of the data.
+		// Returns the position after '<', or the position of the zero terminator when the data ends first.
+		static char_t* parse(char_t* s)
+		{
+			gap g;
+
+			char_t* const begin = s;
+
+			for (;;)
+			{
+				// run over characters that need no handling
+				while (!PUGI__IS_CHARTYPE(*s, ct_parse_pcdata)) ++s;
+
+				if (*s == '<')
+				{
+					// PCDATA ends here
+					finish(g, s, begin);
+
+					return s + 1;
+				}
+
+				if (opt_eol::value && *s == '\r')
+				{
+					// a lone 0x0d or a 0x0d 0x0a pair both become a single 0x0a
+					*s++ = '\n';
+
+					if (*s == '\n') g.push(s, 1);
+				}
+				else if (opt_escape::value && *s == '&')
+				{
+					s = strconv_escape(s, g);
+				}
+				else if (*s == 0)
+				{
+					finish(g, s, begin);
+
+					return s;
+				}
+				else ++s;
+			}
+		}
+	};
+	
+	// Selects the strconv_pcdata_impl specialization matching the parse_trim_pcdata, parse_eol
+	// and parse_escapes bits of optmask.
+	PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
+	{
+		PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
+
+		const bool escapes = (optmask & 0x10) != 0;
+		const bool eol = (optmask & 0x20) != 0;
+		const bool trim = (optmask & 0x0800) != 0;
+
+		// template argument order is <trim, eol, escapes>
+		if (trim)
+		{
+			if (eol)
+			{
+				if (escapes) return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse;
+				return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse;
+			}
+
+			if (escapes) return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse;
+			return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse;
+		}
+
+		if (eol)
+		{
+			if (escapes) return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse;
+			return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse;
+		}
+
+		if (escapes) return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse;
+		return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse;
+	}
+
+	typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
+	
+	// In-place attribute value converters. Each parse_* function processes the value starting at s
+	// (right after the opening quote), zero-terminates it where the closing end_quote was found and
+	// returns the position after that quote, or 0 if the end of the data is reached first.
+	// When opt_escape::value is set, '&' references are decoded through strconv_escape.
+	template <typename opt_escape> struct strconv_attribute_impl
+	{
+		// Whitespace normalization: leading and trailing whitespace is removed and every run of
+		// whitespace inside the value is replaced by a single space.
+		static char_t* parse_wnorm(char_t* s, char_t end_quote)
+		{
+			gap g;
+
+			// trim leading whitespaces
+			if (PUGI__IS_CHARTYPE(*s, ct_space))
+			{
+				char_t* str = s;
+				
+				do ++str;
+				while (PUGI__IS_CHARTYPE(*str, ct_space));
+				
+				g.push(s, str - s);
+			}
+
+			while (true)
+			{
+				while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr_ws | ct_space)) ++s;
+				
+				if (*s == end_quote)
+				{
+					char_t* str = g.flush(s);
+					
+					// terminate, then walk backwards zeroing trailing whitespace
+					do *str-- = 0;
+					while (PUGI__IS_CHARTYPE(*str, ct_space));
+				
+					return s + 1;
+				}
+				else if (PUGI__IS_CHARTYPE(*s, ct_space))
+				{
+					// first whitespace of a run becomes a space...
+					*s++ = ' ';
+		
+					// ...and the rest of the run is removed
+					if (PUGI__IS_CHARTYPE(*s, ct_space))
+					{
+						char_t* str = s + 1;
+						while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
+						
+						g.push(s, str - s);
+					}
+				}
+				else if (opt_escape::value && *s == '&')
+				{
+					s = strconv_escape(s, g);
+				}
+				else if (!*s)
+				{
+					return 0;
+				}
+				else ++s;
+			}
+		}
+
+		// Whitespace conversion: every whitespace character is replaced by a space;
+		// a 0x0d 0x0a pair produces a single space.
+		static char_t* parse_wconv(char_t* s, char_t end_quote)
+		{
+			gap g;
+
+			while (true)
+			{
+				while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr_ws)) ++s;
+				
+				if (*s == end_quote)
+				{
+					*g.flush(s) = 0;
+				
+					return s + 1;
+				}
+				else if (PUGI__IS_CHARTYPE(*s, ct_space))
+				{
+					if (*s == '\r')
+					{
+						*s++ = ' ';
+				
+						// drop the 0x0a of a 0x0d 0x0a pair
+						if (*s == '\n') g.push(s, 1);
+					}
+					else *s++ = ' ';
+				}
+				else if (opt_escape::value && *s == '&')
+				{
+					s = strconv_escape(s, g);
+				}
+				else if (!*s)
+				{
+					return 0;
+				}
+				else ++s;
+			}
+		}
+
+		// End-of-line normalization: a lone 0x0d or a 0x0d 0x0a pair becomes a single 0x0a.
+		static char_t* parse_eol(char_t* s, char_t end_quote)
+		{
+			gap g;
+
+			while (true)
+			{
+				while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr)) ++s;
+				
+				if (*s == end_quote)
+				{
+					*g.flush(s) = 0;
+				
+					return s + 1;
+				}
+				else if (*s == '\r')
+				{
+					*s++ = '\n';
+					
+					// drop the 0x0a of a 0x0d 0x0a pair
+					if (*s == '\n') g.push(s, 1);
+				}
+				else if (opt_escape::value && *s == '&')
+				{
+					s = strconv_escape(s, g);
+				}
+				else if (!*s)
+				{
+					return 0;
+				}
+				else ++s;
+			}
+		}
+
+		// No whitespace or end-of-line processing; only reference decoding (if enabled) and termination.
+		static char_t* parse_simple(char_t* s, char_t end_quote)
+		{
+			gap g;
+
+			while (true)
+			{
+				while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr)) ++s;
+				
+				if (*s == end_quote)
+				{
+					*g.flush(s) = 0;
+				
+					return s + 1;
+				}
+				else if (opt_escape::value && *s == '&')
+				{
+					s = strconv_escape(s, g);
+				}
+				else if (!*s)
+				{
+					return 0;
+				}
+				else ++s;
+			}
+		}
+	};
+
+	// Selects the attribute value converter for the given parse options.
+	// Precedence: parse_wnorm_attribute, then parse_wconv_attribute, then parse_eol, otherwise the simple
+	// converter; parse_escapes picks the opt_true or opt_false specialization.
+	PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
+	{
+		PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
+
+		const bool escapes = (optmask & 0x10) != 0;
+
+		if (optmask & 0x80) // wnorm overrides wconv and eol
+		{
+			if (escapes) return strconv_attribute_impl<opt_true>::parse_wnorm;
+			return strconv_attribute_impl<opt_false>::parse_wnorm;
+		}
+
+		if (optmask & 0x40) // wconv overrides eol
+		{
+			if (escapes) return strconv_attribute_impl<opt_true>::parse_wconv;
+			return strconv_attribute_impl<opt_false>::parse_wconv;
+		}
+
+		if (optmask & 0x20) // eol
+		{
+			if (escapes) return strconv_attribute_impl<opt_true>::parse_eol;
+			return strconv_attribute_impl<opt_false>::parse_eol;
+		}
+
+		if (escapes) return strconv_attribute_impl<opt_true>::parse_simple;
+		return strconv_attribute_impl<opt_false>::parse_simple;
+	}
+
+	// Builds an xml_parse_result carrying the given status and offset (offset defaults to 0).
+	inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
+	{
+		xml_parse_result outcome;
+
+		outcome.offset = offset;
+		outcome.status = status;
+
+		return outcome;
+	}
+
+	struct xml_parser
+	{
+		xml_allocator alloc;
+		char_t* error_offset;
+		xml_parse_status error_status;
+		
+		// Parser utilities.
+		#define PUGI__SKIPWS()			{ while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
+		#define PUGI__OPTSET(OPT)			( optmsk & (OPT) )
+		#define PUGI__PUSHNODE(TYPE)		{ cursor = append_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
+		#define PUGI__POPNODE()			{ cursor = cursor->parent; }
+		#define PUGI__SCANFOR(X)			{ while (*s != 0 && !(X)) ++s; }
+		#define PUGI__SCANWHILE(X)		{ while ((X)) ++s; }
+		#define PUGI__ENDSEG()			{ ch = *s; *s = 0; ++s; }
+		#define PUGI__THROW_ERROR(err, m)	return error_offset = m, error_status = err, static_cast<char_t*>(0)
+		#define PUGI__CHECK_ERROR(err, m)	{ if (*s == 0) PUGI__THROW_ERROR(err, m); }
+		
+		// Creates a parser holding a copy of alloc_, with no error recorded yet
+		// (null error_offset, error_status == status_ok).
+		xml_parser(const xml_allocator& alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
+		{
+		}
+
+		// DOCTYPE consists of nested sections of the following possible types:
+		// <!-- ... -->, <? ... ?>, "...", '...'
+		// <![...]]>
+		// <!...>
+		// First group can not contain nested groups
+		// Second group can contain nested groups of the same type
+		// Third group can contain all other groups
+		// Skips one primitive DOCTYPE section starting at s: a quoted string ("..." or '...'),
+		// a processing instruction (<? ... ?>) or a comment (<!-- ... -->).
+		// Returns the position right after the section. If the section is unterminated, or s does not
+		// start any of these sections, records status_bad_doctype and returns 0.
+		char_t* parse_doctype_primitive(char_t* s)
+		{
+			if (*s == '"' || *s == '\'')
+			{
+				// quoted string: scan for the matching quote character
+				char_t ch = *s++;
+				PUGI__SCANFOR(*s == ch);
+				if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+				s++;
+			}
+			else if (s[0] == '<' && s[1] == '?')
+			{
+				// <? ... ?>
+				s += 2;
+				PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
+				if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+				// step over '?>'
+				s += 2;
+			}
+			else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
+			{
+				// <!-- ... -->
+				s += 4;
+				PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
+				if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+				// step over the three characters of '-->'; advancing by four would swallow the character
+				// that follows the comment and could move past the zero terminator at the end of the buffer
+				s += 3;
+			}
+			else PUGI__THROW_ERROR(status_bad_doctype, s);
+
+			return s;
+		}
+
+		// Skips a "<![ ... ]]>" section starting at s, including nested sections of the same kind.
+		// Returns the position after the closing "]]>"; if the data ends first, records
+		// status_bad_doctype and returns 0.
+		char_t* parse_doctype_ignore(char_t* s)
+		{
+			assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
+			++s;
+
+			while (*s)
+			{
+				if (s[0] == '<' && s[1] == '!' && s[2] == '[')
+				{
+					// nested ignore section
+					s = parse_doctype_ignore(s);
+					if (!s) return s;
+
+					continue;
+				}
+
+				// ignore section end
+				if (s[0] == ']' && s[1] == ']' && s[2] == '>') return s + 3;
+
+				++s;
+			}
+
+			PUGI__THROW_ERROR(status_bad_doctype, s);
+		}
+
+		// Skips a "<!...>" DOCTYPE group starting at s, descending into nested groups, ignore sections
+		// and primitive sections. Returns the position of the closing '>'. Reaching the end of the data
+		// is accepted only for the top-level group when the original last character (endch) was '>';
+		// otherwise status_bad_doctype is recorded and 0 is returned.
+		char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel)
+		{
+			assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
+			++s;
+
+			while (*s)
+			{
+				switch (*s)
+				{
+				case '<':
+					if (s[1] == '!' && s[2] != '-')
+					{
+						if (s[2] == '[')
+						{
+							// ignore
+							s = parse_doctype_ignore(s);
+							if (!s) return s;
+						}
+						else
+						{
+							// some control group
+							s = parse_doctype_group(s, endch, false);
+							if (!s) return s;
+
+							// skip >
+							assert(*s == '>');
+							++s;
+						}
+					}
+					else
+					{
+						// unknown tag (forbidden), or some primitive group
+						s = parse_doctype_primitive(s);
+						if (!s) return s;
+					}
+					break;
+
+				case '"':
+				case '\'':
+					// quoted string
+					s = parse_doctype_primitive(s);
+					if (!s) return s;
+					break;
+
+				case '>':
+					return s;
+
+				default:
+					++s;
+				}
+			}
+
+			if (!toplevel || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
+
+			return s;
+		}
+
+		// Parses a construct that starts with "<!": a comment, a CDATA section or a DOCTYPE.
+		// s points at the '!' on entry. Nodes are appended under cursor only when the matching
+		// parse_comments / parse_cdata / parse_doctype option is set; otherwise the construct is skipped.
+		// endch is the original last character of the buffer, used to accept a terminator cut off by
+		// the zero terminator. Returns the position after the construct, or 0 with the error recorded.
+		char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
+		{
+			// parse node contents, starting with exclamation mark
+			++s;
+
+			if (*s == '-') // '<!-...'
+			{
+				++s;
+
+				if (*s == '-') // '<!--...'
+				{
+					++s;
+
+					if (PUGI__OPTSET(parse_comments))
+					{
+						PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
+						cursor->value = s; // Save the offset.
+					}
+
+					// line ending conversion is only needed when the comment text is kept
+					if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
+					{
+						s = strconv_comment(s, endch);
+
+						if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
+					}
+					else
+					{
+						// Scan for terminating '-->'.
+						PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>'));
+						PUGI__CHECK_ERROR(status_bad_comment, s);
+
+						if (PUGI__OPTSET(parse_comments))
+							*s = 0; // Zero-terminate this segment at the first terminating '-'.
+
+						s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
+					}
+				}
+				else PUGI__THROW_ERROR(status_bad_comment, s);
+			}
+			else if (*s == '[')
+			{
+				// '<![CDATA[...'
+				if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
+				{
+					++s;
+
+					if (PUGI__OPTSET(parse_cdata))
+					{
+						PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
+						cursor->value = s; // Save the offset.
+
+						if (PUGI__OPTSET(parse_eol))
+						{
+							s = strconv_cdata(s, endch);
+
+							if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
+						}
+						else
+						{
+							// Scan for terminating ']]>'.
+							PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
+							PUGI__CHECK_ERROR(status_bad_cdata, s);
+
+							*s++ = 0; // Zero-terminate this segment.
+						}
+					}
+					else // Flagged for discard, but we still have to scan for the terminator.
+					{
+						// Scan for terminating ']]>'.
+						PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
+						PUGI__CHECK_ERROR(status_bad_cdata, s);
+
+						++s;
+					}
+
+					// all three paths above leave s on the second ']' of the terminator
+					s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
+				}
+				else PUGI__THROW_ERROR(status_bad_cdata, s);
+			}
+			else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && ENDSWITH(s[6], 'E'))
+			{
+				// move back to the '<' that opened "<!DOCTYPE"
+				s -= 2;
+
+				// DOCTYPE is rejected when the current node has a parent
+				if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+				// position right after "<!DOCTYPE" (9 characters)
+				char_t* mark = s + 9;
+
+				s = parse_doctype_group(s, endch, true);
+				if (!s) return s;
+
+				assert((*s == 0 && endch == '>') || *s == '>');
+				if (*s) *s++ = 0;
+
+				if (PUGI__OPTSET(parse_doctype))
+				{
+					// skip whitespace between "<!DOCTYPE" and the doctype text
+					while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
+
+					PUGI__PUSHNODE(node_doctype);
+
+					cursor->value = mark;
+
+					PUGI__POPNODE();
+				}
+			}
+			// data ended right after "<!": pick the error from the character the terminator replaced
+			else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
+			else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
+			else PUGI__THROW_ERROR(status_unrecognized_tag, s);
+
+			return s;
+		}
+
+		// Parses a construct that starts with "<?": an XML declaration or a processing instruction.
+		// s points at the '?' on entry. A node is created only when the matching option
+		// (parse_declaration or parse_pi) is set; otherwise the construct is skipped.
+		// For a declaration with attributes the function returns with ref_cursor left at the new
+		// declaration node and s at the first attribute, so the caller can parse the attributes.
+		// Returns the position to continue from, or 0 with the error recorded.
+		char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
+		{
+			// load into registers
+			xml_node_struct* cursor = ref_cursor;
+			char_t ch = 0;
+
+			// parse node contents, starting with question mark
+			++s;
+
+			// read PI target
+			char_t* target = s;
+
+			if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
+
+			PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
+			PUGI__CHECK_ERROR(status_bad_pi, s);
+
+			// determine node type; stricmp / strcasecmp is not portable
+			// (target is exactly three characters that spell "xml" in any letter case)
+			bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
+
+			if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
+			{
+				if (declaration)
+				{
+					// disallow non top-level declarations
+					if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
+
+					PUGI__PUSHNODE(node_declaration);
+				}
+				else
+				{
+					PUGI__PUSHNODE(node_pi);
+				}
+
+				cursor->name = target;
+
+				// terminate the target name; ch receives the character that followed it
+				PUGI__ENDSEG();
+
+				// parse value/attributes
+				if (ch == '?')
+				{
+					// empty node
+					if (!ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
+					s += (*s == '>');
+
+					PUGI__POPNODE();
+				}
+				else if (PUGI__IS_CHARTYPE(ch, ct_space))
+				{
+					PUGI__SKIPWS();
+
+					// scan for tag end
+					char_t* value = s;
+
+					PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
+					PUGI__CHECK_ERROR(status_bad_pi, s);
+
+					if (declaration)
+					{
+						// replace ending ? with / so that 'element' terminates properly
+						*s = '/';
+
+						// we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
+						s = value;
+					}
+					else
+					{
+						// store value and step over >
+						cursor->value = value;
+						PUGI__POPNODE();
+
+						PUGI__ENDSEG();
+
+						s += (*s == '>');
+					}
+				}
+				else PUGI__THROW_ERROR(status_bad_pi, s);
+			}
+			else
+			{
+				// node is not wanted: just skip to the end of the construct
+				// scan for tag end
+				PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
+				PUGI__CHECK_ERROR(status_bad_pi, s);
+
+				s += (s[1] == '>' ? 2 : 1);
+			}
+
+			// store from registers
+			ref_cursor = cursor;
+
+			return s;
+		}
+
+		// Main parsing loop: consumes the zero-terminated buffer starting at s and builds the node tree
+		// under root, editing the buffer in place (names and values are terminated inside it).
+		// optmsk holds the parse options; endch is the original last character of the buffer, which was
+		// replaced by the zero terminator and is consulted wherever a construct may end at the buffer end.
+		// Returns the final position on success, or 0 with error_status / error_offset set.
+		char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
+		{
+			// pick the value converters once, according to the parse options
+			strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
+			strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
+			
+			char_t ch = 0;
+			xml_node_struct* cursor = root;
+			char_t* mark = s;
+
+			while (*s != 0)
+			{
+				if (*s == '<')
+				{
+					++s;
+
+				LOC_TAG:
+					if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
+					{
+						PUGI__PUSHNODE(node_element); // Append a new node to the tree.
+
+						cursor->name = s;
+
+						PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
+						PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
+
+						if (ch == '>')
+						{
+							// end of tag
+						}
+						else if (PUGI__IS_CHARTYPE(ch, ct_space))
+						{
+						// also entered via goto after parse_question leaves the cursor at a declaration node
+						LOC_ATTRIBUTES:
+							while (true)
+							{
+								PUGI__SKIPWS(); // Eat any whitespace.
+						
+								if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
+								{
+									xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute.
+									if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
+
+									a->name = s; // Save the offset.
+
+									PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
+									PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
+
+									PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
+									PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
+
+									if (PUGI__IS_CHARTYPE(ch, ct_space))
+									{
+										PUGI__SKIPWS(); // Eat any whitespace.
+										PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
+
+										ch = *s;
+										++s;
+									}
+									
+									if (ch == '=') // '<... #=...'
+									{
+										PUGI__SKIPWS(); // Eat any whitespace.
+
+										if (*s == '"' || *s == '\'') // '<... #="...'
+										{
+											ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
+											++s; // Step over the quote.
+											a->value = s; // Save the offset.
+
+											s = strconv_attribute(s, ch);
+										
+											if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
+
+											// After this line the loop continues from the start;
+											// Whitespaces, / and > are ok, symbols and EOF are wrong,
+											// everything else will be detected
+											if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
+										}
+										else PUGI__THROW_ERROR(status_bad_attribute, s);
+									}
+									else PUGI__THROW_ERROR(status_bad_attribute, s);
+								}
+								else if (*s == '/')
+								{
+									++s;
+									
+									if (*s == '>')
+									{
+										PUGI__POPNODE();
+										s++;
+										break;
+									}
+									else if (*s == 0 && endch == '>')
+									{
+										// the closing '>' was the last character of the buffer
+										PUGI__POPNODE();
+										break;
+									}
+									else PUGI__THROW_ERROR(status_bad_start_element, s);
+								}
+								else if (*s == '>')
+								{
+									++s;
+
+									break;
+								}
+								else if (*s == 0 && endch == '>')
+								{
+									// the closing '>' was the last character of the buffer
+									break;
+								}
+								else PUGI__THROW_ERROR(status_bad_start_element, s);
+							}
+
+							// !!!
+						}
+						else if (ch == '/') // '<#.../'
+						{
+							if (!ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
+
+							PUGI__POPNODE(); // Pop.
+
+							s += (*s == '>');
+						}
+						else if (ch == 0)
+						{
+							// we stepped over null terminator, backtrack & handle closing tag
+							--s;
+							
+							if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
+						}
+						else PUGI__THROW_ERROR(status_bad_start_element, s);
+					}
+					else if (*s == '/')
+					{
+						++s;
+
+						// the closing tag name must repeat the name of the current node
+						char_t* name = cursor->name;
+						if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s);
+						
+						while (PUGI__IS_CHARTYPE(*s, ct_symbol))
+						{
+							if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s);
+						}
+
+						// closing tag name ended before the element name did
+						if (*name)
+						{
+							if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
+							else PUGI__THROW_ERROR(status_end_element_mismatch, s);
+						}
+							
+						PUGI__POPNODE(); // Pop.
+
+						PUGI__SKIPWS();
+
+						if (*s == 0)
+						{
+							if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
+						}
+						else
+						{
+							if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
+							++s;
+						}
+					}
+					else if (*s == '?') // '<?...'
+					{
+						s = parse_question(s, cursor, optmsk, endch);
+						if (!s) return s;
+
+						// a cursor left at a declaration node means its attributes still have to be parsed
+						assert(cursor);
+						if ((cursor->header & xml_memory_page_type_mask) + 1 == node_declaration) goto LOC_ATTRIBUTES;
+					}
+					else if (*s == '!') // '<!...'
+					{
+						s = parse_exclamation(s, cursor, optmsk, endch);
+						if (!s) return s;
+					}
+					else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
+					else PUGI__THROW_ERROR(status_unrecognized_tag, s);
+				}
+				else
+				{
+					mark = s; // Save this offset while searching for a terminator.
+
+					PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
+
+					if (*s == '<' || !*s)
+					{
+						// We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
+						assert(mark != s);
+
+						// whitespace-only text: keep it only if the options ask for it
+						if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
+						{
+							continue;
+						}
+						else if (PUGI__OPTSET(parse_ws_pcdata_single))
+						{
+							// kept only when directly followed by a closing tag and the node has no children yet
+							if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
+						}
+					}
+
+					// without trimming, the text starts at the saved position (leading whitespace included)
+					if (!PUGI__OPTSET(parse_trim_pcdata))
+						s = mark;
+							
+					if (cursor->parent || PUGI__OPTSET(parse_fragment))
+					{
+						PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
+						cursor->value = s; // Save the offset.
+
+						s = strconv_pcdata(s);
+								
+						PUGI__POPNODE(); // Pop since this is a standalone.
+						
+						if (!*s) break;
+					}
+					else
+					{
+						// text directly under the root is skipped unless parse_fragment is set
+						PUGI__SCANFOR(*s == '<'); // '...<'
+						if (!*s) break;
+						
+						++s;
+					}
+
+					// We're after '<'
+					goto LOC_TAG;
+				}
+			}
+
+			// check that last tag is closed
+			if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
+
+			return s;
+		}
+
+	#ifdef PUGIXML_WCHAR_MODE
+		// Returns a pointer past a leading U+FEFF character, or `s` unchanged when there is none.
+		static char_t* parse_skip_bom(char_t* s)
+		{
+			const unsigned int bom_codepoint = 0xfeff;
+
+			if (s[0] == static_cast<wchar_t>(bom_codepoint)) return s + 1;
+
+			return s;
+		}
+	#else
+		// Returns a pointer past a leading EF BB BF byte sequence, or `s` unchanged when there is none.
+		static char_t* parse_skip_bom(char_t* s)
+		{
+			const bool starts_with_bom = s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf';
+
+			return starts_with_bom ? s + 3 : s;
+		}
+	#endif
+
+		// Returns true if `node` or any sibling reachable through next_sibling is an element node.
+		static bool has_element_node_siblings(xml_node_struct* node)
+		{
+			for (xml_node_struct* it = node; it; it = it->next_sibling)
+			{
+				// node type is stored in the header, biased by one
+				if (static_cast<xml_node_type>((it->header & impl::xml_memory_page_type_mask) + 1) == node_element)
+					return true;
+			}
+
+			return false;
+		}
+
+		// Parses the `length` characters at `buffer` in place and attaches the parsed nodes under `root`.
+		// The last character of the buffer is overwritten with a null terminator; its original value is
+		// handed to parse_tree as `endch`. Returns the parse status together with the offset (relative to
+		// `buffer`) reported by the parser, or 0 when the parser reported no error offset.
+		// `optmsk` is the parse option mask that PUGI__OPTSET tests.
+		static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
+		{
+			// allocator object is a part of document object
+			xml_allocator& alloc = *static_cast<xml_allocator*>(xmldoc);
+
+			// early-out for empty documents
+			if (length == 0)
+				return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
+
+			// get last child of the root before parsing
+			// (used after parsing to find the first node that this call added)
+			xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c : 0;
+	
+			// create parser on stack
+			xml_parser parser(alloc);
+
+			// save last character and make buffer zero-terminated (speeds up parsing)
+			char_t endch = buffer[length - 1];
+			buffer[length - 1] = 0;
+			
+			// skip BOM to make sure it does not end up as part of parse output
+			char_t* buffer_data = parse_skip_bom(buffer);
+
+			// perform actual parsing
+			parser.parse_tree(buffer_data, root, optmsk, endch);
+
+			// update allocator state
+			alloc = parser.alloc;
+
+			// error offset is reported relative to the start of the buffer (including any skipped BOM)
+			xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
+			assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
+
+			if (result)
+			{
+				// since we removed last character, we have to handle the only possible false positive (stray <)
+				if (endch == '<')
+					return make_parse_result(status_unrecognized_tag, length - 1);
+
+				// check if there are any element nodes parsed
+				xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling : root->first_child;
+
+				// unless parsing a fragment, at least one element node must have been added
+				if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
+					return make_parse_result(status_no_document_element, length - 1);
+			}
+			else
+			{
+				// roll back offset if it occurs on a null terminator in the source buffer
+				if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
+					result.offset--;
+			}
+
+			return result;
+		}
+	};
+
+	// Output facilities
+	// Returns the encoding in which char_t data is held: the wchar_t encoding in wide-character
+	// builds, UTF-8 otherwise.
+	PUGI__FN xml_encoding get_write_native_encoding()
+	{
+	#ifdef PUGIXML_WCHAR_MODE
+		xml_encoding native = get_wchar_encoding();
+	#else
+		xml_encoding native = encoding_utf8;
+	#endif
+
+		return native;
+	}
+
+	// Resolves a user-requested output encoding to a concrete one: generic wchar/utf16/utf32 values
+	// are mapped to their endian-specific forms, encoding_auto becomes UTF-8, and any other value
+	// is returned unchanged.
+	PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
+	{
+		switch (encoding)
+		{
+		case encoding_wchar:
+			// wchar output uses whichever utf implementation matches wchar_t
+			return get_wchar_encoding();
+
+		case encoding_utf16:
+			// pick the utf16 flavour that matches host endianness
+			return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+		case encoding_utf32:
+			// pick the utf32 flavour that matches host endianness
+			return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+		case encoding_auto:
+			// nothing was requested explicitly, so fall back to utf8
+			return encoding_utf8;
+
+		default:
+			// an explicit, already concrete encoding is kept as is
+			return encoding;
+		}
+	}
+
+#ifdef PUGIXML_WCHAR_MODE
+	// Returns `length`, or `length - 1` when wchar_t is 16 bits wide and the last character of
+	// `data` lies in the range 0xD800..0xDBFF (the lead half of a surrogate pair).
+	PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
+	{
+		assert(length > 0);
+
+		if (sizeof(wchar_t) != 2) return length;
+
+		const unsigned int last_unit = static_cast<uint16_t>(data[length - 1]);
+		const bool is_lead_surrogate = last_unit >= 0xD800 && last_unit < 0xDC00;
+
+		// a dangling lead surrogate is left for the next chunk
+		return is_lead_surrogate ? length - 1 : length;
+	}
+
+	// Converts `length` wide characters from `data` to `encoding` and returns the number of bytes produced.
+	// The result is written through whichever of r_char / r_u8 / r_u16 / r_u32 matches the target:
+	// r_char for a pure endian swap, r_u8 for utf8 and latin1, r_u16 for utf16, r_u32 for utf32.
+	// An unsupported encoding triggers an assertion and yields 0.
+	PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
+	{
+		// only endian-swapping is required
+		if (need_endian_swap_utf(encoding, get_wchar_encoding()))
+		{
+			convert_wchar_endian_swap(r_char, data, length);
+
+			return length * sizeof(char_t);
+		}
+	
+		// convert to utf8
+		if (encoding == encoding_utf8)
+		{
+			uint8_t* dest = r_u8;
+			uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(data, length, dest);
+
+			return static_cast<size_t>(end - dest);
+		}
+
+		// convert to utf16
+		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
+		{
+			uint16_t* dest = r_u16;
+
+			// convert to native utf16
+			uint16_t* end = utf_decoder<utf16_writer>::decode_wchar_block(data, length, dest);
+
+			// swap if necessary
+			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+			if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
+
+			// result is counted in bytes, not code units
+			return static_cast<size_t>(end - dest) * sizeof(uint16_t);
+		}
+
+		// convert to utf32
+		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
+		{
+			uint32_t* dest = r_u32;
+
+			// convert to native utf32
+			uint32_t* end = utf_decoder<utf32_writer>::decode_wchar_block(data, length, dest);
+
+			// swap if necessary
+			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+			if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
+
+			// result is counted in bytes, not code units
+			return static_cast<size_t>(end - dest) * sizeof(uint32_t);
+		}
+
+		// convert to latin1
+		if (encoding == encoding_latin1)
+		{
+			uint8_t* dest = r_u8;
+			uint8_t* end = utf_decoder<latin1_writer>::decode_wchar_block(data, length, dest);
+
+			return static_cast<size_t>(end - dest);
+		}
+
+		assert(!"Invalid encoding");
+		return 0;
+	}
+#else
+	// Looks at the last four bytes of `data` (from the end backwards) and returns the index of the
+	// first one found that is not a UTF-8 continuation byte (10xxxxxx), so that a chunk cut at that
+	// index ends right before it. If all four are continuation bytes, returns `length` unchanged.
+	PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
+	{
+		assert(length > 4);
+
+		size_t back = 1;
+
+		while (back <= 4)
+		{
+			const uint8_t byte = static_cast<uint8_t>(data[length - back]);
+
+			// top two bits other than 10 mean a standalone or a leading byte
+			if ((byte >> 6) != 2) return length - back;
+
+			++back;
+		}
+
+		// four continuation bytes in a row: the tail is malformed anyway, so take the whole chunk
+		return length;
+	}
+
+	// Converts `length` UTF-8 bytes from `data` to `encoding` and returns the number of bytes produced.
+	// utf16 output goes to r_u16, utf32 output to r_u32 and latin1 output to r_u8; the first
+	// parameter is unused in this build. An unsupported encoding triggers an assertion and yields 0.
+	PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
+	{
+		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
+		{
+			uint16_t* dest = r_u16;
+
+			// convert to native utf16
+			uint16_t* end = utf_decoder<utf16_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
+
+			// swap if necessary
+			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+			if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
+
+			// result is counted in bytes, not code units
+			return static_cast<size_t>(end - dest) * sizeof(uint16_t);
+		}
+
+		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
+		{
+			uint32_t* dest = r_u32;
+
+			// convert to native utf32
+			uint32_t* end = utf_decoder<utf32_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
+
+			// swap if necessary
+			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+			if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
+
+			// result is counted in bytes, not code units
+			return static_cast<size_t>(end - dest) * sizeof(uint32_t);
+		}
+
+		if (encoding == encoding_latin1)
+		{
+			uint8_t* dest = r_u8;
+			uint8_t* end = utf_decoder<latin1_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
+
+			return static_cast<size_t>(end - dest);
+		}
+
+		assert(!"Invalid encoding");
+		return 0;
+	}
+#endif
+
+	// Collects output characters in a fixed-size buffer and forwards them to an xml_writer,
+	// converting them to the requested encoding first when that differs from the native one.
+	// Copying is disabled (copy constructor and assignment are declared private and left undefined).
+	class xml_buffered_writer
+	{
+		xml_buffered_writer(const xml_buffered_writer&);
+		xml_buffered_writer& operator=(const xml_buffered_writer&);
+
+	public:
+		// Binds to `writer_` and resolves `user_encoding` to a concrete output encoding.
+		xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
+		{
+			// the fixed-arity write() overloads below add up to 6 characters after a flush
+			PUGI__STATIC_ASSERT(bufcapacity >= 8);
+		}
+
+		// Flushes whatever is still buffered.
+		~xml_buffered_writer()
+		{
+			flush();
+		}
+
+		// Writes out the buffered characters and empties the buffer.
+		void flush()
+		{
+			flush(buffer, bufsize);
+			bufsize = 0;
+		}
+
+		// Writes `size` characters from `data` to the underlying writer, converting them through
+		// the scratch area when the output encoding is not the native one. Does not touch bufsize.
+		void flush(const char_t* data, size_t size)
+		{
+			if (size == 0) return;
+
+			// fast path, just write data
+			if (encoding == get_write_native_encoding())
+				writer.write(data, size * sizeof(char_t));
+			else
+			{
+				// convert chunk
+				size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
+				assert(result <= sizeof(scratch));
+
+				// write data
+				// (all scratch members share storage, so data_u8 addresses the converted bytes)
+				writer.write(scratch.data_u8, result);
+			}
+		}
+
+		// Appends `length` characters from `data`. If they do not fit, the buffer is flushed first;
+		// input larger than the whole buffer bypasses it (written directly when no conversion is
+		// needed, otherwise converted chunk by chunk) and only the remaining tail is buffered.
+		void write(const char_t* data, size_t length)
+		{
+			if (bufsize + length > bufcapacity)
+			{
+				// flush the remaining buffer contents
+				flush();
+
+				// handle large chunks
+				if (length > bufcapacity)
+				{
+					if (encoding == get_write_native_encoding())
+					{
+						// fast path, can just write data chunk
+						writer.write(data, length * sizeof(char_t));
+						return;
+					}
+
+					// need to convert in suitable chunks
+					while (length > bufcapacity)
+					{
+						// get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
+						// and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
+						size_t chunk_size = get_valid_length(data, bufcapacity);
+
+						// convert chunk and write
+						flush(data, chunk_size);
+
+						// iterate
+						data += chunk_size;
+						length -= chunk_size;
+					}
+
+					// small tail is copied below
+					bufsize = 0;
+				}
+			}
+
+			memcpy(buffer + bufsize, data, length * sizeof(char_t));
+			bufsize += length;
+		}
+
+		// Appends a null-terminated string.
+		void write(const char_t* data)
+		{
+			write(data, strlength(data));
+		}
+
+		// The overloads below append one to six individual characters, flushing first when the
+		// buffer cannot hold all of them.
+		void write(char_t d0)
+		{
+			if (bufsize + 1 > bufcapacity) flush();
+
+			buffer[bufsize + 0] = d0;
+			bufsize += 1;
+		}
+
+		void write(char_t d0, char_t d1)
+		{
+			if (bufsize + 2 > bufcapacity) flush();
+
+			buffer[bufsize + 0] = d0;
+			buffer[bufsize + 1] = d1;
+			bufsize += 2;
+		}
+
+		void write(char_t d0, char_t d1, char_t d2)
+		{
+			if (bufsize + 3 > bufcapacity) flush();
+
+			buffer[bufsize + 0] = d0;
+			buffer[bufsize + 1] = d1;
+			buffer[bufsize + 2] = d2;
+			bufsize += 3;
+		}
+
+		void write(char_t d0, char_t d1, char_t d2, char_t d3)
+		{
+			if (bufsize + 4 > bufcapacity) flush();
+
+			buffer[bufsize + 0] = d0;
+			buffer[bufsize + 1] = d1;
+			buffer[bufsize + 2] = d2;
+			buffer[bufsize + 3] = d3;
+			bufsize += 4;
+		}
+
+		void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
+		{
+			if (bufsize + 5 > bufcapacity) flush();
+
+			buffer[bufsize + 0] = d0;
+			buffer[bufsize + 1] = d1;
+			buffer[bufsize + 2] = d2;
+			buffer[bufsize + 3] = d3;
+			buffer[bufsize + 4] = d4;
+			bufsize += 5;
+		}
+
+		void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
+		{
+			if (bufsize + 6 > bufcapacity) flush();
+
+			buffer[bufsize + 0] = d0;
+			buffer[bufsize + 1] = d1;
+			buffer[bufsize + 2] = d2;
+			buffer[bufsize + 3] = d3;
+			buffer[bufsize + 4] = d4;
+			buffer[bufsize + 5] = d5;
+			bufsize += 6;
+		}
+
+		// utf8 maximum expansion: x4 (-> utf32)
+		// utf16 maximum expansion: x2 (-> utf32)
+		// utf32 maximum expansion: x1
+		enum
+		{
+			// total byte budget for buffer + scratch; overridable via PUGIXML_MEMORY_OUTPUT_STACK
+			bufcapacitybytes =
+			#ifdef PUGIXML_MEMORY_OUTPUT_STACK
+				PUGIXML_MEMORY_OUTPUT_STACK
+			#else
+				10240
+			#endif
+			,
+			// each buffered character costs sizeof(char_t) bytes in buffer plus 4 bytes in scratch
+			bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
+		};
+
+		// pending characters in native encoding
+		char_t buffer[bufcapacity];
+
+		// conversion target; every member spans at least 4 bytes per buffered character
+		union
+		{
+			uint8_t data_u8[4 * bufcapacity];
+			uint16_t data_u16[2 * bufcapacity];
+			uint32_t data_u32[bufcapacity];
+			char_t data_char[bufcapacity];
+		} scratch;
+
+		xml_writer& writer; // destination of all output
+		size_t bufsize; // number of characters currently held in buffer
+		xml_encoding encoding; // concrete output encoding
+	};
+
+	// Writes `s`, replacing every character flagged by `type` with an XML escape: &amp; &lt; &gt;
+	// &quot; for the named ones and a two-digit decimal character reference for the rest
+	// (which are asserted to be below 32).
+	PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
+	{
+		while (*s)
+		{
+			const char_t* run_begin = s;
+
+			// extend the run over characters that need no escaping
+			while (!PUGI__IS_CHARTYPEX(*s, type)) ++s;
+
+			writer.write(run_begin, static_cast<size_t>(s - run_begin));
+
+			// the run stopped at the terminator: nothing left to escape
+			if (*s == 0) break;
+
+			const char_t special = *s++;
+
+			if (special == '&')
+				writer.write('&', 'a', 'm', 'p', ';');
+			else if (special == '<')
+				writer.write('&', 'l', 't', ';');
+			else if (special == '>')
+				writer.write('&', 'g', 't', ';');
+			else if (special == '"')
+				writer.write('&', 'q', 'u', 'o', 't', ';');
+			else
+			{
+				// remaining special characters are emitted as &#NN;
+				unsigned int code = static_cast<unsigned int>(special);
+				assert(code < 32);
+
+				writer.write('&', '#', static_cast<char_t>((code / 10) + '0'), static_cast<char_t>((code % 10) + '0'), ';');
+			}
+		}
+	}
+
+	// Writes `s` verbatim when format_no_escapes is set in `flags`, otherwise with the characters
+	// selected by `type` escaped.
+	PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
+	{
+		const bool escape = (flags & format_no_escapes) == 0;
+
+		if (escape)
+		{
+			text_output_escaped(writer, s, type);
+			return;
+		}
+
+		writer.write(s);
+	}
+
+	// Writes `s` as one or more CDATA sections. Text is split after the "]]" of every "]]>" so the
+	// section terminator never appears inside a section; an empty string yields one empty section.
+	PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
+	{
+		for (;;)
+		{
+			writer.write('<', '!', '[', 'C', 'D');
+			writer.write('A', 'T', 'A', '[');
+
+			const char_t* section_begin = s;
+
+			// advance to the end of the text or to the next ]]> sequence, whichever comes first
+			for (; *s; ++s)
+				if (s[0] == ']' && s[1] == ']' && s[2] == '>') break;
+
+			// when stopped at ]]>, keep ]] in this section and leave > for the next one
+			if (*s) s += 2;
+
+			writer.write(section_begin, static_cast<size_t>(s - section_begin));
+
+			writer.write(']', ']', '>');
+
+			if (!*s) break;
+		}
+	}
+
+	// Writes every attribute of `node` as ` name="value"`. Attributes with an empty name are
+	// written as ":anonymous"; values go through text_output with attribute escaping rules.
+	PUGI__FN void node_output_attributes(xml_buffered_writer& writer, const xml_node& node, unsigned int flags)
+	{
+		const char_t* fallback_name = PUGIXML_TEXT(":anonymous");
+
+		xml_attribute attr = node.first_attribute();
+
+		while (attr)
+		{
+			const char_t* attr_name = attr.name();
+
+			writer.write(' ');
+			writer.write(attr_name[0] ? attr_name : fallback_name);
+			writer.write('=', '"');
+
+			text_output(writer, attr.value(), ctx_special_attr, flags);
+
+			writer.write('"');
+
+			attr = attr.next_attribute();
+		}
+	}
+
+	// Serializes `node` and, recursively, its children to `writer`.
+	// `indent` is written `depth` times before the node when format_indent is set and format_raw is
+	// not; with format_raw no indentation or line breaks are emitted. Nodes with an empty name are
+	// written as ":anonymous". An unknown node type triggers an assertion.
+	PUGI__FN void node_output(xml_buffered_writer& writer, const xml_node& node, const char_t* indent, unsigned int flags, unsigned int depth)
+	{
+		const char_t* default_name = PUGIXML_TEXT(":anonymous");
+
+		// leading indentation for this node
+		if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
+			for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
+
+		switch (node.type())
+		{
+		case node_document:
+		{
+			// the document itself produces no markup; children stay at the same depth
+			for (xml_node n = node.first_child(); n; n = n.next_sibling())
+				node_output(writer, n, indent, flags, depth);
+			break;
+		}
+			
+		case node_element:
+		{
+			const char_t* name = node.name()[0] ? node.name() : default_name;
+
+			writer.write('<');
+			writer.write(name);
+
+			node_output_attributes(writer, node, flags);
+
+			if (flags & format_raw)
+			{
+				// raw mode: no line breaks anywhere
+				if (!node.first_child())
+					writer.write(' ', '/', '>');
+				else
+				{
+					writer.write('>');
+
+					for (xml_node n = node.first_child(); n; n = n.next_sibling())
+						node_output(writer, n, indent, flags, depth + 1);
+
+					writer.write('<', '/');
+					writer.write(name);
+					writer.write('>');
+				}
+			}
+			else if (!node.first_child())
+				// childless element: self-closing tag
+				writer.write(' ', '/', '>', '\n');
+			else if (node.first_child() == node.last_child() && (node.first_child().type() == node_pcdata || node.first_child().type() == node_cdata))
+			{
+				// a single text child is kept on the same line as its element
+				writer.write('>');
+
+				if (node.first_child().type() == node_pcdata)
+					text_output(writer, node.first_child().value(), ctx_special_pcdata, flags);
+				else
+					text_output_cdata(writer, node.first_child().value());
+
+				writer.write('<', '/');
+				writer.write(name);
+				writer.write('>', '\n');
+			}
+			else
+			{
+				// general case: children on their own lines, one level deeper
+				writer.write('>', '\n');
+				
+				for (xml_node n = node.first_child(); n; n = n.next_sibling())
+					node_output(writer, n, indent, flags, depth + 1);
+
+				// indent the closing tag to match the opening one
+				if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
+					for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
+				
+				writer.write('<', '/');
+				writer.write(name);
+				writer.write('>', '\n');
+			}
+
+			break;
+		}
+		
+		case node_pcdata:
+			text_output(writer, node.value(), ctx_special_pcdata, flags);
+			if ((flags & format_raw) == 0) writer.write('\n');
+			break;
+
+		case node_cdata:
+			text_output_cdata(writer, node.value());
+			if ((flags & format_raw) == 0) writer.write('\n');
+			break;
+
+		case node_comment:
+			// comment text is written without escaping
+			writer.write('<', '!', '-', '-');
+			writer.write(node.value());
+			writer.write('-', '-', '>');
+			if ((flags & format_raw) == 0) writer.write('\n');
+			break;
+
+		case node_pi:
+		case node_declaration:
+			writer.write('<', '?');
+			writer.write(node.name()[0] ? node.name() : default_name);
+
+			// declarations carry attributes, processing instructions carry an optional value
+			if (node.type() == node_declaration)
+			{
+				node_output_attributes(writer, node, flags);
+			}
+			else if (node.value()[0])
+			{
+				writer.write(' ');
+				writer.write(node.value());
+			}
+
+			writer.write('?', '>');
+			if ((flags & format_raw) == 0) writer.write('\n');
+			break;
+
+		case node_doctype:
+			// "<!DOCTYPE" is split across two calls because write() takes at most six characters
+			writer.write('<', '!', 'D', 'O', 'C');
+			writer.write('T', 'Y', 'P', 'E');
+
+			if (node.value()[0])
+			{
+				writer.write(' ');
+				writer.write(node.value());
+			}
+
+			writer.write('>');
+			if ((flags & format_raw) == 0) writer.write('\n');
+			break;
+
+		default:
+			assert(!"Invalid node type");
+		}
+	}
+
+	// Returns true if a declaration node appears among the children of `node` before the first
+	// element child; false if an element comes first or neither is present.
+	inline bool has_declaration(const xml_node& node)
+	{
+		xml_node child = node.first_child();
+
+		while (child)
+		{
+			switch (child.type())
+			{
+			case node_declaration:
+				return true;
+
+			case node_element:
+				// stop searching at the first element
+				return false;
+
+			default:
+				break;
+			}
+
+			child = child.next_sibling();
+		}
+
+		return false;
+	}
+
+	// Tells whether a node of type `child` may be inserted under a node of type `parent`.
+	// Only documents and elements accept children; documents and null nodes are never children;
+	// declarations and doctypes are accepted by documents only.
+	inline bool allow_insert_child(xml_node_type parent, xml_node_type child)
+	{
+		const bool parent_is_document = parent == node_document;
+		const bool parent_is_container = parent_is_document || parent == node_element;
+		const bool child_is_insertable = child != node_document && child != node_null;
+		const bool child_is_document_only = child == node_declaration || child == node_doctype;
+
+		if (!parent_is_container || !child_is_insertable) return false;
+
+		return parent_is_document || !child_is_document_only;
+	}
+
+	// Copies the contents of `source` into `dest`, which must already be of the same node type.
+	// Elements are copied with their attributes and, recursively, all children except `skip`;
+	// declarations with their attributes; other node types with their name and/or value.
+	PUGI__FN void recursive_copy_skip(xml_node& dest, const xml_node& source, const xml_node& skip)
+	{
+		assert(dest.type() == source.type());
+
+		const xml_node_type kind = source.type();
+
+		switch (kind)
+		{
+		case node_element:
+		case node_declaration:
+		{
+			// both kinds carry a name and an attribute list
+			dest.set_name(source.name());
+
+			xml_attribute attr = source.first_attribute();
+
+			while (attr)
+			{
+				dest.append_attribute(attr.name()).set_value(attr.value());
+				attr = attr.next_attribute();
+			}
+
+			// only elements have children to copy
+			if (kind == node_declaration) break;
+
+			xml_node child = source.first_child();
+
+			while (child)
+			{
+				if (!(child == skip))
+				{
+					xml_node copy = dest.append_child(child.type());
+					assert(copy);
+
+					recursive_copy_skip(copy, child, skip);
+				}
+
+				child = child.next_sibling();
+			}
+
+			break;
+		}
+
+		case node_pi:
+			dest.set_name(source.name());
+			dest.set_value(source.value());
+			break;
+
+		case node_pcdata:
+		case node_cdata:
+		case node_comment:
+		case node_doctype:
+			dest.set_value(source.value());
+			break;
+
+		default:
+			assert(!"Invalid node type");
+		}
+	}
+
+	// Returns true if `node` is a PCDATA or CDATA node.
+	inline bool is_text_node(xml_node_struct* node)
+	{
+		// node type is stored in the header, biased by one
+		switch (static_cast<xml_node_type>((node->header & impl::xml_memory_page_type_mask) + 1))
+		{
+		case node_pcdata:
+		case node_cdata:
+			return true;
+
+		default:
+			return false;
+		}
+	}
+
+	// get value with conversion functions
+	// Returns 16 if `value`, after leading whitespace and an optional '-', starts with "0x" or "0X";
+	// returns 10 otherwise.
+	PUGI__FN int get_integer_base(const char_t* value)
+	{
+		const char_t* p = value;
+
+		// skip leading whitespace
+		for (; PUGI__IS_CHARTYPE(*p, ct_space); ++p)
+		{
+		}
+
+		// skip a single minus sign
+		if (*p == '-') ++p;
+
+		if (p[0] != '0') return 10;
+
+		return (p[1] == 'x' || p[1] == 'X') ? 16 : 10;
+	}
+
+	// Parses `value` as a signed integer (decimal or 0x-prefixed hexadecimal); returns `def` when
+	// `value` is null.
+	PUGI__FN int get_value_int(const char_t* value, int def)
+	{
+		if (!value) return def;
+
+		const int radix = get_integer_base(value);
+
+	#ifdef PUGIXML_WCHAR_MODE
+		const long parsed = wcstol(value, 0, radix);
+	#else
+		const long parsed = strtol(value, 0, radix);
+	#endif
+
+		return static_cast<int>(parsed);
+	}
+
+	// Parses `value` as an unsigned integer (decimal or 0x-prefixed hexadecimal); returns `def`
+	// when `value` is null.
+	PUGI__FN unsigned int get_value_uint(const char_t* value, unsigned int def)
+	{
+		if (!value) return def;
+
+		const int radix = get_integer_base(value);
+
+	#ifdef PUGIXML_WCHAR_MODE
+		const unsigned long parsed = wcstoul(value, 0, radix);
+	#else
+		const unsigned long parsed = strtoul(value, 0, radix);
+	#endif
+
+		return static_cast<unsigned int>(parsed);
+	}
+
+	// Parses `value` as a double with strtod/wcstod; returns `def` when `value` is null.
+	PUGI__FN double get_value_double(const char_t* value, double def)
+	{
+		if (!value) return def;
+
+	#ifdef PUGIXML_WCHAR_MODE
+		const double parsed = wcstod(value, 0);
+	#else
+		const double parsed = strtod(value, 0);
+	#endif
+
+		return parsed;
+	}
+
+	// Parses `value` as a double with strtod/wcstod and narrows the result to float; returns `def`
+	// when `value` is null.
+	PUGI__FN float get_value_float(const char_t* value, float def)
+	{
+		if (!value) return def;
+
+	#ifdef PUGIXML_WCHAR_MODE
+		const double parsed = wcstod(value, 0);
+	#else
+		const double parsed = strtod(value, 0);
+	#endif
+
+		return static_cast<float>(parsed);
+	}
+
+	// Interprets `value` as a boolean by its first character only: '1', 't', 'T', 'y' and 'Y'
+	// mean true, anything else means false. Returns `def` when `value` is null.
+	PUGI__FN bool get_value_bool(const char_t* value, bool def)
+	{
+		if (!value) return def;
+
+		switch (*value)
+		{
+		case '1': // 1*
+		case 't': // t* (true)
+		case 'T': // T* (True)
+		case 'y': // y* (yes)
+		case 'Y': // Y* (YES)
+			return true;
+
+		default:
+			return false;
+		}
+	}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+	// Parses `value` as a signed 64-bit integer (decimal or 0x-prefixed hexadecimal) with the
+	// conversion routine available for the current CRT; returns `def` when `value` is null.
+	PUGI__FN long long get_value_llong(const char_t* value, long long def)
+	{
+		if (!value) return def;
+
+		const int radix = get_integer_base(value);
+
+	#if defined(PUGIXML_WCHAR_MODE) && defined(PUGI__MSVC_CRT_VERSION)
+		return _wcstoi64(value, 0, radix);
+	#elif defined(PUGIXML_WCHAR_MODE)
+		return wcstoll(value, 0, radix);
+	#elif defined(PUGI__MSVC_CRT_VERSION)
+		return _strtoi64(value, 0, radix);
+	#else
+		return strtoll(value, 0, radix);
+	#endif
+	}
+
+	// Parses `value` as an unsigned 64-bit integer (decimal or 0x-prefixed hexadecimal) with the
+	// conversion routine available for the current CRT; returns `def` when `value` is null.
+	PUGI__FN unsigned long long get_value_ullong(const char_t* value, unsigned long long def)
+	{
+		if (!value) return def;
+
+		const int radix = get_integer_base(value);
+
+	#if defined(PUGIXML_WCHAR_MODE) && defined(PUGI__MSVC_CRT_VERSION)
+		return _wcstoui64(value, 0, radix);
+	#elif defined(PUGIXML_WCHAR_MODE)
+		return wcstoull(value, 0, radix);
+	#elif defined(PUGI__MSVC_CRT_VERSION)
+		return _strtoui64(value, 0, radix);
+	#else
+		return strtoull(value, 0, radix);
+	#endif
+	}
+#endif
+
+	// set value with conversion functions
+	// Stores the text held in `buf` into `dest` through strcpy_insitu, widening it to char_t first
+	// in wide-character builds. Returns the result of strcpy_insitu.
+	PUGI__FN bool set_value_buffer(char_t*& dest, uintptr_t& header, uintptr_t header_mask, char (&buf)[128])
+	{
+	#ifdef PUGIXML_WCHAR_MODE
+		char_t widened[128];
+		impl::widen_ascii(widened, buf);
+
+		const char_t* text = widened;
+	#else
+		const char_t* text = buf;
+	#endif
+
+		return strcpy_insitu(dest, header, header_mask, text);
+	}
+
+	// Formats `value` with "%d" and stores the text into `dest`.
+	PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, int value)
+	{
+		char text[128];
+
+		sprintf(text, "%d", value);
+
+		return set_value_buffer(dest, header, header_mask, text);
+	}
+
+	// Formats `value` with "%u" and stores the text into `dest`.
+	PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned int value)
+	{
+		char text[128];
+
+		sprintf(text, "%u", value);
+
+		return set_value_buffer(dest, header, header_mask, text);
+	}
+
+	// Formats `value` with "%g" and stores the text into `dest`.
+	PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, double value)
+	{
+		char text[128];
+
+		sprintf(text, "%g", value);
+
+		return set_value_buffer(dest, header, header_mask, text);
+	}
+	
+	// Stores "true" or "false" into `dest`.
+	PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, bool value)
+	{
+		const char_t* text = value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false");
+
+		return strcpy_insitu(dest, header, header_mask, text);
+	}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+	// Formats `value` with "%lld" and stores the text into `dest`.
+	PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, long long value)
+	{
+		char text[128];
+
+		sprintf(text, "%lld", value);
+
+		return set_value_buffer(dest, header, header_mask, text);
+	}
+
+	// Formats `value` with "%llu" and stores the text into `dest`.
+	PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned long long value)
+	{
+		char text[128];
+
+		sprintf(text, "%llu", value);
+
+		return set_value_buffer(dest, header, header_mask, text);
+	}
+#endif
+
+	// we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
+	// Stores the size of `file` in bytes into `out_result` and leaves the file positioned at its start.
+	// Returns status_io_error if the tell call reports a negative position, status_out_of_memory if
+	// the size does not fit into size_t, and status_ok otherwise. `out_result` is written only on success.
+	PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
+	{
+	#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
+		// there are 64-bit versions of fseek/ftell, let's use them
+		typedef __int64 length_type;
+
+		_fseeki64(file, 0, SEEK_END);
+		length_type length = _ftelli64(file);
+		_fseeki64(file, 0, SEEK_SET);
+	#elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && !defined(__STRICT_ANSI__)
+		// there are 64-bit versions of fseek/ftell, let's use them
+		typedef off64_t length_type;
+
+		fseeko64(file, 0, SEEK_END);
+		length_type length = ftello64(file);
+		fseeko64(file, 0, SEEK_SET);
+	#else
+		// if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
+		typedef long length_type;
+
+		fseek(file, 0, SEEK_END);
+		length_type length = ftell(file);
+		fseek(file, 0, SEEK_SET);
+	#endif
+
+		// check for I/O errors
+		// (only the tell result is inspected; the seek return values are not checked)
+		if (length < 0) return status_io_error;
+		
+		// check for overflow
+		size_t result = static_cast<size_t>(length);
+
+		// a round trip through size_t that changes the value means the size is not representable
+		if (static_cast<length_type>(result) != length) return status_out_of_memory;
+
+		// finalize
+		out_result = result;
+
+		return status_ok;
+	}
+
+	// Appends a zero terminator to `buffer` when its encoding is one that is not converted later
+	// (native or byte-swapped wchar in wide-character builds, UTF-8 otherwise) and returns the
+	// buffer size in bytes including the terminator. For any other encoding the buffer is left
+	// untouched and `size` is returned. The buffer must have room for one extra character.
+	PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding) 
+	{
+	#ifdef PUGIXML_WCHAR_MODE
+		const xml_encoding native = get_wchar_encoding();
+		const bool terminate_here = (encoding == native) || need_endian_swap_utf(encoding, native);
+
+		if (terminate_here)
+		{
+			char_t* chars = static_cast<char_t*>(buffer);
+			const size_t count = size / sizeof(char_t);
+
+			chars[count] = 0;
+
+			return (count + 1) * sizeof(char_t);
+		}
+	#else
+		if (encoding == encoding_utf8)
+		{
+			char* bytes = static_cast<char*>(buffer);
+
+			bytes[size] = 0;
+
+			return size + 1;
+		}
+	#endif
+
+		// encoding conversion takes care of termination in every other case
+		return size;
+	}
+
+	// Reads the whole of `file` into a freshly allocated buffer, closes the file and hands the
+	// buffer to `doc` for in-place parsing. A null `file` yields status_file_not_found; size
+	// query, allocation and read failures yield the corresponding error status. The file is
+	// closed on every path where it was non-null.
+	PUGI__FN xml_parse_result load_file_impl(xml_document& doc, FILE* file, unsigned int options, xml_encoding encoding)
+	{
+		if (!file) return make_parse_result(status_file_not_found);
+
+		// determine file size (can result in I/O errors)
+		size_t file_size = 0;
+		const xml_parse_status size_status = get_file_size(file, file_size);
+
+		if (size_status != status_ok)
+		{
+			fclose(file);
+			return make_parse_result(size_status);
+		}
+
+		// one extra character is reserved for the zero terminator
+		const size_t suffix_size = sizeof(char_t);
+
+		char* contents = static_cast<char*>(xml_memory::allocate(file_size + suffix_size));
+
+		if (!contents)
+		{
+			fclose(file);
+			return make_parse_result(status_out_of_memory);
+		}
+
+		// pull the file into memory; the handle is not needed afterwards
+		const size_t bytes_read = fread(contents, 1, file_size, file);
+		fclose(file);
+
+		if (bytes_read != file_size)
+		{
+			xml_memory::deallocate(contents);
+			return make_parse_result(status_io_error);
+		}
+
+		const xml_encoding real_encoding = get_buffer_encoding(encoding, contents, file_size);
+		const size_t terminated_size = zero_terminate_buffer(contents, file_size, real_encoding);
+
+		return doc.load_buffer_inplace_own(contents, terminated_size, options, real_encoding);
+	}
+
+#ifndef PUGIXML_NO_STL
+	// One link in a singly linked list of fixed-size data chunks, used to accumulate stream
+	// contents of unknown total length. `size` is the number of bytes of `data` actually filled.
+	template <typename T> struct xml_stream_chunk
+	{
+		// Allocates and constructs an empty chunk. Returns 0 if the allocation fails.
+		static xml_stream_chunk* create()
+		{
+			void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
+
+			// placement new must not be handed a null pointer, so report the failure to the caller
+			if (!memory) return 0;
+			
+			return new (memory) xml_stream_chunk();
+		}
+
+		// Frees the chunk pointed to by `ptr` and every chunk linked after it. `ptr` may be null.
+		static void destroy(void* ptr)
+		{
+			xml_stream_chunk* chunk = static_cast<xml_stream_chunk*>(ptr);
+
+			// free chunk chain
+			while (chunk)
+			{
+				// read the link before the chunk's memory is released
+				xml_stream_chunk* next = chunk->next;
+				xml_memory::deallocate(chunk);
+				chunk = next;
+			}
+		}
+
+		xml_stream_chunk(): next(0), size(0)
+		{
+		}
+
+		xml_stream_chunk* next;
+		size_t size;
+
+		T data[xml_memory_page_size / sizeof(T)];
+	};
+
+	// Reads `stream` to its end without seeking: data is collected into a list of chunks and then
+	// copied into one contiguous buffer with room for one extra char_t at the end.
+	// On success stores the buffer in `*out_buffer` and its size in bytes in `*out_size` and returns
+	// status_ok; returns status_out_of_memory or status_io_error on failure.
+	template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
+	{
+		// holder is constructed with the chunk list's destroy function
+		buffer_holder chunks(0, xml_stream_chunk<T>::destroy);
+
+		// read file to a chunk list
+		size_t total = 0;
+		xml_stream_chunk<T>* last = 0;
+
+		while (!stream.eof())
+		{
+			// allocate new chunk
+			xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
+			if (!chunk) return status_out_of_memory;
+
+			// append chunk to list
+			if (last) last = last->next = chunk;
+			else chunks.data = last = chunk;
+
+			// read data to chunk
+			stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
+			chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
+
+			// read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
+			if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
+
+			// guard against huge files (chunk size is small enough to make this overflow check work)
+			if (total + chunk->size < total) return status_out_of_memory;
+			total += chunk->size;
+		}
+
+		// extra room for a zero terminator of one char_t
+		size_t max_suffix_size = sizeof(char_t);
+
+		// copy chunk list to a contiguous buffer
+		char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
+		if (!buffer) return status_out_of_memory;
+
+		char* write = buffer;
+
+		for (xml_stream_chunk<T>* chunk = static_cast<xml_stream_chunk<T>*>(chunks.data); chunk; chunk = chunk->next)
+		{
+			assert(write + chunk->size <= buffer + total);
+			memcpy(write, chunk->data, chunk->size);
+			write += chunk->size;
+		}
+
+		assert(write == buffer + total);
+
+		// return buffer
+		*out_buffer = buffer;
+		*out_size = total;
+
+		return status_ok;
+	}
+
+	// Reads the remainder of a seekable `stream` (from its current position to the end) into a
+	// single buffer with room for one extra char_t at the end.
+	// On success stores the buffer in `*out_buffer` and the number of bytes actually read in
+	// `*out_size` and returns status_ok; returns status_io_error or status_out_of_memory on failure.
+	template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
+	{
+		// get length of remaining data in stream
+		typename std::basic_istream<T>::pos_type pos = stream.tellg();
+		stream.seekg(0, std::ios::end);
+		std::streamoff length = stream.tellg() - pos;
+		// restore the original read position
+		stream.seekg(pos);
+
+		if (stream.fail() || pos < 0) return status_io_error;
+
+		// guard against huge files
+		size_t read_length = static_cast<size_t>(length);
+
+		if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
+
+		// extra room for a zero terminator of one char_t
+		size_t max_suffix_size = sizeof(char_t);
+
+		// read stream data into memory (guard against stream exceptions with buffer holder)
+		buffer_holder buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
+		if (!buffer.data) return status_out_of_memory;
+
+		stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
+
+		// read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
+		if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
+
+		// return buffer
+		size_t actual_length = static_cast<size_t>(stream.gcount());
+		assert(actual_length <= read_length);
+		
+		// the holder gives up the buffer here and the caller receives it
+		*out_buffer = buffer.release();
+		*out_size = actual_length * sizeof(T);
+
+		return status_ok;
+	}
+
+	// Loads the remaining contents of `stream` into memory and hands the buffer to `doc` for
+	// in-place parsing. A stream that is already in a failed state yields status_io_error; a
+	// stream whose tellg() reports a negative position is read chunk by chunk instead of by seeking.
+	template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding)
+	{
+		// a stream with an error bit set is rejected up front (otherwise tellg() can fail and we'd clear the error bits)
+		if (stream.fail()) return make_parse_result(status_io_error);
+
+		void* contents = 0;
+		size_t contents_size = 0;
+		xml_parse_status load_status = status_ok;
+
+		// prefer the seek-based loader, since it's faster and takes less memory
+		const bool seekable = !(stream.tellg() < 0);
+
+		if (seekable)
+		{
+			load_status = load_stream_data_seek(stream, &contents, &contents_size);
+		}
+		else
+		{
+			// drop error flags that could be set by the failing tellg
+			stream.clear();
+			load_status = load_stream_data_noseek(stream, &contents, &contents_size);
+		}
+
+		if (load_status != status_ok) return make_parse_result(load_status);
+
+		const xml_encoding real_encoding = get_buffer_encoding(encoding, contents, contents_size);
+		const size_t terminated_size = zero_terminate_buffer(contents, contents_size, real_encoding);
+
+		return doc.load_buffer_inplace_own(contents, terminated_size, options, real_encoding);
+	}
+#endif
+
+#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && !defined(__STRICT_ANSI__))
+	// Opens a file given a wide-character path and mode by forwarding directly to _wfopen
+	// (this variant is compiled only for the toolchains selected by the #if above).
+	// Returns whatever _wfopen returns.
+	PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
+	{
+		return _wfopen(path, mode);
+	}
+#else
+	// Converts a wide-character string to UTF-8 in a buffer obtained from xml_memory::allocate.
+	// str must be non-null. Returns the new buffer (the caller releases it with
+	// xml_memory::deallocate) or 0 if the allocation fails.
+	PUGI__FN char* convert_path_heap(const wchar_t* str)
+	{
+		assert(str);
+
+		// measuring pass: wide character count, then the size of the UTF-8 encoding
+		const size_t wide_length = strlength_wide(str);
+		const size_t utf8_size = as_utf8_begin(str, wide_length);
+
+		// reserve one byte more than the encoded data
+		void* memory = xml_memory::allocate(utf8_size + 1);
+		if (!memory) return 0;
+
+		// encoding pass
+		char* utf8 = static_cast<char*>(memory);
+		as_utf8_end(utf8, utf8_size, str, wide_length);
+
+		return utf8;
+	}
+
+	// Portable fallback for opening a file by wide-character path: the path is converted to
+	// UTF-8 and the mode to a narrow string, then both are passed to fopen.
+	//
+	// path - wide-character path, must be non-null
+	// mode - wide-character fopen-style mode string (mirrors the _wfopen interface)
+	//
+	// Returns the opened FILE*, or 0 if the mode string is too long for the local buffer,
+	// the path conversion runs out of memory, or fopen fails.
+	PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
+	{
+		// convert mode to ASCII (we mirror _wfopen interface); the copy is bounded so that the
+		// buffer always keeps its terminating zero and is never written past its end
+		char mode_ascii[8] = {0};
+		const size_t mode_capacity = sizeof(mode_ascii) - 1;
+
+		for (size_t i = 0; mode[i]; ++i)
+		{
+			if (i == mode_capacity) return 0;
+
+			mode_ascii[i] = static_cast<char>(mode[i]);
+		}
+
+		// there is no standard function to open wide paths, so our best bet is to try utf8 path
+		char* path_utf8 = convert_path_heap(path);
+		if (!path_utf8) return 0;
+
+		// try to open the utf8 path
+		FILE* result = fopen(path_utf8, mode_ascii);
+
+		// free dummy buffer
+		xml_memory::deallocate(path_utf8);
+
+		return result;
+	}
+#endif
+
+	// Saves a document to an already opened C stream and closes the stream.
+	//
+	// doc      - document to serialize
+	// file     - destination stream; closed by this function when non-null
+	// indent, flags, encoding - forwarded unchanged to doc.save
+	//
+	// Returns false if file is null, if the stream's error indicator is set after writing,
+	// or if closing the stream fails; true otherwise.
+	PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
+	{
+		if (!file) return false;
+
+		xml_writer_file writer(file);
+		doc.save(writer, indent, flags, encoding);
+
+		// the error indicator has to be sampled before fclose, the stream is unusable afterwards
+		bool write_ok = ferror(file) == 0;
+
+		// fclose flushes any data still buffered by the C runtime, so a failure here means the
+		// file on disk may be incomplete and must be reported as well
+		bool close_ok = fclose(file) == 0;
+
+		return write_ok && close_ok;
+	}
+
+	// Parses a memory buffer into the subtree rooted at root.
+	//
+	// doc        - owning document; its buffer field is pointed at the parsed buffer
+	// root       - node that receives the parsed content
+	// contents   - input data (may be null only when size is 0)
+	// size       - input size, passed to get_buffer_encoding/convert_buffer
+	// options    - parser options, forwarded to xml_parser::parse
+	// encoding   - requested encoding; the effective one comes from get_buffer_encoding
+	// is_mutable - forwarded to convert_buffer
+	// own        - whether this function may take over/deallocate contents
+	// out_buffer - receives the buffer that was parsed when it has to be kept (see below);
+	//              left untouched otherwise
+	//
+	// Returns the parse result with its encoding field set to the effective encoding, or a
+	// status_out_of_memory result if convert_buffer fails.
+	PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
+	{
+		// check input buffer
+		assert(contents || size == 0);
+
+		// get actual encoding
+		xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
+
+		// get private buffer
+		char_t* buffer = 0;
+		size_t length = 0;
+
+		if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
+		
+		// delete original buffer if we performed a conversion
+		// (only when we own it, and only when convert_buffer produced a different buffer)
+		if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
+
+		// store buffer for offset_debug
+		doc->buffer = buffer;
+
+		// parse
+		xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
+
+		// remember encoding
+		res.encoding = buffer_encoding;
+
+		// grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
+		// (i.e. the buffer is reported when it was handed over by the caller or produced by the conversion)
+		if (own || buffer != contents) *out_buffer = buffer;
+
+		return res;
+	}
+PUGI__NS_END
+
+namespace pugi
+{
+	PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
+	{
+	}
+
+	// Writes size bytes from data to the FILE* stored in this writer using fwrite.
+	// The number of bytes written is not checked here.
+	PUGI__FN void xml_writer_file::write(const void* data, size_t size)
+	{
+		size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
+		(void)!result; // unfortunately we can't do proper error handling here
+	}
+
+#ifndef PUGIXML_NO_STL
+	PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
+	{
+	}
+
+	PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
+	{
+	}
+
+	// Forwards a block of size bytes to whichever stream this writer was constructed with.
+	// For the wide stream the byte count is converted to a wchar_t count.
+	PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
+	{
+		if (!narrow_stream)
+		{
+			// wide output: size is in bytes and must cover whole wchar_t units
+			assert(wide_stream);
+			assert(size % sizeof(wchar_t) == 0);
+
+			const wchar_t* wide_data = reinterpret_cast<const wchar_t*>(data);
+			std::streamsize wide_count = static_cast<std::streamsize>(size / sizeof(wchar_t));
+
+			wide_stream->write(wide_data, wide_count);
+			return;
+		}
+
+		// narrow output: exactly one of the two stream pointers may be set
+		assert(!wide_stream);
+
+		const char* narrow_data = reinterpret_cast<const char*>(data);
+		narrow_stream->write(narrow_data, static_cast<std::streamsize>(size));
+	}
+#endif
+
+	PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
+	{
+	}
+	
+	PUGI__FN xml_tree_walker::~xml_tree_walker()
+	{
+	}
+
+	PUGI__FN int xml_tree_walker::depth() const
+	{
+		return _depth;
+	}
+
+	PUGI__FN bool xml_tree_walker::begin(xml_node&)
+	{
+		return true;
+	}
+
+	PUGI__FN bool xml_tree_walker::end(xml_node&)
+	{
+		return true;
+	}
+
+	PUGI__FN xml_attribute::xml_attribute(): _attr(0)
+	{
+	}
+
+	PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
+	{
+	}
+
+	PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
+	{
+	}
+
+	PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
+	{
+		return _attr ? unspecified_bool_xml_attribute : 0;
+	}
+
+	PUGI__FN bool xml_attribute::operator!() const
+	{
+		return !_attr;
+	}
+
+	PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
+	{
+		return (_attr == r._attr);
+	}
+	
+	PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
+	{
+		return (_attr != r._attr);
+	}
+
+	PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
+	{
+		return (_attr < r._attr);
+	}
+	
+	PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
+	{
+		return (_attr > r._attr);
+	}
+	
+	PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
+	{
+		return (_attr <= r._attr);
+	}
+	
+	PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
+	{
+		return (_attr >= r._attr);
+	}
+
+	PUGI__FN xml_attribute xml_attribute::next_attribute() const
+	{
+		return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
+	}
+
+	// Returns the attribute preceding this one, or an empty handle if this handle is empty
+	// or refers to the first attribute.
+	PUGI__FN xml_attribute xml_attribute::previous_attribute() const
+	{
+		if (!_attr) return xml_attribute();
+
+		// the "previous" link of the first attribute leads to an attribute without a successor;
+		// that is how the start of the list is recognized
+		xml_attribute_struct* candidate = _attr->prev_attribute_c;
+
+		if (!candidate->next_attribute) return xml_attribute();
+
+		return xml_attribute(candidate);
+	}
+
+	PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
+	{
+		return (_attr && _attr->value) ? _attr->value : def;
+	}
+
+	PUGI__FN int xml_attribute::as_int(int def) const
+	{
+		return impl::get_value_int(_attr ? _attr->value : 0, def);
+	}
+
+	PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
+	{
+		return impl::get_value_uint(_attr ? _attr->value : 0, def);
+	}
+
+	PUGI__FN double xml_attribute::as_double(double def) const
+	{
+		return impl::get_value_double(_attr ? _attr->value : 0, def);
+	}
+
+	PUGI__FN float xml_attribute::as_float(float def) const
+	{
+		return impl::get_value_float(_attr ? _attr->value : 0, def);
+	}
+
+	PUGI__FN bool xml_attribute::as_bool(bool def) const
+	{
+		return impl::get_value_bool(_attr ? _attr->value : 0, def);
+	}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+	PUGI__FN long long xml_attribute::as_llong(long long def) const
+	{
+		return impl::get_value_llong(_attr ? _attr->value : 0, def);
+	}
+
+	PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
+	{
+		return impl::get_value_ullong(_attr ? _attr->value : 0, def);
+	}
+#endif
+
+	PUGI__FN bool xml_attribute::empty() const
+	{
+		return !_attr;
+	}
+
+	PUGI__FN const char_t* xml_attribute::name() const
+	{
+		return (_attr && _attr->name) ? _attr->name : PUGIXML_TEXT("");
+	}
+
+	PUGI__FN const char_t* xml_attribute::value() const
+	{
+		return (_attr && _attr->value) ? _attr->value : PUGIXML_TEXT("");
+	}
+
+	PUGI__FN size_t xml_attribute::hash_value() const
+	{
+		return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
+	}
+
+	PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
+	{
+		return _attr;
+	}
+
+	PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
+	{
+		set_value(rhs);
+		return *this;
+	}
+	
+	PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
+	{
+		set_value(rhs);
+		return *this;
+	}
+
+	PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
+	{
+		set_value(rhs);
+		return *this;
+	}
+
+	PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
+	{
+		set_value(rhs);
+		return *this;
+	}
+	
+	PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
+	{
+		set_value(rhs);
+		return *this;
+	}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+	PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
+	{
+		set_value(rhs);
+		return *this;
+	}
+
+	PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
+	{
+		set_value(rhs);
+		return *this;
+	}
+#endif
+
+	PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
+	{
+		if (!_attr) return false;
+		
+		return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs);
+	}
+		
+	PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
+	{
+		if (!_attr) return false;
+
+		return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+	}
+
+	PUGI__FN bool xml_attribute::set_value(int rhs)
+	{
+		if (!_attr) return false;
+
+		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+	}
+
+	PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
+	{
+		if (!_attr) return false;
+
+		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+	}
+
+	PUGI__FN bool xml_attribute::set_value(double rhs)
+	{
+		if (!_attr) return false;
+
+		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+	}
+	
+	PUGI__FN bool xml_attribute::set_value(bool rhs)
+	{
+		if (!_attr) return false;
+
+		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+	}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+	PUGI__FN bool xml_attribute::set_value(long long rhs)
+	{
+		if (!_attr) return false;
+
+		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+	}
+
+	PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
+	{
+		if (!_attr) return false;
+
+		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+	}
+#endif
+
+#ifdef __BORLANDC__
+	PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
+	{
+		return (bool)lhs && rhs;
+	}
+
+	PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
+	{
+		return (bool)lhs || rhs;
+	}
+#endif
+
+	PUGI__FN xml_node::xml_node(): _root(0)
+	{
+	}
+
+	PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
+	{
+	}
+	
+	PUGI__FN static void unspecified_bool_xml_node(xml_node***)
+	{
+	}
+
+	PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
+	{
+		return _root ? unspecified_bool_xml_node : 0;
+	}
+
+	PUGI__FN bool xml_node::operator!() const
+	{
+		return !_root;
+	}
+
+	PUGI__FN xml_node::iterator xml_node::begin() const
+	{
+		return iterator(_root ? _root->first_child : 0, _root);
+	}
+
+	PUGI__FN xml_node::iterator xml_node::end() const
+	{
+		return iterator(0, _root);
+	}
+	
+	PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
+	{
+		return attribute_iterator(_root ? _root->first_attribute : 0, _root);
+	}
+
+	PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
+	{
+		return attribute_iterator(0, _root);
+	}
+	
+	PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
+	{
+		return xml_object_range<xml_node_iterator>(begin(), end());
+	}
+
+	PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
+	{
+		return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
+	}
+
+	PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
+	{
+		return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
+	}
+
+	PUGI__FN bool xml_node::operator==(const xml_node& r) const
+	{
+		return (_root == r._root);
+	}
+
+	PUGI__FN bool xml_node::operator!=(const xml_node& r) const
+	{
+		return (_root != r._root);
+	}
+
+	PUGI__FN bool xml_node::operator<(const xml_node& r) const
+	{
+		return (_root < r._root);
+	}
+	
+	PUGI__FN bool xml_node::operator>(const xml_node& r) const
+	{
+		return (_root > r._root);
+	}
+	
+	PUGI__FN bool xml_node::operator<=(const xml_node& r) const
+	{
+		return (_root <= r._root);
+	}
+	
+	PUGI__FN bool xml_node::operator>=(const xml_node& r) const
+	{
+		return (_root >= r._root);
+	}
+
+	PUGI__FN bool xml_node::empty() const
+	{
+		return !_root;
+	}
+	
+	PUGI__FN const char_t* xml_node::name() const
+	{
+		return (_root && _root->name) ? _root->name : PUGIXML_TEXT("");
+	}
+
+	// Returns the node type, or node_null for an empty handle.
+	// The type is decoded from the header bits selected by xml_memory_page_type_mask, plus one.
+	PUGI__FN xml_node_type xml_node::type() const
+	{
+		return _root ? static_cast<xml_node_type>((_root->header & impl::xml_memory_page_type_mask) + 1) : node_null;
+	}
+	
+	PUGI__FN const char_t* xml_node::value() const
+	{
+		return (_root && _root->value) ? _root->value : PUGIXML_TEXT("");
+	}
+	
+	// Returns the first child whose name equals name_, or an empty handle if there is none
+	// (or if this handle is empty). Children without a name are never matched.
+	PUGI__FN xml_node xml_node::child(const char_t* name_) const
+	{
+		xml_node_struct* cur = _root ? _root->first_child : 0;
+
+		while (cur)
+		{
+			if (cur->name && impl::strequal(name_, cur->name)) return xml_node(cur);
+
+			cur = cur->next_sibling;
+		}
+
+		return xml_node();
+	}
+
+	// Returns the first attribute of this node whose name equals name_, or an empty handle
+	// if there is none (or if this handle is empty). Unnamed attributes are never matched.
+	PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
+	{
+		xml_attribute_struct* cur = _root ? _root->first_attribute : 0;
+
+		while (cur)
+		{
+			if (cur->name && impl::strequal(name_, cur->name)) return xml_attribute(cur);
+
+			cur = cur->next_attribute;
+		}
+
+		return xml_attribute();
+	}
+	
+	// Returns the nearest following sibling whose name equals name_, or an empty handle if
+	// there is none (or if this handle is empty).
+	PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
+	{
+		xml_node_struct* cur = _root ? _root->next_sibling : 0;
+
+		while (cur)
+		{
+			if (cur->name && impl::strequal(name_, cur->name)) return xml_node(cur);
+
+			cur = cur->next_sibling;
+		}
+
+		return xml_node();
+	}
+
+	// Returns the sibling that directly follows this node, or an empty handle if this is the
+	// last sibling or the handle itself is empty.
+	PUGI__FN xml_node xml_node::next_sibling() const
+	{
+		xml_node_struct* following = _root ? _root->next_sibling : 0;
+
+		// a null pointer yields the same empty handle as the default constructor
+		return xml_node(following);
+	}
+
+	// Returns the nearest preceding sibling whose name equals name_, or an empty handle if
+	// there is none (or if this handle is empty).
+	PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
+	{
+		if (!_root) return xml_node();
+
+		xml_node_struct* cur = _root->prev_sibling_c;
+
+		// walking backwards stops at a node without a successor, which marks the wrap-around
+		// from the first sibling to the last one
+		while (cur->next_sibling)
+		{
+			if (cur->name && impl::strequal(name_, cur->name)) return xml_node(cur);
+
+			cur = cur->prev_sibling_c;
+		}
+
+		return xml_node();
+	}
+
+	// Returns the sibling that directly precedes this node, or an empty handle if this is
+	// the first sibling or the handle itself is empty.
+	PUGI__FN xml_node xml_node::previous_sibling() const
+	{
+		if (!_root) return xml_node();
+
+		// the first sibling's "previous" link leads to a node without a successor
+		xml_node_struct* candidate = _root->prev_sibling_c;
+
+		return candidate->next_sibling ? xml_node(candidate) : xml_node();
+	}
+
+	PUGI__FN xml_node xml_node::parent() const
+	{
+		return _root ? xml_node(_root->parent) : xml_node();
+	}
+
+	// Returns a handle to the document node that owns this node, or an empty handle if this
+	// handle is empty.
+	PUGI__FN xml_node xml_node::root() const
+	{
+		if (!_root) return xml_node();
+
+		// the node header carries the address of its memory page in the bits selected by xml_memory_page_pointer_mask
+		impl::xml_memory_page* page = reinterpret_cast<impl::xml_memory_page*>(_root->header & impl::xml_memory_page_pointer_mask);
+
+		// the page's allocator is cast to the document structure, which is used as the root node
+		return xml_node(static_cast<impl::xml_document_struct*>(page->allocator));
+	}
+
+	PUGI__FN xml_text xml_node::text() const
+	{
+		return xml_text(_root);
+	}
+
+	// Returns the value of the first child that has a value and is a text node according to
+	// impl::is_text_node, or an empty string if there is no such child or the handle is empty.
+	PUGI__FN const char_t* xml_node::child_value() const
+	{
+		xml_node_struct* cur = _root ? _root->first_child : 0;
+
+		while (cur)
+		{
+			if (cur->value && impl::is_text_node(cur)) return cur->value;
+
+			cur = cur->next_sibling;
+		}
+
+		return PUGIXML_TEXT("");
+	}
+
+	PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
+	{
+		return child(name_).child_value();
+	}
+
+	PUGI__FN xml_attribute xml_node::first_attribute() const
+	{
+		return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
+	}
+
+	PUGI__FN xml_attribute xml_node::last_attribute() const
+	{
+		return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
+	}
+
+	PUGI__FN xml_node xml_node::first_child() const
+	{
+		return _root ? xml_node(_root->first_child) : xml_node();
+	}
+
+	PUGI__FN xml_node xml_node::last_child() const
+	{
+		return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
+	}
+
+	// Sets the node name. Only processing instructions, declarations and elements carry a
+	// name; for every other node type (including an empty handle) nothing is changed and
+	// false is returned. Otherwise the result of impl::strcpy_insitu is returned.
+	PUGI__FN bool xml_node::set_name(const char_t* rhs)
+	{
+		const xml_node_type kind = type();
+
+		const bool has_name = (kind == node_pi || kind == node_declaration || kind == node_element);
+		if (!has_name) return false;
+
+		return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs);
+	}
+		
+	// Sets the node value. Only processing instructions, CDATA, PCDATA, comments and doctype
+	// nodes carry a value; for every other node type (including an empty handle) nothing is
+	// changed and false is returned. Otherwise the result of impl::strcpy_insitu is returned.
+	PUGI__FN bool xml_node::set_value(const char_t* rhs)
+	{
+		const xml_node_type kind = type();
+
+		const bool has_value =
+			kind == node_pi ||
+			kind == node_cdata ||
+			kind == node_pcdata ||
+			kind == node_comment ||
+			kind == node_doctype;
+
+		if (!has_value) return false;
+
+		return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs);
+	}
+
+	// Adds a new attribute named name_ at the end of this node's attribute list.
+	// Only elements and declarations accept attributes; for any other node type an empty
+	// handle is returned.
+	PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
+	{
+		const xml_node_type kind = type();
+		if (kind != node_element && kind != node_declaration) return xml_attribute();
+
+		xml_attribute created(impl::append_attribute_ll(_root, impl::get_allocator(_root)));
+
+		// set_name is a no-op on an empty handle, so the result needs no check here
+		created.set_name(name_);
+
+		return created;
+	}
+
+	// Adds a new attribute named name_ at the front of this node's attribute list.
+	// Returns an empty handle if the node is neither an element nor a declaration, or if the
+	// attribute cannot be allocated.
+	PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
+	{
+		const xml_node_type kind = type();
+		if (kind != node_element && kind != node_declaration) return xml_attribute();
+
+		xml_attribute_struct* fresh = impl::allocate_attribute(impl::get_allocator(_root));
+		if (!fresh) return xml_attribute();
+
+		xml_attribute created(fresh);
+		created.set_name(name_);
+
+		xml_attribute_struct* old_first = _root->first_attribute;
+
+		if (!old_first)
+		{
+			// sole attribute: its "previous" link refers to itself
+			fresh->prev_attribute_c = fresh;
+		}
+		else
+		{
+			// the new head inherits the old head's "previous" link, and becomes the old head's predecessor
+			fresh->prev_attribute_c = old_first->prev_attribute_c;
+			old_first->prev_attribute_c = fresh;
+		}
+
+		fresh->next_attribute = old_first;
+		_root->first_attribute = fresh;
+
+		return created;
+	}
+
+	// Inserts a new attribute named name_ immediately before attr.
+	// Returns an empty handle if this node is neither an element nor a declaration, if attr
+	// is empty, if attr is not in this node's attribute list, or if allocation fails.
+	PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
+	{
+		if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();
+		
+		// check that attribute belongs to *this
+		xml_attribute_struct* cur = attr._attr;
+
+		// rewind to the head of attr's list: the head is the attribute whose predecessor has no successor
+		while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;
+
+		if (cur != _root->first_attribute) return xml_attribute();
+
+		xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
+		if (!a) return xml_attribute();
+
+		a.set_name(name_);
+
+		// if attr has a real predecessor, link it forward to the new attribute; otherwise attr was the head
+		if (attr._attr->prev_attribute_c->next_attribute)
+			attr._attr->prev_attribute_c->next_attribute = a._attr;
+		else
+			_root->first_attribute = a._attr;
+		
+		// splice the new attribute between attr's former predecessor and attr
+		a._attr->prev_attribute_c = attr._attr->prev_attribute_c;
+		a._attr->next_attribute = attr._attr;
+		attr._attr->prev_attribute_c = a._attr;
+				
+		return a;
+	}
+
+	// Inserts a new attribute named name_ immediately after attr.
+	// Returns an empty handle if this node is neither an element nor a declaration, if attr
+	// is empty, if attr is not in this node's attribute list, or if allocation fails.
+	PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
+	{
+		if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();
+		
+		// check that attribute belongs to *this
+		xml_attribute_struct* cur = attr._attr;
+
+		// rewind to the head of attr's list: the head is the attribute whose predecessor has no successor
+		while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;
+
+		if (cur != _root->first_attribute) return xml_attribute();
+
+		xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
+		if (!a) return xml_attribute();
+
+		a.set_name(name_);
+
+		// fix the backward link that must now point at the new attribute: either attr's successor,
+		// or (when attr was last) the head's "previous" link, which tracks the last attribute
+		if (attr._attr->next_attribute)
+			attr._attr->next_attribute->prev_attribute_c = a._attr;
+		else
+			_root->first_attribute->prev_attribute_c = a._attr;
+		
+		// splice the new attribute between attr and attr's former successor
+		a._attr->next_attribute = attr._attr->next_attribute;
+		a._attr->prev_attribute_c = attr._attr;
+		attr._attr->next_attribute = a._attr;
+
+		return a;
+	}
+
+	// Appends a new attribute with the name and value of proto.
+	// Returns an empty handle if proto is empty; otherwise returns what append_attribute returned.
+	PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
+	{
+		if (proto.empty()) return xml_attribute();
+
+		xml_attribute copy = append_attribute(proto.name());
+
+		// set_value is a no-op when append_attribute produced an empty handle
+		copy.set_value(proto.value());
+
+		return copy;
+	}
+
+	// Prepends a new attribute with the name and value of proto.
+	// Returns an empty handle if proto is empty; otherwise returns what prepend_attribute returned.
+	PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
+	{
+		if (proto.empty()) return xml_attribute();
+
+		xml_attribute copy = prepend_attribute(proto.name());
+
+		// set_value is a no-op when prepend_attribute produced an empty handle
+		copy.set_value(proto.value());
+
+		return copy;
+	}
+
+	// Inserts, right after attr, a new attribute with the name and value of proto.
+	// Returns an empty handle if proto is empty; otherwise returns what insert_attribute_after returned.
+	PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
+	{
+		if (proto.empty()) return xml_attribute();
+
+		xml_attribute copy = insert_attribute_after(proto.name(), attr);
+
+		// set_value is a no-op when the insertion produced an empty handle
+		copy.set_value(proto.value());
+
+		return copy;
+	}
+
+	// Inserts, right before attr, a new attribute with the name and value of proto.
+	// Returns an empty handle if proto is empty; otherwise returns what insert_attribute_before returned.
+	PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
+	{
+		if (proto.empty()) return xml_attribute();
+
+		xml_attribute copy = insert_attribute_before(proto.name(), attr);
+
+		// set_value is a no-op when the insertion produced an empty handle
+		copy.set_value(proto.value());
+
+		return copy;
+	}
+
+	// Appends a new child of the given type as the last child of this node.
+	// Returns an empty handle if impl::allow_insert_child rejects the combination of this
+	// node's type and type_. Declaration nodes are named "xml" automatically.
+	PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
+	{
+		const bool allowed = impl::allow_insert_child(this->type(), type_);
+		if (!allowed) return xml_node();
+
+		xml_node created(impl::append_node(_root, impl::get_allocator(_root), type_));
+
+		if (type_ == node_declaration)
+		{
+			created.set_name(PUGIXML_TEXT("xml"));
+		}
+
+		return created;
+	}
+
+	// Inserts a new child of the given type as the first child of this node.
+	// Returns an empty handle if impl::allow_insert_child rejects the combination of types or
+	// if the node cannot be allocated. Declaration nodes are named "xml" automatically.
+	PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
+	{
+		const bool allowed = impl::allow_insert_child(this->type(), type_);
+		if (!allowed) return xml_node();
+
+		xml_node_struct* fresh = impl::allocate_node(impl::get_allocator(_root), type_);
+		if (!fresh) return xml_node();
+
+		xml_node created(fresh);
+
+		fresh->parent = _root;
+
+		xml_node_struct* old_first = _root->first_child;
+
+		if (!old_first)
+		{
+			// only child: its "previous" link refers to itself
+			fresh->prev_sibling_c = fresh;
+		}
+		else
+		{
+			// the new head inherits the old head's "previous" link, and becomes the old head's predecessor
+			fresh->prev_sibling_c = old_first->prev_sibling_c;
+			old_first->prev_sibling_c = fresh;
+		}
+
+		fresh->next_sibling = old_first;
+		_root->first_child = fresh;
+
+		if (type_ == node_declaration) created.set_name(PUGIXML_TEXT("xml"));
+
+		return created;
+	}
+
+	// Inserts a new child of the given type immediately before node.
+	// Returns an empty handle if impl::allow_insert_child rejects the combination of types, if
+	// node is empty or is not a child of this node, or if allocation fails.
+	// Declaration nodes are named "xml" automatically.
+	PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
+	{
+		if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
+		if (!node._root || node._root->parent != _root) return xml_node();
+	
+		xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
+		if (!n) return xml_node();
+
+		n._root->parent = _root;
+		
+		// if node has a real predecessor (one that has a successor), link it forward to the new node;
+		// otherwise node was the first child and the new node becomes the head
+		if (node._root->prev_sibling_c->next_sibling)
+			node._root->prev_sibling_c->next_sibling = n._root;
+		else
+			_root->first_child = n._root;
+		
+		// splice the new node between node's former predecessor and node
+		n._root->prev_sibling_c = node._root->prev_sibling_c;
+		n._root->next_sibling = node._root;
+		node._root->prev_sibling_c = n._root;
+
+		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
+
+		return n;
+	}
+
+	// Inserts a new child of the given type immediately after node.
+	// Returns an empty handle if impl::allow_insert_child rejects the combination of types, if
+	// node is empty or is not a child of this node, or if allocation fails.
+	// Declaration nodes are named "xml" automatically.
+	PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
+	{
+		if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
+		if (!node._root || node._root->parent != _root) return xml_node();
+	
+		xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
+		if (!n) return xml_node();
+
+		n._root->parent = _root;
+	
+		// fix the backward link that must now point at the new node: either node's successor, or
+		// (when node was the last child) the first child's "previous" link, which tracks the last child
+		if (node._root->next_sibling)
+			node._root->next_sibling->prev_sibling_c = n._root;
+		else
+			_root->first_child->prev_sibling_c = n._root;
+		
+		// splice the new node between node and node's former successor
+		n._root->next_sibling = node._root->next_sibling;
+		n._root->prev_sibling_c = node._root;
+		node._root->next_sibling = n._root;
+
+		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
+
+		return n;
+	}
+
+	// Appends a new element child and gives it the name name_.
+	// Returns whatever append_child(node_element) returned, which may be an empty handle.
+	PUGI__FN xml_node xml_node::append_child(const char_t* name_)
+	{
+		xml_node element = append_child(node_element);
+
+		// set_name rejects an empty handle on its own
+		element.set_name(name_);
+
+		return element;
+	}
+
+	// Inserts a new element as the first child and gives it the name name_.
+	// Returns whatever prepend_child(node_element) returned, which may be an empty handle.
+	PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
+	{
+		xml_node element = prepend_child(node_element);
+
+		// set_name rejects an empty handle on its own
+		element.set_name(name_);
+
+		return element;
+	}
+
+	// Inserts a new element right after node and gives it the name name_.
+	// Returns whatever insert_child_after(node_element, node) returned, which may be an empty handle.
+	PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
+	{
+		xml_node element = insert_child_after(node_element, node);
+
+		// set_name rejects an empty handle on its own
+		element.set_name(name_);
+
+		return element;
+	}
+
+	// Inserts a new element right before node and gives it the name name_.
+	// Returns whatever insert_child_before(node_element, node) returned, which may be an empty handle.
+	PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
+	{
+		xml_node element = insert_child_before(node_element, node);
+
+		// set_name rejects an empty handle on its own
+		element.set_name(name_);
+
+		return element;
+	}
+
+	// Appends a child of proto's type and fills it from proto via impl::recursive_copy_skip.
+	// Returns an empty handle if the child could not be created.
+	PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
+	{
+		xml_node copy = append_child(proto.type());
+		if (!copy) return copy;
+
+		// the new node itself is passed as the node to skip
+		impl::recursive_copy_skip(copy, proto, copy);
+
+		return copy;
+	}
+
+	// Prepends a child of proto's type and fills it from proto via impl::recursive_copy_skip.
+	// Returns an empty handle if the child could not be created.
+	PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
+	{
+		xml_node copy = prepend_child(proto.type());
+		if (!copy) return copy;
+
+		// the new node itself is passed as the node to skip
+		impl::recursive_copy_skip(copy, proto, copy);
+
+		return copy;
+	}
+
+	// Inserts, right after node, a child of proto's type and fills it from proto via
+	// impl::recursive_copy_skip. Returns an empty handle if the child could not be created.
+	PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
+	{
+		xml_node copy = insert_child_after(proto.type(), node);
+		if (!copy) return copy;
+
+		// the new node itself is passed as the node to skip
+		impl::recursive_copy_skip(copy, proto, copy);
+
+		return copy;
+	}
+
+	// Inserts, right before node, a child of proto's type and fills it from proto via
+	// impl::recursive_copy_skip. Returns an empty handle if the child could not be created.
+	PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
+	{
+		xml_node copy = insert_child_before(proto.type(), node);
+		if (!copy) return copy;
+
+		// the new node itself is passed as the node to skip
+		impl::recursive_copy_skip(copy, proto, copy);
+
+		return copy;
+	}
+
+	PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
+	{
+		return remove_attribute(attribute(name_));
+	}
+
+	// Unlinks attribute a from this node and destroys it.
+	// Returns false (changing nothing) if this handle or a is empty, or if a is not in this
+	// node's attribute list; returns true after the attribute has been removed.
+	PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
+	{
+		if (!_root || !a._attr) return false;
+
+		// check that attribute belongs to *this
+		xml_attribute_struct* attr = a._attr;
+
+		// rewind to the head of a's list: the head is the attribute whose predecessor has no successor
+		while (attr->prev_attribute_c->next_attribute) attr = attr->prev_attribute_c;
+
+		if (attr != _root->first_attribute) return false;
+
+		// backward link: a's successor (or, if a is last, the head's "previous" link) now refers to a's predecessor
+		if (a._attr->next_attribute) a._attr->next_attribute->prev_attribute_c = a._attr->prev_attribute_c;
+		else if (_root->first_attribute) _root->first_attribute->prev_attribute_c = a._attr->prev_attribute_c;
+		
+		// forward link: a's real predecessor (or, if a is first, the node's head pointer) now refers to a's successor
+		if (a._attr->prev_attribute_c->next_attribute) a._attr->prev_attribute_c->next_attribute = a._attr->next_attribute;
+		else _root->first_attribute = a._attr->next_attribute;
+
+		impl::destroy_attribute(a._attr, impl::get_allocator(_root));
+
+		return true;
+	}
+
+	PUGI__FN bool xml_node::remove_child(const char_t* name_)
+	{
+		return remove_child(child(name_));
+	}
+
+	// Unlinks child n from this node and destroys it.
+	// Returns false (changing nothing) if this handle or n is empty, or if n's parent is not
+	// this node; returns true after the child has been removed.
+	PUGI__FN bool xml_node::remove_child(const xml_node& n)
+	{
+		if (!_root || !n._root || n._root->parent != _root) return false;
+
+		// backward link: n's successor (or, if n is last, the first child's "previous" link) now refers to n's predecessor
+		if (n._root->next_sibling) n._root->next_sibling->prev_sibling_c = n._root->prev_sibling_c;
+		else if (_root->first_child) _root->first_child->prev_sibling_c = n._root->prev_sibling_c;
+		
+		// forward link: n's real predecessor (or, if n is first, the parent's head pointer) now refers to n's successor
+		if (n._root->prev_sibling_c->next_sibling) n._root->prev_sibling_c->next_sibling = n._root->next_sibling;
+		else _root->first_child = n._root->next_sibling;
+		
+		impl::destroy_node(n._root, impl::get_allocator(_root));
+
+		return true;
+	}
+
+	// Parses a buffer as a document fragment and adds the parsed nodes under this node.
+	//
+	// contents, size - input data; passed to load_buffer_impl as immutable and not owned
+	// options        - parser options
+	// encoding       - requested input encoding
+	//
+	// Returns the parse result; status_append_invalid_root if this node cannot hold element
+	// children, status_out_of_memory if the bookkeeping record cannot be allocated.
+	PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
+	{
+		// append_buffer is only valid for elements/documents
+		if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
+
+		// get document node
+		impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(root()._root);
+		assert(doc);
+		
+		// get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
+		impl::xml_memory_page* page = 0;
+		impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer), page));
+		(void)page;
+
+		if (!extra) return impl::make_parse_result(status_out_of_memory);
+
+		// save name; name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
+		char_t* rootname = _root->name;
+		_root->name = 0;
+
+		// parse
+		// (buffer stays 0 unless load_buffer_impl reports a buffer through its last argument)
+		char_t* buffer = 0;
+		xml_parse_result res = impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &buffer);
+
+		// restore name
+		_root->name = rootname;
+
+		// add extra buffer to the list
+		// (pushed at the front of the document's extra_buffers list, regardless of the parse status)
+		extra->buffer = buffer;
+		extra->next = doc->extra_buffers;
+		doc->extra_buffers = extra;
+
+		return res;
+	}
+
+	// Returns the first child named name_ that has an attribute attr_name whose value equals
+	// attr_value (a missing attribute value is treated as an empty string).
+	// Returns an empty handle if nothing matches or this handle is empty.
+	PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
+	{
+		if (!_root) return xml_node();
+
+		for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
+		{
+			// only children with a matching name are inspected
+			if (!node->name || !impl::strequal(name_, node->name)) continue;
+
+			for (xml_attribute_struct* attr = node->first_attribute; attr; attr = attr->next_attribute)
+			{
+				if (!attr->name) continue;
+				if (!impl::strequal(attr_name, attr->name)) continue;
+
+				const char_t* current_value = attr->value ? attr->value : PUGIXML_TEXT("");
+
+				if (impl::strequal(attr_value, current_value)) return xml_node(node);
+			}
+		}
+
+		return xml_node();
+	}
+
+	// Returns the first child (of any name) that has an attribute attr_name whose value
+	// equals attr_value (a missing attribute value is treated as an empty string).
+	// Returns an empty handle if nothing matches or this handle is empty.
+	PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
+	{
+		if (!_root) return xml_node();
+
+		for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
+		{
+			for (xml_attribute_struct* attr = node->first_attribute; attr; attr = attr->next_attribute)
+			{
+				if (!attr->name) continue;
+				if (!impl::strequal(attr_name, attr->name)) continue;
+
+				const char_t* current_value = attr->value ? attr->value : PUGIXML_TEXT("");
+
+				if (impl::strequal(attr_value, current_value)) return xml_node(node);
+			}
+		}
+
+		return xml_node();
+	}
+
+#ifndef PUGIXML_NO_STL
+	// Builds the path of this node: the names of all ancestors (outermost first) followed by
+	// this node's name, joined with delimiter.
+	PUGI__FN string_t xml_node::path(char_t delimiter) const
+	{
+		string_t result = name();
+
+		// climb towards the root, prefixing each ancestor's name and a delimiter
+		for (xml_node ancestor = parent(); ancestor; ancestor = ancestor.parent())
+		{
+			string_t prefixed = ancestor.name();
+			prefixed += delimiter;
+			prefixed += result;
+
+			result.swap(prefixed);
+		}
+
+		return result;
+	}
+#endif
+
+	// Resolves a delimiter-separated path relative to this node and returns the first node
+	// that matches it.
+	//
+	// path_     - path string; a leading delimiter makes the search start at the document root,
+	//             "." keeps the current node and ".." moves to the parent
+	// delimiter - character separating path segments; repeated delimiters are skipped
+	//
+	// Returns this node if the handle is empty or the path is null/empty, the matching node
+	// if one is found, and an empty handle otherwise. Each segment is resolved by a recursive call.
+	PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
+	{
+		xml_node found = *this; // Current search context.
+
+		if (!_root || !path_ || !path_[0]) return found;
+
+		if (path_[0] == delimiter)
+		{
+			// Absolute path; e.g. '/foo/bar'
+			found = found.root();
+			++path_;
+		}
+
+		const char_t* path_segment = path_;
+
+		// skip any further delimiters in front of the first segment
+		while (*path_segment == delimiter) ++path_segment;
+
+		const char_t* path_segment_end = path_segment;
+
+		// the segment extends to the next delimiter or the end of the string
+		while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
+
+		// nothing but delimiters left: the current context is the answer
+		if (path_segment == path_segment_end) return found;
+
+		const char_t* next_segment = path_segment_end;
+
+		while (*next_segment == delimiter) ++next_segment;
+
+		if (*path_segment == '.' && path_segment + 1 == path_segment_end)
+			return found.first_element_by_path(next_segment, delimiter);
+		else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
+			return found.parent().first_element_by_path(next_segment, delimiter);
+		else
+		{
+			// try every child whose name equals the segment; the first one whose subtree
+			// resolves the rest of the path wins
+			for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)
+			{
+				if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
+				{
+					xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
+
+					if (subsearch) return subsearch;
+				}
+			}
+
+			return xml_node();
+		}
+	}
+
+	// Walks the subtree below this node depth-first without recursion, driving the walker's callbacks.
+	//
+	// walker.begin is called first with this node, walker.for_each once per descendant (a
+	// node is visited before its children), and walker.end last with this node.
+	// walker._depth is -1 during begin/end and is kept in step with the nesting level of the
+	// node passed to for_each (0 for direct children).
+	//
+	// Returns false as soon as begin or for_each returns false (end is then not called);
+	// otherwise returns the result of walker.end.
+	PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
+	{
+		walker._depth = -1;
+		
+		// callbacks take a non-const reference, so they are handed a copy of the handle
+		xml_node arg_begin = *this;
+		if (!walker.begin(arg_begin)) return false;
+
+		xml_node cur = first_child();
+				
+		if (cur)
+		{
+			++walker._depth;
+
+			do 
+			{
+				xml_node arg_for_each = cur;
+				if (!walker.for_each(arg_for_each))
+					return false;
+						
+				// descend first, then move sideways, then climb until a sibling exists
+				if (cur.first_child())
+				{
+					++walker._depth;
+					cur = cur.first_child();
+				}
+				else if (cur.next_sibling())
+					cur = cur.next_sibling();
+				else
+				{
+					// Borland C++ workaround
+					while (!cur.next_sibling() && cur != *this && !cur.parent().empty())
+					{
+						--walker._depth;
+						cur = cur.parent();
+					}
+						
+					// climbing back to the start node ends the walk; otherwise continue with the sibling found
+					if (cur != *this)
+						cur = cur.next_sibling();
+				}
+			}
+			while (cur && cur != *this);
+		}
+
+		assert(walker._depth == -1);
+
+		xml_node arg_end = *this;
+		return walker.end(arg_end);
+	}
+
+	// Computes a hash for this handle from the address of the node structure it refers to.
+	PUGI__FN size_t xml_node::hash_value() const
+	{
+		const uintptr_t address = reinterpret_cast<uintptr_t>(_root);
+
+		return static_cast<size_t>(address / sizeof(xml_node_struct));
+	}
+
+	// Exposes the raw node structure pointer wrapped by this handle (null for an empty handle).
+	PUGI__FN xml_node_struct* xml_node::internal_object() const
+	{
+		return _root;
+	}
+
+	// Serializes this node through the given writer using a buffered writer for the requested encoding.
+	// An empty handle produces no output.
+	PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
+	{
+		if (_root)
+		{
+			impl::xml_buffered_writer out(writer, encoding);
+
+			impl::node_output(out, *this, indent, flags, depth);
+		}
+	}
+
+#ifndef PUGIXML_NO_STL
+	// Convenience overload: wraps a narrow-character stream in xml_writer_stream and forwards to print().
+	PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
+	{
+		xml_writer_stream stream_writer(stream);
+
+		print(stream_writer, indent, flags, encoding, depth);
+	}
+
+	// Convenience overload for wide streams: output is always produced with encoding_wchar.
+	PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
+	{
+		xml_writer_stream stream_writer(stream);
+
+		print(stream_writer, indent, flags, encoding_wchar, depth);
+	}
+#endif
+
+	// Reports where this node's name (element/declaration/PI) or value (text-like nodes) sits
+	// relative to the start of the document buffer. Returns 0 for the document node and -1 when
+	// there is no document, no buffer, the string was separately allocated, or the type is unknown.
+	PUGI__FN ptrdiff_t xml_node::offset_debug() const
+	{
+		xml_node_struct* doc_root = root()._root;
+		if (!doc_root) return -1;
+
+		const char_t* base = static_cast<impl::xml_document_struct*>(doc_root)->buffer;
+		if (!base) return -1;
+
+		const xml_node_type kind = type();
+
+		if (kind == node_document) return 0;
+
+		if (kind == node_element || kind == node_declaration || kind == node_pi)
+		{
+			// the offset is only reported when the name is not flagged as separately allocated
+			if (_root->header & impl::xml_memory_page_name_allocated_mask) return -1;
+
+			return _root->name - base;
+		}
+
+		if (kind == node_pcdata || kind == node_cdata || kind == node_comment || kind == node_doctype)
+		{
+			// same rule for the value string
+			if (_root->header & impl::xml_memory_page_value_allocated_mask) return -1;
+
+			return _root->value - base;
+		}
+
+		return -1;
+	}
+
+#ifdef __BORLANDC__
+	// Borland-only helper: combines the truth value of a node handle with a bool using logical AND.
+	PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
+	{
+		const bool lhs_value = (bool)lhs;
+
+		return lhs_value && rhs;
+	}
+
+	// Borland-only helper: combines the truth value of a node handle with a bool using logical OR.
+	PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
+	{
+		const bool lhs_value = (bool)lhs;
+
+		return lhs_value || rhs;
+	}
+#endif
+
+	// Builds a text accessor bound to the given node structure.
+	PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
+	{
+	}
+
+	// Locates the node that carries the text: the bound node itself when it is a text node,
+	// otherwise its first text-node child. Returns null when unbound or when no such child exists.
+	PUGI__FN xml_node_struct* xml_text::_data() const
+	{
+		if (!_root) return 0;
+		if (impl::is_text_node(_root)) return _root;
+
+		xml_node_struct* child = _root->first_child;
+
+		while (child && !impl::is_text_node(child))
+			child = child->next_sibling;
+
+		return child;
+	}
+
+	// Returns the existing text node if there is one; otherwise appends a new PCDATA child
+	// to the bound node and returns it.
+	PUGI__FN xml_node_struct* xml_text::_data_new()
+	{
+		xml_node_struct* existing = _data();
+
+		return existing ? existing : xml_node(_root).append_child(node_pcdata).internal_object();
+	}
+
+	// Builds an unbound text accessor.
+	PUGI__FN xml_text::xml_text(): _root(0)
+	{
+	}
+
+	// Empty function whose address is returned by xml_text's boolean conversion for a "true" result.
+	PUGI__FN static void unspecified_bool_xml_text(xml_text***)
+	{
+	}
+
+	// Boolean conversion: yields a non-null function pointer when a text node is available, null otherwise.
+	PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
+	{
+		if (!_data()) return 0;
+
+		return unspecified_bool_xml_text;
+	}
+
+	// True when no text node is available.
+	PUGI__FN bool xml_text::operator!() const
+	{
+		return _data() == 0;
+	}
+
+	// True when no text node is available.
+	PUGI__FN bool xml_text::empty() const
+	{
+		return !_data();
+	}
+
+	// Returns the text value, or an empty string when there is no text node or it has no value.
+	PUGI__FN const char_t* xml_text::get() const
+	{
+		const xml_node_struct* text_node = _data();
+
+		if (text_node && text_node->value) return text_node->value;
+
+		return PUGIXML_TEXT("");
+	}
+
+	// Returns the text value, or def when there is no text node or it has no value.
+	PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
+	{
+		const xml_node_struct* text_node = _data();
+
+		if (text_node && text_node->value) return text_node->value;
+
+		return def;
+	}
+
+	// Converts the text value with impl::get_value_int; a null value is passed when there is no text node.
+	PUGI__FN int xml_text::as_int(int def) const
+	{
+		xml_node_struct* text_node = _data();
+		const char_t* text = text_node ? text_node->value : 0;
+
+		return impl::get_value_int(text, def);
+	}
+
+	// Converts the text value with impl::get_value_uint; a null value is passed when there is no text node.
+	PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
+	{
+		xml_node_struct* text_node = _data();
+		const char_t* text = text_node ? text_node->value : 0;
+
+		return impl::get_value_uint(text, def);
+	}
+
+	// Converts the text value with impl::get_value_double; a null value is passed when there is no text node.
+	PUGI__FN double xml_text::as_double(double def) const
+	{
+		xml_node_struct* text_node = _data();
+		const char_t* text = text_node ? text_node->value : 0;
+
+		return impl::get_value_double(text, def);
+	}
+
+	// Converts the text value with impl::get_value_float; a null value is passed when there is no text node.
+	PUGI__FN float xml_text::as_float(float def) const
+	{
+		xml_node_struct* text_node = _data();
+		const char_t* text = text_node ? text_node->value : 0;
+
+		return impl::get_value_float(text, def);
+	}
+
+	// Converts the text value with impl::get_value_bool; a null value is passed when there is no text node.
+	PUGI__FN bool xml_text::as_bool(bool def) const
+	{
+		xml_node_struct* text_node = _data();
+		const char_t* text = text_node ? text_node->value : 0;
+
+		return impl::get_value_bool(text, def);
+	}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+	// Converts the text value with impl::get_value_llong; a null value is passed when there is no text node.
+	PUGI__FN long long xml_text::as_llong(long long def) const
+	{
+		xml_node_struct* text_node = _data();
+		const char_t* text = text_node ? text_node->value : 0;
+
+		return impl::get_value_llong(text, def);
+	}
+
+	// Converts the text value with impl::get_value_ullong; a null value is passed when there is no text node.
+	PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
+	{
+		xml_node_struct* text_node = _data();
+		const char_t* text = text_node ? text_node->value : 0;
+
+		return impl::get_value_ullong(text, def);
+	}
+#endif
+
+	// Stores a string as the text value, creating the text node if needed.
+	// Returns false when no text node could be obtained or the copy fails.
+	PUGI__FN bool xml_text::set(const char_t* rhs)
+	{
+		xml_node_struct* target = _data_new();
+		if (!target) return false;
+
+		return impl::strcpy_insitu(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
+	}
+
+	// Stores an int as the text value, creating the text node if needed.
+	// Returns false when no text node could be obtained or the conversion fails.
+	PUGI__FN bool xml_text::set(int rhs)
+	{
+		xml_node_struct* target = _data_new();
+		if (!target) return false;
+
+		return impl::set_value_convert(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
+	}
+
+	// Stores an unsigned int as the text value, creating the text node if needed.
+	// Returns false when no text node could be obtained or the conversion fails.
+	PUGI__FN bool xml_text::set(unsigned int rhs)
+	{
+		xml_node_struct* target = _data_new();
+		if (!target) return false;
+
+		return impl::set_value_convert(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
+	}
+
+	// Stores a double as the text value, creating the text node if needed.
+	// Returns false when no text node could be obtained or the conversion fails.
+	PUGI__FN bool xml_text::set(double rhs)
+	{
+		xml_node_struct* target = _data_new();
+		if (!target) return false;
+
+		return impl::set_value_convert(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
+	}
+
+	// Stores a bool as the text value, creating the text node if needed.
+	// Returns false when no text node could be obtained or the conversion fails.
+	PUGI__FN bool xml_text::set(bool rhs)
+	{
+		xml_node_struct* target = _data_new();
+		if (!target) return false;
+
+		return impl::set_value_convert(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
+	}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+	// Stores a long long as the text value, creating the text node if needed.
+	// Returns false when no text node could be obtained or the conversion fails.
+	PUGI__FN bool xml_text::set(long long rhs)
+	{
+		xml_node_struct* target = _data_new();
+		if (!target) return false;
+
+		return impl::set_value_convert(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
+	}
+
+	// Stores an unsigned long long as the text value, creating the text node if needed.
+	// Returns false when no text node could be obtained or the conversion fails.
+	PUGI__FN bool xml_text::set(unsigned long long rhs)
+	{
+		xml_node_struct* target = _data_new();
+		if (!target) return false;
+
+		return impl::set_value_convert(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
+	}
+#endif
+
+	// Assignment shorthand for set(const char_t*); the success flag from set() is discarded.
+	PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
+	{
+		set(rhs);
+
+		return *this;
+	}
+
+	// Assignment shorthand for set(int); the success flag from set() is discarded.
+	PUGI__FN xml_text& xml_text::operator=(int rhs)
+	{
+		set(rhs);
+
+		return *this;
+	}
+
+	// Assignment shorthand for set(unsigned int); the success flag from set() is discarded.
+	PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
+	{
+		set(rhs);
+
+		return *this;
+	}
+
+	// Assignment shorthand for set(double); the success flag from set() is discarded.
+	PUGI__FN xml_text& xml_text::operator=(double rhs)
+	{
+		set(rhs);
+
+		return *this;
+	}
+
+	// Assignment shorthand for set(bool); the success flag from set() is discarded.
+	PUGI__FN xml_text& xml_text::operator=(bool rhs)
+	{
+		set(rhs);
+
+		return *this;
+	}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+	// Assignment shorthand for set(long long); the success flag from set() is discarded.
+	PUGI__FN xml_text& xml_text::operator=(long long rhs)
+	{
+		set(rhs);
+
+		return *this;
+	}
+
+	// Assignment shorthand for set(unsigned long long); the success flag from set() is discarded.
+	PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
+	{
+		set(rhs);
+
+		return *this;
+	}
+#endif
+
+	// Returns a handle to the node that carries the text (an empty handle when there is none).
+	PUGI__FN xml_node xml_text::data() const
+	{
+		xml_node_struct* text_node = _data();
+
+		return xml_node(text_node);
+	}
+
+#ifdef __BORLANDC__
+	// Borland-only helper: combines the truth value of a text accessor with a bool using logical AND.
+	PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
+	{
+		const bool lhs_value = (bool)lhs;
+
+		return lhs_value && rhs;
+	}
+
+	// Borland-only helper: combines the truth value of a text accessor with a bool using logical OR.
+	PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
+	{
+		const bool lhs_value = (bool)lhs;
+
+		return lhs_value || rhs;
+	}
+#endif
+
+	// Default constructor: both the wrapped node and the parent are default-constructed handles.
+	PUGI__FN xml_node_iterator::xml_node_iterator()
+	{
+	}
+
+	// Builds an iterator positioned at node, remembering node's parent.
+	PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
+	{
+	}
+
+	// Builds an iterator from raw node and parent structure pointers.
+	PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
+	{
+	}
+
+	// Two iterators are equal when both the current node and the parent match.
+	PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
+	{
+		return !(_wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root);
+	}
+	
+	// Two iterators differ when either the current node or the parent differs.
+	PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
+	{
+		return !(_wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root);
+	}
+
+	// Dereference: returns the wrapped node handle; the iterator must point at a node.
+	PUGI__FN xml_node& xml_node_iterator::operator*() const
+	{
+		assert(_wrap._root);
+
+		return _wrap;
+	}
+
+	// Member access: returns the address of the wrapped node handle; the iterator must point at a node.
+	PUGI__FN xml_node* xml_node_iterator::operator->() const
+	{
+		assert(_wrap._root);
+
+		return const_cast<xml_node*>(&_wrap); // BCC32 workaround
+	}
+
+	// Pre-increment: moves to the next sibling (null after the last one); the iterator must point at a node.
+	PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
+	{
+		assert(_wrap._root);
+
+		_wrap._root = _wrap._root->next_sibling;
+
+		return *this;
+	}
+
+	// Post-increment: advances the iterator and returns its previous position.
+	PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
+	{
+		xml_node_iterator previous = *this;
+
+		operator++();
+
+		return previous;
+	}
+
+	// Pre-decrement: steps to the previous sibling, or to the parent's last child when the
+	// iterator currently holds no node.
+	PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
+	{
+		if (_wrap._root)
+			_wrap = _wrap.previous_sibling();
+		else
+			_wrap = _parent.last_child();
+
+		return *this;
+	}
+
+	// Post-decrement: steps the iterator back and returns its previous position.
+	PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
+	{
+		xml_node_iterator previous = *this;
+
+		operator--();
+
+		return previous;
+	}
+
+	// Default constructor: both the wrapped attribute and the parent are default-constructed handles.
+	PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
+	{
+	}
+
+	// Builds an iterator positioned at attr within the given parent node.
+	PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
+	{
+	}
+
+	// Builds an iterator from raw attribute and parent structure pointers.
+	PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
+	{
+	}
+
+	// Two iterators are equal when both the current attribute and the parent match.
+	PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
+	{
+		return !(_wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root);
+	}
+	
+	// Two iterators differ when either the current attribute or the parent differs.
+	PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
+	{
+		return !(_wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root);
+	}
+
+	// Dereference: returns the wrapped attribute handle; the iterator must point at an attribute.
+	PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
+	{
+		assert(_wrap._attr);
+
+		return _wrap;
+	}
+
+	// Member access: returns the address of the wrapped attribute handle; the iterator must point at an attribute.
+	PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
+	{
+		assert(_wrap._attr);
+
+		return const_cast<xml_attribute*>(&_wrap); // BCC32 workaround
+	}
+
+	// Pre-increment: moves to the next attribute (null after the last one); the iterator must point at an attribute.
+	PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
+	{
+		assert(_wrap._attr);
+
+		_wrap._attr = _wrap._attr->next_attribute;
+
+		return *this;
+	}
+
+	// Post-increment: advances the iterator and returns its previous position.
+	PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
+	{
+		xml_attribute_iterator previous = *this;
+
+		operator++();
+
+		return previous;
+	}
+
+	// Pre-decrement: steps to the previous attribute, or to the parent's last attribute when the
+	// iterator currently holds no attribute.
+	PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
+	{
+		if (_wrap._attr)
+			_wrap = _wrap.previous_attribute();
+		else
+			_wrap = _parent.last_attribute();
+
+		return *this;
+	}
+
+	// Post-decrement: steps the iterator back and returns its previous position.
+	PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
+	{
+		xml_attribute_iterator previous = *this;
+
+		operator--();
+
+		return previous;
+	}
+
+	// Default constructor: no node, no parent and a null name filter.
+	PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
+	{
+	}
+
+	// Builds an iterator positioned at node that steps between siblings called name.
+	// The name pointer is stored as given, not copied.
+	PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
+	{
+	}
+
+	// Builds an iterator from raw node and parent structure pointers plus the name filter.
+	// The name pointer is stored as given, not copied.
+	PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
+	{
+	}
+
+	// Two iterators are equal when both the current node and the parent match (the name is not compared).
+	PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
+	{
+		return !(_wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root);
+	}
+
+	// Two iterators differ when either the current node or the parent differs (the name is not compared).
+	PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
+	{
+		return !(_wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root);
+	}
+
+	// Dereference: returns the wrapped node handle; the iterator must point at a node.
+	PUGI__FN xml_node& xml_named_node_iterator::operator*() const
+	{
+		assert(_wrap._root);
+
+		return _wrap;
+	}
+
+	// Member access: returns the address of the wrapped node handle; the iterator must point at a node.
+	PUGI__FN xml_node* xml_named_node_iterator::operator->() const
+	{
+		assert(_wrap._root);
+
+		return const_cast<xml_node*>(&_wrap); // BCC32 workaround
+	}
+
+	// Pre-increment: moves to the next sibling with the stored name; the iterator must point at a node.
+	PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
+	{
+		assert(_wrap._root);
+
+		_wrap = _wrap.next_sibling(_name);
+
+		return *this;
+	}
+
+	// Post-increment: advances the iterator and returns its previous position.
+	PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
+	{
+		xml_named_node_iterator previous = *this;
+
+		operator++();
+
+		return previous;
+	}
+
+	// Pre-decrement: steps to the previous sibling with the stored name. When the iterator holds
+	// no node, the search starts from the parent's last child, which is kept if its name matches.
+	PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--()
+	{
+		if (_wrap._root)
+		{
+			_wrap = _wrap.previous_sibling(_name);
+			return *this;
+		}
+
+		_wrap = _parent.last_child();
+
+		// the last child only qualifies if it carries the requested name
+		if (!impl::strequal(_wrap.name(), _name))
+			_wrap = _wrap.previous_sibling(_name);
+
+		return *this;
+	}
+
+	// Post-decrement: steps the iterator back and returns its previous position.
+	PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
+	{
+		xml_named_node_iterator previous = *this;
+
+		operator--();
+
+		return previous;
+	}
+
+	// A default-constructed result reports status_internal_error at offset 0 with encoding_auto.
+	PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
+	{
+	}
+
+	// A result converts to true only when parsing finished with status_ok.
+	PUGI__FN xml_parse_result::operator bool() const
+	{
+		return !(status != status_ok);
+	}
+
+	// Maps the stored status code to a human-readable English message.
+	// Statuses without a dedicated message yield "Unknown error".
+	PUGI__FN const char* xml_parse_result::description() const
+	{
+		const char* message = "Unknown error";
+
+		switch (status)
+		{
+		case status_ok: message = "No error"; break;
+
+		// I/O and resource failures
+		case status_file_not_found: message = "File was not found"; break;
+		case status_io_error: message = "Error reading from file/stream"; break;
+		case status_out_of_memory: message = "Could not allocate memory"; break;
+		case status_internal_error: message = "Internal error occurred"; break;
+
+		// markup errors
+		case status_unrecognized_tag: message = "Could not determine tag type"; break;
+		case status_bad_pi: message = "Error parsing document declaration/processing instruction"; break;
+		case status_bad_comment: message = "Error parsing comment"; break;
+		case status_bad_cdata: message = "Error parsing CDATA section"; break;
+		case status_bad_doctype: message = "Error parsing document type declaration"; break;
+		case status_bad_pcdata: message = "Error parsing PCDATA section"; break;
+		case status_bad_start_element: message = "Error parsing start element tag"; break;
+		case status_bad_attribute: message = "Error parsing element attribute"; break;
+		case status_bad_end_element: message = "Error parsing end element tag"; break;
+		case status_end_element_mismatch: message = "Start-end tags mismatch"; break;
+
+		case status_append_invalid_root: message = "Unable to append nodes: root is not an element or document"; break;
+
+		case status_no_document_element: message = "No document element found"; break;
+
+		default: break;
+		}
+
+		return message;
+	}
+
+	// Constructs an empty document: no owned buffer, root set up by create().
+	PUGI__FN xml_document::xml_document(): _buffer(0)
+	{
+		create();
+	}
+
+	// Releases everything the document owns via destroy().
+	PUGI__FN xml_document::~xml_document()
+	{
+		destroy();
+	}
+
+	// Returns the document to the empty state: tears down the current contents, then rebuilds the root.
+	PUGI__FN void xml_document::reset()
+	{
+		destroy();
+
+		create();
+	}
+
+	// Replaces the contents of this document with copies of proto's top-level nodes
+	// (each one appended with append_copy).
+	// Resetting a document from itself leaves it untouched: without the guard, reset() would
+	// destroy the tree first and the loop below would then copy from an already empty document.
+	PUGI__FN void xml_document::reset(const xml_document& proto)
+	{
+		if (&proto == this) return;
+
+		reset();
+
+		for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
+			append_copy(cur);
+	}
+
+	// Sets up the document root inside the embedded _memory storage: aligns the storage,
+	// constructs a sentinel memory page there and placement-constructs the document structure
+	// in that page's data area. Must only be called while the document has no root.
+	PUGI__FN void xml_document::create()
+	{
+        assert(!_root);
+
+		// the embedded storage must hold the page header, the document structure and alignment slack
+		PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment <= sizeof(_memory));
+
+		// round the storage address up to the page alignment boundary
+		const uintptr_t storage_address = reinterpret_cast<uintptr_t>(_memory);
+		void* page_memory = reinterpret_cast<void*>((storage_address + (impl::xml_memory_page_alignment - 1)) & ~(impl::xml_memory_page_alignment - 1));
+
+		// build the sentinel page and mark it as fully occupied
+		impl::xml_memory_page* sentinel = impl::xml_memory_page::construct(page_memory);
+		assert(sentinel);
+
+		sentinel->busy_size = impl::xml_memory_page_size;
+
+		// construct the root in the page's data area; its prev_sibling_c points back at itself
+		_root = new (sentinel->data) impl::xml_document_struct(sentinel);
+		_root->prev_sibling_c = _root;
+
+		// the document structure also acts as the page's allocator
+		sentinel->allocator = static_cast<impl::xml_document_struct*>(_root);
+	}
+
+	// Frees everything the document owns: the main buffer, the extra buffers and every memory
+	// page after the root page. The root page itself is not deallocated. Leaves _root null.
+	PUGI__FN void xml_document::destroy()
+	{
+        assert(_root);
+
+		// release the owned buffer, if any
+		if (_buffer)
+		{
+			impl::xml_memory::deallocate(_buffer);
+			_buffer = 0;
+		}
+
+		// release extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
+		impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers;
+
+		while (extra)
+		{
+			if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
+
+			extra = extra->next;
+		}
+
+		// release dynamic pages, leave sentinel page (it's in static memory)
+        impl::xml_memory_page* root_page = reinterpret_cast<impl::xml_memory_page*>(_root->header & impl::xml_memory_page_pointer_mask);
+        assert(root_page && !root_page->prev && !root_page->memory);
+
+        impl::xml_memory_page* page = root_page->next;
+
+        while (page)
+        {
+            // fetch the successor before the page goes away
+            impl::xml_memory_page* following = page->next;
+
+            impl::xml_allocator::deallocate_page(page);
+
+            page = following;
+        }
+
+        _root = 0;
+	}
+
+#ifndef PUGIXML_NO_STL
+	// Discards the current contents, then parses the document from a narrow-character stream.
+	PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
+	{
+		reset();
+
+		xml_document& self = *this;
+
+		return impl::load_stream_impl(self, stream, options, encoding);
+	}
+
+	// Discards the current contents, then parses the document from a wide stream as encoding_wchar.
+	PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
+	{
+		reset();
+
+		xml_document& self = *this;
+
+		return impl::load_stream_impl(self, stream, options, encoding_wchar);
+	}
+#endif
+
+	// Parses a null-terminated string held in the library's native character type.
+	// The byte length is derived from the string length and forwarded to load_buffer().
+	PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
+	{
+		// Force native encoding (skip autodetection)
+	#ifdef PUGIXML_WCHAR_MODE
+		xml_encoding native_encoding = encoding_wchar;
+	#else
+		xml_encoding native_encoding = encoding_utf8;
+	#endif
+
+		const size_t byte_count = impl::strlength(contents) * sizeof(char_t);
+
+		return load_buffer(contents, byte_count, options, native_encoding);
+	}
+
+	// Discards the current contents, opens the file at path_ in binary read mode and parses it.
+	// The FILE handle (possibly null) is handed to impl::load_file_impl.
+	PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
+	{
+		reset();
+
+		FILE* input = fopen(path_, "rb");
+
+		xml_document& self = *this;
+
+		return impl::load_file_impl(self, input, options, encoding);
+	}
+
+	// Wide-path variant: discards the current contents, opens the file through
+	// impl::open_file_wide in binary read mode and parses it.
+	PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
+	{
+		reset();
+
+		FILE* input = impl::open_file_wide(path_, L"rb");
+
+		xml_document& self = *this;
+
+		return impl::load_file_impl(self, input, options, encoding);
+	}
+
+	// Discards the current contents and parses from a caller-provided read-only buffer.
+	// Both trailing flags passed to impl::load_buffer_impl are false in this variant.
+	PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
+	{
+		reset();
+
+		impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
+
+		return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
+	}
+
+	// Discards the current contents and parses from a caller-provided mutable buffer.
+	// The trailing flags passed to impl::load_buffer_impl are (true, false) in this variant.
+	PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
+	{
+		reset();
+
+		impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
+
+		return impl::load_buffer_impl(doc, _root, contents, size, options, encoding, true, false, &_buffer);
+	}
+		
+	// Discards the current contents and parses from a caller-provided mutable buffer.
+	// The trailing flags passed to impl::load_buffer_impl are (true, true) in this variant.
+	PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
+	{
+		reset();
+
+		impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
+
+		return impl::load_buffer_impl(doc, _root, contents, size, options, encoding, true, true, &_buffer);
+	}
+
+	// Serializes the whole document through writer: an optional byte order mark, an optional
+	// synthesized XML declaration, then the node tree.
+	PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
+	{
+		impl::xml_buffered_writer out(writer, encoding);
+
+		const bool latin1 = (encoding == encoding_latin1);
+
+		// a BOM is only emitted on request and never for latin1 output
+		if (!latin1 && (flags & format_write_bom))
+		{
+			// BOM always represents the codepoint U+FEFF, so just write it in native encoding
+		#ifdef PUGIXML_WCHAR_MODE
+			unsigned int bom = 0xfeff;
+			out.write(static_cast<wchar_t>(bom));
+		#else
+			out.write('\xef', '\xbb', '\xbf');
+		#endif
+		}
+
+		// write a declaration unless suppressed by flags or already present in the document
+		if (!(flags & format_no_declaration) && !impl::has_declaration(*this))
+		{
+			out.write(PUGIXML_TEXT("<?xml version=\"1.0\""));
+			if (latin1) out.write(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
+			out.write('?', '>');
+			if (!(flags & format_raw)) out.write('\n');
+		}
+
+		impl::node_output(out, *this, indent, flags, 0);
+	}
+
+#ifndef PUGIXML_NO_STL
+	// Convenience overload: wraps a narrow-character stream in xml_writer_stream and forwards to save().
+	PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
+	{
+		xml_writer_stream stream_writer(stream);
+
+		save(stream_writer, indent, flags, encoding);
+	}
+
+	// Convenience overload for wide streams: output is always produced with encoding_wchar.
+	PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
+	{
+		xml_writer_stream stream_writer(stream);
+
+		save(stream_writer, indent, flags, encoding_wchar);
+	}
+#endif
+
+	// Opens path_ for writing (text mode when format_save_file_text is set, binary otherwise)
+	// and hands the FILE handle (possibly null) to impl::save_file_impl.
+	PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
+	{
+		const char* mode = (flags & format_save_file_text) ? "w" : "wb";
+
+		FILE* output = fopen(path_, mode);
+
+		return impl::save_file_impl(*this, output, indent, flags, encoding);
+	}
+
+	// Wide-path variant: opens path_ through impl::open_file_wide (text mode when
+	// format_save_file_text is set, binary otherwise) and hands the handle to impl::save_file_impl.
+	PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
+	{
+		const wchar_t* mode = (flags & format_save_file_text) ? L"w" : L"wb";
+
+		FILE* output = impl::open_file_wide(path_, mode);
+
+		return impl::save_file_impl(*this, output, indent, flags, encoding);
+	}
+
+	// Returns the first top-level child whose type is node_element, or an empty handle if there is none.
+	PUGI__FN xml_node xml_document::document_element() const
+	{
+        assert(_root);
+
+		xml_node_struct* child = _root->first_child;
+
+		while (child)
+		{
+			// the type bits in the header hold the node type minus one
+			const bool is_element = ((child->header & impl::xml_memory_page_type_mask) + 1 == node_element);
+
+			if (is_element) return xml_node(child);
+
+			child = child->next_sibling;
+		}
+
+		return xml_node();
+	}
+
+#ifndef PUGIXML_NO_STL
+	// Converts a null-terminated wide string to a UTF-8 std::string; str must not be null.
+	PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
+	{
+		assert(str);
+
+		const size_t length = impl::strlength_wide(str);
+
+		return impl::as_utf8_impl(str, length);
+	}
+
+	// Converts a wide std::basic_string to a UTF-8 std::string.
+	PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
+	{
+		const wchar_t* data = str.c_str();
+
+		return impl::as_utf8_impl(data, str.size());
+	}
+	
+	// Converts a null-terminated narrow string to a wide std::basic_string; str must not be null.
+	PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
+	{
+		assert(str);
+
+		const size_t length = strlen(str);
+
+		return impl::as_wide_impl(str, length);
+	}
+	
+	// Converts a narrow std::string to a wide std::basic_string.
+	PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
+	{
+		const char* data = str.c_str();
+
+		return impl::as_wide_impl(data, str.size());
+	}
+#endif
+
+	// Installs the allocation and deallocation functions stored in impl::xml_memory.
+	// The pointers are stored as given, without validation.
+	PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
+	{
+		impl::xml_memory::allocate = allocate;
+
+		impl::xml_memory::deallocate = deallocate;
+	}
+
+	// Returns the allocation function currently stored in impl::xml_memory.
+	PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
+	{
+		allocation_function current = impl::xml_memory::allocate;
+
+		return current;
+	}
+
+	// Returns the deallocation function currently stored in impl::xml_memory.
+	PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
+	{
+		deallocation_function current = impl::xml_memory::deallocate;
+
+		return current;
+	}
+}
+
+#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
+namespace std
+{
+	// Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
+
+	// xml_node_iterator is reported as a bidirectional iterator.
+	PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
+	{
+		std::bidirectional_iterator_tag category;
+		return category;
+	}
+
+	// xml_attribute_iterator is reported as a bidirectional iterator.
+	PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
+	{
+		std::bidirectional_iterator_tag category;
+		return category;
+	}
+
+	// xml_named_node_iterator is reported as a bidirectional iterator.
+	PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
+	{
+		std::bidirectional_iterator_tag category;
+		return category;
+	}
+}
+#endif
+
+#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
+namespace std
+{
+	// Workarounds for (non-standard) iterator category detection
+
+	// xml_node_iterator is reported as a bidirectional iterator.
+	PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
+	{
+		std::bidirectional_iterator_tag category;
+		return category;
+	}
+
+	// xml_attribute_iterator is reported as a bidirectional iterator.
+	PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
+	{
+		std::bidirectional_iterator_tag category;
+		return category;
+	}
+
+	// xml_named_node_iterator is reported as a bidirectional iterator.
+	PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
+	{
+		std::bidirectional_iterator_tag category;
+		return category;
+	}
+}
+#endif
+
+#ifndef PUGIXML_NO_XPATH
+
+// STL replacements
+PUGI__NS_BEGIN
+	// Function object comparing two values of the same type with operator==.
+	struct equal_to
+	{
+		template <typename T> bool operator()(const T& lhs, const T& rhs) const
+		{
+			const bool same = (lhs == rhs);
+			return same;
+		}
+	};
+
+	// Function object comparing two values of the same type with operator!=.
+	struct not_equal_to
+	{
+		template <typename T> bool operator()(const T& lhs, const T& rhs) const
+		{
+			const bool different = (lhs != rhs);
+			return different;
+		}
+	};
+
+	// Function object comparing two values of the same type with operator<.
+	struct less
+	{
+		template <typename T> bool operator()(const T& lhs, const T& rhs) const
+		{
+			const bool before = (lhs < rhs);
+			return before;
+		}
+	};
+
+	// Function object comparing two values of the same type with operator<=.
+	struct less_equal
+	{
+		template <typename T> bool operator()(const T& lhs, const T& rhs) const
+		{
+			const bool not_after = (lhs <= rhs);
+			return not_after;
+		}
+	};
+
+	// Exchanges two values through a temporary copy.
+	template <typename T> void swap(T& lhs, T& rhs)
+	{
+		T saved = lhs;
+
+		lhs = rhs;
+		rhs = saved;
+	}
+
+	// Returns an iterator to the first element of [begin, end) that no other element precedes
+	// according to pred. An empty range yields end, matching std::min_element; previously the
+	// loop started at begin + 1, which is already past end for an empty range.
+	template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
+	{
+		if (begin == end) return end;
+
+		I result = begin;
+
+		for (I it = begin + 1; it != end; ++it)
+			if (pred(*it, *result))
+				result = it;
+
+		return result;
+	}
+
+	// Reverses [begin, end) in place by swapping elements from both ends towards the middle.
+	template <typename I> void reverse(I begin, I end)
+	{
+		while (end - begin > 1)
+		{
+			--end;
+			swap(*begin, *end);
+			++begin;
+		}
+	}
+
+	// Collapses runs of adjacent equal elements in [begin, end) to a single element, in place.
+	// Returns the new past-the-end iterator of the compacted range.
+	template <typename I> I unique(I begin, I end)
+	{
+		// skip the prefix that already has no adjacent duplicates
+		while (end - begin > 1 && *begin != *(begin + 1)) begin++;
+
+		if (begin == end) return begin;
+
+		// position of the last element kept so far
+		I write = begin;
+		++begin;
+
+		// keep each element that differs from the last kept one
+		for (; begin != end; ++begin)
+		{
+			if (*begin != *write)
+			{
+				++write;
+				*write = *begin;
+			}
+		}
+
+		// past-the-end (write points to live element)
+		return write + 1;
+	}
+
+	// Copies [begin, end) into the range that ends at target, walking from the last element
+	// to the first.
+	template <typename I> void copy_backwards(I begin, I end, I target)
+	{
+		while (begin != end)
+		{
+			--target;
+			--end;
+
+			*target = *end;
+		}
+	}
+
+	// Insertion sort of the non-empty range [begin, end) using pred as the ordering predicate.
+	// The trailing T* argument is unused; it only lets the element type T be deduced.
+	template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
+	{
+		assert(begin != end);
+
+		for (I cur = begin + 1; cur != end; ++cur)
+		{
+			T value = *cur;
+
+			if (pred(value, *begin))
+			{
+				// new minimum: shift the whole sorted prefix right by one and put it first
+				copy_backwards(begin, cur, cur + 1);
+				*begin = value;
+				continue;
+			}
+
+			// *begin does not compare greater than value here, so the scan below stops before leaving the range
+			I slot = cur;
+			I prev = cur - 1;
+
+			while (pred(value, *prev))
+			{
+				*slot = *prev;
+				slot = prev;
+				--prev;
+			}
+
+			// drop the element into the freed slot
+			*slot = value;
+		}
+	}
+
+	// std variant for elements with ==
+	// Three-way partition of [begin, end) around the value at middle: elements ordered before the
+	// pivot, elements equal to it, elements ordered after it. The bounds of the equal run are
+	// written to *out_eqbeg and *out_eqend.
+	template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
+	{
+		I eq_lo = middle, eq_hi = middle + 1;
+
+		// grow the equal run over neighbours that already match the pivot
+		while (eq_lo != begin && *(eq_lo - 1) == *eq_lo) --eq_lo;
+		while (eq_hi != end && *eq_hi == *eq_lo) ++eq_hi;
+
+		// boundaries of the still unexamined parts on either side
+		I lt_end = eq_lo, gt_begin = eq_hi;
+
+		for (;;)
+		{
+			// scan right side for an element that belongs on the left
+			while (gt_begin != end)
+			{
+				if (!pred(*eq_lo, *gt_begin))
+				{
+					if (!(*gt_begin == *eq_lo)) break;
+
+					// equal to the pivot: absorb it into the equal run
+					swap(*gt_begin, *eq_hi);
+					++eq_hi;
+				}
+
+				++gt_begin;
+			}
+
+			// scan left side for an element that belongs on the right
+			while (lt_end != begin)
+			{
+				if (!pred(*(lt_end - 1), *eq_lo))
+				{
+					if (!(*eq_lo == *(lt_end - 1))) break;
+
+					// equal to the pivot: absorb it into the equal run
+					--eq_lo;
+					swap(*(lt_end - 1), *eq_lo);
+				}
+
+				--lt_end;
+			}
+
+			// both sides exhausted: report the equal run
+			if (gt_begin == end && lt_end == begin)
+			{
+				*out_eqbeg = eq_lo;
+				*out_eqend = eq_hi;
+				return;
+			}
+
+			// make room for the misplaced element by shifting the equal run
+			if (gt_begin == end)
+			{
+				--lt_end;
+				--eq_lo;
+
+				if (lt_end != eq_lo) swap(*lt_end, *eq_lo);
+
+				--eq_hi;
+				swap(*eq_lo, *eq_hi);
+			}
+			else if (lt_end == begin)
+			{
+				if (eq_hi != gt_begin) swap(*eq_lo, *eq_hi);
+
+				++eq_hi;
+
+				swap(*gt_begin, *eq_lo);
+				++gt_begin;
+				++eq_lo;
+			}
+			else
+			{
+				// one misplaced element on each side: exchange them
+				--lt_end;
+				swap(*gt_begin, *lt_end);
+				++gt_begin;
+			}
+		}
+	}
+
+	// Orders the three referenced elements so that *first, *middle, *last are non-decreasing
+	// under pred; afterwards *middle holds their median.
+	template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
+	{
+		if (pred(*middle, *first))
+			swap(*middle, *first);
+
+		if (pred(*last, *middle))
+			swap(*last, *middle);
+
+		if (pred(*middle, *first))
+			swap(*middle, *first);
+	}
+
+	// Moves a pivot estimate to *middle: a median of three when last - first is at most 40,
+	// otherwise a median of three medians taken at evenly spaced positions.
+	template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
+	{
+		if (last - first > 40)
+		{
+			// median of nine
+			size_t step = (last - first + 1) / 8;
+
+			median3(first, first + step, first + 2 * step, pred);
+			median3(middle - step, middle, middle + step, pred);
+			median3(last - 2 * step, last - step, last, pred);
+			median3(first + step, middle, last - step, pred);
+		}
+		else
+		{
+			// median of three for small chunks
+			median3(first, middle, last, pred);
+		}
+	}
+
+	// Sorts [begin, end) with pred: ranges longer than 32 elements are split with a three-way
+	// partition around a median pivot, recursing into the smaller side and looping on the larger
+	// one; whatever remains is finished with insertion sort.
+	template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
+	{
+		while (end - begin > 32)
+		{
+			// pick the pivot and move it to the middle position
+			I pivot = begin + (end - begin) / 2;
+			median(begin, pivot, end - 1, pred);
+
+			// split into three chunks (< = >)
+			I equal_first, equal_last;
+			partition(begin, pivot, end, pred, &equal_first, &equal_last);
+
+			const bool left_is_larger = (equal_first - begin > end - equal_last);
+
+			if (left_is_larger)
+			{
+				// recurse into the right part, keep looping on the left
+				sort(equal_last, end, pred);
+				end = equal_first;
+			}
+			else
+			{
+				// recurse into the left part, keep looping on the right
+				sort(begin, equal_first, pred);
+				begin = equal_last;
+			}
+		}
+
+		// insertion sort small chunk
+		if (begin != end) insertion_sort(begin, end, pred, &*begin);
+	}
+PUGI__NS_END
+
+// Allocator used for AST and evaluation stacks
+PUGI__NS_BEGIN
+	// One page of XPath allocator memory: a link to the next page followed by the payload bytes.
+	// The payload size is PUGIXML_MEMORY_XPATH_PAGE_SIZE when that macro is defined, 4096 otherwise.
+	struct xpath_memory_block
+	{	
+		// next page in the chain
+		xpath_memory_block* next;
+
+		// payload storage
+		char data[
+	#ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
+			PUGIXML_MEMORY_XPATH_PAGE_SIZE
+	#else
+			4096
+	#endif
+		];
+	};
+		
+	// Bump allocator over a singly linked chain of xpath_memory_block pages.
+	// _root is the newest page and _root_size the number of bytes already handed out from it.
+	// Individual objects are never freed; memory is reclaimed page-wise by revert() and release().
+	class xpath_allocator
+	{
+		xpath_memory_block* _root;
+		size_t _root_size;
+
+		// rounds a byte count up to a multiple of the pointer size
+		static size_t align_up(size_t size)
+		{
+			return (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
+		}
+
+	public:
+	#ifdef PUGIXML_NO_EXCEPTIONS
+		jmp_buf* error_handler;
+	#endif
+
+		// Starts allocating from root, with root_size bytes of it treated as already used.
+		xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size)
+		{
+		#ifdef PUGIXML_NO_EXCEPTIONS
+			error_handler = 0;
+		#endif
+		}
+		
+		// Allocates size bytes (rounded up to pointer-size granularity); returns null when a new
+		// page is needed and xml_memory::allocate fails.
+		void* allocate_nothrow(size_t size)
+		{
+			const size_t page_capacity = sizeof(_root->data);
+
+			// align size so that we're able to store pointers in subsequent blocks
+			size = align_up(size);
+
+			// fast path: the request fits into the current page
+			if (_root_size + size <= page_capacity)
+			{
+				void* memory = _root->data + _root_size;
+				_root_size += size;
+				return memory;
+			}
+
+			// slow path: chain a new page, enlarged when the request exceeds the standard payload
+			size_t payload_size = page_capacity;
+			if (size > page_capacity) payload_size = size;
+
+			size_t page_size = payload_size + offsetof(xpath_memory_block, data);
+
+			xpath_memory_block* fresh = static_cast<xpath_memory_block*>(xml_memory::allocate(page_size));
+			if (!fresh) return 0;
+
+			fresh->next = _root;
+
+			_root = fresh;
+			_root_size = size;
+
+			return fresh->data;
+		}
+
+		// Like allocate_nothrow, but reports failure by throwing std::bad_alloc, or by longjmp to
+		// error_handler when PUGIXML_NO_EXCEPTIONS is defined.
+		void* allocate(size_t size)
+		{
+			void* memory = allocate_nothrow(size);
+
+			if (!memory)
+			{
+			#ifdef PUGIXML_NO_EXCEPTIONS
+				assert(error_handler);
+				longjmp(*error_handler, 1);
+			#else
+				throw std::bad_alloc();
+			#endif
+			}
+
+			return memory;
+		}
+
+		// Resizes the most recently allocated object (or allocates a new one when ptr is null).
+		// The first old_size bytes are preserved if the object has to move to a new page.
+		void* reallocate(void* ptr, size_t old_size, size_t new_size)
+		{
+			// align size so that we're able to store pointers in subsequent blocks
+			old_size = align_up(old_size);
+			new_size = align_up(new_size);
+
+			// we can only reallocate the last object
+			assert(ptr == 0 || static_cast<char*>(ptr) + old_size == _root->data + _root_size);
+
+			// remember whether the object was alone on its page before giving its bytes back
+			const bool sole_occupant = (_root_size == old_size);
+
+			if (ptr) _root_size -= old_size;
+
+			// allocate a new version (this will obviously reuse the memory if possible)
+			void* moved = allocate(new_size);
+			assert(moved);
+
+			// no relocation needed: nothing existed before, or the object was extended in place
+			if (!ptr || moved == ptr) return moved;
+
+			// the object landed on a new page: carry the old contents over
+			assert(new_size >= old_size);
+			memcpy(moved, ptr, old_size);
+
+			// free the previous page if it had no other objects
+			if (sole_occupant)
+			{
+				assert(_root->data == moved);
+				assert(_root->next);
+
+				xpath_memory_block* after_old = _root->next->next;
+
+				if (after_old)
+				{
+					// deallocate the whole page, unless it was the first one
+					xml_memory::deallocate(_root->next);
+					_root->next = after_old;
+				}
+			}
+
+			return moved;
+		}
+
+		// Rolls the allocator back to a previously captured state, freeing every page
+		// that was chained after the captured root page.
+		void revert(const xpath_allocator& state)
+		{
+			// free all new pages
+			for (xpath_memory_block* page = _root; page != state._root; )
+			{
+				xpath_memory_block* older = page->next;
+
+				xml_memory::deallocate(page);
+
+				page = older;
+			}
+
+			// restore state
+			_root = state._root;
+			_root_size = state._root_size;
+		}
+
+		// Frees every page of the chain except the last one (the page whose next link is null).
+		void release()
+		{
+			xpath_memory_block* page = _root;
+			assert(page);
+
+			while (page->next)
+			{
+				xpath_memory_block* older = page->next;
+
+				xml_memory::deallocate(page);
+
+				page = older;
+			}
+		}
+	};
+
+	// Scope guard for an allocator: copies the allocator's state on construction and
+	// reverts the allocator to that state on destruction.
+	struct xpath_allocator_capture
+	{
+		// allocator being guarded
+		xpath_allocator* _target;
+		// copy of the allocator taken at construction time
+		xpath_allocator _state;
+
+		xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
+		{
+		}
+
+		~xpath_allocator_capture()
+		{
+			_target->revert(_state);
+		}
+	};
+
+	// Pair of allocators handed around together: one named for results, one for temporaries.
+	struct xpath_stack
+	{
+		// allocator for result data
+		xpath_allocator* result;
+		// allocator for temporary data
+		xpath_allocator* temp;
+	};
+
+	// Bundles the storage for one xpath_stack: two embedded memory blocks, the
+	// result/temp allocators that start out on those blocks, and the stack
+	// structure that points at the two allocators.
+	struct xpath_stack_data
+	{
+		xpath_memory_block blocks[2];
+		xpath_allocator result;
+		xpath_allocator temp;
+		xpath_stack stack;
+
+	#ifdef PUGIXML_NO_EXCEPTIONS
+		jmp_buf error_handler;
+	#endif
+
+		xpath_stack_data(): result(&blocks[0]), temp(&blocks[1])
+		{
+			// the embedded blocks terminate their page chains
+			blocks[0].next = 0;
+			blocks[1].next = 0;
+
+			stack.temp = &temp;
+			stack.result = &result;
+
+		#ifdef PUGIXML_NO_EXCEPTIONS
+			// both allocators share the handler stored in this object
+			temp.error_handler = &error_handler;
+			result.error_handler = &error_handler;
+		#endif
+		}
+
+		~xpath_stack_data()
+		{
+			// release() frees every page except the last one of each chain
+			result.release();
+			temp.release();
+		}
+	};
+PUGI__NS_END
+
+// String class
+PUGI__NS_BEGIN
+	// String value used by the XPath code. The buffer either points at storage
+	// this class never writes to (_uses_heap == false) or at memory obtained
+	// from an xpath_allocator (_uses_heap == true).
+	class xpath_string
+	{
+		const char_t* _buffer;
+		bool _uses_heap;
+
+		// Copies 'length' characters of 'string' into allocator memory and zero-terminates the copy.
+		static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
+		{
+			char_t* copy = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
+			assert(copy);
+
+			memcpy(copy, string, length * sizeof(char_t));
+			copy[length] = 0;
+
+			return copy;
+		}
+
+		// Same as above for a zero-terminated source.
+		static char_t* duplicate_string(const char_t* string, xpath_allocator* alloc)
+		{
+			size_t length = strlength(string);
+
+			return duplicate_string(string, length, alloc);
+		}
+
+	public:
+		// Empty string backed by a literal.
+		xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false)
+		{
+		}
+
+		// Copies a zero-terminated string into allocator memory; an empty source needs no allocation.
+		explicit xpath_string(const char_t* str, xpath_allocator* alloc)
+		{
+			if (*str == 0)
+			{
+				_buffer = PUGIXML_TEXT("");
+				_uses_heap = false;
+			}
+			else
+			{
+				_buffer = duplicate_string(str, alloc);
+				_uses_heap = true;
+			}
+		}
+
+		// Adopts 'str' as is; 'use_heap' records whether it lives in allocator memory.
+		explicit xpath_string(const char_t* str, bool use_heap): _buffer(str), _uses_heap(use_heap)
+		{
+		}
+
+		// Copies the range [begin, end) into allocator memory; an empty range needs no allocation.
+		xpath_string(const char_t* begin, const char_t* end, xpath_allocator* alloc)
+		{
+			assert(begin <= end);
+
+			if (begin == end)
+			{
+				_buffer = PUGIXML_TEXT("");
+				_uses_heap = false;
+			}
+			else
+			{
+				_buffer = duplicate_string(begin, static_cast<size_t>(end - begin), alloc);
+				_uses_heap = true;
+			}
+		}
+
+		// Appends 'o' to this string, taking any memory that is needed from 'alloc'.
+		void append(const xpath_string& o, xpath_allocator* alloc)
+		{
+			// nothing to do for an empty source
+			if (o.empty()) return;
+
+			// constant empty target and constant source: just alias the source buffer
+			if (empty() && !_uses_heap && !o._uses_heap)
+			{
+				_buffer = o._buffer;
+				return;
+			}
+
+			// otherwise the result has to live on the heap
+			size_t own_length = strlength(_buffer);
+			size_t extra_length = strlength(o._buffer);
+			size_t total_length = own_length + extra_length;
+
+			// hand the old block to reallocate only if it came from the allocator
+			char_t* previous = _uses_heap ? const_cast<char_t*>(_buffer) : 0;
+			char_t* grown = static_cast<char_t*>(alloc->reallocate(previous, (own_length + 1) * sizeof(char_t), (total_length + 1) * sizeof(char_t)));
+			assert(grown);
+
+			// a constant target was not passed to reallocate, so its characters are copied in here
+			if (!_uses_heap) memcpy(grown, _buffer, own_length * sizeof(char_t));
+
+			// source characters follow, then the terminator
+			memcpy(grown + own_length, o._buffer, extra_length * sizeof(char_t));
+			grown[total_length] = 0;
+
+			_buffer = grown;
+			_uses_heap = true;
+		}
+
+		const char_t* c_str() const
+		{
+			return _buffer;
+		}
+
+		size_t length() const
+		{
+			return strlength(_buffer);
+		}
+
+		// Returns a writable buffer, first switching to a private heap copy if the
+		// current buffer is not allocator memory.
+		char_t* data(xpath_allocator* alloc)
+		{
+			if (!_uses_heap)
+			{
+				_buffer = duplicate_string(_buffer, alloc);
+				_uses_heap = true;
+			}
+
+			return const_cast<char_t*>(_buffer);
+		}
+
+		bool empty() const
+		{
+			return *_buffer == 0;
+		}
+
+		bool operator==(const xpath_string& o) const
+		{
+			return strequal(_buffer, o._buffer);
+		}
+
+		bool operator!=(const xpath_string& o) const
+		{
+			return !(*this == o);
+		}
+
+		bool uses_heap() const
+		{
+			return _uses_heap;
+		}
+	};
+
+	// Wraps 'str' in an xpath_string without copying it; the result is flagged as not using the heap.
+	PUGI__FN xpath_string xpath_string_const(const char_t* str)
+	{
+		const bool on_heap = false;
+
+		return xpath_string(str, on_heap);
+	}
+PUGI__NS_END
+
+PUGI__NS_BEGIN
+	// Returns true when 'pattern' is a prefix of 'string' (an empty pattern always matches).
+	PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
+	{
+		for (; *pattern; ++string, ++pattern)
+			if (*string != *pattern)
+				return false;
+
+		return true;
+	}
+
+	// Locates the first occurrence of 'c' in 's' using the C library search that matches char_t.
+	PUGI__FN const char_t* find_char(const char_t* s, char_t c)
+	{
+	#ifndef PUGIXML_WCHAR_MODE
+		return strchr(s, c);
+	#else
+		return wcschr(s, c);
+	#endif
+	}
+
+	// Locates the first occurrence of 'p' in 's' using the C library search that matches char_t.
+	PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
+	{
+	#ifndef PUGIXML_WCHAR_MODE
+		return strstr(s, p);
+	#else
+		// MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
+		if (*p == 0) return s;
+
+		return wcsstr(s, p);
+	#endif
+	}
+
+	// Maps ASCII 'A'..'Z' to 'a'..'z'; every other character is returned unchanged
+	PUGI__FN char_t tolower_ascii(char_t ch)
+	{
+		// after the conversion to unsigned, anything below 'A' wraps to a large value, so one comparison covers the range
+		bool is_upper = static_cast<unsigned int>(ch - 'A') < 26;
+
+		if (!is_upper) return ch;
+
+		// or-ing in ' ' (0x20) turns an ASCII upper-case letter into its lower-case form
+		return static_cast<char_t>(ch | ' ');
+	}
+
+	// Computes the string value of an XPath node:
+	// - attribute: the attribute value (not copied);
+	// - pcdata/cdata/comment/pi node: the node value (not copied);
+	// - document/element node: the concatenation of the values of all pcdata and
+	//   cdata descendants in traversal order, built with memory from 'alloc';
+	// - anything else: an empty string.
+	PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
+	{
+		if (na.attribute())
+			return xpath_string_const(na.attribute().value());
+		else
+		{
+			const xml_node& n = na.node();
+
+			switch (n.type())
+			{
+			case node_pcdata:
+			case node_cdata:
+			case node_comment:
+			case node_pi:
+				return xpath_string_const(n.value());
+			
+			case node_document:
+			case node_element:
+			{
+				xpath_string result;
+
+				// depth-first walk over the subtree of n without recursion
+				xml_node cur = n.first_child();
+				
+				while (cur && cur != n)
+				{
+					if (cur.type() == node_pcdata || cur.type() == node_cdata)
+						result.append(xpath_string_const(cur.value()), alloc);
+
+					// descend first, then move sideways, otherwise climb until a sibling exists or n is reached
+					if (cur.first_child())
+						cur = cur.first_child();
+					else if (cur.next_sibling())
+						cur = cur.next_sibling();
+					else
+					{
+						while (!cur.next_sibling() && cur != n)
+							cur = cur.parent();
+
+						// reaching n means the whole subtree was visited; the outer loop then stops
+						if (cur != n) cur = cur.next_sibling();
+					}
+				}
+				
+				return result;
+			}
+			
+			default:
+				return xpath_string();
+			}
+		}
+	}
+	
+	// Counts the nodes on the path from 'n' up to its topmost ancestor, 'n' included (0 for a null node).
+	PUGI__FN unsigned int node_height(xml_node n)
+	{
+		unsigned int height = 0;
+
+		for (; n; n = n.parent())
+			++height;
+
+		return height;
+	}
+	
+	// Decides whether 'ln' comes before 'rn'. 'lh' and 'rh' are the heights of the
+	// two nodes as computed by node_height (document_order_comparator passes them in).
+	PUGI__FN bool node_is_before(xml_node ln, unsigned int lh, xml_node rn, unsigned int rh)
+	{
+		// normalize heights
+		// (at most one of these loops runs: the deeper node is lifted to the level of the other)
+		for (unsigned int i = rh; i < lh; i++) ln = ln.parent();
+		for (unsigned int j = lh; j < rh; j++) rn = rn.parent();
+		
+		// one node is the ancestor of the other
+		// (the ancestor has the smaller height and goes first)
+		if (ln == rn) return lh < rh;
+		
+		// find common ancestor
+		// (both nodes are on the same level here, so they are lifted in lock step)
+		while (ln.parent() != rn.parent())
+		{
+			ln = ln.parent();
+			rn = rn.parent();
+		}
+
+		// there is no common ancestor (the shared parent is null), nodes are from different documents
+		if (!ln.parent()) return ln < rn;
+
+		// determine sibling order
+		// (ln and rn are distinct siblings: ln is first iff rn is reachable by walking forward from ln)
+		for (; ln; ln = ln.next_sibling())
+			if (ln == rn)
+				return true;
+				
+		return false;
+	}
+
+	// Returns true when 'parent' is non-null and is either 'node' itself or one of its ancestors.
+	PUGI__FN bool node_is_ancestor(xml_node parent, xml_node node)
+	{
+		// climb until 'parent' is met or the chain runs out
+		for (; node && node != parent; node = node.parent())
+		{
+		}
+
+		if (!parent) return false;
+
+		return node == parent;
+	}
+
+	// Returns a pointer usable as an ordering key for 'xnode', or 0 when none is available.
+	// The key is the name (preferred) or value pointer of the node/attribute, and is only
+	// used when the matching *_allocated_mask bit of the header is clear.
+	// NOTE(review): presumably a clear bit means the string still points into the parsed
+	// document buffer, which is what makes the addresses comparable - confirm.
+	PUGI__FN const void* document_order(const xpath_node& xnode)
+	{
+		if (xml_node_struct* node = xnode.node().internal_object())
+		{
+			const bool name_flag_clear = (node->header & xml_memory_page_name_allocated_mask) == 0;
+			const bool value_flag_clear = (node->header & xml_memory_page_value_allocated_mask) == 0;
+
+			if (node->name && name_flag_clear) return node->name;
+			if (node->value && value_flag_clear) return node->value;
+
+			return 0;
+		}
+
+		if (xml_attribute_struct* attr = xnode.attribute().internal_object())
+		{
+			const bool name_flag_clear = (attr->header & xml_memory_page_name_allocated_mask) == 0;
+			const bool value_flag_clear = (attr->header & xml_memory_page_value_allocated_mask) == 0;
+
+			if (name_flag_clear) return attr->name;
+			if (value_flag_clear) return attr->value;
+
+			return 0;
+		}
+
+		return 0;
+	}
+	
+	// Strict "comes before" predicate on xpath_node values, used by xpath_sort and
+	// xpath_first. It first tries the pointer keys from document_order and falls
+	// back to walking the tree when either key is unavailable.
+	struct document_order_comparator
+	{
+		bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
+		{
+			// optimized document order based check
+			const void* lo = document_order(lhs);
+			const void* ro = document_order(rhs);
+
+			if (lo && ro) return lo < ro;
+
+			// slow comparison
+			xml_node ln = lhs.node(), rn = rhs.node();
+
+			// compare attributes
+			if (lhs.attribute() && rhs.attribute())
+			{
+				// shared parent
+				if (lhs.parent() == rhs.parent())
+				{
+					// determine sibling order
+					// (lhs is first iff rhs is reachable by walking forward from lhs; note that this
+					// also yields true when both sides are the same attribute)
+					for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
+						if (a == rhs.attribute())
+							return true;
+					
+					return false;
+				}
+				
+				// compare attribute parents
+				ln = lhs.parent();
+				rn = rhs.parent();
+			}
+			else if (lhs.attribute())
+			{
+				// attributes go after the parent element
+				if (lhs.parent() == rhs.node()) return false;
+				
+				ln = lhs.parent();
+			}
+			else if (rhs.attribute())
+			{
+				// attributes go after the parent element
+				if (rhs.parent() == lhs.node()) return true;
+				
+				rn = rhs.parent();
+			}
+
+			// identical nodes: neither one is before the other
+			if (ln == rn) return false;
+			
+			unsigned int lh = node_height(ln);
+			unsigned int rh = node_height(rn);
+			
+			return node_is_before(ln, lh, rn, rh);
+		}
+	};
+
+	// Ordering used only to bring equal xpath_node values next to each other before
+	// duplicates are removed: attribute nodes sort before plain nodes, and within each
+	// group the operator< of the handle type decides.
+	struct duplicate_comparator
+	{
+		bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
+		{
+			if (lhs.attribute())
+			{
+				// attribute vs plain node: the attribute goes first
+				if (!rhs.attribute()) return true;
+
+				return lhs.attribute() < rhs.attribute();
+			}
+
+			// plain node vs attribute: the attribute goes first
+			if (rhs.attribute()) return false;
+
+			return lhs.node() < rhs.node();
+		}
+	};
+	
+	// Produces a NaN value without relying on a NaN constant from the standard library.
+	PUGI__FN double gen_nan()
+	{
+	#if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
+		// float has the IEEE single precision layout here, so the NaN bit pattern can be written directly;
+		// the array bound doubles as a compile-time check (it becomes -1 if float is not 32 bits wide)
+		union { float f; uint32_t i; } u[sizeof(float) == sizeof(uint32_t) ? 1 : -1];
+		u[0].i = 0x7fc00000;
+		return u[0].f;
+	#else
+		// fallback
+		// (volatile keeps the compiler from folding the division at compile time)
+		const volatile double zero = 0.0;
+		return zero / zero;
+	#endif
+	}
+	
+	// Tests whether 'value' is NaN, using the platform classification routine when one is available.
+	PUGI__FN bool is_nan(double value)
+	{
+	#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
+		return _isnan(value) != 0;
+	#elif defined(fpclassify) && defined(FP_NAN)
+		return FP_NAN == fpclassify(value);
+	#else
+		// fallback: NaN is the only value that compares unequal to itself
+		const volatile double probe = value;
+		return probe != probe;
+	#endif
+	}
+	
+	// Returns the fixed textual form for the values that need one ("0", "NaN",
+	// "Infinity", "-Infinity"), or 0 when 'value' is an ordinary non-zero finite
+	// number that the caller has to format itself.
+	PUGI__FN const char_t* convert_number_to_string_special(double value)
+	{
+	#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
+		if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
+		if (_isnan(value)) return PUGIXML_TEXT("NaN");
+		return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
+	#elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
+		switch (fpclassify(value))
+		{
+		case FP_NAN:
+			return PUGIXML_TEXT("NaN");
+
+		case FP_INFINITE:
+			return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
+
+		case FP_ZERO:
+			return PUGIXML_TEXT("0");
+
+		default:
+			return 0;
+		}
+	#else
+		// fallback
+		const volatile double v = value;
+
+		// zero first, then NaN (unequal to itself), then infinity (a non-zero value that doubling leaves unchanged)
+		if (v == 0) return PUGIXML_TEXT("0");
+		if (v != v) return PUGIXML_TEXT("NaN");
+		if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
+		return 0;
+	#endif
+	}
+	
+	// A number converts to false when it is zero or NaN, and to true otherwise.
+	PUGI__FN bool convert_number_to_boolean(double value)
+	{
+		return !(value == 0 || is_nan(value));
+	}
+	
+	// Writes a terminator over the first character of the run of '0' characters that ends
+	// at 'end' (at 'end' itself when there is no such run); 'end' must be writable.
+	PUGI__FN void truncate_zeros(char* begin, char* end)
+	{
+		for (; begin != end; --end)
+			if (end[-1] != '0')
+				break;
+
+		*end = 0;
+	}
+
+	// gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
+	// 'buffer' (of 'buffer_size' chars) is scratch space; *out_mantissa is set to point
+	// into it at a zero-terminated digit string with trailing zeros removed, and
+	// *out_exponent receives the matching decimal exponent
+#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
+	PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
+	{
+		// get base values
+		// (the sign reported by _ecvt_s is not used)
+		int sign, exponent;
+		_ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);
+
+		// truncate redundant zeros
+		truncate_zeros(buffer, buffer + strlen(buffer));
+
+		// fill results
+		*out_mantissa = buffer;
+		*out_exponent = exponent;
+	}
+#else
+	PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
+	{
+		// get a scientific notation value with IEEE DBL_DIG decimals
+		// NOTE(review): the size check below runs after sprintf has already written, so it can
+		// only report an overflow, not prevent one; the caller is expected to pass a large enough buffer
+		sprintf(buffer, "%.*e", DBL_DIG, value);
+		assert(strlen(buffer) < buffer_size);
+		(void)!buffer_size;
+
+		// get the exponent (possibly negative)
+		char* exponent_string = strchr(buffer, 'e');
+		assert(exponent_string);
+
+		int exponent = atoi(exponent_string + 1);
+
+		// extract mantissa string: skip sign
+		// NOTE(review): the assert expects '.' as the decimal separator - confirm the C locale is in effect
+		char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
+		assert(mantissa[0] != '0' && mantissa[1] == '.');
+
+		// divide mantissa by 10 to eliminate integer part
+		// (the leading digit overwrites the '.', the string then starts one character later)
+		mantissa[1] = mantissa[0];
+		mantissa++;
+		exponent++;
+
+		// remove extra mantissa digits and zero-terminate mantissa
+		// (the terminator lands at or before the 'e', cutting the exponent text off)
+		truncate_zeros(mantissa, exponent_string);
+
+		// fill results
+		*out_mantissa = mantissa;
+		*out_exponent = exponent;
+	}
+#endif
+
+	// Formats 'value' as plain decimal text without an exponent: optional '-',
+	// integer digits, and a fractional part only when non-zero digits remain.
+	// Special values are delegated to convert_number_to_string_special; the text
+	// for ordinary numbers is built in memory taken from 'alloc'.
+	PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
+	{
+		// try special number conversion
+		const char_t* special = convert_number_to_string_special(value);
+		if (special) return xpath_string_const(special);
+
+		// get mantissa + exponent form
+		char mantissa_buffer[32];
+
+		char* mantissa;
+		int exponent;
+		convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);
+
+		// allocate a buffer of suitable length for the number
+		// (digits + |exponent| padding zeros + up to 4 extra characters: sign, leading '0', '.', terminator)
+		size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
+		char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
+		assert(result);
+
+		// make the number!
+		char_t* s = result;
+
+		// sign
+		if (value < 0) *s++ = '-';
+
+		// integer part
+		if (exponent <= 0)
+		{
+			*s++ = '0';
+		}
+		else
+		{
+			// emit 'exponent' digits, padding with '0' once the mantissa runs out
+			while (exponent > 0)
+			{
+				assert(*mantissa == 0 || static_cast<unsigned int>(static_cast<unsigned int>(*mantissa) - '0') <= 9);
+				*s++ = *mantissa ? *mantissa++ : '0';
+				exponent--;
+			}
+		}
+
+		// fractional part
+		if (*mantissa)
+		{
+			// decimal point
+			*s++ = '.';
+
+			// extra zeroes from negative exponent
+			while (exponent < 0)
+			{
+				*s++ = '0';
+				exponent++;
+			}
+
+			// extra mantissa digits
+			while (*mantissa)
+			{
+				assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
+				*s++ = *mantissa++;
+			}
+		}
+
+		// zero-terminate
+		assert(s < result + result_size);
+		*s = 0;
+
+		return xpath_string(result, true);
+	}
+	
+	// Checks that 'string' has the shape: whitespace* '-'? (digits ('.' digits*)? | '.' digits) whitespace*
+	PUGI__FN bool check_string_to_number_format(const char_t* string)
+	{
+		const char_t* cursor = string;
+
+		// leading whitespace
+		while (PUGI__IS_CHARTYPE(*cursor, ct_space)) ++cursor;
+
+		// optional minus sign
+		if (*cursor == '-') ++cursor;
+
+		if (*cursor == 0) return false;
+
+		if (!PUGI__IS_CHARTYPEX(cursor[0], ctx_digit))
+		{
+			// no integer part: a decimal point followed by at least one digit is required
+			if (cursor[0] != '.') return false;
+			if (!PUGI__IS_CHARTYPEX(cursor[1], ctx_digit)) return false;
+		}
+
+		// integer digits
+		while (PUGI__IS_CHARTYPEX(*cursor, ctx_digit)) ++cursor;
+
+		// optional decimal point and fraction digits
+		if (*cursor == '.')
+		{
+			++cursor;
+
+			while (PUGI__IS_CHARTYPEX(*cursor, ctx_digit)) ++cursor;
+		}
+
+		// trailing whitespace
+		while (PUGI__IS_CHARTYPE(*cursor, ct_space)) ++cursor;
+
+		// anything left over makes the string invalid
+		return *cursor == 0;
+	}
+
+	// Converts a zero-terminated string to a number; strings that fail the format check give NaN.
+	PUGI__FN double convert_string_to_number(const char_t* string)
+	{
+		if (!check_string_to_number_format(string)) return gen_nan();
+
+		// the format is known to be valid, so the C library conversion can be used directly
+	#ifndef PUGIXML_WCHAR_MODE
+		return atof(string);
+	#else
+		return wcstod(string, 0);
+	#endif
+	}
+
+	// Converts the character range [begin, end) to a number and stores it in *out_result.
+	// The range is first copied into a zero-terminated buffer: 'buffer' when it fits,
+	// otherwise a temporary block from xml_memory. Returns false only when that
+	// temporary block cannot be allocated.
+	PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
+	{
+		const size_t buffer_capacity = sizeof(buffer) / sizeof(buffer[0]);
+		const size_t text_length = static_cast<size_t>(end - begin);
+
+		char_t* text = buffer;
+
+		// text that does not fit (terminator included) goes through a temporary heap block
+		if (text_length >= buffer_capacity)
+		{
+			text = static_cast<char_t*>(xml_memory::allocate((text_length + 1) * sizeof(char_t)));
+			if (!text) return false;
+		}
+
+		memcpy(text, begin, text_length * sizeof(char_t));
+		text[text_length] = 0;
+
+		*out_result = convert_string_to_number(text);
+
+		// give the temporary block back
+		if (text != buffer) xml_memory::deallocate(text);
+
+		return true;
+	}
+	
+	// Rounds to the nearest integer by taking floor(value + 0.5), so exact halves go towards positive infinity.
+	PUGI__FN double round_nearest(double value)
+	{
+		return floor(value + 0.5);
+	}
+
+	// Same as round_nearest, except that inputs in [-0.5, -0] give -0 instead of +0.
+	PUGI__FN double round_nearest_nzero(double value)
+	{
+		// ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
+		if (value >= -0.5 && value <= 0) return ceil(value);
+
+		return floor(value + 0.5);
+	}
+	
+	// Returns the name of the attribute when the xpath_node holds one, otherwise the name of the node.
+	PUGI__FN const char_t* qualified_name(const xpath_node& node)
+	{
+		if (node.attribute()) return node.attribute().name();
+
+		return node.node().name();
+	}
+	
+	// Returns the part of the qualified name after the first ':' (the whole name when there is no ':').
+	PUGI__FN const char_t* local_name(const xpath_node& node)
+	{
+		const char_t* qname = qualified_name(node);
+
+		if (const char_t* colon = find_char(qname, ':')) return colon + 1;
+
+		return qname;
+	}
+
+	// Attribute predicate that recognizes the namespace declaration for a given
+	// element/attribute name: "xmlns:<prefix>" when the name has a prefix, plain
+	// "xmlns" otherwise. The constructor is intentionally implicit; the
+	// namespace_uri functions initialize it straight from a name.
+	struct namespace_uri_predicate
+	{
+		const char_t* prefix; // start of the name when it contains ':', 0 otherwise
+		size_t prefix_length; // number of characters before the ':'
+
+		namespace_uri_predicate(const char_t* name): prefix(0), prefix_length(0)
+		{
+			const char_t* colon = find_char(name, ':');
+
+			if (colon)
+			{
+				prefix = name;
+				prefix_length = static_cast<size_t>(colon - name);
+			}
+		}
+
+		bool operator()(const xml_attribute& a) const
+		{
+			const char_t* name = a.name();
+
+			if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
+
+			// name[5] is the character right after "xmlns"
+			if (!prefix) return name[5] == 0;
+
+			return name[5] == ':' && strequalrange(name + 6, prefix, prefix_length);
+		}
+	};
+
+	// Looks for the namespace declaration matching the node's name on the node itself and
+	// then on each ancestor; returns the value of the first match, or "" when none is found.
+	PUGI__FN const char_t* namespace_uri(const xml_node& node)
+	{
+		namespace_uri_predicate pred = node.name();
+
+		for (xml_node scope = node; scope; scope = scope.parent())
+		{
+			xml_attribute declaration = scope.find_attribute(pred);
+
+			if (declaration) return declaration.value();
+		}
+
+		return PUGIXML_TEXT("");
+	}
+
+	// Looks for the namespace declaration matching the attribute's prefix on 'parent' and
+	// then on each ancestor; returns the value of the first match, or "" when the
+	// attribute has no prefix or no declaration is found.
+	PUGI__FN const char_t* namespace_uri(const xml_attribute& attr, const xml_node& parent)
+	{
+		namespace_uri_predicate pred = attr.name();
+
+		// Default namespace does not apply to attributes
+		if (!pred.prefix) return PUGIXML_TEXT("");
+
+		for (xml_node scope = parent; scope; scope = scope.parent())
+		{
+			xml_attribute declaration = scope.find_attribute(pred);
+
+			if (declaration) return declaration.value();
+		}
+
+		return PUGIXML_TEXT("");
+	}
+
+	// Dispatches to the attribute or node overload depending on what the xpath_node holds.
+	PUGI__FN const char_t* namespace_uri(const xpath_node& node)
+	{
+		if (node.attribute()) return namespace_uri(node.attribute(), node.parent());
+
+		return namespace_uri(node.node());
+	}
+
+	// Normalizes whitespace in place: leading and trailing whitespace is dropped and
+	// every inner run of whitespace characters becomes a single ' '.
+	PUGI__FN void normalize_space(char_t* buffer)
+	{
+		char_t* out = buffer;
+		const char_t* in = buffer;
+
+		while (*in)
+		{
+			char_t ch = *in++;
+
+			if (!PUGI__IS_CHARTYPE(ch, ct_space))
+			{
+				*out++ = ch;
+				continue;
+			}
+
+			// swallow the rest of the whitespace run
+			while (PUGI__IS_CHARTYPE(*in, ct_space)) in++;
+
+			// a run at the very start produces nothing; any other run produces one space
+			if (out != buffer) *out++ = ' ';
+		}
+
+		// a run at the very end left one space behind; take it back
+		if (out != buffer && PUGI__IS_CHARTYPE(out[-1], ct_space)) out--;
+
+		// zero-terminate
+		*out = 0;
+	}
+
+	// Rewrites 'buffer' in place: a character found in 'from' is replaced by the character
+	// at the same index in 'to', or removed when 'to' is too short to have that index;
+	// characters not found in 'from' are kept.
+	PUGI__FN void translate(char_t* buffer, const char_t* from, const char_t* to)
+	{
+		size_t to_length = strlength(to);
+
+		char_t* out = buffer;
+
+		for (const char_t* in = buffer; *in; ++in)
+		{
+			PUGI__DMC_VOLATILE char_t ch = *in;
+
+			const char_t* hit = find_char(from, ch);
+
+			if (hit)
+			{
+				size_t index = static_cast<size_t>(hit - from);
+
+				// replace when 'to' has a counterpart, otherwise drop the character
+				if (index < to_length) *out++ = to[index];
+			}
+			else
+			{
+				// do not process
+				*out++ = ch;
+			}
+		}
+
+		// zero-terminate
+		*out = 0;
+	}
+
+	// Boolean-valued XPath variable; the value starts out as false.
+	struct xpath_variable_boolean: xpath_variable
+	{
+		xpath_variable_boolean(): value(false)
+		{
+		}
+
+		bool value;
+		// first character of the variable name; new_xpath_variable allocates extra space
+		// behind the object so the whole zero-terminated name can be stored from here on
+		char_t name[1];
+	};
+
+	// Number-valued XPath variable; the value starts out as 0.
+	struct xpath_variable_number: xpath_variable
+	{
+		xpath_variable_number(): value(0)
+		{
+		}
+
+		double value;
+		// first character of the variable name; new_xpath_variable allocates extra space
+		// behind the object so the whole zero-terminated name can be stored from here on
+		char_t name[1];
+	};
+
+	// String-valued XPath variable; the value starts out as a null pointer and,
+	// once set, is released through xml_memory::deallocate by the destructor.
+	struct xpath_variable_string: xpath_variable
+	{
+		xpath_variable_string(): value(0)
+		{
+		}
+
+		~xpath_variable_string()
+		{
+			if (value) xml_memory::deallocate(value);
+		}
+
+		char_t* value;
+		// first character of the variable name; new_xpath_variable allocates extra space
+		// behind the object so the whole zero-terminated name can be stored from here on
+		char_t name[1];
+	};
+
+	// Node-set-valued XPath variable; the value is a default-constructed xpath_node_set.
+	struct xpath_variable_node_set: xpath_variable
+	{
+		xpath_node_set value;
+		// first character of the variable name; new_xpath_variable allocates extra space
+		// behind the object so the whole zero-terminated name can be stored from here on
+		char_t name[1];
+	};
+
+	// Shared default-constructed node set.
+	// NOTE(review): presumably returned by reference where a node set is requested but none is available - confirm against users
+	static const xpath_node_set dummy_node_set;
+
+	// Hashes a zero-terminated string with the Jenkins one-at-a-time function.
+	PUGI__FN unsigned int hash_string(const char_t* str)
+	{
+		// Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
+		unsigned int hash = 0;
+
+		// mixing step, once per character
+		for (; *str; ++str)
+		{
+			hash += static_cast<unsigned int>(*str);
+			hash += hash << 10;
+			hash ^= hash >> 6;
+		}
+
+		// final avalanche
+		hash += hash << 3;
+		hash ^= hash >> 11;
+		hash += hash << 15;
+
+		return hash;
+	}
+
+	// Allocates and constructs a variable object of type T with 'name' stored inline
+	// behind it. Returns 0 when the name is empty or the allocation fails.
+	template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
+	{
+		size_t length = strlength(name);
+		if (length == 0) return 0; // empty variable names are invalid
+
+		// $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
+		// (T::name already provides one character, which ends up holding the terminator)
+		void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
+		if (!memory) return 0;
+
+		// construct in place inside the raw block
+		T* result = new (memory) T();
+
+		// copies the name together with its terminator
+		memcpy(result->name, name, (length + 1) * sizeof(char_t));
+
+		return result;
+	}
+
+	// Creates a variable object of the concrete type that matches 'type'; returns 0 for
+	// any other type value (or when the typed helper itself returns 0).
+	PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
+	{
+		if (type == xpath_type_node_set) return new_xpath_variable<xpath_variable_node_set>(name);
+		if (type == xpath_type_number) return new_xpath_variable<xpath_variable_number>(name);
+		if (type == xpath_type_string) return new_xpath_variable<xpath_variable_string>(name);
+		if (type == xpath_type_boolean) return new_xpath_variable<xpath_variable_boolean>(name);
+
+		return 0;
+	}
+
+	// Counterpart of the typed new_xpath_variable: runs the destructor explicitly (the
+	// object was created with placement new) and then returns the block to xml_memory.
+	template <typename T> PUGI__FN void delete_xpath_variable(T* var)
+	{
+		var->~T();
+		xml_memory::deallocate(var);
+	}
+
+	// Destroys 'var' as the concrete variable type that matches 'type'; any other
+	// type value trips an assertion.
+	PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
+	{
+		if (type == xpath_type_node_set)
+		{
+			delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
+			return;
+		}
+
+		if (type == xpath_type_number)
+		{
+			delete_xpath_variable(static_cast<xpath_variable_number*>(var));
+			return;
+		}
+
+		if (type == xpath_type_string)
+		{
+			delete_xpath_variable(static_cast<xpath_variable_string*>(var));
+			return;
+		}
+
+		if (type == xpath_type_boolean)
+		{
+			delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
+			return;
+		}
+
+		assert(!"Invalid variable type");
+	}
+
+	// Looks up the variable whose name is the character range [begin, end) in 'set'.
+	// The range is first copied into a zero-terminated buffer: 'buffer' when it fits,
+	// otherwise a temporary block from xml_memory. Returns 0 when that temporary
+	// block cannot be allocated; otherwise returns whatever set->get returns.
+	PUGI__FN xpath_variable* get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end)
+	{
+		const size_t buffer_capacity = sizeof(buffer) / sizeof(buffer[0]);
+		const size_t name_length = static_cast<size_t>(end - begin);
+
+		char_t* name = buffer;
+
+		// names that do not fit (terminator included) go through a temporary heap block
+		if (name_length >= buffer_capacity)
+		{
+			name = static_cast<char_t*>(xml_memory::allocate((name_length + 1) * sizeof(char_t)));
+			if (!name) return 0;
+		}
+
+		memcpy(name, begin, name_length * sizeof(char_t));
+		name[name_length] = 0;
+
+		xpath_variable* found = set->get(name);
+
+		// give the temporary block back
+		if (name != buffer) xml_memory::deallocate(name);
+
+		return found;
+	}
+PUGI__NS_END
+
+// Internal node set class
+PUGI__NS_BEGIN
+	// Brings [begin, end) into document order (rev == false) or reverse document order
+	// (rev == true). 'type' describes the current order of the range; the function
+	// returns the order the range has afterwards.
+	PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
+	{
+		xpath_node_set::type_t wanted = xpath_node_set::type_sorted;
+		if (rev) wanted = xpath_node_set::type_sorted_reverse;
+
+		xpath_node_set::type_t current = type;
+
+		// an unsorted range is sorted forward first
+		if (current == xpath_node_set::type_unsorted)
+		{
+			sort(begin, end, document_order_comparator());
+			current = xpath_node_set::type_sorted;
+		}
+
+		// a range sorted the other way round only needs to be flipped
+		if (current != wanted) reverse(begin, end);
+
+		return wanted;
+	}
+
+	// Returns the node of [begin, end) that is first in document order, using 'type' to
+	// avoid a search when the range is already sorted; an empty range gives a null xpath_node.
+	PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
+	{
+		if (begin == end) return xpath_node();
+
+		if (type == xpath_node_set::type_sorted) return *begin;
+
+		if (type == xpath_node_set::type_sorted_reverse) return *(end - 1);
+
+		if (type == xpath_node_set::type_unsorted) return *min_element(begin, end, document_order_comparator());
+
+		assert(!"Invalid node set type");
+		return xpath_node();
+	}
+
+	// Growable array of xpath_node values whose storage comes from an xpath_allocator.
+	// The class never frees memory itself; growth goes through xpath_allocator::reallocate.
+	// Alongside the elements it tracks how they are currently ordered (_type).
+	class xpath_node_set_raw
+	{
+		xpath_node_set::type_t _type;
+
+		xpath_node* _begin; // first element (0 while nothing has been allocated)
+		xpath_node* _end; // one past the last element
+		xpath_node* _eos; // one past the end of the allocated storage
+
+	public:
+		xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
+		{
+		}
+
+		xpath_node* begin() const
+		{
+			return _begin;
+		}
+
+		xpath_node* end() const
+		{
+			return _end;
+		}
+
+		bool empty() const
+		{
+			return _begin == _end;
+		}
+
+		size_t size() const
+		{
+			return static_cast<size_t>(_end - _begin);
+		}
+
+		// Returns the element that is first in document order (null xpath_node when empty).
+		xpath_node first() const
+		{
+			return xpath_first(_begin, _end, _type);
+		}
+
+		// Appends one node, growing the storage when it is full.
+		void push_back(const xpath_node& node, xpath_allocator* alloc)
+		{
+			if (_end == _eos)
+			{
+				size_t capacity = static_cast<size_t>(_eos - _begin);
+
+				// get new capacity (1.5x rule)
+				size_t new_capacity = capacity + capacity / 2 + 1;
+
+				// reallocate the old array or allocate a new one
+				xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
+				assert(data);
+
+				// finalize
+				_begin = data;
+				_end = data + capacity;
+				_eos = data + new_capacity;
+			}
+
+			*_end++ = node;
+		}
+
+		// Appends the nodes of [begin_, end_), growing the storage to exactly the needed size
+		// when the current capacity is not sufficient.
+		void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
+		{
+			// nothing to append; this also keeps memcpy from being called with null pointers
+			// (which is undefined even for a zero count) while the set has no storage yet
+			if (begin_ == end_) return;
+
+			size_t size_ = static_cast<size_t>(_end - _begin);
+			size_t capacity = static_cast<size_t>(_eos - _begin);
+			size_t count = static_cast<size_t>(end_ - begin_);
+
+			if (size_ + count > capacity)
+			{
+				// reallocate the old array or allocate a new one
+				xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
+				assert(data);
+
+				// finalize
+				_begin = data;
+				_end = data + size_;
+				_eos = data + size_ + count;
+			}
+
+			memcpy(_end, begin_, count * sizeof(xpath_node));
+			_end += count;
+		}
+
+		// Sorts the elements into document order and records the new ordering.
+		void sort_do()
+		{
+			_type = xpath_sort(_begin, _end, _type, false);
+		}
+
+		// Drops every element from 'pos' onwards; 'pos' must lie within [begin(), end()].
+		void truncate(xpath_node* pos)
+		{
+			assert(_begin <= pos && pos <= _end);
+
+			_end = pos;
+		}
+
+		// Removes duplicate elements. An unsorted set is first sorted with duplicate_comparator so
+		// that equal elements become adjacent; note that _type is left unchanged by that sort.
+		void remove_duplicates()
+		{
+			if (_type == xpath_node_set::type_unsorted)
+				sort(_begin, _end, duplicate_comparator());
+		
+			_end = unique(_begin, _end);
+		}
+
+		xpath_node_set::type_t type() const
+		{
+			return _type;
+		}
+
+		void set_type(xpath_node_set::type_t value)
+		{
+			_type = value;
+		}
+	};
+PUGI__NS_END
+
+PUGI__NS_BEGIN
+	// Value bundle of a node plus a position/size pair, filled in by the constructor.
+	// NOTE(review): presumably the XPath context node, context position and context size - confirm against users
+	struct xpath_context
+	{
+		xpath_node n;
+		size_t position, size;
+
+		xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
+		{
+		}
+	};
+
+	// Token kinds produced by xpath_lexer::next(); the trailing comments show the input that yields each one.
+	enum lexeme_t
+	{
+		lex_none = 0, // input that does not form a valid token
+		lex_equal, // =
+		lex_not_equal, // !=
+		lex_less, // <
+		lex_greater, // >
+		lex_less_or_equal, // <=
+		lex_greater_or_equal, // >=
+		lex_plus, // +
+		lex_minus, // -
+		lex_multiply, // *
+		lex_union, // |
+		lex_var_ref, // $name or $prefix:name (contents exclude the $)
+		lex_open_brace, // (
+		lex_close_brace, // )
+		lex_quoted_string, // '...' or "..." (contents exclude the quotes)
+		lex_number, // digits with optional fraction, or .digits
+		lex_slash, // /
+		lex_double_slash, // //
+		lex_open_square_brace, // [
+		lex_close_square_brace, // ]
+		lex_string, // name, prefix:name or prefix:*
+		lex_comma, // ,
+		lex_axis_attribute, // @
+		lex_dot, // .
+		lex_double_dot, // ..
+		lex_double_colon, // ::
+		lex_eof // end of the query string
+	};
+
+	// Character range [begin, end) that the lexer uses to expose token contents without copying them.
+	struct xpath_lexer_string
+	{
+		const char_t* begin;
+		const char_t* end;
+
+		xpath_lexer_string(): begin(0), end(0) {}
+
+		// Compares the range against a zero-terminated string by handing both, together
+		// with the range length, to strequalrange.
+		bool operator==(const char_t* other) const
+		{
+			return strequalrange(other, begin, static_cast<size_t>(end - begin));
+		}
+	};
+
+	class xpath_lexer
+	{
+		const char_t* _cur;
+		const char_t* _cur_lexeme_pos;
+		xpath_lexer_string _cur_lexeme_contents;
+
+		lexeme_t _cur_lexeme;
+
+	public:
+		explicit xpath_lexer(const char_t* query): _cur(query)
+		{
+			next();
+		}
+		
+		const char_t* state() const
+		{
+			return _cur;
+		}
+		
+		void next()
+		{
+			const char_t* cur = _cur;
+
+			while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
+
+			// save lexeme position for error reporting
+			_cur_lexeme_pos = cur;
+
+			switch (*cur)
+			{
+			case 0:
+				_cur_lexeme = lex_eof;
+				break;
+			
+			case '>':
+				if (*(cur+1) == '=')
+				{
+					cur += 2;
+					_cur_lexeme = lex_greater_or_equal;
+				}
+				else
+				{
+					cur += 1;
+					_cur_lexeme = lex_greater;
+				}
+				break;
+
+			case '<':
+				if (*(cur+1) == '=')
+				{
+					cur += 2;
+					_cur_lexeme = lex_less_or_equal;
+				}
+				else
+				{
+					cur += 1;
+					_cur_lexeme = lex_less;
+				}
+				break;
+
+			case '!':
+				if (*(cur+1) == '=')
+				{
+					cur += 2;
+					_cur_lexeme = lex_not_equal;
+				}
+				else
+				{
+					_cur_lexeme = lex_none;
+				}
+				break;
+
+			case '=':
+				cur += 1;
+				_cur_lexeme = lex_equal;
+
+				break;
+			
+			case '+':
+				cur += 1;
+				_cur_lexeme = lex_plus;
+
+				break;
+
+			case '-':
+				cur += 1;
+				_cur_lexeme = lex_minus;
+
+				break;
+
+			case '*':
+				cur += 1;
+				_cur_lexeme = lex_multiply;
+
+				break;
+
+			case '|':
+				cur += 1;
+				_cur_lexeme = lex_union;
+
+				break;
+			
+			case '$':
+				cur += 1;
+
+				if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
+				{
+					_cur_lexeme_contents.begin = cur;
+
+					while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
+
+					if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
+					{
+						cur++; // :
+
+						while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
+					}
+
+					_cur_lexeme_contents.end = cur;
+				
+					_cur_lexeme = lex_var_ref;
+				}
+				else
+				{
+					_cur_lexeme = lex_none;
+				}
+
+				break;
+
+			case '(':
+				cur += 1;
+				_cur_lexeme = lex_open_brace;
+
+				break;
+
+			case ')':
+				cur += 1;
+				_cur_lexeme = lex_close_brace;
+
+				break;
+			
+			case '[':
+				cur += 1;
+				_cur_lexeme = lex_open_square_brace;
+
+				break;
+
+			case ']':
+				cur += 1;
+				_cur_lexeme = lex_close_square_brace;
+
+				break;
+
+			case ',':
+				cur += 1;
+				_cur_lexeme = lex_comma;
+
+				break;
+
+			case '/':
+				if (*(cur+1) == '/')
+				{
+					cur += 2;
+					_cur_lexeme = lex_double_slash;
+				}
+				else
+				{
+					cur += 1;
+					_cur_lexeme = lex_slash;
+				}
+				break;
+		
+			case '.':
+				if (*(cur+1) == '.')
+				{
+					cur += 2;
+					_cur_lexeme = lex_double_dot;
+				}
+				else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
+				{
+					_cur_lexeme_contents.begin = cur; // .
+
+					++cur;
+
+					while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
+
+					_cur_lexeme_contents.end = cur;
+					
+					_cur_lexeme = lex_number;
+				}
+				else
+				{
+					cur += 1;
+					_cur_lexeme = lex_dot;
+				}
+				break;
+
+			case '@':
+				cur += 1;
+				_cur_lexeme = lex_axis_attribute;
+
+				break;
+
+			case '"':
+			case '\'':
+			{
+				char_t terminator = *cur;
+
+				++cur;
+
+				_cur_lexeme_contents.begin = cur;
+				while (*cur && *cur != terminator) cur++;
+				_cur_lexeme_contents.end = cur;
+				
+				if (!*cur)
+					_cur_lexeme = lex_none;
+				else
+				{
+					cur += 1;
+					_cur_lexeme = lex_quoted_string;
+				}
+
+				break;
+			}
+
+			case ':':
+				if (*(cur+1) == ':')
+				{
+					cur += 2;
+					_cur_lexeme = lex_double_colon;
+				}
+				else
+				{
+					_cur_lexeme = lex_none;
+				}
+				break;
+
+			default:
+				if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
+				{
+					_cur_lexeme_contents.begin = cur;
+
+					while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
+				
+					if (*cur == '.')
+					{
+						cur++;
+
+						while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
+					}
+
+					_cur_lexeme_contents.end = cur;
+
+					_cur_lexeme = lex_number;
+				}
+				else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
+				{
+					_cur_lexeme_contents.begin = cur;
+
+					while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
+
+					if (cur[0] == ':')
+					{
+						if (cur[1] == '*') // namespace test ncname:*
+						{
+							cur += 2; // :*
+						}
+						else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
+						{
+							cur++; // :
+
+							while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
+						}
+					}
+
+					_cur_lexeme_contents.end = cur;
+				
+					_cur_lexeme = lex_string;
+				}
+				else
+				{
+					_cur_lexeme = lex_none;
+				}
+			}
+
+			_cur = cur;
+		}
+
+		// Returns the type of the lexeme the lexer is currently positioned on
+		// (the value last stored in _cur_lexeme).
+		lexeme_t current() const
+		{
+			return _cur_lexeme;
+		}
+
+		// Returns the stored _cur_lexeme_pos pointer.
+		// NOTE(review): presumably the start of the current lexeme inside the query text;
+		// the assignment is outside this part of the file - confirm.
+		const char_t* current_pos() const
+		{
+			return _cur_lexeme_pos;
+		}
+
+		// Returns the character range recorded for the current lexeme.
+		// Only lexemes that carry text have a range: variable references, numbers,
+		// names (lex_string) and quoted strings; calling this for any other lexeme
+		// trips the assertion in debug builds.
+		const xpath_lexer_string& contents() const
+		{
+			assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
+
+			return _cur_lexeme_contents;
+		}
+	};
+
+	// Kinds of XPath AST nodes.
+	// The trailing comment on each enumerator shows the expression form it represents,
+	// written in terms of the node's children (left, right and further siblings).
+	// Functions with an optional argument have separate _0 / _1 kinds for the two arities.
+	enum ast_type_t
+	{
+		ast_unknown,
+		ast_op_or,						// left or right
+		ast_op_and,						// left and right
+		ast_op_equal,					// left = right
+		ast_op_not_equal,				// left != right
+		ast_op_less,					// left < right
+		ast_op_greater,					// left > right
+		ast_op_less_or_equal,			// left <= right
+		ast_op_greater_or_equal,		// left >= right
+		ast_op_add,						// left + right
+		ast_op_subtract,				// left - right
+		ast_op_multiply,				// left * right
+		ast_op_divide,					// left div right
+		ast_op_mod,						// left mod right
+		ast_op_negate,					// -left
+		ast_op_union,					// left | right
+		ast_predicate,					// apply predicate to set; next points to next predicate
+		ast_filter,						// select * from left where right
+		ast_filter_posinv,				// select * from left where right; proximity position invariant
+		ast_string_constant,			// string constant
+		ast_number_constant,			// number constant
+		ast_variable,					// variable
+		ast_func_last,					// last()
+		ast_func_position,				// position()
+		ast_func_count,					// count(left)
+		ast_func_id,					// id(left)
+		ast_func_local_name_0,			// local-name()
+		ast_func_local_name_1,			// local-name(left)
+		ast_func_namespace_uri_0,		// namespace-uri()
+		ast_func_namespace_uri_1,		// namespace-uri(left)
+		ast_func_name_0,				// name()
+		ast_func_name_1,				// name(left)
+		ast_func_string_0,				// string()
+		ast_func_string_1,				// string(left)
+		ast_func_concat,				// concat(left, right, siblings)
+		ast_func_starts_with,			// starts-with(left, right)
+		ast_func_contains,				// contains(left, right)
+		ast_func_substring_before,		// substring-before(left, right)
+		ast_func_substring_after,		// substring-after(left, right)
+		ast_func_substring_2,			// substring(left, right)
+		ast_func_substring_3,			// substring(left, right, third)
+		ast_func_string_length_0,		// string-length()
+		ast_func_string_length_1,		// string-length(left)
+		ast_func_normalize_space_0,		// normalize-space()
+		ast_func_normalize_space_1,		// normalize-space(left)
+		ast_func_translate,				// translate(left, right, third)
+		ast_func_boolean,				// boolean(left)
+		ast_func_not,					// not(left)
+		ast_func_true,					// true()
+		ast_func_false,					// false()
+		ast_func_lang,					// lang(left)
+		ast_func_number_0,				// number()
+		ast_func_number_1,				// number(left)
+		ast_func_sum,					// sum(left)
+		ast_func_floor,					// floor(left)
+		ast_func_ceiling,				// ceiling(left)
+		ast_func_round,					// round(left)
+		ast_step,						// process set left with step
+		ast_step_root					// select root node
+	};
+
+	// Axes along which a location step can move from its context node.
+	// Each enumerator corresponds to the XPath axis of the same name
+	// (underscores stand for the hyphens in the XPath spelling).
+	enum axis_t
+	{
+		axis_ancestor,
+		axis_ancestor_or_self,
+		axis_attribute,
+		axis_child,
+		axis_descendant,
+		axis_descendant_or_self,
+		axis_following,
+		axis_following_sibling,
+		axis_namespace,
+		axis_parent,
+		axis_preceding,
+		axis_preceding_sibling,
+		axis_self
+	};
+	
+	// Node tests a location step can apply to each candidate node.
+	// The per-enumerator notes describe how the step_push overloads interpret them.
+	enum nodetest_t
+	{
+		nodetest_none,				// no test set
+		nodetest_name,				// element (or attribute) whose name equals the stored name
+		nodetest_type_node,			// any node
+		nodetest_type_comment,		// comment nodes only
+		nodetest_type_pi,			// processing instructions only
+		nodetest_type_text,			// PCDATA and CDATA nodes
+		nodetest_pi,				// processing instruction whose name equals the stored name
+		nodetest_all,				// any element (or any attribute when stepping over attributes)
+		nodetest_all_in_namespace	// element/attribute whose name starts with the stored prefix
+	};
+
+	// Tag type that carries an axis as a template argument.
+	// step_fill / step_do accept an instance of it and read T::axis, so each
+	// instantiation of those templates works with one fixed axis value.
+	template <axis_t N> struct axis_to_type
+	{
+		static const axis_t axis;
+	};
+
+	// Out-of-class definition of the static member; its value is the template argument.
+	template <axis_t N> const axis_t axis_to_type<N>::axis = N;
+		
+	class xpath_ast_node
+	{
+	private:
+		// node kind (an ast_type_t value) and static result type (an xpath_value_type value);
+		// the constructors narrow both to char
+		char _type;
+		char _rettype;
+
+		// for ast_step / ast_predicate: axis (axis_t) and node test (nodetest_t), stored as char;
+		// every constructor other than the step constructor sets them to 0
+		char _axis;
+		char _test;
+
+		// tree node structure: _left/_right are the operands, _next links sibling nodes
+		// (used for predicate chains and for additional function arguments)
+		xpath_ast_node* _left;
+		xpath_ast_node* _right;
+		xpath_ast_node* _next;
+
+		// payload; which member is meaningful depends on the node kind
+		union
+		{
+			// value for ast_string_constant
+			const char_t* string;
+			// value for ast_number_constant
+			double number;
+			// variable for ast_variable
+			xpath_variable* variable;
+			// node test for ast_step (node name/namespace/node type/pi target)
+			const char_t* nodetest;
+		} _data;
+
+		// copy construction and assignment are declared private; no definition appears
+		// in this part of the file, so nodes are not meant to be copied
+		xpath_ast_node(const xpath_ast_node&);
+		xpath_ast_node& operator=(const xpath_ast_node&);
+
+		// Evaluates an equality-style comparison between two expressions; comp supplies the
+		// actual test (eval_boolean passes equal_to() for '=' and not_equal_to() for '!=').
+		//
+		// The comparison strategy depends on the static result types of the operands:
+		//  - neither is a node set: compare as booleans if either is boolean, otherwise as
+		//    numbers if either is a number, otherwise as strings;
+		//  - both are node sets: true if comp holds for the string values of any pair of nodes;
+		//  - exactly one is a node set: true if comp holds between the other operand and any
+		//    node of the set (for a boolean operand the set is converted to boolean as a whole).
+		//
+		// Temporary strings and node sets are allocated from stack.result; each
+		// xpath_allocator_capture releases them when its scope ends.
+		template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
+		{
+			xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
+
+			if (lt != xpath_type_node_set && rt != xpath_type_node_set)
+			{
+				// two scalar operands: pick the comparison type by priority boolean > number > string
+				if (lt == xpath_type_boolean || rt == xpath_type_boolean)
+					return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
+				else if (lt == xpath_type_number || rt == xpath_type_number)
+					return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
+				else if (lt == xpath_type_string || rt == xpath_type_string)
+				{
+					xpath_allocator_capture cr(stack.result);
+
+					xpath_string ls = lhs->eval_string(c, stack);
+					xpath_string rs = rhs->eval_string(c, stack);
+
+					return comp(ls, rs);
+				}
+			}
+			else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
+			{
+				// two node sets: existential comparison over all pairs of string values
+				xpath_allocator_capture cr(stack.result);
+
+				xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
+				xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
+
+				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
+					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
+					{
+						// release the two string values after every pair
+						xpath_allocator_capture cri(stack.result);
+
+						if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
+							return true;
+					}
+
+				return false;
+			}
+			else
+			{
+				// exactly one node set: normalize so that rhs is the node set and lhs the scalar
+				if (lt == xpath_type_node_set)
+				{
+					swap(lhs, rhs);
+					swap(lt, rt);
+				}
+
+				if (lt == xpath_type_boolean)
+					return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
+				else if (lt == xpath_type_number)
+				{
+					xpath_allocator_capture cr(stack.result);
+
+					double l = lhs->eval_number(c, stack);
+					xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
+
+					// compare the number against the numeric value of each node
+					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
+					{
+						xpath_allocator_capture cri(stack.result);
+
+						if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
+							return true;
+					}
+
+					return false;
+				}
+				else if (lt == xpath_type_string)
+				{
+					xpath_allocator_capture cr(stack.result);
+
+					xpath_string l = lhs->eval_string(c, stack);
+					xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
+
+					// compare the string against the string value of each node
+					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
+					{
+						xpath_allocator_capture cri(stack.result);
+
+						if (comp(l, string_value(*ri, stack.result)))
+							return true;
+					}
+
+					return false;
+				}
+			}
+
+			// reached only if an operand has a type none of the branches above handles
+			assert(!"Wrong types");
+			return false;
+		}
+
+		// Evaluates a relational comparison between two expressions; comp supplies the test.
+		// eval_boolean passes less() or less_equal(), and implements '>' and '>=' by calling
+		// this function with the two operands exchanged.
+		//
+		// Operands are always compared as numbers. A node-set operand makes the comparison
+		// existential: the result is true if comp holds for the numeric value of at least one
+		// node (or one pair of nodes when both operands are node sets).
+		//
+		// Temporary strings and node sets are allocated from stack.result; each
+		// xpath_allocator_capture releases them when its scope ends.
+		template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
+		{
+			xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
+
+			if (lt != xpath_type_node_set && rt != xpath_type_node_set)
+				return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
+			else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
+			{
+				xpath_allocator_capture cr(stack.result);
+
+				xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
+				xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
+
+				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
+				{
+					xpath_allocator_capture cri(stack.result);
+
+					// the left value is converted once per left node and reused for every right node
+					double l = convert_string_to_number(string_value(*li, stack.result).c_str());
+
+					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
+					{
+						xpath_allocator_capture crii(stack.result);
+
+						if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
+							return true;
+					}
+				}
+
+				return false;
+			}
+			else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
+			{
+				// scalar on the left, node set on the right
+				xpath_allocator_capture cr(stack.result);
+
+				double l = lhs->eval_number(c, stack);
+				xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
+
+				for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
+				{
+					xpath_allocator_capture cri(stack.result);
+
+					if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
+						return true;
+				}
+
+				return false;
+			}
+			else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
+			{
+				// node set on the left, scalar on the right; operand order is kept because
+				// comp is not symmetric
+				xpath_allocator_capture cr(stack.result);
+
+				xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
+				double r = rhs->eval_number(c, stack);
+
+				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
+				{
+					xpath_allocator_capture cri(stack.result);
+
+					if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
+						return true;
+				}
+
+				return false;
+			}
+			else
+			{
+				// the four branches above cover every combination, so this is not expected to run
+				assert(!"Wrong types");
+				return false;
+			}
+		}
+
+		// Filters the tail of ns (elements from index first onward) in place with one predicate.
+		// Every candidate is evaluated with a context holding its 1-based position within the
+		// tail and the tail's size. A predicate whose static type is number keeps the node whose
+		// position equals its value; any other predicate keeps the nodes for which it evaluates
+		// to true. Survivors are compacted towards the front and the set is truncated after them.
+		void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
+		{
+			assert(ns.size() >= first);
+
+			const size_t total = ns.size() - first;
+
+			// write cursor: next slot to receive a node that passed the predicate
+			xpath_node* out = ns.begin() + first;
+
+			size_t position = 1;
+
+			for (xpath_node* cur = out; cur != ns.end(); ++cur, ++position)
+			{
+				xpath_context c(*cur, position, total);
+
+				bool keep;
+
+				if (expr->rettype() == xpath_type_number)
+					keep = (expr->eval_number(c, stack) == position);
+				else
+					keep = expr->eval_boolean(c, stack);
+
+				if (keep) *out++ = *cur;
+			}
+
+			ns.truncate(out);
+		}
+
+		// Applies every predicate attached to this step, in order, to the elements of ns
+		// starting at index first. Predicates hang off _right and are linked through _next;
+		// the expression of each one is its _left child.
+		void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack)
+		{
+			// nothing was added past 'first', so there is nothing to filter
+			if (ns.size() == first) return;
+
+			xpath_ast_node* pred = _right;
+
+			while (pred)
+			{
+				apply_predicate(ns, first, pred->_left, stack);
+
+				pred = pred->_next;
+			}
+		}
+
+		// Appends attribute a (owned by parent) to ns if it passes this step's node test.
+		// Null attributes and namespace declarations ("xmlns" / "xmlns:...") are never added.
+		void step_push(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& parent, xpath_allocator* alloc)
+		{
+			if (!a) return;
+
+			const char_t* attr_name = a.name();
+
+			// There are no attribute nodes corresponding to attributes that declare namespaces
+			// That is, "xmlns:..." or "xmlns"
+			const bool declares_namespace = starts_with(attr_name, PUGIXML_TEXT("xmlns")) && (attr_name[5] == 0 || attr_name[5] == ':');
+
+			if (declares_namespace) return;
+
+			bool accepted;
+
+			if (_test == nodetest_name)
+				accepted = strequal(attr_name, _data.nodetest);
+			else if (_test == nodetest_type_node || _test == nodetest_all)
+				accepted = true;
+			else if (_test == nodetest_all_in_namespace)
+				accepted = starts_with(attr_name, _data.nodetest);
+			else
+				accepted = false; // remaining node tests never match an attribute
+
+			if (accepted) ns.push_back(xpath_node(a, parent), alloc);
+		}
+		
+		// Appends node n to ns if it passes this step's node test (_test).
+		// Null nodes are ignored. Name-based tests compare against _data.nodetest.
+		// A node test value not listed below is a programming error and trips the assertion.
+		void step_push(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc)
+		{
+			if (!n) return;
+
+			switch (_test)
+			{
+			case nodetest_name:
+				// only elements are matched by name
+				if (n.type() == node_element && strequal(n.name(), _data.nodetest)) ns.push_back(n, alloc);
+				break;
+				
+			case nodetest_type_node:
+				ns.push_back(n, alloc);
+				break;
+				
+			case nodetest_type_comment:
+				if (n.type() == node_comment)
+					ns.push_back(n, alloc);
+				break;
+				
+			case nodetest_type_text:
+				// both plain character data and CDATA sections count as text
+				if (n.type() == node_pcdata || n.type() == node_cdata)
+					ns.push_back(n, alloc);
+				break;
+				
+			case nodetest_type_pi:
+				if (n.type() == node_pi)
+					ns.push_back(n, alloc);
+				break;
+									
+			case nodetest_pi:
+				// processing instruction with a specific target name
+				if (n.type() == node_pi && strequal(n.name(), _data.nodetest))
+					ns.push_back(n, alloc);
+				break;
+				
+			case nodetest_all:
+				if (n.type() == node_element)
+					ns.push_back(n, alloc);
+				break;
+				
+			case nodetest_all_in_namespace:
+				// _data.nodetest holds the prefix the element name has to start with
+				if (n.type() == node_element && starts_with(n.name(), _data.nodetest))
+					ns.push_back(n, alloc);
+				break;
+
+			default:
+				// the switch is over the node test, not the axis
+				assert(!"Unknown node test");
+			} 
+		}
+
+		// Collects into ns the nodes reachable from element/document node n along the axis
+		// given by T::axis. Each candidate goes through step_push, which applies the node test.
+		// The last (unnamed) parameter only selects the template instantiation.
+		// Axes not listed in the switch trip the assertion in the default branch.
+		template <class T> void step_fill(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc, T)
+		{
+			const axis_t axis = T::axis;
+
+			switch (axis)
+			{
+			case axis_attribute:
+			{
+				// every attribute of n, in declaration order
+				for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute())
+					step_push(ns, a, n, alloc);
+				
+				break;
+			}
+			
+			case axis_child:
+			{
+				// direct children, first to last
+				for (xml_node c = n.first_child(); c; c = c.next_sibling())
+					step_push(ns, c, alloc);
+					
+				break;
+			}
+			
+			case axis_descendant:
+			case axis_descendant_or_self:
+			{
+				if (axis == axis_descendant_or_self)
+					step_push(ns, n, alloc);
+					
+				// non-recursive depth-first walk of the subtree below n
+				xml_node cur = n.first_child();
+				
+				while (cur && cur != n)
+				{
+					step_push(ns, cur, alloc);
+					
+					if (cur.first_child())
+						cur = cur.first_child();
+					else if (cur.next_sibling())
+						cur = cur.next_sibling();
+					else
+					{
+						// climb until a node with a next sibling is found or we are back at n
+						while (!cur.next_sibling() && cur != n)
+							cur = cur.parent();
+					
+						if (cur != n) cur = cur.next_sibling();
+					}
+				}
+				
+				break;
+			}
+			
+			case axis_following_sibling:
+			{
+				for (xml_node c = n.next_sibling(); c; c = c.next_sibling())
+					step_push(ns, c, alloc);
+				
+				break;
+			}
+			
+			case axis_preceding_sibling:
+			{
+				// walks backwards, so nodes are produced nearest sibling first
+				for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling())
+					step_push(ns, c, alloc);
+				
+				break;
+			}
+			
+			case axis_following:
+			{
+				xml_node cur = n;
+
+				// exit from this node so that we don't include descendants
+				while (cur && !cur.next_sibling()) cur = cur.parent();
+				cur = cur.next_sibling();
+
+				// depth-first walk from that sibling onward; if nothing follows n, cur is null here,
+				// step_push ignores it and the loop exits through the break below
+				for (;;)
+				{
+					step_push(ns, cur, alloc);
+
+					if (cur.first_child())
+						cur = cur.first_child();
+					else if (cur.next_sibling())
+						cur = cur.next_sibling();
+					else
+					{
+						while (cur && !cur.next_sibling()) cur = cur.parent();
+						cur = cur.next_sibling();
+
+						if (!cur) break;
+					}
+				}
+
+				break;
+			}
+
+			case axis_preceding:
+			{
+				xml_node cur = n;
+
+				// move to the nearest node (self or ancestor) that has a previous sibling
+				while (cur && !cur.previous_sibling()) cur = cur.parent();
+				cur = cur.previous_sibling();
+
+				// backwards depth-first walk: descend to the last leaf first, then move left/up
+				for (;;)
+				{
+					if (cur.last_child())
+						cur = cur.last_child();
+					else
+					{
+						// leaf node, can't be ancestor
+						step_push(ns, cur, alloc);
+
+						if (cur.previous_sibling())
+							cur = cur.previous_sibling();
+						else
+						{
+							// climb, emitting each parent on the way unless it is an ancestor of n
+							do 
+							{
+								cur = cur.parent();
+								if (!cur) break;
+
+								if (!node_is_ancestor(cur, n)) step_push(ns, cur, alloc);
+							}
+							while (!cur.previous_sibling());
+
+							cur = cur.previous_sibling();
+
+							if (!cur) break;
+						}
+					}
+				}
+
+				break;
+			}
+			
+			case axis_ancestor:
+			case axis_ancestor_or_self:
+			{
+				if (axis == axis_ancestor_or_self)
+					step_push(ns, n, alloc);
+
+				// parent chain, nearest ancestor first
+				xml_node cur = n.parent();
+				
+				while (cur)
+				{
+					step_push(ns, cur, alloc);
+					
+					cur = cur.parent();
+				}
+				
+				break;
+			}
+
+			case axis_self:
+			{
+				step_push(ns, n, alloc);
+
+				break;
+			}
+
+			case axis_parent:
+			{
+				if (n.parent()) step_push(ns, n.parent(), alloc);
+
+				break;
+			}
+				
+			default:
+				assert(!"Unimplemented axis");
+			}
+		}
+		
+		// Collects into ns the nodes reachable from attribute a (owned by node p) along the
+		// axis given by T::axis. Only the axes listed in the switch are handled; step_do calls
+		// this overload only for axes it flags as applicable to attributes. v is the axis tag,
+		// forwarded when the work is delegated to the node overload.
+		template <class T> void step_fill(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& p, xpath_allocator* alloc, T v)
+		{
+			const axis_t axis = T::axis;
+
+			switch (axis)
+			{
+			case axis_ancestor:
+			case axis_ancestor_or_self:
+			{
+				if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
+					step_push(ns, a, p, alloc);
+
+				// the owning element and its parent chain are the attribute's ancestors
+				xml_node cur = p;
+				
+				while (cur)
+				{
+					step_push(ns, cur, alloc);
+					
+					cur = cur.parent();
+				}
+				
+				break;
+			}
+
+			case axis_descendant_or_self:
+			case axis_self:
+			{
+				// only the attribute itself is produced on these axes
+				if (_test == nodetest_type_node) // reject attributes based on principal node type test
+					step_push(ns, a, p, alloc);
+
+				break;
+			}
+
+			case axis_following:
+			{
+				// depth-first walk that starts at p but pushes only after moving, so p itself is
+				// skipped while its descendants and everything after them are visited
+				xml_node cur = p;
+				
+				for (;;)
+				{
+					if (cur.first_child())
+						cur = cur.first_child();
+					else if (cur.next_sibling())
+						cur = cur.next_sibling();
+					else
+					{
+						while (cur && !cur.next_sibling()) cur = cur.parent();
+						cur = cur.next_sibling();
+						
+						if (!cur) break;
+					}
+
+					step_push(ns, cur, alloc);
+				}
+
+				break;
+			}
+
+			case axis_parent:
+			{
+				step_push(ns, p, alloc);
+
+				break;
+			}
+
+			case axis_preceding:
+			{
+				// preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
+				step_fill(ns, p, alloc, v);
+				break;
+			}
+			
+			default:
+				assert(!"Unimplemented axis");
+			}
+		}
+		
+		// Evaluates this location step for the axis given by T::axis and returns the resulting
+		// node set. With a left operand, the step is applied to every node of the operand's
+		// node set; without one, it is applied to the context node. Predicates are applied to
+		// the nodes contributed by each origin node separately.
+		template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, T v)
+		{
+			const axis_t axis = T::axis;
+
+			// axes for which an attribute origin can contribute nodes
+			const bool attribute_origin_allowed =
+				(axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
+
+			// axes whose traversal yields nodes in reverse order
+			const bool reverse_axis =
+				(axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
+
+			xpath_node_set_raw result;
+			result.set_type(reverse_axis ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted);
+
+			if (!_left)
+			{
+				// no left operand: the single origin is the context node
+				if (c.n.node())
+					step_fill(result, c.n.node(), stack.result, v);
+				else if (attribute_origin_allowed)
+					step_fill(result, c.n.attribute(), c.n.parent(), stack.result, v);
+
+				apply_predicates(result, 0, stack);
+			}
+			else
+			{
+				xpath_node_set_raw origins = _left->eval_node_set(c, stack);
+
+				// self axis preserves the original order
+				if (axis == axis_self) result.set_type(origins.type());
+
+				for (const xpath_node* origin = origins.begin(); origin != origins.end(); ++origin)
+				{
+					// predicates must only see what this origin adds
+					const size_t base = result.size();
+
+					// in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
+					if (base != 0 && axis != axis_self) result.set_type(xpath_node_set::type_unsorted);
+
+					if (origin->node())
+						step_fill(result, origin->node(), stack.result, v);
+					else if (attribute_origin_allowed)
+						step_fill(result, origin->attribute(), origin->parent(), stack.result, v);
+
+					apply_predicates(result, base, stack);
+				}
+			}
+
+			// child, attribute and self axes always generate unique set of nodes
+			// for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
+			const bool unique_by_construction = (axis == axis_child || axis == axis_attribute || axis == axis_self);
+
+			if (!unique_by_construction && result.type() == xpath_node_set::type_unsorted)
+				result.remove_duplicates();
+
+			return result;
+		}
+		
+	public:
+		// Creates a string constant node. type must be ast_string_constant (asserted);
+		// value is stored as a pointer, the characters are not copied.
+		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
+			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
+		{
+			assert(type == ast_string_constant);
+			_data.string = value;
+		}
+
+		// Creates a number constant node. type must be ast_number_constant (asserted).
+		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
+			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
+		{
+			assert(type == ast_number_constant);
+			_data.number = value;
+		}
+		
+		// Creates a variable reference node. type must be ast_variable (asserted);
+		// the node keeps the pointer to the variable, not a copy of its value.
+		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
+			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
+		{
+			assert(type == ast_variable);
+			_data.variable = value;
+		}
+		
+		// Creates a general node (operator, function call, ...) with up to two children.
+		// _data is left unset by this constructor.
+		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
+			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
+		{
+		}
+
+		// Creates a location step node: left is the expression the step is applied to
+		// (may be null), axis/test select the traversal and the node test, and contents is
+		// the text the node test compares against (stored as a pointer, not copied).
+		// The result type is always a node set.
+		xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
+			_type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
+		{
+			_data.nodetest = contents;
+		}
+
+		// Sets the sibling link (_next), replacing any previous value.
+		void set_next(xpath_ast_node* value)
+		{
+			_next = value;
+		}
+
+		// Sets the right child (_right), replacing any previous value.
+		void set_right(xpath_ast_node* value)
+		{
+			_right = value;
+		}
+
+		// Evaluates this node in context c and returns the result as a boolean.
+		// Node kinds that natively produce a boolean are handled by their own case; every
+		// other kind reaches the default branch, which evaluates the node in its static
+		// return type and converts: numbers via convert_number_to_boolean, strings and node
+		// sets are true when non-empty.
+		// Temporaries are allocated from stack.result and released by the
+		// xpath_allocator_capture objects when their scope ends.
+		bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
+		{
+			switch (_type)
+			{
+			case ast_op_or:
+				// C++ || short-circuits, so the right operand is skipped when the left is true
+				return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
+				
+			case ast_op_and:
+				// C++ && short-circuits, so the right operand is skipped when the left is false
+				return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
+				
+			case ast_op_equal:
+				return compare_eq(_left, _right, c, stack, equal_to());
+
+			case ast_op_not_equal:
+				return compare_eq(_left, _right, c, stack, not_equal_to());
+	
+			case ast_op_less:
+				return compare_rel(_left, _right, c, stack, less());
+			
+			case ast_op_greater:
+				// a > b is evaluated as b < a
+				return compare_rel(_right, _left, c, stack, less());
+
+			case ast_op_less_or_equal:
+				return compare_rel(_left, _right, c, stack, less_equal());
+			
+			case ast_op_greater_or_equal:
+				// a >= b is evaluated as b <= a
+				return compare_rel(_right, _left, c, stack, less_equal());
+
+			case ast_func_starts_with:
+			{
+				xpath_allocator_capture cr(stack.result);
+
+				xpath_string lr = _left->eval_string(c, stack);
+				xpath_string rr = _right->eval_string(c, stack);
+
+				return starts_with(lr.c_str(), rr.c_str());
+			}
+
+			case ast_func_contains:
+			{
+				xpath_allocator_capture cr(stack.result);
+
+				xpath_string lr = _left->eval_string(c, stack);
+				xpath_string rr = _right->eval_string(c, stack);
+
+				return find_substring(lr.c_str(), rr.c_str()) != 0;
+			}
+
+			case ast_func_boolean:
+				return _left->eval_boolean(c, stack);
+				
+			case ast_func_not:
+				return !_left->eval_boolean(c, stack);
+				
+			case ast_func_true:
+				return true;
+				
+			case ast_func_false:
+				return false;
+
+			case ast_func_lang:
+			{
+				// lang() is always false when the context node is an attribute
+				if (c.n.attribute()) return false;
+				
+				xpath_allocator_capture cr(stack.result);
+
+				xpath_string lang = _left->eval_string(c, stack);
+				
+				// the nearest xml:lang attribute on the context node or an ancestor decides
+				for (xml_node n = c.n.node(); n; n = n.parent())
+				{
+					xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
+					
+					if (a)
+					{
+						const char_t* value = a.value();
+						
+						// strnicmp / strncasecmp is not portable
+						for (const char_t* lit = lang.c_str(); *lit; ++lit)
+						{
+							if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
+							++value;
+						}
+						
+						// match if the attribute value ends here or continues with a '-' suffix
+						return *value == 0 || *value == '-';
+					}
+				}
+				
+				return false;
+			}
+
+			case ast_variable:
+			{
+				assert(_rettype == _data.variable->type());
+
+				if (_rettype == xpath_type_boolean)
+					return _data.variable->get_boolean();
+
+				// fallthrough to type conversion
+			}
+
+			default:
+			{
+				// generic path: evaluate in the node's own type, then convert to boolean
+				switch (_rettype)
+				{
+				case xpath_type_number:
+					return convert_number_to_boolean(eval_number(c, stack));
+					
+				case xpath_type_string:
+				{
+					xpath_allocator_capture cr(stack.result);
+
+					return !eval_string(c, stack).empty();
+				}
+					
+				case xpath_type_node_set:				
+				{
+					xpath_allocator_capture cr(stack.result);
+
+					return !eval_node_set(c, stack).empty();
+				}
+
+				default:
+					assert(!"Wrong expression for return type boolean");
+					return false;
+				}
+			}
+			}
+		}
+
+		// Evaluates this node in context c and returns the result as a number.
+		// Node kinds that natively produce a number are handled by their own case; every
+		// other kind reaches the default branch, which evaluates the node in its static
+		// return type and converts: booleans become 1 or 0, strings and node sets are
+		// converted through their string value with convert_string_to_number.
+		// Temporaries are allocated from stack.result and released by the
+		// xpath_allocator_capture objects when their scope ends.
+		double eval_number(const xpath_context& c, const xpath_stack& stack)
+		{
+			switch (_type)
+			{
+			case ast_op_add:
+				return _left->eval_number(c, stack) + _right->eval_number(c, stack);
+				
+			case ast_op_subtract:
+				return _left->eval_number(c, stack) - _right->eval_number(c, stack);
+
+			case ast_op_multiply:
+				return _left->eval_number(c, stack) * _right->eval_number(c, stack);
+
+			case ast_op_divide:
+				return _left->eval_number(c, stack) / _right->eval_number(c, stack);
+
+			case ast_op_mod:
+				return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
+
+			case ast_op_negate:
+				return -_left->eval_number(c, stack);
+
+			case ast_number_constant:
+				return _data.number;
+
+			case ast_func_last:
+				// last() is the context size
+				return static_cast<double>(c.size);
+			
+			case ast_func_position:
+				// position() is the context position
+				return static_cast<double>(c.position);
+
+			case ast_func_count:
+			{
+				xpath_allocator_capture cr(stack.result);
+
+				return static_cast<double>(_left->eval_node_set(c, stack).size());
+			}
+			
+			case ast_func_string_length_0:
+			{
+				// no argument: length of the context node's string value
+				xpath_allocator_capture cr(stack.result);
+
+				return static_cast<double>(string_value(c.n, stack.result).length());
+			}
+			
+			case ast_func_string_length_1:
+			{
+				xpath_allocator_capture cr(stack.result);
+
+				return static_cast<double>(_left->eval_string(c, stack).length());
+			}
+			
+			case ast_func_number_0:
+			{
+				// no argument: numeric value of the context node's string value
+				xpath_allocator_capture cr(stack.result);
+
+				return convert_string_to_number(string_value(c.n, stack.result).c_str());
+			}
+			
+			case ast_func_number_1:
+				return _left->eval_number(c, stack);
+
+			case ast_func_sum:
+			{
+				xpath_allocator_capture cr(stack.result);
+
+				double r = 0;
+				
+				xpath_node_set_raw ns = _left->eval_node_set(c, stack);
+				
+				// add up the numeric value of every node's string value
+				for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
+				{
+					xpath_allocator_capture cri(stack.result);
+
+					r += convert_string_to_number(string_value(*it, stack.result).c_str());
+				}
+			
+				return r;
+			}
+
+			case ast_func_floor:
+			{
+				double r = _left->eval_number(c, stack);
+				
+				// r == r is false only for NaN, which is returned unchanged
+				return r == r ? floor(r) : r;
+			}
+
+			case ast_func_ceiling:
+			{
+				double r = _left->eval_number(c, stack);
+				
+				// r == r is false only for NaN, which is returned unchanged
+				return r == r ? ceil(r) : r;
+			}
+
+			case ast_func_round:
+				return round_nearest_nzero(_left->eval_number(c, stack));
+			
+			case ast_variable:
+			{
+				assert(_rettype == _data.variable->type());
+
+				if (_rettype == xpath_type_number)
+					return _data.variable->get_number();
+
+				// fallthrough to type conversion
+			}
+
+			default:
+			{
+				// generic path: evaluate in the node's own type, then convert to number
+				switch (_rettype)
+				{
+				case xpath_type_boolean:
+					return eval_boolean(c, stack) ? 1 : 0;
+					
+				case xpath_type_string:
+				{
+					xpath_allocator_capture cr(stack.result);
+
+					return convert_string_to_number(eval_string(c, stack).c_str());
+				}
+					
+				case xpath_type_node_set:
+				{
+					// eval_string performs the node set to string conversion
+					xpath_allocator_capture cr(stack.result);
+
+					return convert_string_to_number(eval_string(c, stack).c_str());
+				}
+					
+				default:
+					assert(!"Wrong expression for return type number");
+					return 0;
+				}
+				
+			}
+			}
+		}
+		
+		// Evaluates concat(left, right, siblings...): every argument is evaluated as a string
+		// and the pieces are joined into one newly allocated string.
+		// The arguments are _left followed by the chain starting at _right and linked by _next.
+		// Intermediate strings are evaluated with the two allocators of the stack exchanged;
+		// the joined result is allocated from stack.result.
+		xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
+		{
+			assert(_type == ast_func_concat);
+
+			// scope guard for the temporary allocator
+			xpath_allocator_capture temp_scope(stack.temp);
+
+			// number of arguments: _left plus every node on the _right/_next chain
+			size_t arg_count = 1;
+
+			xpath_ast_node* arg = _right;
+
+			while (arg)
+			{
+				++arg_count;
+				arg = arg->_next;
+			}
+
+			// small concats keep their pieces in a fixed local array
+			xpath_string inline_pieces[4];
+			xpath_string* pieces = inline_pieces;
+
+			// larger ones take the array storage from the temporary allocator
+			if (arg_count > sizeof(inline_pieces) / sizeof(inline_pieces[0]))
+			{
+				void* storage = stack.temp->allocate(arg_count * sizeof(xpath_string));
+
+				pieces = static_cast<xpath_string*>(storage);
+				assert(pieces);
+			}
+
+			// evaluate all strings to temporary stack
+			xpath_stack swapped_stack = {stack.temp, stack.result};
+
+			pieces[0] = _left->eval_string(c, swapped_stack);
+
+			size_t filled = 1;
+
+			for (arg = _right; arg; arg = arg->_next)
+			{
+				pieces[filled] = arg->eval_string(c, swapped_stack);
+				++filled;
+			}
+
+			assert(filled == arg_count);
+
+			// total number of characters across all pieces
+			size_t total_length = 0;
+
+			for (size_t i = 0; i < arg_count; ++i)
+				total_length += pieces[i].length();
+
+			// room for all characters plus the terminating zero
+			char_t* joined = static_cast<char_t*>(stack.result->allocate((total_length + 1) * sizeof(char_t)));
+			assert(joined);
+
+			char_t* out = joined;
+
+			for (size_t k = 0; k < arg_count; ++k)
+			{
+				const char_t* src = pieces[k].c_str();
+
+				while (*src) *out++ = *src++;
+			}
+
+			*out = 0;
+
+			return xpath_string(joined, true);
+		}
+
+		xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
+		{
+			switch (_type)
+			{
+			case ast_string_constant:
+				return xpath_string_const(_data.string);
+			
+			case ast_func_local_name_0:
+			{
+				xpath_node na = c.n;
+				
+				return xpath_string_const(local_name(na));
+			}
+
+			case ast_func_local_name_1:
+			{
+				xpath_allocator_capture cr(stack.result);
+
+				xpath_node_set_raw ns = _left->eval_node_set(c, stack);
+				xpath_node na = ns.first();
+				
+				return xpath_string_const(local_name(na));
+			}
+
+			case ast_func_name_0:
+			{
+				xpath_node na = c.n;
+				
+				return xpath_string_const(qualified_name(na));
+			}
+
+			case ast_func_name_1:
+			{
+				xpath_allocator_capture cr(stack.result);
+
+				xpath_node_set_raw ns = _left->eval_node_set(c, stack);
+				xpath_node na = ns.first();
+				
+				return xpath_string_const(qualified_name(na));
+			}
+
+			case ast_func_namespace_uri_0:
+			{
+				xpath_node na = c.n;
+				
+				return xpath_string_const(namespace_uri(na));
+			}
+
+			case ast_func_namespace_uri_1:
+			{
+				xpath_allocator_capture cr(stack.result);
+
+				xpath_node_set_raw ns = _left->eval_node_set(c, stack);
+				xpath_node na = ns.first();
+				
+				return xpath_string_const(namespace_uri(na));
+			}
+
+			case ast_func_string_0:
+				return string_value(c.n, stack.result);
+
+			case ast_func_string_1:
+				return _left->eval_string(c, stack);
+
+			case ast_func_concat:
+				return eval_string_concat(c, stack);
+
+			case ast_func_substring_before:
+			{
+				xpath_allocator_capture cr(stack.temp);
+
+				xpath_stack swapped_stack = {stack.temp, stack.result};
+
+				xpath_string s = _left->eval_string(c, swapped_stack);
+				xpath_string p = _right->eval_string(c, swapped_stack);
+
+				const char_t* pos = find_substring(s.c_str(), p.c_str());
+				
+				return pos ? xpath_string(s.c_str(), pos, stack.result) : xpath_string();
+			}
+			
+			case ast_func_substring_after:
+			{
+				xpath_allocator_capture cr(stack.temp);
+
+				xpath_stack swapped_stack = {stack.temp, stack.result};
+
+				xpath_string s = _left->eval_string(c, swapped_stack);
+				xpath_string p = _right->eval_string(c, swapped_stack);
+				
+				const char_t* pos = find_substring(s.c_str(), p.c_str());
+				if (!pos) return xpath_string();
+
+				const char_t* result = pos + p.length();
+
+				return s.uses_heap() ? xpath_string(result, stack.result) : xpath_string_const(result);
+			}
+
+			case ast_func_substring_2:
+			{
+				xpath_allocator_capture cr(stack.temp);
+
+				xpath_stack swapped_stack = {stack.temp, stack.result};
+
+				xpath_string s = _left->eval_string(c, swapped_stack);
+				size_t s_length = s.length();
+
+				double first = round_nearest(_right->eval_number(c, stack));
+				
+				if (is_nan(first)) return xpath_string(); // NaN
+				else if (first >= s_length + 1) return xpath_string();
+				
+				size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
+				assert(1 <= pos && pos <= s_length + 1);
+
+				const char_t* rbegin = s.c_str() + (pos - 1);
+				
+				return s.uses_heap() ? xpath_string(rbegin, stack.result) : xpath_string_const(rbegin);
+			}
+			
+			case ast_func_substring_3:
+			{
+				xpath_allocator_capture cr(stack.temp);
+
+				xpath_stack swapped_stack = {stack.temp, stack.result};
+
+				xpath_string s = _left->eval_string(c, swapped_stack);
+				size_t s_length = s.length();
+
+				double first = round_nearest(_right->eval_number(c, stack));
+				double last = first + round_nearest(_right->_next->eval_number(c, stack));
+				
+				if (is_nan(first) || is_nan(last)) return xpath_string();
+				else if (first >= s_length + 1) return xpath_string();
+				else if (first >= last) return xpath_string();
+				else if (last < 1) return xpath_string();
+				
+				size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
+				size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);
+
+				assert(1 <= pos && pos <= end && end <= s_length + 1);
+				const char_t* rbegin = s.c_str() + (pos - 1);
+				const char_t* rend = s.c_str() + (end - 1);
+
+				return (end == s_length + 1 && !s.uses_heap()) ? xpath_string_const(rbegin) : xpath_string(rbegin, rend, stack.result);
+			}
+
+			case ast_func_normalize_space_0:
+			{
+				xpath_string s = string_value(c.n, stack.result);
+
+				normalize_space(s.data(stack.result));
+
+				return s;
+			}
+
+			case ast_func_normalize_space_1:
+			{
+				xpath_string s = _left->eval_string(c, stack);
+
+				normalize_space(s.data(stack.result));
+			
+				return s;
+			}
+
+			case ast_func_translate:
+			{
+				xpath_allocator_capture cr(stack.temp);
+
+				xpath_stack swapped_stack = {stack.temp, stack.result};
+
+				xpath_string s = _left->eval_string(c, stack);
+				xpath_string from = _right->eval_string(c, swapped_stack);
+				xpath_string to = _right->_next->eval_string(c, swapped_stack);
+
+				translate(s.data(stack.result), from.c_str(), to.c_str());
+
+				return s;
+			}
+
+			case ast_variable:
+			{
+				assert(_rettype == _data.variable->type());
+
+				if (_rettype == xpath_type_string)
+					return xpath_string_const(_data.variable->get_string());
+
+				// fallthrough to type conversion
+			}
+
+			default:
+			{
+				switch (_rettype)
+				{
+				case xpath_type_boolean:
+					return xpath_string_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
+					
+				case xpath_type_number:
+					return convert_number_to_string(eval_number(c, stack), stack.result);
+					
+				case xpath_type_node_set:
+				{
+					xpath_allocator_capture cr(stack.temp);
+
+					xpath_stack swapped_stack = {stack.temp, stack.result};
+
+					xpath_node_set_raw ns = eval_node_set(c, swapped_stack);
+					return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
+				}
+				
+				default:
+					assert(!"Wrong expression for return type string");
+					return xpath_string();
+				}
+			}
+			}
+		}
+
+		xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack)
+		{ // Evaluates this node as a node set for context c, dispatching on _type; unsupported node types assert and yield an empty set.
+			switch (_type)
+			{
+			case ast_op_union:
+			{
+				xpath_allocator_capture cr(stack.temp);
+				// swapped_stack lists stack.temp first and stack.result second (presumably exchanging the two allocators' roles - confirm against xpath_stack)
+				xpath_stack swapped_stack = {stack.temp, stack.result};
+
+				xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack); // left operand is evaluated with the swapped stack
+				xpath_node_set_raw rs = _right->eval_node_set(c, stack); // right operand is evaluated with the caller's stack
+				
+				// we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
+				rs.set_type(xpath_node_set::type_unsorted);
+				// the left set is appended to rs using stack.result, then duplicates are removed
+				rs.append(ls.begin(), ls.end(), stack.result);
+				rs.remove_duplicates();
+				
+				return rs;
+			}
+
+			case ast_filter:
+			case ast_filter_posinv:
+			{
+				xpath_node_set_raw set = _left->eval_node_set(c, stack);
+
+				// either expression is a number or it contains position() call; sort by document order
+				if (_type == ast_filter) set.sort_do();
+				// the predicate expression is held in _right (the 0 is presumably the first index to filter - confirm against apply_predicate)
+				apply_predicate(set, 0, _right, stack);
+			
+				return set;
+			}
+			
+			case ast_func_id:
+				return xpath_node_set_raw(); // id() always evaluates to an empty node set here
+			
+			case ast_step:
+			{
+				switch (_axis) // every supported axis forwards to step_do with the matching axis_to_type tag
+				{
+				case axis_ancestor:
+					return step_do(c, stack, axis_to_type<axis_ancestor>());
+					
+				case axis_ancestor_or_self:
+					return step_do(c, stack, axis_to_type<axis_ancestor_or_self>());
+
+				case axis_attribute:
+					return step_do(c, stack, axis_to_type<axis_attribute>());
+
+				case axis_child:
+					return step_do(c, stack, axis_to_type<axis_child>());
+				
+				case axis_descendant:
+					return step_do(c, stack, axis_to_type<axis_descendant>());
+
+				case axis_descendant_or_self:
+					return step_do(c, stack, axis_to_type<axis_descendant_or_self>());
+
+				case axis_following:
+					return step_do(c, stack, axis_to_type<axis_following>());
+				
+				case axis_following_sibling:
+					return step_do(c, stack, axis_to_type<axis_following_sibling>());
+				
+				case axis_namespace:
+					// the namespace axis is not supported; it always selects an empty set
+					return xpath_node_set_raw();
+				
+				case axis_parent:
+					return step_do(c, stack, axis_to_type<axis_parent>());
+				
+				case axis_preceding:
+					return step_do(c, stack, axis_to_type<axis_preceding>());
+
+				case axis_preceding_sibling:
+					return step_do(c, stack, axis_to_type<axis_preceding_sibling>());
+				
+				case axis_self:
+					return step_do(c, stack, axis_to_type<axis_self>());
+
+				default:
+					assert(!"Unknown axis");
+					return xpath_node_set_raw();
+				}
+			}
+
+			case ast_step_root:
+			{
+				assert(!_right); // root step can't have any predicates
+
+				xpath_node_set_raw ns;
+
+				ns.set_type(xpath_node_set::type_sorted);
+				// the set holds the root of the context node, or of the owning element when the context is an attribute; otherwise it stays empty
+				if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
+				else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
+
+				return ns;
+			}
+
+			case ast_variable:
+			{
+				assert(_rettype == _data.variable->type());
+
+				if (_rettype == xpath_type_node_set)
+				{
+					const xpath_node_set& s = _data.variable->get_node_set();
+
+					xpath_node_set_raw ns;
+					// copy the variable's nodes (and its ordering type) into a set allocated from stack.result
+					ns.set_type(s.type());
+					ns.append(s.begin(), s.end(), stack.result);
+
+					return ns;
+				}
+
+				// not a node-set variable: falls through to the default case below (assert, then empty set)
+			}
+
+			default:
+				assert(!"Wrong expression for return type node set");
+				return xpath_node_set_raw();
+			}
+		}
+		
+		// Reports whether this subtree is position-invariant.
+		// position() is never invariant; constants, variables, steps, predicates and
+		// filters are reported as invariant without looking at their children. Any other
+		// node is invariant only if its left child and every node of the _right/_next
+		// chain are.
+		bool is_posinv()
+		{
+			switch (_type)
+			{
+			case ast_func_position:
+				return false;
+
+			case ast_string_constant:
+			case ast_number_constant:
+			case ast_variable:
+			case ast_step:
+			case ast_step_root:
+			case ast_predicate:
+			case ast_filter:
+			case ast_filter_posinv:
+				return true;
+
+			default:
+				break;
+			}
+
+			// generic node: check the left operand first, then walk the argument chain
+			if (_left && !_left->is_posinv()) return false;
+
+			xpath_ast_node* arg = _right;
+
+			while (arg)
+			{
+				if (!arg->is_posinv()) return false;
+
+				arg = arg->_next;
+			}
+
+			return true;
+		}
+
+		// Returns the result type stored in _rettype, converted to xpath_value_type.
+		xpath_value_type rettype() const
+		{
+			const xpath_value_type type = static_cast<xpath_value_type>(_rettype);
+
+			return type;
+		}
+	};
+
+	struct xpath_parser
+	{
+		xpath_allocator* _alloc;
+		xpath_lexer _lexer;
+
+		const char_t* _query;
+		xpath_variable_set* _variables;
+
+		xpath_parse_result* _result;
+
+		char_t _scratch[32];
+
+	#ifdef PUGIXML_NO_EXCEPTIONS
+		jmp_buf _error_handler;
+	#endif
+
+		void throw_error(const char* message)
+		{ // Records message and the current lexer offset in *_result, then leaves the parser via longjmp or an xpath_exception.
+			_result->error = message;
+			_result->offset = _lexer.current_pos() - _query; // distance of the lexer's current position from the start of the query
+
+		#ifdef PUGIXML_NO_EXCEPTIONS
+			longjmp(_error_handler, 1); // transfers control back to the setjmp armed on _error_handler
+		#else
+			throw xpath_exception(*_result);
+		#endif
+		}
+
+		void throw_error_oom()
+		{ // Signals allocation failure: the parse error "Out of memory" when exceptions are disabled, std::bad_alloc otherwise.
+		#ifdef PUGIXML_NO_EXCEPTIONS
+			throw_error("Out of memory");
+		#else
+			throw std::bad_alloc();
+		#endif
+		}
+
+		// Obtains raw storage for one xpath_ast_node from the parser's allocator.
+		// A null result from the allocator is reported through throw_error_oom().
+		void* alloc_node()
+		{
+			void* memory = _alloc->allocate_nothrow(sizeof(xpath_ast_node));
+
+			if (memory) return memory;
+
+			throw_error_oom();
+
+			return memory;
+		}
+
+		// Copies the characters [value.begin, value.end) into allocator-owned storage and
+		// appends a terminating zero. Returns 0 when value.begin is null; allocation
+		// failure is reported through throw_error_oom().
+		const char_t* alloc_string(const xpath_lexer_string& value)
+		{
+			if (!value.begin) return 0;
+
+			const size_t count = static_cast<size_t>(value.end - value.begin);
+
+			char_t* copy = static_cast<char_t*>(_alloc->allocate_nothrow((count + 1) * sizeof(char_t)));
+
+			if (!copy) throw_error_oom();
+			assert(copy); // workaround for clang static analysis
+
+			memcpy(copy, value.begin, count * sizeof(char_t));
+			copy[count] = 0;
+
+			return copy;
+		}
+
+		// Builds a string-returning function node that takes zero or one argument:
+		// type0 is used for the zero-argument form, type1 for the one-argument form.
+		// A supplied argument must be a node set, otherwise a parse error is raised.
+		xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2])
+		{
+			assert(argc <= 1);
+
+			if (argc == 1)
+			{
+				if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
+			}
+
+			const ast_type_t type = (argc == 0) ? type0 : type1;
+
+			return new (alloc_node()) xpath_ast_node(type, xpath_type_string, args[0]);
+		}
+
+		xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
+		{ // Builds the AST node for a call of function 'name' with argc arguments; an unknown name or wrong argument count raises a parse error.
+			switch (name.begin[0]) // the first character narrows down the candidate names before the full comparisons
+			{
+			case 'b':
+				if (name == PUGIXML_TEXT("boolean") && argc == 1)
+					return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
+					
+				break;
+			
+			case 'c':
+				if (name == PUGIXML_TEXT("count") && argc == 1)
+				{
+					if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
+					return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
+				}
+				else if (name == PUGIXML_TEXT("contains") && argc == 2)
+					return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
+				else if (name == PUGIXML_TEXT("concat") && argc >= 2) // only args[0] and args[1] are passed to the node, whatever argc is
+					return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
+				else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
+					return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
+					
+				break;
+			
+			case 'f':
+				if (name == PUGIXML_TEXT("false") && argc == 0)
+					return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);
+				else if (name == PUGIXML_TEXT("floor") && argc == 1)
+					return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
+					
+				break;
+			
+			case 'i':
+				if (name == PUGIXML_TEXT("id") && argc == 1)
+					return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
+					
+				break;
+			
+			case 'l':
+				if (name == PUGIXML_TEXT("last") && argc == 0)
+					return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);
+				else if (name == PUGIXML_TEXT("lang") && argc == 1)
+					return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
+				else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
+					return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args);
+			
+				break;
+			
+			case 'n':
+				if (name == PUGIXML_TEXT("name") && argc <= 1)
+					return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args);
+				else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
+					return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
+				else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) // args[1] is also passed here, unlike the other one-argument functions
+					return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
+				else if (name == PUGIXML_TEXT("not") && argc == 1)
+					return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
+				else if (name == PUGIXML_TEXT("number") && argc <= 1)
+					return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
+			
+				break;
+			
+			case 'p':
+				if (name == PUGIXML_TEXT("position") && argc == 0)
+					return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);
+				
+				break;
+			
+			case 'r':
+				if (name == PUGIXML_TEXT("round") && argc == 1)
+					return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
+
+				break;
+			
+			case 's':
+				if (name == PUGIXML_TEXT("string") && argc <= 1)
+					return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
+				else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
+					return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
+				else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
+					return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
+				else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
+					return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
+				else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
+					return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
+				else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3)) // the optional third argument is not passed here; only args[0] and args[1] are
+					return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
+				else if (name == PUGIXML_TEXT("sum") && argc == 1)
+				{
+					if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
+					return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
+				}
+
+				break;
+			
+			case 't':
+				if (name == PUGIXML_TEXT("translate") && argc == 3) // the third argument is not passed here; only args[0] and args[1] are
+					return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
+				else if (name == PUGIXML_TEXT("true") && argc == 0)
+					return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
+					
+				break;
+
+			default:
+				break;
+			}
+			// no case above returned: the name is unknown or the argument count does not fit
+			throw_error("Unrecognized function or wrong parameter count");
+
+			return 0;
+		}
+
+		// Maps an axis name token to its axis_t value.
+		// 'specified' is set to true when the name is a known axis; otherwise it is set
+		// to false and axis_child is returned.
+		axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
+		{
+			specified = true;
+
+			// the first character selects which group of axis names is compared
+			const char_t initial = name.begin[0];
+
+			if (initial == 'a')
+			{
+				if (name == PUGIXML_TEXT("ancestor")) return axis_ancestor;
+				if (name == PUGIXML_TEXT("ancestor-or-self")) return axis_ancestor_or_self;
+				if (name == PUGIXML_TEXT("attribute")) return axis_attribute;
+			}
+			else if (initial == 'c')
+			{
+				if (name == PUGIXML_TEXT("child")) return axis_child;
+			}
+			else if (initial == 'd')
+			{
+				if (name == PUGIXML_TEXT("descendant")) return axis_descendant;
+				if (name == PUGIXML_TEXT("descendant-or-self")) return axis_descendant_or_self;
+			}
+			else if (initial == 'f')
+			{
+				if (name == PUGIXML_TEXT("following")) return axis_following;
+				if (name == PUGIXML_TEXT("following-sibling")) return axis_following_sibling;
+			}
+			else if (initial == 'n')
+			{
+				if (name == PUGIXML_TEXT("namespace")) return axis_namespace;
+			}
+			else if (initial == 'p')
+			{
+				if (name == PUGIXML_TEXT("parent")) return axis_parent;
+				if (name == PUGIXML_TEXT("preceding")) return axis_preceding;
+				if (name == PUGIXML_TEXT("preceding-sibling")) return axis_preceding_sibling;
+			}
+			else if (initial == 's')
+			{
+				if (name == PUGIXML_TEXT("self")) return axis_self;
+			}
+
+			// nothing matched: report "not an axis" and hand back the implied child axis
+			specified = false;
+			return axis_child;
+		}
+
+		// Maps a node type name (comment, node, processing-instruction, text) to the
+		// corresponding nodetest_t value; any other name yields nodetest_none.
+		nodetest_t parse_node_test_type(const xpath_lexer_string& name)
+		{
+			const char_t initial = name.begin[0];
+
+			if (initial == 'c')
+			{
+				if (name == PUGIXML_TEXT("comment")) return nodetest_type_comment;
+			}
+			else if (initial == 'n')
+			{
+				if (name == PUGIXML_TEXT("node")) return nodetest_type_node;
+			}
+			else if (initial == 'p')
+			{
+				if (name == PUGIXML_TEXT("processing-instruction")) return nodetest_type_pi;
+			}
+			else if (initial == 't')
+			{
+				if (name == PUGIXML_TEXT("text")) return nodetest_type_text;
+			}
+
+			return nodetest_none;
+		}
+
+		// PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
+		xpath_ast_node* parse_primary_expression()
+		{ // Parses one primary expression starting at the current lexeme and returns its AST node; anything else raises a parse error.
+			switch (_lexer.current())
+			{
+			case lex_var_ref:
+			{
+				xpath_lexer_string name = _lexer.contents();
+
+				if (!_variables)
+					throw_error("Unknown variable: variable set is not provided");
+				// look the name up in the variable set; _scratch is handed to the helper as working storage
+				xpath_variable* var = get_variable_scratch(_scratch, _variables, name.begin, name.end);
+
+				if (!var)
+					throw_error("Unknown variable: variable set does not contain the given name");
+
+				_lexer.next();
+				// the node takes the variable's own type as its return type
+				return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var);
+			}
+
+			case lex_open_brace:
+			{
+				_lexer.next();
+				// parenthesized expression: no node of its own, the inner expression is returned directly
+				xpath_ast_node* n = parse_expression();
+
+				if (_lexer.current() != lex_close_brace)
+					throw_error("Unmatched braces");
+
+				_lexer.next();
+
+				return n;
+			}
+
+			case lex_quoted_string:
+			{
+				const char_t* value = alloc_string(_lexer.contents()); // the literal is copied into allocator-owned storage
+
+				xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value);
+				_lexer.next();
+
+				return n;
+			}
+
+			case lex_number:
+			{
+				double value = 0;
+				// a false result from the conversion helper is reported as out of memory
+				if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
+					throw_error_oom();
+
+				xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value);
+				_lexer.next();
+
+				return n;
+			}
+
+			case lex_string:
+			{
+				xpath_ast_node* args[2] = {0}; // only the first two arguments are stored here; later ones are chained with set_next
+				size_t argc = 0;
+				
+				xpath_lexer_string function = _lexer.contents();
+				_lexer.next();
+				
+				xpath_ast_node* last_arg = 0; // most recently parsed argument from the loop below (stays 0 until a second argument is seen)
+				
+				if (_lexer.current() != lex_open_brace)
+					throw_error("Unrecognized function call");
+				_lexer.next();
+				// first argument, if the list is not empty
+				if (_lexer.current() != lex_close_brace)
+					args[argc++] = parse_expression();
+				// remaining arguments, each preceded by a comma
+				while (_lexer.current() != lex_close_brace)
+				{
+					if (_lexer.current() != lex_comma)
+						throw_error("No comma between function arguments");
+					_lexer.next();
+					
+					xpath_ast_node* n = parse_expression();
+					
+					if (argc < 2) args[argc] = n; // second argument goes into args[1]
+					else last_arg->set_next(n); // third and later arguments are linked after the previous one
+
+					argc++;
+					last_arg = n;
+				}
+				
+				_lexer.next();
+				// name and arity are validated by parse_function
+				return parse_function(function, argc, args);
+			}
+
+			default:
+				throw_error("Unrecognizable primary expression");
+
+				return 0;
+			}
+		}
+		
+		// FilterExpr ::= PrimaryExpr | FilterExpr Predicate
+		// Predicate ::= '[' PredicateExpr ']'
+		// PredicateExpr ::= Expr
+		//
+		// Parses a primary expression followed by any number of '[...]' predicates.
+		// Each predicate wraps the expression parsed so far in a filter node; the
+		// filtered expression must be a node set.
+		xpath_ast_node* parse_filter_expression()
+		{
+			xpath_ast_node* filtered = parse_primary_expression();
+
+			while (_lexer.current() == lex_open_square_brace)
+			{
+				_lexer.next();
+
+				xpath_ast_node* predicate = parse_expression();
+
+				if (filtered->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set");
+
+				// a non-numeric, position-invariant predicate gets the posinv filter type
+				ast_type_t filter_type = ast_filter;
+
+				if (predicate->rettype() != xpath_type_number && predicate->is_posinv())
+					filter_type = ast_filter_posinv;
+
+				filtered = new (alloc_node()) xpath_ast_node(filter_type, xpath_type_node_set, filtered, predicate);
+
+				if (_lexer.current() != lex_close_square_brace)
+					throw_error("Unmatched square brace");
+
+				_lexer.next();
+			}
+
+			return filtered;
+		}
+		
+		// Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
+		// AxisSpecifier ::= AxisName '::' | '@'?
+		// NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
+		// NameTest ::= '*' | NCName ':' '*' | QName
+		// AbbreviatedStep ::= '.' | '..'
+		xpath_ast_node* parse_step(xpath_ast_node* set)
+		{ // Parses one location step applied to 'set' (which may be null) and returns the resulting ast_step node.
+			if (set && set->rettype() != xpath_type_node_set)
+				throw_error("Step has to be applied to node set");
+
+			bool axis_specified = false;
+			axis_t axis = axis_child; // implied child axis
+
+			if (_lexer.current() == lex_axis_attribute)
+			{
+				axis = axis_attribute;
+				axis_specified = true;
+				
+				_lexer.next();
+			}
+			else if (_lexer.current() == lex_dot)
+			{
+				_lexer.next();
+				// abbreviated step: built as self axis with a node() test, returned without parsing predicates
+				return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0);
+			}
+			else if (_lexer.current() == lex_double_dot)
+			{
+				_lexer.next();
+				// abbreviated step: built as parent axis with a node() test, returned without parsing predicates
+				return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0);
+			}
+		
+			nodetest_t nt_type = nodetest_none;
+			xpath_lexer_string nt_name;
+			
+			if (_lexer.current() == lex_string)
+			{
+				// node name test
+				nt_name = _lexer.contents();
+				_lexer.next();
+
+				// was it an axis name?
+				if (_lexer.current() == lex_double_colon)
+				{
+					// parse axis name
+					if (axis_specified) throw_error("Two axis specifiers in one step");
+
+					axis = parse_axis_name(nt_name, axis_specified);
+
+					if (!axis_specified) throw_error("Unknown axis");
+
+					// read actual node test
+					_lexer.next();
+
+					if (_lexer.current() == lex_multiply)
+					{
+						nt_type = nodetest_all;
+						nt_name = xpath_lexer_string();
+						_lexer.next();
+					}
+					else if (_lexer.current() == lex_string)
+					{
+						nt_name = _lexer.contents();
+						_lexer.next();
+					}
+					else throw_error("Unrecognized node test");
+				}
+				
+				if (nt_type == nodetest_none) // still undecided: the test is a name, a node type test or processing-instruction(...)
+				{
+					// node type test or processing-instruction
+					if (_lexer.current() == lex_open_brace)
+					{
+						_lexer.next();
+						
+						if (_lexer.current() == lex_close_brace)
+						{
+							_lexer.next();
+
+							nt_type = parse_node_test_type(nt_name);
+
+							if (nt_type == nodetest_none) throw_error("Unrecognized node type");
+							// a node type test carries no name
+							nt_name = xpath_lexer_string();
+						}
+						else if (nt_name == PUGIXML_TEXT("processing-instruction"))
+						{
+							if (_lexer.current() != lex_quoted_string)
+								throw_error("Only literals are allowed as arguments to processing-instruction()");
+						
+							nt_type = nodetest_pi;
+							nt_name = _lexer.contents(); // the literal argument becomes the name stored with the test
+							_lexer.next();
+							
+							if (_lexer.current() != lex_close_brace)
+								throw_error("Unmatched brace near processing-instruction()");
+							_lexer.next();
+						}
+						else
+							throw_error("Unmatched brace near node type test");
+
+					}
+					// QName or NCName:*
+					else
+					{
+						if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
+						{
+							nt_name.end--; // erase *
+							
+							nt_type = nodetest_all_in_namespace;
+						}
+						else nt_type = nodetest_name;
+					}
+				}
+			}
+			else if (_lexer.current() == lex_multiply)
+			{
+				nt_type = nodetest_all;
+				_lexer.next();
+			}
+			else throw_error("Unrecognized node test");
+			// the test name (if any) is copied into allocator-owned storage by alloc_string
+			xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name));
+			
+			xpath_ast_node* last = 0; // last predicate attached so far
+			
+			while (_lexer.current() == lex_open_square_brace)
+			{
+				_lexer.next();
+				
+				xpath_ast_node* expr = parse_expression();
+
+				xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, xpath_type_node_set, expr);
+				
+				if (_lexer.current() != lex_close_square_brace)
+					throw_error("Unmatched square brace");
+				_lexer.next();
+				// the first predicate becomes the step's right child; later ones are chained through set_next
+				if (last) last->set_next(pred);
+				else n->set_right(pred);
+				
+				last = pred;
+			}
+			
+			return n;
+		}
+		
+		// RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
+		//
+		// Parses a chain of steps separated by '/' or '//', starting from 'set'.
+		// For '//' an extra descendant-or-self::node() step is inserted before the
+		// following step.
+		xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
+		{
+			xpath_ast_node* path = parse_step(set);
+
+			for (;;)
+			{
+				const lexeme_t separator = _lexer.current();
+
+				if (separator != lex_slash && separator != lex_double_slash) break;
+
+				_lexer.next();
+
+				if (separator == lex_double_slash)
+					path = new (alloc_node()) xpath_ast_node(ast_step, path, axis_descendant_or_self, nodetest_type_node, 0);
+
+				path = parse_step(path);
+			}
+
+			return path;
+		}
+		
+		// LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
+		// AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
+		//
+		// Parses an absolute path (leading '/' or '//', rooted at a root step) or,
+		// failing that, a relative path with no starting set.
+		xpath_ast_node* parse_location_path()
+		{
+			const lexeme_t lead = _lexer.current();
+
+			if (lead == lex_slash)
+			{
+				_lexer.next();
+
+				xpath_ast_node* root = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
+
+				// relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
+				const lexeme_t following = _lexer.current();
+
+				const bool starts_step = following == lex_string || following == lex_axis_attribute || following == lex_dot || following == lex_double_dot || following == lex_multiply;
+
+				if (!starts_step) return root;
+
+				return parse_relative_location_path(root);
+			}
+
+			if (lead == lex_double_slash)
+			{
+				_lexer.next();
+
+				xpath_ast_node* root = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
+				xpath_ast_node* descendants = new (alloc_node()) xpath_ast_node(ast_step, root, axis_descendant_or_self, nodetest_type_node, 0);
+
+				return parse_relative_location_path(descendants);
+			}
+
+			// kept as the final statement (not in an else branch) because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
+			return parse_relative_location_path(0);
+		}
+		
+		// PathExpr ::= LocationPath
+		//				| FilterExpr
+		//				| FilterExpr '/' RelativeLocationPath
+		//				| FilterExpr '//' RelativeLocationPath
+		// UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
+		// UnaryExpr ::= UnionExpr | '-' UnaryExpr
+		xpath_ast_node* parse_path_or_unary_expression()
+		{ // Parses a path expression or a unary minus expression, chosen by the current lexeme.
+			// Clarification.
+			// PathExpr begins with either LocationPath or FilterExpr.
+			// FilterExpr begins with PrimaryExpr
+			// PrimaryExpr begins with '$' in case of it being a variable reference,
+			// '(' in case of it being an expression, string literal, number constant or
+			// function call.
+
+			if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace || 
+				_lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
+				_lexer.current() == lex_string)
+			{
+				if (_lexer.current() == lex_string)
+				{
+					// This is either a function call, or not - if not, we shall proceed with location path
+					const char_t* state = _lexer.state();
+					// skip whitespace to peek at the next significant character without consuming a lexeme
+					while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
+					
+					if (*state != '(') return parse_location_path();
+
+					// This looks like a function call; however this still can be a node-test. Check it.
+					if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path();
+				}
+				
+				xpath_ast_node* n = parse_filter_expression();
+				// a filter expression may be followed by '/' or '//' and a relative location path
+				if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
+				{
+					lexeme_t l = _lexer.current();
+					_lexer.next();
+					
+					if (l == lex_double_slash)
+					{
+						if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set");
+						// '//' inserts a descendant-or-self::node() step before the following path
+						n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
+					}
+	
+					// select from location path
+					return parse_relative_location_path(n);
+				}
+
+				return n;
+			}
+			else if (_lexer.current() == lex_minus)
+			{
+				_lexer.next();
+
+				// precedence 7+ - only parses union expressions
+				xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7);
+
+				return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
+			}
+			else
+				return parse_location_path();
+		}
+
+		// Describes a binary operator: the AST node type it produces, the value type
+		// of its result and its precedence (a higher number binds tighter).
+		// A default-constructed value (ast_unknown) means "not a binary operator".
+		struct binary_op_t
+		{
+			ast_type_t asttype;
+			xpath_value_type rettype;
+			int precedence;
+
+			binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
+			{
+			}
+
+			binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
+			{
+			}
+
+			// Classifies the lexer's current lexeme without consuming it.
+			static binary_op_t parse(xpath_lexer& lexer)
+			{
+				const lexeme_t token = lexer.current();
+
+				// word operators arrive as plain string lexemes
+				if (token == lex_string)
+				{
+					if (lexer.contents() == PUGIXML_TEXT("or")) return binary_op_t(ast_op_or, xpath_type_boolean, 1);
+					if (lexer.contents() == PUGIXML_TEXT("and")) return binary_op_t(ast_op_and, xpath_type_boolean, 2);
+					if (lexer.contents() == PUGIXML_TEXT("div")) return binary_op_t(ast_op_divide, xpath_type_number, 6);
+					if (lexer.contents() == PUGIXML_TEXT("mod")) return binary_op_t(ast_op_mod, xpath_type_number, 6);
+
+					return binary_op_t();
+				}
+
+				switch (token)
+				{
+				case lex_equal: return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
+				case lex_not_equal: return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
+
+				case lex_less: return binary_op_t(ast_op_less, xpath_type_boolean, 4);
+				case lex_greater: return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
+				case lex_less_or_equal: return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
+				case lex_greater_or_equal: return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
+
+				case lex_plus: return binary_op_t(ast_op_add, xpath_type_number, 5);
+				case lex_minus: return binary_op_t(ast_op_subtract, xpath_type_number, 5);
+
+				case lex_multiply: return binary_op_t(ast_op_multiply, xpath_type_number, 6);
+
+				case lex_union: return binary_op_t(ast_op_union, xpath_type_node_set, 7);
+
+				default: return binary_op_t();
+				}
+			}
+		};
+
+		// Parses the binary-operator tail of an expression whose left operand 'lhs' is
+		// already parsed. Only operators with precedence >= limit are consumed; while
+		// the operator that follows a right operand binds tighter than the current
+		// one, that operand is extended recursively first. Returns the combined tree.
+		xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
+		{
+			for (binary_op_t op = binary_op_t::parse(_lexer); op.asttype != ast_unknown && op.precedence >= limit; op = binary_op_t::parse(_lexer))
+			{
+				_lexer.next();
+
+				xpath_ast_node* rhs = parse_path_or_unary_expression();
+
+				// let tighter-binding operators claim the right operand first
+				for (binary_op_t ahead = binary_op_t::parse(_lexer); ahead.asttype != ast_unknown && ahead.precedence > op.precedence; ahead = binary_op_t::parse(_lexer))
+					rhs = parse_expression_rec(rhs, ahead.precedence);
+
+				if (op.asttype == ast_op_union)
+				{
+					if (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set)
+						throw_error("Union operator has to be applied to node sets");
+				}
+
+				lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs);
+			}
+
+			return lhs;
+		}
+
+		// Expr ::= OrExpr
+		// OrExpr ::= AndExpr | OrExpr 'or' AndExpr
+		// AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
+		// EqualityExpr ::= RelationalExpr
+		//					| EqualityExpr '=' RelationalExpr
+		//					| EqualityExpr '!=' RelationalExpr
+		// RelationalExpr ::= AdditiveExpr
+		//					  | RelationalExpr '<' AdditiveExpr
+		//					  | RelationalExpr '>' AdditiveExpr
+		//					  | RelationalExpr '<=' AdditiveExpr
+		//					  | RelationalExpr '>=' AdditiveExpr
+		// AdditiveExpr ::= MultiplicativeExpr
+		//					| AdditiveExpr '+' MultiplicativeExpr
+		//					| AdditiveExpr '-' MultiplicativeExpr
+		// MultiplicativeExpr ::= UnaryExpr
+		//						  | MultiplicativeExpr '*' UnaryExpr
+		//						  | MultiplicativeExpr 'div' UnaryExpr
+		//						  | MultiplicativeExpr 'mod' UnaryExpr
+		//
+		// Parses a complete expression: the first operand, then every binary operator
+		// that follows it (precedence limit 0 accepts all of them).
+		xpath_ast_node* parse_expression()
+		{
+			xpath_ast_node* first_operand = parse_path_or_unary_expression();
+
+			return parse_expression_rec(first_operand, 0);
+		}
+
+		xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)
+		{ // Stores the given pointers and constructs the lexer over query; _scratch is not initialized here.
+		}
+
+		// Parses the whole query and returns the root of its AST.
+		// The query must be fully consumed: any lexeme other than end-of-input
+		// remaining after the expression is reported as a parse error.
+		xpath_ast_node* parse()
+		{
+			xpath_ast_node* root = parse_expression();
+
+			const bool fully_consumed = (_lexer.current() == lex_eof);
+
+			// there are still unparsed tokens left, error
+			if (!fully_consumed) throw_error("Incorrect query");
+
+			return root;
+		}
+
+		static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
+		{
+			xpath_parser parser(query, variables, alloc, result);
+
+		#ifdef PUGIXML_NO_EXCEPTIONS
+			int error = setjmp(parser._error_handler);
+
+			return (error == 0) ? parser.parse() : 0;
+		#else
+			return parser.parse();
+		#endif
+		}
+	};
+
+	// Storage behind a compiled query: the AST root plus the allocator that owns the
+	// AST memory. The allocator is constructed over 'block', which is embedded in this
+	// object.
+	struct xpath_query_impl
+	{
+		// Allocates and constructs an instance through xml_memory.
+		// Returns 0 when the allocation fails, so callers must check the result.
+		static xpath_query_impl* create()
+		{
+			void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
+
+			// constructing an object at a null address is undefined behaviour
+			if (!memory) return 0;
+
+			return new (memory) xpath_query_impl();
+		}
+
+		// Releases an instance obtained from create(); a null pointer is ignored.
+		static void destroy(void* ptr)
+		{
+			if (!ptr) return;
+			
+			// free all allocated pages
+			static_cast<xpath_query_impl*>(ptr)->alloc.release();
+
+			// free allocator memory (with the first page)
+			xml_memory::deallocate(ptr);
+		}
+
+		xpath_query_impl(): root(0), alloc(&block)
+		{
+			block.next = 0;
+		}
+
+		xpath_ast_node* root;
+		xpath_allocator alloc;
+		xpath_memory_block block;
+	};
+
+	PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd)
+	{ // Evaluates impl's root expression as a string with n as the context node; a null impl gives an empty string.
+		if (!impl) return xpath_string();
+
+	#ifdef PUGIXML_NO_EXCEPTIONS
+		if (setjmp(sd.error_handler)) return xpath_string(); // re-entered via longjmp on sd.error_handler: give up with an empty string
+	#endif
+
+		xpath_context c(n, 1, 1); // NOTE(review): the two 1s are presumably context position and size - confirm against xpath_context
+
+		return impl->root->eval_string(c, sd.stack);
+	}
+PUGI__NS_END
+
+namespace pugi
+{
+#ifndef PUGIXML_NO_EXCEPTIONS
+	PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_) // keeps a copy of the parse result
+	{
+		assert(_result.error); // an exception is expected to carry an error message
+	}
+	
+	PUGI__FN const char* xpath_exception::what() const throw()
+	{
+		return _result.error; // the message carried by the stored parse result
+	}
+
+	PUGI__FN const xpath_parse_result& xpath_exception::result() const
+	{
+		return _result; // reference to the copy held by this exception object
+	}
+#endif
+	
+	PUGI__FN xpath_node::xpath_node() // both wrapped handles are default-constructed
+	{
+	}
+		
+	PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_) // wraps a node; _attribute is default-constructed
+	{
+	}
+		
+	PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_) // wraps an attribute; the parent is kept only when the attribute handle is non-empty
+	{
+	}
+
+	PUGI__FN xml_node xpath_node::node() const
+	{
+		return _attribute ? xml_node() : _node; // when an attribute is wrapped, _node holds its parent, so an empty handle is returned
+	}
+		
+	PUGI__FN xml_attribute xpath_node::attribute() const
+	{
+		return _attribute; // default-constructed handle when this object wraps a plain node
+	}
+	
+	PUGI__FN xml_node xpath_node::parent() const
+	{
+		return _attribute ? _node : _node.parent(); // for a wrapped attribute _node already is the parent
+	}
+
+	PUGI__FN static void unspecified_bool_xpath_node(xpath_node***) // its address is the "true" value returned by the bool conversion below
+	{
+	}
+
+	PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
+	{
+		return (_node || _attribute) ? unspecified_bool_xpath_node : 0; // non-null exactly when a node or an attribute is wrapped
+	}
+	
+	PUGI__FN bool xpath_node::operator!() const // true when neither a node nor an attribute is wrapped
+	{
+		return !_node && !_attribute;
+	}
+
+	PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
+	{
+		return _node == n._node && _attribute == n._attribute; // equal only if both wrapped handles match
+	}
+	
+	PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
+	{
+		return _node != n._node || _attribute != n._attribute; // differs as soon as either wrapped handle differs
+	}
+
+#ifdef __BORLANDC__
+	PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs) // only compiled for Borland C++ (see the enclosing #ifdef)
+	{
+		return (bool)lhs && rhs; // lhs is converted to bool first
+	}
+
+	PUGI__FN bool operator||(const xpath_node& lhs, bool rhs) // only compiled for Borland C++ (see the enclosing #ifdef)
+	{
+		return (bool)lhs || rhs; // lhs is converted to bool first
+	}
+#endif
+
+	// Replaces the contents of the set with a copy of [begin_, end_).
+	// Zero or one node is kept in the inline _storage slot, larger ranges get a heap buffer.
+	PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_)
+	{
+		assert(begin_ <= end_);
+
+		const size_t count = static_cast<size_t>(end_ - begin_);
+		const bool owns_heap_buffer = (_begin != &_storage);
+
+		if (count > 1)
+		{
+			// make heap copy; the set is left untouched if the allocation fails
+			const size_t bytes = count * sizeof(xpath_node);
+			xpath_node* heap_copy = static_cast<xpath_node*>(impl::xml_memory::allocate(bytes));
+
+			if (!heap_copy)
+			{
+			#ifdef PUGIXML_NO_EXCEPTIONS
+				return;
+			#else
+				throw std::bad_alloc();
+			#endif
+			}
+
+			memcpy(heap_copy, begin_, bytes);
+
+			// deallocate old buffer only after the copy has been taken
+			if (owns_heap_buffer) impl::xml_memory::deallocate(_begin);
+
+			_begin = heap_copy;
+			_end = heap_copy + count;
+			return;
+		}
+
+		// zero or one node: release the old heap buffer and use the internal slot
+		if (owns_heap_buffer) impl::xml_memory::deallocate(_begin);
+		if (count == 1) _storage = *begin_;
+
+		_begin = &_storage;
+		_end = &_storage + count;
+	}
+
+	PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage) // empty set: both range pointers refer to the internal slot
+	{
+	}
+
+	PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_), _begin(&_storage), _end(&_storage) // starts out empty, then copies the range
+	{
+		_assign(begin_, end_); // may throw std::bad_alloc unless PUGIXML_NO_EXCEPTIONS is defined
+	}
+
+	PUGI__FN xpath_node_set::~xpath_node_set()
+	{
+		if (_begin != &_storage) impl::xml_memory::deallocate(_begin); // only a heap buffer needs freeing, never the internal slot
+	}
+		
+	PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(ns._type), _begin(&_storage), _end(&_storage) // starts out empty, then copies the nodes of ns
+	{
+		_assign(ns._begin, ns._end); // may throw std::bad_alloc unless PUGIXML_NO_EXCEPTIONS is defined
+	}
+	
+	// Copy assignment; self-assignment is a no-op.
+	PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
+	{
+		if (this == &ns) return *this;
+		// copy the nodes first: if _assign throws, _type must still describe the old contents
+		_assign(ns._begin, ns._end);
+		_type = ns._type;
+		return *this;
+	}
+
+	PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
+	{
+		return _type; // ordering tag; sort() updates it
+	}
+		
+	PUGI__FN size_t xpath_node_set::size() const
+	{
+		return _end - _begin; // number of nodes between the two range pointers
+	}
+		
+	PUGI__FN bool xpath_node_set::empty() const
+	{
+		return _begin == _end; // an empty range means no nodes
+	}
+		
+	PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
+	{
+		assert(index < size()); // bounds are checked only when assertions are enabled
+		return _begin[index];
+	}
+
+	PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
+	{
+		return _begin; // first node of the set
+	}
+		
+	PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
+	{
+		return _end; // one past the last node of the set
+	}
+	
+	PUGI__FN void xpath_node_set::sort(bool reverse)
+	{
+		_type = impl::xpath_sort(_begin, _end, _type, reverse); // the helper returns the type tag the set has afterwards
+	}
+
+	PUGI__FN xpath_node xpath_node_set::first() const
+	{
+		return impl::xpath_first(_begin, _end, _type); // NOTE(review): presumably the first node in document order - confirm against xpath_first
+	}
+
+	PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0) // starts in a failed state; the xpath_query constructor resets error to 0 on success
+	{
+	}
+
+	PUGI__FN xpath_parse_result::operator bool() const
+	{
+		return error == 0; // true means success: no error message is set
+	}
+
+	PUGI__FN const char* xpath_parse_result::description() const
+	{
+		return error ? error : "No error"; // never returns a null pointer
+	}
+
+	PUGI__FN xpath_variable::xpath_variable(): _type(xpath_type_none), _next(0) // untyped and unlinked; xpath_variable_set::add fills both fields in
+	{
+	}
+
+	// Returns the variable name. The name is a member of the concrete impl::xpath_variable_* object,
+	// so the stored type tag selects which of those types to read it from.
+	PUGI__FN const char_t* xpath_variable::name() const
+	{
+		// dispatch on the stored type tag
+		if (_type == xpath_type_node_set)
+			return static_cast<const impl::xpath_variable_node_set*>(this)->name;
+
+		if (_type == xpath_type_number)
+			return static_cast<const impl::xpath_variable_number*>(this)->name;
+
+		if (_type == xpath_type_string)
+			return static_cast<const impl::xpath_variable_string*>(this)->name;
+
+		if (_type == xpath_type_boolean)
+			return static_cast<const impl::xpath_variable_boolean*>(this)->name;
+
+		// any other tag value is a programming error
+		assert(!"Invalid variable type");
+		return 0;
+	}
+
+	PUGI__FN xpath_value_type xpath_variable::type() const
+	{
+		return _type; // the value type this variable was created with
+	}
+
+	PUGI__FN bool xpath_variable::get_boolean() const
+	{
+		return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false; // false when the variable is not a boolean
+	}
+
+	PUGI__FN double xpath_variable::get_number() const
+	{
+		return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan(); // gen_nan() result when the variable is not a number
+	}
+
+	PUGI__FN const char_t* xpath_variable::get_string() const // never returns null: a wrong type or an unset value yields ""
+	{
+		const bool has_text = (_type == xpath_type_string) && static_cast<const impl::xpath_variable_string*>(this)->value;
+		return has_text ? static_cast<const impl::xpath_variable_string*>(this)->value : PUGIXML_TEXT("");
+	}
+
+	PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
+	{
+		return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set; // shared dummy set when the variable is not a node set
+	}
+
+	// Stores a boolean; returns false and changes nothing unless the variable is of type xpath_type_boolean.
+	PUGI__FN bool xpath_variable::set(bool value)
+	{
+		const bool type_matches = (_type == xpath_type_boolean);
+		if (type_matches) static_cast<impl::xpath_variable_boolean*>(this)->value = value;
+		return type_matches;
+	}
+
+	// Stores a number; returns false and changes nothing unless the variable is of type xpath_type_number.
+	PUGI__FN bool xpath_variable::set(double value)
+	{
+		const bool type_matches = (_type == xpath_type_number);
+		if (type_matches) static_cast<impl::xpath_variable_number*>(this)->value = value;
+		return type_matches;
+	}
+
+	// Stores a private copy of the string; fails on a type mismatch or when out of memory.
+	// The previous value is released only after the copy has been made.
+	PUGI__FN bool xpath_variable::set(const char_t* value)
+	{
+		if (_type != xpath_type_string) return false;
+
+		// duplicate string (the + 1 covers the terminating zero)
+		const size_t byte_count = (impl::strlength(value) + 1) * sizeof(char_t);
+		void* duplicate = impl::xml_memory::allocate(byte_count);
+		if (!duplicate) return false;
+
+		memcpy(duplicate, value, byte_count);
+
+		// replace old string
+		impl::xpath_variable_string* self = static_cast<impl::xpath_variable_string*>(this);
+		if (self->value) impl::xml_memory::deallocate(self->value);
+		self->value = static_cast<char_t*>(duplicate);
+
+		return true;
+	}
+
+	// Stores a copy of the node set; returns false and changes nothing unless the type is xpath_type_node_set.
+	PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
+	{
+		const bool type_matches = (_type == xpath_type_node_set);
+		if (type_matches) static_cast<impl::xpath_variable_node_set*>(this)->value = value;
+		return type_matches;
+	}
+
+	PUGI__FN xpath_variable_set::xpath_variable_set()
+	{
+		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) _data[i] = 0; // every hash bucket starts out empty
+	}
+
+	// Destroys every variable in every hash bucket.
+	PUGI__FN xpath_variable_set::~xpath_variable_set()
+	{
+		const size_t bucket_count = sizeof(_data) / sizeof(_data[0]);
+
+		for (size_t bucket = 0; bucket < bucket_count; ++bucket)
+		{
+			// read the link before deleting the variable it is stored in
+			for (xpath_variable* cur = _data[bucket]; cur; )
+			{
+				xpath_variable* following = cur->_next;
+				impl::delete_xpath_variable(cur->_type, cur);
+				cur = following;
+			}
+		}
+	}
+
+	// Looks the name up in its hash bucket; returns 0 when no such variable exists.
+	PUGI__FN xpath_variable* xpath_variable_set::find(const char_t* name) const
+	{
+		const size_t bucket_count = sizeof(_data) / sizeof(_data[0]);
+		xpath_variable* candidate = _data[impl::hash_string(name) % bucket_count];
+
+		// walk the bucket's chain until the name matches or the chain ends
+		while (candidate && !impl::strequal(candidate->name(), name))
+			candidate = candidate->_next;
+
+		return candidate;
+	}
+
+	// Returns the variable with this name, creating it when absent; 0 on a type clash or failed creation.
+	PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
+	{
+		const size_t bucket_count = sizeof(_data) / sizeof(_data[0]);
+		const size_t bucket = impl::hash_string(name) % bucket_count;
+
+		// look for existing variable
+		for (xpath_variable* cur = _data[bucket]; cur; cur = cur->_next)
+		{
+			if (!impl::strequal(cur->name(), name)) continue;
+			return (cur->type() == type) ? cur : 0;
+		}
+
+		// add new variable
+		xpath_variable* created = impl::new_xpath_variable(type, name);
+		if (!created) return 0;
+
+		// link the new variable in at the head of its bucket
+		created->_type = type;
+		created->_next = _data[bucket];
+		_data[bucket] = created;
+		return created;
+	}
+
+	PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value) // creates the variable on demand; false if add() or the assignment fails
+	{
+		xpath_variable* target = add(name, xpath_type_boolean);
+		return target && target->set(value);
+	}
+
+	PUGI__FN bool xpath_variable_set::set(const char_t* name, double value) // creates the variable on demand; false if add() or the assignment fails
+	{
+		xpath_variable* target = add(name, xpath_type_number);
+		return target && target->set(value);
+	}
+
+	PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value) // creates the variable on demand; false if add() or the assignment fails
+	{
+		xpath_variable* target = add(name, xpath_type_string);
+		return target && target->set(value);
+	}
+
+	PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value) // creates the variable on demand; false if add() or the assignment fails
+	{
+		xpath_variable* target = add(name, xpath_type_node_set);
+		return target && target->set(value);
+	}
+
+	PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
+	{
+		return find(name); // 0 when no variable has this name; nothing is created
+	}
+
+	PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
+	{
+		return find(name); // 0 when no variable has this name; nothing is created
+	}
+
+	// Compiles the query text. _impl stays 0 unless parsing produced an AST root; running out of memory
+	// is reported via _result when PUGIXML_NO_EXCEPTIONS is defined and via std::bad_alloc otherwise.
+	PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
+	{
+		impl::xpath_query_impl* compiled = impl::xpath_query_impl::create();
+
+		if (!compiled)
+		{
+		#ifdef PUGIXML_NO_EXCEPTIONS
+			_result.error = "Out of memory";
+			return;
+		#else
+			throw std::bad_alloc();
+		#endif
+		}
+
+		// the holder is given destroy() as its cleanup function; release() below takes the pointer back
+		impl::buffer_holder guard(compiled, impl::xpath_query_impl::destroy);
+
+		compiled->root = impl::xpath_parser::parse(query, variables, &compiled->alloc, &_result);
+		if (!compiled->root) return;
+
+		_impl = static_cast<impl::xpath_query_impl*>(guard.release());
+		_result.error = 0;
+	}
+
+	PUGI__FN xpath_query::~xpath_query()
+	{
+		impl::xpath_query_impl::destroy(_impl); // destroy() ignores a null pointer, so an empty query is fine
+	}
+
+	// Result type reported by the root of the compiled expression, or xpath_type_none for an empty query object.
+	PUGI__FN xpath_value_type xpath_query::return_type() const
+	{
+		const impl::xpath_query_impl* compiled = static_cast<impl::xpath_query_impl*>(_impl);
+		return compiled ? compiled->root->rettype() : xpath_type_none;
+	}
+
+	PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const // evaluates the query as a boolean, with n passed as the context
+	{
+		if (!_impl) return false; // empty query object
+		
+		impl::xpath_context c(n, 1, 1);
+		impl::xpath_stack_data sd;
+
+	#ifdef PUGIXML_NO_EXCEPTIONS
+		if (setjmp(sd.error_handler)) return false; // reached again through sd.error_handler when evaluation fails
+	#endif
+		
+		return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
+	}
+	
+	PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const // evaluates the query as a number, with n passed as the context
+	{
+		if (!_impl) return impl::gen_nan(); // empty query object
+		
+		impl::xpath_context c(n, 1, 1);
+		impl::xpath_stack_data sd;
+
+	#ifdef PUGIXML_NO_EXCEPTIONS
+		if (setjmp(sd.error_handler)) return impl::gen_nan(); // reached again through sd.error_handler when evaluation fails
+	#endif
+
+		return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
+	}
+
+#ifndef PUGIXML_NO_STL
+	PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const // only available when PUGIXML_NO_STL is not defined
+	{
+		impl::xpath_stack_data sd;
+
+		return impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd).c_str(); // the returned string_t is built from c_str() while sd is still alive
+	}
+#endif
+
+	// Evaluates the query as a string and copies as much of it as fits into buffer (always zero-terminated
+	// when capacity > 0). Returns the size needed for the full result, terminator included.
+	PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
+	{
+		impl::xpath_stack_data sd;
+		impl::xpath_string text = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
+
+		const size_t required = text.length() + 1;
+		if (capacity == 0) return required;
+
+		// number of characters written, terminator included
+		const size_t written = (required < capacity) ? required : capacity;
+		assert(written > 0);
+		const size_t payload = written - 1;
+		memcpy(buffer, text.c_str(), payload * sizeof(char_t));
+		buffer[payload] = 0;
+
+		return required;
+	}
+
+	PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const // evaluates the query as a node set, with n passed as the context
+	{
+		if (!_impl) return xpath_node_set(); // empty query object: empty set
+
+		impl::xpath_ast_node* root = static_cast<impl::xpath_query_impl*>(_impl)->root;
+
+		if (root->rettype() != xpath_type_node_set) // only node-set expressions can be evaluated this way
+		{
+		#ifdef PUGIXML_NO_EXCEPTIONS
+			return xpath_node_set();
+		#else
+			xpath_parse_result res;
+			res.error = "Expression does not evaluate to node set";
+
+			throw xpath_exception(res);
+		#endif
+		}
+		
+		impl::xpath_context c(n, 1, 1);
+		impl::xpath_stack_data sd;
+
+	#ifdef PUGIXML_NO_EXCEPTIONS
+		if (setjmp(sd.error_handler)) return xpath_node_set(); // reached again through sd.error_handler when evaluation fails
+	#endif
+
+		impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack);
+
+		return xpath_node_set(r.begin(), r.end(), r.type()); // the range constructor copies the nodes into the returned set
+	}
+
+	PUGI__FN const xpath_parse_result& xpath_query::result() const
+	{
+		return _result; // outcome of the parse performed in the constructor
+	}
+
+	PUGI__FN static void unspecified_bool_xpath_query(xpath_query***) // its address is the "true" value returned by the bool conversion below
+	{
+	}
+
+	PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
+	{
+		return _impl ? unspecified_bool_xpath_query : 0; // non-null exactly when a compiled query is held
+	}
+
+	PUGI__FN bool xpath_query::operator!() const
+	{
+		return !_impl; // true when no compiled query is held
+	}
+
+	PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const // convenience overload taking the query as text
+	{
+		xpath_query q(query, variables); // compiled on every call
+		return select_single_node(q);
+	}
+
+	PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const // evaluates the query with this node as the context
+	{
+		xpath_node_set s = query.evaluate_node_set(*this);
+		return s.empty() ? xpath_node() : s.first(); // default-constructed xpath_node when nothing matched
+	}
+
+	PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const // convenience overload taking the query as text
+	{
+		xpath_query q(query, variables); // compiled on every call
+		return select_nodes(q);
+	}
+
+	PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
+	{
+		return query.evaluate_node_set(*this); // this node is passed as the evaluation context
+	}
+}
+
+#endif
+
+#ifdef __BORLANDC__
+#	pragma option pop
+#endif
+
+// Intel C++ does not properly keep warning state for function templates,
+// so popping warning state at the end of translation unit leads to warnings in the middle.
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+#	pragma warning(pop)
+#endif
+
+// Undefine all local macros (makes sure we're not leaking macros in header-only mode)
+#undef PUGI__NO_INLINE
+#undef PUGI__STATIC_ASSERT
+#undef PUGI__DMC_VOLATILE
+#undef PUGI__MSVC_CRT_VERSION
+#undef PUGI__NS_BEGIN
+#undef PUGI__NS_END
+#undef PUGI__FN
+#undef PUGI__FN_NO_INLINE
+#undef PUGI__IS_CHARTYPE_IMPL
+#undef PUGI__IS_CHARTYPE
+#undef PUGI__IS_CHARTYPEX
+#undef PUGI__SKIPWS
+#undef PUGI__OPTSET
+#undef PUGI__PUSHNODE
+#undef PUGI__POPNODE
+#undef PUGI__SCANFOR
+#undef PUGI__SCANWHILE
+#undef PUGI__ENDSEG
+#undef PUGI__THROW_ERROR
+#undef PUGI__CHECK_ERROR
+
+#endif
+
+/**
+ * Copyright (c) 2006-2014 Arseny Kapoulkine
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
diff --git a/apps/gadgetron/pugixml.hpp b/apps/gadgetron/pugixml.hpp
new file mode 100644
index 0000000..6fb99be
--- /dev/null
+++ b/apps/gadgetron/pugixml.hpp
@@ -0,0 +1,1332 @@
+/**
+ * pugixml parser - version 1.4
+ * --------------------------------------------------------
+ * Copyright (C) 2006-2014, by Arseny Kapoulkine (arseny.kapoulkine at gmail.com)
+ * Report bugs and download new versions at http://pugixml.org/
+ *
+ * This library is distributed under the MIT License. See notice at the end
+ * of this file.
+ *
+ * This work is based on the pugxml parser, which is:
+ * Copyright (C) 2003, by Kristen Wegner (kristen at tima.net)
+ */
+
+#ifndef PUGIXML_VERSION
+// Define version macro; evaluates to major * 100 + minor * 10 (140 for version 1.4) so that it's safe to use in less-than comparisons
+#	define PUGIXML_VERSION 140
+#endif
+
+// Include user configuration file (this can define various configuration macros)
+#include "pugiconfig.hpp"
+
+#ifndef HEADER_PUGIXML_HPP
+#define HEADER_PUGIXML_HPP
+
+// Include stddef.h for size_t and ptrdiff_t
+#include <stddef.h>
+
+// Include exception header for XPath
+#if !defined(PUGIXML_NO_XPATH) && !defined(PUGIXML_NO_EXCEPTIONS)
+#	include <exception>
+#endif
+
+// Include STL headers
+#ifndef PUGIXML_NO_STL
+#	include <iterator>
+#	include <iosfwd>
+#	include <string>
+#endif
+
+// Macro for deprecated features
+#ifndef PUGIXML_DEPRECATED
+#	if defined(__GNUC__)
+#		define PUGIXML_DEPRECATED __attribute__((deprecated))
+#	elif defined(_MSC_VER) && _MSC_VER >= 1300
+#		define PUGIXML_DEPRECATED __declspec(deprecated)
+#	else
+#		define PUGIXML_DEPRECATED
+#	endif
+#endif
+
+// If no API is defined, assume default
+#ifndef PUGIXML_API
+#	define PUGIXML_API
+#endif
+
+// If no API for classes is defined, assume default
+#ifndef PUGIXML_CLASS
+#	define PUGIXML_CLASS PUGIXML_API
+#endif
+
+// If no API for functions is defined, assume default
+#ifndef PUGIXML_FUNCTION
+#	define PUGIXML_FUNCTION PUGIXML_API
+#endif
+
+// If the platform is known to have long long support, enable long long functions
+#ifndef PUGIXML_HAS_LONG_LONG
+#	if defined(__cplusplus) && __cplusplus >= 201103
+#		define PUGIXML_HAS_LONG_LONG
+#	elif defined(_MSC_VER) && _MSC_VER >= 1400
+#		define PUGIXML_HAS_LONG_LONG
+#	endif
+#endif
+
+// Character interface macros
+#ifdef PUGIXML_WCHAR_MODE
+#	define PUGIXML_TEXT(t) L ## t
+#	define PUGIXML_CHAR wchar_t
+#else
+#	define PUGIXML_TEXT(t) t
+#	define PUGIXML_CHAR char
+#endif
+
+namespace pugi
+{
+	// Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE
+	typedef PUGIXML_CHAR char_t;
+
+#ifndef PUGIXML_NO_STL
+	// String type used for operations that work with STL string; depends on PUGIXML_WCHAR_MODE
+	typedef std::basic_string<PUGIXML_CHAR, std::char_traits<PUGIXML_CHAR>, std::allocator<PUGIXML_CHAR> > string_t;
+#endif
+}
+
+// The PugiXML namespace
+namespace pugi
+{
+	// Tree node types
+	enum xml_node_type
+	{
+		node_null,			// Empty (null) node handle
+		node_document,		// A document tree's absolute root
+		node_element,		// Element tag, i.e. '<node/>'
+		node_pcdata,		// Plain character data, i.e. 'text'
+		node_cdata,			// Character data, i.e. '<![CDATA[text]]>'
+		node_comment,		// Comment tag, i.e. '<!-- text -->'
+		node_pi,			// Processing instruction, i.e. '<?name?>'
+		node_declaration,	// Document declaration, i.e. '<?xml version="1.0"?>'
+		node_doctype		// Document type declaration, i.e. '<!DOCTYPE doc>'
+	};
+
+	// Parsing options
+
+	// Minimal parsing mode (equivalent to turning all other flags off).
+	// Only elements and PCDATA sections are added to the DOM tree, no text conversions are performed.
+	const unsigned int parse_minimal = 0x0000;
+
+	// This flag determines if processing instructions (node_pi) are added to the DOM tree. This flag is off by default.
+	const unsigned int parse_pi = 0x0001;
+
+	// This flag determines if comments (node_comment) are added to the DOM tree. This flag is off by default.
+	const unsigned int parse_comments = 0x0002;
+
+	// This flag determines if CDATA sections (node_cdata) are added to the DOM tree. This flag is on by default.
+	const unsigned int parse_cdata = 0x0004;
+
+	// This flag determines if plain character data (node_pcdata) that consist only of whitespace are added to the DOM tree.
+	// This flag is off by default; turning it on usually results in slower parsing and more memory consumption.
+	const unsigned int parse_ws_pcdata = 0x0008;
+
+	// This flag determines if character and entity references are expanded during parsing. This flag is on by default.
+	const unsigned int parse_escapes = 0x0010;
+
+	// This flag determines if EOL characters are normalized (converted to #xA) during parsing. This flag is on by default.
+	const unsigned int parse_eol = 0x0020;
+	
+	// This flag determines if attribute values are normalized using CDATA normalization rules during parsing. This flag is on by default.
+	const unsigned int parse_wconv_attribute = 0x0040;
+
+	// This flag determines if attribute values are normalized using NMTOKENS normalization rules during parsing. This flag is off by default.
+	const unsigned int parse_wnorm_attribute = 0x0080;
+	
+	// This flag determines if document declaration (node_declaration) is added to the DOM tree. This flag is off by default.
+	const unsigned int parse_declaration = 0x0100;
+
+	// This flag determines if document type declaration (node_doctype) is added to the DOM tree. This flag is off by default.
+	const unsigned int parse_doctype = 0x0200;
+
+	// This flag determines if plain character data (node_pcdata) that is the only child of the parent node and that consists only
+	// of whitespace is added to the DOM tree.
+	// This flag is off by default; turning it on may result in slower parsing and more memory consumption.
+	const unsigned int parse_ws_pcdata_single = 0x0400;
+
+	// This flag determines if leading and trailing whitespace is to be removed from plain character data. This flag is off by default.
+	const unsigned int parse_trim_pcdata = 0x0800;
+
+	// This flag determines if plain character data that does not have a parent node is added to the DOM tree, and if an empty document
+	// is a valid document. This flag is off by default.
+	const unsigned int parse_fragment = 0x1000;
+
+	// The default parsing mode.
+	// Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,
+	// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
+	const unsigned int parse_default = parse_cdata | parse_escapes | parse_wconv_attribute | parse_eol;
+
+	// The full parsing mode.
+	// Nodes of all types are added to the DOM tree (whitespace-only PCDATA is still skipped, since parse_ws_pcdata is not
+	// included), character/reference entities are expanded, End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
+	const unsigned int parse_full = parse_default | parse_pi | parse_comments | parse_declaration | parse_doctype;
+
+	// These flags determine the encoding of input data for XML document
+	enum xml_encoding
+	{
+		encoding_auto,		// Auto-detect input encoding using BOM or < / <? detection; use UTF8 if BOM is not found
+		encoding_utf8,		// UTF8 encoding
+		encoding_utf16_le,	// Little-endian UTF16
+		encoding_utf16_be,	// Big-endian UTF16
+		encoding_utf16,		// UTF16 with native endianness
+		encoding_utf32_le,	// Little-endian UTF32
+		encoding_utf32_be,	// Big-endian UTF32
+		encoding_utf32,		// UTF32 with native endianness
+		encoding_wchar,		// The same encoding wchar_t has (either UTF16 or UTF32)
+		encoding_latin1		// Latin-1 (ISO-8859-1) encoding
+	};
+
+	// Formatting flags
+	
+	// Indent the nodes that are written to output stream with as many indentation strings as deep the node is in DOM tree. This flag is on by default.
+	const unsigned int format_indent = 0x01;
+	
+	// Write encoding-specific BOM to the output stream. This flag is off by default.
+	const unsigned int format_write_bom = 0x02;
+
+	// Use raw output mode (no indentation and no line breaks are written). This flag is off by default.
+	const unsigned int format_raw = 0x04;
+	
+	// Omit default XML declaration even if there is no declaration in the document. This flag is off by default.
+	const unsigned int format_no_declaration = 0x08;
+
+	// Don't escape attribute values and PCDATA contents. This flag is off by default.
+	const unsigned int format_no_escapes = 0x10;
+
+	// Open file using text mode in xml_document::save_file. This enables special character (i.e. new-line) conversions on some systems. This flag is off by default.
+	const unsigned int format_save_file_text = 0x20;
+
+	// The default set of formatting flags.
+	// Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none.
+	const unsigned int format_default = format_indent;
+		
+	// Forward declarations
+	struct xml_attribute_struct;
+	struct xml_node_struct;
+
+	class xml_node_iterator;
+	class xml_attribute_iterator;
+	class xml_named_node_iterator;
+
+	class xml_tree_walker;
+
+	struct xml_parse_result;
+
+	class xml_node;
+
+	class xml_text;
+	
+	#ifndef PUGIXML_NO_XPATH
+	class xpath_node;
+	class xpath_node_set;
+	class xpath_query;
+	class xpath_variable_set;
+	#endif
+
+	// Range-based for loop support
+	template <typename It> class xml_object_range // holds a begin/end iterator pair
+	{
+	public:
+		typedef It const_iterator;
+		typedef It iterator;
+
+		xml_object_range(It b, It e): _begin(b), _end(e) {}
+
+		// Accessors expected by range-based for
+		It begin() const { return _begin; }
+		It end() const { return _end; }
+
+	private:
+		It _begin;
+		It _end;
+	};
+
+	// Writer interface for node printing (see xml_node::print)
+	class PUGIXML_CLASS xml_writer
+	{
+	public:
+		virtual ~xml_writer() {} // virtual, so implementations can be destroyed through this interface
+
+		// Write memory chunk into stream/file/whatever (size is presumably a byte count - confirm against callers)
+		virtual void write(const void* data, size_t size) = 0;
+	};
+
+	// xml_writer implementation for FILE*
+	class PUGIXML_CLASS xml_writer_file: public xml_writer
+	{
+	public:
+		// Construct writer from a FILE* object; void* is used to avoid header dependencies on stdio
+		xml_writer_file(void* file); // NOTE(review): not explicit, so any void* converts implicitly - confirm this is intended
+
+		virtual void write(const void* data, size_t size); // implements xml_writer::write
+
+	private:
+		void* file; // presumably the FILE* given to the constructor, kept as void*
+	};
+
+	#ifndef PUGIXML_NO_STL
+	// xml_writer implementation for streams
+	class PUGIXML_CLASS xml_writer_stream: public xml_writer
+	{
+	public:
+		// Construct writer from an output stream object (one overload for narrow and one for wide streams)
+		xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream);
+		xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream);
+
+		virtual void write(const void* data, size_t size); // implements xml_writer::write
+
+	private:
+		std::basic_ostream<char, std::char_traits<char> >* narrow_stream; // presumably set by the char overload only - confirm in pugixml.cpp
+		std::basic_ostream<wchar_t, std::char_traits<wchar_t> >* wide_stream; // presumably set by the wchar_t overload only - confirm in pugixml.cpp
+	};
+	#endif
+
+	// A light-weight handle for manipulating attributes in DOM tree
+	class PUGIXML_CLASS xml_attribute
+	{
+		friend class xml_attribute_iterator; // presumably for direct access to _attr
+		friend class xml_node; // presumably for direct access to _attr
+
+	private:
+		xml_attribute_struct* _attr; // the internal pointer this handle wraps (see internal_object)
+	
+		typedef void (*unspecified_bool_type)(xml_attribute***); // function-pointer type returned by the bool conversion below
+
+	public:
+		// Default constructor. Constructs an empty attribute.
+		xml_attribute();
+		
+		// Constructs attribute from internal pointer
+		explicit xml_attribute(xml_attribute_struct* attr);
+
+		// Safe bool conversion operator
+		operator unspecified_bool_type() const;
+
+		// Borland C++ workaround
+		bool operator!() const;
+
+		// Comparison operators (compares wrapped attribute pointers)
+		bool operator==(const xml_attribute& r) const;
+		bool operator!=(const xml_attribute& r) const;
+		bool operator<(const xml_attribute& r) const;
+		bool operator>(const xml_attribute& r) const;
+		bool operator<=(const xml_attribute& r) const;
+		bool operator>=(const xml_attribute& r) const;
+
+		// Check if attribute is empty
+		bool empty() const;
+
+		// Get attribute name/value, or "" if attribute is empty
+		const char_t* name() const;
+		const char_t* value() const;
+
+		// Get attribute value, or the default value if attribute is empty
+		const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const;
+
+		// Get attribute value as a number, or the default value if conversion did not succeed or attribute is empty
+		int as_int(int def = 0) const;
+		unsigned int as_uint(unsigned int def = 0) const;
+		double as_double(double def = 0) const;
+		float as_float(float def = 0) const;
+
+	#ifdef PUGIXML_HAS_LONG_LONG
+		long long as_llong(long long def = 0) const; // long long variants exist only when PUGIXML_HAS_LONG_LONG is defined
+		unsigned long long as_ullong(unsigned long long def = 0) const;
+	#endif
+
+		// Get attribute value as bool (returns true if first character is in '1tTyY' set), or the default value if attribute is empty
+		bool as_bool(bool def = false) const;
+
+		// Set attribute name/value (returns false if attribute is empty or there is not enough memory)
+		bool set_name(const char_t* rhs);
+		bool set_value(const char_t* rhs);
+
+		// Set attribute value with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
+		bool set_value(int rhs);
+		bool set_value(unsigned int rhs);
+		bool set_value(double rhs);
+		bool set_value(bool rhs);
+
+	#ifdef PUGIXML_HAS_LONG_LONG
+		bool set_value(long long rhs); // long long variants exist only when PUGIXML_HAS_LONG_LONG is defined
+		bool set_value(unsigned long long rhs);
+	#endif
+
+		// Set attribute value (equivalent to set_value without error checking)
+		xml_attribute& operator=(const char_t* rhs);
+		xml_attribute& operator=(int rhs);
+		xml_attribute& operator=(unsigned int rhs);
+		xml_attribute& operator=(double rhs);
+		xml_attribute& operator=(bool rhs);
+
+	#ifdef PUGIXML_HAS_LONG_LONG
+		xml_attribute& operator=(long long rhs); // long long variants exist only when PUGIXML_HAS_LONG_LONG is defined
+		xml_attribute& operator=(unsigned long long rhs);
+	#endif
+
+		// Get next/previous attribute in the attribute list of the parent node
+		xml_attribute next_attribute() const;
+		xml_attribute previous_attribute() const;
+
+		// Get hash value (unique for handles to the same object)
+		size_t hash_value() const;
+
+		// Get internal pointer
+		xml_attribute_struct* internal_object() const;
+	};
+
+#ifdef __BORLANDC__
+	// Borland C++ workaround
+	bool PUGIXML_FUNCTION operator&&(const xml_attribute& lhs, bool rhs); // declared only when __BORLANDC__ is defined
+	bool PUGIXML_FUNCTION operator||(const xml_attribute& lhs, bool rhs); // declared only when __BORLANDC__ is defined
+#endif
+
+	// A light-weight handle for manipulating nodes in DOM tree
+	class PUGIXML_CLASS xml_node
+	{
+		friend class xml_attribute_iterator;
+		friend class xml_node_iterator;
+		friend class xml_named_node_iterator;
+
+	protected:
+		xml_node_struct* _root;
+
+		typedef void (*unspecified_bool_type)(xml_node***);
+
+	public:
+		// Default constructor. Constructs an empty node.
+		xml_node();
+
+		// Constructs node from internal pointer
+		explicit xml_node(xml_node_struct* p);
+
+		// Safe bool conversion operator
+		operator unspecified_bool_type() const;
+
+		// Borland C++ workaround
+		bool operator!() const;
+	
+		// Comparison operators (compares wrapped node pointers)
+		bool operator==(const xml_node& r) const;
+		bool operator!=(const xml_node& r) const;
+		bool operator<(const xml_node& r) const;
+		bool operator>(const xml_node& r) const;
+		bool operator<=(const xml_node& r) const;
+		bool operator>=(const xml_node& r) const;
+
+		// Check if node is empty.
+		bool empty() const;
+
+		// Get node type
+		xml_node_type type() const;
+
+		// Get node name, or "" if node is empty or it has no name
+		const char_t* name() const;
+
+		// Get node value, or "" if node is empty or it has no value
+		// Note: For <node>text</node> node.value() does not return "text"! Use child_value() or text() methods to access text inside nodes.
+		const char_t* value() const;
+	
+		// Get attribute list
+		xml_attribute first_attribute() const;
+		xml_attribute last_attribute() const;
+
+		// Get children list
+		xml_node first_child() const;
+		xml_node last_child() const;
+
+		// Get next/previous sibling in the children list of the parent node
+		xml_node next_sibling() const;
+		xml_node previous_sibling() const;
+		
+		// Get parent node
+		xml_node parent() const;
+
+		// Get root of DOM tree this node belongs to
+		xml_node root() const;
+
+		// Get text object for the current node
+		xml_text text() const;
+
+		// Get child, attribute or next/previous sibling with the specified name
+		xml_node child(const char_t* name) const;
+		xml_attribute attribute(const char_t* name) const;
+		xml_node next_sibling(const char_t* name) const;
+		xml_node previous_sibling(const char_t* name) const;
+
+		// Get child value of current node; that is, value of the first child node of type PCDATA/CDATA
+		const char_t* child_value() const;
+
+		// Get child value of child with specified name. Equivalent to child(name).child_value().
+		const char_t* child_value(const char_t* name) const;
+
+		// Set node name/value (returns false if node is empty, there is not enough memory, or node can not have name/value)
+		bool set_name(const char_t* rhs);
+		bool set_value(const char_t* rhs);
+		
+		// Add attribute with specified name. Returns added attribute, or empty attribute on errors.
+		xml_attribute append_attribute(const char_t* name);
+		xml_attribute prepend_attribute(const char_t* name);
+		xml_attribute insert_attribute_after(const char_t* name, const xml_attribute& attr);
+		xml_attribute insert_attribute_before(const char_t* name, const xml_attribute& attr);
+
+		// Add a copy of the specified attribute. Returns added attribute, or empty attribute on errors.
+		xml_attribute append_copy(const xml_attribute& proto);
+		xml_attribute prepend_copy(const xml_attribute& proto);
+		xml_attribute insert_copy_after(const xml_attribute& proto, const xml_attribute& attr);
+		xml_attribute insert_copy_before(const xml_attribute& proto, const xml_attribute& attr);
+
+		// Add child node with specified type. Returns added node, or empty node on errors.
+		xml_node append_child(xml_node_type type = node_element);
+		xml_node prepend_child(xml_node_type type = node_element);
+		xml_node insert_child_after(xml_node_type type, const xml_node& node);
+		xml_node insert_child_before(xml_node_type type, const xml_node& node);
+
+		// Add child element with specified name. Returns added node, or empty node on errors.
+		xml_node append_child(const char_t* name);
+		xml_node prepend_child(const char_t* name);
+		xml_node insert_child_after(const char_t* name, const xml_node& node);
+		xml_node insert_child_before(const char_t* name, const xml_node& node);
+
+		// Add a copy of the specified node as a child. Returns added node, or empty node on errors.
+		xml_node append_copy(const xml_node& proto);
+		xml_node prepend_copy(const xml_node& proto);
+		xml_node insert_copy_after(const xml_node& proto, const xml_node& node);
+		xml_node insert_copy_before(const xml_node& proto, const xml_node& node);
+
+		// Remove specified attribute
+		bool remove_attribute(const xml_attribute& a);
+		bool remove_attribute(const char_t* name);
+
+		// Remove specified child
+		bool remove_child(const xml_node& n);
+		bool remove_child(const char_t* name);
+
+		// Parses buffer as an XML document fragment and appends all nodes as children of the current node.
+		// Copies/converts the buffer, so it may be deleted or changed after the function returns.
+		// Note: append_buffer allocates memory that has the lifetime of the owning document; removing the appended nodes does not immediately reclaim that memory.
+		xml_parse_result append_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+		// Find attribute using predicate. Returns the first attribute, in attribute list order, for which pred(attribute) is true; returns an empty xml_attribute if none matches or if this handle has no node.
+		template <typename Predicate> xml_attribute find_attribute(Predicate pred) const
+		{
+			if (!_root) return xml_attribute(); // handle does not refer to a node: nothing to search
+			
+			for (xml_attribute attrib = first_attribute(); attrib; attrib = attrib.next_attribute()) // front-to-back walk; stops when the handle becomes empty
+				if (pred(attrib))
+					return attrib;
+		
+			return xml_attribute(); // no attribute satisfied the predicate
+		}
+
+		// Find child node using predicate. Only direct children are examined (no recursion). Returns the first child for which pred(child) is true; returns an empty xml_node if none matches or if this handle has no node.
+		template <typename Predicate> xml_node find_child(Predicate pred) const
+		{
+			if (!_root) return xml_node(); // handle does not refer to a node: nothing to search
+	
+			for (xml_node node = first_child(); node; node = node.next_sibling()) // sibling chain, first child to last
+				if (pred(node))
+					return node;
+		
+			return xml_node(); // no child satisfied the predicate
+		}
+
+		// Find node from subtree using predicate. Returns first node from subtree (depth-first, each node tested before its children), for which predicate returned true. The node itself is not tested, only its descendants. Returns an empty xml_node if nothing matches.
+		template <typename Predicate> xml_node find_node(Predicate pred) const
+		{
+			if (!_root) return xml_node(); // handle does not refer to a node: nothing to search
+
+			xml_node cur = first_child(); // traversal starts below this node
+			
+			while (cur._root && cur._root != _root) // stop when the subtree is exhausted or the walk has climbed back to this node
+			{
+				if (pred(cur)) return cur;
+
+				if (cur.first_child()) cur = cur.first_child(); // descend first
+				else if (cur.next_sibling()) cur = cur.next_sibling(); // then move sideways
+				else
+				{
+					while (!cur.next_sibling() && cur._root != _root) cur = cur.parent(); // climb until an ancestor has an unvisited sibling, never past this node
+
+					if (cur._root != _root) cur = cur.next_sibling(); // continue with that sibling; if we reached this node the outer loop ends
+				}
+			}
+
+			return xml_node(); // whole subtree visited without a match
+		}
+
+		// Find child node by attribute name/value
+		xml_node find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const;
+		xml_node find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const;
+
+	#ifndef PUGIXML_NO_STL
+		// Get the absolute node path from root as a text string.
+		string_t path(char_t delimiter = '/') const;
+	#endif
+
+		// Search for a node by path consisting of node names and . or .. elements.
+		xml_node first_element_by_path(const char_t* path, char_t delimiter = '/') const;
+
+		// Recursively traverse subtree with xml_tree_walker
+		bool traverse(xml_tree_walker& walker);
+	
+	#ifndef PUGIXML_NO_XPATH
+		// Select single node by evaluating XPath query. Returns first node from the resulting node set.
+		xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
+		xpath_node select_single_node(const xpath_query& query) const;
+
+		// Select node set by evaluating XPath query
+		xpath_node_set select_nodes(const char_t* query, xpath_variable_set* variables = 0) const;
+		xpath_node_set select_nodes(const xpath_query& query) const;
+	#endif
+		
+		// Print subtree using a writer object
+		void print(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
+
+	#ifndef PUGIXML_NO_STL
+		// Print subtree to stream
+		void print(std::basic_ostream<char, std::char_traits<char> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
+		void print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, unsigned int depth = 0) const;
+	#endif
+
+		// Child nodes iterators
+		typedef xml_node_iterator iterator;
+
+		iterator begin() const;
+		iterator end() const;
+
+		// Attribute iterators
+		typedef xml_attribute_iterator attribute_iterator;
+
+		attribute_iterator attributes_begin() const;
+		attribute_iterator attributes_end() const;
+
+		// Range-based for support
+		xml_object_range<xml_node_iterator> children() const;
+		xml_object_range<xml_named_node_iterator> children(const char_t* name) const;
+		xml_object_range<xml_attribute_iterator> attributes() const;
+
+		// Get node offset in parsed file/string (in char_t units) for debugging purposes
+		ptrdiff_t offset_debug() const;
+
+		// Get hash value (unique for handles to the same object)
+		size_t hash_value() const;
+
+		// Get internal pointer
+		xml_node_struct* internal_object() const;
+	};
+
+#ifdef __BORLANDC__
+	// Borland C++ workaround
+	bool PUGIXML_FUNCTION operator&&(const xml_node& lhs, bool rhs);
+	bool PUGIXML_FUNCTION operator||(const xml_node& lhs, bool rhs);
+#endif
+
+	// A helper for working with text inside PCDATA nodes
+	class PUGIXML_CLASS xml_text
+	{
+		friend class xml_node;
+
+		xml_node_struct* _root;
+
+		typedef void (*unspecified_bool_type)(xml_text***);
+
+		explicit xml_text(xml_node_struct* root);
+
+		xml_node_struct* _data_new();
+		xml_node_struct* _data() const;
+
+	public:
+		// Default constructor. Constructs an empty object.
+		xml_text();
+
+		// Safe bool conversion operator
+		operator unspecified_bool_type() const;
+
+		// Borland C++ workaround
+		bool operator!() const;
+
+		// Check if text object is empty
+		bool empty() const;
+
+		// Get text, or "" if object is empty
+		const char_t* get() const;
+
+		// Get text, or the default value if object is empty
+		const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const;
+
+		// Get text as a number, or the default value if conversion did not succeed or object is empty
+		int as_int(int def = 0) const;
+		unsigned int as_uint(unsigned int def = 0) const;
+		double as_double(double def = 0) const;
+		float as_float(float def = 0) const;
+
+	#ifdef PUGIXML_HAS_LONG_LONG
+		long long as_llong(long long def = 0) const;
+		unsigned long long as_ullong(unsigned long long def = 0) const;
+	#endif
+
+		// Get text as bool (returns true if first character is in '1tTyY' set), or the default value if object is empty
+		bool as_bool(bool def = false) const;
+
+		// Set text (returns false if object is empty or there is not enough memory)
+		bool set(const char_t* rhs);
+
+		// Set text with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
+		bool set(int rhs);
+		bool set(unsigned int rhs);
+		bool set(double rhs);
+		bool set(bool rhs);
+
+	#ifdef PUGIXML_HAS_LONG_LONG
+		bool set(long long rhs);
+		bool set(unsigned long long rhs);
+	#endif
+
+		// Set text (equivalent to set without error checking)
+		xml_text& operator=(const char_t* rhs);
+		xml_text& operator=(int rhs);
+		xml_text& operator=(unsigned int rhs);
+		xml_text& operator=(double rhs);
+		xml_text& operator=(bool rhs);
+
+	#ifdef PUGIXML_HAS_LONG_LONG
+		xml_text& operator=(long long rhs);
+		xml_text& operator=(unsigned long long rhs);
+	#endif
+
+		// Get the data node (node_pcdata or node_cdata) for this object
+		xml_node data() const;
+	};
+
+#ifdef __BORLANDC__
+	// Borland C++ workaround
+	bool PUGIXML_FUNCTION operator&&(const xml_text& lhs, bool rhs);
+	bool PUGIXML_FUNCTION operator||(const xml_text& lhs, bool rhs);
+#endif
+
+	// Child node iterator (a bidirectional iterator over a collection of xml_node). This is the type behind xml_node::iterator, as returned by xml_node::begin()/end().
+	class PUGIXML_CLASS xml_node_iterator
+	{
+		friend class xml_node; // presumably so xml_node can use the private (ref, parent) constructor below - confirm in pugixml.cpp
+
+	private:
+		mutable xml_node _wrap; // mutable: the const operator*()/operator->() below hand out non-const xml_node access (presumably to this member - confirm in pugixml.cpp)
+		xml_node _parent; // presumably the node whose children are being iterated - confirm in pugixml.cpp
+
+		xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent);
+
+	public:
+		// Iterator traits
+		typedef ptrdiff_t difference_type;
+		typedef xml_node value_type;
+		typedef xml_node* pointer;
+		typedef xml_node& reference;
+
+	#ifndef PUGIXML_NO_STL
+		typedef std::bidirectional_iterator_tag iterator_category;
+	#endif
+
+		// Default constructor
+		xml_node_iterator();
+
+		// Construct an iterator which points to the specified node (not explicit, so an xml_node converts implicitly)
+		xml_node_iterator(const xml_node& node);
+
+		// Iterator operators
+		bool operator==(const xml_node_iterator& rhs) const;
+		bool operator!=(const xml_node_iterator& rhs) const;
+
+		xml_node& operator*() const;
+		xml_node* operator->() const;
+
+		const xml_node_iterator& operator++(); // pre-increment
+		xml_node_iterator operator++(int); // post-increment, returns an iterator by value
+
+		const xml_node_iterator& operator--(); // pre-decrement
+		xml_node_iterator operator--(int); // post-decrement, returns an iterator by value
+	};
+
+	// Attribute iterator (a bidirectional iterator over a collection of xml_attribute). This is the type behind xml_node::attribute_iterator, as returned by xml_node::attributes_begin()/attributes_end().
+	class PUGIXML_CLASS xml_attribute_iterator
+	{
+		friend class xml_node; // presumably so xml_node can use the private (ref, parent) constructor below - confirm in pugixml.cpp
+
+	private:
+		mutable xml_attribute _wrap; // mutable: the const operator*()/operator->() below hand out non-const xml_attribute access (presumably to this member - confirm in pugixml.cpp)
+		xml_node _parent; // presumably the node that owns the iterated attributes - confirm in pugixml.cpp
+
+		xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent);
+
+	public:
+		// Iterator traits
+		typedef ptrdiff_t difference_type;
+		typedef xml_attribute value_type;
+		typedef xml_attribute* pointer;
+		typedef xml_attribute& reference;
+
+	#ifndef PUGIXML_NO_STL
+		typedef std::bidirectional_iterator_tag iterator_category;
+	#endif
+
+		// Default constructor
+		xml_attribute_iterator();
+
+		// Construct an iterator which points to the specified attribute; the parent node must be supplied by the caller
+		xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent);
+
+		// Iterator operators
+		bool operator==(const xml_attribute_iterator& rhs) const;
+		bool operator!=(const xml_attribute_iterator& rhs) const;
+
+		xml_attribute& operator*() const;
+		xml_attribute* operator->() const;
+
+		const xml_attribute_iterator& operator++(); // pre-increment
+		xml_attribute_iterator operator++(int); // post-increment, returns an iterator by value
+
+		const xml_attribute_iterator& operator--(); // pre-decrement
+		xml_attribute_iterator operator--(int); // post-decrement, returns an iterator by value
+	};
+
+	// Named node range helper: the iterator type of the range returned by xml_node::children(const char_t* name)
+	class PUGIXML_CLASS xml_named_node_iterator
+	{
+		friend class xml_node; // presumably so xml_node can use the private (ref, parent, name) constructor below - confirm in pugixml.cpp
+
+	public:
+		// Iterator traits
+		typedef ptrdiff_t difference_type;
+		typedef xml_node value_type;
+		typedef xml_node* pointer;
+		typedef xml_node& reference;
+
+	#ifndef PUGIXML_NO_STL
+		typedef std::bidirectional_iterator_tag iterator_category;
+	#endif
+
+		// Default constructor
+		xml_named_node_iterator();
+
+		// Construct an iterator which points to the specified node; name is the node name used for iteration
+		xml_named_node_iterator(const xml_node& node, const char_t* name);
+
+		// Iterator operators
+		bool operator==(const xml_named_node_iterator& rhs) const;
+		bool operator!=(const xml_named_node_iterator& rhs) const;
+
+		xml_node& operator*() const;
+		xml_node* operator->() const;
+
+		const xml_named_node_iterator& operator++(); // pre-increment
+		xml_named_node_iterator operator++(int); // post-increment, returns an iterator by value
+
+		const xml_named_node_iterator& operator--(); // pre-decrement
+		xml_named_node_iterator operator--(int); // post-decrement, returns an iterator by value
+
+	private:
+		mutable xml_node _wrap; // mutable: the const operator*()/operator->() above hand out non-const xml_node access (presumably to this member - confirm in pugixml.cpp)
+		xml_node _parent; // presumably the node whose children are being iterated - confirm in pugixml.cpp
+		const char_t* _name; // held as a raw pointer, not a copy; NOTE(review): the string presumably has to outlive the iterator - confirm
+
+		xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name);
+	};
+
+	// Abstract tree walker class (see xml_node::traverse). Derive from it and implement for_each(); begin() and end() are virtual but not pure, so overriding them is optional.
+	class PUGIXML_CLASS xml_tree_walker
+	{
+		friend class xml_node; // presumably lets xml_node::traverse maintain _depth - confirm in pugixml.cpp
+
+	private:
+		int _depth; // read by derived classes through depth()
+	
+	protected:
+		// Get current traversal depth
+		int depth() const;
+	
+	public:
+		xml_tree_walker();
+		virtual ~xml_tree_walker();
+
+		// Callback that is called when traversal begins. NOTE(review): meaning of the bool result is not stated in this header; presumably false stops the traversal - confirm
+		virtual bool begin(xml_node& node);
+
+		// Callback that is called for each node traversed (pure virtual: must be implemented by the derived walker)
+		virtual bool for_each(xml_node& node) = 0;
+
+		// Callback that is called when traversal ends
+		virtual bool end(xml_node& node);
+	};
+
+	// Parsing status, returned as part of xml_parse_result object
+	enum xml_parse_status
+	{
+		status_ok = 0,				// No error
+
+		status_file_not_found,		// File was not found during load_file()
+		status_io_error,			// Error reading from file/stream
+		status_out_of_memory,		// Could not allocate memory
+		status_internal_error,		// Internal error occurred
+
+		status_unrecognized_tag,	// Parser could not determine tag type
+
+		status_bad_pi,				// Parsing error occurred while parsing document declaration/processing instruction
+		status_bad_comment,			// Parsing error occurred while parsing comment
+		status_bad_cdata,			// Parsing error occurred while parsing CDATA section
+		status_bad_doctype,			// Parsing error occurred while parsing document type declaration
+		status_bad_pcdata,			// Parsing error occurred while parsing PCDATA section
+		status_bad_start_element,	// Parsing error occurred while parsing start element tag
+		status_bad_attribute,		// Parsing error occurred while parsing element attribute
+		status_bad_end_element,		// Parsing error occurred while parsing end element tag
+		status_end_element_mismatch,// There was a mismatch of start-end tags (closing tag had incorrect name, some tag was not closed or there was an excessive closing tag)
+
+		status_append_invalid_root,	// Unable to append nodes since root type is not node_element or node_document (exclusive to xml_node::append_buffer)
+
+		status_no_document_element	// Parsing resulted in a document without element nodes
+	};
+
+	// Parsing result
+	struct PUGIXML_CLASS xml_parse_result
+	{
+		// Parsing status (see xml_parse_status)
+		xml_parse_status status;
+
+		// Last parsed offset (in char_t units from start of input data)
+		ptrdiff_t offset;
+
+		// Source document encoding
+		xml_encoding encoding;
+
+		// Default constructor, initializes object to failed state
+		xml_parse_result();
+
+		// Cast to bool operator
+		operator bool() const;
+
+		// Get error description
+		const char* description() const;
+	};
+
+	// Document class (DOM tree root)
+	class PUGIXML_CLASS xml_document: public xml_node
+	{
+	private:
+		char_t* _buffer;
+
+		char _memory[192];
+		
+		// Non-copyable semantics
+		xml_document(const xml_document&);
+		const xml_document& operator=(const xml_document&);
+
+		void create();
+		void destroy();
+
+	public:
+		// Default constructor, makes empty document
+		xml_document();
+
+		// Destructor, invalidates all node/attribute handles to this document
+		~xml_document();
+
+		// Removes all nodes, leaving the empty document
+		void reset();
+
+		// Removes all nodes, then copies the entire contents of the specified document
+		void reset(const xml_document& proto);
+
+	#ifndef PUGIXML_NO_STL
+		// Load document from stream.
+		xml_parse_result load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+		xml_parse_result load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options = parse_default);
+	#endif
+
+		// Load document from zero-terminated string. No encoding conversions are applied.
+		xml_parse_result load(const char_t* contents, unsigned int options = parse_default);
+
+		// Load document from file
+		xml_parse_result load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+		xml_parse_result load_file(const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+		// Load document from buffer. Copies/converts the buffer, so it may be deleted or changed after the function returns.
+		xml_parse_result load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+		// Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data).
+		// You should ensure that buffer data will persist throughout the document's lifetime, and free the buffer memory manually once document is destroyed.
+		xml_parse_result load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+		// Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data).
+		// You should allocate the buffer with pugixml allocation function; document will free the buffer when it is no longer needed (you can't use it anymore).
+		xml_parse_result load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+		// Save XML document to writer (semantics is slightly different from xml_node::print, see documentation for details).
+		void save(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
+
+	#ifndef PUGIXML_NO_STL
+		// Save XML document to stream (semantics is slightly different from xml_node::print, see documentation for details).
+		void save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
+		void save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default) const;
+	#endif
+
+		// Save XML to file
+		bool save_file(const char* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
+		bool save_file(const wchar_t* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
+
+		// Get document element
+		xml_node document_element() const;
+	};
+
+#ifndef PUGIXML_NO_XPATH
+	// XPath query return type
+	enum xpath_value_type
+	{
+		xpath_type_none,	  // Unknown type (query failed to compile)
+		xpath_type_node_set,  // Node set (xpath_node_set)
+		xpath_type_number,	  // Number
+		xpath_type_string,	  // String
+		xpath_type_boolean	  // Boolean
+	};
+
+	// XPath parsing result
+	struct PUGIXML_CLASS xpath_parse_result
+	{
+		// Error message (0 if no error)
+		const char* error;
+
+		// Last parsed offset (in char_t units from string start)
+		ptrdiff_t offset;
+
+		// Default constructor, initializes object to failed state
+		xpath_parse_result();
+
+		// Cast to bool operator
+		operator bool() const;
+
+		// Get error description
+		const char* description() const;
+	};
+
+	// A single XPath variable
+	class PUGIXML_CLASS xpath_variable
+	{
+		friend class xpath_variable_set;
+
+	protected:
+		xpath_value_type _type;
+		xpath_variable* _next;
+
+		xpath_variable();
+
+		// Non-copyable semantics
+		xpath_variable(const xpath_variable&);
+		xpath_variable& operator=(const xpath_variable&);
+		
+	public:
+		// Get variable name
+		const char_t* name() const;
+
+		// Get variable type
+		xpath_value_type type() const;
+
+		// Get variable value; no type conversion is performed, default value (false, NaN, empty string, empty node set) is returned on type mismatch error
+		bool get_boolean() const;
+		double get_number() const;
+		const char_t* get_string() const;
+		const xpath_node_set& get_node_set() const;
+
+		// Set variable value; no type conversion is performed, false is returned on type mismatch error
+		bool set(bool value);
+		bool set(double value);
+		bool set(const char_t* value);
+		bool set(const xpath_node_set& value);
+	};
+
+	// A set of XPath variables
+	class PUGIXML_CLASS xpath_variable_set
+	{
+	private:
+		xpath_variable* _data[64];
+
+		// Non-copyable semantics
+		xpath_variable_set(const xpath_variable_set&);
+		xpath_variable_set& operator=(const xpath_variable_set&);
+
+		xpath_variable* find(const char_t* name) const;
+
+	public:
+		// Default constructor/destructor
+		xpath_variable_set();
+		~xpath_variable_set();
+
+		// Add a new variable or get the existing one, if the types match
+		xpath_variable* add(const char_t* name, xpath_value_type type);
+
+		// Set value of an existing variable; no type conversion is performed, false is returned if there is no such variable or if types mismatch
+		bool set(const char_t* name, bool value);
+		bool set(const char_t* name, double value);
+		bool set(const char_t* name, const char_t* value);
+		bool set(const char_t* name, const xpath_node_set& value);
+
+		// Get existing variable by name
+		xpath_variable* get(const char_t* name);
+		const xpath_variable* get(const char_t* name) const;
+	};
+
+	// A compiled XPath query object
+	class PUGIXML_CLASS xpath_query
+	{
+	private:
+		void* _impl;
+		xpath_parse_result _result;
+
+		typedef void (*unspecified_bool_type)(xpath_query***);
+
+		// Non-copyable semantics
+		xpath_query(const xpath_query&);
+		xpath_query& operator=(const xpath_query&);
+
+	public:
+		// Construct a compiled object from XPath expression.
+		// If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on compilation errors.
+		explicit xpath_query(const char_t* query, xpath_variable_set* variables = 0);
+
+		// Destructor
+		~xpath_query();
+
+		// Get query expression return type
+		xpath_value_type return_type() const;
+		
+		// Evaluate expression as boolean value in the specified context; performs type conversion if necessary.
+		// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
+		bool evaluate_boolean(const xpath_node& n) const;
+		
+		// Evaluate expression as double value in the specified context; performs type conversion if necessary.
+		// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
+		double evaluate_number(const xpath_node& n) const;
+		
+	#ifndef PUGIXML_NO_STL
+		// Evaluate expression as string value in the specified context; performs type conversion if necessary.
+		// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
+		string_t evaluate_string(const xpath_node& n) const;
+	#endif
+		
+		// Evaluate expression as string value in the specified context; performs type conversion if necessary.
+		// At most capacity characters are written to the destination buffer, full result size is returned (includes terminating zero).
+		// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
+		// If PUGIXML_NO_EXCEPTIONS is defined, returns empty string instead.
+		size_t evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const;
+
+		// Evaluate expression as node set in the specified context.
+		// If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on type mismatch and std::bad_alloc on out of memory errors.
+		// If PUGIXML_NO_EXCEPTIONS is defined, returns empty node set instead.
+		xpath_node_set evaluate_node_set(const xpath_node& n) const;
+
+		// Get parsing result (used to get compilation errors in PUGIXML_NO_EXCEPTIONS mode)
+		const xpath_parse_result& result() const;
+
+		// Safe bool conversion operator
+		operator unspecified_bool_type() const;
+
+		// Borland C++ workaround
+		bool operator!() const;
+	};
+	
+	#ifndef PUGIXML_NO_EXCEPTIONS
+	// XPath exception class
+	class PUGIXML_CLASS xpath_exception: public std::exception
+	{
+	private:
+		xpath_parse_result _result;
+
+	public:
+		// Construct exception from parse result
+		explicit xpath_exception(const xpath_parse_result& result);
+
+		// Get error message
+		virtual const char* what() const throw();
+
+		// Get parse result
+		const xpath_parse_result& result() const;
+	};
+	#endif
+	
+	// XPath node class (either xml_node or xml_attribute)
+	class PUGIXML_CLASS xpath_node
+	{
+	private:
+		xml_node _node;
+		xml_attribute _attribute;
+	
+		typedef void (*unspecified_bool_type)(xpath_node***);
+
+	public:
+		// Default constructor; constructs empty XPath node
+		xpath_node();
+		
+		// Construct XPath node from XML node/attribute
+		xpath_node(const xml_node& node);
+		xpath_node(const xml_attribute& attribute, const xml_node& parent);
+
+		// Get node/attribute, if any
+		xml_node node() const;
+		xml_attribute attribute() const;
+		
+		// Get parent of contained node/attribute
+		xml_node parent() const;
+
+		// Safe bool conversion operator
+		operator unspecified_bool_type() const;
+		
+		// Borland C++ workaround
+		bool operator!() const;
+
+		// Comparison operators
+		bool operator==(const xpath_node& n) const;
+		bool operator!=(const xpath_node& n) const;
+	};
+
+#ifdef __BORLANDC__
+	// Borland C++ workaround
+	bool PUGIXML_FUNCTION operator&&(const xpath_node& lhs, bool rhs);
+	bool PUGIXML_FUNCTION operator||(const xpath_node& lhs, bool rhs);
+#endif
+
+	// A fixed-size collection of XPath nodes
+	class PUGIXML_CLASS xpath_node_set
+	{
+	public:
+		// Collection type
+		enum type_t
+		{
+			type_unsorted,			// Not ordered
+			type_sorted,			// Sorted by document order (ascending)
+			type_sorted_reverse		// Sorted by document order (descending)
+		};
+		
+		// Constant iterator type
+		typedef const xpath_node* const_iterator;
+	
+		// Default constructor. Constructs empty set.
+		xpath_node_set();
+
+		// Constructs a set from iterator range; data is not checked for duplicates and is not sorted according to provided type, so be careful
+		xpath_node_set(const_iterator begin, const_iterator end, type_t type = type_unsorted);
+
+		// Destructor
+		~xpath_node_set();
+		
+		// Copy constructor/assignment operator
+		xpath_node_set(const xpath_node_set& ns);
+		xpath_node_set& operator=(const xpath_node_set& ns);
+
+		// Get collection type
+		type_t type() const;
+		
+		// Get collection size
+		size_t size() const;
+
+		// Indexing operator
+		const xpath_node& operator[](size_t index) const;
+		
+		// Collection iterators
+		const_iterator begin() const;
+		const_iterator end() const;
+
+		// Sort the collection in ascending/descending order by document order
+		void sort(bool reverse = false);
+		
+		// Get first node in the collection by document order
+		xpath_node first() const;
+		
+		// Check if collection is empty
+		bool empty() const;
+	
+	private:
+		type_t _type;
+		
+		xpath_node _storage;
+		
+		xpath_node* _begin;
+		xpath_node* _end;
+
+		void _assign(const_iterator begin, const_iterator end);
+	};
+#endif
+
+#ifndef PUGIXML_NO_STL
+	// Convert wide string to UTF8
+	std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const wchar_t* str);
+	std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >& str);
+	
+	// Convert UTF8 to wide string
+	std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const char* str);
+	std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const std::basic_string<char, std::char_traits<char>, std::allocator<char> >& str);
+#endif
+
+	// Memory allocation function interface; returns pointer to allocated memory or NULL on failure
+	typedef void* (*allocation_function)(size_t size);
+	
+	// Memory deallocation function interface
+	typedef void (*deallocation_function)(void* ptr);
+
+	// Override default memory management functions. All subsequent allocations/deallocations will be performed via supplied functions.
+	void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate);
+	
+	// Get current memory management functions
+	allocation_function PUGIXML_FUNCTION get_memory_allocation_function();
+	deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function();
+}
+
+#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
+namespace std
+{
+	// Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
+	std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_node_iterator&);
+	std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_attribute_iterator&);
+	std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_named_node_iterator&);
+}
+#endif
+
+#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
+namespace std
+{
+	// Workarounds for (non-standard) iterator category detection
+	std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_node_iterator&);
+	std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_attribute_iterator&);
+	std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_named_node_iterator&);
+}
+#endif
+
+#endif
+
+/**
+ * Copyright (c) 2006-2014 Arseny Kapoulkine
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
diff --git a/apps/gadgetron/schema/gadgetron.xsd b/apps/gadgetron/schema/gadgetron.xsd
index cfe50ca..6cad8c0 100644
--- a/apps/gadgetron/schema/gadgetron.xsd
+++ b/apps/gadgetron/schema/gadgetron.xsd
@@ -1,50 +1,78 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <xs:schema xmlns="http://gadgetron.sf.net/gadgetron" xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified" targetNamespace="http://gadgetron.sf.net/gadgetron">
 
-  <xs:element name="gadgetronConfiguration">
-    <xs:complexType>
-      <xs:sequence>
+    <!--
+    The Gadgetron configuration XML contains three sections: 'port', 'globalGadgetParameter'
+    and 'cloudBus'. 'port' is the port Gadgetron listens on. Each 'globalGadgetParameter'
+    entry is a name/value pair that is pre-set for all gadgets,
+    e.g. if a 'workingDirectory' parameter is set, the dependency measurements
+    will be stored in and read from this directory. If it is not set, the default
+    directory will be used ('/tmp/gadgetron' on Linux and 'c:/temp/gadgetron' on Windows).
+    -->
+
+    <xs:element name="gadgetronConfiguration">
+        <xs:complexType>
+            <xs:sequence>
                 <xs:element name="port" type="xs:string"/>
-      </xs:sequence>
-    </xs:complexType>
-  </xs:element>
+
+                <xs:element maxOccurs="unbounded" minOccurs="0" name="globalGadgetParameter">
+                    <xs:complexType>
+                        <xs:sequence>
+                            <xs:element maxOccurs="1" minOccurs="1" name="name" type="xs:string"/>
+                            <xs:element maxOccurs="1" minOccurs="1" name="value" type="xs:string"/>
+                        </xs:sequence>
+                    </xs:complexType>
+                </xs:element>
+
+		<xs:element maxOccurs="1" minOccurs="0" name="cloudBus">
+		  <xs:complexType>
+		    <xs:sequence>
+		      <xs:element maxOccurs="1" minOccurs="1" name="multiCastAddress" type="xs:string"/>
+		      <xs:element maxOccurs="1" minOccurs="1" name="port" type="xs:unsignedInt"/>
+		    </xs:sequence>
+		  </xs:complexType>
+		</xs:element>
+
+            </xs:sequence>
+        </xs:complexType>
+    </xs:element>
 
   <xs:element name="gadgetronStreamConfiguration">
     <xs:complexType>
       <xs:sequence>
                 <xs:element maxOccurs="unbounded" minOccurs="0" name="reader">
-                	<xs:complexType>
-					      <xs:sequence>
-					      	<xs:element name="slot" type="xs:unsignedShort"/>
-					      	<xs:element name="dll" type="xs:string"/>
-					      	<xs:element name="classname" type="xs:string"/>
-					      </xs:sequence>
-          			</xs:complexType>
-        		</xs:element>
+                    <xs:complexType>
+                          <xs:sequence>
+                              <xs:element name="slot" type="xs:unsignedShort"/>
+                              <xs:element name="dll" type="xs:string"/>
+                              <xs:element name="classname" type="xs:string"/>
+                          </xs:sequence>
+                      </xs:complexType>
+                </xs:element>
                 <xs:element maxOccurs="unbounded" minOccurs="0" name="writer">
-                	<xs:complexType>
-					      <xs:sequence>
-					      	<xs:element maxOccurs="1" minOccurs="1" name="slot" type="xs:unsignedShort"/>
-					      	<xs:element maxOccurs="1" minOccurs="1"  name="dll" type="xs:string"/>
-					      	<xs:element maxOccurs="1" minOccurs="1"  name="classname" type="xs:string"/>
-					      </xs:sequence>
-          			</xs:complexType>
-        		</xs:element>
+                    <xs:complexType>
+                          <xs:sequence>
+                              <xs:element maxOccurs="1" minOccurs="1" name="slot" type="xs:unsignedShort"/>
+                              <xs:element maxOccurs="1" minOccurs="1"  name="dll" type="xs:string"/>
+                              <xs:element maxOccurs="1" minOccurs="1"  name="classname" type="xs:string"/>
+                          </xs:sequence>
+                      </xs:complexType>
+                </xs:element>
                 <xs:element maxOccurs="unbounded" minOccurs="0" name="gadget">
-                	<xs:complexType>
-					      <xs:sequence>
-					      	<xs:element maxOccurs="1" minOccurs="1"  name="name" type="xs:string"/>
-					      	<xs:element maxOccurs="1" minOccurs="1"  name="dll" type="xs:string"/>
-					      	<xs:element maxOccurs="1" minOccurs="1"  name="classname" type="xs:string"/>
-					      	<xs:element maxOccurs="unbounded" minOccurs="0" name="property">
-					      		<xs:complexType>
-					      			<xs:sequence>
-								      	<xs:element maxOccurs="1" minOccurs="1" name="name" type="xs:string"/>
-								      	<xs:element maxOccurs="1" minOccurs="1" name="value" type="xs:string"/>
-					      			</xs:sequence>		
-					      		</xs:complexType>
-              				</xs:element>
-           				  </xs:sequence>
+                    <xs:complexType>
+                          <xs:sequence>
+                              <xs:element maxOccurs="1" minOccurs="1"  name="name" type="xs:string"/>
+                              <xs:element maxOccurs="1" minOccurs="1"  name="dll" type="xs:string"/>
+                              <xs:element maxOccurs="1" minOccurs="1"  name="classname" type="xs:string"/>
+                              <xs:element maxOccurs="unbounded" minOccurs="0" name="property">
+                                  <xs:complexType>
+                                      <xs:sequence>
+                                          <xs:element maxOccurs="1" minOccurs="1" name="name" type="xs:string"/>
+                                          <xs:element maxOccurs="1" minOccurs="1" name="value" type="xs:string"/>
+                                      </xs:sequence>        
+                                  </xs:complexType>
+                              </xs:element>
+                             </xs:sequence>
           </xs:complexType>
         </xs:element>
       </xs:sequence>
diff --git a/apps/gadgetron/templates/CMakeLists_GadgetLibraryExample.txt b/apps/gadgetron/templates/CMakeLists_GadgetLibraryExample.txt
deleted file mode 100644
index d794865..0000000
--- a/apps/gadgetron/templates/CMakeLists_GadgetLibraryExample.txt
+++ /dev/null
@@ -1,55 +0,0 @@
-cmake_minimum_required(VERSION 2.6)
-
-project(EXAMPLELIB)
-
-if (WIN32)
-ADD_DEFINITIONS(-DWIN32 -D_WIN32 -D_WINDOWS)
-ADD_DEFINITIONS(-DUNICODE -D_UNICODE)
-SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc")
-SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W3")
-endif (WIN32)
-
-###############################################################
-#Bootstrap search for libraries 
-# (We need to find cmake modules in Gadgetron)
-###############################################################
-find_path(GADGETRON_CMAKE_MODULES FindGadgetron.cmake HINTS
-$ENV{GADGETRON_HOME}/cmake
-/usr/local/gadgetron)
-
-if (NOT GADGETRON_CMAKE_MODULES)
-  MESSAGE(FATAL_ERROR "GADGETRON_CMAKE_MODULES cannot be found. 
-   Try to set GADGETRON_HOME environment variable.")
-endif(NOT GADGETRON_CMAKE_MODULES)
-
-set(CMAKE_MODULE_PATH ${GADGETRON_CMAKE_MODULES})
-###############################################################
-
-find_package(Gadgetron REQUIRED)
-find_package(Boost REQUIRED)
-find_package(ACE REQUIRED)
-find_package(Ismrmrd REQUIRED)
-
-
-set(CMAKE_INSTALL_PREFIX ${GADGETRON_HOME})
-
-INCLUDE_DIRECTORIES(${ACE_INCLUDE_DIR} 
-     ${Boost_INCLUDE_DIR}
-     ${GADGETRON_INCLUDE_DIR}
-     ${ISMRMRD_INCLUDE_DIR}
-	 ${XSD_INCLUDE_DIR})
-
-LINK_DIRECTORIES(${GADGETRON_LIB_DIR})
-
-ADD_LIBRARY(gadgetronEXAMPLELIB SHARED <<CPPFILES>> )
-
-TARGET_LINK_LIBRARIES(gadgetronEXAMPLELIB 
-                      cpucore
-                      ${ISMRMRD_LIBRARIES} ${ISMRMRD_XSD_LIBRARIES}
-                      optimized ${ACE_LIBRARIES} 
-                      debug ${ACE_DEBUG_LIBRARY})
-
-INSTALL(TARGETS gadgetronEXAMPLELIB DESTINATION lib)
-
-#INSTALL(FILES <<XMLFILES>> DESTINATION config)
-#INSTALL (FILES <<HEADERFILES>> gadgetronEXAMPLELIB_export.h DESTINATION include)
diff --git a/apps/gadgetron/templates/gadgetronEXAMPLELIB_export.h b/apps/gadgetron/templates/gadgetronEXAMPLELIB_export.h
deleted file mode 100644
index e5b5810..0000000
--- a/apps/gadgetron/templates/gadgetronEXAMPLELIB_export.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * gadgetronEXAMPLELIB_export.h
- *
- */
-
-#ifndef GADGETRONEXAMPLELIB_EXPORT_H_
-#define GADGETRONEXAMPLELIB_EXPORT_H_
-
-
-#if defined (WIN32)
-#if defined (__BUILD_GADGETRON_EXAMPLELIB__) || defined (gadgetronEXAMPLELIB_EXPORTS)
-#define EXPORTGADGETSEXAMPLELIB __declspec(dllexport)
-#else
-#define EXPORTGADGETSEXAMPLELIB __declspec(dllimport)
-#endif
-#else
-#define EXPORTGADGETSEXAMPLELIB
-#endif
-
-
-#endif /* GADGETRONEXAMPLELIB_EXPORT_H_ */
diff --git a/apps/gadgetron/webapp/CMakeLists.txt b/apps/gadgetron/webapp/CMakeLists.txt
new file mode 100644
index 0000000..ecac768
--- /dev/null
+++ b/apps/gadgetron/webapp/CMakeLists.txt
@@ -0,0 +1,20 @@
+
+if (GADGETRON_PERFORM_PACKAGING)
+	configure_file("gadgetron_web_app.in" ${CMAKE_BINARY_DIR}/gadgetron_web_app.cfg @ONLY)
+	configure_file("gadgetron_web.conf.in" ${CMAKE_BINARY_DIR}/gadgetron_web.conf @ONLY)
+	configure_file("gadgetron_web_ld.conf.in" ${CMAKE_BINARY_DIR}/gadgetron_web_ld.conf @ONLY)
+
+	if (WIN32)
+	    install(FILES gadgetron_web_app.py DESTINATION bin)
+	    install(FILES ${CMAKE_BINARY_DIR}/gadgetron_web_app.cfg DESTINATION config)
+	else (WIN32)
+	    #install(FILES ${CMAKE_BINARY_DIR}/gadgetron_web.conf DESTINATION /etc/init COMPONENT web)
+	    #install(FILES ${CMAKE_BINARY_DIR}/gadgetron_web_ld.conf DESTINATION /etc/ld.so.conf.d COMPONENT web)
+	    install(FILES gadgetron_web_app.py DESTINATION bin COMPONENT web)
+	    install(FILES ${CMAKE_BINARY_DIR}/gadgetron_web_app.cfg DESTINATION config COMPONENT web)
+	endif (WIN32)
+
+    message("Add gadgetron_web_info ...")
+	add_executable(gadgetron_web_info main.cpp)
+        install(TARGETS gadgetron_web_info DESTINATION bin COMPONENT web)
+endif (GADGETRON_PERFORM_PACKAGING)
diff --git a/apps/gadgetron/webapp/gadgetron_web.conf.in b/apps/gadgetron/webapp/gadgetron_web.conf.in
new file mode 100644
index 0000000..4b210f7
--- /dev/null
+++ b/apps/gadgetron/webapp/gadgetron_web.conf.in
@@ -0,0 +1,16 @@
+description     "Foobar management daemon"
+author          "Alex Smith"
+
+start on started network
+start on started startup
+stop on stopping network
+stop on starting shutdown
+
+console output
+kill signal INT
+
+exec su -c "python @CMAKE_INSTALL_PREFIX@/bin/gadgetron_web_app.py @CMAKE_INSTALL_PREFIX@/config/gadgetron_web_app.cfg" gadgetron
+
+respawn
+
+respawn
diff --git a/apps/gadgetron/webapp/gadgetron_web_app.cfg b/apps/gadgetron/webapp/gadgetron_web_app.cfg
index 03c8726..0eb5f3a 100644
--- a/apps/gadgetron/webapp/gadgetron_web_app.cfg
+++ b/apps/gadgetron/webapp/gadgetron_web_app.cfg
@@ -4,5 +4,5 @@ port=8090
 [GADGETRON]
 port=9002
 GADGETRON_HOME=/usr/local/gadgetron
-ISMRMRD_HOME=/usr/local/ismrmrd
+ISMRMRD_HOME=/usr/local
 logfile=/tmp/gadgetron.log
\ No newline at end of file
diff --git a/apps/gadgetron/webapp/gadgetron_web_app.in b/apps/gadgetron/webapp/gadgetron_web_app.in
new file mode 100644
index 0000000..ccc0fe5
--- /dev/null
+++ b/apps/gadgetron/webapp/gadgetron_web_app.in
@@ -0,0 +1,8 @@
+[WEBSERVER]
+port=8090
+
+[GADGETRON]
+port=9002
+GADGETRON_HOME=@CMAKE_INSTALL_PREFIX@
+ISMRMRD_HOME=@ISMRMRD_INCLUDE_DIR@/..
+logfile=/tmp/gadgetron.log
\ No newline at end of file
diff --git a/apps/gadgetron/webapp/gadgetron_web_app.py b/apps/gadgetron/webapp/gadgetron_web_app.py
index 9b98ae8..be57309 100644
--- a/apps/gadgetron/webapp/gadgetron_web_app.py
+++ b/apps/gadgetron/webapp/gadgetron_web_app.py
@@ -11,6 +11,7 @@ import threading
 import signal
 import psutil
 import inspect
+import socket;
 
 run_gadgetron_check = True
 
@@ -29,7 +30,12 @@ def termsignal(signal, frame):
     reactor.stop()
 
 def isGadgetronAlive(port,environment):
-    process = subprocess.Popen(["gt_alive","localhost",str(port)], env=environment)
+    try:
+        hostname = socket.gethostbyname(socket.gethostname())
+    except:
+        hostname = "127.0.0.1"
+
+    process = subprocess.Popen(["gt_alive",hostname,str(port)], env=environment)
     
     time.sleep(1)
     ret = process.poll()
diff --git a/apps/gadgetron/webapp/gadgetron_web_ld.conf.in b/apps/gadgetron/webapp/gadgetron_web_ld.conf.in
new file mode 100644
index 0000000..26c2dc1
--- /dev/null
+++ b/apps/gadgetron/webapp/gadgetron_web_ld.conf.in
@@ -0,0 +1,2 @@
+@CMAKE_INSTALL_PREFIX@/../lib
+@CMAKE_INSTALL_PREFIX@/lib
\ No newline at end of file
diff --git a/apps/gadgetron/webapp/main.cpp b/apps/gadgetron/webapp/main.cpp
new file mode 100644
index 0000000..f5525c2
--- /dev/null
+++ b/apps/gadgetron/webapp/main.cpp
@@ -0,0 +1,8 @@
+
+#include <iostream>
+#include <string>
+
+int main(int argc, char *argv[])
+{
+    return 0;
+}
diff --git a/apps/standalone/CMakeLists.txt b/apps/standalone/CMakeLists.txt
index 1bb04a3..62e606b 100644
--- a/apps/standalone/CMakeLists.txt
+++ b/apps/standalone/CMakeLists.txt
@@ -1,3 +1,9 @@
+if (MKL_FOUND)
+    INCLUDE_DIRECTORIES( ${MKL_INCLUDE_DIR} )
+    LINK_DIRECTORIES( ${MKL_LIB_DIR} ${MKL_COMPILER_LIB_DIR} )
+    link_libraries(${MKL_LIBRARIES})
+endif (MKL_FOUND)
+
 if (ARMADILLO_FOUND)
   add_subdirectory(cpu)
 endif (ARMADILLO_FOUND)
diff --git a/apps/standalone/cpu/CMakeLists.txt b/apps/standalone/cpu/CMakeLists.txt
index a00ffab..4a7c00e 100644
--- a/apps/standalone/cpu/CMakeLists.txt
+++ b/apps/standalone/cpu/CMakeLists.txt
@@ -7,14 +7,25 @@ include_directories(
     ${CMAKE_SOURCE_DIR}/toolboxes/core
     ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
     ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
     ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/hostutils
-    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/arma_math
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/algorithm
     ${CMAKE_SOURCE_DIR}/toolboxes/operators
     ${CMAKE_SOURCE_DIR}/toolboxes/operators/cpu
     ${CMAKE_SOURCE_DIR}/toolboxes/solvers
     ${CMAKE_SOURCE_DIR}/toolboxes/solvers/cpu
     ${CMAKE_SOURCE_DIR}/gadgets/core
     ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow
+    ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow
+    ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow/cpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow/cpu/transformation
+    ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow/cpu/solver
+    ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow/cpu/warper
+    ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow/cpu/dissimilarity
+    ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow/cpu/register
+    ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow/cpu/application
     ${CMAKE_SOURCE_DIR}/toolboxes/gtplus
     ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/util
     ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/workflow
@@ -32,4 +43,7 @@ include_directories(
 add_subdirectory(denoising)
 #add_subdirectory(deblurring)
 add_subdirectory(registration)
-add_subdirectory(gtplus)
\ No newline at end of file
+
+if(ISMRMRD_FOUND)
+  add_subdirectory(gtplus)
+endif()
diff --git a/apps/standalone/cpu/denoising/2d/CMakeLists.txt b/apps/standalone/cpu/denoising/2d/CMakeLists.txt
index ed75345..f7a6baf 100644
--- a/apps/standalone/cpu/denoising/2d/CMakeLists.txt
+++ b/apps/standalone/cpu/denoising/2d/CMakeLists.txt
@@ -1,25 +1,26 @@
 if (WIN32)
-ADD_DEFINITIONS(-D_USE_MATH_DEFINES)
+    ADD_DEFINITIONS(-D_USE_MATH_DEFINES)
 endif (WIN32)
 
 include_directories( 
-  ${CMAKE_SOURCE_DIR}/toolboxes/core 
-  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu 
-  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/arma_math 
-  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/hostutils
-  ${CMAKE_SOURCE_DIR}/toolboxes/solvers
-  ${CMAKE_SOURCE_DIR}/toolboxes/solvers/cpu
-  ${CMAKE_SOURCE_DIR}/toolboxes/operators
-  ${CMAKE_SOURCE_DIR}/toolboxes/operators/cpu
-  )
+                    ${CMAKE_SOURCE_DIR}/toolboxes/core 
+                    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu 
+                    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
+                    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math 
+                    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/hostutils
+                    ${CMAKE_SOURCE_DIR}/toolboxes/solvers
+                    ${CMAKE_SOURCE_DIR}/toolboxes/solvers/cpu
+                    ${CMAKE_SOURCE_DIR}/toolboxes/operators
+                    ${CMAKE_SOURCE_DIR}/toolboxes/operators/cpu
+                    ${ACE_INCLUDE_DIR}
+                    ${ISMRMRD_INCLUDE_DIR} )
 
 add_executable(cpu_denoise_TV denoise_TV.cpp)
 
 target_link_libraries(cpu_denoise_TV 
-  cpucore 
-  cpucore_math 
-  hostutils
-  ${ARMADILLO_LIBRARIES}
-  )
+                    gadgetron_toolbox_cpucore 
+                    gadgetron_toolbox_cpucore_math 
+                    gadgetron_toolbox_hostutils
+                    ${ARMADILLO_LIBRARIES} )
 
-install(TARGETS cpu_denoise_TV DESTINATION bin)
+install(TARGETS cpu_denoise_TV DESTINATION bin COMPONENT main)
diff --git a/apps/standalone/cpu/gtplus/CMakeLists.txt b/apps/standalone/cpu/gtplus/CMakeLists.txt
index 7bab6b2..c40a030 100644
--- a/apps/standalone/cpu/gtplus/CMakeLists.txt
+++ b/apps/standalone/cpu/gtplus/CMakeLists.txt
@@ -6,14 +6,21 @@ if (MATLAB_FOUND)
 
     SET(CMAKE_DEBUG_POSTFIX)
 
-    include_directories( ${MATLAB_INCLUDE_DIR}  ${ISMRMRD_INCLUDE_DIR} )
+    include_directories( 
+        ${CMAKE_SOURCE_DIR}/toolboxes/fft/cpu
+        ${ACE_INCLUDE_DIR}
+        ${MATLAB_INCLUDE_DIR}  
+        ${ISMRMRD_INCLUDE_DIR} 
+    )
 
     link_directories(${Boost_LIBRARY_DIRS})
     link_libraries(${MATLAB_LIBRARIES} 
                     optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY} 
-                    gtplus 
-                    cpucore 
-                    cpucore_math)
+                    ${ISMRMRD_LIBRARIES} 
+                    gadgetron_toolbox_gtplus 
+                    gadgetron_toolbox_cpucore 
+                    gadgetron_toolbox_cpucore_math
+                    gadgetron_toolbox_cpureg )
 
     if (WIN32)
         if ( HAS_64_BIT )
@@ -37,19 +44,17 @@ if (MATLAB_FOUND)
         endif ( HAS_64_BIT )
     endif(APPLE)
 
-    if ( MKL_FOUND )
+    if ( MKL_FOUND AND FFTW3_FOUND )
 
-        # coil map 2D
-        add_library(Matlab_compute_coil_map_2D SHARED Matlab_compute_coil_map_2D.cpp)
-        SET_TARGET_PROPERTIES(Matlab_compute_coil_map_2D PROPERTIES SUFFIX ${MATLAB_SUFFIX})
-        install(TARGETS Matlab_compute_coil_map_2D DESTINATION bin )
+        add_library(Matlab_gt_read_analyze SHARED Matlab_gt_read_analyze.cpp)
+        SET_TARGET_PROPERTIES(Matlab_gt_read_analyze PROPERTIES SUFFIX ${MATLAB_SUFFIX})
+        install(TARGETS Matlab_gt_read_analyze DESTINATION ${GADGETRON_INSTALL_MATLAB_PATH} COMPONENT main)
 
-        # coil map 3D
-        add_library(Matlab_compute_coil_map_3D SHARED Matlab_compute_coil_map_3D.cpp)
-        SET_TARGET_PROPERTIES(Matlab_compute_coil_map_3D PROPERTIES SUFFIX ${MATLAB_SUFFIX})
-        install(TARGETS Matlab_compute_coil_map_3D DESTINATION bin )
+        add_library(Matlab_gt_write_analyze SHARED Matlab_gt_write_analyze.cpp)
+        SET_TARGET_PROPERTIES(Matlab_gt_write_analyze PROPERTIES SUFFIX ${MATLAB_SUFFIX})
+        install(TARGETS Matlab_gt_write_analyze DESTINATION ${GADGETRON_INSTALL_MATLAB_PATH} COMPONENT main)
 
-    endif ( MKL_FOUND )
+    endif ( MKL_FOUND AND FFTW3_FOUND )
 
 else(MATLAB_FOUND)
     message("MATLAB NOT FOUND: matlab wrapper for gtplus toolbox will not be compiled.")
diff --git a/apps/standalone/cpu/gtplus/Matlab_gt_read_analyze.cpp b/apps/standalone/cpu/gtplus/Matlab_gt_read_analyze.cpp
new file mode 100644
index 0000000..7824c01
--- /dev/null
+++ b/apps/standalone/cpu/gtplus/Matlab_gt_read_analyze.cpp
@@ -0,0 +1,261 @@
+
+#include <matrix.h>
+#include <mat.h>
+#ifdef _WIN32
+    #include <mexGT.h>
+#else
+    #include <mex.h>
+#endif // _WIN32
+
+// Gadgetron includes
+#include "GadgetronCommon.h"
+#include "gtMatlab.h"
+#include "gtPlusISMRMRDReconUtil.h"
+#include "hoNDArray.h"
+#include "hoNDArray_fileio.h"
+#include "hoNDPoint.h"
+#include "hoNDImage.h"
+
+#include "gtMatlabConverter.h"
+#include "gtMatlabConverterComplex.h"
+
+#define MEXPRINTF(name) mexPrintf(#name);
+
+#define NIn 1
+#define NOut 2
+
+static void usage()
+{
+    using namespace std;
+    std::stringstream outs;
+
+    outs << "==============================================================================================" << endl;
+    outs << "Usage: Matlab_gt_read_analyze \n";
+    outs << "Read in the Gadgetron produced Analyze image file" << endl;
+    outs << "Support 2D/3D/4D/5D/6D images with short/float/double data types" << endl;
+    printAuthorInfo(outs);
+    outs << "1 Input paras:" << endl;
+    outs << '\t' << "filename   : file name of the analyze image, no .hdr or .img extension needed" << endl;
+
+    outs << "2 Output para:" << endl;
+    outs << '\t' << "data       : image data" << endl;
+    outs << '\t' << "header     : image header" << endl;
+    outs << "==============================================================================================" << endl;
+    outs << std::ends; 
+
+    std::string msg = outs.str();
+    mexPrintf("%s\n", msg.c_str() );
+}
+
+void mexFunction(int nlhs,mxArray *plhs[],int nrhs,const mxArray *prhs[])
+{
+    try
+    {
+        // ---------------------------------------------------------------
+        // consistency check
+        // ---------------------------------------------------------------    
+        if (nrhs != NIn) 
+        {
+            mexWarnMsgTxt("1 input arguments are required ...");
+            usage();
+            return;
+        }
+
+        if (nlhs < NOut )
+        {
+            mexWarnMsgTxt("2 output argument is required ...");
+            usage();
+            return;
+        }
+
+        using namespace Gadgetron;
+        using namespace Gadgetron::gtPlus;
+
+        Gadgetron::gtMatlabConverter<float> converterFloat;
+        Gadgetron::gtMatlabConverter<double> converterDouble;
+        Gadgetron::gtMatlabConverter<short> converterShort;
+
+        // ---------------------------------------------------------------
+        // input parameters
+        // ---------------------------------------------------------------    
+        // file name
+        std::string filename;
+        converterFloat.Matlab2Str(prhs[0], filename);
+
+        gtPlusIOAnalyze gt_io;
+
+        mxArray* aMx = NULL;
+        mxArray* aHeader = NULL;
+
+        try
+        {
+            hoNDImage<float, 2> data;
+            if ( gt_io.importImage(data, filename) )
+            {
+                converterFloat.hoNDImage2Matlab(data, aMx, aHeader);
+            }
+            else
+            {
+                hoNDImage<double, 2> data;
+                if ( gt_io.importImage(data, filename) )
+                {
+                    converterDouble.hoNDImage2Matlab(data, aMx, aHeader);
+                }
+                else
+                {
+                    hoNDImage<short, 2> data;
+                    if ( gt_io.importImage(data, filename) )
+                    {
+                        converterShort.hoNDImage2Matlab(data, aMx, aHeader);
+                    }
+                    else
+                    {
+                        throw("not 2D ... ");
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            try
+            {
+                hoNDImage<float, 3> data;
+                if ( gt_io.importImage(data, filename) )
+                {
+                    converterFloat.hoNDImage2Matlab(data, aMx, aHeader);
+                }
+                else
+                {
+                    hoNDImage<double, 3> data;
+                    if ( gt_io.importImage(data, filename) )
+                    {
+                        converterDouble.hoNDImage2Matlab(data, aMx, aHeader);
+                    }
+                    else
+                    {
+                        hoNDImage<short, 3> data;
+                        if ( gt_io.importImage(data, filename) )
+                        {
+                            converterShort.hoNDImage2Matlab(data, aMx, aHeader);
+                        }
+                        else
+                        {
+                            throw("not 3D ... ");
+                        }
+                    }
+                }
+            }
+            catch(...)
+            {
+                try
+                {
+                    hoNDImage<float, 4> data;
+                    if ( gt_io.importImage(data, filename) )
+                    {
+                        converterFloat.hoNDImage2Matlab(data, aMx, aHeader);
+                    }
+                    else
+                    {
+                        hoNDImage<double, 4> data;
+                        if ( gt_io.importImage(data, filename) )
+                        {
+                            converterDouble.hoNDImage2Matlab(data, aMx, aHeader);
+                        }
+                        else
+                        {
+                            hoNDImage<short, 4> data;
+                            if ( gt_io.importImage(data, filename) )
+                            {
+                                converterShort.hoNDImage2Matlab(data, aMx, aHeader);
+                            }
+                            else
+                            {
+                                throw("not 4D ... ");
+                            }
+                        }
+                    }
+                }
+                catch(...)
+                {
+                    try
+                    {
+                        hoNDImage<float, 5> data;
+                        if ( gt_io.importImage(data, filename) )
+                        {
+                            converterFloat.hoNDImage2Matlab(data, aMx, aHeader);
+                        }
+                        else
+                        {
+                            hoNDImage<double, 5> data;
+                            if ( gt_io.importImage(data, filename) )
+                            {
+                                converterDouble.hoNDImage2Matlab(data, aMx, aHeader);
+                            }
+                            else
+                            {
+                                hoNDImage<short, 5> data;
+                                if ( gt_io.importImage(data, filename) )
+                                {
+                                    converterShort.hoNDImage2Matlab(data, aMx, aHeader);
+                                }
+                                else
+                                {
+                                    throw("not 5D ... ");
+                                }
+                            }
+                        }
+                    }
+                    catch(...)
+                    {
+                        try
+                        {
+                            hoNDImage<float, 6> data;
+                            if ( gt_io.importImage(data, filename) )
+                            {
+                                converterFloat.hoNDImage2Matlab(data, aMx, aHeader);
+                            }
+                            else
+                            {
+                                hoNDImage<double, 6> data;
+                                if ( gt_io.importImage(data, filename) )
+                                {
+                                    converterDouble.hoNDImage2Matlab(data, aMx, aHeader);
+                                }
+                                else
+                                {
+                                    hoNDImage<short, 6> data;
+                                    if ( gt_io.importImage(data, filename) )
+                                    {
+                                        converterShort.hoNDImage2Matlab(data, aMx, aHeader);
+                                    }
+                                    else
+                                    {
+                                        throw("not 6D ... ");
+                                    }
+                                }
+                            }
+                        }
+                        catch(...)
+                        {
+                            mexWarnMsgTxt("Images must be 2D/3D/4D/5D/6D ...");
+                            return;
+                        }
+                    }
+                }
+            }
+        }
+
+        // ---------------------------------------------------------------
+        // output parameter
+        // ---------------------------------------------------------------
+        plhs[0] = aMx;
+        plhs[1] = aHeader;
+    }
+    catch(...)
+    {
+        mexWarnMsgTxt("Exceptions happened in Matlab_gt_read_analyze(...) ...");
+        return;
+    }
+
+    return;
+}
diff --git a/apps/standalone/cpu/gtplus/Matlab_gt_write_analyze.cpp b/apps/standalone/cpu/gtplus/Matlab_gt_write_analyze.cpp
new file mode 100644
index 0000000..412e0c9
--- /dev/null
+++ b/apps/standalone/cpu/gtplus/Matlab_gt_write_analyze.cpp
@@ -0,0 +1,247 @@
+
+#include <matrix.h>
+#include <mat.h>
+#ifdef _WIN32
+    #include <mexGT.h>
+#else
+    #include <mex.h>
+#endif // _WIN32
+
+// Gadgetron includes
+#include "GadgetronCommon.h"
+#include "gtMatlab.h"
+#include "gtPlusISMRMRDReconUtil.h"
+#include "hoNDArray.h"
+#include "hoNDArray_fileio.h"
+#include "hoNDPoint.h"
+#include "hoNDImage.h"
+
+#include "gtMatlabConverter.h"
+#include "gtMatlabConverterComplex.h"
+
+#define MEXPRINTF(name) mexPrintf(#name);
+
+#define NIn 3
+#define NOut 0
+
+static void usage()
+{
+    // Assemble the whole help text first, then hand it to Matlab in a single mexPrintf call.
+    std::stringstream help;
+
+    help << "==============================================================================================" << std::endl
+         << "Usage: Matlab_gt_write_analyze \n"
+         << "Write out the Gadgetron produced Analyze image file" << std::endl
+         << "Support 2D/3D/4D/5D/6D images with short/float/double data types" << std::endl;
+    printAuthorInfo(help);
+    help << "3 Input paras:" << std::endl
+         << '\t' << "data       : image data" << std::endl
+         << '\t' << "header     : image header" << std::endl
+         << '\t' << "filename   : file name of the analyze image, no .hdr or .img extension needed" << std::endl
+         << "0 Output para" << std::endl
+         << "==============================================================================================" << std::endl
+         << std::ends;
+
+    // Printed through "%s": the output stops at the NUL character that std::ends appended.
+    const std::string text = help.str();
+    mexPrintf("%s\n", text.c_str());
+}
+
+void mexFunction(int nlhs,mxArray *plhs[],int nrhs,const mxArray *prhs[])
+{
+    try
+    {
+        // MEX entry point: writes prhs[0] (data) with prhs[1] (header) to the Analyze file named by prhs[2].
+        // Consistency check: exactly NIn (3) inputs are required; nlhs and plhs are never used here.
+        // Every failure is reported through mexWarnMsgTxt, after which the function simply returns.
+        if (nrhs != NIn) 
+        {
+            mexWarnMsgTxt("3 input arguments are required ...");
+            usage();
+            return;
+        }
+
+        using namespace Gadgetron;
+        using namespace Gadgetron::gtPlus;
+        // One converter per supported pixel type; converterFloat also decodes the file name string below.
+        Gadgetron::gtMatlabConverter<float> converterFloat;
+        Gadgetron::gtMatlabConverter<double> converterDouble;
+        Gadgetron::gtMatlabConverter<short> converterShort;
+
+        // ---------------------------------------------------------------
+        // input parameters: prhs[0] = image data, prhs[1] = image header, prhs[2] = file name
+        // ---------------------------------------------------------------
+        gtPlusIOAnalyze gt_io;
+
+        const mxArray* aMx = prhs[0];
+        const mxArray* aHeader = prhs[1];
+
+        std::string filename;
+        converterFloat.Matlab2Str(prhs[2], filename);
+        // Attempt 2D first, then 3D ... 6D; within one dimensionality try float, then double, then short.
+        try
+        {
+            hoNDImage<float, 2> data;
+            if ( converterFloat.Matlab2hoNDImage(aMx, aHeader, data) )
+            {
+                gt_io.exportImage(data, filename); // NOTE(review): any status reported by exportImage is not inspected
+            }
+            else
+            {
+                hoNDImage<double, 2> data;
+                if ( converterDouble.Matlab2hoNDImage(aMx, aHeader, data) )
+                {
+                    gt_io.exportImage(data, filename);
+                }
+                else
+                {
+                    hoNDImage<short, 2> data;
+                    if ( converterShort.Matlab2hoNDImage(aMx, aHeader, data) )
+                    {
+                        gt_io.exportImage(data, filename);
+                    }
+                    else
+                    {
+                        throw("not 2D ... "); // no pixel type matched: jump to the handler that tries 3D
+                    }
+                }
+            }
+        }
+        catch(...) // reached by the throw above or by any exception raised inside the try block
+        {
+            try
+            {
+                hoNDImage<float, 3> data;
+                if ( converterFloat.Matlab2hoNDImage(aMx, aHeader, data) )
+                {
+                    gt_io.exportImage(data, filename);
+                }
+                else
+                {
+                    hoNDImage<double, 3> data;
+                    if ( converterDouble.Matlab2hoNDImage(aMx, aHeader, data) )
+                    {
+                        gt_io.exportImage(data, filename);
+                    }
+                    else
+                    {
+                        hoNDImage<short, 3> data;
+                        if ( converterShort.Matlab2hoNDImage(aMx, aHeader, data) )
+                        {
+                            gt_io.exportImage(data, filename);
+                        }
+                        else
+                        {
+                            throw("not 3D ... "); // continue with the 4D attempt
+                        }
+                    }
+                }
+            }
+            catch(...)
+            {
+                try
+                {
+                    hoNDImage<float, 4> data;
+                    if ( converterFloat.Matlab2hoNDImage(aMx, aHeader, data) )
+                    {
+                        gt_io.exportImage(data, filename);
+                    }
+                    else
+                    {
+                        hoNDImage<double, 4> data;
+                        if ( converterDouble.Matlab2hoNDImage(aMx, aHeader, data) )
+                        {
+                            gt_io.exportImage(data, filename);
+                        }
+                        else
+                        {
+                            hoNDImage<short, 4> data;
+                            if ( converterShort.Matlab2hoNDImage(aMx, aHeader, data) )
+                            {
+                                gt_io.exportImage(data, filename);
+                            }
+                            else
+                            {
+                                throw("not 4D ... "); // continue with the 5D attempt
+                            }
+                        }
+                    }
+                }
+                catch(...)
+                {
+                    try
+                    {
+                        hoNDImage<float, 5> data;
+                        if ( converterFloat.Matlab2hoNDImage(aMx, aHeader, data) )
+                        {
+                            gt_io.exportImage(data, filename);
+                        }
+                        else
+                        {
+                            hoNDImage<double, 5> data;
+                            if ( converterDouble.Matlab2hoNDImage(aMx, aHeader, data) )
+                            {
+                                gt_io.exportImage(data, filename);
+                            }
+                            else
+                            {
+                                hoNDImage<short, 5> data;
+                                if ( converterShort.Matlab2hoNDImage(aMx, aHeader, data) )
+                                {
+                                    gt_io.exportImage(data, filename);
+                                }
+                                else
+                                {
+                                    throw("not 5D ... "); // continue with the 6D attempt
+                                }
+                            }
+                        }
+                    }
+                    catch(...)
+                    {
+                        try
+                        {
+                            hoNDImage<float, 6> data;
+                            if ( converterFloat.Matlab2hoNDImage(aMx, aHeader, data) )
+                            {
+                                gt_io.exportImage(data, filename);
+                            }
+                            else
+                            {
+                                hoNDImage<double, 6> data;
+                                if ( converterDouble.Matlab2hoNDImage(aMx, aHeader, data) )
+                                {
+                                    gt_io.exportImage(data, filename);
+                                }
+                                else
+                                {
+                                    hoNDImage<short, 6> data;
+                                    if ( converterShort.Matlab2hoNDImage(aMx, aHeader, data) )
+                                    {
+                                        gt_io.exportImage(data, filename);
+                                    }
+                                    else
+                                    {
+                                        throw("not 6D ... "); // last dimensionality: handled by the warning below
+                                    }
+                                }
+                            }
+                        }
+                        catch(...)
+                        {
+                            mexWarnMsgTxt("Images must be 2D/3D/4D/5D/6D ..."); // every dimensionality attempt failed
+                            return;
+                        }
+                    }
+                }
+            }
+        }
+    }
+    catch(...) // anything thrown outside the cascade, e.g. while decoding the file name
+    {
+        mexWarnMsgTxt("Exceptions happened in Matlab_gt_write_analyze(...) ...");
+        return;
+    }
+
+    return;
+}
diff --git a/apps/standalone/cpu/registration/2d/CMakeLists.txt b/apps/standalone/cpu/registration/2d/CMakeLists.txt
index e476fb4..f385828 100644
--- a/apps/standalone/cpu/registration/2d/CMakeLists.txt
+++ b/apps/standalone/cpu/registration/2d/CMakeLists.txt
@@ -1,26 +1,32 @@
+include_directories( 
+        ${CMAKE_SOURCE_DIR}/toolboxes/fft/cpu
+        ${ACE_INCLUDE_DIR}
+        ${ISMRMRD_INCLUDE_DIR}
+    )
+
 add_executable(register_HS_2d_cpu register_HS_2d.cpp)
 add_executable(register_CK_2d_cpu register_CK_2d.cpp)
 
 target_link_libraries(register_HS_2d_cpu 
-  hostutils 
-  cpureg 
-  cpucore 
-  cpucore_math
+  gadgetron_toolbox_hostutils 
+  gadgetron_toolbox_cpureg 
+  gadgetron_toolbox_cpucore 
+  gadgetron_toolbox_cpucore_math
   ${ARMADILLO_LIBRARIES}
   )
 
 target_link_libraries(register_CK_2d_cpu 
-  hostutils 
-  cpureg 
-  cpucore
-  cpucore_math
+  gadgetron_toolbox_hostutils 
+  gadgetron_toolbox_cpureg 
+  gadgetron_toolbox_cpucore
+  gadgetron_toolbox_cpucore_math
   ${ARMADILLO_LIBRARIES}
   )
 
 install(TARGETS 
   register_HS_2d_cpu
   register_CK_2d_cpu 
-  DESTINATION bin)
+  DESTINATION bin COMPONENT main)
 
 # matlab wrapper
 if (MATLAB_FOUND)
@@ -29,11 +35,11 @@ if (MATLAB_FOUND)
   if (WIN32)
     include_directories( ${MATLAB_INCLUDE_DIR} )
     add_library(Matlab_register_CK_2d_cpu SHARED Matlab_register_CK_2d.cpp)
-    target_link_libraries(Matlab_register_CK_2d_cpu ${MATLAB_LIBRARIES} hostutils cpureg cpucore cpucore_math)
+    target_link_libraries(Matlab_register_CK_2d_cpu ${MATLAB_LIBRARIES} gadgetron_toolbox_hostutils gadgetron_toolbox_cpureg gadgetron_toolbox_cpucore gadgetron_toolbox_cpucore_math)
     if ( HAS_64_BIT )				
       SET_TARGET_PROPERTIES(Matlab_register_CK_2d_cpu PROPERTIES SUFFIX .mexw64)
     endif ( HAS_64_BIT )    
-    install(TARGETS Matlab_register_CK_2d_cpu DESTINATION bin )
+    install(TARGETS Matlab_register_CK_2d_cpu DESTINATION ${GADGETRON_INSTALL_MATLAB_PATH} COMPONENT main)
   endif (WIN32)
 else(MATLAB_FOUND)
   message("Matlab not found. Matlab wrapper for registration toolbox will not be compiled.")
diff --git a/apps/standalone/cpu/registration/3d/CMakeLists.txt b/apps/standalone/cpu/registration/3d/CMakeLists.txt
index 0053c52..fe3e261 100644
--- a/apps/standalone/cpu/registration/3d/CMakeLists.txt
+++ b/apps/standalone/cpu/registration/3d/CMakeLists.txt
@@ -1,11 +1,11 @@
 add_executable(register_CK_3d_cpu register_CK_3d.cpp)
 
 target_link_libraries(register_CK_3d_cpu
-  hostutils 
-  cpureg 
-  cpucore 
-  cpucore_math
+  gadgetron_toolbox_hostutils 
+  gadgetron_toolbox_cpureg 
+  gadgetron_toolbox_cpucore 
+  gadgetron_toolbox_cpucore_math
   ${ARMADILLO_LIBRARIES}
   )
 
-install(TARGETS register_CK_3d_cpu DESTINATION bin)
+install(TARGETS register_CK_3d_cpu DESTINATION bin COMPONENT main)
diff --git a/apps/standalone/cpu/registration/CMakeLists.txt b/apps/standalone/cpu/registration/CMakeLists.txt
index 79e23c9..89f0e6e 100644
--- a/apps/standalone/cpu/registration/CMakeLists.txt
+++ b/apps/standalone/cpu/registration/CMakeLists.txt
@@ -1,6 +1,9 @@
 include_directories(
   ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow
   ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow/cpu
+  ${CMAKE_SOURCE_DIR}/toolboxes/fft/cpu
+  ${ACE_INCLUDE_DIR}
+  ${ISMRMRD_INCLUDE_DIR}
   )
 
 if(${ARMADILLO_VERSION_STRING} VERSION_GREATER "3.819" )
diff --git a/apps/standalone/gpu/CMakeLists.txt b/apps/standalone/gpu/CMakeLists.txt
index c66f3f6..6cb2cf3 100644
--- a/apps/standalone/gpu/CMakeLists.txt
+++ b/apps/standalone/gpu/CMakeLists.txt
@@ -1,19 +1,22 @@
 include_directories( 
-  ${CUDA_INCLUDE_DIRS}
-  ${Boost_INCLUDE_DIR} 
-  ${ACE_INCLUDE_DIR}
-  ${CMAKE_SOURCE_DIR}/toolboxes/core 
-  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
-  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/hostutils
-  ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu 
-  ${CMAKE_SOURCE_DIR}/toolboxes/nfft/gpu
-  ${CMAKE_SOURCE_DIR}/toolboxes/operators
-  ${CMAKE_SOURCE_DIR}/toolboxes/operators/gpu
-  ${CMAKE_SOURCE_DIR}/toolboxes/solvers
-  ${CMAKE_SOURCE_DIR}/toolboxes/solvers/gpu
-  )
+	${CMAKE_SOURCE_DIR}/toolboxes/core 
+	${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
+	${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
+	${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/hostutils
+	${CMAKE_SOURCE_DIR}/toolboxes/core/gpu 
+    ${CMAKE_SOURCE_DIR}/toolboxes/fft/gpu
+	${CMAKE_SOURCE_DIR}/toolboxes/nfft/gpu
+	${CMAKE_SOURCE_DIR}/toolboxes/operators
+	${CMAKE_SOURCE_DIR}/toolboxes/operators/gpu
+	${CMAKE_SOURCE_DIR}/toolboxes/solvers
+	${CMAKE_SOURCE_DIR}/toolboxes/solvers/gpu
+	${CUDA_INCLUDE_DIRS}
+	${Boost_INCLUDE_DIR} 
+	${ISMRMRD_INCLUDE_DIR} 
+	)
 
 add_subdirectory(mri)
+add_subdirectory(ct)
 add_subdirectory(denoising)
 add_subdirectory(deblurring)
 add_subdirectory(registration)
diff --git a/apps/standalone/gpu/ct/CMakeLists.txt b/apps/standalone/gpu/ct/CMakeLists.txt
new file mode 100644
index 0000000..77738f0
--- /dev/null
+++ b/apps/standalone/gpu/ct/CMakeLists.txt
@@ -0,0 +1,3 @@
+if(HDF5_FOUND)  # the xray sources call the HDF5 API (H5Fopen, H5LT*), so skip them when HDF5 is missing
+  add_subdirectory(xray)
+endif()
diff --git a/apps/standalone/gpu/ct/xray/CBCT_forwards_projection.cpp b/apps/standalone/gpu/ct/xray/CBCT_forwards_projection.cpp
new file mode 100644
index 0000000..80abb17
--- /dev/null
+++ b/apps/standalone/gpu/ct/xray/CBCT_forwards_projection.cpp
@@ -0,0 +1,252 @@
+#include "parameterparser.h"
+#include "CBCT_acquisition.h"
+#include "CBCT_binning.h"
+#include "hoCuConebeamProjectionOperator.h"
+#include "hoNDArray_fileio.h"
+#include "vector_td_utilities.h"
+#include "GPUTimer.h"
+#include "setup_grid.h"
+#include "cuNDArray_utils.h"
+
+#include <iostream>
+#include <algorithm>
+#include <sstream>
+
+using namespace Gadgetron;
+using namespace std;
+
+// Utility to load offsets - if file is provided - from an HDF5 file
+//
+
+// Read float dataset `name` (assumed 1-D: a single extent is stored) from the open HDF5 file `file_id`.
+static std::vector<float>
+read_offset_dataset( hid_t file_id, const std::string &name )
+{
+  hsize_t dim = 0;
+  if( H5LTget_dataset_info(file_id, name.c_str(), &dim, NULL, NULL) < 0 )
+    throw std::runtime_error("Error getting " + name + " dataset info from file.");
+  std::vector<float> values(dim, 0.0f);
+  // An empty dataset leaves nothing to read, and &values[0] is invalid on an empty vector.
+  if( !values.empty() && H5LTread_dataset(file_id, name.c_str(), H5T_NATIVE_FLOAT, &values[0]) < 0 )
+    throw std::runtime_error("Error reading " + name + " from file.");
+  return values;
+}
+
+// Load the (x,y) offsets stored as "/offsetx" and "/offsety"; throws std::runtime_error on any failure.
+std::vector<floatd2> 
+get_offsets( std::string filename )
+{  
+  // Owns the file handle so it is closed on every exit path, including the throws below.
+  struct FileGuard {
+    hid_t id;
+    ~FileGuard(){ if( id >= 0 ) H5Fclose(id); }
+  } file = { H5Fopen(filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT) };
+  if( file.id < 0 )
+    throw std::runtime_error("Error opening offsets file " + filename);
+
+  const std::vector<float> offsets_x = read_offset_dataset(file.id, "/offsetx");
+  const std::vector<float> offsets_y = read_offset_dataset(file.id, "/offsety");
+  if( offsets_x.size() != offsets_y.size() )
+    throw std::runtime_error("CBCT_geometry::load : x/y offset arrays has different lengths");
+
+  std::vector<floatd2> res;
+  for( size_t i=0; i<offsets_x.size(); i++ )
+    res.push_back(floatd2( offsets_x[i], offsets_y[i]));
+  return res;
+}
+
+int main(int argc, char** argv) 
+{ 
+  // Simulates cone-beam CT projections of an input volume (forwards projection).
+  // Writes the projections (-r) and the matching acquisition description (-h) to disk.
+  // Returns 0 on success and 1 when the command line or the input data is rejected.
+  //
+  // Parse command line
+  //
+
+  ParameterParser parms(1024);
+  parms.add_parameter( 'd', COMMAND_LINE_STRING, 1, "Input volume filename (.real)", true );
+  parms.add_parameter( 'b', COMMAND_LINE_STRING, 1, "Binning filename (.h5)", false );
+  parms.add_parameter( 'o', COMMAND_LINE_STRING, 1, "Offsets filename (.h5)", false );
+  parms.add_parameter( 'r', COMMAND_LINE_STRING, 1, "Output projections filename (.real)", true, "projections_simulated.real" );
+  parms.add_parameter( 'h', COMMAND_LINE_STRING, 1, "Output acquisition filename (.h5)", true, "acquisition_simulated.h5" );
+  parms.add_parameter( 'f', COMMAND_LINE_FLOAT, 3, "Input volume FOV in mm (3d)", true, "448, 448, 252" );
+  parms.add_parameter( 'p', COMMAND_LINE_FLOAT, 2, "Projection plate size in pixels (2d)", true, "512, 256" );
+  parms.add_parameter( 'q', COMMAND_LINE_FLOAT, 2, "Projection plate FOV in mm (2d)", true, "800.0, 400.0" );
+  parms.add_parameter( 'a', COMMAND_LINE_FLOAT, 1, "SAD", true, "1000.0" );
+  parms.add_parameter( 's', COMMAND_LINE_FLOAT, 1, "SDD", true, "1500.0" );
+  parms.add_parameter( 'u', COMMAND_LINE_FLOAT, 1, "Initial angle (degrees)", true, "0.0" );
+  parms.add_parameter( 'v', COMMAND_LINE_FLOAT, 1, "Angular spacing (degrees)", true, "0.5" );
+  parms.add_parameter( 'w', COMMAND_LINE_INT, 1, "Number of projections", true, "720" );
+  parms.add_parameter( 'P', COMMAND_LINE_INT, 1, "Projections per batch", false );
+  parms.add_parameter( 'S', COMMAND_LINE_FLOAT, 1, "Samples per pixel (float) in integral", false );
+  
+  parms.parse_parameter_list(argc, argv);
+  if( parms.all_required_parameters_set() ) {
+    parms.print_parameter_list();
+  }
+  else{
+    parms.print_parameter_list();
+    parms.print_usage();
+    return 1;
+  }
+  
+  std::string image_filename = (char*)parms.get_parameter('d')->get_string_value();
+  std::string projections_filename = (char*)parms.get_parameter('r')->get_string_value();
+  std::string acquisition_filename = (char*)parms.get_parameter('h')->get_string_value();
+  
+  // Load volume
+  //
+  
+  boost::shared_ptr< hoCuNDArray<float> > image(new hoCuNDArray<float>(*read_nd_array<float>( image_filename.c_str() )));
+  
+  if( image->get_number_of_dimensions() < 3 ){
+    std::cout << "Input image volume should have at least three dimensions" << std::endl;
+    return 1; // return instead of exit() so that the local objects above are destroyed
+  }
+
+  // Add default temporal dimension of 1 since the operator only takes four-dimensional images
+  //
+
+  if( image->get_number_of_dimensions() == 3 ){
+    std::vector<size_t> dims = *image->get_dimensions();
+    dims.push_back(1);
+    image->reshape(&dims);
+  }
+
+  // Configuring...
+  // - note that -p is declared as a float parameter; its values are converted to unsigned here
+
+  uintd2 ps_dims_in_pixels( parms.get_parameter('p')->get_float_value(0), 
+                            parms.get_parameter('p')->get_float_value(1) );
+  
+  floatd2 ps_dims_in_mm( parms.get_parameter('q')->get_float_value(0),
+                         parms.get_parameter('q')->get_float_value(1) );
+
+  float SAD = parms.get_parameter('a')->get_float_value();
+  float SDD = parms.get_parameter('s')->get_float_value();
+
+  floatd3 is_dims_in_mm( parms.get_parameter('f')->get_float_value(0), 
+                         parms.get_parameter('f')->get_float_value(1), 
+                         parms.get_parameter('f')->get_float_value(2) );
+
+  float start_angle = parms.get_parameter('u')->get_float_value();
+  float angular_spacing = parms.get_parameter('v')->get_float_value();
+
+  unsigned int number_of_projections = parms.get_parameter('w')->get_int_value();
+
+  // Load or generate binning data
+  // - a binning file may only reference projection indices below the requested projection count
+  
+  boost::shared_ptr<CBCT_binning> binning( new CBCT_binning() );
+
+  if (parms.get_parameter('b')->get_is_set()){
+    std::string binningdata_filename = (char*)parms.get_parameter('b')->get_string_value();
+    std::cout << "Using binning data file: " << binningdata_filename << std::endl;
+    binning->load(binningdata_filename);
+
+    if( binning->get_maximum_projection_index() >= number_of_projections ) {
+      std::cout << "Maximum projection index in binning file (" << 
+        binning->get_maximum_projection_index() << 
+        ") exceeds the number of projections requested at the command line (" << 
+        number_of_projections <<
+        ")" << std::endl;
+      return 1; // return instead of exit() so that the local objects above are destroyed
+    }
+  } 
+  else 
+    binning->set_as_default_3d_bin(number_of_projections);
+
+  binning->print();
+  
+  // Create projection angles array
+  // - evenly spaced: start angle plus i times the angular spacing (both given in degrees)
+  
+  std::vector<float> angles;
+
+  for( unsigned int i=0; i<number_of_projections; i++ ){
+    float angle = start_angle + i*angular_spacing;
+    angles.push_back(angle);
+  }
+  
+  // Create projection offsets array
+  // - read from file when -o is given, otherwise a zero offset for every projection
+
+  std::vector<floatd2> offsets;
+
+  if (parms.get_parameter('o')->get_is_set()){
+    std::string offsets_filename = (char*)parms.get_parameter('o')->get_string_value();
+    std::cout << "Using offsets file: " << offsets_filename << std::endl;
+    offsets = get_offsets(offsets_filename);
+  } 
+  else{
+    for( unsigned int i=0; i<number_of_projections; i++ ){
+      offsets.push_back(floatd2(0.0f));
+    } 
+  }   
+  
+  // Allocate and clear array to hold the result
+  // - dimensions: plate width, plate height, number of projections
+  
+  std::vector<size_t> ps_dims;
+  ps_dims.push_back(ps_dims_in_pixels[0]);
+  ps_dims.push_back(ps_dims_in_pixels[1]);
+  ps_dims.push_back(number_of_projections);
+
+  boost::shared_ptr< hoCuNDArray<float> > projections( new hoCuNDArray<float>(&ps_dims) );
+  clear(projections.get()); // Since the binning might not write to all projections
+
+  // Create geometry setup
+  //
+
+  boost::shared_ptr<CBCT_geometry> geometry( new CBCT_geometry() );
+  geometry->set_SAD(SAD);
+  geometry->set_SDD(SDD);
+  geometry->set_FOV(ps_dims_in_mm);
+  geometry->set_angles(angles);
+  geometry->set_offsets(offsets);
+
+  // Create acquisition setup
+  //
+
+  boost::shared_ptr<CBCT_acquisition> acquisition( new CBCT_acquisition() );
+  acquisition->set_geometry(geometry);
+  acquisition->set_projections(projections);
+
+  // Define conebeam projection operator
+  // - and configure based on input parameters
+  // - the optional -P and -S values are only forwarded when given on the command line
+
+  boost::shared_ptr< hoCuConebeamProjectionOperator > E( new hoCuConebeamProjectionOperator() );
+  
+  CommandLineParameter *parm = parms.get_parameter('P');
+  if( parm && parm->get_is_set() )
+    E->set_num_projections_per_batch( parm->get_int_value() );
+  
+  parm = parms.get_parameter('S');  
+  if( parm && parm->get_is_set() ) 
+    E->set_num_samples_per_pixel( parm->get_float_value() );
+  
+  E->setup( acquisition, binning, is_dims_in_mm );
+
+  // Initialize the device
+  // - just to report more accurate timings
+  // - cudaDeviceSynchronize replaces the deprecated cudaThreadSynchronize
+
+  cudaDeviceSynchronize();
+
+  //
+  // Forwards projection (X-ray image simulation)
+  //
+  
+  {
+    GPUTimer timer("Running CBCT forwards projection");
+    E->mult_M( image.get(), projections.get() );
+    cudaDeviceSynchronize();
+  }
+
+  write_nd_array<float>( projections.get(), projections_filename.c_str() );
+  acquisition->save( acquisition_filename );
+
+  return 0;
+}
diff --git a/apps/standalone/gpu/ct/xray/CBCT_reconstruct_CG.cpp b/apps/standalone/gpu/ct/xray/CBCT_reconstruct_CG.cpp
new file mode 100644
index 0000000..b789945
--- /dev/null
+++ b/apps/standalone/gpu/ct/xray/CBCT_reconstruct_CG.cpp
@@ -0,0 +1,204 @@
+#include "hoCuNDArray_utils.h"
+#include "radial_utilities.h"
+#include "hoNDArray_fileio.h"
+#include "cuNDArray.h"
+#include "imageOperator.h"
+#include "identityOperator.h"
+#include "hoPartialDerivativeOperator.h"
+#include "hoCuConebeamProjectionOperator.h"
+#include "cuConvolutionOperator.h"
+#include "hoCuIdentityOperator.h"
+#include "hoCuNDArray_math.h"
+#include "hoCuNDArray_blas.h"
+#include "hoCuCgSolver.h"
+#include "CBCT_acquisition.h"
+#include "complext.h"
+#include "encodingOperatorContainer.h"
+#include "vector_td_io.h"
+#include "hoCuPartialDerivativeOperator.h"
+#include "GPUTimer.h"
+
+#include <iostream>
+#include <algorithm>
+#include <sstream>
+#include <math_constants.h>
+#include <boost/program_options.hpp>
+
+using namespace std;
+using namespace Gadgetron;
+
+namespace po = boost::program_options;
+
+// Conjugate gradient cone-beam CT reconstruction: loads an acquisition, solves, writes the volume to disk.
+int 
+main(int argc, char** argv)
+{
+  string acquisition_filename;
+  string outputFile;
+  uintd3 imageSize;
+  floatd3 voxelSize;
+  float reg_weight;
+  int device;
+  unsigned int downsamples;
+  unsigned int iterations;
+
+  // NOTE(review): the "samples" and "SAG" options are accepted below but never read in this function.
+  po::options_description desc("Allowed options");
+  desc.add_options()
+    ("help", "produce help message")
+    ("acquisition,a", po::value<string>(&acquisition_filename)->default_value("acquisition.hdf5"), "Acquisition data")
+    ("samples,n",po::value<unsigned int>(),"Number of samples per ray")
+    ("output,f", po::value<string>(&outputFile)->default_value("reconstruction.real"), "Output filename")
+    ("size,s",po::value<uintd3>(&imageSize)->default_value(uintd3(512,512,1)),"Image size in pixels")
+    ("binning,b",po::value<string>(),"Binning file for 4d reconstruction")
+    ("SAG","Use exact SAG correction if present")
+    ("voxelSize,v",po::value<floatd3>(&voxelSize)->default_value(floatd3(0.488f,0.488f,1.0f)),"Voxel size in mm")
+    ("dimensions,d",po::value<floatd3>(),"Image dimensions in mm. Overwrites voxelSize.")
+    ("iterations,i",po::value<unsigned int>(&iterations)->default_value(10),"Number of iterations")
+    ("weight,w",po::value<float>(&reg_weight)->default_value(float(0.0f)),"Regularization weight")
+    ("device",po::value<int>(&device)->default_value(0),"Number of the device to use (0 indexed)")
+    ("downsample,D",po::value<unsigned int>(&downsamples)->default_value(0),"Downsample projections this factor")
+    ;
+  
+  // Malformed or unknown options make boost::program_options throw; report them together with
+  // the usage text instead of letting the exception escape main() and terminate the program.
+  po::variables_map vm;
+  try {
+    po::store(po::parse_command_line(argc, argv, desc), vm);
+    po::notify(vm);
+  }
+  catch (const std::exception &e) {
+    std::cerr << "Error parsing command line: " << e.what() << std::endl << desc << std::endl;
+    return 1;
+  }
+
+  if (vm.count("help")) {
+    cout << desc << "\n";
+    return 1;
+  }
+  // Echo every stored option value (explicitly given or defaulted).
+  std::cout << "Command line options:" << std::endl;
+  for (po::variables_map::iterator it = vm.begin(); it != vm.end(); ++it){
+    boost::any a = it->second.value();
+    std::cout << it->first << ": ";
+    if (a.type() == typeid(std::string)) std::cout << it->second.as<std::string>();
+    else if (a.type() == typeid(int)) std::cout << it->second.as<int>();
+    else if (a.type() == typeid(unsigned int)) std::cout << it->second.as<unsigned int>();
+    else if (a.type() == typeid(float)) std::cout << it->second.as<float>();
+    else if (a.type() == typeid(vector_td<float,3>)) std::cout << it->second.as<vector_td<float,3> >();
+    else if (a.type() == typeid(vector_td<int,3>)) std::cout << it->second.as<vector_td<int,3> >();
+    else if (a.type() == typeid(vector_td<unsigned int,3>)) std::cout << it->second.as<vector_td<unsigned int,3> >();
+    else std::cout << "Unknown type" << std::endl;
+    std::cout << std::endl;
+  }
+  // Select the requested GPU and reset it before any other CUDA work is done.
+  cudaSetDevice(device);
+  cudaDeviceReset();
+
+  //Really weird stuff. Needed to initialize the device?? Should find real bug.
+  cudaDeviceManager::Instance()->lockHandle();
+  cudaDeviceManager::Instance()->unlockHandle();
+
+  // Load the acquisition (geometry and projections) and downsample it by the requested factor.
+  boost::shared_ptr<CBCT_acquisition> ps(new CBCT_acquisition());
+  ps->load(acquisition_filename);
+  ps->get_geometry()->print(std::cout);
+  ps->downsample(downsamples);
+
+  // Without a binning file, use the default 3d bin sized by the third projection dimension.
+  boost::shared_ptr<CBCT_binning> binning(new CBCT_binning());
+  if (vm.count("binning")){
+    std::cout << "Loading binning data" << std::endl;
+    binning->load(vm["binning"].as<string>());
+  } else 
+    binning->set_as_default_3d_bin(ps->get_projections()->get_size(2));
+
+  binning->print(std::cout);
+
+  // "dimensions" (mm) overrides voxelSize; otherwise the extent is voxelSize times imageSize.
+  floatd3 imageDimensions;
+  if (vm.count("dimensions")){
+    imageDimensions = vm["dimensions"].as<floatd3>();
+    voxelSize = imageDimensions/imageSize;
+  }
+  else imageDimensions = voxelSize*imageSize;
+
+  std::vector<size_t> is_dims = to_std_vector((uint64d3)imageSize);
+  std::cout << "IS dimensions " << is_dims[0] << " " << is_dims[1] << " " << is_dims[2] << std::endl;
+  std::cout << "Image size " << imageDimensions << std::endl;
+
+  // Fourth dimension: one entry per bin.
+  is_dims.push_back(binning->get_number_of_bins());
+
+  hoCuNDArray<float> projections(*ps->get_projections());
+
+  // Define encoding operator
+  boost::shared_ptr< hoCuConebeamProjectionOperator >
+    E( new hoCuConebeamProjectionOperator() );
+
+  E->setup(ps,binning,imageDimensions);
+  E->set_domain_dimensions(&is_dims);
+  E->set_codomain_dimensions(ps->get_projections()->get_dimensions().get());
+
+  if (E->get_use_offset_correction())
+    	E->offset_correct(&projections);
+
+  // Define regularization operator
+  boost::shared_ptr< hoCuIdentityOperator<float> >
+    I( new hoCuIdentityOperator<float>() );
+  
+  I->set_weight(reg_weight);
+
+  hoCuCgSolver<float> solver;
+
+  solver.set_encoding_operator(E);
+
+  // The identity regularizer is only attached for a strictly positive weight.
+  if( reg_weight>0.0f ) {
+    std::cout << "Adding identity operator with weight " << reg_weight << std::endl;
+    solver.add_regularization_operator(I);
+  }
+
+  solver.set_max_iterations(iterations);
+  solver.set_tc_tolerance(1e-8);
+  solver.set_output_mode(hoCuCgSolver<float>::OUTPUT_VERBOSE);
+
+  /*  if (vm.count("TV")){
+    boost::shared_ptr<hoCuPartialDerivativeOperator<float,4> > dx (new hoCuPartialDerivativeOperator<float,4>(0) );
+    boost::shared_ptr<hoCuPartialDerivativeOperator<float,4> > dy (new hoCuPartialDerivativeOperator<float,4>(1) );
+    boost::shared_ptr<hoCuPartialDerivativeOperator<float,4> > dz (new hoCuPartialDerivativeOperator<float,4>(2) );
+    boost::shared_ptr<hoCuPartialDerivativeOperator<float,4> > dt (new hoCuPartialDerivativeOperator<float,4>(3) );
+
+    dx->set_codomain_dimensions(&is_dims);
+    dy->set_codomain_dimensions(&is_dims);
+    dz->set_codomain_dimensions(&is_dims);
+    dt->set_codomain_dimensions(&is_dims);
+
+    dx->set_domain_dimensions(&is_dims);
+    dy->set_domain_dimensions(&is_dims);
+    dz->set_domain_dimensions(&is_dims);
+    dt->set_domain_dimensions(&is_dims);
+
+    dx->set_weight(vm["TV"].as<float>());
+    dy->set_weight(vm["TV"].as<float>());
+    dz->set_weight(vm["TV"].as<float>());
+    dt->set_weight(vm["TV"].as<float>());
+
+    solver.add_regularization_group_operator(dx);
+    solver.add_regularization_group_operator(dy);
+    solver.add_regularization_group_operator(dz);
+    solver.add_regularization_group_operator(dt);
+    solver.add_group(1);
+    }*/
+
+  // Run solver
+  boost::shared_ptr< hoCuNDArray<float> > result;
+
+  {
+    GPUTimer timer("\nRunning conjugate gradient solver");
+    result = solver.solve(&projections);
+  }
+
+  write_nd_array<float>( result.get(), outputFile.c_str());
+  return 0;
+}
diff --git a/apps/standalone/gpu/ct/xray/CBCT_reconstruct_FDK_3d.cpp b/apps/standalone/gpu/ct/xray/CBCT_reconstruct_FDK_3d.cpp
new file mode 100644
index 0000000..d5c9c3f
--- /dev/null
+++ b/apps/standalone/gpu/ct/xray/CBCT_reconstruct_FDK_3d.cpp
@@ -0,0 +1,143 @@
+#include "parameterparser.h"
+#include "CBCT_acquisition.h"
+#include "CBCT_binning.h"
+#include "hoCuConebeamProjectionOperator.h"
+#include "hoNDArray_fileio.h"
+#include "hoCuNDArray_math.h"
+#include "vector_td_utilities.h"
+#include "hoNDArray_utils.h"
+#include "GPUTimer.h"
+
+#include <iostream>
+#include <algorithm>
+#include <sstream>
+
+using namespace Gadgetron;
+using namespace std;
+
+int main(int argc, char** argv) 
+{ // Entry point: loads a CBCT acquisition, applies mult_MH of the cone-beam operator once and writes the volume to -r.
+  // Parse command line
+  // - prints the parameter list; on a missing required parameter also prints usage and returns 1
+
+  ParameterParser parms(1024);
+  parms.add_parameter( 'd', COMMAND_LINE_STRING, 1, "Input acquisition filename (.hdf5)", true );
+  parms.add_parameter( 'b', COMMAND_LINE_STRING, 1, "Binning filename (.hdf5)", false );
+  parms.add_parameter( 'r', COMMAND_LINE_STRING, 1, "Output image filename (.real)", true, "reconstruction_FDK.real" );
+  parms.add_parameter( 'm', COMMAND_LINE_INT, 3, "Matrix size (3d)", true, "256, 256, 144" );
+  parms.add_parameter( 'f', COMMAND_LINE_FLOAT, 3, "FOV in mm (3d)", true, "448, 448, 252" );
+  parms.add_parameter( 'F', COMMAND_LINE_INT, 1, "Use filtered backprojection (fbp)", true, "1" );
+  parms.add_parameter( 'P', COMMAND_LINE_INT, 1, "Projections per batch", false );
+  parms.add_parameter( 'D', COMMAND_LINE_INT, 1, "Number of downsamples of projection plate", true, "0" );
+
+  parms.parse_parameter_list(argc, argv);
+  if( parms.all_required_parameters_set() ) {
+    parms.print_parameter_list();
+  }
+  else{
+    parms.print_parameter_list();
+    parms.print_usage();
+    return 1;
+  }
+  
+  std::string acquisition_filename = (char*)parms.get_parameter('d')->get_string_value();
+  std::string image_filename = (char*)parms.get_parameter('r')->get_string_value();
+
+  // Load acquisition data
+  // - timed by the scoped GPUTimer
+
+  boost::shared_ptr<CBCT_acquisition> acquisition( new CBCT_acquisition() );
+
+  {
+    GPUTimer timer("Loading projections");
+    acquisition->load(acquisition_filename);
+  }
+
+	// Downsample projections if requested
+	// - downsample() is always called; -D defaults to "0"
+
+	{
+		GPUTimer timer("Downsampling projections");
+		unsigned int num_downsamples = parms.get_parameter('D')->get_int_value();    
+		acquisition->downsample(num_downsamples);
+	}
+  
+  // Load or generate binning data
+  // - with -b: load the file and keep only what get_3d_binning() returns; without: default 3D bin over all projections
+  
+  boost::shared_ptr<CBCT_binning> binning( new CBCT_binning() );
+
+  if (parms.get_parameter('b')->get_is_set()){
+    std::string binningdata_filename = (char*)parms.get_parameter('b')->get_string_value();
+    std::cout << "Using binning data file: " << binningdata_filename << std::endl;
+    binning->load(binningdata_filename);
+    binning = boost::shared_ptr<CBCT_binning>(new CBCT_binning(binning->get_3d_binning()));
+  } 
+  else 
+    binning->set_as_default_3d_bin(acquisition->get_projections()->get_size(2));
+
+  // Configuring...
+  // - NOTE(review): ps_dims_in_pixels, ps_dims_in_mm, SDD and SAD are not used further down in this function
+
+  uintd2 ps_dims_in_pixels( acquisition->get_projections()->get_size(0),
+			    acquisition->get_projections()->get_size(1) );
+  
+  floatd2 ps_dims_in_mm( acquisition->get_geometry()->get_FOV()[0],
+			 acquisition->get_geometry()->get_FOV()[1] );
+
+  float SDD = acquisition->get_geometry()->get_SDD();
+  float SAD = acquisition->get_geometry()->get_SAD();
+
+  uintd3 is_dims_in_pixels( parms.get_parameter('m')->get_int_value(0),
+			    parms.get_parameter('m')->get_int_value(1),
+			    parms.get_parameter('m')->get_int_value(2) );
+  
+  floatd3 is_dims_in_mm( parms.get_parameter('f')->get_float_value(0), 
+			 parms.get_parameter('f')->get_float_value(1), 
+			 parms.get_parameter('f')->get_float_value(2) );
+  
+  bool use_fbp = parms.get_parameter('F')->get_int_value();
+
+  // Allocate array to hold the result
+  // - fdk_3d has the -m matrix size; projections is a local copy of the acquisition's projection array
+  
+  std::vector<size_t> is_dims;
+  is_dims.push_back(is_dims_in_pixels[0]);
+  is_dims.push_back(is_dims_in_pixels[1]);
+  is_dims.push_back(is_dims_in_pixels[2]);
+  
+  hoCuNDArray<float> fdk_3d(&is_dims);
+  hoCuNDArray<float> projections(*acquisition->get_projections());  
+
+  // Define conebeam projection operator
+  // - and configure based on input parameters
+  // - the batch size is only overridden when -P was given
+  
+  boost::shared_ptr< hoCuConebeamProjectionOperator > E( new hoCuConebeamProjectionOperator() );
+
+  E->setup( acquisition, binning, is_dims_in_mm );
+  E->set_use_filtered_backprojection(use_fbp);
+
+  CommandLineParameter *parm = parms.get_parameter('P');
+  if( parm && parm->get_is_set() )
+    E->set_num_projections_per_batch( parm->get_int_value() );
+  
+  // Initialize the device
+  // - just to report more accurate timings
+  // - cudaDeviceSynchronize() is the non-deprecated replacement for cudaThreadSynchronize()
+
+  cudaDeviceSynchronize();
+
+  //
+  // Standard 3D FDK reconstruction
+  //
+
+  {
+    GPUTimer timer("Running 3D FDK reconstruction");
+    E->mult_MH( &projections, &fdk_3d );
+    cudaDeviceSynchronize(); // wait for the GPU before the timer goes out of scope
+  }
+
+  write_nd_array<float>( &fdk_3d, image_filename.c_str() );
+  return 0;
+}
diff --git a/apps/standalone/gpu/ct/xray/CBCT_reconstruct_FDK_4d.cpp b/apps/standalone/gpu/ct/xray/CBCT_reconstruct_FDK_4d.cpp
new file mode 100644
index 0000000..b2e50bd
--- /dev/null
+++ b/apps/standalone/gpu/ct/xray/CBCT_reconstruct_FDK_4d.cpp
@@ -0,0 +1,157 @@
+#include "parameterparser.h"
+#include "CBCT_acquisition.h"
+#include "CBCT_binning.h"
+#include "hoCuConebeamProjectionOperator.h"
+#include "hoNDArray_fileio.h"
+#include "hoCuNDArray_math.h"
+#include "vector_td_utilities.h"
+#include "GPUTimer.h"
+
+#include <iostream>
+#include <algorithm>
+#include <sstream>
+
+using namespace Gadgetron;
+using namespace std;
+
+int main(int argc, char** argv) 
+{ // Entry point: 3D reconstruction (written to "fdk.real") followed by the 4D correction step, written to -r.
+	// Parse command line
+	// - 'b' is registered as required because the binning file is read and loaded unconditionally further down
+
+	ParameterParser parms(1024);
+	parms.add_parameter( 'd', COMMAND_LINE_STRING, 1, "Input acquisition filename (.hdf5)", true );
+	parms.add_parameter( 'b', COMMAND_LINE_STRING, 1, "Binning filename (.hdf5) - 4D FDK only", true );
+	parms.add_parameter( 'r', COMMAND_LINE_STRING, 1, "Output image filename (.real)", true, "reconstruction_FDK.real" );
+	parms.add_parameter( 'm', COMMAND_LINE_INT, 3, "Matrix size (3d)", true, "256, 256, 144" );
+	parms.add_parameter( 'f', COMMAND_LINE_FLOAT, 3, "FOV in mm (3d)", true, "448, 448, 252" );
+	parms.add_parameter( 'F', COMMAND_LINE_INT, 1, "Use filtered backprojection (fbp)", true, "1" );
+	parms.add_parameter( 'O', COMMAND_LINE_INT, 1, "Use oversampling in fbp", true, "0" );
+	parms.add_parameter( 'H', COMMAND_LINE_FLOAT, 1, "Half-scan mode maximum angle", true, "0" );
+	parms.add_parameter( 'P', COMMAND_LINE_INT, 1, "Projections per batch", true, "50" );
+  parms.add_parameter( 'D', COMMAND_LINE_INT, 1, "Number of downsamples of projection plate", true, "0" );
+
+	parms.parse_parameter_list(argc, argv);
+	if( parms.all_required_parameters_set() ) {
+		parms.print_parameter_list();
+	}
+	else{
+		parms.print_parameter_list();
+		parms.print_usage();
+		return 1;
+	}
+
+	std::string acquisition_filename = (char*)parms.get_parameter('d')->get_string_value();
+	std::string binning_filename = (char*)parms.get_parameter('b')->get_string_value();
+	std::string image_filename = (char*)parms.get_parameter('r')->get_string_value();
+
+	// Load acquisition data
+	//
+
+	boost::shared_ptr<CBCT_acquisition> acquisition( new CBCT_acquisition() );
+	acquisition->load(acquisition_filename);
+
+	// Downsample projections if requested
+	// - downsample() is always called; -D defaults to "0"
+
+	{
+		GPUTimer timer("Downsampling projections");
+		unsigned int num_downsamples = parms.get_parameter('D')->get_int_value();    
+		acquisition->downsample(num_downsamples);
+	}
+
+	// Configuring...
+	// - NOTE(review): ps_dims_*, SDD, SAD, use_fbp, use_fbp_os, half_scan_max_angle and projections_per_batch are read but not used below
+
+	uintd2 ps_dims_in_pixels( acquisition->get_projections()->get_size(0),
+			acquisition->get_projections()->get_size(1) );
+
+	floatd2 ps_dims_in_mm( acquisition->get_geometry()->get_FOV()[0],
+			acquisition->get_geometry()->get_FOV()[1] );
+
+	float SDD = acquisition->get_geometry()->get_SDD();
+	float SAD = acquisition->get_geometry()->get_SAD();
+
+	uintd3 is_dims_in_pixels( parms.get_parameter('m')->get_int_value(0),
+			parms.get_parameter('m')->get_int_value(1),
+			parms.get_parameter('m')->get_int_value(2) );
+
+	floatd3 is_dims_in_mm( parms.get_parameter('f')->get_float_value(0),
+			parms.get_parameter('f')->get_float_value(1),
+			parms.get_parameter('f')->get_float_value(2) );
+
+	bool use_fbp = parms.get_parameter('F')->get_int_value();
+	bool use_fbp_os = parms.get_parameter('O')->get_int_value();
+	float half_scan_max_angle = parms.get_parameter('H')->get_float_value();
+	unsigned int projections_per_batch = parms.get_parameter('P')->get_int_value();
+	boost::shared_ptr<CBCT_binning> ps_bd4d(  new CBCT_binning());
+
+	std::cout << "binning data file: " << binning_filename << std::endl;
+	ps_bd4d->load(binning_filename);
+	ps_bd4d->print(std::cout);
+
+	// Load the binning data
+		// - the 3D pass uses what get_3d_binning() returns for the loaded 4D binning
+
+		boost::shared_ptr<CBCT_binning> binning( new CBCT_binning(ps_bd4d->get_3d_binning()) );
+
+	// Allocate array to hold the result
+	//
+
+	std::vector<size_t> is_dims;
+	is_dims.push_back(is_dims_in_pixels[0]);
+	is_dims.push_back(is_dims_in_pixels[1]);
+	is_dims.push_back(is_dims_in_pixels[2]);
+
+
+	hoCuNDArray<float> fdk_3d(&is_dims);
+
+	//
+	// Standard 3D FDK reconstruction
+	//
+
+	boost::shared_ptr< hoCuConebeamProjectionOperator > E( new hoCuConebeamProjectionOperator() );
+
+	E->setup( acquisition, binning, is_dims_in_mm );
+	E->set_use_filtered_backprojection(true);
+
+	hoCuNDArray<float> projections(*acquisition->get_projections());
+
+	{
+		GPUTimer timer("Running 3D FDK reconstruction");
+		E->mult_MH( &projections, &fdk_3d );
+	}
+
+	write_nd_array<float>( &fdk_3d, "fdk.real" ); // intermediate 3D result, fixed file name
+
+	/*4D FDK-MB algorithm starts here. McKinnon GC, RHT Bates,
+	 *
+	 *"Towards Imaging the Beating Heart Usefully with a Conventional CT Scanner,"
+	 *" Biomedical Engineering, IEEE Transactions on , vol.BME-28, no.2, pp.123,127, Feb. 1981
+	 * doi: 10.1109/TBME.1981.324785
+	 */
+	// Steps below: expand the 3D result over all bins, project it with mult_M, subtract that from the
+	// projections, apply mult_MH to the residual and add the expanded 3D result back.
+
+	size_t numBins = ps_bd4d->get_number_of_bins();
+	is_dims.push_back(numBins);
+	boost::shared_ptr< hoCuConebeamProjectionOperator >
+	E4D( new hoCuConebeamProjectionOperator() );
+	E4D->setup(acquisition,ps_bd4d,is_dims_in_mm);
+	E4D->set_use_filtered_backprojection(true);
+	E4D->set_domain_dimensions(&is_dims);
+
+	hoCuNDArray<float> fdk(*expand(&fdk_3d,numBins));
+	hoCuNDArray<float> diff_proj(projections.get_dimensions());
+
+	E4D->mult_M(&fdk,&diff_proj);
+	projections -= diff_proj;
+
+	hoCuNDArray<float> result(&is_dims);
+	E4D->mult_MH(&projections,&result);
+
+	result += fdk;
+
+	write_nd_array<float>( &result, image_filename.c_str() );
+	return 0;
+}
diff --git a/apps/standalone/gpu/ct/xray/CBCT_reconstruct_NLCG.cpp b/apps/standalone/gpu/ct/xray/CBCT_reconstruct_NLCG.cpp
new file mode 100644
index 0000000..d49758d
--- /dev/null
+++ b/apps/standalone/gpu/ct/xray/CBCT_reconstruct_NLCG.cpp
@@ -0,0 +1,194 @@
+#include "hoCuNDArray_utils.h"
+#include "radial_utilities.h"
+#include "hoNDArray_fileio.h"
+#include "cuNDArray.h"
+#include "imageOperator.h"
+#include "identityOperator.h"
+#include "hoPartialDerivativeOperator.h"
+#include "hoCuConebeamProjectionOperator.h"
+#include "cuConvolutionOperator.h"
+#include "hoCuNDArray_math.h"
+#include "hoCuNDArray_blas.h"
+#include "cgSolver.h"
+#include "CBCT_acquisition.h"
+#include "complext.h"
+#include "encodingOperatorContainer.h"
+#include "vector_td_io.h"
+#include "hoPartialDerivativeOperator.h"
+#include "hoCuTvOperator.h"
+#include "hoCuTvPicsOperator.h"
+#include "hoCuNlcgSolver.h"
+#include "hoCuPartialDerivativeOperator.h"
+
+#include <iostream>
+#include <algorithm>
+#include <sstream>
+#include <math_constants.h>
+#include <boost/program_options.hpp>
+
+using namespace std;
+using namespace Gadgetron;
+
+namespace po = boost::program_options;
+
+int main(int argc, char** argv) 
+{  // Entry point: parses options, loads a CBCT acquisition, runs hoCuNlcgSolver on the projections and writes the result.
+  string acquisition_filename;
+  string outputFile;  
+  uintd3 imageSize;
+  floatd3 voxelSize;
+  int device;
+  unsigned int downsamples;
+  unsigned int iterations;
+  float rho;
+  po::options_description desc("Allowed options");
+
+  desc.add_options()
+    ("help", "produce help message")
+    ("acquisition,a", po::value<string>(&acquisition_filename)->default_value("acquisition.hdf5"), "Acquisition data")
+    ("samples,n",po::value<unsigned int>(),"Number of samples per ray")
+    ("output,f", po::value<string>(&outputFile)->default_value("reconstruction.real"), "Output filename")
+    ("size,s",po::value<uintd3>(&imageSize)->default_value(uintd3(512,512,1)),"Image size in pixels")
+    ("binning,b",po::value<string>(),"Binning file for 4d reconstruction")
+    ("SAG","Use exact SAG correction if present") // NOTE(review): never queried in this function
+    ("voxelSize,v",po::value<floatd3>(&voxelSize)->default_value(floatd3(0.488f,0.488f,1.0f)),"Voxel size in mm")
+    ("dimensions,d",po::value<floatd3>(),"Image dimensions in mm. Overwrites voxelSize.")
+    ("iterations,i",po::value<unsigned int>(&iterations)->default_value(10),"Number of iterations")
+    ("TV,T",po::value<float>(),"TV Weight ")
+    ("PICS",po::value<float>(),"TV Weight of the prior image (Prior image compressed sensing)")
+    ("device",po::value<int>(&device)->default_value(0),"Number of the device to use (0 indexed)")
+    ("downsample,D",po::value<unsigned int>(&downsamples)->default_value(0),"Downsample projections this factor")
+    ("rho",po::value<float>(&rho)->default_value(0.9f),"Rho-value for line search. Must be between 0 and 1. Smaller value means faster runtime, but less stable algorithm.")
+    ;
+
+  po::variables_map vm;
+  po::store(po::parse_command_line(argc, argv, desc), vm);
+  po::notify(vm);
+
+  if (vm.count("help")) {
+    cout << desc << "\n";
+    return 1; // NOTE(review): --help exits with status 1
+  }
+
+  std::cout << "Command line options:" << std::endl; // echo every stored option, dispatching on its runtime type
+  for (po::variables_map::iterator it = vm.begin(); it != vm.end(); ++it){
+    boost::any a = it->second.value();
+    std::cout << it->first << ": ";
+    if (a.type() == typeid(std::string)) std::cout << it->second.as<std::string>();
+    else if (a.type() == typeid(int)) std::cout << it->second.as<int>();
+    else if (a.type() == typeid(unsigned int)) std::cout << it->second.as<unsigned int>();
+    else if (a.type() == typeid(float)) std::cout << it->second.as<float>();
+    else if (a.type() == typeid(vector_td<float,3>)) std::cout << it->second.as<vector_td<float,3> >();
+    else if (a.type() == typeid(vector_td<int,3>)) std::cout << it->second.as<vector_td<int,3> >();
+    else if (a.type() == typeid(vector_td<unsigned int,3>)) std::cout << it->second.as<vector_td<unsigned int,3> >();
+    else std::cout << "Unknown type" << std::endl;
+    std::cout << std::endl;
+  }
+
+  cudaSetDevice(device); // use the GPU selected with --device
+  cudaDeviceReset();
+
+  // Lock and immediately unlock the device manager handle. The original author found this to be needed to initialize the device and notes that the real bug should be found.
+  cudaDeviceManager::Instance()->lockHandle();
+  cudaDeviceManager::Instance()->unlockHandle();
+       
+  boost::shared_ptr<CBCT_acquisition> ps(new CBCT_acquisition());
+  ps->load(acquisition_filename);
+  ps->get_geometry()->print(std::cout);
+	ps->downsample(downsamples);
+
+  float SDD = ps->get_geometry()->get_SDD(); // NOTE(review): SDD and SAD are not used later in this function
+  float SAD = ps->get_geometry()->get_SAD();
+
+  boost::shared_ptr<CBCT_binning> binning(new CBCT_binning()); // from --binning if given, else a default 3D bin
+  if (vm.count("binning")){
+    std::cout << "Loading binning data" << std::endl;
+    binning->load(vm["binning"].as<string>());	  
+  } else binning->set_as_default_3d_bin(ps->get_projections()->get_size(2));
+  binning->print(std::cout);
+
+  floatd3 imageDimensions; // --dimensions, when given, replaces voxelSize; otherwise derived from voxelSize*imageSize
+  if (vm.count("dimensions")){
+    imageDimensions = vm["dimensions"].as<floatd3>();
+    voxelSize = imageDimensions/imageSize;
+  }
+  else imageDimensions = voxelSize*imageSize;
+
+  float lengthOfRay_in_mm = norm(imageDimensions);
+  unsigned int numSamplesPerPixel = 3;
+  float minSpacing = min(voxelSize)/numSamplesPerPixel;
+
+  unsigned int numSamplesPerRay;
+  if (vm.count("samples")) numSamplesPerRay = vm["samples"].as<unsigned int>();
+  else numSamplesPerRay = ceil( lengthOfRay_in_mm / minSpacing );
+
+  float step_size_in_mm = lengthOfRay_in_mm / numSamplesPerRay; // NOTE(review): step_size_in_mm, numProjs and needed_bytes are not used below
+  size_t numProjs = ps->get_projections()->get_size(2);
+  size_t needed_bytes = 2 * prod(imageSize) * sizeof(float);
+  std::vector<size_t> is_dims = to_std_vector((uint64d3)imageSize);
+
+  std::cout << "IS dimensions " << is_dims[0] << " " << is_dims[1] << " " << is_dims[2] << std::endl;
+  std::cout << "Image size " << imageDimensions << std::endl;
+
+  is_dims.push_back(binning->get_number_of_bins()); // fourth dimension: one volume per bin
+
+  // Define encoding matrix
+  boost::shared_ptr< hoCuConebeamProjectionOperator >
+    E( new hoCuConebeamProjectionOperator() );
+
+  E->setup(ps,binning,imageDimensions);
+  E->set_domain_dimensions(&is_dims);
+  E->set_codomain_dimensions(ps->get_projections()->get_dimensions().get());
+
+  hoCuNlcgSolver<float> solver;
+
+  solver.set_encoding_operator(E);
+  solver.set_domain_dimensions(&is_dims);
+  solver.set_max_iterations(iterations);
+  solver.set_output_mode(hoCuNlcgSolver<float>::OUTPUT_VERBOSE);
+  solver.set_non_negativity_constraint(true);
+  solver.set_rho(rho);
+
+  hoCuNDArray<float> projections = *ps->get_projections(); // local copy; may be modified by offset_correct below
+  
+  if (E->get_use_offset_correction())
+    	E->offset_correct(&projections);
+
+  // Optional regularization: --TV adds a hoCuTvOperator with the given weight.
+  if (vm.count("TV")){
+    std::cout << "Total variation regularization in use" << std::endl;
+    boost::shared_ptr<hoCuTvOperator<float,4> > tv(new hoCuTvOperator<float,4>);
+    tv->set_weight(vm["TV"].as<float>());
+    solver.add_nonlinear_operator(tv);
+  }
+  // --PICS: build a prior from a default-3D-bin operator (mult_MH), scale it, expand it over the bins, use it as prior and x0.
+  if (vm.count("PICS")){
+    std::cout << "PICS in use" << std::endl;
+    boost::shared_ptr<CBCT_binning> binning_pics( new CBCT_binning() );
+    binning_pics->set_as_default_3d_bin(ps->get_projections()->get_size(2));
+    std::vector<size_t> is_dims3d = to_std_vector((uint64d3)imageSize);
+    boost::shared_ptr< hoCuConebeamProjectionOperator >
+      Ep( new hoCuConebeamProjectionOperator() );
+    Ep->setup(ps,binning_pics,imageDimensions);
+    Ep->set_codomain_dimensions(ps->get_projections()->get_dimensions().get());
+    Ep->set_domain_dimensions(&is_dims3d);
+
+    boost::shared_ptr<hoCuNDArray<float> > prior3d(new hoCuNDArray<float>(&is_dims3d));
+    Ep->mult_MH(&projections,prior3d.get());
+
+    hoCuNDArray<float> tmp_proj(*ps->get_projections());
+    Ep->mult_M(prior3d.get(),&tmp_proj);
+    float s = dot(ps->get_projections().get(),&tmp_proj)/dot(&tmp_proj,&tmp_proj); // NOTE(review): uses the acquisition's projections, while mult_MH above used the local copy - confirm
+    *prior3d *= s;
+    boost::shared_ptr<hoCuNDArray<float> > prior(new hoCuNDArray<float>(*expand( prior3d.get(), is_dims.back() )));
+    boost::shared_ptr<hoCuTvPicsOperator<float,3> > pics (new hoCuTvPicsOperator<float,3>);
+    pics->set_prior(prior);
+    pics->set_weight(vm["PICS"].as<float>());
+    solver.add_nonlinear_operator(pics);
+    solver.set_x0(prior);
+  }
+
+  boost::shared_ptr< hoCuNDArray<float> > result = solver.solve(&projections);
+
+  write_nd_array<float>( result.get(), outputFile.c_str());
+}
diff --git a/apps/standalone/gpu/ct/xray/CBCT_reconstruct_SB.cpp b/apps/standalone/gpu/ct/xray/CBCT_reconstruct_SB.cpp
new file mode 100644
index 0000000..27b29cc
--- /dev/null
+++ b/apps/standalone/gpu/ct/xray/CBCT_reconstruct_SB.cpp
@@ -0,0 +1,281 @@
+#include "hoCuNDArray_utils.h"
+#include "radial_utilities.h"
+#include "hoNDArray_fileio.h"
+#include "cuNDArray.h"
+#include "imageOperator.h"
+#include "identityOperator.h"
+#include "hoPartialDerivativeOperator.h"
+#include "hoCuConebeamProjectionOperator.h"
+#include "cuConvolutionOperator.h"
+#include "hoCuNDArray_math.h"
+#include "cgSolver.h"
+#include "CBCT_acquisition.h"
+#include "complext.h"
+#include "encodingOperatorContainer.h"
+#include "vector_td_io.h"
+#include "hoCuPartialDerivativeOperator.h"
+#include "hoCuTvOperator.h"
+#include "hoCuTvPicsOperator.h"
+#include "hoCuSbcCgSolver.h"
+#include "GPUTimer.h"
+
+#include <iostream>
+#include <algorithm>
+#include <sstream>
+#include <math_constants.h>
+#include <boost/program_options.hpp>
+
+using namespace std;
+using namespace Gadgetron;
+
+namespace po = boost::program_options;
+
+class mySbcCgSolver : public hoCuSbcCgSolver<float> 
+{ // hoCuSbcCgSolver<float> whose post_linear_solver_callback prints a call counter and periodically writes the image to disk.
+public:
+    mySbcCgSolver() : hoCuSbcCgSolver<float>() {
+        this->dumpFreq_ = 5;
+        this->counter_ = 1;
+    }
+    ~mySbcCgSolver() {}
+  // Prints the call counter; every dumpFreq_-th call writes *u to "img-itr-NNNN.real". Always returns true.
+  virtual bool post_linear_solver_callback( hoCuNDArray<float> *u ) {
+    printf("Iteration: %u\n", counter_);
+    if( (counter_ % dumpFreq_) == 0 ){
+      printf("Dumping frame\n");
+      char filename[32]; // "img-itr-" + at most 10 digits + ".real" + NUL needs 24 bytes
+      snprintf(filename, sizeof(filename), "img-itr-%04u.real", counter_);
+      write_nd_array<float>(u, filename);
+    }
+    counter_++;
+    return true;
+  }
+  void set_dump_frequency(unsigned int dumpFreq) { // 0 means "effectively never dump"
+    if( dumpFreq == 0 )
+      this->dumpFreq_ = 9999999; // x % 0 is undefined behavior, so use a period that is not reached in practice
+    else
+      this->dumpFreq_ = dumpFreq;
+  }
+protected:
+  unsigned int counter_;  // number of the current callback invocation, starting at 1
+  unsigned int dumpFreq_; // write the image whenever counter_ is a multiple of this value
+};
+
+
+int main(int argc, char** argv)
+{ // Entry point: parses options, loads a CBCT acquisition, runs mySbcCgSolver on the projections and writes the result.
+	string acquisition_filename;
+	string outputFile;
+	uintd3 imageSize;
+	floatd3 voxelSize;
+	int device;
+  unsigned int dump;
+	unsigned int downsamples;
+	unsigned int iterations;
+	unsigned int inner_iterations;
+	float non_negativity_weight;
+
+	po::options_description desc("Allowed options");
+	desc.add_options()
+    ("help", "produce help message")
+    ("acquisition,a", po::value<string>(&acquisition_filename)->default_value("acquisition.hdf5"), "Acquisition data")
+    ("samples,n",po::value<unsigned int>(),"Number of samples per ray")
+    ("output,f", po::value<string>(&outputFile)->default_value("reconstruction.real"), "Output filename")
+    ("size,s",po::value<uintd3>(&imageSize)->default_value(uintd3(512,512,1)),"Image size in pixels")
+    ("binning,b",po::value<string>(),"Binning file for 4d reconstruction")
+    ("SAG","Use exact SAG correction if present") // NOTE(review): never queried in this function
+    ("voxelSize,v",po::value<floatd3>(&voxelSize)->default_value(floatd3(0.488f,0.488f,1.0f)),"Voxel size in mm")
+    ("dimensions,d",po::value<floatd3>(),"Image dimensions in mm. Overwrites voxelSize.")
+    ("iterations,i",po::value<unsigned int>(&iterations)->default_value(10),"Number of iterations")
+    ("inner-iterations",po::value<unsigned int>(&inner_iterations)->default_value(5),"Number of iterations in the inner solver")
+    ("TV,T",po::value<float>(),"TV Weight ")
+    ("non-negativity,N",po::value<float>(&non_negativity_weight)->default_value(1.0f),"Weight for the non-negativity (soft) constraint ")
+    ("prior", po::value<std::string>(),"Prior image filename") // NOTE(review): never queried in this function
+    ("PICCS",po::value<float>(),"TV Weight of the prior image (Prior image constrained compressed sensing)")
+    ("device",po::value<int>(&device)->default_value(0),"Number of the device to use (0 indexed)")
+    ("dump",po::value<unsigned int>(&dump)->default_value(0),"Dump image every N iterations")    
+    ("downsample,D",po::value<unsigned int>(&downsamples)->default_value(0),"Downsample projections this factor")
+    ;
+  
+	po::variables_map vm;
+	po::store(po::parse_command_line(argc, argv, desc), vm);
+	po::notify(vm);
+
+	if (vm.count("help")) {
+		cout << desc << "\n";
+		return 1;
+	}
+	std::cout << "Command line options:" << std::endl; // echo every stored option, dispatching on its runtime type
+	for (po::variables_map::iterator it = vm.begin(); it != vm.end(); ++it){
+		boost::any a = it->second.value();
+		std::cout << it->first << ": ";
+		if (a.type() == typeid(std::string)) std::cout << it->second.as<std::string>();
+		else if (a.type() == typeid(int)) std::cout << it->second.as<int>();
+		else if (a.type() == typeid(unsigned int)) std::cout << it->second.as<unsigned int>();
+		else if (a.type() == typeid(float)) std::cout << it->second.as<float>();
+		else if (a.type() == typeid(vector_td<float,3>)) std::cout << it->second.as<vector_td<float,3> >();
+		else if (a.type() == typeid(vector_td<int,3>)) std::cout << it->second.as<vector_td<int,3> >();
+		else if (a.type() == typeid(vector_td<unsigned int,3>)) std::cout << it->second.as<vector_td<unsigned int,3> >();
+		else std::cout << "Unknown type" << std::endl;
+		std::cout << std::endl;
+	}
+	cudaSetDevice(device); // use the GPU selected with --device
+	cudaDeviceReset();
+
+	// Lock and immediately unlock the device manager handle. The original author found this to be needed to initialize the device and notes that the real bug should be found.
+	cudaDeviceManager::Instance()->lockHandle();
+	cudaDeviceManager::Instance()->unlockHandle();
+
+	boost::shared_ptr<CBCT_acquisition> ps(new CBCT_acquisition());
+	ps->load(acquisition_filename);
+	ps->get_geometry()->print(std::cout);
+	ps->downsample(downsamples);
+
+	float SDD = ps->get_geometry()->get_SDD(); // NOTE(review): SDD and SAD are not used later in this function
+	float SAD = ps->get_geometry()->get_SAD();
+
+	boost::shared_ptr<CBCT_binning> binning(new CBCT_binning()); // from --binning if given, else a default 3D bin
+	if (vm.count("binning")){
+		std::cout << "Loading binning data" << std::endl;
+		binning->load(vm["binning"].as<string>());
+	} else 
+    binning->set_as_default_3d_bin(ps->get_projections()->get_size(2));
+
+	binning->print(std::cout);
+
+	floatd3 imageDimensions; // --dimensions, when given, replaces voxelSize; otherwise derived from voxelSize*imageSize
+	if (vm.count("dimensions")){
+		imageDimensions = vm["dimensions"].as<floatd3>();
+		voxelSize = imageDimensions/imageSize;
+	}
+	else imageDimensions = voxelSize*imageSize;
+
+	float lengthOfRay_in_mm = norm(imageDimensions);
+	unsigned int numSamplesPerPixel = 3;
+	float minSpacing = min(voxelSize)/numSamplesPerPixel;
+
+	unsigned int numSamplesPerRay;
+	if (vm.count("samples")) numSamplesPerRay = vm["samples"].as<unsigned int>();
+	else numSamplesPerRay = ceil( lengthOfRay_in_mm / minSpacing );
+
+	float step_size_in_mm = lengthOfRay_in_mm / numSamplesPerRay; // NOTE(review): step_size_in_mm, numProjs and needed_bytes are not used below
+	size_t numProjs = ps->get_projections()->get_size(2);
+	size_t needed_bytes = 2 * prod(imageSize) * sizeof(float);
+
+	std::vector<size_t> is_dims = to_std_vector((uint64d3)imageSize);
+	std::cout << "IS dimensions " << is_dims[0] << " " << is_dims[1] << " " << is_dims[2] << std::endl;
+	std::cout << "Image size " << imageDimensions << std::endl;
+
+	is_dims.push_back(binning->get_number_of_bins()); // fourth dimension: one volume per bin
+
+	hoCuNDArray<float> projections(*ps->get_projections());
+
+	// Define encoding matrix
+	boost::shared_ptr< hoCuConebeamProjectionOperator >
+    E( new hoCuConebeamProjectionOperator() );
+
+	E->setup(ps,binning,imageDimensions);
+	E->set_domain_dimensions(&is_dims);
+	E->set_codomain_dimensions(ps->get_projections()->get_dimensions().get());
+
+	mySbcCgSolver solver;
+
+	solver.set_encoding_operator(E);
+	solver.set_max_outer_iterations(iterations);
+	solver.get_inner_solver()->set_max_iterations(inner_iterations);
+	solver.get_inner_solver()->set_tc_tolerance(1e-6);
+  solver.get_inner_solver()->set_output_mode(hoCuCgSolver<float>::OUTPUT_VERBOSE);
+	solver.set_non_negativity_filter(non_negativity_weight);
+	solver.set_output_mode(hoCuSbcCgSolver<float>::OUTPUT_VERBOSE);
+  solver.set_dump_frequency(dump);
+
+	if (vm.count("TV")){ // --TV: one derivative operator per dimension (0..3), all with the TV weight, added as one group
+		boost::shared_ptr<hoCuPartialDerivativeOperator<float,4> > dx (new hoCuPartialDerivativeOperator<float,4>(0) );
+		boost::shared_ptr<hoCuPartialDerivativeOperator<float,4> > dy (new hoCuPartialDerivativeOperator<float,4>(1) );
+		boost::shared_ptr<hoCuPartialDerivativeOperator<float,4> > dz (new hoCuPartialDerivativeOperator<float,4>(2) );
+		boost::shared_ptr<hoCuPartialDerivativeOperator<float,4> > dt (new hoCuPartialDerivativeOperator<float,4>(3) );
+
+		dx->set_codomain_dimensions(&is_dims);
+		dy->set_codomain_dimensions(&is_dims);
+		dz->set_codomain_dimensions(&is_dims);
+		dt->set_codomain_dimensions(&is_dims);
+
+		dx->set_domain_dimensions(&is_dims);
+		dy->set_domain_dimensions(&is_dims);
+		dz->set_domain_dimensions(&is_dims);
+		dt->set_domain_dimensions(&is_dims);
+
+		dx->set_weight(vm["TV"].as<float>());
+		dy->set_weight(vm["TV"].as<float>());
+		dz->set_weight(vm["TV"].as<float>());
+		dt->set_weight(vm["TV"].as<float>());
+
+		solver.add_regularization_group_operator(dx);
+		solver.add_regularization_group_operator(dy);
+		solver.add_regularization_group_operator(dz);
+		solver.add_regularization_group_operator(dt);
+		solver.add_group(1);
+	}
+
+	if (vm.count("PICCS")){ // --PICCS: build a prior with a default-3D-bin operator and add a derivative group relative to it
+		std::cout << "PICCS in used" << std::endl;
+		boost::shared_ptr<CBCT_binning> binning_pics( new CBCT_binning() );
+		binning_pics->set_as_default_3d_bin(ps->get_projections()->get_size(2));
+		std::vector<size_t> is_dims3d = to_std_vector((uint64d3)imageSize);
+		boost::shared_ptr< hoCuConebeamProjectionOperator >
+		Ep( new hoCuConebeamProjectionOperator() );
+		Ep->setup(ps,binning_pics,imageDimensions); // Ep has a 3D domain, so it takes the default 3D binning, not the (possibly 4D) one used by E
+		Ep->set_use_filtered_backprojection(true);
+		Ep->set_codomain_dimensions(ps->get_projections()->get_dimensions().get());
+		Ep->set_domain_dimensions(&is_dims3d);
+
+		boost::shared_ptr<hoCuNDArray<float> > prior3d(new hoCuNDArray<float>(&is_dims3d));
+		Ep->mult_MH(&projections,prior3d.get());
+
+		hoCuNDArray<float> tmp_proj(projections);
+		Ep->mult_M(prior3d.get(),&tmp_proj);
+
+		float s = dot(&projections,&tmp_proj)/dot(&tmp_proj,&tmp_proj); // scale factor applied to the prior
+		*prior3d *= s;
+		boost::shared_ptr<hoCuNDArray<float> > prior(new hoCuNDArray<float>(*expand( prior3d.get(), is_dims.back() )));
+		boost::shared_ptr<hoCuPartialDerivativeOperator<float,4> > dx (new hoCuPartialDerivativeOperator<float,4>(0) );
+		boost::shared_ptr<hoCuPartialDerivativeOperator<float,4> > dy (new hoCuPartialDerivativeOperator<float,4>(1) );
+		boost::shared_ptr<hoCuPartialDerivativeOperator<float,4> > dz (new hoCuPartialDerivativeOperator<float,4>(2) );
+		boost::shared_ptr<hoCuPartialDerivativeOperator<float,4> > dt (new hoCuPartialDerivativeOperator<float,4>(3) );
+
+		dx->set_weight(vm["PICCS"].as<float>());
+		dy->set_weight(vm["PICCS"].as<float>());
+		dz->set_weight(vm["PICCS"].as<float>());
+		dt->set_weight(vm["PICCS"].as<float>());
+
+		dx->set_codomain_dimensions(&is_dims);
+		dy->set_codomain_dimensions(&is_dims);
+		dz->set_codomain_dimensions(&is_dims);
+		dt->set_codomain_dimensions(&is_dims);
+    
+		dx->set_domain_dimensions(&is_dims);
+		dy->set_domain_dimensions(&is_dims);
+		dz->set_domain_dimensions(&is_dims);
+		dt->set_domain_dimensions(&is_dims);
+
+		solver.add_regularization_group_operator(dx);
+		solver.add_regularization_group_operator(dy);
+		solver.add_regularization_group_operator(dz);
+		solver.add_regularization_group_operator(dt);
+		solver.add_group(prior,1);
+
+		// binning_pics is owned by a shared_ptr and is released automatically
+	}
+
+	// Run solver
+	// - timed by the scoped GPUTimer
+
+	boost::shared_ptr< hoCuNDArray<float> > result;
+
+	{
+		GPUTimer timer("\nRunning Split Bregman solver");
+		result = solver.solve(&projections);
+	}
+
+	write_nd_array<float>( result.get(), outputFile.c_str());
+}
diff --git a/apps/standalone/gpu/ct/xray/CMakeLists.txt b/apps/standalone/gpu/ct/xray/CMakeLists.txt
new file mode 100644
index 0000000..897d32d
--- /dev/null
+++ b/apps/standalone/gpu/ct/xray/CMakeLists.txt
@@ -0,0 +1,64 @@
+find_package(HDF5 REQUIRED HL) # HDF5 with its HL component is mandatory for these targets
+# Pick nvcc flags by CUDA version. CUDA_NVCC_FLAGS2/3/4 are not set in this file (presumably by a parent CMakeLists - confirm).
+  if(${CUDA_VERSION} VERSION_GREATER "4.99")
+    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS2} ${CUDA_NVCC_FLAGS3} ${CUDA_NVCC_FLAGS4})
+  else(${CUDA_VERSION} VERSION_GREATER "4.99")    
+
+    if(${CUDA_VERSION} VERSION_GREATER "4.1")
+      set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS2} ${CUDA_NVCC_FLAGS3})
+    else(${CUDA_VERSION} VERSION_GREATER "4.1")
+
+      set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS2})
+
+    endif(${CUDA_VERSION} VERSION_GREATER "4.1")
+  endif(${CUDA_VERSION} VERSION_GREATER "4.99")
+# Header search paths: Gadgetron toolboxes first, then CUDA, Boost, Armadillo, HDF5 and ISMRMRD.
+include_directories(
+    ${CMAKE_SOURCE_DIR}/toolboxes/core
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/solvers
+    ${CMAKE_SOURCE_DIR}/toolboxes/solvers/cpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/operators
+    ${CMAKE_SOURCE_DIR}/toolboxes/operators/cpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/ct/xray/gpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow/cpu
+    ${CUDA_INCLUDE_DIRS}
+    ${Boost_INCLUDE_DIR}
+    ${ARMADILLO_INCLUDE_DIRS}
+    ${HDF5_INCLUDE_DIR}
+    ${HDF5_INCLUDE_DIR}/cpp
+    ${ISMRMRD_INCLUDE_DIR}
+)
+
+link_directories(${Boost_LIBRARY_DIR})
+link_libraries(${HDF5_LIBRARIES}) # applies to every target declared after this line
+# One executable per source file; each links gadgetron_toolbox_gpuxray plus the core/host utility toolboxes, CUDA and Boost.
+add_executable(CBCT_reconstruct_FDK_3d CBCT_reconstruct_FDK_3d.cpp)
+target_link_libraries(CBCT_reconstruct_FDK_3d gadgetron_toolbox_gpuxray gadgetron_toolbox_cpucore gadgetron_toolbox_cpucore_math gadgetron_toolbox_gpucore gadgetron_toolbox_hostutils ${CUDA_LIBRARIES} ${Boost_LIBRARIES})
+
+add_executable(CBCT_reconstruct_FDK_4d CBCT_reconstruct_FDK_4d.cpp)
+target_link_libraries(CBCT_reconstruct_FDK_4d gadgetron_toolbox_gpuxray gadgetron_toolbox_cpucore gadgetron_toolbox_cpucore_math gadgetron_toolbox_gpucore gadgetron_toolbox_hostutils ${CUDA_LIBRARIES} ${Boost_LIBRARIES})
+# The CG, NLCG and SB targets additionally link gadgetron_toolbox_gpuoperators.
+add_executable(CBCT_reconstruct_CG CBCT_reconstruct_CG.cpp)
+target_link_libraries(CBCT_reconstruct_CG gadgetron_toolbox_gpuxray gadgetron_toolbox_cpucore gadgetron_toolbox_cpucore_math gadgetron_toolbox_gpucore gadgetron_toolbox_gpuoperators gadgetron_toolbox_hostutils ${CUDA_LIBRARIES} ${Boost_LIBRARIES})
+
+add_executable(CBCT_reconstruct_NLCG CBCT_reconstruct_NLCG.cpp)
+target_link_libraries(CBCT_reconstruct_NLCG gadgetron_toolbox_gpuxray gadgetron_toolbox_cpucore gadgetron_toolbox_cpucore_math gadgetron_toolbox_gpucore gadgetron_toolbox_gpuoperators gadgetron_toolbox_hostutils ${CUDA_LIBRARIES} ${Boost_LIBRARIES})
+
+add_executable(CBCT_reconstruct_SB CBCT_reconstruct_SB.cpp)
+target_link_libraries(CBCT_reconstruct_SB gadgetron_toolbox_gpuxray gadgetron_toolbox_cpucore gadgetron_toolbox_cpucore_math gadgetron_toolbox_gpucore gadgetron_toolbox_gpuoperators gadgetron_toolbox_hostutils ${CUDA_LIBRARIES} ${Boost_LIBRARIES})
+
+add_executable(CBCT_forwards_projection CBCT_forwards_projection.cpp)
+target_link_libraries(CBCT_forwards_projection gadgetron_toolbox_gpuxray gadgetron_toolbox_cpucore gadgetron_toolbox_cpucore_math gadgetron_toolbox_gpucore gadgetron_toolbox_hostutils ${CUDA_LIBRARIES} ${Boost_LIBRARIES})
+# Install all six executables into bin as part of the "main" component.
+install(TARGETS 
+  CBCT_reconstruct_FDK_3d
+  CBCT_reconstruct_FDK_4d
+  CBCT_reconstruct_CG
+  CBCT_reconstruct_NLCG
+  CBCT_reconstruct_SB
+  CBCT_forwards_projection
+  DESTINATION bin COMPONENT main)
diff --git a/apps/standalone/gpu/deblurring/2d/CMakeLists.txt b/apps/standalone/gpu/deblurring/2d/CMakeLists.txt
index 7ef916a..435425e 100644
--- a/apps/standalone/gpu/deblurring/2d/CMakeLists.txt
+++ b/apps/standalone/gpu/deblurring/2d/CMakeLists.txt
@@ -6,9 +6,9 @@ add_executable(blur_2d blur_2d.cpp)
 add_executable(deblur_2d_cg deblur_2d_cg.cpp)
 add_executable(deblur_2d_sb deblur_2d_sb.cpp)
 
-target_link_libraries(deblur_2d_cg gpucore hostutils gpuoperators gpusolvers ${CUDA_LIBRARIES})
-target_link_libraries(deblur_2d_sb gpucore hostutils gpuoperators gpusolvers ${CUDA_LIBRARIES})
-target_link_libraries(blur_2d gpucore hostutils gpuoperators gpusolvers ${CUDA_LIBRARIES})
+target_link_libraries(deblur_2d_cg gadgetron_toolbox_gpucore gadgetron_toolbox_hostutils gadgetron_toolbox_gpuoperators gadgetron_toolbox_gpusolvers ${CUDA_LIBRARIES})
+target_link_libraries(deblur_2d_sb gadgetron_toolbox_gpucore gadgetron_toolbox_hostutils gadgetron_toolbox_gpuoperators gadgetron_toolbox_gpusolvers ${CUDA_LIBRARIES})
+target_link_libraries(blur_2d gadgetron_toolbox_gpucore gadgetron_toolbox_hostutils gadgetron_toolbox_gpuoperators gadgetron_toolbox_gpusolvers ${CUDA_LIBRARIES})
 
-install(TARGETS blur_2d deblur_2d_cg deblur_2d_sb DESTINATION bin)
+install(TARGETS blur_2d deblur_2d_cg deblur_2d_sb DESTINATION bin COMPONENT main)
 
diff --git a/apps/standalone/gpu/deblurring/3d/CMakeLists.txt b/apps/standalone/gpu/deblurring/3d/CMakeLists.txt
index 1d674bc..01b3825 100644
--- a/apps/standalone/gpu/deblurring/3d/CMakeLists.txt
+++ b/apps/standalone/gpu/deblurring/3d/CMakeLists.txt
@@ -6,8 +6,8 @@ add_executable(blur_3d blur_3d.cpp)
 add_executable(deblur_3d_cg deblur_3d_cg.cpp)
 add_executable(deblur_3d_sb deblur_3d_sb.cpp)
 
-target_link_libraries(deblur_3d_cg gpucore hostutils gpuoperators gpusolvers gpunfft ${CUDA_LIBRARIES})
-target_link_libraries(deblur_3d_sb gpucore hostutils gpuoperators gpusolvers gpunfft ${CUDA_LIBRARIES})
-target_link_libraries(blur_3d gpucore hostutils gpuoperators gpusolvers gpunfft ${CUDA_LIBRARIES})
+target_link_libraries(deblur_3d_cg gadgetron_toolbox_gpucore gadgetron_toolbox_hostutils gadgetron_toolbox_gpuoperators gadgetron_toolbox_gpusolvers gadgetron_toolbox_gpunfft ${CUDA_LIBRARIES})
+target_link_libraries(deblur_3d_sb gadgetron_toolbox_gpucore gadgetron_toolbox_hostutils gadgetron_toolbox_gpuoperators gadgetron_toolbox_gpusolvers gadgetron_toolbox_gpunfft ${CUDA_LIBRARIES})
+target_link_libraries(blur_3d gadgetron_toolbox_gpucore gadgetron_toolbox_hostutils gadgetron_toolbox_gpuoperators gadgetron_toolbox_gpusolvers gadgetron_toolbox_gpunfft ${CUDA_LIBRARIES})
 
-install(TARGETS blur_3d deblur_3d_cg deblur_3d_sb DESTINATION bin)
+install(TARGETS blur_3d deblur_3d_cg deblur_3d_sb DESTINATION bin COMPONENT main)
diff --git a/apps/standalone/gpu/denoising/2d/CMakeLists.txt b/apps/standalone/gpu/denoising/2d/CMakeLists.txt
index f4f6e68..6a9704d 100644
--- a/apps/standalone/gpu/denoising/2d/CMakeLists.txt
+++ b/apps/standalone/gpu/denoising/2d/CMakeLists.txt
@@ -4,6 +4,6 @@ endif (WIN32)
 
 add_executable(denoise_TV denoise_TV.cpp)
 
-target_link_libraries(denoise_TV gpucore hostutils gpusolvers gpuoperators ${CUDA_LIBRARIES})
+target_link_libraries(denoise_TV gadgetron_toolbox_gpucore gadgetron_toolbox_hostutils gadgetron_toolbox_gpusolvers gadgetron_toolbox_gpuoperators ${CUDA_LIBRARIES})
 
-install(TARGETS denoise_TV DESTINATION bin)
+install(TARGETS denoise_TV DESTINATION bin COMPONENT main)
diff --git a/apps/standalone/gpu/mri/nfft/2d/CMakeLists.txt b/apps/standalone/gpu/mri/nfft/2d/CMakeLists.txt
index 50c12ba..36fd0a5 100644
--- a/apps/standalone/gpu/mri/nfft/2d/CMakeLists.txt
+++ b/apps/standalone/gpu/mri/nfft/2d/CMakeLists.txt
@@ -2,16 +2,16 @@ include_directories(
   ${CMAKE_SOURCE_DIR}/toolboxes/mri/pmri/gpu
 )
 
-add_executable(nfft main_nfft.cpp)
-add_executable(nffth main_nffth.cpp)
+add_executable(nfft_2d_radial main_nfft.cpp)
+add_executable(nffth_2d_radial main_nffth.cpp)
 #add_executable(moco moco.cpp)
-add_executable(nffth_cg main_cg.cpp)
-add_executable(nffth_sb main_sb.cpp)
+add_executable(nffth_cg_2d_radial main_cg.cpp)
+add_executable(nffth_sb_2d_radial main_sb.cpp)
 
-target_link_libraries(nfft gpucore gpuoperators gpunfft hostutils ${CUDA_LIBRARIES})
-target_link_libraries(nffth gpucore gpuoperators gpunfft hostutils ${CUDA_LIBRARIES})
+target_link_libraries(nfft_2d_radial gadgetron_toolbox_gpucore gadgetron_toolbox_gpuoperators gadgetron_toolbox_gpunfft gadgetron_toolbox_hostutils ${CUDA_LIBRARIES})
+target_link_libraries(nffth_2d_radial gadgetron_toolbox_gpucore gadgetron_toolbox_gpuoperators gadgetron_toolbox_gpunfft gadgetron_toolbox_hostutils ${CUDA_LIBRARIES})
 #target_link_libraries(moco gpusolvers gpureg gpucore gpuparallelmri gpuoperators gpunfft hostutils ${CUDA_LIBRARIES})
-target_link_libraries(nffth_cg gpusolvers gpuoperators gpucore gpunfft hostutils ${CUDA_LIBRARIES})
-target_link_libraries(nffth_sb gpusolvers gpuoperators gpucore gpunfft hostutils ${CUDA_LIBRARIES})
+target_link_libraries(nffth_cg_2d_radial gadgetron_toolbox_gpusolvers gadgetron_toolbox_gpuoperators gadgetron_toolbox_gpucore gadgetron_toolbox_gpunfft gadgetron_toolbox_hostutils ${CUDA_LIBRARIES})
+target_link_libraries(nffth_sb_2d_radial gadgetron_toolbox_gpusolvers gadgetron_toolbox_gpuoperators gadgetron_toolbox_gpucore gadgetron_toolbox_gpunfft gadgetron_toolbox_hostutils ${CUDA_LIBRARIES})
 
-#install(TARGETS nfft moco nffth nffth_cg nffth_sb DESTINATION bin)
+install(TARGETS nfft_2d_radial nffth_2d_radial nffth_cg_2d_radial nffth_sb_2d_radial DESTINATION bin COMPONENT main)
diff --git a/apps/standalone/gpu/mri/nfft/2d/main_cg.cpp b/apps/standalone/gpu/mri/nfft/2d/main_cg.cpp
index 76d4685..a6040fc 100644
--- a/apps/standalone/gpu/mri/nfft/2d/main_cg.cpp
+++ b/apps/standalone/gpu/mri/nfft/2d/main_cg.cpp
@@ -87,8 +87,11 @@ int main( int argc, char** argv)
   timer = new GPUTimer("Computing density compensation weights");
   boost::shared_ptr< cuNDArray<_real> > dcw = compute_radial_dcw_golden_ratio_2d
     ( samples_per_profile, num_profiles, alpha, _real(1)/((_real)samples_per_profile/(_real)matrix_size.vec[0]) );
+  sqrt_inplace(dcw.get());
   delete timer;
 
+  samples *= *dcw;
+
   // Define and setup NFFT encoding operator
   boost::shared_ptr< cuNFFTOperator<_real,2> > E( new cuNFFTOperator<_real,2>() );
   
diff --git a/apps/standalone/gpu/mri/nfft/2d/main_nfft.cpp b/apps/standalone/gpu/mri/nfft/2d/main_nfft.cpp
index c80f060..8f29a26 100644
--- a/apps/standalone/gpu/mri/nfft/2d/main_nfft.cpp
+++ b/apps/standalone/gpu/mri/nfft/2d/main_nfft.cpp
@@ -124,15 +124,9 @@ int main( int argc, char** argv)
   plan.preprocess( traj.get(), plan_type::NFFT_PREP_C2NC );
   delete timer;
 
-  // Compute density compensation weights
-  timer = new GPUTimer("Computing density compensation weights");
-  boost::shared_ptr< cuNDArray<_real> > dcw = compute_radial_dcw_golden_ratio_2d
-    ( samples_per_profile, num_profiles, alpha,_real(1)/((_real)samples_per_profile/(_real)matrix_size.vec[0]) );
-  delete timer;
-
   // Gridder
   timer = new GPUTimer("Computing nfft");
-  plan.compute( image.get(), &samples, dcw.get(), plan_type::NFFT_FORWARDS_C2NC );
+  plan.compute( image.get(), &samples, 0, plan_type::NFFT_FORWARDS_C2NC );
   delete timer;
 
   //
diff --git a/apps/standalone/gpu/mri/nfft/2d/main_sb.cpp b/apps/standalone/gpu/mri/nfft/2d/main_sb.cpp
index 6771a1b..cab25fc 100644
--- a/apps/standalone/gpu/mri/nfft/2d/main_sb.cpp
+++ b/apps/standalone/gpu/mri/nfft/2d/main_sb.cpp
@@ -96,8 +96,13 @@ int main( int argc, char** argv)
   timer = new GPUTimer("Computing density compensation weights");
   boost::shared_ptr< cuNDArray<_real> > dcw = compute_radial_dcw_golden_ratio_2d
     ( samples_per_profile, num_profiles, alpha, _real(1)/((_real)samples_per_profile/(_real)matrix_size.vec[0]) );
+  sqrt_inplace(dcw.get());
+
+  samples *= *dcw;
+
   delete timer;
 
+
   // Define and setup NFFT encoding operator
   boost::shared_ptr< cuNFFTOperator<_real,2> > E( new cuNFFTOperator<_real,2>() );
   E->set_weight(lambda);
diff --git a/apps/standalone/gpu/mri/nfft/ms2d/CMakeLists.txt b/apps/standalone/gpu/mri/nfft/ms2d/CMakeLists.txt
index 5f2705c..a2600e8 100644
--- a/apps/standalone/gpu/mri/nfft/ms2d/CMakeLists.txt
+++ b/apps/standalone/gpu/mri/nfft/ms2d/CMakeLists.txt
@@ -1,7 +1,9 @@
-add_executable(nfft_ms nfft_main.cpp)
-add_executable(nffth_ms nffth_main.cpp)
+add_executable(nfft_2d_ms_radial nfft_main.cpp)
+add_executable(nffth_2d_ms_radial nffth_main.cpp)
+add_executable(nffth_2d_ms_generic nffth_generic.cpp)
 
-target_link_libraries(nfft_ms gpucore gpunfft hostutils ${CUDA_LIBRARIES})
-target_link_libraries(nffth_ms gpucore gpunfft hostutils ${CUDA_LIBRARIES})
+target_link_libraries(nfft_2d_ms_radial gadgetron_toolbox_gpucore gadgetron_toolbox_gpunfft gadgetron_toolbox_hostutils ${CUDA_LIBRARIES})
+target_link_libraries(nffth_2d_ms_radial gadgetron_toolbox_gpucore gadgetron_toolbox_gpunfft gadgetron_toolbox_hostutils ${CUDA_LIBRARIES})
+target_link_libraries(nffth_2d_ms_generic gadgetron_toolbox_gpucore gadgetron_toolbox_gpunfft gadgetron_toolbox_hostutils ${CUDA_LIBRARIES})
 
-install(TARGETS nfft_ms nffth_ms DESTINATION bin)
+install(TARGETS nfft_2d_ms_radial nffth_2d_ms_radial nffth_2d_ms_generic DESTINATION bin COMPONENT main)
diff --git a/apps/standalone/gpu/mri/nfft/ms2d/nfft_main.cpp b/apps/standalone/gpu/mri/nfft/ms2d/nfft_main.cpp
index 26a6d47..a5660cb 100644
--- a/apps/standalone/gpu/mri/nfft/ms2d/nfft_main.cpp
+++ b/apps/standalone/gpu/mri/nfft/ms2d/nfft_main.cpp
@@ -113,7 +113,7 @@ int main( int argc, char** argv)
   // Setup resulting samples array
   vector<size_t> samples_dims; 
   samples_dims.push_back( samples_per_profile ); samples_dims.push_back( profiles_per_frame ); samples_dims.push_back(frames_per_reconstruction);
-  cuNDArray<_complext> samples; samples.create(&samples_dims);
+  cuNDArray<_complext> samples(&samples_dims);
   
   // Initialize plan
   timer = new GPUTimer("Initializing plan");
diff --git a/apps/standalone/gpu/mri/nfft/ms2d/nffth_generic.cpp b/apps/standalone/gpu/mri/nfft/ms2d/nffth_generic.cpp
new file mode 100644
index 0000000..bb4080f
--- /dev/null
+++ b/apps/standalone/gpu/mri/nfft/ms2d/nffth_generic.cpp
@@ -0,0 +1,161 @@
+/*
+
+  Sample application of the NFFT toolbox: standalone "gridding" example.
+
+  -----------
+
+  The nfft is written generically and templatized to
+  - transform arbitrary trajectories
+  - transform an arbitrary number of dimensions (currently instantiated for 1d/2d/3d/4d)
+  - support both single and double precision
+
+  General principles of the implementation can be found in:
+
+  Accelerating the Non-equispaced Fast Fourier Transform on Commodity Graphics Hardware.
+  T.S. Sørensen, T. Schaeffter, K.Ø. Noe, M.S. Hansen. 
+  IEEE Transactions on Medical Imaging 2008; 27(4):538-547.
+
+  Real-time Reconstruction of Sensitivity Encoded Radial Magnetic Resonance Imaging Using a Graphics Processing Unit.
+  T.S. Sørensen, D. Atkinson, T. Schaeffter, M.S. Hansen.
+  IEEE Transactions on Medical Imaging 2009; 28(12):1974-1985. 
+
+  This example programme of the nfft reads a generic (arbitrary) trajectory from disk
+  and outputs gridded images from 2D multislice input ndarrays of the corresponding samples, trajectory, and density compensation weights.
+
+*/
+
+#include "cuNFFT.h"
+#include "radial_utilities.h"
+#include "vector_td_utilities.h"
+#include "hoNDArray_fileio.h"
+#include "cuNDArray_elemwise.h"
+#include "GPUTimer.h"
+#include "parameterparser.h"
+#include "complext.h"
+
+#include <iostream>
+
+using namespace std;
+using namespace Gadgetron;
+
+// Define desired precision
+typedef float _real; 
+typedef complext<_real> _complext;
+typedef reald<_real,2>::Type _reald2;
+typedef cuNFFT_plan<_real,2> plan_type;
+
+int main( int argc, char** argv) 
+{
+
+  //
+  // Parse command line
+  //
+
+  ParameterParser parms;
+  parms.add_parameter( 'd', COMMAND_LINE_STRING, 1, "Input samples file name (.cplx)", true );
+  parms.add_parameter( 't', COMMAND_LINE_STRING, 1, "Input trajectories file name (.real)", true );
+  parms.add_parameter( 'w', COMMAND_LINE_STRING, 1, "Input density compensation weights file name (.real)", true );
+  parms.add_parameter( 'r', COMMAND_LINE_STRING, 1, "Output image file name (.cplx)", true, "result.cplx" );
+  parms.add_parameter( 'm', COMMAND_LINE_INT,    1, "Matrix size", true );
+  parms.add_parameter( 'o', COMMAND_LINE_INT,    1, "Oversampled matrix size", true );
+  parms.add_parameter( 'f', COMMAND_LINE_INT,    1, "#frames/reconstruction (a negative value means all)", true, "-1" );
+  parms.add_parameter( 'k', COMMAND_LINE_FLOAT,  1, "Kernel width", true, "5.5" );
+
+  parms.parse_parameter_list(argc, argv);
+  if( parms.all_required_parameters_set() ){
+    cout << " Running reconstruction with the following parameters: " << endl;
+    parms.print_parameter_list();
+  }
+  else{
+    cout << " Some required parameters are missing: " << endl;
+    parms.print_parameter_list();
+    parms.print_usage();
+    return 1;
+  }
+  
+  GPUTimer *timer;
+  
+  // Load data from disk
+  timer = new GPUTimer("Loading data from disk");
+  boost::shared_ptr< hoNDArray<_complext> > host_samples = read_nd_array<_complext>((char*)parms.get_parameter('d')->get_string_value());
+  boost::shared_ptr< hoNDArray<_reald2> >   host_traj    = read_nd_array<_reald2>  ((char*)parms.get_parameter('t')->get_string_value());
+  boost::shared_ptr< hoNDArray<_real> >     host_dcw     = read_nd_array<_real>    ((char*)parms.get_parameter('w')->get_string_value());
+  delete timer;
+
+  /* {
+    std::vector<size_t> dims;
+    dims.push_back(host_traj->get_size(0));
+    dims.push_back(host_samples->get_number_of_elements()/dims[0]);
+    host_samples->reshape(&dims);
+    } */
+  
+  if( !(host_samples->get_number_of_dimensions() == 2 && host_traj->get_number_of_dimensions() == 2) ){
+    cout << endl << "Samples/trajectory arrays must be two-dimensional: (dim 0: samples/profile x #profiles/frame; dim 1: #frames). Quitting.\n" << endl;
+    return 1;
+  }
+
+  // Configuration from the command line
+  uint64d2 matrix_size = uint64d2(parms.get_parameter('m')->get_int_value(), parms.get_parameter('m')->get_int_value());
+  uint64d2 matrix_size_os = uint64d2(parms.get_parameter('o')->get_int_value(), parms.get_parameter('o')->get_int_value());
+  int frames_per_reconstruction = parms.get_parameter('f')->get_int_value();  
+  _real kernel_width = parms.get_parameter('k')->get_float_value();
+  _real alpha = (_real)matrix_size_os.vec[0]/(_real)matrix_size.vec[0];
+  
+  unsigned int num_frames = host_traj->get_size(1);  
+
+  if( frames_per_reconstruction < 0 ) frames_per_reconstruction = num_frames;
+  if( (unsigned int)frames_per_reconstruction > num_frames ) frames_per_reconstruction = num_frames;
+  
+  // Setup resulting image array
+  vector<size_t> image_dims = to_std_vector(matrix_size); 
+  image_dims.push_back((num_frames/frames_per_reconstruction)*frames_per_reconstruction);
+  cuNDArray<_complext> image(&image_dims);
+  clear(&image);
+  
+  // Initialize plan
+  timer = new GPUTimer("Initializing plan");
+  plan_type plan( matrix_size, matrix_size_os, kernel_width );
+  delete timer;
+
+  // Upload arrays to device
+  cuNDArray<_complext> _samples(host_samples.get());
+  cuNDArray<_reald2> _trajectory(host_traj.get());
+  cuNDArray<_real> dcw(host_dcw.get());
+
+  std::vector<size_t> dims_recon;
+  dims_recon.push_back(host_samples->get_size(0));
+  dims_recon.push_back(frames_per_reconstruction);
+
+  for( unsigned int iteration = 0; iteration < num_frames/frames_per_reconstruction; iteration++ ) {
+    
+    // Set samples/trajectory for sub-frames
+    cuNDArray<_complext> samples( dims_recon, _samples.get_data_ptr()+iteration*dims_recon[0]*dims_recon[1] );
+    cuNDArray<_reald2> trajectory( dims_recon, _trajectory.get_data_ptr()+iteration*dims_recon[0]*dims_recon[1] );
+
+    // Preprocess
+    timer = new GPUTimer("NFFT preprocessing");
+    plan.preprocess( &trajectory, plan_type::NFFT_PREP_NC2C );
+    delete timer;
+    
+    std::vector<size_t> image_dims = to_std_vector(matrix_size); 
+    image_dims.push_back(frames_per_reconstruction);
+    cuNDArray<_complext> tmp_image(&image_dims, image.get_data_ptr()+iteration*prod(matrix_size)*frames_per_reconstruction);
+
+    // Gridder
+    timer = new GPUTimer("Computing adjoint nfft (gridding)");
+    plan.compute( &samples, &tmp_image, &dcw, plan_type::NFFT_BACKWARDS_NC2C );
+    delete timer;
+  }
+  
+  //
+  // Output result
+  //
+  
+  timer = new GPUTimer("Output result to disk");
+  boost::shared_ptr< hoNDArray<_complext> > host_image = image.to_host();
+  write_nd_array<_complext>( host_image.get(), (char*)parms.get_parameter('r')->get_string_value() );
+  write_nd_array<_real>( abs(&image)->to_host().get(), "result.real" );
+  delete timer;
+
+  return 0;
+}
diff --git a/apps/standalone/gpu/mri/nfft/ms2d/nffth_main.cpp b/apps/standalone/gpu/mri/nfft/ms2d/nffth_main.cpp
index 13b8713..044d234 100644
--- a/apps/standalone/gpu/mri/nfft/ms2d/nffth_main.cpp
+++ b/apps/standalone/gpu/mri/nfft/ms2d/nffth_main.cpp
@@ -50,7 +50,7 @@ upload_data( unsigned int reconstruction, unsigned int samples_per_reconstructio
 	     hoNDArray<_complext> *host_data )
 {
   vector<size_t> dims; dims.push_back(samples_per_reconstruction);
-  cuNDArray<_complext> *data = new cuNDArray<_complext>(); data->create( &dims );
+  cuNDArray<_complext> *data = new cuNDArray<_complext>( &dims );
   cudaMemcpy( data->get_data_ptr(), 
 	      host_data->get_data_ptr()+reconstruction*samples_per_reconstruction, 
 	      samples_per_reconstruction*sizeof(_complext), cudaMemcpyHostToDevice );
@@ -151,7 +151,7 @@ int main( int argc, char** argv)
     
     vector<size_t> image_dims = to_std_vector(matrix_size); 
     image_dims.push_back(frames_per_reconstruction);
-    cuNDArray<_complext> tmp_image; tmp_image.create(&image_dims, image.get_data_ptr()+iteration*prod(matrix_size)*frames_per_reconstruction);
+    cuNDArray<_complext> tmp_image(&image_dims, image.get_data_ptr()+iteration*prod(matrix_size)*frames_per_reconstruction);
 
     // Gridder
     timer = new GPUTimer("Computing adjoint nfft (gridding)");
@@ -165,7 +165,7 @@ int main( int argc, char** argv)
   
   timer = new GPUTimer("Output result to disk");
   boost::shared_ptr< hoNDArray<_complext> > host_image = image.to_host();
-  write_nd_array<_complext>( host_image.get(), (char*)parms.get_parameter('d')->get_string_value() );
+  write_nd_array<_complext>( host_image.get(), (char*)parms.get_parameter('r')->get_string_value() );
   write_nd_array<_real>( abs(&image)->to_host().get(), "result.real" );
   delete timer;
 
diff --git a/apps/standalone/gpu/mri/sense/noncartesian/CMakeLists.txt b/apps/standalone/gpu/mri/sense/noncartesian/CMakeLists.txt
index 968542a..c319e2b 100644
--- a/apps/standalone/gpu/mri/sense/noncartesian/CMakeLists.txt
+++ b/apps/standalone/gpu/mri/sense/noncartesian/CMakeLists.txt
@@ -1,5 +1,11 @@
 include_directories( 
   ${CMAKE_SOURCE_DIR}/toolboxes/mri/pmri/gpu
-)
+  )
+
+add_executable(sense_cg_generic_2d generic_cg.cpp)
+
+target_link_libraries(sense_cg_generic_2d gadgetron_toolbox_gpuoperators gadgetron_toolbox_cpucore gadgetron_toolbox_gpucore gadgetron_toolbox_gpuparallelmri gadgetron_toolbox_gpunfft gadgetron_toolbox_hostutils gadgetron_toolbox_gpusolvers ${CUDA_LIBRARIES})
+
+install(TARGETS sense_cg_generic_2d DESTINATION bin COMPONENT main)
 
 add_subdirectory(radial)
diff --git a/apps/standalone/gpu/mri/sense/noncartesian/generic_cg.cpp b/apps/standalone/gpu/mri/sense/noncartesian/generic_cg.cpp
new file mode 100644
index 0000000..5ca90d0
--- /dev/null
+++ b/apps/standalone/gpu/mri/sense/noncartesian/generic_cg.cpp
@@ -0,0 +1,200 @@
+/*
+
+  Sample application of the NFFT toolbox: standalone non-Cartesian SENSE conjugate gradient reconstruction example.
+
+  -----------
+
+  The nfft is written generically and templatized to
+  - transform arbitrary trajectories
+  - transform an arbitrary number of dimensions (currently instantiated for 1d/2d/3d/4d)
+  - support both single and double precision
+
+  General principles of the implementation can be found in:
+
+  Accelerating the Non-equispaced Fast Fourier Transform on Commodity Graphics Hardware.
+  T.S. Sørensen, T. Schaeffter, K.Ø. Noe, M.S. Hansen. 
+  IEEE Transactions on Medical Imaging 2008; 27(4):538-547.
+
+  Real-time Reconstruction of Sensitivity Encoded Radial Magnetic Resonance Imaging Using a Graphics Processing Unit.
+  T.S. Sørensen, D. Atkinson, T. Schaeffter, M.S. Hansen.
+  IEEE Transactions on Medical Imaging 2009; 28(12):1974-1985. 
+
+  This example programme reads a generic (arbitrary) trajectory from disk and outputs images reconstructed
+  by conjugate gradient SENSE from input ndarrays of the corresponding samples, trajectory, density compensation weights, coil sensitivity maps, and regularization image.
+
+*/
+
+#include "cuNFFT.h"
+#include "radial_utilities.h"
+#include "vector_td_utilities.h"
+#include "hoNDArray_fileio.h"
+#include "cuNDArray_operators.h"
+#include "cuNDArray_elemwise.h"
+#include "cuNDArray_blas.h"
+#include "cuNDArray_utils.h"
+#include "cuNDArray_reductions.h"
+#include "cuNonCartesianSenseOperator.h"
+#include "cuCgPreconditioner.h"
+#include "cuImageOperator.h"
+#include "cuCgSolver.h"
+#include "GPUTimer.h"
+#include "parameterparser.h"
+#include "complext.h"
+
+#include <iostream>
+
+using namespace std;
+using namespace Gadgetron;
+
+// Define desired precision
+typedef float _real; 
+typedef complext<_real> _complext;
+typedef reald<_real,2>::Type _reald2;
+typedef cuNFFT_plan<_real,2> plan_type;
+
+int main( int argc, char** argv) 
+{
+
+  //
+  // Parse command line
+  //
+
+  ParameterParser parms;
+  parms.add_parameter( 'd', COMMAND_LINE_STRING, 1, "Input samples file name (.cplx)", true );
+  parms.add_parameter( 't', COMMAND_LINE_STRING, 1, "Input trajectories file name (.real)", true );
+  parms.add_parameter( 'w', COMMAND_LINE_STRING, 1, "Input density compensation weights file name (.real)", true );
+  parms.add_parameter( 'c', COMMAND_LINE_STRING, 1, "Input coil sensitivity maps file name (.cplx)", true );
+  parms.add_parameter( 'g', COMMAND_LINE_STRING, 1, "Input regularization image file name (.cplx)", true );
+  parms.add_parameter( 'r', COMMAND_LINE_STRING, 1, "Output image file name (.cplx)", true, "result.cplx" );
+  parms.add_parameter( 'i', COMMAND_LINE_INT,    1, "Number of iterations", true, "10" );
+  parms.add_parameter( 'l', COMMAND_LINE_FLOAT,  1, "Regularization weight", true, "0.3" );
+  parms.add_parameter( 'k', COMMAND_LINE_FLOAT,  1, "Kernel width", true, "5.5" );
+  parms.add_parameter( 'a', COMMAND_LINE_FLOAT,  1, "Oversampling factor", true, "2.0" );
+
+  parms.parse_parameter_list(argc, argv);
+  if( parms.all_required_parameters_set() ){
+    cout << " Running reconstruction with the following parameters: " << endl;
+    parms.print_parameter_list();
+  }
+  else{
+    cout << " Some required parameters are missing: " << endl;
+    parms.print_parameter_list();
+    parms.print_usage();
+    return 1;
+  }
+  
+  GPUTimer *timer;
+  
+  // Load data from disk
+  timer = new GPUTimer("Loading data from disk");
+  boost::shared_ptr< hoNDArray<_complext> > host_samples = read_nd_array<_complext> ((char*)parms.get_parameter('d')->get_string_value());
+  boost::shared_ptr< hoNDArray<_reald2> >   host_traj    = read_nd_array<_reald2>   ((char*)parms.get_parameter('t')->get_string_value());
+  boost::shared_ptr< hoNDArray<_real> >     host_dcw     = read_nd_array<_real>     ((char*)parms.get_parameter('w')->get_string_value());
+  boost::shared_ptr< hoNDArray<_complext> > host_csm     = read_nd_array<_complext> ((char*)parms.get_parameter('c')->get_string_value());
+  boost::shared_ptr< hoNDArray<_complext> > host_reg     = read_nd_array<_complext> ((char*)parms.get_parameter('g')->get_string_value());
+  delete timer;
+   
+  /* {
+    std::vector<size_t> dims;
+    dims.push_back(host_traj->get_size(0));
+    dims.push_back(host_samples->get_number_of_elements()/dims[0]);
+    host_samples->reshape(&dims);
+    } */
+
+  if( !(host_samples->get_number_of_dimensions() == 2 && host_traj->get_number_of_dimensions() == 2) ){
+    cout << endl << "Samples/trajectory arrays must be two-dimensional: (dim 0: samples/profile x #profiles/frame; dim 1: #frames). Quitting.\n" << endl;
+    return 1;
+  }
+
+  if( !(host_csm->get_number_of_dimensions() == 3 )){
+    cout << endl << "Coil sensitivity maps must be three-dimensional. Quitting.\n" << endl;
+    return 1;
+  }
+
+  if( !(host_reg->get_number_of_dimensions() == 2 )){
+    cout << endl << "Regularization image must be two-dimensional. Quitting.\n" << endl;
+    return 1;
+  }
+
+  // Configuration from the command line
+  uint64d2 matrix_size = uint64d2(host_csm->get_size(0), host_csm->get_size(0));
+  size_t _matrix_size_os = size_t((float)matrix_size[0]*parms.get_parameter('a')->get_float_value());
+  uint64d2 matrix_size_os = uint64d2(_matrix_size_os, _matrix_size_os);
+  int num_iterations = parms.get_parameter('i')->get_int_value();
+  _real kernel_width = parms.get_parameter('k')->get_float_value();
+  _real alpha = parms.get_parameter('a')->get_float_value();
+  _real kappa = parms.get_parameter('l')->get_float_value();
+  
+  unsigned int num_frames = host_traj->get_size(1);  
+  unsigned int num_coils = host_csm->get_size(2);
+
+  std::vector<size_t> recon_dims = to_std_vector(matrix_size);
+  recon_dims.push_back(num_frames);
+
+  // Upload arrays to device
+  cuNDArray<_complext> samples(host_samples.get());
+  cuNDArray<_reald2> trajectory(host_traj.get());
+  boost::shared_ptr< cuNDArray<_complext> > csm( new cuNDArray<_complext>(host_csm.get()));
+  boost::shared_ptr< cuNDArray<_complext> > reg_image( new cuNDArray<_complext>(host_reg.get()));
+  boost::shared_ptr< cuNDArray<_real> > dcw( new cuNDArray<_real>(host_dcw.get()));
+
+  // Define encoding matrix for non-Cartesian SENSE
+  boost::shared_ptr< cuNonCartesianSenseOperator<_real,2> > E( new cuNonCartesianSenseOperator<_real,2>() );  
+  E->setup( matrix_size, matrix_size_os, kernel_width );
+  E->set_dcw(dcw) ;
+  E->set_csm(csm);
+  E->set_domain_dimensions(&recon_dims);
+  E->set_codomain_dimensions(samples.get_dimensions().get());
+  E->preprocess(&trajectory);
+  
+  // Define regularization operator
+  boost::shared_ptr< cuImageOperator<_complext> > R( new cuImageOperator<_complext>() );
+  R->set_weight( kappa );
+  R->compute( reg_image.get() );
+
+  boost::shared_ptr< cuNDArray<_real> > _precon_weights = sum(abs_square(csm.get()).get(),2);
+  boost::shared_ptr< cuNDArray<_real> > R_diag = R->get();
+  *R_diag *= kappa;
+  *_precon_weights += *R_diag;
+  R_diag.reset();
+  reciprocal_sqrt_inplace(_precon_weights.get());
+  boost::shared_ptr< cuNDArray<_complext> > precon_weights = real_to_complex<_complext>( _precon_weights.get() );
+  _precon_weights.reset();
+
+  // Define preconditioning matrix
+  boost::shared_ptr< cuCgPreconditioner<_complext> > D( new cuCgPreconditioner<_complext>() );
+  D->set_weights( precon_weights );
+  precon_weights.reset();
+  csm.reset();
+
+  // Setup conjugate gradient solver
+  cuCgSolver<_complext> cg;
+  cg.set_preconditioner ( D );           // preconditioning matrix
+  cg.set_max_iterations( num_iterations );
+  cg.set_tc_tolerance( 1e-6 );
+  cg.set_output_mode( cuCgSolver< _complext>::OUTPUT_VERBOSE );
+  cg.set_encoding_operator( E );        // encoding matrix
+  cg.add_regularization_operator( R );  // regularization matrix
+
+  //
+  // Invoke conjugate gradient solver
+  //
+  
+  boost::shared_ptr< cuNDArray<_complext> > cgresult;
+  {
+    GPUTimer timer("GPU Conjugate Gradient solve");
+    cgresult = cg.solve(&samples);
+  }
+  
+  //
+  // Output result
+  //
+  
+  timer = new GPUTimer("Output result to disk");
+  boost::shared_ptr< hoNDArray<_complext> > host_image = cgresult->to_host();
+  write_nd_array<_complext>( host_image.get(), (char*)parms.get_parameter('r')->get_string_value() );
+  write_nd_array<_real>( abs(cgresult.get())->to_host().get(), "result.real" );
+  delete timer;
+
+  return 0;
+}
diff --git a/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio/CMakeLists.txt b/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio/CMakeLists.txt
index 9c78221..0b5b049 100644
--- a/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio/CMakeLists.txt
+++ b/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio/CMakeLists.txt
@@ -1,11 +1,11 @@
-add_executable(radial_sense_cg main_cg.cpp)
-add_executable(radial_sense_sbc main_sbc.cpp)
-add_executable(radial_sense_gpbb main_gpbb.cpp)
-MESSAGE("CUDA LIBRARIES:  ${CUDA_LIBRARIES}")
-target_link_libraries(radial_sense_cg gpuoperators cpucore gpucore gpuparallelmri gpunfft hostutils gpusolvers ${CUDA_LIBRARIES})
-target_link_libraries(radial_sense_sbc gpuoperators cpucore gpucore gpuparallelmri gpunfft hostutils gpusolvers ${CUDA_LIBRARIES})
-target_link_libraries(radial_sense_gpbb gpuoperators cpucore gpucore gpuparallelmri gpunfft hostutils gpusolvers ${CUDA_LIBRARIES})
+add_executable(sense_cg_radial_2d main_cg.cpp)
+add_executable(sense_sbc_radial_2d main_sbc.cpp)
+add_executable(sense_gpbb_radial_2d main_gpbb.cpp)
+add_executable(sense_nlcg_radial_2d main_nlcg.cpp)
 
-install(TARGETS radial_sense_cg radial_sense_sbc DESTINATION bin)
-install(TARGETS radial_sense_sbc radial_sense_sbc DESTINATION bin)
-install(TARGETS radial_sense_gpbb radial_sense_sbc DESTINATION bin)
+target_link_libraries(sense_cg_radial_2d gadgetron_toolbox_gpuoperators gadgetron_toolbox_cpucore gadgetron_toolbox_gpucore gadgetron_toolbox_gpuparallelmri gadgetron_toolbox_gpunfft gadgetron_toolbox_hostutils gadgetron_toolbox_gpusolvers ${CUDA_LIBRARIES})
+target_link_libraries(sense_sbc_radial_2d gadgetron_toolbox_gpuoperators gadgetron_toolbox_cpucore gadgetron_toolbox_gpucore gadgetron_toolbox_gpuparallelmri gadgetron_toolbox_gpunfft gadgetron_toolbox_hostutils gadgetron_toolbox_gpusolvers ${CUDA_LIBRARIES})
+target_link_libraries(sense_gpbb_radial_2d gadgetron_toolbox_gpuoperators gadgetron_toolbox_cpucore gadgetron_toolbox_gpucore gadgetron_toolbox_gpuparallelmri gadgetron_toolbox_gpunfft gadgetron_toolbox_hostutils gadgetron_toolbox_gpusolvers ${CUDA_LIBRARIES})
+target_link_libraries(sense_nlcg_radial_2d gadgetron_toolbox_gpuoperators gadgetron_toolbox_cpucore gadgetron_toolbox_gpucore gadgetron_toolbox_gpuparallelmri gadgetron_toolbox_gpunfft gadgetron_toolbox_hostutils gadgetron_toolbox_gpusolvers ${CUDA_LIBRARIES})
+
+install(TARGETS sense_cg_radial_2d sense_sbc_radial_2d sense_gpbb_radial_2d sense_nlcg_radial_2d DESTINATION bin COMPONENT main)
diff --git a/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio/main_cg.cpp b/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio/main_cg.cpp
index e62b605..462375a 100644
--- a/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio/main_cg.cpp
+++ b/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio/main_cg.cpp
@@ -124,8 +124,7 @@ int main(int argc, char** argv)
 
   E->setup( matrix_size, matrix_size_os, kernel_width );
 
-  // Notify encoding operator of dcw
-  E->set_dcw(dcw) ;
+
 
   // Define rhs buffer
   //
@@ -213,7 +212,9 @@ int main(int argc, char** argv)
   // 
   // Setup radial SENSE reconstructions
   //
-      
+  // Notify encoding operator of dcw
+  sqrt_inplace(dcw.get());
+	E->set_dcw(dcw);
   // Setup conjugate gradient solver
   cuCgSolver<_complext> cg;
   cg.set_preconditioner ( D );  // preconditioning matrix
@@ -253,6 +254,7 @@ int main(int argc, char** argv)
     // Set current trajectory and trigger NFFT preprocessing
     E->preprocess(traj.get());
     
+    *data *= *dcw;
     //
     // Invoke conjugate gradient solver
     //
diff --git a/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio/main_gpbb.cpp b/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio/main_gpbb.cpp
index 745017a..0f2ecc5 100644
--- a/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio/main_gpbb.cpp
+++ b/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio/main_gpbb.cpp
@@ -127,7 +127,7 @@ int main(int argc, char** argv)
   // Define encoding matrix for non-Cartesian SENSE
   boost::shared_ptr< cuNonCartesianSenseOperator<_real,2> > E( new cuNonCartesianSenseOperator<_real,2>() );
   E->setup( matrix_size, matrix_size_os, kernel_width );
-  E->set_dcw(dcw);
+
 
   // Define rhs buffer
   //
@@ -199,6 +199,8 @@ int main(int argc, char** argv)
   vector<size_t> data_dims;
   data_dims.push_back(samples_per_reconstruction); data_dims.push_back(num_coils);
 
+  sqrt_inplace(dcw.get());
+  E->set_dcw(dcw);
   E->set_domain_dimensions(recon_dims.get());
   E->set_codomain_dimensions(&data_dims);
 
@@ -225,7 +227,7 @@ int main(int argc, char** argv)
   solver.set_preconditioner ( D );
   solver.set_max_iterations( num_iterations );
   solver.set_output_mode( cuGpBbSolver<_complext>::OUTPUT_VERBOSE );
-  solver.set_x0( reg_image );
+//  solver.set_x0( reg_image );
 
   unsigned int num_reconstructions = num_profiles / profiles_per_reconstruction;
 
@@ -252,7 +254,7 @@ int main(int argc, char** argv)
     //
     // Split-Bregman solver
     //
-
+    *data *= *dcw;
     boost::shared_ptr< cuNDArray<_complext> > solve_result;
     {
       GPUTimer timer("GPU constrained Split Bregman solve");
diff --git a/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio/main_nlcg.cpp b/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio/main_nlcg.cpp
new file mode 100644
index 0000000..c441410
--- /dev/null
+++ b/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio/main_nlcg.cpp
@@ -0,0 +1,341 @@
+// Gadgetron includes
+#include "hoNDArray_fileio.h"
+#include "cuNDArray_elemwise.h"
+#include "cuNDArray_utils.h"
+#include "cuNDArray_reductions.h"
+#include "radial_utilities.h"
+#include "cuNonCartesianSenseOperator.h"
+#include "cuSenseBuffer.h"
+#include "cuCgPreconditioner.h"
+#include "cuPartialDerivativeOperator.h"
+#include "cuNlcgSolver.h"
+#include "cuTvOperator.h"
+#include "cuTvPicsOperator.h"
+#include "b1_map.h"
+#include "GPUTimer.h"
+#include "parameterparser.h"
+
+// Std includes
+#include <iostream>
+
+using namespace std;
+using namespace Gadgetron;
+
+// Define desired precision
+typedef float _real;
+typedef complext<_real> _complext;
+typedef reald<_real,2>::Type _reald2;
+
+// Upload the samples of one reconstruction (for all coils) from host to device.
+boost::shared_ptr< cuNDArray<_complext> >
+upload_data( unsigned int reconstruction, unsigned int samples_per_reconstruction, unsigned int total_samples_per_coil, unsigned int num_coils, hoNDArray<_complext> *host_data )
+{
+	vector<size_t> dims; dims.push_back(samples_per_reconstruction); dims.push_back(num_coils);
+	// Owned by a shared_ptr right from the allocation, so a throwing create() cannot leak the array
+	boost::shared_ptr< cuNDArray<_complext> > data( new cuNDArray<_complext>() ); data->create( &dims );
+	for( unsigned int i=0; i<num_coils; i++ ) // one copy per coil: pick this reconstruction's slice out of the coil's samples
+		cudaMemcpy( data->get_data_ptr()+i*samples_per_reconstruction,
+				host_data->get_data_ptr()+i*total_samples_per_coil+reconstruction*samples_per_reconstruction,
+				samples_per_reconstruction*sizeof(_complext), cudaMemcpyHostToDevice );
+	return data;
+}
+
+int main(int argc, char** argv)
+{
+	// Stand-alone 2D golden-ratio radial SENSE reconstruction driven by cuNlcgSolver.
+	// Parse command line
+	//
+
+	ParameterParser parms;
+	parms.add_parameter( 'd', COMMAND_LINE_STRING, 1, "Sample data file name", true );
+	parms.add_parameter( 'r', COMMAND_LINE_STRING, 1, "Result file name", true, "result.cplx" );
+	parms.add_parameter( 'm', COMMAND_LINE_INT,    1, "Matrix size", true );
+	parms.add_parameter( 'o', COMMAND_LINE_INT,    1, "Oversampled matrix size", true );
+	parms.add_parameter( 'p', COMMAND_LINE_INT,    1, "Profiles per frame", true );
+	parms.add_parameter( 'f', COMMAND_LINE_INT,    1, "Frames per reconstruction (negative meaning all)", true, "-1" );
+	parms.add_parameter( 'i', COMMAND_LINE_INT,    1, "Number of iterations", true, "10" );
+	parms.add_parameter( 'k', COMMAND_LINE_FLOAT,  1, "Kernel width", true, "5.5" );
+	parms.add_parameter( 'L', COMMAND_LINE_FLOAT,  1, "Lambda", true, "2e-7" );
+	parms.add_parameter( 'A', COMMAND_LINE_FLOAT,  1, "Alpha in [0;1] (for PICS)", true, "0.5" );
+
+	parms.parse_parameter_list(argc, argv);
+	if( parms.all_required_parameters_set() ){
+		cout << " Running reconstruction with the following parameters: " << endl;
+		parms.print_parameter_list();
+	}
+	else{
+		cout << " Some required parameters are missing: " << endl;
+		parms.print_parameter_list();
+		parms.print_usage();
+		return 1;
+	}
+
+	GPUTimer *timer;
+
+	// Load sample data from disk
+	timer = new GPUTimer("\nLoading data");
+	boost::shared_ptr< hoNDArray<_complext> > host_data = read_nd_array<_complext>((char*)parms.get_parameter('d')->get_string_value());
+	delete timer;
+
+	if( !(host_data->get_number_of_dimensions() == 3) ){
+		cout << endl << "Input data is not three-dimensional (#samples/profile x #profiles x #coils). Quitting!\n" << endl;
+		return 1;
+	}
+
+	// Configuration from the host data
+	unsigned int samples_per_profile = host_data->get_size(0);
+	unsigned int num_profiles = host_data->get_size(1);
+	unsigned int num_coils = host_data->get_size(2);
+
+	// Configuration from the command line
+	uint64d2 matrix_size = uint64d2(parms.get_parameter('m')->get_int_value(), parms.get_parameter('m')->get_int_value());
+	uint64d2 matrix_size_os = uint64d2(parms.get_parameter('o')->get_int_value(), parms.get_parameter('o')->get_int_value());
+	_real kernel_width = parms.get_parameter('k')->get_float_value();
+	unsigned int num_iterations = parms.get_parameter('i')->get_int_value();
+
+	unsigned int profiles_per_frame = parms.get_parameter('p')->get_int_value();
+	unsigned int frames_per_reconstruction = parms.get_parameter('f')->get_int_value();
+
+	_real lambda = (_real) parms.get_parameter('L')->get_float_value();
+	_real alpha = (_real) parms.get_parameter('A')->get_float_value();
+
+	if( alpha>1 ) alpha = 1;
+	if( alpha<0 ) alpha = 0;
+
+	// Silent correction of invalid command line parameters (clamp to valid range); 0 profiles/frame would divide by zero below
+	if( profiles_per_frame == 0 || profiles_per_frame > num_profiles ) profiles_per_frame = num_profiles;
+	const unsigned int max_frames = num_profiles / profiles_per_frame; // the variable is unsigned, so test the signed parameter for "negative meaning all"
+	if( parms.get_parameter('f')->get_int_value() < 0 || frames_per_reconstruction > max_frames ) frames_per_reconstruction = max_frames;
+
+	unsigned int profiles_per_reconstruction = frames_per_reconstruction*profiles_per_frame;
+	unsigned int samples_per_frame = profiles_per_frame*samples_per_profile;
+	unsigned int samples_per_reconstruction = profiles_per_reconstruction*samples_per_profile;
+
+	cout << endl << "#samples/profile: " << samples_per_profile;
+	cout << endl << "#profiles/frame: " << profiles_per_frame;
+	cout << endl << "#profiles: " << num_profiles;
+	cout << endl << "#coils: " << num_coils;
+	cout << endl << "#frames/reconstruction " << frames_per_reconstruction;
+	cout << endl << "#profiles/reconstruction " << profiles_per_reconstruction;
+	cout << endl << "#samples/reconstruction " << samples_per_reconstruction << endl << endl;
+
+	// Density compensation weights are constant throughout all reconstructions
+	boost::shared_ptr< cuNDArray<_real> > dcw = compute_radial_dcw_golden_ratio_2d
+			( samples_per_profile, profiles_per_frame, (_real)matrix_size_os[0]/(_real)matrix_size[0],
+					_real(1)/((_real)samples_per_profile/(_real)max(matrix_size[0],matrix_size[1])) );
+	// Define rhs buffer
+	//
+
+	boost::shared_ptr< cuSenseBuffer<_real,2> > rhs_buffer( new cuSenseBuffer<_real,2>() );
+
+	rhs_buffer->setup( matrix_size, matrix_size_os, kernel_width, num_coils, 8, 16 );
+	rhs_buffer->set_dcw(dcw);
+
+	//
+	// Compute CSM using accumulation in the rhs buffer
+	//
+
+	timer = new GPUTimer("CSM and regularization estimation");
+
+	// Go through all the data...
+	for( unsigned int iteration = 0; iteration < num_profiles/profiles_per_frame; iteration++ ) {
+
+		// Define trajectories
+		boost::shared_ptr< cuNDArray<_reald2> > traj = compute_radial_trajectory_golden_ratio_2d<_real>
+		( samples_per_profile, profiles_per_frame, 1, iteration*profiles_per_frame );
+
+		// Upload data
+		boost::shared_ptr< cuNDArray<_complext> > csm_data = upload_data
+				( iteration, samples_per_frame, num_profiles*samples_per_profile, num_coils, host_data.get() );
+
+		// Add frame to rhs buffer
+		rhs_buffer->add_frame_data( csm_data.get(), traj.get() );
+	}
+
+	// Estimate csm
+	boost::shared_ptr< cuNDArray<_complext> > acc_images = rhs_buffer->get_accumulated_coil_images();
+	*acc_images *= rhs_buffer->get_normalization_factor();
+	boost::shared_ptr< cuNDArray<_complext> > csm = estimate_b1_map<_real,2>( acc_images.get() );
+
+
+	// Define encoding matrix for non-Cartesian SENSE
+	boost::shared_ptr< cuNonCartesianSenseOperator<_real,2> > E( new cuNonCartesianSenseOperator<_real,2>() );
+	E->setup( matrix_size, matrix_size_os, kernel_width );
+
+
+
+	E->set_csm(csm);
+
+	std::vector<size_t> reg_dims = to_std_vector(matrix_size);
+	cuNDArray<_complext> _reg_image = cuNDArray<_complext>(&reg_dims);
+	E->mult_csm_conj_sum( acc_images.get(), &_reg_image );
+
+	// Duplicate the regularization image to 'frames_per_reconstruction' frames
+	boost::shared_ptr<cuNDArray<_complext> > reg_image = expand( &_reg_image, frames_per_reconstruction );
+
+	acc_images.reset();
+
+	// Define preconditioning weights
+	//
+
+	boost::shared_ptr< cuNDArray<_real> > _precon_weights = sum(abs_square(csm.get()).get(),2);
+	reciprocal_sqrt_inplace(_precon_weights.get());
+	boost::shared_ptr< cuNDArray<_complext> > precon_weights = real_to_complex<_complext>( _precon_weights.get() );
+	_precon_weights.reset();
+
+	// Define preconditioning matrix
+	boost::shared_ptr< cuCgPreconditioner<_complext> > D( new cuCgPreconditioner<_complext>() );
+	D->set_weights( precon_weights );
+	//precon_weights.reset();
+	csm.reset();
+
+	boost::shared_ptr< std::vector<size_t> > recon_dims( new std::vector<size_t> );
+	*recon_dims = to_std_vector(matrix_size); recon_dims->push_back(frames_per_reconstruction);
+
+	delete timer;
+
+	//
+	// Setup radial SENSE reconstructions
+	//
+
+	vector<size_t> data_dims;
+	data_dims.push_back(samples_per_reconstruction); data_dims.push_back(num_coils);
+
+	E->set_domain_dimensions(recon_dims.get());
+	E->set_codomain_dimensions(&data_dims);
+
+	// Setup non-linear conjugate gradient (NLCG) solver
+	cuNlcgSolver<_complext> solver;
+
+	// Define regularization operators
+	// We need "a pair" for PICCS
+	// NOTE: Rx..Rz2 are only referenced by the commented-out group-operator calls further down
+
+	boost::shared_ptr< cuPartialDerivativeOperator<_complext,3> >
+	Rx( new cuPartialDerivativeOperator<_complext,3>(0) );
+	Rx->set_weight( (1.0f-alpha)*lambda );
+	Rx->set_domain_dimensions(recon_dims.get());
+	Rx->set_codomain_dimensions(recon_dims.get());
+
+	boost::shared_ptr< cuPartialDerivativeOperator<_complext,3> >
+	Ry( new cuPartialDerivativeOperator<_complext,3>(1) );
+	Ry->set_weight( (1.0f-alpha)*lambda );
+	Ry->set_domain_dimensions(recon_dims.get());
+	Ry->set_codomain_dimensions(recon_dims.get());
+
+	boost::shared_ptr< cuPartialDerivativeOperator<_complext,3> >
+	Rz( new cuPartialDerivativeOperator<_complext,3>(2) );
+	Rz->set_weight( (1.0f-alpha)*lambda );
+	Rz->set_domain_dimensions(recon_dims.get());
+	Rz->set_codomain_dimensions(recon_dims.get());
+
+	boost::shared_ptr< cuPartialDerivativeOperator<_complext,3> >
+	Rx2( new cuPartialDerivativeOperator<_complext,3>(0) );
+	Rx2->set_weight( alpha*lambda );
+	Rx2->set_domain_dimensions(recon_dims.get());
+	Rx2->set_codomain_dimensions(recon_dims.get());
+
+	boost::shared_ptr< cuPartialDerivativeOperator<_complext,3> >
+	Ry2( new cuPartialDerivativeOperator<_complext,3>(1) );
+	Ry2->set_weight( alpha*lambda );
+	Ry2->set_domain_dimensions(recon_dims.get());
+	Ry2->set_codomain_dimensions(recon_dims.get());
+
+	boost::shared_ptr< cuPartialDerivativeOperator<_complext,3> >
+	Rz2( new cuPartialDerivativeOperator<_complext,3>(2) );
+	Rz2->set_weight( alpha*lambda );
+	Rz2->set_domain_dimensions(recon_dims.get());
+	Rz2->set_codomain_dimensions(recon_dims.get());
+
+
+	// Add "TV" regularization
+	if( (alpha<1.0f) && (lambda>0.0f)){
+		boost::shared_ptr<cuTvOperator<_complext,3> > TV(new cuTvOperator<_complext,3>);
+		TV->set_weight(lambda*(1.0f-alpha));
+		solver.add_nonlinear_operator(TV);
+		/*solver.add_regularization_group_operator(Rx);
+		solver.add_regularization_group_operator(Ry);
+		solver.add_regularization_group_operator(Rz);
+		solver.add_group(1);*/
+		std::cout << "Total variation in use " << std::endl;
+	}
+
+	// Add "PICS" regularization
+	boost::shared_ptr<cuTvPicsOperator<_complext,3> > PICS;
+	if( (alpha>0.0f) && (lambda>0.0f)){
+		PICS = boost::shared_ptr<cuTvPicsOperator<_complext,3> >(new cuTvPicsOperator<_complext,3>);
+		PICS->set_weight(lambda*alpha);
+		PICS->set_prior(reg_image);
+		solver.add_nonlinear_operator(PICS);
+		/*
+		solver.add_regularization_group_operator(Rx2);
+		solver.add_regularization_group_operator(Ry2);
+		solver.add_regularization_group_operator(Rz2);
+		solver.add_group(reg_image,1);*/
+		std::cout << "PICS in use " << std::endl;
+	}
+	// From here on dcw holds sqrt(dcw): E is weighted with it and each data block is pre-multiplied by it below
+	sqrt_inplace(dcw.get());
+	E->set_dcw(dcw);
+	solver.set_encoding_operator( E );
+	solver.set_preconditioner ( D );
+	solver.set_max_iterations( num_iterations );
+	solver.set_output_mode( cuNlcgSolver<_complext>::OUTPUT_VERBOSE );
+	//solver.set_x0( reg_image );
+
+	unsigned int num_reconstructions = num_profiles / profiles_per_reconstruction;
+
+	// Allocate space for result
+	std::vector<size_t> res_dims = to_std_vector(matrix_size);
+	res_dims.push_back(frames_per_reconstruction*num_reconstructions);
+	cuNDArray<_complext> result = cuNDArray<_complext>(&res_dims);
+
+	timer = new GPUTimer("Full SENSE reconstruction with TV regularization.");
+
+	for( unsigned int reconstruction = 0; reconstruction<num_reconstructions; reconstruction++ ){
+
+		// Determine trajectories
+		boost::shared_ptr< cuNDArray<_reald2> > traj = compute_radial_trajectory_golden_ratio_2d<_real>
+		( samples_per_profile, profiles_per_frame, frames_per_reconstruction, reconstruction*profiles_per_reconstruction );
+
+		// Upload data
+		boost::shared_ptr< cuNDArray<_complext> > data = upload_data
+				( reconstruction, samples_per_reconstruction, num_profiles*samples_per_profile, num_coils, host_data.get() );
+
+		*data *= *dcw;
+		// Set current trajectory and trigger NFFT preprocessing
+		E->preprocess(traj.get());
+
+		//
+		// Non-linear conjugate gradient solver
+		//
+
+		boost::shared_ptr< cuNDArray<_complext> > solve_result;
+		{
+			GPUTimer timer("GPU constrained Split Bregman solve");
+			solve_result = solver.solve(data.get());
+		}
+
+		vector<size_t> tmp_dims = to_std_vector(matrix_size); tmp_dims.push_back(frames_per_reconstruction);
+		cuNDArray<_complext> tmp(&tmp_dims, result.get_data_ptr()+reconstruction*prod(matrix_size)*frames_per_reconstruction );
+
+		// Copy solve_result into result (tmp is a view on this reconstruction's slice of result)
+		tmp = *solve_result;
+	}
+
+	delete timer;
+
+	// All done, write out the result
+
+	timer = new GPUTimer("Writing out result");
+
+	boost::shared_ptr< hoNDArray<_complext> > host_result = result.to_host();
+	write_nd_array<_complext>(host_result.get(), (char*)parms.get_parameter('r')->get_string_value());
+
+	boost::shared_ptr< hoNDArray<_real> > host_norm = abs(&result)->to_host();
+	write_nd_array<_real>( host_norm.get(), "result.real" );
+
+	delete timer;
+
+	return 0;
+}
diff --git a/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio/main_sbc.cpp b/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio/main_sbc.cpp
index 6a7b229..21fec1f 100644
--- a/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio/main_sbc.cpp
+++ b/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio/main_sbc.cpp
@@ -131,7 +131,7 @@ int main(int argc, char** argv)
   boost::shared_ptr< cuNonCartesianSenseOperator<_real,2> > E( new cuNonCartesianSenseOperator<_real,2>() );  
   E->set_weight( mu );
   E->setup( matrix_size, matrix_size_os, kernel_width );
-  E->set_dcw(dcw);
+
 
   // Define rhs buffer
   //
@@ -246,6 +246,8 @@ int main(int argc, char** argv)
   E->set_domain_dimensions(recon_dims.get());
   E->set_codomain_dimensions(&data_dims);
 
+  sqrt_inplace(dcw.get());
+  E->set_dcw(dcw);
   // Setup split-Bregman solver
   cuSbcCgSolver<_complext> sb;
   sb.set_encoding_operator( E );
@@ -297,6 +299,7 @@ int main(int argc, char** argv)
     // Set current trajectory and trigger NFFT preprocessing
     E->preprocess(traj.get());
         
+    *data *= *dcw;
     //
     // Split-Bregman solver
     //
diff --git a/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio_gui/CMakeLists.txt b/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio_gui/CMakeLists.txt
index 70c502d..afb4616 100644
--- a/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio_gui/CMakeLists.txt
+++ b/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio_gui/CMakeLists.txt
@@ -17,15 +17,15 @@ qt4_wrap_ui( UI_HEADERS ${UI_UIFILES} )
 set(UI_MOC_HEADERS radialSenseAppMainWidget.h reconWidget.h GLReconWidget.h)
 qt4_wrap_cpp (UI_MOC_OUTFILES ${UI_MOC_HEADERS})
 
-add_executable(radial_sense_gr_gui main.cpp ${UI_MOC_OUTFILES}
+add_executable(sense_2d_golden_radial_gui main.cpp ${UI_MOC_OUTFILES}
 radialSenseAppMainWidget.cpp reconWidget.cpp GLReconWidget.cpp ${UI_HEADERS} )
 
-target_link_libraries(radial_sense_gr_gui gpucore gpuparallelmri
-gpunfft hostutils gpusolvers gpuoperators ${CUDA_LIBRARIES} ${QT_QTGUI_LIBRARY} ${GLEW_LIBRARY}
+target_link_libraries(sense_2d_golden_radial_gui gadgetron_toolbox_gpucore gadgetron_toolbox_gpuparallelmri
+gadgetron_toolbox_gpunfft gadgetron_toolbox_hostutils gadgetron_toolbox_gpusolvers gadgetron_toolbox_gpuoperators ${CUDA_LIBRARIES} ${QT_QTGUI_LIBRARY} ${GLEW_LIBRARY}
 ${QT_QTCORE_LIBRARY} ${QT_QTOPENGL_LIBRARY} ${OPENGL_gl_LIBRARY} )
 
 if (WIN32)
-set_target_properties( radial_sense_gr_gui PROPERTIES LINK_FLAGS "/FORCE:MULTIPLE") 
+set_target_properties( sense_2d_golden_radial_gui PROPERTIES LINK_FLAGS "/FORCE:MULTIPLE") 
 endif (WIN32)
 
-install(TARGETS radial_sense_gr_gui DESTINATION bin)
+install(TARGETS sense_2d_golden_radial_gui DESTINATION bin COMPONENT main)
diff --git a/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio_kt/CMakeLists.txt b/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio_kt/CMakeLists.txt
index e780877..5826cb5 100644
--- a/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio_kt/CMakeLists.txt
+++ b/apps/standalone/gpu/mri/sense/noncartesian/radial/2d_golden_ratio_kt/CMakeLists.txt
@@ -1,5 +1,5 @@
-add_executable(radial_ktsense  main.cpp)
+add_executable(ktsense_radial_2d  main.cpp)
 
-target_link_libraries(radial_ktsense gpucore gpuparallelmri gpuoperators gpunfft hostutils gpusolvers ${CUDA_LIBRARIES})
+target_link_libraries(ktsense_radial_2d gadgetron_toolbox_gpucore gadgetron_toolbox_gpuparallelmri gadgetron_toolbox_gpuoperators gadgetron_toolbox_gpunfft gadgetron_toolbox_hostutils gadgetron_toolbox_gpusolvers ${CUDA_LIBRARIES})
 
-install(TARGETS radial_ktsense DESTINATION bin)
+install(TARGETS ktsense_radial_2d DESTINATION bin COMPONENT main)
diff --git a/apps/standalone/gpu/mri/sense/noncartesian/radial/CMakeLists.txt b/apps/standalone/gpu/mri/sense/noncartesian/radial/CMakeLists.txt
index 6add522..c5de847 100644
--- a/apps/standalone/gpu/mri/sense/noncartesian/radial/CMakeLists.txt
+++ b/apps/standalone/gpu/mri/sense/noncartesian/radial/CMakeLists.txt
@@ -1,6 +1,6 @@
 include_directories(
   ${CMAKE_SOURCE_DIR}/toolboxes/pmri/gpu
-  )
+)
 
 add_subdirectory(2d_golden_ratio)
 add_subdirectory(2d_golden_ratio_kt)
diff --git a/apps/standalone/gpu/registration/2d/CMakeLists.txt b/apps/standalone/gpu/registration/2d/CMakeLists.txt
index a0b8189..9d33eb7 100644
--- a/apps/standalone/gpu/registration/2d/CMakeLists.txt
+++ b/apps/standalone/gpu/registration/2d/CMakeLists.txt
@@ -1,47 +1,47 @@
 add_executable(register_HS_2d_gpu register_HS_2d.cpp)
 add_executable(register_CGHS_2d_gpu register_CGHS_2d.cpp)
 add_executable(register_CK_2d_gpu register_CK_2d.cpp)
-add_executable(test_reg_sense_recon test_reg_sense_recon.cpp)
+#add_executable(test_reg_sense_recon test_reg_sense_recon.cpp)
 
 include_directories(
   ${CMAKE_SOURCE_DIR}/toolboxes/mri/pmri/gpu
 )
 
 target_link_libraries(register_HS_2d_gpu 
-  hostutils 
-  gpureg 
-  gpucore 
-  gpuoperators
-  gpusolvers 
+  gadgetron_toolbox_hostutils 
+  gadgetron_toolbox_gpureg 
+  gadgetron_toolbox_gpucore 
+  gadgetron_toolbox_gpuoperators
+  gadgetron_toolbox_gpusolvers 
   ${CUDA_LIBRARIES}
   )
 
 target_link_libraries(register_CK_2d_gpu
-  hostutils 
-  gpureg 
-  gpucore 
-  gpuoperators
-  gpusolvers 
+  gadgetron_toolbox_hostutils 
+  gadgetron_toolbox_gpureg 
+  gadgetron_toolbox_gpucore 
+  gadgetron_toolbox_gpuoperators
+  gadgetron_toolbox_gpusolvers 
   ${CUDA_LIBRARIES}
   )
 
-target_link_libraries(test_reg_sense_recon 
-  hostutils 
-  gpureg 
-  gpucore 
-  gpuoperators
-  gpusolvers 
-  gpunfft 
-  gpuparallelmri 
-  ${CUDA_LIBRARIES}
-  )
+#target_link_libraries(test_reg_sense_recon 
+#  hostutils 
+#  gpureg 
+#  gpucore 
+#  gpuoperators
+#  gpusolvers 
+#  gpunfft 
+#  gpuparallelmri 
+#  ${CUDA_LIBRARIES}
+#  )
   
   target_link_libraries(register_CGHS_2d_gpu 
-  hostutils 
-  gpureg 
-  gpucore 
-  gpuoperators
-  gpusolvers 
+  gadgetron_toolbox_hostutils 
+  gadgetron_toolbox_gpureg 
+  gadgetron_toolbox_gpucore 
+  gadgetron_toolbox_gpuoperators
+  gadgetron_toolbox_gpusolvers 
   ${CUDA_LIBRARIES}
   )
 
@@ -49,4 +49,4 @@ install(TARGETS
   register_HS_2d_gpu
   register_CGHS_2d_gpu  
   register_CK_2d_gpu 
-  DESTINATION bin)
+  DESTINATION bin COMPONENT main)
diff --git a/apps/standalone/gpu/registration/3d/CMakeLists.txt b/apps/standalone/gpu/registration/3d/CMakeLists.txt
index 78abd8c..3980478 100644
--- a/apps/standalone/gpu/registration/3d/CMakeLists.txt
+++ b/apps/standalone/gpu/registration/3d/CMakeLists.txt
@@ -1,12 +1,12 @@
 add_executable(register_CK_3d_gpu register_CK_3d.cpp)
 
 target_link_libraries(register_CK_3d_gpu
-  hostutils 
-  gpureg 
-  gpucore 
-  gpuoperators
-  gpusolvers 
+  gadgetron_toolbox_hostutils 
+  gadgetron_toolbox_gpureg 
+  gadgetron_toolbox_gpucore 
+  gadgetron_toolbox_gpuoperators
+  gadgetron_toolbox_gpusolvers 
   ${CUDA_LIBRARIES}
   )
 
-install(TARGETS register_CK_3d_gpu DESTINATION bin)
+install(TARGETS register_CK_3d_gpu DESTINATION bin COMPONENT main)
diff --git a/chroot/CMakeLists.txt b/chroot/CMakeLists.txt
new file mode 100644
index 0000000..f849163
--- /dev/null
+++ b/chroot/CMakeLists.txt
@@ -0,0 +1,76 @@
+configure_file("start-gadgetron.sh.in" ${CMAKE_CURRENT_BINARY_DIR}/start-gadgetron.sh @ONLY)
+configure_file("enter-chroot-env.sh.in" ${CMAKE_CURRENT_BINARY_DIR}/enter-chroot-env.sh @ONLY)
+# Generate the chroot helper scripts and web-app configs from their .in templates (@ONLY: only @VAR@ references are substituted)
+configure_file("gadgetron-dependency-query.sh.in" ${CMAKE_CURRENT_BINARY_DIR}/gadgetron-dependency-query.sh @ONLY)
+configure_file("siemens_to_ismrmrd.sh.in" ${CMAKE_CURRENT_BINARY_DIR}/siemens_to_ismrmrd.sh @ONLY)
+configure_file("gadgetron_ismrmrd_client.sh.in" ${CMAKE_CURRENT_BINARY_DIR}/gadgetron_ismrmrd_client.sh @ONLY)
+
+configure_file("${CMAKE_SOURCE_DIR}/apps/gadgetron/webapp/gadgetron_web_app.in" ${CMAKE_CURRENT_BINARY_DIR}/gadgetron_web_app.cfg @ONLY)
+configure_file("${CMAKE_SOURCE_DIR}/apps/gadgetron/webapp/gadgetron_web.conf.in" ${CMAKE_CURRENT_BINARY_DIR}/gadgetron_web.conf @ONLY)
+configure_file("${CMAKE_SOURCE_DIR}/apps/gadgetron/webapp/gadgetron_web_ld.conf.in" ${CMAKE_CURRENT_BINARY_DIR}/gadgetron_web_ld.conf @ONLY)
+
+configure_file("run-webapp.sh.in" ${CMAKE_CURRENT_BINARY_DIR}/run-webapp.sh @ONLY)
+
+configure_file("copy-cuda-lib.sh.in" ${CMAKE_CURRENT_BINARY_DIR}/copy-cuda-lib.sh @ONLY)
+# LIBRARY_PATHS: colon-separated list of library directories, passed to create_chroot.sh below
+set (LIBRARY_PATHS ${ISMRMRD_LIB_DIR})
+set (LIBRARY_PATHS ${LIBRARY_PATHS}:${MKL_LIB_DIR})
+
+FIND_LIBRARY( CUDA_LIBRARY NAMES libcuda.so )
+# Offsets of the two library file names inside the CUDA_LIBRARIES string (-1 when not present)
+string(FIND "${CUDA_LIBRARIES}" "libcudart.so" LIBCUDART_LOC)
+string(FIND "${CUDA_LIBRARIES}" "libcuda.so" LIBCUDA_LOC)
+# Derive CUDART_LIB_DIR from CUDA_LIBRARIES; the branch taken depends on whether/where libcuda.so appears in it
+if (LIBCUDA_LOC EQUAL -1)
+    get_filename_component(CUDART_LIB_DIR ${CUDA_LIBRARIES} PATH)
+else (LIBCUDA_LOC EQUAL -1)
+    if(LIBCUDART_LOC LESS LIBCUDA_LOC)
+        string(SUBSTRING "${CUDA_LIBRARIES}" 0 ${LIBCUDART_LOC} CUDART_LIB_DIR)
+    else(LIBCUDART_LOC LESS LIBCUDA_LOC)
+        string(SUBSTRING "${CUDA_LIBRARIES}" ${LIBCUDA_LOC} -1 CUDART_LIB_TEMP)
+        string(SUBSTRING "${CUDART_LIB_TEMP}" 10 -1 CUDART_LIB_TEMP2)
+        get_filename_component(CUDART_LIB_DIR ${CUDART_LIB_TEMP2} PATH)
+    endif(LIBCUDART_LOC LESS LIBCUDA_LOC)
+endif (LIBCUDA_LOC EQUAL -1)
+# Directories of the libcuda.so found above and of the cuFFT / cuBLAS libraries
+get_filename_component(CUDA_LIB_DIR ${CUDA_LIBRARY} PATH)
+get_filename_component(CUDAFFT_LIB_DIR ${CUDA_CUFFT_LIBRARIES} PATH)
+get_filename_component(CUDABLAS_LIB_DIR ${CUDA_CUBLAS_LIBRARIES} PATH)
+
+set (LIBRARY_PATHS ${LIBRARY_PATHS}:${CUDART_LIB_DIR}:${CUDA_LIB_DIR}:${CUDAFFT_LIB_DIR}:${CUDABLAS_LIB_DIR})
+# NOTE(review): copy-cuda-lib.sh.in was already configured above with identical arguments; this call repeats it
+configure_file("copy-cuda-lib.sh.in" ${CMAKE_CURRENT_BINARY_DIR}/copy-cuda-lib.sh @ONLY)
+
+find_program(SIEMENS_TO_ISMRMRD_EXE siemens_to_ismrmrd)
+# 'chroot' target: runs create_chroot.sh through sudo; the siemens_to_ismrmrd path is appended only when the program was found
+add_custom_target(chroot)
+if (SIEMENS_TO_ISMRMRD_EXE)
+  add_custom_command(
+    TARGET chroot
+    POST_BUILD
+    VERBATIM
+    COMMENT "Creating chroot tar file"
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    COMMAND sudo bash create_chroot.sh ${CMAKE_INSTALL_PREFIX}
+    ${CMAKE_BINARY_DIR}
+    ${CMAKE_SOURCE_DIR}
+    ${GADGETRON_GIT_SHA1_HASH}
+    ${LIBRARY_PATHS}
+    ${CUDA_LIBRARY}
+    ${SIEMENS_TO_ISMRMRD_EXE}
+    )
+else(SIEMENS_TO_ISMRMRD_EXE)
+  add_custom_command(
+    TARGET chroot
+    POST_BUILD
+    VERBATIM
+    COMMENT "Creating chroot tar file"
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    COMMAND sudo bash create_chroot.sh ${CMAKE_INSTALL_PREFIX}
+    ${CMAKE_BINARY_DIR}
+    ${CMAKE_SOURCE_DIR}
+    ${GADGETRON_GIT_SHA1_HASH}
+    ${LIBRARY_PATHS}
+    ${CUDA_LIBRARY}
+    )  
+endif(SIEMENS_TO_ISMRMRD_EXE)
diff --git a/chroot/README.rst b/chroot/README.rst
new file mode 100644
index 0000000..2053222
--- /dev/null
+++ b/chroot/README.rst
@@ -0,0 +1,35 @@
+Running Gadgetron in chroot
+============================
+
+
+Introduction
+------------
+
+
+Creating a chroot environment
+-----------------------------
+
+First we need to install the tools required to create chroot environments::
+
+  sudo apt-get install dchroot debootstrap
+
+Next we need to add an appropriate configuration to ``/etc/schroot/schroot.conf``::
+  
+  [trusty]
+  description=trusty
+  location=/var/chroot/trusty
+  priority=3
+  users=doko
+  groups=sbuild
+  root-groups=root
+
+Create the folder where we will be making the root file system::
+  
+  sudo mkdir -p /var/chroot/trusty
+
+
+Now generate a basic root file system::
+
+  sudo debootstrap --variant=buildd --arch amd64 trusty /var/chroot/trusty http://archive.ubuntu.com/ubuntu/
+
+
diff --git a/chroot/chroot-manual.txt b/chroot/chroot-manual.txt
new file mode 100644
index 0000000..3aa7b67
--- /dev/null
+++ b/chroot/chroot-manual.txt
@@ -0,0 +1,156 @@
+---INSTALLING GADGETRON Ubuntu 14.04---
+
+
+
+*** Dependencies ***
+
+sudo apt-get install build-essential git-core cmake gcc-multilib libace-dev libarmadillo-dev libboost-all-dev libfftw3-dev libgtest-dev libxslt-dev xsltproc libhdf5-serial-dev h5utils hdf5-tools libxml2-dev python-dev python-numpy python-libxml2 python-psutil python-h5py python-scipy python-twisted python-matplotlib dcmtk git doxygen libqt4-dev libglew1.6-dev docbook5-xml docbook-xsl-doc-pdf docbook-xsl-doc-html docbook-xsl-ns fop freeglut3-dev libxi-dev liblapack-dev
+
+
+
+*** Steps to install CUDA 6.5 ***
+
+1. sudo apt-get install build-essential
+
+2. In order to be able to install nvidia driver (http://stackoverflow.com/questions/25463952/drm-ko-missing-for-cuda-6-5-ubuntu-14-04-aws-ec2-gpu-instance-g2-2xlarge)
+sudo apt-get install linux-image-extra-virtual
+
+3. Follow the instructions from: http://docs.nvidia.com/cuda/cuda-getting-started-guide-for-linux/index.html#ubuntu-installation or apply steps 1-11 below:
+
+    1. cd && wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-repo-ubuntu1404_6.5-14_amd64.deb
+    2. sudo dpkg -i cuda-repo-ubuntu1404_6.5-14_amd64.deb
+    3. rm cuda-repo-ubuntu1404_6.5-14_amd64.deb
+    4. sudo apt-get update
+    5. sudo apt-get -y install cuda
+    6. echo "" >> ~/.bashrc
+    7. echo "export PATH=/usr/local/cuda-6.5/bin:$PATH" >> ~/.bashrc
+    8. echo "export LD_LIBRARY_PATH=/usr/local/cuda-6.5/lib64:$LD_LIBRARY_PATH" >> ~/.bashrc
+ 
+    ## You should probably restart the system here
+    9. sudo shutdown -r now
+ 
+    ## Lets make the deviceQuery sample.  This is used to verify cuda works
+    10. cd /usr/local/cuda/samples/1_Utilities/deviceQuery && sudo make
+ 
+    # If you see all of your cards listed, and the last line says "Result = PASS" you're good to go!
+    11. /usr/local/cuda/samples/1_Utilities/deviceQuery/deviceQuery
+
+4. Check driver details: nvidia-smi
+
+
+
+*** Steps to install MKL ***
+
+If you would like to use MKL (Intel Math Kernel Library), please download your installation file from Intel and do the installation. 
+Here is what we did with MKL version 11.0.5.192:
+
+tar -xzvf l_mkl_11.0.5.192_intel64.tgz 
+cd l_mkl_11.0.5.192_intel64/
+sudo ./install.sh
+
+Follow the instructions and add the following paths to your ~/.bashrc
+echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/intel/mkl/lib/intel64:/opt/intel/lib/intel64" >> ~/.bashrc
+
+You will also need to install Armadillo from source, skip the libarmadillo-dev package above. 
+Download latest armadillo from http://arma.sourceforge.net and unpack, build and install. 
+If you have MKL installed, this will pick it up and build the armadillo libs against it.
+
+cd armadillo-3.900.6/
+cmake .
+make; sudo make install
+
+
+
+*** Extras ***
+
+Useful links (but not 100% accurate):
+http://docs.nvidia.com/cuda/cuda-getting-started-guide-for-linux/index.html#ubuntu-installation
+https://gist.github.com/zcshiner/4b32980792d367222304
+http://www.r-tutor.com/gpu-computing/cuda-installation/cuda6.5-ubuntu
+http://askubuntu.com/questions/451672/installing-and-testing-cuda-in-ubuntu-14-04
+
+MH Useful links: 
+https://sourceforge.net/p/gadgetron/discussion/general/thread/d0ee5b27/
+https://sourceforge.net/p/gadgetron/home/Linux%20Installation/
+
+
+------------------------------------------------------------------------------------------
+------------------------------------------------------------------------------------------
+------------------------------------------------------------------------------------------
+
+
+---CHROOT---
+
+
+1. apt-get update
+
+2. apt-get install debootstrap -y
+
+3. sudo debootstrap --variant=buildd --arch amd64 trusty /opt/chroot/trusty http://gb.archive.ubuntu.com/ubuntu/
+
+4. Place this script to /opt/chroot/chrootmounter.sh
+------------------------------------------------------------------------------------------
+#!/bin/bash
+  # Bind-mounts (or unmounts) /dev, /sys and /proc inside a chroot directory. Usage: <script> (mount|umount) <chrootdir>
+if [ $# -eq 2 ]; then
+  
+ if [ "$1" == "mount" ]; then
+  sudo mount --bind /dev "${2}/dev"
+  sudo mount --bind /sys "${2}/sys"
+  sudo mount --bind /proc "${2}/proc"
+  
+  exit 0
+ fi
+  
+ if [ "$1" == "umount" ]; then
+  sudo umount "${2}/dev"
+  sudo umount "${2}/sys"
+  sudo umount "${2}/proc"
+  
+  exit 0
+ fi
+  # Two arguments were given, but the first is neither "mount" nor "umount"
+ echo -e "\nUsage:  $0 (mount or umount) (chrootdir)\n"
+ exit 1
+  
+else
+ echo -e "\nUsage:  $0 (mount or umount) (chrootdir)\n"
+ exit 1
+fi
+------------------------------------------------------------------------------------------
+
+5. chmod +x /opt/chroot/chrootmounter.sh
+
+6. Run the script: ./chrootmounter.sh mount /opt/chroot/trusty (check if it worked with mount command)
+
+*7. Make sure that /etc/resolv.conf is the same on 'real' computer and on 'chroot' one (if not, copy the file from the real one)
+
+*8. Copy/Extend your /etc/apt/sources.list (make sure you are using the correct version of Ubuntu Repos):
+  sudo cp /etc/apt/sources.list /opt/chroot/trusty/etc/apt/sources.list
+ 
+9. Run: /home/ubuntu/perl_scripts/generate_gadgetron_root <Arg 1> <Arg 2>
+  Arg 1: Gadgetron root: /usr/local/gadgetron
+  Arg 2: New root: /opt/chroot/trusty
+
+10. Enter the new environment: sudo chroot /opt/chroot/trusty
+
+*11. Install additional software. For example:
+  apt-get update
+  apt-get dist-upgrade -y
+  apt-get install nano vim htop less dialog -y
+
+12. Set the PATH and LD_LIBRARY_PATH variable:
+  export PATH=$PATH:/usr/local/gadgetron/bin
+  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/gadgetron/lib:/usr/local/lib:/usr/local/cuda-6.5/lib64:/opt/intel/mkl/lib/intel64:/opt/intel/lib/intel64
+
+
+*** Extras ***
+
+Useful links:
+https://help.ubuntu.com/community/BasicChroot
+http://ocsovszki-dorian.blogspot.com/2014/06/building-chroot-environment-ubuntu-1404.html
+
+
+
+
+gtplus_FetalHASTE.cfg
diff --git a/chroot/copy-cuda-lib.sh.in b/chroot/copy-cuda-lib.sh.in
new file mode 100644
index 0000000..ea1ceee
--- /dev/null
+++ b/chroot/copy-cuda-lib.sh.in
@@ -0,0 +1,33 @@
+#!/bin/bash
+# Copy the host's 64-bit libcuda.so into this chroot's gadgetron lib directory.
+if [ $(id -u) -ne 0 ]; then
+ echo -e "\nPlease start the script as a root or sudo!\n"
+ exit 1
+else
+ if [ $# -eq 0 ]; then
+ BASEDIR=$(dirname "$0")
+
+ # Absolute path this script is in (previously derived from an unset $SCRIPT)
+ SCRIPTPATH=$(cd "$BASEDIR" && pwd)
+
+ # find the lib(s); every whitespace-separated token of the match is a candidate
+ CANDIDATES=$(ldconfig -p | grep "libcuda.so\s")
+ NEW_CUDA_LIB=
+ # find the one that is 64-bit
+ for CANDIDATE in $CANDIDATES
+ do
+  var=$(file -L "$CANDIDATE" | grep '64-bit')
+  if [ -n "$var" ]; then
+   NEW_CUDA_LIB=$CANDIDATE
+  fi
+ done
+ # without a match cp would be called with no source file, so stop here
+ if [ -z "$NEW_CUDA_LIB" ]; then echo -e "\nNo 64-bit libcuda.so found\n"; exit 1; fi
+ # copy it to the right location (overwrite the previous one)
+ yes | cp "$NEW_CUDA_LIB" "$SCRIPTPATH/gadgetron@CMAKE_INSTALL_PREFIX@/lib/"
+ exit 0
+ else
+  echo -e "\nUsage: $0\n"
+  exit 1
+ fi
+fi
diff --git a/chroot/copy_file_and_dependencies b/chroot/copy_file_and_dependencies
new file mode 100755
index 0000000..138e499
--- /dev/null
+++ b/chroot/copy_file_and_dependencies
@@ -0,0 +1,25 @@
+#!/usr/bin/perl
+# Usage: copy_file_and_dependencies <file> <new_root> <gadgetron_root>
+# Copies <file> into <new_root>/<gadgetron_root>/lib/ and repeats for each of its dependencies.
+use FindBin;
+use lib $FindBin::Bin;
+
+my $file = $ARGV[0];
+
+my $new_root = $ARGV[1];
+
+my $gadgetron_root = $ARGV[2];
+# Dependency paths of $file, one per line, as printed by the sibling get_dependencies_for_binary script.
+my $deps = `$FindBin::Bin/get_dependencies_for_binary $file`;
+
+my @lines = split('\n', $deps);
+# cp -n: do not overwrite a file that already exists in the destination lib directory.
+system("sudo cp -n $file $new_root/$gadgetron_root/lib/");
+# Re-invoke this script for each dependency missing under the new root. NOTE(review): the check uses the dependency's original path while the copy lands in <gadgetron_root>/lib/ - confirm this is intended.
+foreach my $val (@lines) {
+    chomp($val);
+    if (not -e "$new_root/$val")
+    {
+        system("$FindBin::Bin/copy_file_and_dependencies $val $new_root $gadgetron_root");
+    }
+}
diff --git a/chroot/create_chroot.sh b/chroot/create_chroot.sh
new file mode 100755
index 0000000..176d654
--- /dev/null
+++ b/chroot/create_chroot.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+
+if [ $(id -u) -ne 0 ]; then
+ echo -e "\nPlease start the script as a root or sudo!\n"
+ exit 1
+
+else
+ if [ $# -ge 6 ]; then
+
+# --ARGUMENTS-- (example)
+
+# CHROOT_GADGETRON_INSTALL_PREFIX:    /usr/local/gadgetron
+# CHROOT_GADGETRON_BINARY_DIR:        /home/ubuntu/gadgetron/build
+# CHROOT_GADGETRON_SOURCE_DIR:        /home/ubuntu/gadgetron
+# CHROOT_GIT_SHA1_HASH:               f4d7a9189fd21b07e482d28ecb8b07e589f81f9e
+# CHROOT_LIBRARY_PATHS:               /usr/local/lib:/usr/lib/x86_64-linux-gnu
+# CHROOT_CUDA_LIBRARY:                /usr/lib/x86_64-linux-gnu/libcuda.so
+
+  CHROOT_GADGETRON_INSTALL_PREFIX=${1}
+  echo CHROOT_GADGETRON_INSTALL_PREFIX: ${CHROOT_GADGETRON_INSTALL_PREFIX}
+  
+  CHROOT_GADGETRON_BINARY_DIR=${2}
+  echo CHROOT_GADGETRON_BINARY_DIR: ${CHROOT_GADGETRON_BINARY_DIR}
+  
+  CHROOT_GADGETRON_SOURCE_DIR=${3}
+  echo CHROOT_GADGETRON_SOURCE_DIR: ${CHROOT_GADGETRON_SOURCE_DIR}
+  
+  CHROOT_GIT_SHA1_HASH=${4}
+  echo CHROOT_GIT_SHA1_HASH: ${CHROOT_GIT_SHA1_HASH}
+  
+  CHROOT_LIBRARY_PATHS=${5}
+  echo CHROOT_LIBRARY_PATHS: ${CHROOT_LIBRARY_PATHS}
+  
+  CHROOT_CUDA_LIBRARY=${6}
+  echo CHROOT_CUDA_LIBRARY: ${CHROOT_CUDA_LIBRARY}
+
+  # Add LIBRARY_PATHS to LD_LIBRARY_PATH
+  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${CHROOT_LIBRARY_PATHS}
+  export LC_ALL=C
+
+  echo "***** LD_LIBRARY_PATH ***** : ${LD_LIBRARY_PATH}"
+
+  rm -rf ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root
+
+  mkdir -p ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root
+
+  touch ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/source-manifest.txt
+
+  echo "gadgetron    ${CHROOT_GIT_SHA1_HASH}" > ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/source-manifest.txt
+
+  mkdir -p ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron
+
+  mkdir -p ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron/webapp
+
+  mkdir -p ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-backups
+
+  apt-get install debootstrap -y
+
+  debootstrap --variant=buildd --arch amd64 trusty ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron http://gb.archive.ubuntu.com/ubuntu/
+
+  cd ${CHROOT_GADGETRON_BINARY_DIR}
+  make install DESTDIR="${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron" -j8
+
+  #This copies the ISMRMRD executable if it is installed
+  if [ $# -ge 7 ]; then
+      CHROOT_SIEMENS_TO_ISMRMRD_EXE=${7} 
+      cp $CHROOT_SIEMENS_TO_ISMRMRD_EXE "${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron/${CHROOT_GADGETRON_INSTALL_PREFIX}/bin/"
+  else
+      echo "SIEMENS_TO_ISMRMRD_EXE not set"
+  fi
+
+  ${CHROOT_GADGETRON_SOURCE_DIR}/chroot/generate_gadgetron_root ${CHROOT_GADGETRON_INSTALL_PREFIX} ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron
+
+  cp ${6} ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron/${CHROOT_GADGETRON_INSTALL_PREFIX}/lib  
+
+  cp ${CHROOT_GADGETRON_SOURCE_DIR}/chroot/start.sh ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root
+  cp ${CHROOT_GADGETRON_SOURCE_DIR}/chroot/stop.sh ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root
+  cp ${CHROOT_GADGETRON_SOURCE_DIR}/chroot/start-env.sh ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root 
+  cp ${CHROOT_GADGETRON_SOURCE_DIR}/chroot/start-webapp.sh ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root 
+  cp ${CHROOT_GADGETRON_SOURCE_DIR}/chroot/mount.sh ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root
+
+  cp ${CHROOT_GADGETRON_SOURCE_DIR}/chroot/umount_image.sh ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-backups
+  cp ${CHROOT_GADGETRON_SOURCE_DIR}/chroot/start-gadgetron-from-image.sh ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-backups
+  cp ${CHROOT_GADGETRON_SOURCE_DIR}/chroot/run-gadgetron_ismrmrd_client.sh ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-backups
+  cp ${CHROOT_GADGETRON_SOURCE_DIR}/chroot/run-siemens_to_ismrmrd.sh ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-backups
+  cp ${CHROOT_GADGETRON_SOURCE_DIR}/chroot/run-gadgetron-dependency-query.sh ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-backups
+
+  chmod +x ${CHROOT_GADGETRON_BINARY_DIR}/chroot/copy-cuda-lib.sh
+  cp ${CHROOT_GADGETRON_BINARY_DIR}/chroot/copy-cuda-lib.sh ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root
+
+  chmod +x ${CHROOT_GADGETRON_BINARY_DIR}/chroot/start-gadgetron.sh
+  cp ${CHROOT_GADGETRON_BINARY_DIR}/chroot/start-gadgetron.sh ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron 
+ 
+  chmod +x ${CHROOT_GADGETRON_BINARY_DIR}/chroot/enter-chroot-env.sh
+  cp ${CHROOT_GADGETRON_BINARY_DIR}/chroot/enter-chroot-env.sh ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron
+
+  chmod +x ${CHROOT_GADGETRON_BINARY_DIR}/chroot/run-webapp.sh
+  cp ${CHROOT_GADGETRON_BINARY_DIR}/chroot/run-webapp.sh ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron
+
+  chmod +x ${CHROOT_GADGETRON_BINARY_DIR}/chroot/siemens_to_ismrmrd.sh
+  cp ${CHROOT_GADGETRON_BINARY_DIR}/chroot/siemens_to_ismrmrd.sh ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron
+
+  chmod +x ${CHROOT_GADGETRON_BINARY_DIR}/chroot/gadgetron_ismrmrd_client.sh
+  cp ${CHROOT_GADGETRON_BINARY_DIR}/chroot/gadgetron_ismrmrd_client.sh ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron
+
+  chmod +x ${CHROOT_GADGETRON_BINARY_DIR}/chroot/gadgetron-dependency-query.sh
+  cp ${CHROOT_GADGETRON_BINARY_DIR}/chroot/gadgetron-dependency-query.sh ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron
+
+  cp -n ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron${CHROOT_GADGETRON_INSTALL_PREFIX}/config/gadgetron.xml.example ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron${CHROOT_GADGETRON_INSTALL_PREFIX}/config/gadgetron.xml
+
+  cp ${CHROOT_GADGETRON_BINARY_DIR}/chroot/gadgetron_web_app.cfg ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron/webapp
+  cp ${CHROOT_GADGETRON_BINARY_DIR}/chroot/gadgetron_web.conf ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron/webapp
+  cp ${CHROOT_GADGETRON_BINARY_DIR}/chroot/gadgetron_web_ld.conf ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron/webapp
+  cp ${CHROOT_GADGETRON_SOURCE_DIR}/chroot/gadgetron_chroot.conf ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron/webapp/gadgetron_chroot.conf
+  cp ${CHROOT_GADGETRON_SOURCE_DIR}/apps/gadgetron/webapp/gadgetron_web_app.py ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron/webapp
+  cp ${CHROOT_GADGETRON_SOURCE_DIR}/apps/gadgetron/webapp/gadgetron_web_app.py ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron${CHROOT_GADGETRON_INSTALL_PREFIX}/bin/gadgetron_web_app.py
+  cp ${CHROOT_GADGETRON_BINARY_DIR}/chroot/gadgetron_web_app.cfg ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron${CHROOT_GADGETRON_INSTALL_PREFIX}/config/
+
+  chroot ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root/gadgetron apt-get install python-dev python-twisted python-psutil -y 
+  
+  TAR_FILE_NAME=gadgetron-`date '+%Y%m%d-%H%M'`-${CHROOT_GIT_SHA1_HASH:0:8}
+  IMAGE_FILE_NAME=${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-backups/${TAR_FILE_NAME}.img
+  # Archive chroot-root, leaving out its etc, var, dev, sys, proc and root directories
+  tar -zcf "${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-backups/${TAR_FILE_NAME}.tar.gz" --directory "${CHROOT_GADGETRON_BINARY_DIR}/chroot" --exclude=./chroot-root/gadgetron/etc --exclude=./chroot-root/gadgetron/var --exclude=./chroot-root/gadgetron/dev --exclude=./chroot-root/gadgetron/sys --exclude=./chroot-root/gadgetron/proc --exclude=./chroot-root/gadgetron/root ./chroot-root
+  # Create an image file sized by seek (1024 blocks of 1024k, no data written), format it as ext3 and unpack the archive into it through a loop mount
+  dd if=/dev/zero of=${IMAGE_FILE_NAME} bs=1024k seek=1024 count=0
+  mke2fs -F -t ext3 ${IMAGE_FILE_NAME}
+  mkdir ${CHROOT_GADGETRON_BINARY_DIR}/chroot/gadgetron_root
+  mount -o loop ${IMAGE_FILE_NAME} ${CHROOT_GADGETRON_BINARY_DIR}/chroot/gadgetron_root
+  tar -xzf ${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-backups/${TAR_FILE_NAME}.tar.gz -C ${CHROOT_GADGETRON_BINARY_DIR}/chroot/gadgetron_root/
+  sleep 3
+  umount ${CHROOT_GADGETRON_BINARY_DIR}/chroot/gadgetron_root
+  rmdir ${CHROOT_GADGETRON_BINARY_DIR}/chroot/gadgetron_root
+  # Remove the staging tree now that the archive and the image have been written
+  rm -rf "${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-root"
+  # Make both artifacts readable and writable for every user
+  chmod 666 "${CHROOT_GADGETRON_BINARY_DIR}/chroot/chroot-backups/${TAR_FILE_NAME}.tar.gz"
+  chmod 666 "${IMAGE_FILE_NAME}"
+ 
+  exit 0
+
+ else
+  echo -e "\nUsage:  $0 (gadgetron install prefix) (gadgetron binary dir) (gadgetron source dir) (GADGETRON_GIT_SHA1_HASH) (LIBRARY_PATHS) (CUDA_LIBRARY) (SIEMENS_TO_ISMRMRD)\n"
+  exit 1
+ fi
+
+fi
diff --git a/chroot/enter-chroot-env.sh.in b/chroot/enter-chroot-env.sh.in
new file mode 100644
index 0000000..b8c4b5a
--- /dev/null
+++ b/chroot/enter-chroot-env.sh.in
@@ -0,0 +1,3 @@
+#!/bin/bash                                                                                                                                                 
+
+PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:@CMAKE_INSTALL_PREFIX@/bin LD_LIBRARY_PATH=@CMAKE_INSTALL_PREFIX@/lib:/usr/local/lib:/opt/intel/mkl/lib/intel64:/opt/intel/lib/intel64 chroot .
diff --git a/chroot/gadgetron-dependency-query.sh.in b/chroot/gadgetron-dependency-query.sh.in
new file mode 100644
index 0000000..6224929
--- /dev/null
+++ b/chroot/gadgetron-dependency-query.sh.in
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+if [ $# -eq 3 ]; then
+
+    GT_HOST=${1}
+    GT_PORT=${2}
+    QUERY_OUT=${3}
+
+    PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:@CMAKE_INSTALL_PREFIX@/bin LD_LIBRARY_PATH=@CMAKE_INSTALL_PREFIX@/lib:/usr/local/lib:/opt/intel/mkl/lib/intel64:/opt/intel/lib/intel64 @CMAKE_INSTALL_PREFIX@/bin/gtdependencyquery -h $GT_HOST -p $GT_PORT -o $QUERY_OUT
+else
+    echo -e "\nUsage: $0 <Host> <port> <query out file>\n"
+    exit 1
+fi
+
+exit 0
diff --git a/chroot/gadgetron_chroot.conf b/chroot/gadgetron_chroot.conf
new file mode 100644
index 0000000..da9b1f2
--- /dev/null
+++ b/chroot/gadgetron_chroot.conf
@@ -0,0 +1,27 @@
+description "Webapp Upstart Script"
+version "1.0"
+author "Dusan Puletic (dusan.puletic at nih.gov)"
+# NOTE(review): two "start on" stanzas follow; presumably only one of them takes effect - confirm against the upstart documentation
+start on started startup
+start on filesystem or runlevel [2345]
+stop on runlevel [!2345]
+
+expect fork
+
+# Log output to the upstart job log (/var/log/upstart/<job name>.log; NOTE(review): this comment used to name webapp.log - confirm the job name used on install)
+console log
+
+kill signal INT
+
+# Call a script that will mount the proc and start the webapp inside the chroot env
+script
+    exec su -c /home/gadgetron_chroot/current/chroot-root/start-webapp.sh root &
+end script
+
+# Call a script that will unmount the proc before the service stops
+pre-stop script
+    exec /home/gadgetron_chroot/current/chroot-root/stop.sh &
+end script
+
+# Respawn if process dies or is killed
+respawn
diff --git a/chroot/gadgetron_ismrmrd_client.sh.in b/chroot/gadgetron_ismrmrd_client.sh.in
new file mode 100644
index 0000000..4a2bdd4
--- /dev/null
+++ b/chroot/gadgetron_ismrmrd_client.sh.in
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+if [ $# -eq 4 ]; then
+
+    ISMRMRD_FILENAME=${1}
+    CONDIG_XML=${2}
+    GT_HOST=${3}
+    GT_PORT=${4}
+
+    PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:@CMAKE_INSTALL_PREFIX@/bin LD_LIBRARY_PATH=@CMAKE_INSTALL_PREFIX@/lib:/usr/local/lib:/opt/intel/mkl/lib/intel64:/opt/intel/lib/intel64 @CMAKE_INSTALL_PREFIX@/bin/gadgetron_ismrmrd_client -f $ISMRMRD_FILENAME -c $CONDIG_XML -a $GT_HOST -p $GT_PORT
+else
+    echo -e "\nUsage: $0 <ismrmrd filename> <config filename> <host> <port>\n"
+    exit 1
+fi
+
+exit 0
diff --git a/chroot/generate_gadgetron_root b/chroot/generate_gadgetron_root
new file mode 100755
index 0000000..fb7b7de
--- /dev/null
+++ b/chroot/generate_gadgetron_root
@@ -0,0 +1,45 @@
+#!/usr/bin/perl                                                                                                                                              
+use FindBin;
+use lib $FindBin::Bin;
+
+my $gadgetron_root = $ARGV[0];
+my $new_root = $ARGV[1];
+
+my $directory = $gadgetron_root . "/bin";
+
+opendir (DIR, $new_root . $directory) or die $!;
+while (my $file = readdir(DIR)) {
+    next if ($file =~ m/^\./);
+    my $deps = `$FindBin::Bin/get_dependencies_for_binary $new_root/$directory/$file`;
+    my @lines = split('\n', $deps);
+
+    foreach my $val (@lines) {
+	chomp($val);
+        if (not -e "$new_root/$val")
+        {
+            system("$FindBin::Bin/copy_file_and_dependencies $val $new_root $gadgetron_root");
+        }
+    }
+}
+closedir(DIR);
+
+
+$directory = $gadgetron_root . "/lib";
+
+opendir (DIR, $new_root . $directory) or die $!;
+while (my $file = readdir(DIR)) {
+    next unless ($file =~ m/\.so$/);
+
+    my $deps = `$FindBin::Bin/get_dependencies_for_binary $new_root/$directory/$file`;
+    my @lines = split('\n', $deps);
+
+    foreach my $val (@lines) 
+    {
+	chomp($val);
+        if (not -e "$new_root/$val")
+        {
+            system("$FindBin::Bin/copy_file_and_dependencies $val $new_root $gadgetron_root");
+        }
+    }
+}
+closedir(DIR);
diff --git a/chroot/get_dependencies_for_binary b/chroot/get_dependencies_for_binary
new file mode 100755
index 0000000..a956564
--- /dev/null
+++ b/chroot/get_dependencies_for_binary
@@ -0,0 +1,12 @@
+#!/usr/bin/perl
+# Prints the path of every library that ldd reports with a "=> /path" target for the binary named in $ARGV[0].
+use strict;
+
+my $o = `ldd $ARGV[0]`;
+my @lines = split('\n', $o);
+# ldd lines without a "=> /..." target are skipped; only the path that follows "=> " is printed, one per line.
+foreach my $val (@lines) {
+    if ($val =~ m/=> \/([^\s]*)/) {
+	print "/$1\n";
+    }
+}
diff --git a/chroot/install_chroot_image.sh b/chroot/install_chroot_image.sh
new file mode 100755
index 0000000..788273a
--- /dev/null
+++ b/chroot/install_chroot_image.sh
@@ -0,0 +1,91 @@
+#!/bin/bash
+
+if [ $(id -u) -ne 0 ]; then 
+ 	echo -e "\nPlease start the script as a root or sudo!\n"
+ 	exit 1
+
+else
+
+	BASEDIR=$(dirname $0)
+
+	install_img=0
+
+	if [ $# -eq 1 ]; then	# usage 1: tarball only, installed to the default path
+		CHROOT_IMAGE_FILENAME=${1}
+		CHROOT_INSTALL_PATH=/home/gadgetron_chroot
+	else
+		if [ $# -eq 2 ]; then	# usage 2: tarball + install path
+			CHROOT_IMAGE_FILENAME=${1}
+			CHROOT_INSTALL_PATH=${2}
+		else
+			if [ $# -eq 3 ]; then	# usage 3: image directory + image name (or "latest") + install path
+				if [ "${2}" == "latest" ]; then	# names embed YYYYMMDD-HHMM, so the last sorted entry is the newest
+					TAR_NAME=`find ${1} -type f -name 'gadgetron-*.tar.gz' |sort |tail -n1`
+					CHROOT_IMAGE_FILENAME=${TAR_NAME}
+				else
+					CHROOT_IMAGE_FILENAME=${1}/${2}
+				fi
+				CHROOT_INSTALL_PATH=${3}
+			else
+				if [ $# -eq 4 ]; then	# usage 4: like usage 3 plus the install_img flag
+					if [ "${2}" == "latest" ]; then
+						TAR_NAME=`find ${1} -type f -name 'gadgetron-*.tar.gz' |sort |tail -n1`
+						CHROOT_IMAGE_FILENAME=${TAR_NAME}	# find already returns the path including ${1}
+					else
+						CHROOT_IMAGE_FILENAME=${1}/${2}
+					fi
+					CHROOT_INSTALL_PATH=${3}
+					# install_img=1 additionally selects the newest .img file found in the image directory
+					if [ "${4}" -eq 1 ]; then
+						install_img=1
+						IMG_NAME=`find ${1} -type f -name 'gadgetron-*.img' |sort |tail -n1`
+						CHROOT_IMAGE_IMG_FILENAME=${IMG_NAME}
+					fi
+				else
+					echo -e "\nUsage 1, install chroot image to /home/gadgetron_chroot: $0 chroot_image_file"
+					echo -e "\nUsage 2, install chroot image to selected install path: $0 chroot_image_file chroot_install_path"
+					echo -e "\nUsage 3, : $0 chroot_image_path chroot_image_name chroot_install_path"
+					echo -e "\n           install chroot image to selected install path, if chroot_image_name=latest, the newest chroot image in the folder will be installed"
+					echo -e "\nUsage 4, : $0 chroot_image_path chroot_image_name chroot_install_path install_img"
+					echo -e "\n           like Usage 3, if install_img=1, the corresponding .img package will be copied to chroot_install_path"
+					exit 1
+				fi
+			fi
+		fi
+	fi
+
+  	service gadgetron_chroot stop
+
+	echo CHROOT_IMAGE_FILENAME=${CHROOT_IMAGE_FILENAME}
+	echo CHROOT_INSTALL_PATH=${CHROOT_INSTALL_PATH}
+
+	mkdir -p ${CHROOT_INSTALL_PATH}
+
+	cp -rf ${CHROOT_IMAGE_FILENAME} ${CHROOT_INSTALL_PATH}/
+
+	FILENAME_WITH_EXTENSION=${CHROOT_IMAGE_FILENAME##*/}
+	FILENAME=${FILENAME_WITH_EXTENSION%.*}
+	FILENAME=${FILENAME%.*}
+	echo ${FILENAME}
+
+	mkdir ${CHROOT_INSTALL_PATH}/${FILENAME}
+
+	echo untar ${CHROOT_INSTALL_PATH}/${FILENAME_WITH_EXTENSION} ... 
+
+	tar -xzf ${CHROOT_INSTALL_PATH}/${FILENAME_WITH_EXTENSION} --directory="${CHROOT_INSTALL_PATH}/${FILENAME}" .
+
+	rm -f ${CHROOT_INSTALL_PATH}/current
+
+	ln -s ${CHROOT_INSTALL_PATH}/${FILENAME} ${CHROOT_INSTALL_PATH}/current
+
+	cp -f ${CHROOT_INSTALL_PATH}/current/chroot-root/gadgetron/webapp/gadgetron_chroot.conf /etc/init/
+
+	if [ ${install_img} -eq 1 ]; then
+                echo "copy image file : ${CHROOT_IMAGE_IMG_FILENAME} ... "		
+		cp -f ${CHROOT_IMAGE_IMG_FILENAME} ${CHROOT_INSTALL_PATH}/
+	fi
+
+	service gadgetron_chroot start
+
+	exit 0
+fi
diff --git a/chroot/make_list_of_dependencies b/chroot/make_list_of_dependencies
new file mode 100755
index 0000000..ed8f3bd
--- /dev/null
+++ b/chroot/make_list_of_dependencies
@@ -0,0 +1,25 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use FindBin;
+use lib $FindBin::Bin;
+
+my $directory = $ARGV[0] . "/bin";
+
+opendir (DIR, $directory) or die $!;
+while (my $file = readdir(DIR)) {
+    next if ($file =~ m/^\./);
+    system("$FindBin::Bin/get_dependencies_for_binary $ARGV[0]/bin/$file\n");
+}
+closedir(DIR);
+
+
+$directory = $ARGV[0] . "/lib";
+
+opendir (DIR, $directory) or die $!;
+while (my $file = readdir(DIR)) {
+    next unless ($file =~ m/\.so$/);
+    system("$FindBin::Bin/get_dependencies_for_binary $ARGV[0]/lib/$file\n");
+}
+closedir(DIR);
diff --git a/chroot/mount.sh b/chroot/mount.sh
new file mode 100755
index 0000000..b51f57f
--- /dev/null
+++ b/chroot/mount.sh
@@ -0,0 +1,35 @@
+#!/bin/bash                                                                                                                                    
+if [ $(id -u) -ne 0 ]; then
+ echo -e "\nPlease start the script as a root or sudo!\n"
+ exit 1
+
+else
+ BASEDIR=$(dirname $0)
+
+ if [ $# -eq 1 ]; then
+
+  CHROOT_DIR=${1}
+
+  mkdir -p "${CHROOT_DIR}/gadgetron/proc"
+  if find "${CHROOT_DIR}/gadgetron/proc" -maxdepth 0 -empty | read v; then
+   mount --bind /proc "${CHROOT_DIR}/gadgetron/proc";
+  fi
+
+  mkdir -p "${CHROOT_DIR}/gadgetron/dev"
+  if find "${CHROOT_DIR}/gadgetron/dev" -maxdepth 0 -empty | read v; then
+   mount --bind /dev "${CHROOT_DIR}/gadgetron/dev";
+  fi
+
+  mkdir -p "${CHROOT_DIR}/gadgetron/sys"
+  if find "${CHROOT_DIR}/gadgetron/sys" -maxdepth 0 -empty | read v; then
+   mount --bind /sys "${CHROOT_DIR}/gadgetron/sys";
+  fi
+
+  exit 0
+
+ else
+  echo -e "\nUsage: $0 (chrootdir)\n"
+  exit 1
+ fi
+
+fi
diff --git a/chroot/run-gadgetron-dependency-query.sh b/chroot/run-gadgetron-dependency-query.sh
new file mode 100755
index 0000000..291dce4
--- /dev/null
+++ b/chroot/run-gadgetron-dependency-query.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+BASEDIR=$(dirname $0)
+
+if [ $(id -u) -ne 0 ]; then
+  echo -e "\nPlease start the script as a root or sudo!\n"
+  exit 1
+else
+  if [ $# -ge 4 ]; then
+
+    MOUNT_POINT=${1}
+    GT_HOST=${2}
+    GT_PORT=${3}
+    QUERY_OUT=${4}
+
+    if [ $# -eq 5 ]; then
+        FULL_PATH_TO_IMG_FILE=${5}
+        # Optional 5th argument: loop-mount the image unless the mount point is already mounted (the old find/-empty test was false for a mount point that did not exist yet)
+        if ! mountpoint -q "${MOUNT_POINT}"; then
+            mkdir -p "${MOUNT_POINT}"
+            mount -o loop "${FULL_PATH_TO_IMG_FILE}" "${MOUNT_POINT}"
+        fi
+    fi
+
+    chroot ${MOUNT_POINT}/chroot-root/gadgetron /gadgetron-dependency-query.sh $GT_HOST $GT_PORT $QUERY_OUT
+    exit 0
+  else
+    echo -e "\nUsage: $0 <mount point> <Host> <port> <query out file> <optional: full path to img file>\n"
+    exit 1
+  fi
+fi
diff --git a/chroot/run-gadgetron_ismrmrd_client.sh b/chroot/run-gadgetron_ismrmrd_client.sh
new file mode 100755
index 0000000..c6903dc
--- /dev/null
+++ b/chroot/run-gadgetron_ismrmrd_client.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+BASEDIR=$(dirname $0)
+
+if [ $(id -u) -ne 0 ]; then
+  echo -e "\nPlease start the script as a root or sudo!\n"
+  exit 1
+else
+  if [ $# -ge 5 ]; then
+
+    MOUNT_POINT=${1}
+    ISMRMRD_FILENAME=${2}
+    CONDIG_XML=${3}
+    GT_HOST=${4}
+    GT_PORT=${5}
+
+    if [ $# -eq 6 ]; then
+        FULL_PATH_TO_IMG_FILE=${6}
+        # Optional 6th argument (the count was tested against 5, where ${6} is always empty): loop-mount the image unless the mount point is already mounted
+        if ! mountpoint -q "${MOUNT_POINT}"; then
+            mkdir -p "${MOUNT_POINT}"
+            mount -o loop "${FULL_PATH_TO_IMG_FILE}" "${MOUNT_POINT}"
+        fi
+    fi
+
+    chroot ${MOUNT_POINT}/chroot-root/gadgetron /gadgetron_ismrmrd_client.sh $ISMRMRD_FILENAME $CONDIG_XML $GT_HOST $GT_PORT
+    exit 0
+  else
+    echo -e "\nUsage: $0 <mount point> <ismrmrd filename> <config filename> <host> <port> <optional: full path to img file>\n"
+    exit 1
+  fi
+fi
diff --git a/chroot/run-siemens_to_ismrmrd.sh b/chroot/run-siemens_to_ismrmrd.sh
new file mode 100755
index 0000000..01c4663
--- /dev/null
+++ b/chroot/run-siemens_to_ismrmrd.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+BASEDIR=$(dirname $0)
+
+if [ $(id -u) -ne 0 ]; then
+  echo -e "\nPlease start the script as a root or sudo!\n"
+  exit 1
+else
+  if [ $# -ge 4 ]; then
+    
+    MOUNT_POINT=${1}
+    DAT_FILENAME=${2}
+    ISMRMRD_FILENAME=${3}
+    SCAN_NO=${4}
+
+    if [ $# -eq 5 ]; then
+        FULL_PATH_TO_IMG_FILE=${5}
+        # Optional 5th argument: loop-mount the image unless the mount point is already mounted (the old find/-empty test was false for a mount point that did not exist yet)
+        if ! mountpoint -q "${MOUNT_POINT}"; then
+            mkdir -p "${MOUNT_POINT}"
+            mount -o loop "${FULL_PATH_TO_IMG_FILE}" "${MOUNT_POINT}"
+        fi
+    fi
+
+    chroot ${MOUNT_POINT}/chroot-root/gadgetron /siemens_to_ismrmrd.sh $DAT_FILENAME $ISMRMRD_FILENAME $SCAN_NO
+    exit 0
+  else
+    echo -e "\nUsage: $0 <mount point> <dat filename> <ismrmrd filename> <scan number> <optional: full path to img file>\n"
+    exit 1
+  fi
+fi
diff --git a/chroot/run-webapp.sh.in b/chroot/run-webapp.sh.in
new file mode 100644
index 0000000..ce4303c
--- /dev/null
+++ b/chroot/run-webapp.sh.in
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Starts the gadgetron web app in the background and waits for it; start-webapp.sh runs this script inside the chroot.
+start_gadgetron_job=0
+trap '(($start_gadgetron_job == 0)) || ((`kill -0 $start_gadgetron_job`)) || kill -SIGINT $start_gadgetron_job & while kill -0 $start_gadgetron_job 2>/dev/null; do sleep 1; done' HUP TERM INT
+# Export both variables: as plain assignments on their own lines, LD_LIBRARY_PATH only reaches python if it was already exported by the caller.
+export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:@CMAKE_INSTALL_PREFIX@/bin
+export LD_LIBRARY_PATH=@CMAKE_INSTALL_PREFIX@/lib:/usr/local/lib:/opt/intel/mkl/lib/intel64:/opt/intel/lib/intel64
+python @CMAKE_INSTALL_PREFIX@/bin/gadgetron_web_app.py @CMAKE_INSTALL_PREFIX@/config/gadgetron_web_app.cfg &
+start_gadgetron_job=$!
+wait $!
+exit 0
diff --git a/chroot/siemens_to_ismrmrd.sh.in b/chroot/siemens_to_ismrmrd.sh.in
new file mode 100644
index 0000000..ada9174
--- /dev/null
+++ b/chroot/siemens_to_ismrmrd.sh.in
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+if [ $# -eq 3 ]; then
+
+    DAT_FILENAME=${1}
+    ISMRMRD_FILENAME=${2}
+    SCAN_NO=${3}
+
+    PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:@CMAKE_INSTALL_PREFIX@/bin LD_LIBRARY_PATH=@CMAKE_INSTALL_PREFIX@/lib:/usr/local/lib:/opt/intel/mkl/lib/intel64:/opt/intel/lib/intel64 @CMAKE_INSTALL_PREFIX@/bin/siemens_to_ismrmrd -f $DAT_FILENAME -o $ISMRMRD_FILENAME -z $SCAN_NO
+else
+    echo -e "\nUsage: $0 <dat filename> <ismrmrd filename> <scan number>\n"
+    exit 1
+fi
+
+exit 0
diff --git a/chroot/start-env.sh b/chroot/start-env.sh
new file mode 100755
index 0000000..4b93919
--- /dev/null
+++ b/chroot/start-env.sh
@@ -0,0 +1,18 @@
+#!/bin/bash                                                                                                                                                  
+if [ $(id -u) -ne 0 ]; then
+ echo -e "\nPlease start the script as a root or sudo!\n"
+ exit 1
+
+else
+ BASEDIR=$(dirname $0)
+
+ if [ $# -eq 0 ]; then
+  $BASEDIR/mount.sh $BASEDIR
+  chroot $BASEDIR/gadgetron /enter-chroot-env.sh
+  exit 0
+
+ else
+  echo -e "\nUsage: $0\n"
+  exit 1
+ fi
+fi
diff --git a/chroot/start-gadgetron-from-image.sh b/chroot/start-gadgetron-from-image.sh
new file mode 100755
index 0000000..d25acca
--- /dev/null
+++ b/chroot/start-gadgetron-from-image.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+start_gadgetron_image_job=0
+BASEDIR=$(dirname $0)
+
+trap '(($start_gadgetron_image_job == 0)) || ((`kill -0 $start_gadgetron_image_job`)) || kill $start_gadgetron_image_job & while kill -0 $start_gadgetron_image_job 2>/dev/null; do sleep 1; done' HUP TERM INT
+
+if [ $(id -u) -ne 0 ]; then
+  echo -e "\nPlease start the script as a root or sudo!\n"
+  exit 1
+else
+  if [ $# -eq 2 ]; then
+
+    FULL_PATH_TO_IMG_FILE=${1}
+    MOUNT_POINT=${2}
+
+    mkdir -p ${MOUNT_POINT}
+    mount -o loop ${FULL_PATH_TO_IMG_FILE} ${MOUNT_POINT}
+    ${MOUNT_POINT}/chroot-root/start.sh &
+    start_gadgetron_image_job=($!)
+    wait $!
+    sleep 1
+    $BASEDIR/umount_image.sh ${MOUNT_POINT}
+    exit 0
+  else
+    echo -e "\nUsage: $0 <full path to img file> <mount point>\n"
+    exit 1
+  fi
+fi
diff --git a/chroot/start-gadgetron.sh.in b/chroot/start-gadgetron.sh.in
new file mode 100644
index 0000000..7ea9f90
--- /dev/null
+++ b/chroot/start-gadgetron.sh.in
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+gadgetron_job=0
+trap '(($gadgetron_job == 0)) || ((`kill -0 $gadgetron_job`))|| kill $gadgetron_job' HUP TERM INT
+
+PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:@CMAKE_INSTALL_PREFIX@/bin LD_LIBRARY_PATH=@CMAKE_INSTALL_PREFIX@/lib:/usr/local/lib:/opt/intel/mkl/lib/intel64:/opt/intel/lib/intel64 @CMAKE_INSTALL_PREFIX@/bin/gadgetron &
+
+gadgetron_job=($!)
+wait $!
+exit 0
diff --git a/chroot/start-webapp.sh b/chroot/start-webapp.sh
new file mode 100755
index 0000000..b93da2a
--- /dev/null
+++ b/chroot/start-webapp.sh
@@ -0,0 +1,18 @@
+#!/bin/bash                 
+
+start_gadgetron_job=0
+BASEDIR=$(dirname $0)
+
+trap '(($start_gadgetron_job == 0)) || ((`kill -0 $start_gadgetron_job`)) || kill $start_gadgetron_job & while kill -0 $start_gadgetron_job 2>/dev/null; do sleep 1; done' HUP TERM INT
+
+if [ $(id -u) -ne 0 ]; then 
+    echo -e "\nPlease start the script as a root or sudo!\n"
+    exit 1
+else
+    $BASEDIR/mount.sh $BASEDIR
+    chroot $BASEDIR/gadgetron /run-webapp.sh &
+    start_gadgetron_job=($!)
+    wait $!
+    $BASEDIR/stop.sh
+    exit 0
+fi
diff --git a/chroot/start.sh b/chroot/start.sh
new file mode 100755
index 0000000..e4addd2
--- /dev/null
+++ b/chroot/start.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+start_gadgetron_job=0
+BASEDIR=$(dirname $0)
+
+trap '(($start_gadgetron_job == 0)) || ((`kill -0 $start_gadgetron_job`)) || kill $start_gadgetron_job & while kill -0 $start_gadgetron_job 2>/dev/null; do sleep 1; done & $BASEDIR/stop.sh' HUP TERM INT
+
+if [ $(id -u) -ne 0 ]; then
+ echo -e "\nPlease start the script as a root or sudo!\n"
+ exit 1
+else
+ if [ $# -eq 0 ]; then
+  $BASEDIR/mount.sh $BASEDIR
+  chroot $BASEDIR/gadgetron /start-gadgetron.sh &
+  start_gadgetron_job=($!)
+  wait $!
+  $BASEDIR/stop.sh
+  exit 0
+ else
+  echo -e "\nUsage: $0\n"
+  exit 1
+ fi
+fi
diff --git a/chroot/stop.sh b/chroot/stop.sh
new file mode 100755
index 0000000..879afde
--- /dev/null
+++ b/chroot/stop.sh
@@ -0,0 +1,26 @@
+#!/bin/bash                                                                                                                                                 
+
+if [ $(id -u) -ne 0 ]; then
+ echo -e "\nPlease start the script as a root or sudo!\n"
+ exit 1
+
+else
+ BASEDIR=$(dirname $0)
+
+ if [ $# -eq 0 ]; then
+  if mountpoint -q $BASEDIR/gadgetron/proc; then
+   umount $BASEDIR/gadgetron/proc
+  fi
+  if mountpoint -q $BASEDIR/gadgetron/sys; then
+    umount $BASEDIR/gadgetron/sys
+  fi
+  if mountpoint -q $BASEDIR/gadgetron/dev; then
+    umount $BASEDIR/gadgetron/dev
+  fi
+  exit 0
+
+ else
+  echo -e "\nUsage: $0\n"
+  exit 1
+ fi
+fi
diff --git a/chroot/umount_image.sh b/chroot/umount_image.sh
new file mode 100755
index 0000000..c10dbca
--- /dev/null
+++ b/chroot/umount_image.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Unmounts the proc, dev and sys mounts found under <mount point>/chroot-root/gadgetron, then <mount point> itself. Must run as root.
+if [ $(id -u) -ne 0 ]; then
+  echo -e "\nPlease start the script as a root or sudo!\n"
+  exit 1
+else
+  if [ $# -eq 1 ]; then
+
+    MOUNT_POINT=${1}
+    # When MOUNT_POINT is not currently a mount point, nothing is unmounted and nothing is printed.
+    if mountpoint -q ${MOUNT_POINT}; then
+      if mountpoint -q ${MOUNT_POINT}/chroot-root/gadgetron/proc; then
+        umount ${MOUNT_POINT}/chroot-root/gadgetron/proc
+      fi
+      if mountpoint -q ${MOUNT_POINT}/chroot-root/gadgetron/dev; then
+        umount ${MOUNT_POINT}/chroot-root/gadgetron/dev
+      fi
+      if mountpoint -q ${MOUNT_POINT}/chroot-root/gadgetron/sys; then
+        umount ${MOUNT_POINT}/chroot-root/gadgetron/sys
+      fi
+      umount ${MOUNT_POINT}
+      exit 0
+    fi
+  else
+    echo -e "\nUsage: $0 <mount point>\n"
+    exit 1
+  fi
+fi
diff --git a/chroot/unique_lines_in_file b/chroot/unique_lines_in_file
new file mode 100755
index 0000000..870039d
--- /dev/null
+++ b/chroot/unique_lines_in_file
@@ -0,0 +1,14 @@
+#!/usr/bin/perl
+# Prints each distinct input line once, keeping the first occurrence and the original order.
+use strict;
+use warnings;
+my $file = $ARGV[0];
+# Input is read through <>, i.e. from the files named on the command line or from STDIN; $file itself is not used below.
+my %seen = ();
+{
+    while(<>){
+	$seen{$_}++;
+	next if $seen{$_} > 1;
+	print;
+    }
+}
diff --git a/chroot/upstart-instructions.txt b/chroot/upstart-instructions.txt
new file mode 100644
index 0000000..3c1e357
--- /dev/null
+++ b/chroot/upstart-instructions.txt
@@ -0,0 +1,10 @@
+*** Instructions for webapp.conf upstart script ***
+
+
+  - Edit the script and set the correct path to start-webapp.sh and stop.sh
+
+  - Copy the script to /etc/init/
+
+  - Run sudo start webapp or restart the computer
+
+  - Log file is in /var/log/upstart/webapp.log
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index dc63e8b..2aea67b 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -1,15 +1,23 @@
 install(FILES 	
 FindACE.cmake
-FindCULA.cmake
 FindFFTW3.cmake
 FindGLEW.cmake
 FindNumPy.cmake
 FindTinyXML.cmake
 FindGadgetron.cmake
-FindXSD.cmake
-FindXercesC.cmake
-FindXalanC.cmake
 FindIsmrmrd.cmake
 FindGMatlab.cmake
 FindDCMTK.cmake
-DESTINATION cmake)
+FindMKL.cmake
+DESTINATION cmake COMPONENT main)
+
+if (WIN32)
+    install(FILES InstallWinGadgetron.bat DESTINATION cmake COMPONENT main)
+endif (WIN32)
+
+#if (UNIX)
+#    if (NOT APPLE)
+#        install(FILES ./debian/postinst DESTINATION cmake COMPONENT main)
+#        install(FILES ./debian/postinst_web DESTINATION cmake COMPONENT web)
+#    endif (NOT APPLE)
+#endif (UNIX)
diff --git a/cmake/FindACE.cmake b/cmake/FindACE.cmake
index 3eed289..9d556ce 100644
--- a/cmake/FindACE.cmake
+++ b/cmake/FindACE.cmake
@@ -47,14 +47,14 @@ ENDIF (WIN32 AND NOT CYGWIN)
  
 # Solaris needs some extra libraries that may not have been found already 
 IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS") 
-  MESSAGE(STATUS "need to link solaris-specific libraries") 
+  #MESSAGE(STATUS "need to link solaris-specific libraries") 
   #  LINK_LIBRARIES(socket rt) 
   SET(ACE_LIBRARY ${ACE_LIBRARY} socket rt nsl) 
 ENDIF(CMAKE_SYSTEM_NAME STREQUAL "SunOS") 
  
 # Windows needs some extra libraries 
 IF (WIN32 AND NOT CYGWIN) 
-  MESSAGE(STATUS "need to link windows-specific libraries") 
+  #MESSAGE(STATUS "need to link windows-specific libraries") 
   #LINK_LIBRARIES(winmm) 
   SET(ACE_LIBRARY ${ACE_LIBRARY} winmm) 
 ENDIF (WIN32 AND NOT CYGWIN) 
diff --git a/cmake/FindArmadillo.cmake b/cmake/FindArmadillo.cmake
new file mode 100644
index 0000000..9fabca8
--- /dev/null
+++ b/cmake/FindArmadillo.cmake
@@ -0,0 +1,100 @@
+# - Find Armadillo
+# Find the Armadillo C++ library
+#
+# Using Armadillo:
+#  find_package(Armadillo REQUIRED)
+#  include_directories(${ARMADILLO_INCLUDE_DIRS})
+#  add_executable(foo foo.cc)
+#  target_link_libraries(foo ${ARMADILLO_LIBRARIES})
+# This module sets the following variables:
+#  ARMADILLO_FOUND - set to true if the library is found
+#  ARMADILLO_INCLUDE_DIRS - list of required include directories
+#  ARMADILLO_LIBRARIES - list of libraries to be linked
+#  ARMADILLO_VERSION_MAJOR - major version number
+#  ARMADILLO_VERSION_MINOR - minor version number
+#  ARMADILLO_VERSION_PATCH - patch version number
+#  ARMADILLO_VERSION_STRING - version number as a string (ex: "1.0.4")
+#  ARMADILLO_VERSION_NAME - name of the version (ex: "Antipodean Antileech")
+
+#=============================================================================
+# Copyright 2011 Clement Creusot <creusot at cs.york.ac.uk>
+#
+# Distributed under the OSI-approved BSD License (the "License");
+# see accompanying file Copyright.txt for details.
+#
+# This software is distributed WITHOUT ANY WARRANTY; without even the
+# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the License for more information.
+#=============================================================================
+# (To distribute this file outside of CMake, substitute the full
+#  License text for the above reference.)
+
+
+# Locate the library and the <armadillo> header. Standard UNIX paths are not listed; only ARMA_HOME and the Windows install locations are.
+find_library(ARMADILLO_LIBRARY
+  NAMES armadillo
+  HINTS $ENV{ARMA_HOME}
+  PATHS "$ENV{ProgramFiles}/Armadillo/lib"  "$ENV{ProgramFiles}/Armadillo/lib64" "$ENV{ProgramFiles}/Armadillo"
+  PATH_SUFFIXES "lib"
+  )
+find_path(ARMADILLO_INCLUDE_DIR
+  NAMES armadillo
+  HINTS $ENV{ARMA_HOME}
+  PATHS "$ENV{ProgramFiles}/Armadillo/include"
+  PATH_SUFFIXES "include"
+  )
+
+
+if(ARMADILLO_INCLUDE_DIR)
+
+  # ------------------------------------------------------------------------
+  #  Extract version information from armadillo_bits/arma_version.hpp
+  # ------------------------------------------------------------------------
+
+  # WARNING: Early releases of Armadillo didn't have the arma_version.hpp file.
+  # (e.g. v.0.9.8-1 in ubuntu maverick packages (2001-03-15))
+  # If the file is missing, these defaults are kept (0.0.0, "EARLY RELEASE")
+  set(ARMADILLO_VERSION_MAJOR 0)
+  set(ARMADILLO_VERSION_MINOR 0)
+  set(ARMADILLO_VERSION_PATCH 0)
+  set(ARMADILLO_VERSION_NAME "EARLY RELEASE")
+
+  if(EXISTS "${ARMADILLO_INCLUDE_DIR}/armadillo_bits/arma_version.hpp")
+
+    # Keep only the "#define ARMA_VERSION_*" lines of the Armadillo version header, then extract each field
+    file(STRINGS "${ARMADILLO_INCLUDE_DIR}/armadillo_bits/arma_version.hpp" _armadillo_HEADER_CONTENTS REGEX "#define ARMA_VERSION_[A-Z]+ ")
+    string(REGEX REPLACE ".*#define ARMA_VERSION_MAJOR ([0-9]+).*" "\\1" ARMADILLO_VERSION_MAJOR "${_armadillo_HEADER_CONTENTS}")
+    string(REGEX REPLACE ".*#define ARMA_VERSION_MINOR ([0-9]+).*" "\\1" ARMADILLO_VERSION_MINOR "${_armadillo_HEADER_CONTENTS}")
+    string(REGEX REPLACE ".*#define ARMA_VERSION_PATCH ([0-9]+).*" "\\1" ARMADILLO_VERSION_PATCH "${_armadillo_HEADER_CONTENTS}")
+
+    # WARNING: more than one space may precede the quoted version name, hence the " +" in the regex.
+    string(REGEX REPLACE ".*#define ARMA_VERSION_NAME +\"([0-9a-zA-Z _-]+)\".*" "\\1" ARMADILLO_VERSION_NAME "${_armadillo_HEADER_CONTENTS}")
+
+    unset(_armadillo_HEADER_CONTENTS)
+  endif()
+
+  set(ARMADILLO_VERSION_STRING "${ARMADILLO_VERSION_MAJOR}.${ARMADILLO_VERSION_MINOR}.${ARMADILLO_VERSION_PATCH}")
+endif ()
+
+#======================
+
+
+# Handles the 'REQUIRED' and 'QUIET' options and the requested version; sets ARMADILLO_FOUND.
+include("FindPackageHandleStandardArgs")
+find_package_handle_standard_args(Armadillo
+  REQUIRED_VARS ARMADILLO_LIBRARY ARMADILLO_INCLUDE_DIR
+  VERSION_VAR ARMADILLO_VERSION_STRING)
+# NOTE: the VERSION_VAR keyword fails with cmake < 2.8.4.
+# Export the plural result variables only when the package was found.
+if (ARMADILLO_FOUND)
+  set(ARMADILLO_INCLUDE_DIRS ${ARMADILLO_INCLUDE_DIR})
+  set(ARMADILLO_LIBRARIES ${ARMADILLO_LIBRARY})
+endif ()
+
+
+# Hide the internal cache variables from the default (non-advanced) view
+mark_as_advanced(
+  ARMADILLO_INCLUDE_DIR
+  ARMADILLO_LIBRARY)
+
+#======================
diff --git a/cmake/FindCUDA/cuda_compute_capability.c b/cmake/FindCUDA/cuda_compute_capability.c
new file mode 100644
index 0000000..a69edee
--- /dev/null
+++ b/cmake/FindCUDA/cuda_compute_capability.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2011 Florian Rathgeber, florian.rathgeber at gmail.com
+ *
+ * This code is licensed under the MIT License.  See the FindCUDA.cmake script
+ * for the text of the license.
+ *
+ * Based on code by Christopher Bruns published on Stack Overflow (CC-BY):
+ * http://stackoverflow.com/questions/2285185
+ */
+
+#include <stdio.h>
+#include <cuda_runtime.h>
+
+int main(void) {
+  /* Prints the compute capability of every usable GPU as a ';'-separated
+     list (e.g. "20;35") and exits with 0 only if at least one was found. */
+  int deviceCount = 0, device;
+  int gpuDeviceCount = 0;
+  struct cudaDeviceProp properties;
+
+  if (cudaGetDeviceCount(&deviceCount) != cudaSuccess)
+    return 1;
+  /* machines with no GPUs can still report one emulation device */
+  for (device = 0; device < deviceCount; ++device) {
+    /* skip devices that cannot be queried; properties would be garbage */
+    if (cudaGetDeviceProperties(&properties, device) != cudaSuccess)
+      continue;
+    /* 9999 means emulation only and we do not support compute model 1.x */
+    if (properties.major == 9999 || properties.major <= 1)
+      continue;
+    if (gpuDeviceCount++ > 0)
+      printf(";");
+    /* Special case for Fermi: compute capability 2.1 exists, but compute
+       model 2.1 does not, so every 2.x device is reported as "20". */
+    printf("%d%d", properties.major,
+           properties.major == 2 ? 0 : properties.minor);
+  }
+  /* The printed list is parsed by the calling CMake script. Don't just return
+     the number of gpus, because other runtime cuda errors can also yield
+     non-zero return values. */
+  return gpuDeviceCount > 0 ? 0 : 1;
+}
diff --git a/cmake/FindCUDA_advanced.cmake b/cmake/FindCUDA_advanced.cmake
new file mode 100644
index 0000000..574cc79
--- /dev/null
+++ b/cmake/FindCUDA_advanced.cmake
@@ -0,0 +1,38 @@
+find_package(CUDA 4.1)
+
+# Check for GPUs present and their compute capability
+# based on http://stackoverflow.com/questions/2285185/easiest-way-to-test-for-existence-of-cuda-capable-gpu-from-cmake/2297877#2297877 (Christopher Bruns)
+if(CUDA_FOUND)
+  set(CUDA_NVCC_FLAGS2 "-gencode arch=compute_20,code=sm_20")
+  set(CUDA_NVCC_FLAGS3 "-gencode arch=compute_30,code=sm_30")
+  set(CUDA_NVCC_FLAGS4 "-gencode arch=compute_35,code=sm_35")
+  # Build and run the helper program that prints the capabilities of the local GPUs
+  cuda_find_helper_file(cuda_compute_capability c)
+  try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR
+    ${CMAKE_BINARY_DIR}
+    ${CUDA_cuda_compute_capability}
+    CMAKE_FLAGS
+    -DINCLUDE_DIRECTORIES:STRING=${CUDA_TOOLKIT_INCLUDE}
+    -DLINK_LIBRARIES:STRING=${CUDA_CUDART_LIBRARY}
+    COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR
+    RUN_OUTPUT_VARIABLE RUN_OUTPUT_VAR)
+  # COMPILE_RESULT_VAR is TRUE when compile succeeds
+  # RUN_RESULT_VAR is zero when a GPU is found
+  if(COMPILE_RESULT_VAR AND NOT RUN_RESULT_VAR)
+    set(CUDA_HAVE_GPU TRUE CACHE BOOL "Whether CUDA-capable GPU is present")
+    set(CUDA_COMPUTE_CAPABILITY ${RUN_OUTPUT_VAR} CACHE STRING "Compute capability of CUDA-capable GPU present. Separate multiple by ;. For all known, use ALL")
+  else()
+    # No usable GPU detected: default to ALL, i.e. the fixed flag sets defined above
+    set(CUDA_HAVE_GPU FALSE CACHE BOOL "Whether CUDA-capable GPU is present")
+    set(CUDA_COMPUTE_CAPABILITY ALL CACHE STRING "Compute capability of CUDA-capable GPU present. Separate multiple by ;. For all known, use ALL")
+  endif()
+
+  # Translate the (possibly user-edited) capability list into nvcc -gencode flags
+  if("${CUDA_COMPUTE_CAPABILITY}" MATCHES ALL)
+    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${CUDA_NVCC_FLAGS2} ${CUDA_NVCC_FLAGS3} ${CUDA_NVCC_FLAGS4} ${CUDA_NVCC_FLAGS5})
+  else()
+    foreach(code ${CUDA_COMPUTE_CAPABILITY})
+      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_${code},code=sm_${code} ")
+    endforeach()
+  endif()
+
diff --git a/cmake/FindCULA.cmake b/cmake/FindCULA.cmake
deleted file mode 100644
index 8b5d0f8..0000000
--- a/cmake/FindCULA.cmake
+++ /dev/null
@@ -1,63 +0,0 @@
-# - Find CULA
-# Find the native CULA includes and library
-#
-#   CULA_FOUND       - True if CULA found.
-#   CULA_INCLUDE_DIR - where to find cula.h, etc.
-#   CULA_LIBRARIES   - List of libraries when using TinyXML.
-#
-
-IF( CULA_INCLUDE_DIR )
-    # Already in cache, be silent
-    SET( CULA_FIND_QUIETLY TRUE )
-ENDIF( CULA_INCLUDE_DIR )
-
-FIND_PATH( CULA_INCLUDE_DIR "cula.h"
-           PATH_SUFFIXES "cula/include" )
-
-MESSAGE("CULA_INCLUDE_DIR = ${CULA_INCLUDE_DIR}")
-
-
-FIND_LIBRARY( CULA_LIBRARY
-              NAMES "cula"
-              PATH_SUFFIXES "cula/lib64" )
-
-FIND_LIBRARY( CULA_LAPACK_LIBRARY
-              NAMES "cula_lapack"
-              PATH_SUFFIXES "cula/lib64" )
-
-FIND_LIBRARY( CULA_CORE_LIBRARY
-              NAMES "cula_core"
-              PATH_SUFFIXES "cula/lib64" )
-
-FIND_LIBRARY( CULA_LAPACK_BASIC_LIBRARY
-              NAMES "cula_lapack_basic"
-              PATH_SUFFIXES "cula/lib64" )
-
-#This is version 12 of CULA
-if (CULA_LIBRARY)
-  list(APPEND CULA_LIBRARIES ${CULA_LIBRARY})
-endif (CULA_LIBRARY)
-
-#This is version 13 of CULA
-if (CULA_LAPACK_LIBRARY)
-  list(APPEND CULA_LIBRARIES ${CULA_LAPACK_LIBRARY})
-endif (CULA_LAPACK_LIBRARY)
-
-#This is version 13 of CULA
-if (CULA_CORE_LIBRARY)
-  list(APPEND CULA_LIBRARIES ${CULA_CORE_LIBRARY})
-endif (CULA_CORE_LIBRARY)
-
-#This is version 17 of CULA
-if (CULA_LAPACK_BASIC_LIBRARY)
-  list(APPEND CULA_LIBRARIES ${CULA_LAPACK_BASIC_LIBRARY})
-endif (CULA_LAPACK_BASIC_LIBRARY)
-
-MESSAGE("CULA_LIBRARIES = ${CULA_LIBRARIES}")
-
-# handle the QUIETLY and REQUIRED arguments and set CULA_FOUND to TRUE if
-# all listed variables are TRUE
-INCLUDE( "FindPackageHandleStandardArgs" )
-FIND_PACKAGE_HANDLE_STANDARD_ARGS( "CULA" DEFAULT_MSG CULA_INCLUDE_DIR CULA_LIBRARIES )
-
-MARK_AS_ADVANCED( CULA_INCLUDE_DIR CULA_LIBRARIES )
diff --git a/cmake/FindFFTW3.cmake b/cmake/FindFFTW3.cmake
index eccfe9f..b2b9f3f 100644
--- a/cmake/FindFFTW3.cmake
+++ b/cmake/FindFFTW3.cmake
@@ -72,22 +72,43 @@ endif(_use_threads)
 set(_check_list)
 
 # Search for all requested libraries.
-foreach(_lib ${_libraries})
-  string(TOUPPER ${_lib} _LIB)
-  find_library(${_LIB}_LIBRARY ${_lib}
-    HINTS ${FFTW3_ROOT_DIR} PATH_SUFFIXES lib)
-  mark_as_advanced(${_LIB}_LIBRARY)
-  list(APPEND FFTW3_LIBRARIES ${${_LIB}_LIBRARY})
-  list(APPEND _check_list ${_LIB}_LIBRARY)
-endforeach(_lib ${_libraries})
+# On Windows each library is searched under the file name lib<name>-3; on other
+# platforms the plain library name is used. Only the searched name and the
+# label of the summary message differ between the two cases.
+if (WIN32)
+  set(_fftw3_lib_prefix "lib")
+  set(_fftw3_lib_suffix "-3")
+  set(_fftw3_platform "WINDOWS")
+else (WIN32)
+  set(_fftw3_lib_prefix "")
+  set(_fftw3_lib_suffix "")
+  set(_fftw3_platform "UNIX")
+endif (WIN32)
+
+foreach(_lib ${_libraries})
+  string(TOUPPER ${_lib} _LIB)
+  # FFTW3_ROOT_DIR may be given as a CMake variable or through the environment.
+  find_library(${_LIB}_LIBRARY ${_fftw3_lib_prefix}${_lib}${_fftw3_lib_suffix}
+    HINTS ${FFTW3_ROOT_DIR} $ENV{FFTW3_ROOT_DIR} PATH_SUFFIXES lib)
+  mark_as_advanced(${_LIB}_LIBRARY)
+  list(APPEND FFTW3_LIBRARIES ${${_LIB}_LIBRARY})
+  list(APPEND _check_list ${_LIB}_LIBRARY)
+endforeach(_lib ${_libraries})
+
+message("FFTW3 ${_fftw3_platform} libraries: " ${FFTW3_LIBRARIES})
+
+# Do not leak the helper variables into the including scope.
+unset(_fftw3_lib_prefix)
+unset(_fftw3_lib_suffix)
+unset(_fftw3_platform)
 
 # Search for the header file.
 find_path(FFTW3_INCLUDE_DIR fftw3.h 
-  HINTS ${FFTW3_ROOT_DIR} PATH_SUFFIXES include)
+  HINTS ${FFTW3_ROOT_DIR} $ENV{FFTW3_ROOT_DIR} PATH_SUFFIXES include)
 mark_as_advanced(FFTW3_INCLUDE_DIR)
 list(APPEND _check_list FFTW3_INCLUDE_DIR)
 
-# Handle the QUIETLY and REQUIRED arguments and set FFTW_FOUND to TRUE if
+# Handle the QUIETLY and REQUIRED arguments and set FFTW3_FOUND to TRUE if
 # all listed variables are TRUE
 include(FindPackageHandleStandardArgs)
 find_package_handle_standard_args(FFTW3 DEFAULT_MSG ${_check_list})
diff --git a/cmake/FindIsmrmrd.cmake b/cmake/FindIsmrmrd.cmake
index 07a3d3a..f09b5a8 100644
--- a/cmake/FindIsmrmrd.cmake
+++ b/cmake/FindIsmrmrd.cmake
@@ -1,29 +1,28 @@
 # - Find ISMRMRRD
 #   ISMRMRD_FOUND            - true if an ISMRMRD installation is found.
 #   ISMRMRD_INCLUDE_DIR      - where to find ismrmrd.h, etc.
-#   ISMRMRD_LIBRARIES        - libismrmrd.so.
-#   ISMRMRD_XSD_INCLUDE_DIR  - folder containing ismrmrd.hxx (autogenerated from xsd schema)
-#   ISMRMRD_XSD_SOURCE       - full path to ismrmrd.cxx (autogenerated from xsd schema)
+#   ISMRMRD_LIBRARIES        - libismrmrd.so and libismrmrd_xml.so
 #   ISMRMRD_SCHEMA_DIR       - where to find ismrmrd.xsd       
 
-FIND_PATH( ISMRMRD_INCLUDE_DIR ismrmrd.h 
-HINTS $ENV{ISMRMRD_HOME} PATHS /usr/local /usr PATH_SUFFIXES include ismrmrd ismrmrd/include)
-
-FIND_PATH( ISMRMRD_XSD_INCLUDE_DIR ismrmrd.hxx
-HINTS $ENV{ISMRMRD_HOME} PATHS /usr/local /usr PATH_SUFFIXES schema ismrmrd ismrmrd/schema)
+FIND_PATH( ISMRMRD_INCLUDE_DIR ismrmrd/ismrmrd.h 
+HINTS $ENV{ISMRMRD_HOME} PATHS /usr/local /usr PATH_SUFFIXES include)
 
 FIND_PATH( ISMRMRD_SCHEMA_DIR ismrmrd.xsd 
-HINTS $ENV{ISMRMRD_HOME} PATHS /usr/local /usr PATH_SUFFIXES schema ismrmrd ismrmrd/schema)
+HINTS $ENV{ISMRMRD_HOME} PATHS /usr/local /usr PATH_SUFFIXES share/ismrmrd/schema)
+
+FIND_LIBRARY( ISMRMRD_LIBRARY NAMES ismrmrd
+HINTS $ENV{ISMRMRD_HOME} /usr/local /usr PATH_SUFFIXES lib)
 
-FIND_LIBRARY( ISMRMRD_LIBRARIES
-              NAMES "ismrmrd"
-              PATHS  /usr/local/lib ${ISMRMRD_INCLUDE_DIR}/../lib /usr/lib )
+FIND_PATH( ISMRMRD_LIB_DIR libismrmrd.so
+HINTS $ENV{ISMRMRD_HOME} /usr/local /usr PATH_SUFFIXES lib)
 
-FIND_FILE( ISMRMRD_XSD_SOURCE
-           NAMES "ismrmrd.cxx"
-           HINTS $ENV{ISMRMRD_HOME} PATHS /usr/local /usr PATH_SUFFIXES schema ismrmrd ismrmrd/schema)
+SET(ISMRMRD_LIBRARIES ${ISMRMRD_LIBRARY})
 
 INCLUDE( "FindPackageHandleStandardArgs" )
 FIND_PACKAGE_HANDLE_STANDARD_ARGS( "Ismrmrd" DEFAULT_MSG ISMRMRD_INCLUDE_DIR ISMRMRD_LIBRARIES ISMRMRD_SCHEMA_DIR)
 
 MARK_AS_ADVANCED( ISMRMRD_INCLUDE_DIR ISMRMRD_LIBRARIES ISMRMRD_SCHEMA_DIR)
+
+#if(ISMRMRD_FOUND)
+#  message("ISMRMRD found ${ISMRMRD_LIBRARIES}")
+#endif(ISMRMRD_FOUND)
diff --git a/cmake/FindMKL.cmake b/cmake/FindMKL.cmake
index c73317d..8fe9153 100644
--- a/cmake/FindMKL.cmake
+++ b/cmake/FindMKL.cmake
@@ -13,7 +13,9 @@
 # in windows, try to find MKL at C:/Program Files (x86)/Intel/Composer XE/mkl
 
 if ( WIN32 )
+  if(NOT DEFINED ENV{MKLROOT_PATH})
     set(MKLROOT_PATH "C:/Program Files (x86)/Intel/Composer XE" CACHE PATH "Where the MKL are stored")
+  endif(NOT DEFINED ENV{MKLROOT_PATH}) 
 else ( WIN32 )
     set(MKLROOT_PATH "/opt/intel" CACHE PATH "Where the MKL are stored")
 endif ( WIN32 )
@@ -46,17 +48,17 @@ if (MKL_FOUND)
         set(MKL_COMPILER_LIB_DIR "${MKLROOT_PATH}/compiler/lib/intel64")
         set(MKL_COMPILER_LIB_DIR ${MKL_COMPILER_LIB_DIR} "${MKLROOT_PATH}/lib/intel64")
         if ( USE_MKL_64BIT_LIB )
-            if ( WIN32 )
-                set(MKL_LIBRARIES ${MKL_LIBRARIES} mkl_intel_ilp64)
-            else ( WIN32 )
-                set(MKL_LIBRARIES ${MKL_LIBRARIES} libmkl_intel_ilp64.a)
-            endif ( WIN32 )
+                if (WIN32)
+                    set(MKL_LIBRARIES ${MKL_LIBRARIES} mkl_intel_ilp64)
+                else (WIN32)
+                    set(MKL_LIBRARIES ${MKL_LIBRARIES} mkl_intel_ilp64)
+                endif (WIN32)
         else ( USE_MKL_64BIT_LIB )
-            if ( WIN32 )
-                set(MKL_LIBRARIES ${MKL_LIBRARIES} mkl_intel_lp64)
-            else ( WIN32 )
-                set(MKL_LIBRARIES ${MKL_LIBRARIES} libmkl_intel_lp64.a)
-            endif ( WIN32 )
+                if (WIN32)
+                    set(MKL_LIBRARIES ${MKL_LIBRARIES} mkl_intel_lp64)
+                else (WIN32)
+                    set(MKL_LIBRARIES ${MKL_LIBRARIES} mkl_intel_lp64)
+                endif (WIN32)
         endif ( USE_MKL_64BIT_LIB )
     else ( USE_MKL_64BIT )
         set(MKL_LIB_DIR "${MKLROOT_PATH}/mkl/lib/ia32")
@@ -65,19 +67,18 @@ if (MKL_FOUND)
         if ( WIN32 )
             set(MKL_LIBRARIES ${MKL_LIBRARIES} mkl_intel_c)
         else ( WIN32 )
-            set(MKL_LIBRARIES ${MKL_LIBRARIES} libmkl_intel.a)
+            set(MKL_LIBRARIES ${MKL_LIBRARIES} mkl_intel)
         endif ( WIN32 )
     endif ( USE_MKL_64BIT )
 
-    if ( WIN32 )
+    if (WIN32)
         SET(MKL_LIBRARIES ${MKL_LIBRARIES} mkl_intel_thread)
         SET(MKL_LIBRARIES ${MKL_LIBRARIES} mkl_core)
         SET(MKL_LIBRARIES ${MKL_LIBRARIES} libiomp5md)
-    else ( WIN32 )
-        SET(MKL_LIBRARIES ${MKL_LIBRARIES} libmkl_intel_thread.a)
-        SET(MKL_LIBRARIES ${MKL_LIBRARIES} libmkl_core.a)
-        SET(MKL_LIBRARIES ${MKL_LIBRARIES} iomp5)
-    endif ( WIN32 )
+    else (WIN32)
+        SET(MKL_LIBRARIES ${MKL_LIBRARIES} mkl_gnu_thread)
+        SET(MKL_LIBRARIES ${MKL_LIBRARIES} mkl_core)
+    endif (WIN32) 
 endif (MKL_FOUND)
 
 IF (MKL_FOUND)
diff --git a/cmake/FindXSD.cmake b/cmake/FindXSD.cmake
deleted file mode 100644
index 59a8834..0000000
--- a/cmake/FindXSD.cmake
+++ /dev/null
@@ -1,68 +0,0 @@
-# - Find CodeSynthesis XSD
-# This module can be used to find XSD and it's include path
-# Variables:
-#	XSD_FOUND - System has XSD
-#	XSD_EXECUTABLE - XSD binary executable
-#	XSD_INCLUDE_DIR - XSD include directory
-#
-# Functions:
-#       WRAP_XSD - Generates C++ bindings in the given output directory for a given schema file
-
-if(NOT DEFINED XSD_DIR AND DEFINED ENV{XSD_DIR})
-    set(XSD_DIR $ENV{XSD_DIR})
-endif(NOT DEFINED XSD_DIR AND DEFINED ENV{XSD_DIR})
-
-find_program(XSD_EXECUTABLE NAMES xsd xsdcxx xsd.exe
-    PATHS ${XSD_DIR} /usr /usr/local
-    PATH_SUFFIXES bin
-)
-
-find_path(XSD_INCLUDE_DIR NAMES xsd/cxx/pre.hxx
-    PATHS ${XSD_DIR} /usr /usr/local
-    PATH_SUFFIXES include
-)
-
-FUNCTION(XSD_EXTRACT_OPTIONS _xsd_files _xsd_options)
-	foreach(current_arg ${ARGN})
-		IF(${current_arg} STREQUAL "OPTIONS")
-			SET(_XSD_DOING_OPTIONS TRUE)
-		else(${current_arg} STREQUAL "OPTIONS")
-			if(_XSD_DOING_OPTIONS)
-				SET(_xsd_options_p ${_xsd_options_p} ${current_arg})
-			else(_XSD_DOING_OPTIONS)
-				SET(_xsd_files_p ${_xsd_files_p} ${current_arg})
-			endif(_XSD_DOING_OPTIONS)
-		endif(${current_arg} STREQUAL "OPTIONS")
-	endforeach(current_arg)
-	SET(${_xsd_files} ${_xsd_files_p} PARENT_SCOPE)
-	SET(${_xsd_options} ${_xsd_options_p} PARENT_SCOPE)
-ENDFUNCTION(XSD_EXTRACT_OPTIONS)
-
-
-FUNCTION(WRAP_XSD XSD_SRCS XSD_INCLUDES OUT_PATH)
-	SET(OUTPUT_DIR  ${CMAKE_CURRENT_BINARY_DIR}/src/xsd)
-	FILE(MAKE_DIRECTORY ${OUTPUT_DIR})
-	SET(${XSD_INCLUDES} ${OUTPUT_DIR} PARENT_SCOPE)
-	XSD_EXTRACT_OPTIONS(xsd_files xsd_options ${ARGN})
-	FOREACH(it ${xsd_files})
-		STRING(REGEX REPLACE ".*/" "" BARE_XSD "${it}" )
-		STRING(REGEX REPLACE ".xsd" ".cxx" SOURCE "${BARE_XSD}" )
-		STRING(REGEX REPLACE ".xsd" ".hxx" HEADER "${BARE_XSD}" )
-		CONFIGURE_FILE(${it} ${OUT_PATH}/${BARE_XSD} COPY_ONLY)
-		SET(SOURCE ${OUTPUT_DIR}/${SOURCE})
-		SET(HEADER ${OUTPUT_DIR}/${HEADER})
-		ADD_CUSTOM_COMMAND(OUTPUT ${SOURCE} ${HEADER}
-				COMMAND ${XSD_EXECUTABLE} ${xsd_options} "--output-dir" ${OUTPUT_DIR} ${OUT_PATH}/${BARE_XSD}
-				DEPENDS ${it}
-				VERBATIM
-		)
-		set_source_files_properties(${HEADER} PROPERTIES GENERATED TRUE)
-		set_source_files_properties(${SOURCE} PROPERTIES GENERATED TRUE)
-		SET(_XSD_SRCS ${_XSD_SRCS} ${SOURCE} ${HEADER})
-	ENDFOREACH(it)
-	SET(${XSD_SRCS} ${_XSD_SRCS} PARENT_SCOPE)
-ENDFUNCTION(WRAP_XSD)
-
-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(XSD DEFAULT_MSG XSD_INCLUDE_DIR XSD_EXECUTABLE)
-mark_as_advanced(XSD_INCLUDE_DIR XSD_EXECUTABLE)
diff --git a/cmake/FindXalanC.cmake b/cmake/FindXalanC.cmake
deleted file mode 100644
index 096feaa..0000000
--- a/cmake/FindXalanC.cmake
+++ /dev/null
@@ -1,35 +0,0 @@
-# - Try to find XalanC
-# Once done this will define
-#
-#  XALANC_FOUND - System has XalanC
-#  XALANC_INCLUDE_DIR - The XalanC include directory
-#  XALANC_LIBRARY_DIR - The XalanC library dir
-#  XALANC_LIBRARIES - The libraries needed to use XalanC
-#  XALANC_DEFINITIONS - Compiler switches required for using XalanC
-
-# Copyright (c) 2009, Helio Chissini de Castro, <helio at kde.org>
-#
-# Redistribution and use is allowed according to the terms of the BSD license.
-# For details see the accompanying COPYING-CMAKE-SCRIPTS file.
-
-
-IF (XALANC_INCLUDE_DIR AND XALANC_LIBRARIES)
-   # in cache already
-   SET(XalanC_FIND_QUIETLY TRUE)
-ENDIF (XALANC_INCLUDE_DIR AND XALANC_LIBRARIES)
-
-
-FIND_PATH(XALANC_INCLUDE_DIR DOMSupport/DOMServices.hpp
-	PATHS
-	/usr/local/include/xalanc
-	/usr/include/xalanc
-	PATH_SUFFIXES
-	xalanc
-	)
-
-FIND_LIBRARY(XALANC_LIBRARIES NAMES xalan-c xalanMsg)
-
-INCLUDE(FindPackageHandleStandardArgs)
-FIND_PACKAGE_HANDLE_STANDARD_ARGS(XalanC DEFAULT_MSG XALANC_LIBRARIES XALANC_INCLUDE_DIR)
-
-MARK_AS_ADVANCED(XALANC_INCLUDE_DIR XALANC_LIBRARIES XALANC_LIBRARY_DIR)
diff --git a/cmake/FindXercesC.cmake b/cmake/FindXercesC.cmake
deleted file mode 100644
index 3bb9b81..0000000
--- a/cmake/FindXercesC.cmake
+++ /dev/null
@@ -1,37 +0,0 @@
-# This module defines
-# XERCESC_INCLUDE_DIR, where to find ptlib.h, etc.
-# XERCESC_LIBRARIES, the libraries to link against to use pwlib.
-# XERCESC_FOUND, If false, don't try to use pwlib.
-
-FIND_PATH(XERCESC_INCLUDE_DIR xercesc/dom/DOM.hpp
-  "[HKEY_CURRENT_USER\\software\\xerces-c\\src]"
-  "[HKEY_CURRENT_USER\\xerces-c\\src]"
-  $ENV{XERCESCROOT}/src/
-  /usr/local/include
-  /usr/include
-)
-
-FIND_LIBRARY(XERCESC_LIBRARIES
-  NAMES 
-    xerces-c
-  PATHS
-    "[HKEY_CURRENT_USER\\software\\xerces-c\\lib]"
-    "[HKEY_CURRENT_USER\\xerces-c\\lib]"
-    $ENV{XERCESCROOT}/lib
-    /usr/local/lib
-    /usr/lib
-)
-
-# if the include a the library are found then we have it
-IF(XERCESC_INCLUDE_DIR)
-  IF(XERCESC_LIBRARIES)
-    SET( XERCESC_FOUND "YES" )
-  ENDIF(XERCESC_LIBRARIES)
-ENDIF(XERCESC_INCLUDE_DIR)
-
-
-
-MARK_AS_ADVANCED(
-  XERCESC_INCLUDE_DIR
-  XERCESC_LIBRARIES
-) 
\ No newline at end of file
diff --git a/cmake/InstallLinuxDependencies.cmake b/cmake/InstallLinuxDependencies.cmake
new file mode 100644
index 0000000..beffdaa
--- /dev/null
+++ b/cmake/InstallLinuxDependencies.cmake
@@ -0,0 +1,24 @@
+
+# Install dependencies: the MKL shared libraries and the libiomp5 runtime
+# are copied into the lib directory of the "main" component.
+if(MKL_FOUND)
+    # Library sub-directory matching the target word size
+    if(HAS_64_BIT)
+        set(_mkl_arch intel64)
+    else()
+        set(_mkl_arch ia32)
+    endif()
+    set(MKL_REDIST_DIR ${MKLROOT_PATH}/mkl/lib/${_mkl_arch})
+    set(MKL_COMPILER_REDIST_DIR ${MKLROOT_PATH}/lib/${_mkl_arch})
+
+    message("Install mkl libraries from ${MKL_REDIST_DIR} ")
+    file(GLOB MKL_DLL ${MKL_REDIST_DIR}/*.so)
+    file(GLOB MKL_COMPILER_DLL ${MKL_COMPILER_REDIST_DIR}/libiomp5*.so)
+
+    # MKL libraries first, then the libiomp5 runtime; one install rule per
+    # file found at configure time.
+    foreach(sharedLib ${MKL_DLL} ${MKL_COMPILER_DLL})
+        message("Install ${sharedLib} ")
+        install(FILES ${sharedLib} DESTINATION lib COMPONENT main)
+    endforeach()
+endif()
diff --git a/cmake/InstallWinDependencies.cmake b/cmake/InstallWinDependencies.cmake
new file mode 100644
index 0000000..fc16558
--- /dev/null
+++ b/cmake/InstallWinDependencies.cmake
@@ -0,0 +1,137 @@
+
+#install dependencies
+if(HDF5_FOUND)
+    # Default to the bin directory beside the headers; HDF5_ROOT from the environment wins.
+    set(HDF5_BIN_DIR ${HDF5_C_INCLUDE_DIR}/../bin)
+    if(DEFINED ENV{HDF5_ROOT})
+        set(HDF5_BIN_DIR $ENV{HDF5_ROOT}/bin)
+    endif()
+    message("Install hdf5 libraries from ${HDF5_BIN_DIR} ")
+    # One install rule per DLL found at configure time
+    file(GLOB HDF5_DLL ${HDF5_BIN_DIR}/*.dll)
+    foreach(dll ${HDF5_DLL})
+        message("Install ${dll} ")
+        install(FILES ${dll} DESTINATION lib COMPONENT main)
+    endforeach()
+endif()
+
+if(MKL_FOUND)
+    if(HAS_64_BIT)
+        set(MKL_REDIST_DIR ${MKLROOT_PATH}/redist/intel64)
+    else()
+        set(MKL_REDIST_DIR ${MKLROOT_PATH}/redist/ia32)
+    endif()
+    # Only DLLs whose name contains "iomp5md" are matched by the glob below
+    message("Install mkl libraries from ${MKL_REDIST_DIR}/compiler ")
+    file(GLOB MKL_DLL ${MKL_REDIST_DIR}/compiler/*iomp5md*.dll)
+    foreach(dll ${MKL_DLL})
+        message("Install ${dll} ")
+        install(FILES ${dll} DESTINATION lib COMPONENT main)
+    endforeach()
+endif()
+
+if (ACE_FOUND)
+    message("Install ACE libraries from ${ACE_INCLUDE_DIR}/lib ")
+    FILE(GLOB ACE_DLL ${ACE_INCLUDE_DIR}/lib/ACE.dll)  # empty list when ACE.dll is absent
+    foreach(fileName ${ACE_DLL})
+        message("Install ${fileName} ")  # use the loop variable, not the whole list
+        install( FILES ${fileName} DESTINATION lib COMPONENT main)
+    endforeach(fileName)
+endif (ACE_FOUND)
+
+if(ARMADILLO_FOUND)
+    message("Install ARMADILLO libraries from ${ARMADILLO_INCLUDE_DIRS}/../lib ")
+    file(GLOB ARMADILLO_DLL ${ARMADILLO_INCLUDE_DIRS}/../lib/*.dll)  # lib directory beside the headers
+    foreach(dll ${ARMADILLO_DLL})
+        message("Install ${dll} ")
+        install(FILES ${dll} DESTINATION lib COMPONENT main)
+    endforeach()
+endif()
+
+if(FFTW3_FOUND)
+    message("Install FFTW3 libraries from ${FFTW3_INCLUDE_DIR} ")
+    file(GLOB FFTW3_DLL ${FFTW3_INCLUDE_DIR}/*.dll)  # DLLs are looked up in the header directory itself
+    foreach(dll ${FFTW3_DLL})
+        message("Install ${dll} ")
+        install(FILES ${dll} DESTINATION lib COMPONENT main)
+    endforeach()
+endif()
+
+if(ISMRMRD_FOUND)
+    message("Install ISMRMRD libraries from ${ISMRMRD_INCLUDE_DIR}/../lib ")
+    file(GLOB ISMRMRD_DLL ${ISMRMRD_INCLUDE_DIR}/../lib/*.dll)  # lib directory beside the headers
+    foreach(dll ${ISMRMRD_DLL})
+        message("Install ${dll} ")
+        install(FILES ${dll} DESTINATION lib COMPONENT main)
+    endforeach()
+endif()
+
+if (Boost_FOUND)  # install the Boost DLLs found in Boost_LIBRARY_DIR, one component at a time
+    message("Install BOOST libraries from ${Boost_LIBRARY_DIR} ")
+    FILE(GLOB BOOST_CHRONO_DLL ${Boost_LIBRARY_DIR}/boost_chrono*.dll)
+    foreach(fileName ${BOOST_CHRONO_DLL})
+        message("Install ${fileName} ")
+        install( FILES ${fileName} DESTINATION lib COMPONENT main)
+    endforeach(fileName)
+
+    FILE(GLOB BOOST_DATE_TIME_DLL ${Boost_LIBRARY_DIR}/boost_date_time*.dll)  # pattern must spell the component "date_time"
+    foreach(fileName ${BOOST_DATE_TIME_DLL})
+        message("Install ${fileName} ")
+        install( FILES ${fileName} DESTINATION lib COMPONENT main)
+    endforeach(fileName)
+
+    FILE(GLOB BOOST_PROGRAM_OPTIONS_DLL ${Boost_LIBRARY_DIR}/boost_program_options*.dll)
+    foreach(fileName ${BOOST_PROGRAM_OPTIONS_DLL})
+        message("Install ${fileName} ")
+        install( FILES ${fileName} DESTINATION lib COMPONENT main)
+    endforeach(fileName)
+
+    if (Boost_PYTHON_FOUND AND PYTHONLIBS_FOUND AND NUMPY_FOUND)  # Boost.Python only when the Python pieces were all found
+        FILE(GLOB BOOST_PYTHON_DLL ${Boost_LIBRARY_DIR}/boost_python*.dll)
+        foreach(fileName ${BOOST_PYTHON_DLL})
+            message("Install ${fileName} ")
+            install( FILES ${fileName} DESTINATION lib COMPONENT main)
+        endforeach(fileName)
+    endif (Boost_PYTHON_FOUND AND PYTHONLIBS_FOUND AND NUMPY_FOUND)
+
+    FILE(GLOB BOOST_SYSTEM_DLL ${Boost_LIBRARY_DIR}/boost_system*.dll)
+    foreach(fileName ${BOOST_SYSTEM_DLL})
+        message("Install ${fileName} ")
+        install( FILES ${fileName} DESTINATION lib COMPONENT main)
+    endforeach(fileName)
+
+    FILE(GLOB BOOST_THREAD_DLL ${Boost_LIBRARY_DIR}/boost_thread*.dll)
+    foreach(fileName ${BOOST_THREAD_DLL})
+        message("Install ${fileName} ")
+        install( FILES ${fileName} DESTINATION lib COMPONENT main)
+    endforeach(fileName)
+
+    FILE(GLOB BOOST_FILESYSTEM_DLL ${Boost_LIBRARY_DIR}/boost_filesystem*.dll)
+    foreach(fileName ${BOOST_FILESYSTEM_DLL})
+        message("Install ${fileName} ")
+        install( FILES ${fileName} DESTINATION lib COMPONENT main)
+    endforeach(fileName)
+endif (Boost_FOUND)
+
+if(DCMTK_FOUND)
+    message("Install DCMTK libraries from ${DCMTK_DIR}/lib ")
+    file(GLOB DCMTK_DLL ${DCMTK_DIR}/lib/*.dll)  # every DLL in the DCMTK lib directory
+    foreach(dll ${DCMTK_DLL})
+        message("Install ${dll} ")
+        install(FILES ${dll} DESTINATION lib COMPONENT main)
+    endforeach()
+endif()
+
+if (GTEST_FOUND)
+    # Each DLL is looked for next to its library file, under the same base name.
+    get_filename_component(GTEST_LIB_NAME ${GTEST_LIBRARY} NAME_WE)
+    get_filename_component(GTEST_DLL ${GTEST_LIBRARY} DIRECTORY)
+    message("Install GTEST libraries from ${GTEST_DLL} ")
+    message("Install ${GTEST_DLL}/${GTEST_LIB_NAME}.dll ")
+    install( FILES ${GTEST_DLL}/${GTEST_LIB_NAME}.dll DESTINATION lib COMPONENT main)
+
+    get_filename_component(GTEST_MAIN_NAME ${GTEST_MAIN_LIBRARY} NAME_WE)
+    get_filename_component(GTEST_MAIN_DLL ${GTEST_MAIN_LIBRARY} DIRECTORY)
+    message("Install ${GTEST_MAIN_DLL}/${GTEST_MAIN_NAME}.dll ")
+    install( FILES ${GTEST_MAIN_DLL}/${GTEST_MAIN_NAME}.dll DESTINATION lib COMPONENT main)
+endif (GTEST_FOUND)
diff --git a/cmake/InstallWinGadgetron.bat b/cmake/InstallWinGadgetron.bat
new file mode 100644
index 0000000..2a34033
--- /dev/null
+++ b/cmake/InstallWinGadgetron.bat
@@ -0,0 +1,6 @@
+set "INSTALL_DIR=%~dp0"
+
+rem Add the gadgetron lib and bin directories to the PATH stored for the current user
+setx PATH "%PATH%;%INSTALL_DIR%\..\lib;%INSTALL_DIR%\..\bin"
+rem Create gadgetron.xml from the example; quoted so that install paths with spaces work
+copy /Y "%INSTALL_DIR%\..\config\gadgetron.xml.example" "%INSTALL_DIR%\..\config\gadgetron.xml"
\ No newline at end of file
diff --git a/cmake/cpack_options.cmake.in b/cmake/cpack_options.cmake.in
new file mode 100644
index 0000000..1a6b8a9
--- /dev/null
+++ b/cmake/cpack_options.cmake.in
@@ -0,0 +1,41 @@
+################################################################################
+# Metadata for package generators
+################################################################################
+
+# Common options (the @VAR@ placeholders are presumably filled in by configure_file())
+set(CPACK_PACKAGE_VERSION "@GADGETRON_VERSION_STRING@")
+set(CPACK_PACKAGE_VERSION_MAJOR "@GADGETRON_VERSION_MAJOR@")
+set(CPACK_PACKAGE_VERSION_MINOR "@GADGETRON_VERSION_MINOR@")
+set(CPACK_PACKAGE_VERSION_PATCH "@GADGETRON_VERSION_PATCH@")
+set(CPACK_PACKAGE_NAME "@PROJECT_NAME@")
+set(CPACK_PACKAGE_VENDOR "http://gadgetron.sourceforge.net/")
+set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Gadgetron framework")
+set(CPACK_PACKAGE_INSTALL_DIRECTORY "@PROJECT_NAME_LOWER@")
+set(CPACK_RESOURCE_FILE_LICENSE "@CMAKE_CURRENT_SOURCE_DIR@/LICENSE")
+set(CPACK_PACKAGE_MAINTAINER "Michael S. Hansen <michael.hansen at nih.gov>")
+set(CPACK_PACKAGE_CONTACT "Michael S. Hansen <michael.hansen at nih.gov>")
+
+set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
+SET(CPACK_COMPONENTS_ALL_IN_ONE_PACKAGE ON)
+
+# DEB specific
+set(CPACK_DEBIAN_PACKAGE_DEPENDS "@DEBIAN_PACKAGE_DEPENDS@")
+set(CPACK_DEBIAN_PACKAGE_SECTION "devel")
+set(CPACK_DEBIAN_PACKAGE_PRIORITY "optional")
+set(CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON)
+set(CPACK_DEBIAN_PACKAGE_DESCRIPTION "Implementation of the Gadgetron.")
+set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "@CMAKE_SOURCE_DIR@/cmake/debian/postinst;@CMAKE_SOURCE_DIR@/cmake/debian/prerm;" )
+set(CPACK_DEB_COMPONENT_INSTALL ON)
+
+# NSIS specific
+set(CPACK_NSIS_HELP_LINK "http:\\\\\\\\gadgetron.sourceforge.net")
+set(CPACK_NSIS_URL_INFO_ABOUT "http:\\\\\\\\gadgetron.sourceforge.net")
+set(CPACK_NSIS_MODIFY_PATH ON)
+set(CPACK_NSIS_DISPLAY_NAME "gadgetron")
+
+set(CPACK_NSIS_EXTRA_INSTALL_COMMANDS
+    "ExecWait '$INSTDIR/cmake/InstallWinGadgetron.bat'")
+
+# Output filename of the generated tarball / package: <lower-case project name>-<version>
+set(CPACK_PACKAGE_FILE_NAME "@PROJECT_NAME_LOWER@-@GADGETRON_VERSION_STRING@")
+set(CPACK_SOURCE_PACKAGE_FILE_NAME "@PROJECT_NAME_LOWER@-@GADGETRON_VERSION_STRING@")
diff --git a/cmake/cpack_options_dependency.cmake.in b/cmake/cpack_options_dependency.cmake.in
new file mode 100644
index 0000000..8f3ed87
--- /dev/null
+++ b/cmake/cpack_options_dependency.cmake.in
@@ -0,0 +1,37 @@
+################################################################################
+# Metadata for package generators
+################################################################################
+
+# Common options
+set(CPACK_PACKAGE_VERSION "@GADGETRON_VERSION_STRING@")
+set(CPACK_PACKAGE_VERSION_MAJOR "@GADGETRON_VERSION_MAJOR@")
+set(CPACK_PACKAGE_VERSION_MINOR "@GADGETRON_VERSION_MINOR@")
+set(CPACK_PACKAGE_VERSION_PATCH "@GADGETRON_VERSION_PATCH@")
+set(CPACK_PACKAGE_NAME "@PROJECT_NAME@")
+set(CPACK_PACKAGE_VENDOR "http://gadgetron.sourceforge.net/")
+set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Gadgetron framework")
+set(CPACK_PACKAGE_INSTALL_DIRECTORY "@PROJECT_NAME_LOWER@")
+set(CPACK_RESOURCE_FILE_LICENSE "@CMAKE_CURRENT_SOURCE_DIR@/LICENSE")
+set(CPACK_PACKAGE_MAINTAINER "Michael S. Hansen <michael.hansen at nih.gov>")
+set(CPACK_PACKAGE_CONTACT "Michael S. Hansen <michael.hansen at nih.gov>")
+
+set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
+SET(CPACK_COMPONENTS_ALL_IN_ONE_PACKAGE ON)
+
+# DEB specific
+set(CPACK_DEBIAN_PACKAGE_DEPENDS "@DEBIAN_PACKAGE_DEPENDS@")
+set(CPACK_DEBIAN_PACKAGE_SECTION "devel")
+set(CPACK_DEBIAN_PACKAGE_PRIORITY "optional")
+set(CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON)
+set(CPACK_DEBIAN_PACKAGE_DESCRIPTION "Dependencies of the Gadgetron.")
+set(CPACK_DEB_COMPONENT_INSTALL ON)
+
+# NSIS specific
+set(CPACK_NSIS_HELP_LINK "http:\\\\\\\\gadgetron.sourceforge.net")
+set(CPACK_NSIS_URL_INFO_ABOUT "http:\\\\\\\\gadgetron.sourceforge.net")
+set(CPACK_NSIS_MODIFY_PATH ON)
+set(CPACK_NSIS_DISPLAY_NAME "gadgetron")
+
+# Output filename of the generated tarball / package
+set(CPACK_PACKAGE_FILE_NAME "@PROJECT_NAME_LOWER@-@GADGETRON_VERSION_STRING@")
+set(CPACK_SOURCE_PACKAGE_FILE_NAME "@PROJECT_NAME_LOWER@-@GADGETRON_VERSION_STRING@")
diff --git a/cmake/cpack_options_web.cmake.in b/cmake/cpack_options_web.cmake.in
new file mode 100644
index 0000000..345c3b2
--- /dev/null
+++ b/cmake/cpack_options_web.cmake.in
@@ -0,0 +1,38 @@
+################################################################################
+# Metadata for package generators
+################################################################################
+
+# Common options
+set(CPACK_PACKAGE_VERSION "@GADGETRON_VERSION_STRING@")
+set(CPACK_PACKAGE_VERSION_MAJOR "@GADGETRON_VERSION_MAJOR@")
+set(CPACK_PACKAGE_VERSION_MINOR "@GADGETRON_VERSION_MINOR@")
+set(CPACK_PACKAGE_VERSION_PATCH "@GADGETRON_VERSION_PATCH@")
+set(CPACK_PACKAGE_NAME "@PROJECT_NAME@")
+set(CPACK_PACKAGE_VENDOR "http://gadgetron.sourceforge.net/")
+set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Gadgetron web service")
+set(CPACK_PACKAGE_INSTALL_DIRECTORY "@PROJECT_NAME_LOWER@")
+set(CPACK_RESOURCE_FILE_LICENSE "@CMAKE_CURRENT_SOURCE_DIR@/LICENSE")
+set(CPACK_PACKAGE_MAINTAINER "Michael S. Hansen <michael.hansen at nih.gov>")
+set(CPACK_PACKAGE_CONTACT "Michael S. Hansen <michael.hansen at nih.gov>")
+
+set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
+SET(CPACK_COMPONENTS_ALL_IN_ONE_PACKAGE ON)
+
+# DEB specific
+set(CPACK_DEBIAN_PACKAGE_DEPENDS "@DEBIAN_PACKAGE_DEPENDS@")
+set(CPACK_DEBIAN_PACKAGE_SECTION "devel")
+set(CPACK_DEBIAN_PACKAGE_PRIORITY "optional")
+set(CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON)
+set(CPACK_DEBIAN_PACKAGE_DESCRIPTION "Implementation of the Gadgetron web server.")
+set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "@CMAKE_SOURCE_DIR@/cmake/debian_web/postinst;@CMAKE_SOURCE_DIR@/cmake/debian_web/prerm;")
+set(CPACK_DEB_COMPONENT_INSTALL ON)
+
+# NSIS specific
+set(CPACK_NSIS_HELP_LINK "http:\\\\\\\\gadgetron.sourceforge.net")
+set(CPACK_NSIS_URL_INFO_ABOUT "http:\\\\\\\\gadgetron.sourceforge.net")
+set(CPACK_NSIS_MODIFY_PATH ON)
+set(CPACK_NSIS_DISPLAY_NAME "gadgetron_web")
+
+# Output filename of the generated tarball / package
+set(CPACK_PACKAGE_FILE_NAME "@PROJECT_NAME_LOWER@-@GADGETRON_VERSION_STRING@")
+set(CPACK_SOURCE_PACKAGE_FILE_NAME "@PROJECT_NAME_LOWER@-@GADGETRON_VERSION_STRING@")
diff --git a/cmake/debian/postinst b/cmake/debian/postinst
new file mode 100644
index 0000000..5f80f31
--- /dev/null
+++ b/cmake/debian/postinst
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+# copy the gadgetron.xml
+sudo cp -i /usr/local/gadgetron/config/gadgetron.xml.example /usr/local/gadgetron/config/gadgetron.xml
+
+# create the symbolic link for gadgetron
+sudo rm -f /usr/local/bin/gadgetron
+sudo ln -s /usr/local/gadgetron/bin/gadgetron /usr/local/bin/gadgetron
+
+sudo rm -f /usr/local/bin/gadgetron_ismrmrd_client
+sudo ln -s /usr/local/gadgetron/bin/gadgetron_ismrmrd_client /usr/local/bin/gadgetron_ismrmrd_client
+
+sudo rm -f /usr/local/bin/gt_alive
+sudo ln -s /usr/local/gadgetron/bin/gt_alive /usr/local/bin/gt_alive
+
+# load library path
+sudo ldconfig
+
diff --git a/cmake/debian/prerm b/cmake/debian/prerm
new file mode 100644
index 0000000..eda091b
--- /dev/null
+++ b/cmake/debian/prerm
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# remove the gadgetron.xml
+sudo rm -f /usr/local/gadgetron/config/gadgetron.xml
+
+# remove the symbolic link for gadgetron
+sudo rm -f /usr/local/bin/gadgetron
+sudo rm -f /usr/local/bin/gadgetron_ismrmrd_client
+sudo rm -f /usr/local/bin/gt_alive
+
+# update library path
+sudo ldconfig
+
diff --git a/cmake/debian_web/postinst b/cmake/debian_web/postinst
new file mode 100644
index 0000000..6457e3d
--- /dev/null
+++ b/cmake/debian_web/postinst
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+# add the user 'gadgetron'
+sudo adduser --no-create-home --disabled-password --gecos "" gadgetron
+
+# start the gadgetron service
+sudo service gadgetron_web start
diff --git a/cmake/debian_web/prerm b/cmake/debian_web/prerm
new file mode 100644
index 0000000..83cc215
--- /dev/null
+++ b/cmake/debian_web/prerm
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+# stop the gadgetron service
+sudo service gadgetron_web stop
+
+# remove the user gadgetron
+sudo deluser gadgetron
diff --git a/cmake/gadgetron_cpack.cmake b/cmake/gadgetron_cpack.cmake
new file mode 100644
index 0000000..ed4329e
--- /dev/null
+++ b/cmake/gadgetron_cpack.cmake
@@ -0,0 +1,33 @@
+################################################################################
+# Find available package generators
+################################################################################
+
+if(UNIX)
+  # DEB
+  find_program(DPKG_PROGRAM dpkg)
+  if(EXISTS ${DPKG_PROGRAM})
+    list(APPEND CPACK_GENERATOR "DEB")
+  endif(EXISTS ${DPKG_PROGRAM})
+endif(UNIX)
+
+if(WIN32)
+    # NSIS
+    list(APPEND CPACK_GENERATOR "NSIS")    
+endif(WIN32)
+
+list(APPEND CPACK_SOURCE_GENERATOR "TGZ")
+list(APPEND CPACK_SOURCE_GENERATOR "ZIP")
+list(APPEND CPACK_SOURCE_IGNORE_FILES ";.git;.gitignore;todo.txt;_clang-format;build/")
+
+# set dependencies explicitly
+include(InstallRequiredSystemLibraries)
+set(DEBIAN_PACKAGE_DEPENDS "libfftw3-dev, python, python-numpy, liblapack-dev, libxml2-dev, libxslt-dev, libarmadillo-dev, libace-dev, python-matplotlib, python-libxml2, libboost-system-dev, libboost-thread-dev, libboost-program-options-dev, libboost-chrono-dev, libboost-filesystem-dev, ismrmrd")
+
+# where the package metadata are
+set(GADGETRON_CPACK_CFG_FILE "${PROJECT_BINARY_DIR}/cpack_options.cmake")
+
+# where the package is to be installed
+# set(CPACK_PACKAGE_INSTALL_DIRECTORY ${CMAKE_INSTALL_PREFIX})
+if (NOT WIN32)
+    set(CPACK_PACKAGING_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
+endif (NOT WIN32)
\ No newline at end of file
diff --git a/cmake/gadgetron_web_cpack.cmake b/cmake/gadgetron_web_cpack.cmake
new file mode 100644
index 0000000..63a7141
--- /dev/null
+++ b/cmake/gadgetron_web_cpack.cmake
@@ -0,0 +1,32 @@
+################################################################################
+# Find available package generators
+################################################################################
+
+if(UNIX)
+  # DEB
+  find_program(DPKG_PROGRAM dpkg)
+  if(EXISTS ${DPKG_PROGRAM})
+    list(APPEND CPACK_GENERATOR "DEB")
+  endif(EXISTS ${DPKG_PROGRAM})
+endif(UNIX)
+
+if(WIN32)
+    # NSIS
+    list(APPEND CPACK_GENERATOR "NSIS")    
+endif(WIN32)
+
+list(APPEND CPACK_SOURCE_GENERATOR "TGZ")
+list(APPEND CPACK_SOURCE_GENERATOR "ZIP")
+list(APPEND CPACK_SOURCE_IGNORE_FILES ";.git;.gitignore;todo.txt;_clang-format;build/")
+
+# set dependencies explicitly
+set(DEBIAN_PACKAGE_DEPENDS "gadgetron, python-psutil, python-twisted")
+
+# where the package metadata are
+set(GADGETRON_WEB_CPACK_CFG_FILE "${PROJECT_BINARY_DIR}/cpack_options_web.cmake")
+
+# where the package is to be installed
+# set(CPACK_PACKAGE_INSTALL_DIRECTORY ${CMAKE_INSTALL_PREFIX})
+if (NOT WIN32)
+    set(CPACK_PACKAGING_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
+endif (NOT WIN32)
diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt
index 76e79ff..68949ec 100644
--- a/doc/CMakeLists.txt
+++ b/doc/CMakeLists.txt
@@ -4,27 +4,3 @@ if(DOXYGEN_FOUND)
 else(DOXYGEN_FOUND)
 	MESSAGE("Doxygen not found. Will not be able to build documentation")
 endif(DOXYGEN_FOUND)
-
-find_file(XSLTPROCEXE xsltproc
-		HINTS /usr/bin
-		/usr/local/bin)
-
-find_path(DOCBOOK_XSL_DIR html/docbook.xsl
-  PATHS /usr/local/share/xml/xls/docbook-xsl-ns
-  		/usr/share/xml/xls/docbook-xsl-ns
-  		/usr/share/xml/docbook 
- 		/usr/share/xml/docbook/stylesheet/docbook-xsl-ns/
-  		/usr/share/sgml/docbook/xsl-ns-stylesheets
-       $ENV{DOCBOOKDIR}
-  NO_DEFAULT_PATH)
-
-if (NOT DOCBOOK_XSL_DIR)
-  message("Could not find HTML docbook.xsl, try to set DOCBOOKDIR")
-endif (NOT DOCBOOK_XSL_DIR)
-
-if (XSLTPROCEXE AND DOCBOOK_XSL_DIR)
-	MESSAGE("Docbook tools found, building manual XSLTPROCEXE: ${XSLTPROCEXE}")
-	add_subdirectory(manual)
-else (XSLTPROCEXE AND DOCBOOK_XSL_DIR)
-    MESSAGE("Docbook tools not found")
-endif (XSLTPROCEXE AND DOCBOOK_XSL_DIR)
diff --git a/doc/doxygen/Doxyfile.in b/doc/doxygen/Doxyfile.in
index ced4993..36f7a05 100644
--- a/doc/doxygen/Doxyfile.in
+++ b/doc/doxygen/Doxyfile.in
@@ -1445,7 +1445,7 @@ ENABLE_PREPROCESSING   = YES
 # compilation will be performed. Macro expansion can be done in a controlled
 # way by setting EXPAND_ONLY_PREDEF to YES.
 
-MACRO_EXPANSION        = NO
+MACRO_EXPANSION        = YES
 
 # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
 # then the macro expansion is limited to the macros specified with the
diff --git a/doc/manual/CMakeLists.txt b/doc/manual/CMakeLists.txt
deleted file mode 100644
index 2603493..0000000
--- a/doc/manual/CMakeLists.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-
-
-#configure_file(${CMAKE_CURRENT_SOURCE_DIR}/figs ${CMAKE_CURRENT_BINARY_DIR}/ COPYONLY)
-
-#file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/figs DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
-
-set(XSLTPROC_PARAMS --xinclude --stringparam use.role.for.mediaobject 1 --stringparam section.autolabel 1 --stringparam section.label.includes.component.label 1)
-
-add_custom_target(htmlmanual ${XSLTPROCEXE} ${XSLTPROC_PARAMS} -o ${CMAKE_CURRENT_BINARY_DIR}/gadgetron_manual.html
-${DOCBOOK_XSL_DIR}/html/docbook.xsl ${CMAKE_CURRENT_SOURCE_DIR}/gadgetron_manual.xml  
-DEPENDS manualfigs 
-WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
-COMMENT "Generating HTML docbook manual" VERBATIM)
-
-add_custom_command(OUTPUT manualfigs COMMAND ${CMAKE_COMMAND} ARGS -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/figs ${CMAKE_CURRENT_BINARY_DIR}/figs
-	COMMENT  "Copying figure files" VERBATIM)
-
-find_file(FOPEXE fop
-		HINTS /usr/bin
-		/usr/local/fop
-		/usr/local/bin)
-
-if (FOPEXE)
-	MESSAGE("FOP found, Building PDF Manual, FOPEXE: ${FOPEXE}")
-	
-	add_custom_target(pdfmanual ${FOPEXE} ${CMAKE_CURRENT_BINARY_DIR}/gadgetron_manual.fo ${CMAKE_CURRENT_BINARY_DIR}/gadgetron_manual.pdf  DEPENDS fomanual manualfigs
-	COMMENT "Generating PDF docbook manual" VERBATIM)
-	
-	add_custom_command(OUTPUT fomanual COMMAND ${XSLTPROCEXE} ${XSLTPROC_PARAMS} -o ${CMAKE_CURRENT_BINARY_DIR}/gadgetron_manual.fo
-	${DOCBOOK_XSL_DIR}/fo/docbook.xsl ${CMAKE_CURRENT_SOURCE_DIR}/gadgetron_manual.xml  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
-	COMMENT "Generating FO file for PDF docbook manual" VERBATIM)
-	
-	
-else (FOPEXE)
-	MESSAGE("FOP executable not found, PDF manual cannot be build")
-endif(FOPEXE)
\ No newline at end of file
diff --git a/doc/manual/figs/Gadgetron.png b/doc/manual/figs/Gadgetron.png
deleted file mode 100644
index 19d3f07..0000000
Binary files a/doc/manual/figs/Gadgetron.png and /dev/null differ
diff --git a/doc/manual/figs/Gadgetron.svg b/doc/manual/figs/Gadgetron.svg
deleted file mode 100644
index 82bbf65..0000000
--- a/doc/manual/figs/Gadgetron.svg
+++ /dev/null
@@ -1,1736 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<!-- Generator: Adobe Illustrator 15.0.2, SVG Export Plug-In . SVG Version: 6.00 Build 0)  -->
-<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
-<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
-	 width="595.28px" height="841.89px" viewBox="0 0 595.28 841.89" enable-background="new 0 0 595.28 841.89" xml:space="preserve">
-<g>
-	<g>
-		<g>
-			<g>
-				<g>
-					<path fill="#988A38" d="M218.69,432.847l-1.143,3.861c1.968-0.576,3.913-2.162,4.833-4.514
-						c-0.895-0.346-1.805-0.65-2.721-0.934C219.465,431.891,219.113,432.419,218.69,432.847z"/>
-					<path d="M219.659,431.261c-0.174,0.564-0.511,1.036-0.919,1.419l-0.103,0.348C219.078,432.552,219.444,431.959,219.659,431.261
-						z"/>
-					<path fill="#988A38" d="M219.659,431.261c0.916,0.283,1.826,0.588,2.721,0.934c0.919-2.346,0.336-4.944-1.525-6.658
-						l-1.143,3.861C219.855,429.995,219.853,430.631,219.659,431.261z"/>
-					<g>
-						<g>
-							<path fill="#988A38" d="M222.335,432.178c0.157-0.41-0.041-0.877-0.449-1.02c-0.408-0.144-0.854,0.079-0.994,0.498
-								c-0.505,1.473-1.582,2.54-2.779,3.137l-0.55,1.859C219.508,436.073,221.43,434.5,222.335,432.178z"/>
-							<path fill="#988A38" d="M222.176,432.387c0.147-0.364-0.027-0.782-0.386-0.911c-0.359-0.132-0.752,0.063-0.887,0.434
-								c-0.535,1.436-1.638,2.462-2.833,3.034l-0.491,1.658C219.435,436.048,221.283,434.571,222.176,432.387z"/>
-							<path fill="#988A38" d="M222.016,432.597c0.136-0.317-0.011-0.689-0.324-0.806c-0.31-0.119-0.653,0.05-0.78,0.371
-								c-0.566,1.4-1.696,2.384-2.887,2.932l-0.432,1.46C219.362,436.019,221.133,434.644,222.016,432.597z"/>
-							<path fill="#988A38" d="M221.86,432.804c0.122-0.27,0-0.592-0.265-0.697c-0.263-0.107-0.557,0.034-0.676,0.308
-								c-0.6,1.368-1.75,2.306-2.938,2.831l-0.373,1.258C219.289,435.992,220.983,434.716,221.86,432.804z"/>
-							<path fill="#988A38" d="M221.702,433.014c0.11-0.224,0.014-0.497-0.205-0.59c-0.218-0.096-0.465,0.019-0.571,0.243
-								c-0.641,1.333-1.804,2.227-2.991,2.729l-0.312,1.056C219.216,435.96,220.825,434.791,221.702,433.014z"/>
-							<path fill="#988A38" d="M221.545,433.223c0.095-0.177,0.027-0.402-0.146-0.485s-0.374,0.003-0.468,0.181
-								c-0.687,1.298-1.857,2.15-3.041,2.63l-0.253,0.854C219.143,435.931,220.662,434.865,221.545,433.223z"/>
-							<path fill="#FEE676" d="M221.391,433.434c0.079-0.131,0.038-0.309-0.089-0.38c-0.127-0.072-0.285-0.019-0.365,0.116
-								c-0.745,1.252-1.909,2.073-3.092,2.53l-0.193,0.651C219.071,435.898,220.487,434.934,221.391,433.434z"/>
-							<path fill="#FEE676" d="M221.236,433.646c0.063-0.091,0.049-0.214-0.033-0.275c-0.083-0.062-0.2-0.038-0.262,0.051
-								c-0.828,1.188-1.96,1.997-3.14,2.429l-0.134,0.453C218.997,435.87,220.289,434.981,221.236,433.646z"/>
-						</g>
-						<g>
-							<path fill="#988A38" d="M219.754,431.215c-0.096,0.312,0.066,0.641,0.362,0.74c0.294,0.096,0.621-0.079,0.726-0.39
-								c0.483-1.417,0.254-2.914-0.564-4.086l-0.5,1.688C219.966,429.819,219.969,430.522,219.754,431.215z"/>
-							<path fill="#988A38" d="M219.825,431.08c-0.08,0.28,0.065,0.57,0.33,0.657c0.265,0.08,0.555-0.077,0.644-0.354
-								c0.424-1.32,0.195-2.704-0.552-3.798l-0.446,1.507C219.992,429.725,220.018,430.408,219.825,431.08z"/>
-							<path fill="#988A38" d="M219.896,430.946c-0.064,0.246,0.064,0.499,0.298,0.571c0.233,0.068,0.487-0.073,0.56-0.318
-								c0.365-1.219,0.136-2.49-0.539-3.507l-0.393,1.326C220.019,429.628,220.068,430.291,219.896,430.946z"/>
-							<path fill="#988A38" d="M219.967,430.812c-0.049,0.212,0.062,0.428,0.266,0.485c0.203,0.057,0.42-0.068,0.478-0.281
-								c0.302-1.123,0.079-2.281-0.526-3.217l-0.338,1.144C220.045,429.534,220.116,430.175,219.967,430.812z"/>
-							<path fill="#988A38" d="M220.038,430.679c-0.035,0.178,0.062,0.356,0.234,0.398c0.17,0.044,0.351-0.065,0.393-0.243
-								c0.241-1.024,0.023-2.072-0.512-2.927l-0.285,0.963C220.071,429.438,220.164,430.059,220.038,430.679z"/>
-							<path fill="#988A38" d="M220.109,430.546c-0.022,0.143,0.059,0.284,0.201,0.312c0.138,0.03,0.281-0.062,0.309-0.206
-								c0.177-0.929-0.032-1.86-0.498-2.639l-0.231,0.783C220.097,429.346,220.207,429.939,220.109,430.546z"/>
-							<path fill="#FEE676" d="M220.182,430.41c-0.012,0.109,0.056,0.215,0.164,0.23c0.108,0.014,0.211-0.063,0.226-0.175
-								c0.106-0.832-0.087-1.646-0.482-2.346l-0.178,0.602C220.123,429.248,220.245,429.82,220.182,430.41z"/>
-							<path fill="#FEE676" d="M220.252,430.276c-0.002,0.077,0.057,0.139,0.131,0.144c0.076,0.001,0.138-0.061,0.14-0.136
-								c0.022-0.738-0.14-1.435-0.466-2.058l-0.125,0.42C220.149,429.153,220.266,429.701,220.252,430.276z"/>
-						</g>
-					</g>
-					<g>
-						<path d="M222.379,432.194c0.96-2.453,0.446-5.135-1.455-6.896l-0.142,0.48C222.603,427.447,223.255,429.963,222.379,432.194z"
-							/>
-					</g>
-					<path d="M222.787,430.4c0.089-0.818,0.031-1.622-0.193-2.39c-0.105,1.247-1.66,1.084-2.672,0.675l-0.205,0.69
-						C220.752,429.676,221.774,430.025,222.787,430.4z"/>
-					<path fill="#988A38" d="M232.737,440.408c0.497,0.528,1.378,0.584,2.042,0.072c0.649-0.5,0.754-1.476,0.163-2.106
-						c-2.071-2.237-4.537-3.999-7.133-5.42c-2.604-1.417-5.353-2.505-8.162-3.339l-0.852,2.877
-						C224.027,434.056,229.126,436.544,232.737,440.408z"/>
-					<path d="M234.758,438.543c0.488,0.518,0.563,1.521,0.02,1.938c0.753-0.582,1.04-1.53,0.347-2.276
-						c-2.095-2.263-4.583-4.039-7.197-5.47c-2.623-1.428-5.387-2.521-8.211-3.358l-0.142,0.479c2.794,0.829,5.527,1.911,8.113,3.319
-						C230.267,434.584,232.711,436.332,234.758,438.543z"/>
-					<g>
-						<path fill="#988A38" d="M233.001,440.401c0.27,0.288,0.743,0.309,1.084,0.027c0.34-0.28,0.397-0.784,0.102-1.101
-							c-3.967-4.308-9.383-6.908-14.881-8.562l-0.454,1.533C224.163,433.889,229.351,436.422,233.001,440.401z"/>
-						<path fill="#988A38" d="M232.769,440.058c0.237,0.261,0.67,0.263,0.967,0.004s0.329-0.701,0.07-0.985
-							c-3.931-4.086-9.183-6.594-14.532-8.201l-0.405,1.369C224.055,433.795,229.109,436.243,232.769,440.058z"/>
-						<path fill="#988A38" d="M232.521,439.724c0.221,0.221,0.603,0.213,0.855-0.02c0.259-0.238,0.278-0.627,0.041-0.863
-							c-3.878-3.885-8.976-6.298-14.175-7.858l-0.357,1.206C223.944,433.702,228.874,436.062,232.521,439.724z"/>
-						<path fill="#988A38" d="M232.271,439.387c0.193,0.192,0.519,0.181,0.735-0.028c0.215-0.209,0.227-0.546,0.022-0.75
-							c-3.833-3.676-8.765-6.009-13.818-7.519l-0.308,1.041C223.838,433.602,228.629,435.89,232.271,439.387z"/>
-						<path fill="#988A38" d="M232.008,439.059c0.167,0.157,0.447,0.14,0.624-0.041s0.177-0.462,0.001-0.627
-							c-3.767-3.492-8.55-5.729-13.455-7.193l-0.26,0.877C223.727,433.506,228.397,435.706,232.008,439.059z"/>
-						<path fill="#FEE676" d="M231.743,438.73c0.139,0.126,0.362,0.11,0.502-0.039c0.139-0.149,0.14-0.382-0.005-0.513
-							c-3.71-3.299-8.335-5.457-13.094-6.873l-0.21,0.712C223.617,433.409,228.153,435.536,231.743,438.73z"/>
-						<path fill="#FEE676" d="M231.466,438.412c0.108,0.094,0.282,0.077,0.387-0.041s0.099-0.294-0.013-0.391
-							c-3.639-3.127-8.117-5.193-12.726-6.567l-0.162,0.548C223.504,433.316,227.916,435.36,231.466,438.412z"/>
-						<path fill="#FEE676" d="M231.185,438.091c0.079,0.064,0.198,0.053,0.27-0.032c0.071-0.083,0.067-0.209-0.014-0.274
-							c-3.566-2.957-7.899-4.931-12.359-6.263l-0.113,0.383C223.39,433.225,227.678,435.182,231.185,438.091z"/>
-					</g>
-				</g>
-			</g>
-			<g>
-				<g>
-					<path fill="#988A38" d="M215.965,430.212c-0.176,0.813,0.292,1.598,1.035,1.789c0.603,0.149,1.2,0.315,1.795,0.491l0.852-2.877
-						c-0.634-0.187-1.271-0.364-1.912-0.523C216.936,428.887,216.139,429.407,215.965,430.212z"/>
-					<g>
-						<path fill="#988A38" d="M216.253,430.792c-0.099,0.434,0.149,0.853,0.548,0.953c0.687,0.171,1.371,0.354,2.049,0.556
-							l0.454-1.535c-0.702-0.209-1.409-0.398-2.12-0.575C216.77,430.086,216.351,430.363,216.253,430.792z"/>
-						<path fill="#988A38" d="M216.549,430.888c-0.09,0.383,0.129,0.762,0.486,0.854c0.615,0.152,1.226,0.322,1.833,0.501
-							l0.405-1.368c-0.626-0.185-1.255-0.359-1.889-0.517C217.015,430.262,216.64,430.503,216.549,430.888z"/>
-						<path fill="#988A38" d="M216.846,430.984c-0.081,0.337,0.108,0.677,0.423,0.757c0.542,0.136,1.08,0.288,1.616,0.446
-							l0.357-1.206c-0.551-0.162-1.103-0.318-1.66-0.458C217.257,430.44,216.928,430.645,216.846,430.984z"/>
-						<path fill="#988A38" d="M217.139,431.086c-0.072,0.292,0.091,0.586,0.364,0.655c0.467,0.125,0.935,0.251,1.398,0.391
-							l0.308-1.042c-0.474-0.144-0.953-0.271-1.431-0.4C217.499,430.619,217.212,430.794,217.139,431.086z"/>
-						<path fill="#988A38" d="M217.434,431.187c-0.063,0.247,0.074,0.493,0.303,0.555l1.181,0.334l0.26-0.878l-1.204-0.341
-							C217.739,430.793,217.497,430.942,217.434,431.187z"/>
-						<path fill="#FEE676" d="M217.728,431.29c-0.052,0.197,0.057,0.401,0.244,0.454l0.963,0.273l0.211-0.713l-0.979-0.278
-							C217.976,430.974,217.78,431.089,217.728,431.29z"/>
-						<path fill="#FEE676" d="M218.019,431.396c-0.042,0.156,0.042,0.312,0.187,0.353l0.746,0.214l0.162-0.548l-0.754-0.216
-							C218.212,431.157,218.061,431.241,218.019,431.396z"/>
-						<path fill="#FEE676" d="M218.311,431.504c-0.03,0.108,0.029,0.219,0.131,0.247l0.527,0.153l0.114-0.384l-0.532-0.154
-							C218.447,431.338,218.34,431.396,218.311,431.504z"/>
-					</g>
-					<path d="M215.965,430.212c0.145-0.67,1.048-1.051,1.708-0.878c0.637,0.158,1.271,0.335,1.902,0.521l0.142-0.479
-						c-0.637-0.188-1.277-0.366-1.921-0.525C216.859,428.612,216.169,429.273,215.965,430.212z"/>
-					<path fill="#988A38" d="M207.598,422.53c-1.971-0.097-3.775,0.661-4.977,1.902c-1.212,1.239-1.817,2.957-1.675,4.767
-						c0.144,1.823,0.95,3.403,2.029,4.561c1.088,1.154,2.452,1.881,3.974,1.955c2.662,0.128,5.321,0.496,7.897,1.087
-						c0.91,0.205,1.817,0.158,2.701-0.093l1.143-3.862c-0.86,0.859-2.025,1.292-3.172,1.031c-2.755-0.632-5.585-1.022-8.421-1.159
-						c-1.747-0.084-3.216-1.671-3.275-3.65c-0.06-1.987,1.573-3.64,3.629-3.542c3.255,0.158,6.492,0.603,9.68,1.333
-						c1.336,0.305,2.293,1.333,2.58,2.539l1.144-3.862c-0.816-0.756-1.862-1.331-3.052-1.601
-						C214.434,423.164,211.027,422.697,207.598,422.53z"/>
-					<path fill="#988A38" d="M207.516,424.185c-2.859-0.141-5.075,2.167-4.916,4.85c0.014,0.328,0.277,0.581,0.588,0.571
-						c0.311-0.007,0.561-0.279,0.555-0.606c-0.065-2.031,1.605-3.726,3.715-3.625c3.264,0.158,6.509,0.604,9.707,1.337
-						c1.317,0.3,2.283,1.285,2.614,2.455l0.499-1.686c-0.638-0.926-1.627-1.653-2.847-1.93
-						C214.163,424.802,210.85,424.347,207.516,424.185z"/>
-					<path fill="#988A38" d="M207.513,424.273c-2.756-0.134-4.915,2.028-4.82,4.596c0.007,0.293,0.241,0.52,0.52,0.518
-						c0.278-0.005,0.499-0.245,0.499-0.537c-0.017-1.986,1.655-3.614,3.749-3.515c3.266,0.159,6.514,0.605,9.712,1.338
-						c1.316,0.3,2.28,1.266,2.628,2.421l0.446-1.507c-0.621-0.936-1.607-1.672-2.835-1.95
-						C214.148,424.89,210.841,424.436,207.513,424.273z"/>
-					<path fill="#988A38" d="M207.507,424.362c-2.65-0.131-4.747,1.895-4.72,4.341c-0.001,0.258,0.205,0.458,0.449,0.459
-						c0.244,0.002,0.438-0.206,0.445-0.459c0.033-1.946,1.705-3.504,3.78-3.404c3.269,0.158,6.518,0.605,9.72,1.339
-						c1.316,0.299,2.277,1.242,2.642,2.381l0.392-1.325c-0.605-0.945-1.585-1.689-2.824-1.97
-						C214.133,424.978,210.831,424.523,207.507,424.362z"/>
-					<path fill="#988A38" d="M207.503,424.452c-2.546-0.126-4.577,1.761-4.622,4.087c-0.006,0.221,0.168,0.396,0.379,0.402
-						s0.379-0.17,0.39-0.388c0.085-1.903,1.754-3.393,3.813-3.294c3.271,0.159,6.522,0.606,9.727,1.34
-						c1.315,0.3,2.272,1.225,2.654,2.346l0.339-1.146c-0.588-0.953-1.565-1.704-2.812-1.987
-						C214.119,425.066,210.821,424.613,207.503,424.452z"/>
-					<path fill="#988A38" d="M207.499,424.543c-2.444-0.121-4.399,1.626-4.522,3.832c-0.013,0.184,0.132,0.336,0.308,0.346
-						c0.177,0.007,0.319-0.135,0.336-0.313c0.146-1.867,1.802-3.281,3.845-3.184c3.273,0.159,6.527,0.606,9.733,1.341
-						c1.314,0.299,2.27,1.201,2.668,2.305l0.285-0.963c-0.57-0.964-1.544-1.722-2.801-2.007
-						C214.104,425.156,210.811,424.704,207.499,424.543z"/>
-					<path fill="#988A38" d="M207.494,424.632c-2.341-0.116-4.212,1.491-4.421,3.579c-0.017,0.146,0.096,0.275,0.237,0.288
-						c0.143,0.013,0.262-0.099,0.279-0.241c0.217-1.826,1.854-3.17,3.878-3.073c3.275,0.159,6.531,0.606,9.739,1.342
-						c1.314,0.3,2.266,1.182,2.682,2.27l0.232-0.783c-0.554-0.975-1.524-1.739-2.79-2.026
-						C214.089,425.244,210.802,424.792,207.494,424.632z"/>
-					<path fill="#FEE676" d="M207.49,424.721c-2.239-0.109-3.999,1.359-4.32,3.326c-0.02,0.111,0.059,0.215,0.165,0.23
-						c0.108,0.016,0.204-0.061,0.223-0.169c0.311-1.781,1.904-3.058,3.911-2.963c3.278,0.16,6.536,0.607,9.747,1.343
-						c1.314,0.3,2.262,1.161,2.695,2.232l0.178-0.602c-0.538-0.982-1.503-1.756-2.778-2.046
-						C214.075,425.332,210.792,424.881,207.49,424.721z"/>
-					<path fill="#FEE676" d="M207.485,424.811c-2.136-0.103-3.761,1.236-4.22,3.074c-0.021,0.076,0.022,0.15,0.094,0.171
-						c0.073,0.02,0.146-0.024,0.166-0.095c0.441-1.728,1.955-2.945,3.945-2.853c3.28,0.159,6.54,0.607,9.753,1.344
-						c1.313,0.299,2.26,1.14,2.709,2.193l0.124-0.418c-0.52-0.995-1.482-1.774-2.766-2.066
-						C214.06,425.421,210.783,424.971,207.485,424.811z"/>
-					<path d="M219.186,428.247c0.242,0.342,0.419,0.717,0.519,1.125l0.012,0.003l0.204-0.689c-0.309-0.133-0.573-0.284-0.726-0.449
-						L219.186,428.247z"/>
-					<path fill="#988A38" d="M207.03,434.066c-2.325-0.119-4.339-2.232-4.503-4.959c-0.018-0.438-0.383-0.789-0.814-0.76
-						c-0.431,0.028-0.753,0.408-0.719,0.848c0.14,1.807,0.941,3.376,2.012,4.525c1.081,1.146,2.436,1.869,3.946,1.942
-						c2.665,0.129,5.327,0.497,7.906,1.088c0.911,0.206,1.82,0.159,2.705-0.098l0.551-1.861c-0.916,0.449-1.907,0.625-2.898,0.401
-						C212.541,434.58,209.788,434.199,207.03,434.066z"/>
-					<path fill="#988A38" d="M207.024,434.194c-2.302-0.12-4.348-2.16-4.578-4.847c-0.025-0.392-0.349-0.701-0.73-0.668
-						c-0.381,0.028-0.666,0.37-0.627,0.763c0.167,1.733,0.964,3.234,2.018,4.328c1.063,1.094,2.387,1.78,3.847,1.852
-						c2.667,0.129,5.332,0.497,7.913,1.089c0.911,0.205,1.822,0.155,2.711-0.108l0.491-1.66c-0.916,0.434-1.9,0.597-2.883,0.375
-						C212.52,434.706,209.775,434.327,207.024,434.194z"/>
-					<path fill="#988A38" d="M207.018,434.323c-2.278-0.12-4.352-2.087-4.651-4.735c-0.032-0.343-0.314-0.609-0.645-0.577
-						c-0.332,0.03-0.581,0.335-0.539,0.679c0.196,1.66,0.991,3.093,2.027,4.129c1.044,1.042,2.336,1.691,3.747,1.761
-						c2.67,0.128,5.337,0.497,7.921,1.09c0.91,0.205,1.825,0.15,2.716-0.117l0.432-1.458c-0.916,0.416-1.893,0.569-2.867,0.35
-						C212.499,434.834,209.76,434.455,207.018,434.323z"/>
-					<path fill="#988A38" d="M207.011,434.451c-2.255-0.119-4.349-2.011-4.723-4.625c-0.037-0.296-0.279-0.517-0.562-0.484
-						c-0.283,0.033-0.494,0.299-0.453,0.594c0.229,1.59,1.021,2.952,2.039,3.935c1.026,0.984,2.285,1.599,3.646,1.666
-						c2.672,0.128,5.342,0.497,7.928,1.091c0.911,0.205,1.827,0.146,2.722-0.125l0.372-1.257c-0.915,0.397-1.887,0.541-2.851,0.323
-						C212.478,434.961,209.747,434.583,207.011,434.451z"/>
-					<path fill="#988A38" d="M207.005,434.58c-2.233-0.119-4.339-1.933-4.793-4.515c-0.04-0.248-0.245-0.427-0.481-0.394
-						c-0.236,0.033-0.407,0.265-0.365,0.51c0.264,1.521,1.052,2.812,2.052,3.742c1.006,0.927,2.231,1.507,3.542,1.57
-						c2.674,0.129,5.347,0.498,7.935,1.092c0.911,0.205,1.83,0.144,2.728-0.135l0.312-1.055c-0.915,0.38-1.879,0.513-2.834,0.298
-						C212.457,435.088,209.732,434.711,207.005,434.58z"/>
-					<path fill="#988A38" d="M206.999,434.708c-2.21-0.119-4.313-1.858-4.864-4.401c-0.04-0.201-0.21-0.337-0.398-0.302
-						c-0.19,0.034-0.321,0.225-0.279,0.425c0.656,2.895,3.021,4.887,5.504,5.021c2.677,0.129,5.352,0.499,7.943,1.093
-						c0.911,0.206,1.832,0.141,2.732-0.142l0.253-0.854c-0.916,0.365-1.873,0.485-2.818,0.271
-						C212.436,435.214,209.719,434.839,206.999,434.708z"/>
-					<path fill="#FEE676" d="M206.992,434.836c-2.188-0.117-4.263-1.792-4.934-4.29c-0.037-0.148-0.175-0.247-0.317-0.211
-						c-0.143,0.037-0.233,0.189-0.193,0.341c0.76,2.752,3.028,4.603,5.416,4.731c2.68,0.129,5.357,0.499,7.951,1.094
-						c0.91,0.205,1.834,0.136,2.737-0.148l0.193-0.654c-0.915,0.347-1.865,0.457-2.802,0.245
-						C212.415,435.341,209.705,434.967,206.992,434.836z"/>
-					<path fill="#FEE676" d="M206.986,434.964c-2.167-0.113-4.159-1.748-5.002-4.178c-0.035-0.104-0.141-0.157-0.238-0.119
-						c-0.096,0.038-0.145,0.151-0.109,0.256c0.919,2.587,3.036,4.32,5.33,4.441c2.682,0.129,5.361,0.5,7.958,1.095
-						c0.911,0.205,1.836,0.133,2.743-0.158l0.134-0.452c-0.915,0.331-1.859,0.43-2.787,0.221
-						C212.394,435.468,209.691,435.095,206.986,434.964z"/>
-					<path d="M215.574,433.633c-2.771-0.636-5.614-1.028-8.465-1.166c-1.634-0.081-3.231-1.559-3.288-3.398
-						c0.061,2.122,1.404,3.812,3.263,3.898c2.822,0.136,5.637,0.524,8.378,1.153c1.197,0.273,2.324-0.187,3.174-1.094l0.103-0.348
-						C217.87,433.487,216.665,433.879,215.574,433.633z"/>
-					<path d="M204.298,427.239c0.208-0.359,0.48-0.673,0.805-0.938l-0.005-0.013c-0.815,0.464-3.499,0.43-3.182-1.161
-						c-0.427,0.675-0.706,1.436-0.854,2.239C202.141,427.285,203.22,427.253,204.298,427.239z"/>
-					<path d="M207.61,422.28c-2.051-0.1-3.863,0.692-5.048,1.98c-1.195,1.285-1.763,3.062-1.616,4.938
-						c-0.137-1.738,0.508-3.395,1.738-4.59c1.219-1.196,3.017-1.923,4.902-1.83c3.415,0.167,6.808,0.632,10.161,1.4
-						c1.163,0.263,2.211,0.84,3.035,1.6l0.143-0.482c-0.811-0.752-1.854-1.33-3.066-1.604
-						C214.475,422.917,211.054,422.448,207.61,422.28z"/>
-				</g>
-				<g>
-					<path fill="#988A38" d="M190.138,433.308l1.412,3.771c1.142-1.616,1.797-4.04,1.149-6.485c-0.923,0.26-1.842,0.535-2.742,0.864
-						C190.18,432.08,190.213,432.706,190.138,433.308z"/>
-					<path d="M189.958,431.458c0.201,0.56,0.209,1.131,0.119,1.686l0.127,0.34C190.264,432.833,190.205,432.147,189.958,431.458z"/>
-					<path fill="#988A38" d="M189.958,431.458c0.9-0.329,1.819-0.604,2.742-0.864c-0.641-2.428-2.636-4.207-5.235-4.427l1.413,3.772
-						C189.354,430.32,189.736,430.841,189.958,431.458z"/>
-					<g>
-						<g>
-							<path fill="#988A38" d="M192.654,430.605c-0.116-0.427-0.547-0.675-0.964-0.554c-0.417,0.116-0.639,0.575-0.504,0.992
-								c0.464,1.479,0.235,2.982-0.335,4.164l0.681,1.817C192.653,435.417,193.297,433.021,192.654,430.605z"/>
-							<path fill="#988A38" d="M192.648,430.869c-0.097-0.384-0.48-0.607-0.849-0.505c-0.371,0.098-0.567,0.501-0.457,0.876
-								c0.42,1.471,0.143,2.954-0.438,4.114l0.607,1.622C192.582,435.435,193.221,433.169,192.648,430.869z"/>
-							<path fill="#988A38" d="M192.643,431.133c-0.078-0.337-0.413-0.539-0.736-0.456c-0.323,0.081-0.497,0.43-0.41,0.762
-								c0.375,1.462,0.053,2.925-0.538,4.063l0.533,1.425C192.509,435.451,193.143,433.316,192.643,431.133z"/>
-							<path fill="#988A38" d="M192.641,431.396c-0.062-0.294-0.348-0.474-0.625-0.408c-0.278,0.064-0.428,0.357-0.361,0.648
-								c0.327,1.457-0.038,2.894-0.638,4.014l0.459,1.228C192.439,435.473,193.064,433.465,192.641,431.396z"/>
-							<path fill="#988A38" d="M192.637,431.657c-0.044-0.246-0.28-0.404-0.513-0.355c-0.233,0.048-0.365,0.284-0.316,0.531
-								c0.275,1.457-0.125,2.865-0.737,3.965l0.386,1.03C192.369,435.497,192.981,433.617,192.637,431.657z"/>
-							<path fill="#988A38" d="M192.635,431.92c-0.029-0.201-0.215-0.337-0.404-0.306c-0.187,0.033-0.301,0.217-0.27,0.418
-								c0.217,1.455-0.213,2.835-0.835,3.913l0.312,0.834C192.298,435.519,192.893,433.77,192.635,431.92z"/>
-							<path fill="#FEE676" d="M192.634,432.183c-0.015-0.155-0.151-0.27-0.295-0.257c-0.145,0.016-0.24,0.151-0.225,0.307
-								c0.143,1.451-0.3,2.804-0.933,3.86l0.239,0.64C192.227,435.539,192.791,433.921,192.634,432.183z"/>
-							<path fill="#FEE676" d="M192.634,432.444c-0.002-0.11-0.086-0.202-0.188-0.204c-0.103-0.003-0.183,0.085-0.18,0.192
-								c0.036,1.446-0.385,2.772-1.03,3.807l0.166,0.442C192.156,435.563,192.659,434.071,192.634,432.444z"/>
-						</g>
-						<g>
-							<path fill="#988A38" d="M190.008,431.365c0.111,0.309,0.438,0.478,0.73,0.374c0.292-0.099,0.452-0.426,0.352-0.739
-								c-0.454-1.435-1.525-2.496-2.914-2.934l0.617,1.646C189.337,430.112,189.766,430.685,190.008,431.365z"/>
-							<path fill="#988A38" d="M189.984,431.215c0.104,0.274,0.393,0.421,0.655,0.327c0.258-0.095,0.4-0.386,0.307-0.664
-								c-0.444-1.318-1.451-2.29-2.731-2.707l0.551,1.471C189.302,430.021,189.736,430.563,189.984,431.215z"/>
-							<path fill="#988A38" d="M189.961,431.067c0.095,0.236,0.35,0.362,0.579,0.275c0.226-0.089,0.349-0.345,0.261-0.588
-								c-0.434-1.202-1.373-2.08-2.547-2.48l0.484,1.294C189.266,429.93,189.707,430.442,189.961,431.067z"/>
-							<path fill="#988A38" d="M189.937,430.916c0.088,0.202,0.307,0.308,0.503,0.229c0.194-0.08,0.296-0.306,0.215-0.511
-								c-0.425-1.086-1.294-1.875-2.363-2.256l0.419,1.119C189.229,429.838,189.676,430.318,189.937,430.916z"/>
-							<path fill="#988A38" d="M189.915,430.769c0.079,0.164,0.261,0.247,0.425,0.177c0.162-0.069,0.241-0.266,0.169-0.432
-								c-0.416-0.967-1.217-1.673-2.178-2.031l0.352,0.94C189.193,429.745,189.643,430.197,189.915,430.769z"/>
-							<path fill="#988A38" d="M189.891,430.618c0.069,0.131,0.218,0.191,0.348,0.129c0.129-0.061,0.188-0.222,0.124-0.354
-								c-0.408-0.846-1.136-1.47-1.992-1.807l0.287,0.766C189.157,429.654,189.608,430.077,189.891,430.618z"/>
-							<path fill="#FEE676" d="M189.868,430.467c0.058,0.097,0.174,0.137,0.271,0.083c0.096-0.053,0.131-0.178,0.077-0.275
-								c-0.409-0.725-1.056-1.269-1.806-1.584l0.22,0.587C189.121,429.562,189.567,429.958,189.868,430.467z"/>
-							<path fill="#FEE676" d="M189.846,430.317c0.045,0.063,0.129,0.076,0.191,0.034c0.062-0.046,0.076-0.13,0.031-0.194
-								c-0.419-0.595-0.975-1.071-1.619-1.361l0.153,0.409C189.085,429.471,189.512,429.85,189.846,430.317z"/>
-						</g>
-					</g>
-					<g>
-						<path d="M192.7,430.594c-0.671-2.54-2.658-4.427-5.322-4.659l0.176,0.47C190.088,426.613,192.091,428.285,192.7,430.594z"/>
-					</g>
-					<path d="M191.972,428.906c-0.411-0.707-0.936-1.326-1.579-1.812c0.657,1.064-0.702,1.865-1.777,2.149l0.252,0.674
-						C189.892,429.539,190.926,429.195,191.972,428.906z"/>
-					<path fill="#988A38" d="M206.178,430.509c0.768,0.026,1.433-0.613,1.483-1.437c0.051-0.83-0.574-1.534-1.396-1.562
-						c-5.832-0.158-11.748,0.574-17.309,2.642l1.052,2.81C195.106,431.062,200.674,430.355,206.178,430.509z"/>
-					<path d="M206.258,427.76c0.683,0.022,1.447,0.62,1.404,1.312c0.058-0.966-0.427-1.779-1.389-1.812
-						c-5.859-0.159-11.804,0.575-17.404,2.657l0.176,0.469C194.566,428.332,200.454,427.602,206.258,427.76z"/>
-					<g>
-						<path fill="#988A38" d="M206.366,430.315c0.412,0.017,0.764-0.327,0.786-0.77c0.022-0.441-0.307-0.813-0.734-0.831
-							c-5.762-0.176-11.606,0.537-17.041,2.561l0.562,1.499C195.123,430.842,200.78,430.143,206.366,430.315z"/>
-						<path fill="#988A38" d="M205.94,430.244c0.369,0.005,0.681-0.298,0.696-0.693c0.016-0.392-0.281-0.728-0.662-0.733
-							c-5.605-0.126-11.278,0.601-16.557,2.564l0.5,1.337C194.979,430.833,200.488,430.12,205.94,430.244z"/>
-						<path fill="#988A38" d="M205.513,430.178c0.325,0.006,0.597-0.271,0.607-0.617c0.01-0.348-0.253-0.634-0.588-0.641
-							c-5.448-0.075-10.953,0.66-16.077,2.566l0.441,1.177C194.835,430.825,200.197,430.103,205.513,430.178z"/>
-						<path fill="#988A38" d="M205.084,430.113c0.282,0,0.514-0.235,0.52-0.536c0.006-0.299-0.224-0.551-0.512-0.55
-							c-5.292-0.031-10.626,0.718-15.597,2.564l0.381,1.018C194.69,430.819,199.904,430.082,205.084,430.113z"/>
-						<path fill="#988A38" d="M204.656,430.055c0.236,0,0.432-0.206,0.434-0.458c0.002-0.253-0.193-0.458-0.435-0.458
-							c-5.135,0.009-10.303,0.768-15.12,2.558l0.321,0.858C194.545,430.812,199.612,430.062,204.656,430.055z"/>
-						<path fill="#FEE676" d="M204.227,429.994c0.192,0,0.349-0.165,0.348-0.371c0-0.206-0.16-0.372-0.355-0.372
-							c-4.979,0.058-9.98,0.812-14.645,2.551l0.261,0.695C194.398,430.796,199.32,430.05,204.227,429.994z"/>
-						<path fill="#FEE676" d="M203.795,429.944c0.148-0.003,0.269-0.134,0.267-0.292c-0.002-0.156-0.126-0.281-0.277-0.278
-							c-4.822,0.092-9.657,0.854-14.17,2.534l0.2,0.534C194.251,430.79,199.027,430.035,203.795,429.944z"/>
-						<path fill="#FEE676" d="M203.363,429.894c0.104-0.002,0.187-0.093,0.185-0.203s-0.09-0.198-0.195-0.196
-							c-4.666,0.128-9.337,0.895-13.699,2.519l0.14,0.374C194.104,430.782,198.734,430.021,203.363,429.894z"/>
-					</g>
-				</g>
-			</g>
-			<g>
-				<g>
-					<path fill="#988A38" d="M186.4,432.894c0.375,0.737,1.223,1.063,1.909,0.759c0.56-0.244,1.125-0.477,1.699-0.691l-1.052-2.809
-						c-0.625,0.233-1.245,0.487-1.859,0.756C186.33,431.248,186.024,432.155,186.4,432.894z"/>
-					<g>
-						<path fill="#988A38" d="M186.985,433.162c0.192,0.395,0.646,0.578,1.016,0.412c0.634-0.29,1.283-0.552,1.938-0.8l-0.562-1.499
-							c-0.687,0.259-1.367,0.534-2.035,0.84C186.95,432.292,186.791,432.762,186.985,433.162z"/>
-						<path fill="#988A38" d="M187.275,433.054c0.169,0.356,0.574,0.523,0.904,0.373c0.572-0.251,1.151-0.488,1.739-0.708
-							l-0.5-1.337c-0.612,0.229-1.217,0.478-1.816,0.74C187.253,432.28,187.106,432.697,187.275,433.054z"/>
-						<path fill="#988A38" d="M187.567,432.949c0.146,0.315,0.499,0.462,0.792,0.333c0.508-0.217,1.019-0.426,1.537-0.618
-							l-0.44-1.178c-0.539,0.2-1.069,0.417-1.597,0.643C187.551,432.264,187.421,432.634,187.567,432.949z"/>
-						<path fill="#988A38" d="M187.857,432.847c0.123,0.273,0.428,0.4,0.686,0.296c0.439-0.189,0.885-0.365,1.333-0.533
-							l-0.381-1.018c-0.463,0.174-0.924,0.355-1.378,0.552C187.85,432.252,187.734,432.571,187.857,432.847z"/>
-						<path fill="#988A38" d="M188.149,432.744c0.102,0.233,0.36,0.349,0.577,0.259c0.371-0.162,0.751-0.303,1.129-0.448
-							l-0.321-0.858c-0.389,0.149-0.779,0.295-1.161,0.461C188.149,432.251,188.048,432.512,188.149,432.744z"/>
-						<path fill="#FEE676" d="M188.444,432.65c0.081,0.189,0.289,0.284,0.467,0.21c0.304-0.131,0.617-0.24,0.924-0.362l-0.261-0.696
-							c-0.315,0.124-0.634,0.236-0.945,0.37C188.446,432.248,188.363,432.46,188.444,432.65z"/>
-						<path fill="#FEE676" d="M188.737,432.555c0.062,0.149,0.222,0.222,0.359,0.164l0.718-0.275l-0.2-0.534l-0.73,0.28
-							C188.744,432.248,188.676,432.407,188.737,432.555z"/>
-						<path fill="#FEE676" d="M189.031,432.462c0.042,0.104,0.156,0.158,0.254,0.12l0.508-0.195l-0.14-0.374l-0.514,0.197
-							C189.039,432.248,188.989,432.357,189.031,432.462z"/>
-					</g>
-					<path d="M186.4,432.894c-0.315-0.617,0.163-1.478,0.798-1.758c0.609-0.266,1.224-0.519,1.846-0.75l-0.175-0.469
-						c-0.63,0.235-1.254,0.491-1.873,0.762C186.096,431.08,185.965,432.034,186.4,432.894z"/>
-					<path fill="#988A38" d="M174.619,433.471c-1.589,1.618-2.243,3.866-1.888,5.497c0.29,1.726,1.335,3.212,2.896,4.146
-						c1.564,0.938,3.324,1.198,4.88,1.015c1.624-0.275,2.771-0.636,3.627-1.51c1.472-1.542,3.358-2.847,5.509-3.887
-						c0.762-0.362,1.385-0.938,1.906-1.652l-1.413-3.772c-0.162,1.204-0.793,2.239-1.797,2.723c-2.417,1.169-4.608,2.669-6.369,4.51
-						c-0.552,0.567-1.307,0.852-2.161,0.893c-0.842,0.036-1.805-0.167-2.617-0.73c-0.817-0.568-1.331-1.401-1.425-2.361
-						c-0.096-0.947,0.241-1.997,1.011-2.791c2.45-2.557,5.373-4.526,8.43-6.005c1.287-0.619,2.708-0.392,3.666,0.395l-1.413-3.771
-						c-1.142-0.104-2.379,0.112-3.557,0.676C180.583,428.45,177.356,430.615,174.619,433.471z"/>
-					<path fill="#988A38" d="M175.813,434.619c-1.094,1.131-1.566,2.64-1.404,3.934c0.194,1.263,0.92,2.384,2.044,3.119
-						c0.275,0.18,0.634,0.114,0.808-0.143c0.176-0.255,0.101-0.616-0.167-0.805c-0.836-0.58-1.369-1.429-1.462-2.413
-						c-0.101-0.973,0.248-2.051,1.04-2.868c2.464-2.571,5.402-4.552,8.474-6.036c1.271-0.611,2.672-0.425,3.647,0.306l-0.617-1.646
-						c-1.086-0.349-2.358-0.301-3.548,0.269C181.451,429.871,178.392,431.929,175.813,434.619z"/>
-					<path fill="#988A38" d="M175.878,434.68c-1.054,1.092-1.522,2.528-1.396,3.787c0.163,1.222,0.833,2.307,1.893,3.034
-						c0.242,0.165,0.562,0.107,0.723-0.119c0.159-0.228,0.093-0.547-0.143-0.72c-0.804-0.585-1.307-1.43-1.378-2.405
-						c-0.078-0.965,0.281-2.028,1.068-2.841c2.468-2.575,5.41-4.558,8.484-6.043c1.27-0.611,2.658-0.439,3.637,0.269l-0.551-1.471
-						c-1.078-0.368-2.352-0.329-3.548,0.244C181.498,429.947,178.448,431.998,175.878,434.68z"/>
-					<path fill="#988A38" d="M175.941,434.742c-1.011,1.051-1.48,2.417-1.381,3.635c0.123,1.187,0.745,2.229,1.734,2.95
-						c0.209,0.151,0.491,0.101,0.635-0.096c0.145-0.196,0.089-0.477-0.111-0.63c-0.773-0.594-1.246-1.434-1.295-2.4
-						c-0.056-0.955,0.313-2.004,1.094-2.81c2.472-2.579,5.417-4.564,8.495-6.052c1.27-0.61,2.643-0.456,3.625,0.229l-0.484-1.294
-						c-1.07-0.385-2.343-0.356-3.548,0.221C181.543,430.023,178.502,432.069,175.941,434.742z"/>
-					<path fill="#988A38" d="M176.006,434.805c-0.968,1.01-1.44,2.307-1.367,3.48c0.083,1.152,0.658,2.152,1.577,2.87
-						c0.175,0.133,0.42,0.095,0.548-0.072c0.127-0.168,0.083-0.407-0.086-0.544c-0.742-0.602-1.183-1.437-1.211-2.395
-						c-0.034-0.946,0.345-1.979,1.122-2.779c2.475-2.584,5.425-4.57,8.506-6.061c1.27-0.61,2.628-0.469,3.615,0.192l-0.419-1.119
-						c-1.061-0.402-2.334-0.383-3.548,0.199C181.591,430.101,178.559,432.141,176.006,434.805z"/>
-					<path fill="#988A38" d="M176.071,434.867c-0.926,0.971-1.398,2.195-1.35,3.325c0.041,1.115,0.574,2.075,1.418,2.789
-						c0.141,0.117,0.35,0.089,0.46-0.048c0.109-0.14,0.078-0.338-0.057-0.456c-0.71-0.614-1.12-1.445-1.127-2.393
-						c-0.013-0.936,0.375-1.951,1.147-2.745c2.479-2.587,5.432-4.577,8.517-6.067c1.27-0.61,2.613-0.486,3.604,0.15l-0.353-0.94
-						c-1.051-0.421-2.326-0.41-3.548,0.176C181.638,430.179,178.616,432.213,176.071,434.867z"/>
-					<path fill="#988A38" d="M176.135,434.929c-0.888,0.928-1.349,2.089-1.331,3.166c-0.001,1.077,0.493,1.999,1.258,2.713
-						c0.108,0.1,0.279,0.083,0.372-0.024c0.094-0.108,0.074-0.27-0.031-0.368c-1.38-1.198-1.427-3.498,0.132-5.103
-						c2.482-2.591,5.44-4.583,8.528-6.075c1.27-0.609,2.598-0.5,3.594,0.114l-0.287-0.765c-1.044-0.439-2.319-0.438-3.548,0.151
-						C181.685,430.255,178.671,432.283,176.135,434.929z"/>
-					<path fill="#FEE676" d="M176.2,434.99c-0.851,0.885-1.294,1.978-1.304,3c-0.031,1.031,0.405,1.921,1.09,2.643
-						c0.079,0.081,0.209,0.078,0.284,0.001c0.076-0.079,0.07-0.201-0.006-0.281c-1.289-1.247-1.3-3.478,0.243-5.066
-						c2.486-2.595,5.448-4.59,8.539-6.084c1.27-0.609,2.583-0.515,3.583,0.076l-0.22-0.588c-1.035-0.456-2.312-0.465-3.548,0.128
-						C181.731,430.331,178.727,432.354,176.2,434.99z"/>
-					<path fill="#FEE676" d="M176.265,435.053c-0.813,0.842-1.233,1.862-1.269,2.828c-0.048,0.979,0.313,1.846,0.916,2.578
-						c0.05,0.062,0.135,0.07,0.194,0.023s0.066-0.133,0.021-0.19c-1.167-1.325-1.173-3.457,0.354-5.032
-						c2.49-2.599,5.455-4.596,8.55-6.092c1.269-0.609,2.568-0.529,3.572,0.037l-0.153-0.408c-1.027-0.477-2.304-0.494-3.549,0.103
-						C181.778,430.408,178.783,432.425,176.265,435.053z"/>
-					<path d="M187.75,429.342c0.407,0.128,0.781,0.32,1.107,0.58l0.012-0.005l-0.252-0.673c-0.33,0.088-0.64,0.122-0.866,0.084
-						L187.75,429.342z"/>
-					<path fill="#988A38" d="M182.945,441.476c-0.704,0.729-1.666,1.069-2.833,1.159c-1.183,0.127-2.498-0.114-3.643-0.86
-						c-0.367-0.242-0.865-0.148-1.092,0.221c-0.231,0.366-0.106,0.851,0.273,1.078c1.549,0.931,3.296,1.19,4.842,1.009
-						c1.613-0.275,2.751-0.626,3.604-1.498c1.478-1.547,3.369-2.855,5.524-3.898c0.763-0.362,1.388-0.938,1.907-1.66l-0.681-1.817
-						c-0.44,0.899-1.069,1.631-1.921,2.038C186.63,438.356,184.576,439.77,182.945,441.476z"/>
-					<path fill="#988A38" d="M183.038,441.563c-0.694,0.719-1.641,1.065-2.794,1.179c-1.172,0.149-2.477-0.056-3.625-0.762
-						c-0.334-0.21-0.771-0.125-0.968,0.204c-0.2,0.326-0.087,0.756,0.256,0.953c1.508,0.868,3.194,1.091,4.685,0.893
-						c1.557-0.292,2.644-0.62,3.477-1.476c1.481-1.551,3.376-2.862,5.536-3.906c0.764-0.362,1.389-0.943,1.907-1.672l-0.607-1.622
-						c-0.45,0.887-1.078,1.604-1.922,2.007C186.697,438.467,184.656,439.87,183.038,441.563z"/>
-					<path fill="#988A38" d="M183.13,441.653c-0.683,0.708-1.614,1.062-2.754,1.195c-1.161,0.173-2.455,0.004-3.609-0.664
-						c-0.3-0.175-0.677-0.099-0.844,0.188c-0.169,0.288-0.067,0.666,0.237,0.832c1.468,0.804,3.094,0.989,4.529,0.772
-						c1.5-0.31,2.534-0.613,3.347-1.453c1.485-1.555,3.385-2.869,5.548-3.915c0.764-0.363,1.389-0.949,1.908-1.683l-0.534-1.424
-						c-0.458,0.871-1.086,1.577-1.92,1.974C186.764,438.577,184.736,439.973,183.13,441.653z"/>
-					<path fill="#988A38" d="M183.224,441.743c-0.673,0.698-1.586,1.056-2.712,1.21c-1.149,0.195-2.432,0.061-3.596-0.566
-						c-0.263-0.144-0.58-0.073-0.72,0.175c-0.138,0.249-0.044,0.574,0.22,0.712c1.43,0.737,2.995,0.886,4.376,0.652
-						c0.348-0.057,0.685-0.143,1.003-0.25c0.304-0.069,0.583-0.161,0.844-0.273c0.535-0.214,0.982-0.514,1.368-0.907
-						c1.49-1.56,3.394-2.876,5.561-3.925c0.765-0.362,1.389-0.953,1.908-1.692l-0.459-1.228c-0.468,0.856-1.096,1.55-1.92,1.942
-						C186.832,438.688,184.817,440.075,183.224,441.743z"/>
-					<path fill="#988A38" d="M183.316,441.832c-0.663,0.688-1.557,1.051-2.668,1.225c-1.136,0.216-2.408,0.115-3.584-0.469
-						c-0.224-0.112-0.486-0.048-0.598,0.162c-0.11,0.211-0.022,0.482,0.203,0.592c1.394,0.669,2.898,0.779,4.226,0.531
-						c0.332-0.056,0.661-0.162,0.956-0.246c0.292-0.07,0.555-0.166,0.809-0.273c0.512-0.215,0.943-0.508,1.316-0.888
-						c1.494-1.563,3.402-2.883,5.573-3.933c0.765-0.363,1.39-0.959,1.907-1.705l-0.386-1.03c-0.477,0.842-1.105,1.523-1.919,1.911
-						C186.899,438.798,184.897,440.177,183.316,441.832z"/>
-					<path fill="#988A38" d="M183.408,441.92c-0.653,0.678-1.527,1.045-2.621,1.234c-1.121,0.236-2.383,0.169-3.57-0.363
-						c-0.185-0.083-0.396-0.025-0.476,0.147c-0.083,0.174-0.004,0.391,0.183,0.473c1.361,0.593,2.804,0.665,4.077,0.404
-						c0.654-0.161,1.202-0.302,1.681-0.515c0.488-0.214,0.904-0.499,1.262-0.865c1.498-1.568,3.411-2.891,5.585-3.941
-						c0.766-0.364,1.391-0.964,1.908-1.715l-0.312-0.834c-0.486,0.829-1.114,1.495-1.919,1.879
-						C186.966,438.907,184.978,440.278,183.408,441.92z"/>
-					<path fill="#FEE676" d="M183.501,442.01c-0.643,0.667-1.497,1.032-2.572,1.233c-1.105,0.254-2.359,0.217-3.561-0.25
-						c-0.143-0.057-0.303-0.002-0.356,0.134c-0.053,0.136,0.018,0.298,0.166,0.354c1.332,0.507,2.715,0.541,3.932,0.269
-						c1.224-0.288,2.105-0.616,2.803-1.344c1.502-1.572,3.419-2.897,5.598-3.95c0.766-0.364,1.391-0.97,1.909-1.723l-0.239-0.64
-						c-0.495,0.814-1.122,1.468-1.918,1.847C187.033,439.018,185.058,440.38,183.501,442.01z"/>
-					<path fill="#FEE676" d="M183.593,442.099c-0.631,0.655-1.464,1.013-2.517,1.214c-1.087,0.264-2.333,0.255-3.557-0.12
-						c-0.104-0.031-0.209,0.022-0.234,0.123c-0.025,0.1,0.039,0.205,0.144,0.237c1.312,0.392,2.634,0.389,3.795,0.11
-						c1.146-0.26,1.991-0.594,2.657-1.287c1.506-1.577,3.428-2.904,5.61-3.96c0.767-0.364,1.392-0.973,1.909-1.734l-0.166-0.442
-						c-0.503,0.801-1.131,1.442-1.917,1.816C187.101,439.128,185.138,440.481,183.593,442.099z"/>
-					<path d="M188.231,435.804c-2.44,1.18-4.656,2.696-6.44,4.562c-0.522,0.534-1.263,0.843-2.094,0.924
-						c-0.817,0.079-1.747-0.063-2.502-0.588c0.869,0.604,1.864,0.867,2.731,0.873c0.876-0.002,1.645-0.263,2.225-0.863
-						c1.736-1.815,3.902-3.3,6.297-4.458c1.045-0.503,1.63-1.534,1.755-2.771l-0.127-0.34
-						C189.874,434.312,189.19,435.342,188.231,435.804z"/>
-					<path d="M176.008,439.233c-0.158-0.388-0.241-0.812-0.237-1.247l-0.014-0.004c-0.196,1.009-1.763,3.144-2.879,1.969
-						c0.3,0.747,0.752,1.415,1.322,2.003C174.771,441.025,175.359,440.131,176.008,439.233z"/>
-					<path d="M174.438,433.298c-0.823,0.849-1.4,1.838-1.72,2.835c-0.345,1.034-0.322,1.902-0.163,2.782
-						c0.337,1.74,1.453,3.229,3.071,4.199c-1.5-0.897-2.474-2.382-2.716-4.096c-0.311-1.623,0.378-3.841,1.887-5.375
-						c2.713-2.83,5.916-4.979,9.217-6.575c1.149-0.549,2.387-0.766,3.537-0.663l-0.176-0.471c-1.137-0.104-2.375,0.108-3.578,0.684
-						C180.453,428.235,177.2,430.417,174.438,433.298z"/>
-				</g>
-				<g>
-					<path fill="#988A38" d="M172.657,454.348l3.86,1.148c-0.573-1.99-2.157-3.94-4.52-4.846c-0.333,0.9-0.643,1.809-0.928,2.725
-						C171.699,453.569,172.229,453.922,172.657,454.348z"/>
-					<path d="M171.07,453.375c0.565,0.175,1.038,0.514,1.419,0.923l0.349,0.104C172.362,453.957,171.767,453.59,171.07,453.375z"/>
-					<path fill="#988A38" d="M171.07,453.375c0.285-0.916,0.595-1.824,0.928-2.725c-2.347-0.899-4.943-0.32-6.648,1.523l3.86,1.148
-						C169.806,453.178,170.44,453.181,171.07,453.375z"/>
-					<g>
-						<g>
-							<path fill="#988A38" d="M171.981,450.695c-0.413-0.155-0.868,0.048-1.015,0.454c-0.145,0.406,0.086,0.853,0.504,0.992
-								c1.468,0.5,2.543,1.58,3.132,2.785l1.86,0.553C175.884,453.512,174.314,451.586,171.981,450.695z"/>
-							<path fill="#988A38" d="M172.191,450.854c-0.367-0.146-0.774,0.034-0.908,0.391c-0.135,0.358,0.07,0.753,0.438,0.886
-								c1.436,0.531,2.465,1.639,3.031,2.841l1.659,0.494C175.858,453.586,174.388,451.734,172.191,450.854z"/>
-							<path fill="#988A38" d="M172.402,451.013c-0.319-0.134-0.68,0.019-0.803,0.329c-0.122,0.309,0.056,0.655,0.374,0.78
-								c1.401,0.562,2.387,1.694,2.93,2.894l1.458,0.435C175.829,453.66,174.461,451.886,172.402,451.013z"/>
-							<path fill="#988A38" d="M172.614,451.169c-0.274-0.122-0.588,0.005-0.699,0.269c-0.112,0.263,0.04,0.559,0.312,0.676
-								c1.368,0.597,2.307,1.75,2.829,2.948l1.256,0.373C175.802,453.734,174.535,452.037,172.614,451.169z"/>
-							<path fill="#988A38" d="M172.823,451.326c-0.225-0.108-0.493-0.01-0.59,0.207c-0.099,0.218,0.019,0.465,0.244,0.571
-								c1.336,0.638,2.231,1.806,2.729,3.001l1.055,0.314C175.775,453.811,174.609,452.195,172.823,451.326z"/>
-							<path fill="#988A38" d="M173.034,451.481c-0.179-0.095-0.399-0.022-0.486,0.149c-0.085,0.171,0,0.373,0.181,0.467
-								c1.3,0.684,2.154,1.86,2.628,3.053l0.854,0.254C175.747,453.886,174.68,452.357,173.034,451.481z"/>
-							<path fill="#FEE676" d="M173.246,451.637c-0.134-0.08-0.307-0.036-0.381,0.09c-0.073,0.128-0.021,0.285,0.115,0.365
-								c1.256,0.741,2.078,1.913,2.528,3.104l0.654,0.195C175.718,453.961,174.743,452.531,173.246,451.637z"/>
-							<path fill="#FEE676" d="M173.458,451.791c-0.09-0.063-0.214-0.049-0.277,0.033c-0.063,0.082-0.04,0.2,0.049,0.262
-								c1.191,0.827,2.002,1.966,2.428,3.154l0.453,0.135C175.69,454.036,174.789,452.73,173.458,451.791z"/>
-						</g>
-						<g>
-							<path fill="#988A38" d="M171.026,453.279c0.313,0.096,0.645-0.065,0.74-0.362c0.092-0.296-0.077-0.62-0.388-0.726
-								c-1.424-0.482-2.91-0.251-4.085,0.561l1.686,0.501C169.628,453.067,170.333,453.063,171.026,453.279z"/>
-							<path fill="#988A38" d="M170.892,453.208c0.281,0.081,0.573-0.063,0.657-0.33c0.078-0.265-0.077-0.554-0.354-0.644
-								c-1.323-0.422-2.7-0.191-3.794,0.549l1.505,0.448C169.534,453.04,170.218,453.015,170.892,453.208z"/>
-							<path fill="#988A38" d="M170.759,453.138c0.246,0.065,0.5-0.062,0.569-0.298c0.066-0.234-0.075-0.488-0.32-0.561
-								c-1.221-0.362-2.487-0.135-3.503,0.536l1.325,0.394C169.439,453.015,170.103,452.966,170.759,453.138z"/>
-							<path fill="#988A38" d="M170.624,453.066c0.214,0.051,0.43-0.062,0.485-0.266c0.055-0.202-0.071-0.42-0.284-0.477
-								c-1.119-0.301-2.279-0.079-3.213,0.522l1.146,0.341C169.343,452.987,169.984,452.917,170.624,453.066z"/>
-							<path fill="#988A38" d="M170.493,452.996c0.178,0.035,0.354-0.061,0.397-0.234c0.043-0.169-0.068-0.351-0.246-0.392
-								c-1.02-0.239-2.073-0.024-2.925,0.509l0.962,0.286C169.248,452.961,169.868,452.871,170.493,452.996z"/>
-							<path fill="#988A38" d="M170.358,452.926c0.146,0.022,0.284-0.06,0.312-0.2c0.029-0.14-0.064-0.282-0.209-0.309
-								c-0.923-0.177-1.862,0.029-2.636,0.493l0.783,0.233C169.153,452.936,169.751,452.826,170.358,452.926z"/>
-							<path fill="#FEE676" d="M170.223,452.854c0.112,0.012,0.213-0.057,0.229-0.166c0.016-0.107-0.063-0.21-0.174-0.225
-								c-0.828-0.104-1.649,0.085-2.346,0.479l0.601,0.179C169.057,452.908,169.631,452.788,170.223,452.854z"/>
-							<path fill="#FEE676" d="M170.09,452.782c0.077,0.002,0.138-0.057,0.142-0.131c0-0.077-0.059-0.138-0.137-0.141
-								c-0.731-0.021-1.439,0.137-2.055,0.463l0.418,0.125C168.963,452.883,169.511,452.768,170.09,452.782z"/>
-						</g>
-					</g>
-					<g>
-						<path d="M171.998,450.65c-2.455-0.94-5.134-0.43-6.887,1.452l0.481,0.144C167.252,450.442,169.766,449.794,171.998,450.65z"/>
-					</g>
-					<path d="M170.205,450.252c-0.812-0.082-1.624-0.025-2.389,0.198c1.246,0.102,1.096,1.65,0.682,2.66l0.689,0.205
-						C169.495,452.285,169.829,451.262,170.205,450.252z"/>
-					<path fill="#988A38" d="M179.742,439.759c0.476-0.554,0.435-1.445-0.149-2.039c-0.576-0.587-1.549-0.585-2.111,0.066
-						c-0.501,0.568-0.966,1.169-1.412,1.784c-0.426,0.603-0.835,1.211-1.23,1.831c-0.788,1.241-1.513,2.519-2.177,3.827
-						c-1.33,2.616-2.403,5.354-3.236,8.158l2.875,0.855c0.784-2.64,1.793-5.21,3.037-7.657c0.621-1.224,1.298-2.417,2.033-3.573
-						c0.367-0.578,0.751-1.147,1.144-1.704C178.903,440.775,179.306,440.254,179.742,439.759z"/>
-					<path d="M177.67,437.95c0.462-0.539,1.442-0.723,1.923-0.23c-0.671-0.681-1.636-0.862-2.299-0.099
-						c-0.507,0.575-0.977,1.183-1.428,1.804c-0.429,0.607-0.84,1.218-1.237,1.843c-0.792,1.247-1.521,2.532-2.189,3.848
-						c-1.336,2.631-2.416,5.383-3.253,8.2l0.479,0.143c0.829-2.79,1.897-5.515,3.22-8.116c0.66-1.302,1.381-2.573,2.165-3.807
-						C175.828,440.316,176.669,439.084,177.67,437.95z"/>
-					<g>
-						<path fill="#988A38" d="M179.709,439.497c0.258-0.299,0.225-0.778-0.092-1.084c-0.319-0.309-0.822-0.306-1.105,0.021
-							c-1.92,2.197-3.402,4.692-4.735,7.254c-1.314,2.574-2.378,5.272-3.199,8.041l1.535,0.457c0.795-2.681,1.823-5.288,3.091-7.771
-							c0.633-1.241,1.324-2.452,2.073-3.625C178.038,441.605,178.814,440.494,179.709,439.497z"/>
-						<path fill="#988A38" d="M179.389,439.766c0.236-0.265,0.191-0.697-0.099-0.964c-0.29-0.266-0.732-0.25-0.987,0.037
-							c-3.53,4.394-6.006,9.55-7.617,14.923l1.368,0.406c0.778-2.614,1.772-5.162,2.998-7.591c0.612-1.215,1.28-2.4,2.003-3.55
-							C177.789,441.874,178.542,440.754,179.389,439.766z"/>
-						<path fill="#988A38" d="M179.083,440.048c0.194-0.245,0.149-0.623-0.11-0.85c-0.264-0.23-0.654-0.21-0.863,0.053
-							c-3.361,4.309-5.756,9.32-7.317,14.542l1.205,0.358C173.508,449.076,175.877,444.155,179.083,440.048z"/>
-						<path fill="#988A38" d="M178.776,440.333c0.172-0.211,0.123-0.537-0.107-0.729c-0.231-0.192-0.565-0.166-0.748,0.058
-							c-3.191,4.228-5.506,9.092-7.02,14.163l1.042,0.31C173.416,449.184,175.7,444.406,178.776,440.333z"/>
-						<path fill="#988A38" d="M178.479,440.628c0.135-0.185,0.086-0.461-0.109-0.615c-0.2-0.159-0.48-0.13-0.623,0.064
-							c-3.039,4.14-5.273,8.856-6.739,13.78l0.878,0.261C173.321,449.291,175.523,444.653,178.479,440.628z"/>
-						<path fill="#FEE676" d="M178.182,440.926c0.111-0.149,0.07-0.37-0.095-0.494c-0.165-0.123-0.392-0.096-0.507,0.061
-							c-2.896,4.05-5.044,8.622-6.464,13.396l0.712,0.212C173.226,449.398,175.342,444.898,178.182,440.926z"/>
-						<path fill="#FEE676" d="M177.889,441.238c0.084-0.122,0.048-0.296-0.08-0.387c-0.128-0.091-0.302-0.063-0.388,0.061
-							c-2.764,3.952-4.821,8.385-6.196,13.01l0.546,0.162C173.131,449.513,175.163,445.136,177.889,441.238z"/>
-						<path fill="#FEE676" d="M177.597,441.557c0.059-0.085,0.033-0.205-0.058-0.268s-0.213-0.044-0.273,0.043
-							c-2.629,3.853-4.606,8.144-5.935,12.621l0.383,0.114C173.032,449.625,174.992,445.373,177.597,441.557z"/>
-					</g>
-				</g>
-			</g>
-			<g>
-				<g>
-					<path fill="#988A38" d="M170.012,457.067c0.812,0.18,1.599-0.285,1.793-1.028c0.153-0.603,0.318-1.201,0.498-1.796
-						l-2.875-0.855c-0.19,0.632-0.366,1.268-0.528,1.906C168.691,456.091,169.208,456.89,170.012,457.067z"/>
-					<g>
-						<path fill="#988A38" d="M170.593,456.782c0.433,0.101,0.853-0.146,0.956-0.545c0.174-0.688,0.357-1.373,0.562-2.052
-							l-1.535-0.456c-0.212,0.701-0.401,1.408-0.581,2.118C169.889,456.262,170.165,456.683,170.593,456.782z"/>
-						<path fill="#988A38" d="M170.69,456.486c0.382,0.091,0.764-0.127,0.857-0.483c0.155-0.615,0.325-1.227,0.507-1.834
-							l-1.368-0.407c-0.188,0.625-0.362,1.255-0.522,1.888C170.068,456.019,170.305,456.394,170.69,456.486z"/>
-						<path fill="#988A38" d="M170.789,456.189c0.336,0.083,0.677-0.104,0.757-0.421c0.142-0.541,0.29-1.081,0.452-1.616
-							l-1.205-0.358c-0.167,0.549-0.317,1.103-0.463,1.657C170.247,455.776,170.45,456.106,170.789,456.189z"/>
-						<path fill="#988A38" d="M170.891,455.896c0.292,0.073,0.585-0.089,0.655-0.362l0.396-1.398l-1.042-0.31l-0.404,1.429
-							C170.425,455.535,170.6,455.823,170.891,455.896z"/>
-						<path fill="#988A38" d="M170.993,455.602c0.247,0.064,0.492-0.072,0.557-0.301l0.336-1.182l-0.878-0.262l-0.343,1.204
-							C170.599,455.295,170.749,455.538,170.993,455.602z"/>
-						<path fill="#FEE676" d="M171.098,455.309c0.199,0.053,0.403-0.055,0.455-0.243l0.276-0.964l-0.713-0.212l-0.28,0.979
-							C170.783,455.06,170.897,455.256,171.098,455.309z"/>
-						<path fill="#FEE676" d="M171.204,455.018c0.156,0.042,0.312-0.042,0.353-0.187l0.216-0.746l-0.547-0.163l-0.218,0.755
-							C170.966,454.823,171.05,454.975,171.204,455.018z"/>
-						<path fill="#FEE676" d="M171.312,454.726c0.108,0.03,0.219-0.028,0.248-0.131l0.155-0.527l-0.383-0.113l-0.157,0.531
-							C171.146,454.589,171.204,454.695,171.312,454.726z"/>
-					</g>
-					<path d="M170.012,457.067c-0.67-0.147-1.046-1.052-0.871-1.711c0.161-0.637,0.336-1.27,0.525-1.898l-0.479-0.143
-						c-0.191,0.636-0.368,1.274-0.53,1.917C168.416,456.166,169.075,456.859,170.012,457.067z"/>
-					<path fill="#988A38" d="M162.271,465.363c-0.114,1.959,0.618,3.755,1.841,4.966c1.222,1.221,2.918,1.857,4.737,1.753
-						c1.82-0.104,3.423-0.884,4.595-1.955c1.17-1.08,1.917-2.453,2.005-3.985c0.163-2.679,0.536-5.339,1.147-7.926
-						c0.212-0.912,0.167-1.829-0.079-2.719l-3.861-1.149c0.855,0.862,1.289,2.036,1.019,3.184c-0.651,2.76-1.048,5.589-1.221,8.434
-						c-0.103,1.754-1.705,3.211-3.689,3.239c-1.984,0.029-3.618-1.616-3.499-3.664c0.197-3.243,0.649-6.477,1.399-9.652
-						c0.313-1.331,1.336-2.282,2.545-2.565l-3.86-1.148c-0.758,0.808-1.33,1.846-1.605,3.028
-						C162.954,458.553,162.478,461.954,162.271,465.363z"/>
-					<path fill="#988A38" d="M163.925,465.461c-0.17,2.843,2.108,5.072,4.785,4.964c0.328-0.009,0.59-0.269,0.584-0.58
-						c-0.008-0.311-0.272-0.564-0.601-0.563c-2.028,0.033-3.703-1.65-3.581-3.75c0.197-3.252,0.651-6.494,1.402-9.679
-						c0.308-1.312,1.289-2.271,2.462-2.599l-1.686-0.502c-0.928,0.633-1.652,1.615-1.936,2.829
-						C164.59,458.835,164.126,462.144,163.925,465.461z"/>
-					<path fill="#988A38" d="M164.014,465.465c-0.163,2.741,1.975,4.912,4.535,4.864c0.292-0.002,0.527-0.232,0.526-0.512
-						c-0.005-0.278-0.238-0.502-0.531-0.507c-1.982-0.015-3.59-1.698-3.469-3.783c0.197-3.253,0.652-6.497,1.403-9.684
-						c0.308-1.312,1.269-2.268,2.427-2.612l-1.505-0.448c-0.938,0.616-1.67,1.596-1.956,2.817
-						C164.678,458.85,164.215,462.153,164.014,465.465z"/>
-					<path fill="#988A38" d="M164.103,465.472c-0.156,2.636,1.84,4.744,4.282,4.761c0.256,0.005,0.466-0.197,0.466-0.442
-						c0.001-0.243-0.2-0.441-0.454-0.451c-1.941-0.063-3.479-1.746-3.359-3.812c0.197-3.255,0.652-6.502,1.404-9.691
-						c0.308-1.31,1.246-2.265,2.388-2.625l-1.325-0.395c-0.947,0.6-1.687,1.573-1.975,2.806
-						C164.765,458.865,164.303,462.164,164.103,465.472z"/>
-					<path fill="#988A38" d="M164.193,465.477c-0.148,2.533,1.703,4.574,4.03,4.659c0.219,0.01,0.403-0.162,0.408-0.372
-						c0.005-0.211-0.166-0.382-0.384-0.396c-1.897-0.114-3.367-1.793-3.247-3.843c0.198-3.258,0.652-6.506,1.405-9.698
-						c0.308-1.31,1.228-2.261,2.352-2.639l-1.145-0.34c-0.957,0.582-1.703,1.553-1.994,2.794
-						C164.854,458.881,164.393,462.175,164.193,465.477z"/>
-					<path fill="#988A38" d="M164.283,465.482c-0.141,2.432,1.566,4.396,3.778,4.556c0.184,0.015,0.339-0.127,0.348-0.303
-						c0.006-0.177-0.13-0.32-0.309-0.34c-1.86-0.175-3.256-1.84-3.138-3.873c0.198-3.26,0.653-6.511,1.406-9.704
-						c0.308-1.31,1.205-2.259,2.312-2.653l-0.963-0.286c-0.965,0.564-1.719,1.532-2.012,2.783
-						C164.943,458.896,164.483,462.185,164.283,465.482z"/>
-					<path fill="#988A38" d="M164.372,465.487c-0.135,2.33,1.436,4.208,3.528,4.452c0.146,0.02,0.275-0.092,0.289-0.232
-						c0.012-0.143-0.096-0.263-0.238-0.282c-1.818-0.245-3.143-1.889-3.026-3.905c0.198-3.262,0.653-6.514,1.407-9.71
-						c0.308-1.309,1.185-2.255,2.276-2.666l-0.783-0.232c-0.977,0.548-1.737,1.512-2.033,2.771
-						C165.031,458.911,164.571,462.195,164.372,465.487z"/>
-					<path fill="#FEE676" d="M164.461,465.492c-0.129,2.229,1.31,3.995,3.274,4.348c0.112,0.022,0.215-0.055,0.231-0.161
-						c0.015-0.107-0.059-0.204-0.166-0.225c-1.771-0.338-3.031-1.938-2.915-3.937c0.198-3.264,0.654-6.519,1.408-9.717
-						c0.308-1.309,1.165-2.251,2.239-2.68l-0.601-0.179c-0.985,0.532-1.754,1.492-2.052,2.761
-						C165.12,458.926,164.66,462.205,164.461,465.492z"/>
-					<path fill="#FEE676" d="M164.551,465.498c-0.123,2.127,1.193,3.757,3.023,4.243c0.076,0.021,0.15-0.02,0.171-0.091
-						c0.02-0.072-0.022-0.146-0.093-0.167c-1.714-0.467-2.918-1.986-2.804-3.968c0.198-3.267,0.654-6.523,1.409-9.724
-						c0.308-1.308,1.144-2.248,2.201-2.693l-0.418-0.124c-0.997,0.515-1.773,1.471-2.073,2.749
-						C165.208,458.941,164.75,462.215,164.551,465.498z"/>
-					<path d="M168.058,453.842c0.34-0.24,0.722-0.414,1.126-0.515l0.003-0.012l-0.689-0.205c-0.129,0.31-0.289,0.569-0.451,0.724
-						L168.058,453.842z"/>
-					<path fill="#988A38" d="M173.802,466.044c-0.145,2.339-2.283,4.342-5.019,4.456c-0.442,0.01-0.787,0.369-0.772,0.8
-						c0.015,0.431,0.397,0.76,0.836,0.734c1.803-0.102,3.395-0.875,4.559-1.939c1.163-1.072,1.905-2.437,1.993-3.956
-						c0.163-2.682,0.537-5.345,1.148-7.935c0.213-0.914,0.166-1.832-0.084-2.724l-1.86-0.554c0.443,0.919,0.62,1.919,0.387,2.913
-						C174.356,460.521,173.97,463.274,173.802,466.044z"/>
-					<path fill="#988A38" d="M173.929,466.051c-0.144,2.315-2.211,4.354-4.908,4.533c-0.394,0.019-0.698,0.336-0.68,0.718
-						c0.017,0.381,0.361,0.673,0.752,0.642c1.733-0.13,3.253-0.902,4.364-1.95c1.108-1.056,1.814-2.388,1.899-3.858
-						c0.164-2.684,0.537-5.349,1.149-7.939c0.213-0.914,0.163-1.835-0.094-2.73l-1.659-0.494c0.427,0.921,0.592,1.913,0.361,2.897
-						C174.482,460.543,174.098,463.288,173.929,466.051z"/>
-					<path fill="#988A38" d="M174.058,466.06c-0.143,2.292-2.14,4.359-4.796,4.608c-0.345,0.026-0.609,0.304-0.588,0.635
-						c0.02,0.333,0.326,0.587,0.668,0.553c1.663-0.162,3.11-0.933,4.166-1.964c1.055-1.039,1.723-2.339,1.805-3.758
-						c0.164-2.687,0.538-5.354,1.15-7.948c0.213-0.914,0.159-1.837-0.102-2.735l-1.458-0.434c0.41,0.92,0.564,1.905,0.336,2.882
-						C174.609,460.564,174.226,463.304,174.058,466.06z"/>
-					<path fill="#988A38" d="M174.187,466.066c-0.142,2.27-2.065,4.359-4.687,4.684c-0.296,0.032-0.517,0.271-0.496,0.555
-						c0.023,0.284,0.292,0.5,0.586,0.465c1.594-0.197,2.969-0.968,3.971-1.981c1-1.021,1.629-2.288,1.709-3.657
-						c0.164-2.688,0.538-5.359,1.152-7.956c0.212-0.913,0.154-1.839-0.111-2.739l-1.256-0.374c0.391,0.92,0.535,1.899,0.31,2.866
-						C174.737,460.587,174.354,463.318,174.187,466.066z"/>
-					<path fill="#988A38" d="M174.315,466.074c-0.141,2.246-1.989,4.352-4.577,4.758c-0.247,0.035-0.429,0.236-0.403,0.475
-						c0.025,0.236,0.258,0.412,0.503,0.375c1.525-0.233,2.832-1.003,3.777-2c0.946-1.003,1.537-2.234,1.614-3.554
-						c0.164-2.69,0.539-5.363,1.153-7.962c0.212-0.914,0.15-1.842-0.121-2.746l-1.055-0.313c0.374,0.92,0.508,1.894,0.284,2.851
-						C174.864,460.608,174.482,463.333,174.315,466.074z"/>
-					<path fill="#988A38" d="M174.442,466.082c-0.139,2.224-1.915,4.328-4.463,4.831c-0.201,0.036-0.342,0.204-0.312,0.394
-						c0.027,0.191,0.219,0.326,0.419,0.288c2.912-0.597,4.941-2.968,5.099-5.469c0.164-2.693,0.539-5.368,1.153-7.97
-						c0.213-0.914,0.147-1.845-0.128-2.751l-0.854-0.254c0.359,0.921,0.479,1.886,0.258,2.835
-						C174.99,460.631,174.609,463.348,174.442,466.082z"/>
-					<path fill="#FEE676" d="M174.571,466.09c-0.137,2.201-1.849,4.279-4.354,4.904c-0.149,0.034-0.251,0.171-0.219,0.314
-						c0.032,0.144,0.184,0.237,0.336,0.199c2.769-0.704,4.655-2.98,4.808-5.385c0.164-2.695,0.54-5.373,1.155-7.978
-						c0.212-0.913,0.143-1.847-0.135-2.755l-0.654-0.194c0.341,0.921,0.451,1.88,0.232,2.819
-						C175.117,460.652,174.737,463.362,174.571,466.09z"/>
-					<path fill="#FEE676" d="M174.699,466.097c-0.133,2.182-1.802,4.177-4.244,4.977c-0.104,0.032-0.161,0.138-0.125,0.236
-						c0.035,0.097,0.147,0.148,0.252,0.115c2.606-0.87,4.373-2.995,4.517-5.304c0.165-2.698,0.54-5.379,1.156-7.985
-						c0.212-0.913,0.14-1.848-0.144-2.76l-0.453-0.135c0.325,0.92,0.423,1.873,0.207,2.804
-						C175.243,460.675,174.865,463.377,174.699,466.097z"/>
-					<path d="M173.431,457.474c-0.655,2.774-1.054,5.617-1.228,8.476c-0.099,1.641-1.593,3.227-3.438,3.255
-						c2.126-0.028,3.831-1.356,3.937-3.225c0.172-2.831,0.567-5.646,1.215-8.392c0.283-1.199-0.179-2.334-1.081-3.187l-0.348-0.104
-						C173.293,455.169,173.687,456.382,173.431,457.474z"/>
-					<path d="M166.943,468.702c-0.36-0.212-0.667-0.487-0.927-0.814l-0.013,0.005c0.46,0.818,0.375,3.491-1.208,3.146
-						c0.666,0.436,1.417,0.728,2.222,0.89C166.964,470.854,166.936,469.778,166.943,468.702z"/>
-					<path d="M162.022,465.349c-0.118,2.039,0.647,3.842,1.917,5.036c1.27,1.205,3.026,1.805,4.911,1.697
-						c-1.747,0.101-3.382-0.574-4.559-1.812c-1.178-1.229-1.879-3.018-1.77-4.892c0.206-3.395,0.68-6.783,1.466-10.118
-						c0.27-1.157,0.843-2.197,1.605-3.014l-0.481-0.143c-0.755,0.801-1.329,1.836-1.61,3.042
-						C162.708,458.51,162.229,461.926,162.022,465.349z"/>
-				</g>
-				<g>
-					<path fill="#988A38" d="M172.461,483.169l3.883-1.072c-1.537-1.401-3.932-2.215-6.407-1.713
-						c0.193,0.94,0.419,1.873,0.666,2.801C171.239,483.015,171.875,483.033,172.461,483.169z"/>
-					<path d="M170.603,483.185c0.572-0.152,1.151-0.115,1.689,0.031l0.35-0.097C172.005,482.993,171.308,482.997,170.603,483.185z"
-						/>
-					<path fill="#988A38" d="M170.603,483.185c-0.246-0.928-0.472-1.86-0.666-2.801c-2.468,0.5-4.345,2.36-4.825,4.814l3.883-1.072
-						C169.425,483.688,169.966,483.354,170.603,483.185z"/>
-					<g>
-						<g>
-							<path fill="#988A38" d="M169.947,480.43c-0.431,0.09-0.713,0.506-0.618,0.927c0.097,0.419,0.524,0.676,0.953,0.571
-								c1.513-0.359,2.996-0.014,4.135,0.701l1.871-0.517C174.759,480.732,172.39,479.933,169.947,480.43z"/>
-							<path fill="#988A38" d="M170.209,480.451c-0.387,0.073-0.642,0.444-0.561,0.816c0.081,0.373,0.458,0.6,0.841,0.516
-								c1.5-0.315,2.962,0.078,4.079,0.804l1.669-0.461C174.775,480.811,172.531,480.02,170.209,480.451z"/>
-							<path fill="#988A38" d="M170.472,480.474c-0.34,0.057-0.569,0.38-0.504,0.707c0.066,0.326,0.394,0.525,0.731,0.462
-								c1.485-0.271,2.927,0.166,4.021,0.902l1.466-0.404C174.79,480.891,172.673,480.109,170.472,480.474z"/>
-							<path fill="#988A38" d="M170.733,480.493c-0.296,0.043-0.499,0.318-0.448,0.601c0.051,0.28,0.33,0.452,0.624,0.406
-								c1.475-0.224,2.887,0.257,3.963,1.003l1.264-0.349C174.806,480.969,172.816,480.199,170.733,480.493z"/>
-							<path fill="#988A38" d="M170.993,480.514c-0.247,0.028-0.425,0.257-0.388,0.492c0.036,0.235,0.264,0.384,0.512,0.354
-								c1.47-0.173,2.852,0.345,3.908,1.102l1.061-0.293C174.822,481.049,172.964,480.294,170.993,480.514z"/>
-							<path fill="#988A38" d="M171.254,480.533c-0.201,0.016-0.353,0.195-0.331,0.387c0.023,0.189,0.202,0.315,0.404,0.299
-								c1.463-0.114,2.815,0.434,3.849,1.2l0.858-0.236C174.834,481.13,173.115,480.394,171.254,480.533z"/>
-							<path fill="#FEE676" d="M171.516,480.552c-0.155,0.004-0.281,0.134-0.274,0.28c0.008,0.146,0.139,0.252,0.295,0.247
-								c1.457-0.041,2.778,0.521,3.791,1.298l0.658-0.182C174.846,481.21,173.264,480.508,171.516,480.552z"/>
-							<path fill="#FEE676" d="M171.777,480.569c-0.11-0.006-0.208,0.073-0.215,0.176c-0.008,0.104,0.075,0.19,0.184,0.195
-								c1.447,0.065,2.742,0.606,3.733,1.396l0.455-0.126C174.861,481.29,173.412,480.652,171.777,480.569z"/>
-						</g>
-						<g>
-							<path fill="#988A38" d="M170.514,483.126c0.317-0.084,0.507-0.396,0.433-0.699c-0.075-0.301-0.396-0.484-0.715-0.408
-								c-1.458,0.349-2.597,1.333-3.166,2.64l1.695-0.468C169.217,483.688,169.812,483.312,170.514,483.126z"/>
-							<path fill="#988A38" d="M170.361,483.138c0.282-0.081,0.448-0.356,0.381-0.628c-0.071-0.268-0.359-0.43-0.642-0.357
-								c-1.342,0.347-2.389,1.271-2.926,2.477l1.514-0.418C169.123,483.715,169.689,483.331,170.361,483.138z"/>
-							<path fill="#988A38" d="M170.212,483.148c0.244-0.075,0.387-0.317,0.323-0.555c-0.065-0.234-0.32-0.374-0.567-0.306
-								c-1.228,0.344-2.179,1.207-2.688,2.312l1.333-0.368C169.029,483.743,169.566,483.351,170.212,483.148z"/>
-							<path fill="#988A38" d="M170.06,483.16c0.208-0.071,0.329-0.279,0.269-0.482c-0.06-0.201-0.281-0.318-0.491-0.254
-								c-1.112,0.342-1.972,1.143-2.45,2.146l1.152-0.317C168.934,483.771,169.439,483.371,170.06,483.16z"/>
-							<path fill="#988A38" d="M169.911,483.17c0.169-0.064,0.265-0.238,0.211-0.409c-0.052-0.167-0.243-0.261-0.415-0.201
-								c-0.997,0.341-1.766,1.078-2.212,1.98l0.968-0.268C168.839,483.8,169.316,483.395,169.911,483.17z"/>
-							<path fill="#988A38" d="M169.758,483.182c0.136-0.058,0.208-0.2,0.158-0.335c-0.048-0.133-0.203-0.204-0.34-0.15
-								c-0.881,0.343-1.557,1.011-1.975,1.814l0.788-0.218C168.745,483.828,169.194,483.419,169.758,483.182z"/>
-							<path fill="#FEE676" d="M169.605,483.192c0.101-0.05,0.149-0.161,0.105-0.262c-0.043-0.1-0.164-0.145-0.266-0.098
-								c-0.762,0.352-1.347,0.944-1.736,1.647l0.604-0.166C168.649,483.856,169.071,483.45,169.605,483.192z"/>
-							<path fill="#FEE676" d="M169.455,483.203c0.066-0.04,0.085-0.122,0.05-0.187c-0.04-0.065-0.122-0.086-0.19-0.046
-								c-0.635,0.37-1.141,0.876-1.497,1.48l0.42-0.116C168.556,483.884,168.959,483.496,169.455,483.203z"/>
-						</g>
-					</g>
-					<g>
-						<path d="M169.937,480.384c-2.582,0.523-4.564,2.369-5.065,4.88l0.484-0.133C165.82,482.733,167.59,480.858,169.937,480.384z"
-							/>
-					</g>
-					<path d="M168.207,481.006c-0.736,0.364-1.386,0.843-1.916,1.437c1.11-0.577,1.802,0.809,1.987,1.882l0.693-0.191
-						C168.684,483.098,168.433,482.055,168.207,481.006z"/>
-					<path fill="#988A38" d="M170.236,466.862c0.032-0.77-0.598-1.44-1.421-1.499c-0.828-0.059-1.542,0.56-1.576,1.38
-						c-0.229,5.829,0.423,11.69,1.975,17.322l2.892-0.798C170.639,477.946,170.018,472.384,170.236,466.862z"/>
-					<path d="M167.488,466.753c0.028-0.681,0.636-1.438,1.327-1.39c-0.964-0.067-1.785,0.41-1.826,1.37
-						c-0.23,5.854,0.424,11.74,1.983,17.399l0.482-0.134C167.909,478.393,167.259,472.557,167.488,466.753z"/>
-					<g>
-						<path fill="#988A38" d="M170.044,466.673c0.021-0.412-0.319-0.769-0.761-0.795c-0.441-0.026-0.816,0.299-0.838,0.726
-							c-0.245,5.768,0.391,11.574,1.925,17.143l1.543-0.426C170.425,477.92,169.806,472.274,170.044,466.673z"/>
-						<path fill="#988A38" d="M169.969,467.099c0.01-0.37-0.292-0.685-0.686-0.704c-0.393-0.021-0.73,0.272-0.74,0.653
-							c-0.199,5.611,0.446,11.254,1.937,16.668l1.376-0.38C170.404,478.067,169.775,472.565,169.969,467.099z"/>
-						<path fill="#988A38" d="M169.896,467.525c0.011-0.325-0.262-0.602-0.609-0.615c-0.347-0.015-0.636,0.246-0.647,0.58
-							c-0.15,5.455,0.5,10.934,1.947,16.196l1.212-0.335C170.386,478.215,169.749,472.856,169.896,467.525z"/>
-						<path fill="#988A38" d="M169.828,467.954c0.003-0.282-0.231-0.519-0.531-0.527c-0.301-0.01-0.552,0.217-0.555,0.506
-							c-0.102,5.298,0.546,10.614,1.953,15.724l1.047-0.289C170.364,478.363,169.729,473.148,169.828,467.954z"/>
-						<path fill="#988A38" d="M169.763,468.382c0.004-0.237-0.199-0.435-0.451-0.44c-0.253-0.005-0.461,0.188-0.465,0.43
-							c-0.059,5.143,0.589,10.297,1.956,15.256l0.883-0.244C170.343,478.511,169.705,473.439,169.763,468.382z"/>
-						<path fill="#FEE676" d="M169.698,468.812c-0.001-0.192-0.162-0.352-0.369-0.354c-0.206-0.003-0.375,0.155-0.374,0.351
-							c-0.019,4.987,0.629,9.98,1.956,14.788l0.716-0.197C170.319,478.66,169.679,473.732,169.698,468.812z"/>
-						<path fill="#FEE676" d="M169.641,469.243c0-0.149-0.128-0.271-0.286-0.271c-0.156,0-0.284,0.123-0.284,0.273
-							c0.023,4.832,0.665,9.665,1.951,14.321l0.55-0.151C170.298,478.809,169.663,474.024,169.641,469.243z"/>
-						<path fill="#FEE676" d="M169.583,469.675c-0.002-0.104-0.091-0.188-0.202-0.188s-0.201,0.087-0.198,0.192
-							c0.059,4.677,0.702,9.35,1.944,13.857l0.386-0.106C170.28,478.957,169.642,474.317,169.583,469.675z"/>
-					</g>
-				</g>
-			</g>
-			<g>
-				<g>
-					<path fill="#988A38" d="M171.659,486.877c0.778-0.274,1.208-1.087,0.975-1.819c-0.186-0.594-0.363-1.19-0.529-1.79
-						l-2.892,0.799c0.176,0.635,0.363,1.267,0.561,1.896C170.023,486.746,170.876,487.153,171.659,486.877z"/>
-					<g>
-						<path fill="#988A38" d="M171.998,486.329c0.418-0.144,0.653-0.577,0.525-0.969c-0.218-0.675-0.42-1.355-0.61-2.04
-							l-1.543,0.427c0.195,0.705,0.404,1.407,0.628,2.104C171.132,486.258,171.579,486.472,171.998,486.329z"/>
-						<path fill="#988A38" d="M171.926,486.025c0.374-0.125,0.583-0.511,0.47-0.862c-0.19-0.605-0.371-1.214-0.541-1.826
-							l-1.376,0.38c0.174,0.629,0.36,1.255,0.556,1.877C171.152,485.957,171.552,486.15,171.926,486.025z"/>
-						<path fill="#988A38" d="M171.854,485.722c0.33-0.107,0.513-0.445,0.419-0.758c-0.168-0.534-0.324-1.072-0.475-1.611
-							l-1.212,0.334c0.155,0.553,0.314,1.104,0.486,1.651C171.169,485.66,171.523,485.83,171.854,485.722z"/>
-						<path fill="#988A38" d="M171.782,485.42c0.287-0.092,0.451-0.386,0.368-0.656c-0.146-0.462-0.275-0.93-0.408-1.396
-							l-1.047,0.289c0.135,0.476,0.268,0.953,0.417,1.426C171.197,485.359,171.496,485.512,171.782,485.42z"/>
-						<path fill="#988A38" d="M171.714,485.115c0.243-0.076,0.384-0.323,0.313-0.551l-0.342-1.181l-0.883,0.243l0.348,1.203
-							C171.223,485.062,171.473,485.19,171.714,485.115z"/>
-						<path fill="#FEE676" d="M171.65,484.811c0.197-0.061,0.31-0.259,0.254-0.446l-0.276-0.965l-0.717,0.198l0.28,0.979
-							C171.248,484.767,171.452,484.871,171.65,484.811z"/>
-						<path fill="#FEE676" d="M171.583,484.508c0.155-0.047,0.244-0.201,0.202-0.346l-0.213-0.747l-0.55,0.151l0.215,0.756
-							C171.279,484.469,171.429,484.554,171.583,484.508z"/>
-						<path fill="#FEE676" d="M171.522,484.202c0.108-0.031,0.171-0.14,0.142-0.242l-0.15-0.529l-0.386,0.106l0.152,0.533
-							C171.31,484.174,171.414,484.234,171.522,484.202z"/>
-					</g>
-					<path d="M171.659,486.877c-0.653,0.231-1.441-0.341-1.647-0.99c-0.196-0.626-0.383-1.255-0.558-1.887l-0.482,0.133
-						c0.177,0.638,0.365,1.272,0.563,1.904C169.829,486.955,170.748,487.198,171.659,486.877z"/>
-					<path fill="#988A38" d="M169.597,498.1c0.502,0.87,1.156,1.599,1.913,2.141c0.372,0.274,0.775,0.499,1.184,0.68
-						c0.403,0.174,0.879,0.374,1.322,0.466c1.815,0.418,3.568-0.15,4.908-1.375c1.348-1.23,2.119-2.82,2.559-4.226
-						c0.1-0.365,0.211-0.671,0.238-1.056c0.032-0.378,0.025-0.744-0.008-1.109c-0.077-0.724-0.279-1.423-0.651-2.065
-						c-1.286-2.253-2.356-4.67-3.21-7.183c-0.299-0.887-0.822-1.646-1.507-2.274l-3.883,1.071c1.19,0.276,2.169,1.051,2.549,2.166
-						c0.912,2.683,2.058,5.274,3.446,7.707c0.851,1.495,0.325,3.567-1.298,4.712c-1.621,1.144-3.92,0.671-4.956-1.142
-						c-1.634-2.863-2.961-5.875-4.012-8.966c-0.441-1.295-0.064-2.643,0.804-3.521l-3.883,1.072
-						c-0.211,1.08-0.152,2.259,0.237,3.411C166.457,491.871,167.861,495.057,169.597,498.1z"/>
-					<path fill="#988A38" d="M171.035,497.278c1.421,2.504,4.667,3.188,6.773,1.505c0.259-0.2,0.321-0.56,0.144-0.805
-						c-0.177-0.245-0.54-0.306-0.81-0.122c-1.654,1.173-4.011,0.691-5.073-1.169c-1.639-2.872-2.97-5.893-4.024-8.992
-						c-0.434-1.276-0.098-2.608,0.716-3.505l-1.695,0.468c-0.447,1.023-0.549,2.238-0.149,3.42
-						C167.995,491.245,169.356,494.334,171.035,497.278z"/>
-					<path fill="#988A38" d="M171.112,497.233c1.375,2.415,4.451,3.118,6.509,1.567c0.236-0.174,0.291-0.494,0.135-0.72
-						c-0.158-0.222-0.479-0.275-0.723-0.116c-1.642,1.107-3.945,0.588-4.999-1.259c-1.64-2.874-2.972-5.896-4.026-8.998
-						c-0.434-1.276-0.114-2.596,0.679-3.497l-1.514,0.418c-0.464,1.014-0.575,2.23-0.171,3.42
-						C168.077,491.211,169.436,494.295,171.112,497.233z"/>
-					<path fill="#988A38" d="M171.19,497.189c1.327,2.329,4.234,3.038,6.242,1.628c0.212-0.146,0.261-0.429,0.126-0.63
-						c-0.135-0.2-0.415-0.249-0.629-0.116c-1.636,1.043-3.881,0.484-4.925-1.347c-1.641-2.877-2.975-5.901-4.029-9.005
-						c-0.434-1.275-0.132-2.581,0.638-3.488l-1.333,0.368c-0.48,1.006-0.6,2.222-0.194,3.421
-						C168.159,491.178,169.517,494.257,171.19,497.189z"/>
-					<path fill="#988A38" d="M171.268,497.146c1.279,2.242,4.014,2.955,5.975,1.687c0.184-0.12,0.231-0.364,0.119-0.541
-						c-0.115-0.177-0.354-0.222-0.542-0.111c-1.629,0.977-3.814,0.381-4.85-1.436c-1.642-2.879-2.977-5.906-4.032-9.012
-						c-0.434-1.275-0.145-2.567,0.601-3.48l-1.152,0.318c-0.497,0.996-0.625,2.213-0.215,3.422
-						C168.243,491.144,169.598,494.218,171.268,497.146z"/>
-					<path fill="#988A38" d="M171.347,497.101c1.233,2.158,3.79,2.867,5.707,1.744c0.158-0.093,0.202-0.3,0.111-0.451
-						c-0.095-0.15-0.292-0.192-0.449-0.108c-1.632,0.905-3.749,0.279-4.776-1.522c-1.644-2.882-2.979-5.911-4.035-9.019
-						c-0.433-1.274-0.163-2.554,0.56-3.471l-0.968,0.268c-0.514,0.986-0.65,2.204-0.237,3.422
-						C168.327,491.109,169.68,494.178,171.347,497.101z"/>
-					<path fill="#988A38" d="M171.424,497.057c1.184,2.07,3.565,2.773,5.438,1.799c0.131-0.067,0.173-0.234,0.104-0.36
-						c-0.072-0.126-0.231-0.165-0.359-0.102c-1.637,0.822-3.684,0.174-4.702-1.612c-1.645-2.884-2.981-5.915-4.039-9.025
-						c-0.433-1.273-0.178-2.539,0.522-3.462l-0.787,0.217c-0.532,0.979-0.676,2.197-0.26,3.424
-						C168.409,491.076,169.76,494.14,171.424,497.057z"/>
-					<path fill="#FEE676" d="M171.501,497.012c1.132,1.979,3.339,2.663,5.169,1.854c0.104-0.046,0.145-0.17,0.097-0.269
-						c-0.049-0.1-0.167-0.139-0.268-0.096c-1.65,0.719-3.618,0.069-4.628-1.701c-1.646-2.886-2.984-5.919-4.042-9.032
-						c-0.432-1.273-0.193-2.524,0.484-3.454l-0.604,0.167c-0.548,0.97-0.702,2.188-0.283,3.424
-						C168.492,491.042,169.84,494.101,171.501,497.012z"/>
-					<path fill="#FEE676" d="M171.58,496.968c1.08,1.887,3.107,2.527,4.9,1.906c0.075-0.025,0.112-0.103,0.088-0.175
-						c-0.025-0.073-0.105-0.111-0.174-0.089c-1.677,0.578-3.553-0.036-4.555-1.791c-1.647-2.888-2.986-5.924-4.044-9.038
-						c-0.432-1.273-0.209-2.512,0.444-3.446l-0.42,0.116c-0.567,0.961-0.729,2.181-0.307,3.426
-						C168.576,491.008,169.921,494.062,171.58,496.968z"/>
-					<path d="M168.293,485.176c0.167-0.385,0.39-0.732,0.682-1.031l-0.003-0.012l-0.693,0.191c0.054,0.329,0.061,0.633,0.001,0.85
-						L168.293,485.176z"/>
-					<path fill="#988A38" d="M179.627,492.373c0.574,1.009,0.666,2.177,0.37,3.334c-0.317,1.136-1.02,2.24-2.085,3.094
-						c-0.351,0.267-0.418,0.77-0.135,1.109c0.283,0.339,0.789,0.363,1.115,0.065c1.338-1.216,2.105-2.794,2.543-4.189
-						c0.239-0.67,0.302-1.429,0.227-2.155c-0.076-0.719-0.277-1.413-0.646-2.051c-1.288-2.256-2.358-4.676-3.213-7.191
-						c-0.3-0.889-0.824-1.647-1.515-2.276l-1.871,0.517c0.87,0.547,1.544,1.308,1.871,2.273
-						C177.174,487.508,178.286,490.021,179.627,492.373z"/>
-					<path fill="#988A38" d="M179.738,492.309c0.567,0.998,0.675,2.159,0.405,3.313c-0.293,1.132-0.96,2.233-1.989,3.105
-						c-0.308,0.247-0.365,0.688-0.113,0.982c0.252,0.294,0.7,0.312,0.988,0.04c1.268-1.189,1.991-2.714,2.4-4.063
-						c0.228-0.644,0.258-1.404,0.187-2.1c-0.08-0.697-0.28-1.369-0.636-1.986c-1.289-2.259-2.361-4.682-3.217-7.198
-						c-0.299-0.889-0.828-1.648-1.526-2.276l-1.669,0.461c0.856,0.557,1.517,1.316,1.841,2.274
-						C177.292,487.459,178.401,489.965,179.738,492.309z"/>
-					<path fill="#988A38" d="M179.851,492.246c0.561,0.985,0.685,2.139,0.44,3.29c-0.271,1.125-0.903,2.223-1.895,3.115
-						c-0.264,0.227-0.311,0.606-0.091,0.857c0.223,0.249,0.615,0.262,0.863,0.019c1.196-1.166,1.876-2.638,2.253-3.938
-						c0.216-0.618,0.215-1.384,0.147-2.045c-0.084-0.675-0.282-1.325-0.626-1.922c-1.291-2.262-2.364-4.686-3.22-7.206
-						c-0.299-0.888-0.832-1.647-1.536-2.275l-1.466,0.405c0.842,0.564,1.491,1.325,1.811,2.273
-						C177.412,487.411,178.517,489.909,179.851,492.246z"/>
-					<path fill="#988A38" d="M179.962,492.182c0.555,0.975,0.693,2.118,0.474,3.266c-0.25,1.118-0.847,2.212-1.802,3.128
-						c-0.222,0.202-0.255,0.522-0.067,0.733c0.196,0.205,0.532,0.211,0.742-0.002c1.126-1.148,1.76-2.563,2.108-3.818
-						c0.325-1.302,0.133-2.709-0.511-3.845c-1.292-2.264-2.366-4.691-3.224-7.214c-0.3-0.888-0.837-1.647-1.545-2.275l-1.263,0.35
-						c0.825,0.574,1.463,1.335,1.78,2.274C177.532,487.362,178.634,489.853,179.962,492.182z"/>
-					<path fill="#988A38" d="M180.074,492.118c0.548,0.964,0.702,2.096,0.507,3.237c-0.228,1.11-0.793,2.199-1.71,3.141
-						c-0.177,0.175-0.203,0.442-0.042,0.612c0.166,0.165,0.45,0.159,0.621-0.022c1.055-1.131,1.642-2.492,1.962-3.7
-						c0.273-1.28,0.081-2.622-0.543-3.722c-1.293-2.267-2.369-4.696-3.227-7.221c-0.299-0.888-0.842-1.647-1.557-2.274l-1.061,0.293
-						c0.811,0.584,1.437,1.345,1.75,2.275C177.651,487.313,178.75,489.797,180.074,492.118z"/>
-					<path fill="#988A38" d="M180.185,492.055c0.542,0.953,0.708,2.07,0.537,3.205c-0.205,1.102-0.742,2.185-1.611,3.153
-						c-0.139,0.148-0.154,0.364-0.018,0.491c0.138,0.128,0.366,0.115,0.501-0.039c0.976-1.117,1.52-2.426,1.81-3.585
-						c0.222-1.26,0.03-2.531-0.573-3.594c-1.295-2.27-2.372-4.702-3.23-7.229c-0.3-0.888-0.846-1.647-1.566-2.274l-0.859,0.237
-						c0.799,0.593,1.409,1.354,1.72,2.275C177.77,487.266,178.865,489.741,180.185,492.055z"/>
-					<path fill="#FEE676" d="M180.297,491.991c0.536,0.941,0.709,2.041,0.559,3.167c-0.184,1.094-0.696,2.168-1.508,3.173
-						c-0.099,0.117-0.102,0.283,0.008,0.372c0.112,0.088,0.285,0.066,0.383-0.057c0.887-1.109,1.394-2.366,1.65-3.477
-						c0.172-1.238-0.013-2.435-0.595-3.462c-1.296-2.272-2.374-4.707-3.234-7.237c-0.3-0.887-0.851-1.647-1.574-2.274l-0.658,0.182
-						c0.783,0.604,1.381,1.363,1.689,2.276C177.889,487.217,178.981,489.685,180.297,491.991z"/>
-					<path fill="#FEE676" d="M180.408,491.928c0.53,0.93,0.701,2.003,0.564,3.117c-0.166,1.087-0.658,2.146-1.391,3.202
-						c-0.063,0.087-0.05,0.203,0.037,0.254c0.085,0.051,0.204,0.02,0.267-0.07c0.774-1.129,1.254-2.302,1.466-3.398
-						c0.144-1.184-0.035-2.319-0.595-3.304c-1.298-2.274-2.376-4.711-3.237-7.244c-0.299-0.888-0.854-1.647-1.584-2.274
-						l-0.455,0.126c0.769,0.612,1.355,1.374,1.661,2.277C178.008,487.168,179.097,489.629,180.408,491.928z"/>
-					<path d="M174.772,485.416c0.917,2.696,2.068,5.303,3.465,7.751c0.793,1.401,0.428,3.521-1.08,4.587
-						c1.74-1.225,2.421-3.248,1.514-4.835c-1.38-2.418-2.519-4.995-3.426-7.664c-0.397-1.165-1.378-1.893-2.603-2.136l-0.35,0.097
-						C173.443,483.527,174.41,484.354,174.772,485.416z"/>
-					<path d="M175.366,498.355c-0.419,0.023-0.827-0.037-1.229-0.168l-0.008,0.011c0.438,0.224,0.972,0.882,1.288,1.619
-						c0.313,0.729,0.385,1.487-0.375,1.81c0.828-0.012,1.618-0.229,2.353-0.595C176.656,500.201,175.946,499.245,175.366,498.355z"
-						/>
-					<path d="M169.379,498.224c0.523,0.905,1.196,1.65,1.97,2.193c0.38,0.275,0.794,0.496,1.209,0.672
-						c0.421,0.181,0.899,0.364,1.352,0.449c1.847,0.377,3.623-0.259,5.013-1.527c-1.286,1.176-3.018,1.677-4.803,1.217
-						c-0.908-0.259-1.722-0.629-2.454-1.168c-0.738-0.541-1.374-1.252-1.854-2.084c-1.727-3.028-3.124-6.199-4.228-9.446
-						c-0.381-1.127-0.44-2.31-0.229-3.398l-0.484,0.134c-0.212,1.073-0.156,2.251,0.24,3.425
-						C166.225,491.965,167.635,495.166,169.379,498.224z"/>
-				</g>
-				<g>
-					<path fill="#988A38" d="M189.214,504.887l1.542-3.722c-1.912,0.412-4.013,1.738-5.254,3.938c0.84,0.46,1.704,0.877,2.583,1.258
-						C188.346,505.757,188.753,505.268,189.214,504.887z"/>
-					<path d="M188.084,506.361c0.235-0.543,0.622-0.976,1.062-1.312l0.139-0.336C188.802,505.141,188.374,505.691,188.084,506.361z"
-						/>
-					<path fill="#988A38" d="M188.084,506.361c-0.879-0.381-1.743-0.798-2.583-1.258c-1.238,2.189-0.981,4.882,0.795,6.826
-						l1.542-3.721C187.751,507.598,187.823,506.966,188.084,506.361z"/>
-					<g>
-						<g>
-							<path fill="#988A38" d="M185.543,505.126c-0.213,0.387-0.075,0.871,0.313,1.07c0.387,0.197,0.86,0.027,1.05-0.372
-								c0.679-1.395,1.87-2.335,3.084-2.812l0.743-1.793C188.84,501.636,186.764,502.952,185.543,505.126z"/>
-							<path fill="#988A38" d="M185.729,504.94c-0.197,0.343-0.076,0.774,0.263,0.953c0.34,0.18,0.756,0.031,0.934-0.32
-								c0.708-1.358,1.917-2.252,3.125-2.706l0.663-1.6C188.904,501.667,186.917,502.899,185.729,504.94z"/>
-							<path fill="#988A38" d="M185.916,504.754c-0.179,0.298-0.078,0.678,0.214,0.839c0.293,0.159,0.655,0.032,0.82-0.27
-								c0.735-1.321,1.96-2.169,3.161-2.603l0.582-1.404C188.966,501.702,187.073,502.844,185.916,504.754z"/>
-							<path fill="#988A38" d="M186.099,504.566c-0.16,0.256-0.079,0.583,0.168,0.727c0.247,0.142,0.556,0.033,0.707-0.223
-								c0.766-1.282,2.002-2.083,3.197-2.495l0.501-1.211C189.029,501.735,187.23,502.788,186.099,504.566z"/>
-							<path fill="#988A38" d="M186.282,504.382c-0.138,0.208-0.079,0.485,0.123,0.609c0.202,0.124,0.46,0.04,0.594-0.169
-								c0.802-1.244,2.045-2.003,3.233-2.393l0.421-1.017C189.094,501.769,187.393,502.733,186.282,504.382z"/>
-							<path fill="#988A38" d="M186.464,504.193c-0.118,0.164-0.079,0.392,0.079,0.499c0.157,0.104,0.368,0.044,0.482-0.122
-								c0.842-1.199,2.086-1.919,3.267-2.286l0.341-0.822C189.158,501.803,187.559,502.682,186.464,504.193z"/>
-							<path fill="#FEE676" d="M186.645,504.003c-0.096,0.122-0.078,0.299,0.035,0.389c0.114,0.089,0.278,0.053,0.373-0.07
-								c0.893-1.146,2.125-1.839,3.3-2.183l0.261-0.631C189.222,501.837,187.734,502.638,186.645,504.003z"/>
-							<path fill="#FEE676" d="M186.826,503.813c-0.075,0.081-0.078,0.205-0.006,0.276c0.071,0.073,0.189,0.064,0.261-0.018
-								c0.967-1.071,2.164-1.758,3.332-2.078l0.181-0.437C189.288,501.869,187.931,502.614,186.826,503.813z"/>
-						</g>
-						<g>
-							<path fill="#988A38" d="M187.984,506.396c0.13-0.302,0.006-0.646-0.275-0.774c-0.28-0.127-0.622,0.005-0.764,0.3
-								c-0.655,1.353-0.596,2.863,0.127,4.136l0.673-1.624C187.622,507.761,187.694,507.061,187.984,506.396z"/>
-							<path fill="#988A38" d="M187.898,506.521c0.111-0.272,0-0.575-0.253-0.688c-0.251-0.11-0.557,0.012-0.679,0.275
-								c-0.581,1.261-0.508,2.663,0.149,3.846l0.601-1.451C187.585,507.852,187.633,507.17,187.898,506.521z"/>
-							<path fill="#988A38" d="M187.815,506.645c0.092-0.237-0.008-0.502-0.233-0.598c-0.223-0.095-0.492,0.016-0.594,0.251
-								c-0.507,1.168-0.422,2.46,0.169,3.554l0.529-1.276C187.549,507.943,187.572,507.278,187.815,506.645z"/>
-							<path fill="#988A38" d="M187.729,506.771c0.074-0.207-0.015-0.434-0.211-0.511c-0.194-0.079-0.424,0.02-0.506,0.224
-								c-0.433,1.078-0.34,2.26,0.188,3.265l0.457-1.104C187.513,508.035,187.509,507.391,187.729,506.771z"/>
-							<path fill="#988A38" d="M187.645,506.895c0.054-0.174-0.022-0.359-0.19-0.42c-0.163-0.064-0.356,0.024-0.418,0.196
-								c-0.359,0.988-0.256,2.06,0.206,2.976l0.384-0.928C187.477,508.128,187.451,507.501,187.645,506.895z"/>
-							<path fill="#988A38" d="M187.559,507.021c0.039-0.143-0.028-0.289-0.166-0.332c-0.135-0.045-0.288,0.029-0.332,0.171
-								c-0.284,0.899-0.175,1.853,0.224,2.685l0.312-0.755C187.441,508.219,187.394,507.612,187.559,507.021z"/>
-							<path fill="#FEE676" d="M187.472,507.146c0.023-0.109-0.034-0.219-0.142-0.246c-0.106-0.027-0.217,0.038-0.244,0.146
-								c-0.2,0.813-0.093,1.648,0.241,2.395l0.24-0.58C187.404,508.312,187.342,507.728,187.472,507.146z"/>
-							<path fill="#FEE676" d="M187.386,507.271c0.01-0.076-0.042-0.144-0.116-0.155c-0.077-0.009-0.145,0.043-0.156,0.12
-								c-0.102,0.724-0.016,1.446,0.257,2.102l0.167-0.403C187.368,508.403,187.309,507.845,187.386,507.271z"/>
-						</g>
-					</g>
-					<g>
-						<path d="M185.501,505.104c-1.293,2.29-1.117,5.057,0.7,7.057l0.192-0.464C184.66,509.803,184.323,507.186,185.501,505.104z"/>
-					</g>
-					<path d="M184.857,506.825c-0.192,0.798-0.235,1.611-0.093,2.408c0.252-1.224,1.807-0.86,2.789-0.338l0.275-0.664
-						C186.819,507.812,185.826,507.346,184.857,506.825z"/>
-					<path fill="#988A38" d="M177.085,494.999c-0.392-0.655-1.284-0.871-2.007-0.473c-0.727,0.401-0.987,1.312-0.562,2.021
-						c0.771,1.249,1.548,2.484,2.527,3.684c0.962,1.18,2.044,2.249,3.203,3.211c2.319,1.927,4.941,3.419,7.679,4.558l1.148-2.772
-						c-2.5-1.038-4.863-2.391-6.91-4.093c-1.023-0.85-1.967-1.784-2.796-2.802C178.548,497.338,177.803,496.165,177.085,494.999z"/>
-					<path d="M174.729,496.419c-0.354-0.588-0.258-1.559,0.349-1.893c-0.844,0.468-1.274,1.318-0.777,2.151
-						c0.775,1.255,1.555,2.496,2.548,3.712c0.974,1.193,2.066,2.274,3.236,3.246c2.342,1.945,4.985,3.449,7.743,4.596l0.191-0.462
-						c-2.718-1.13-5.318-2.611-7.615-4.519c-1.147-0.953-2.217-2.012-3.168-3.178C176.271,498.892,175.496,497.661,174.729,496.419z
-						"/>
-					<g>
-						<path fill="#988A38" d="M176.822,494.946c-0.208-0.354-0.688-0.463-1.072-0.244c-0.384,0.219-0.52,0.696-0.303,1.064
-							c0.748,1.237,1.523,2.455,2.435,3.596c0.914,1.132,1.943,2.167,3.058,3.097c2.228,1.866,4.772,3.322,7.444,4.433l0.612-1.479
-							c-2.542-1.057-4.948-2.437-7.029-4.181c-1.042-0.869-1.996-1.831-2.838-2.874C178.305,497.33,177.538,496.133,176.822,494.946
-							z"/>
-						<path fill="#988A38" d="M176.991,495.341c-0.195-0.311-0.616-0.409-0.958-0.209c-0.338,0.198-0.463,0.63-0.26,0.953
-							c0.74,1.191,1.502,2.378,2.407,3.462c0.902,1.084,1.915,2.071,3.001,2.967c2.179,1.787,4.649,3.195,7.245,4.272l0.546-1.318
-							c-2.483-1.03-4.833-2.373-6.885-4.057c-1.023-0.843-1.972-1.769-2.811-2.777C178.447,497.643,177.711,496.5,176.991,495.341z"
-							/>
-						<path fill="#988A38" d="M177.165,495.734c-0.169-0.275-0.549-0.354-0.847-0.174c-0.297,0.179-0.395,0.556-0.22,0.84
-							c1.404,2.339,3.179,4.47,5.325,6.165c2.127,1.715,4.526,3.073,7.046,4.118l0.481-1.162
-							C184.087,503.516,179.788,500.299,177.165,495.734z"/>
-						<path fill="#988A38" d="M177.344,496.126c-0.154-0.233-0.477-0.301-0.732-0.143c-0.255,0.158-0.341,0.486-0.183,0.729
-							c2.725,4.579,7.208,7.864,12.083,9.867l0.416-1.004C184.197,503.62,179.958,500.516,177.344,496.126z"/>
-						<path fill="#988A38" d="M177.527,496.517c-0.128-0.198-0.407-0.249-0.619-0.113c-0.213,0.136-0.279,0.411-0.147,0.614
-							c2.71,4.384,7.075,7.508,11.795,9.458l0.351-0.846C184.307,503.723,180.132,500.731,177.527,496.517z"/>
-						<path fill="#FEE676" d="M177.711,496.906c-0.108-0.158-0.33-0.199-0.503-0.086c-0.172,0.112-0.225,0.338-0.114,0.499
-							c2.698,4.18,6.934,7.16,11.504,9.053l0.284-0.687C184.411,503.831,180.315,500.943,177.711,496.906z"/>
-						<path fill="#FEE676" d="M177.904,497.294c-0.083-0.122-0.257-0.151-0.388-0.063c-0.13,0.088-0.167,0.261-0.083,0.386
-							c2.682,3.972,6.788,6.817,11.209,8.65l0.218-0.527C184.515,503.938,180.506,501.15,177.904,497.294z"/>
-						<path fill="#FEE676" d="M178.096,497.677c-0.059-0.083-0.18-0.099-0.271-0.036s-0.116,0.183-0.056,0.267
-							c2.668,3.765,6.641,6.485,10.916,8.257l0.153-0.369C184.615,504.046,180.706,501.36,178.096,497.677z"/>
-					</g>
-				</g>
-			</g>
-			<g>
-				<g>
-					<path fill="#988A38" d="M191.664,507.743c0.235-0.794-0.163-1.612-0.875-1.862c-0.578-0.202-1.15-0.419-1.715-0.652
-						L187.925,508c0.619,0.256,1.244,0.493,1.875,0.713C190.588,508.989,191.429,508.537,191.664,507.743z"/>
-					<g>
-						<path fill="#988A38" d="M191.422,507.146c0.132-0.423-0.077-0.862-0.461-0.994c-0.663-0.226-1.318-0.472-1.964-0.739
-							l-0.612,1.479c0.678,0.28,1.366,0.538,2.06,0.775C190.851,507.806,191.291,507.566,191.422,507.146z"/>
-						<path fill="#988A38" d="M191.136,507.026c0.121-0.374-0.062-0.772-0.406-0.892c-0.592-0.205-1.178-0.428-1.756-0.667
-							l-0.546,1.318c0.604,0.25,1.215,0.482,1.833,0.697C190.622,507.607,191.015,507.402,191.136,507.026z"/>
-						<path fill="#988A38" d="M190.851,506.903c0.109-0.329-0.049-0.679-0.352-0.787c-0.521-0.186-1.037-0.384-1.548-0.595
-							l-0.481,1.162c0.531,0.219,1.067,0.424,1.607,0.617C190.394,507.414,190.74,507.234,190.851,506.903z"/>
-						<path fill="#988A38" d="M190.568,506.779c0.098-0.284-0.038-0.59-0.3-0.685c-0.45-0.163-0.896-0.338-1.339-0.52l-0.416,1.004
-							c0.458,0.188,0.919,0.368,1.384,0.537C190.169,507.214,190.47,507.063,190.568,506.779z"/>
-						<path fill="#988A38" d="M190.285,506.653c0.085-0.239-0.026-0.498-0.247-0.581c-0.381-0.14-0.757-0.29-1.131-0.442
-							l-0.351,0.846c0.385,0.158,0.772,0.312,1.163,0.456C189.947,507.017,190.2,506.893,190.285,506.653z"/>
-						<path fill="#FEE676" d="M190.004,506.522c0.071-0.193-0.018-0.405-0.199-0.472l-0.922-0.365l-0.285,0.687l0.943,0.374
-							C189.728,506.814,189.934,506.717,190.004,506.522z"/>
-						<path fill="#FEE676" d="M189.725,506.393c0.057-0.15-0.011-0.314-0.15-0.368l-0.714-0.285l-0.218,0.527l0.726,0.29
-							C189.511,506.611,189.668,506.543,189.725,506.393z"/>
-						<path fill="#FEE676" d="M189.447,506.258c0.041-0.105-0.006-0.222-0.104-0.26l-0.504-0.203l-0.153,0.369l0.511,0.206
-							C189.295,506.409,189.407,506.362,189.447,506.258z"/>
-					</g>
-					<path d="M191.664,507.743c-0.195,0.663-1.13,0.964-1.782,0.734c-0.626-0.219-1.247-0.454-1.861-0.708l-0.191,0.462
-						c0.623,0.258,1.253,0.497,1.888,0.719C190.644,509.272,191.389,508.666,191.664,507.743z"/>
-					<path fill="#988A38" d="M199.852,515.811c2.04,0.134,3.907-0.63,5.123-1.885c1.229-1.257,1.806-2.981,1.628-4.793
-						c-0.178-1.814-1.01-3.386-2.069-4.53c-1.071-1.146-2.373-1.87-3.82-1.965c-2.534-0.168-5.046-0.624-7.41-1.37
-						c-0.834-0.263-1.686-0.278-2.548-0.103l-1.542,3.722c0.938-0.771,2.103-1.098,3.187-0.758c2.61,0.823,5.353,1.319,8.117,1.502
-						c1.696,0.113,3.141,1.685,3.214,3.669c0.075,1.981-1.582,3.653-3.684,3.517c-3.316-0.22-6.613-0.812-9.814-1.821
-						c-1.343-0.421-2.23-1.557-2.396-2.786l-1.542,3.722c0.774,0.853,1.817,1.542,3.034,1.926
-						C192.777,514.942,196.306,515.575,199.852,515.811z"/>
-					<path fill="#988A38" d="M199.96,514.157c1.447,0.095,2.781-0.442,3.682-1.34c0.908-0.899,1.384-2.152,1.311-3.492
-						c-0.018-0.327-0.283-0.579-0.592-0.568c-0.309,0.014-0.557,0.283-0.55,0.611c0.081,2.028-1.614,3.741-3.772,3.601
-						c-3.327-0.221-6.637-0.814-9.85-1.828c-1.326-0.415-2.227-1.508-2.442-2.708l-0.673,1.624c0.562,0.996,1.521,1.831,2.757,2.22
-						C193.14,513.32,196.542,513.93,199.96,514.157z"/>
-					<path fill="#988A38" d="M199.965,514.068c1.395,0.091,2.685-0.41,3.567-1.26c0.889-0.85,1.372-2.037,1.33-3.318
-						c-0.01-0.291-0.246-0.519-0.524-0.514c-0.277,0.011-0.496,0.248-0.496,0.541c0.033,1.983-1.665,3.629-3.808,3.49
-						c-3.33-0.222-6.642-0.816-9.858-1.83c-1.325-0.416-2.226-1.488-2.46-2.675l-0.601,1.451c0.543,1.003,1.498,1.847,2.741,2.237
-						C193.16,513.232,196.553,513.842,199.965,514.068z"/>
-					<path fill="#988A38" d="M199.972,513.979c1.343,0.088,2.585-0.379,3.449-1.179c0.869-0.801,1.357-1.921,1.348-3.144
-						c-0.001-0.256-0.209-0.46-0.453-0.456c-0.244,0.003-0.437,0.209-0.442,0.463c-0.018,1.943-1.717,3.519-3.84,3.38
-						c-3.333-0.221-6.648-0.816-9.868-1.831c-1.324-0.416-2.225-1.466-2.479-2.638l-0.529,1.277c0.525,1.01,1.472,1.86,2.726,2.254
-						C193.179,513.146,196.567,513.754,199.972,513.979z"/>
-					<path fill="#988A38" d="M199.978,513.89c2.618,0.174,4.683-1.745,4.698-4.067c0.004-0.22-0.172-0.399-0.382-0.4
-						c-0.211-0.001-0.378,0.174-0.389,0.392c-0.07,1.901-1.768,3.406-3.875,3.269c-3.336-0.221-6.654-0.816-9.876-1.833
-						c-1.325-0.415-2.223-1.446-2.497-2.604l-0.457,1.104c0.505,1.017,1.449,1.874,2.71,2.271
-						C193.199,513.058,196.58,513.664,199.978,513.89z"/>
-					<path fill="#988A38" d="M199.983,513.799c2.511,0.166,4.501-1.604,4.598-3.813c0.011-0.184-0.135-0.336-0.311-0.342
-						c-0.177-0.003-0.319,0.137-0.335,0.315c-0.132,1.866-1.817,3.296-3.908,3.16c-3.339-0.222-6.66-0.817-9.885-1.835
-						c-1.324-0.416-2.223-1.424-2.516-2.565l-0.384,0.928c0.486,1.024,1.424,1.889,2.695,2.288
-						C193.219,512.969,196.592,513.574,199.983,513.799z"/>
-					<path fill="#988A38" d="M199.989,513.711c2.403,0.158,4.31-1.468,4.497-3.562c0.016-0.146-0.098-0.273-0.24-0.284
-						c-0.143-0.009-0.262,0.101-0.279,0.244c-0.204,1.824-1.871,3.184-3.942,3.048c-3.342-0.222-6.666-0.818-9.895-1.836
-						c-1.323-0.416-2.222-1.403-2.533-2.532l-0.313,0.755c0.468,1.032,1.401,1.903,2.679,2.306
-						C193.238,512.882,196.605,513.486,199.989,513.711z"/>
-					<path fill="#FEE676" d="M199.995,513.621c2.296,0.15,4.092-1.334,4.396-3.306c0.019-0.112-0.061-0.214-0.167-0.228
-						c-0.108-0.013-0.204,0.062-0.223,0.17c-0.298,1.779-1.923,3.072-3.977,2.938c-3.345-0.222-6.672-0.819-9.904-1.838
-						c-1.323-0.416-2.22-1.383-2.551-2.496l-0.24,0.579c0.449,1.039,1.376,1.918,2.663,2.323
-						C193.258,512.794,196.618,513.397,199.995,513.621z"/>
-					<path fill="#FEE676" d="M200.001,513.531c2.189,0.142,3.849-1.211,4.293-3.052c0.02-0.077-0.023-0.15-0.096-0.17
-						c-0.073-0.019-0.146,0.025-0.166,0.097c-0.431,1.725-1.976,2.959-4.012,2.828c-3.348-0.223-6.678-0.82-9.913-1.84
-						c-1.323-0.416-2.22-1.362-2.57-2.461l-0.167,0.403c0.43,1.049,1.352,1.935,2.647,2.342
-						C193.277,512.706,196.63,513.309,200.001,513.531z"/>
-					<path d="M188.257,509.412c-0.214-0.365-0.355-0.765-0.417-1.176l-0.011-0.005l-0.275,0.664
-						c0.302,0.161,0.552,0.349,0.693,0.526L188.257,509.412z"/>
-					<path fill="#988A38" d="M200.606,504.284c1.14,0.074,2.159,0.639,2.962,1.52c0.797,0.881,1.379,2.083,1.457,3.447
-						c0.022,0.441,0.393,0.779,0.825,0.749c0.432-0.033,0.748-0.422,0.706-0.862c-0.174-1.798-1-3.359-2.051-4.495
-						c-1.064-1.139-2.357-1.859-3.794-1.954c-2.538-0.167-5.054-0.624-7.422-1.372c-0.835-0.263-1.689-0.278-2.555-0.098
-						l-0.743,1.793c0.932-0.362,1.891-0.462,2.816-0.17C195.307,503.631,197.946,504.108,200.606,504.284z"/>
-					<path fill="#988A38" d="M200.614,504.156c1.126,0.073,2.148,0.621,2.964,1.479c0.809,0.858,1.414,2.031,1.525,3.375
-						c0.029,0.395,0.356,0.689,0.738,0.659c0.381-0.033,0.66-0.382,0.614-0.774c-0.199-1.727-1.017-3.218-2.05-4.302
-						c-1.044-1.085-2.308-1.771-3.698-1.862c-2.541-0.168-5.06-0.626-7.431-1.374c-0.835-0.264-1.693-0.276-2.562-0.089l-0.663,1.6
-						c0.93-0.348,1.879-0.436,2.795-0.147C195.334,503.505,197.963,503.981,200.614,504.156z"/>
-					<path fill="#988A38" d="M200.623,504.028c1.113,0.072,2.136,0.602,2.962,1.437c0.82,0.835,1.447,1.979,1.594,3.304
-						c0.036,0.345,0.321,0.601,0.652,0.568c0.332-0.035,0.575-0.344,0.527-0.688c-0.226-1.656-1.038-3.072-2.053-4.106
-						c-1.025-1.029-2.257-1.682-3.601-1.77c-2.544-0.168-5.067-0.626-7.442-1.376c-0.835-0.263-1.696-0.272-2.57-0.081l-0.582,1.405
-						c0.926-0.331,1.867-0.41,2.773-0.125C195.363,503.379,197.982,503.854,200.623,504.028z"/>
-					<path fill="#988A38" d="M200.631,503.899c1.101,0.071,2.123,0.584,2.959,1.395c0.83,0.812,1.479,1.929,1.664,3.234
-						c0.04,0.295,0.285,0.511,0.567,0.479c0.282-0.038,0.488-0.307,0.441-0.602c-0.257-1.588-1.062-2.932-2.058-3.912
-						c-1.005-0.977-2.207-1.591-3.503-1.677c-2.548-0.168-5.073-0.627-7.452-1.378c-0.836-0.264-1.699-0.269-2.577-0.074
-						l-0.501,1.211c0.922-0.312,1.856-0.384,2.751-0.102C195.391,503.253,198,503.726,200.631,503.899z"/>
-					<path fill="#988A38" d="M200.64,503.771c1.087,0.07,2.108,0.563,2.954,1.352c0.839,0.787,1.509,1.878,1.735,3.165
-						c0.042,0.245,0.249,0.423,0.484,0.388c0.234-0.037,0.401-0.27,0.354-0.515c-0.29-1.519-1.088-2.794-2.065-3.72
-						c-0.985-0.925-2.153-1.5-3.403-1.584c-2.551-0.168-5.08-0.627-7.461-1.38c-0.836-0.263-1.704-0.264-2.584-0.064l-0.421,1.017
-						c0.918-0.296,1.844-0.357,2.73-0.078C195.419,503.127,198.019,503.598,200.64,503.771z"/>
-					<path fill="#988A38" d="M200.648,503.644c2.111,0.149,4.162,1.871,4.753,4.401c0.042,0.198,0.213,0.337,0.4,0.299
-						c0.188-0.037,0.316-0.229,0.271-0.429c-0.331-1.447-1.123-2.654-2.079-3.524c-0.963-0.872-2.094-1.409-3.296-1.489
-						c-2.554-0.169-5.086-0.628-7.472-1.382c-0.837-0.264-1.707-0.262-2.592-0.058l-0.341,0.822
-						c0.917-0.281,1.832-0.331,2.708-0.055C195.447,503.002,198.037,503.471,200.648,503.644z"/>
-					<path fill="#FEE676" d="M200.656,503.516c2.088,0.146,4.108,1.807,4.817,4.289c0.04,0.148,0.178,0.247,0.318,0.208
-						c0.141-0.038,0.229-0.191,0.186-0.343c-0.382-1.372-1.168-2.509-2.101-3.328c-0.938-0.818-2.028-1.321-3.183-1.396
-						c-2.558-0.169-5.093-0.63-7.482-1.384c-0.837-0.264-1.711-0.259-2.598-0.054l-0.261,0.631c0.913-0.264,1.82-0.305,2.687-0.032
-						C195.476,502.876,198.055,503.344,200.656,503.516z"/>
-					<path fill="#FEE676" d="M200.665,503.388c2.068,0.144,4,1.763,4.879,4.177c0.036,0.104,0.142,0.157,0.237,0.117
-						c0.095-0.039,0.142-0.154,0.104-0.258c-0.962-2.568-3.014-4.286-5.195-4.436c-2.561-0.17-5.1-0.63-7.493-1.386
-						c-0.838-0.264-1.713-0.256-2.605-0.045l-0.181,0.437c0.909-0.249,1.808-0.28,2.665-0.01
-						C195.503,502.75,198.073,503.216,200.665,503.388z"/>
-					<path d="M192.325,504.368c2.631,0.83,5.393,1.33,8.176,1.514c1.589,0.11,3.16,1.573,3.23,3.418
-						c-0.076-2.126-1.396-3.801-3.198-3.917c-2.745-0.182-5.469-0.674-8.058-1.491c-1.129-0.355-2.257,0.004-3.19,0.821
-						l-0.139,0.336C190.091,504.331,191.291,504.045,192.325,504.368z"/>
-					<path d="M203.257,511.128c-0.209,0.363-0.485,0.675-0.815,0.938l0.005,0.014c0.828-0.462,3.538-0.444,3.239,1.143
-						c0.427-0.68,0.698-1.445,0.833-2.252C205.433,511.064,204.345,511.119,203.257,511.128z"/>
-					<path d="M199.835,516.06c2.125,0.139,4.002-0.661,5.201-1.962c1.212-1.303,1.75-3.087,1.567-4.965
-						c0.171,1.74-0.447,3.405-1.694,4.617c-1.233,1.209-3.092,1.94-5.041,1.811c-3.526-0.234-7.036-0.863-10.462-1.943
-						c-1.188-0.373-2.232-1.063-3.012-1.921l-0.192,0.464c0.77,0.851,1.812,1.543,3.054,1.935
-						C192.723,515.188,196.271,515.823,199.835,516.06z"/>
-				</g>
-				<g>
-					<path fill="#988A38" d="M217.193,504.596l-1.62-3.688c-1.043,1.672-1.554,4.125-0.789,6.532
-						c0.912-0.298,1.809-0.637,2.692-1.008C217.221,505.826,217.149,505.195,217.193,504.596z"/>
-					<path d="M217.477,506.433c-0.23-0.545-0.274-1.123-0.213-1.677l-0.146-0.332C217.091,505.071,217.193,505.761,217.477,506.433z
-						"/>
-					<path fill="#988A38" d="M217.477,506.433c-0.884,0.371-1.781,0.71-2.692,1.008c0.76,2.397,2.859,4.063,5.475,4.136l-1.62-3.688
-						C218.139,507.529,217.732,507.04,217.477,506.433z"/>
-					<g>
-						<g>
-							<path fill="#988A38" d="M214.829,507.426c0.137,0.421,0.583,0.649,0.993,0.506c0.408-0.146,0.61-0.605,0.452-1.018
-								c-0.544-1.454-0.398-2.966,0.103-4.177l-0.781-1.776C214.571,502.623,214.07,505.05,214.829,507.426z"/>
-							<path fill="#988A38" d="M214.821,507.163c0.116,0.379,0.512,0.584,0.875,0.462c0.363-0.124,0.542-0.527,0.409-0.898
-								c-0.499-1.449-0.304-2.941,0.208-4.133l-0.696-1.585C214.639,502.6,214.138,504.898,214.821,507.163z"/>
-							<path fill="#988A38" d="M214.814,506.9c0.094,0.333,0.439,0.519,0.758,0.418c0.316-0.104,0.474-0.453,0.368-0.781
-								c-0.453-1.443-0.213-2.917,0.31-4.087l-0.612-1.393C214.709,502.578,214.208,504.748,214.814,506.9z"/>
-							<path fill="#988A38" d="M214.803,506.638c0.076,0.29,0.371,0.455,0.646,0.376c0.272-0.085,0.408-0.381,0.326-0.667
-								c-0.404-1.439-0.121-2.889,0.413-4.042l-0.527-1.199C214.778,502.555,214.277,504.596,214.803,506.638z"/>
-							<path fill="#988A38" d="M214.793,506.377c0.057,0.243,0.301,0.39,0.53,0.329c0.229-0.063,0.347-0.307,0.286-0.548
-								c-0.351-1.439-0.032-2.865,0.514-3.997l-0.442-1.008C214.848,502.529,214.351,504.44,214.793,506.377z"/>
-							<path fill="#988A38" d="M214.781,506.114c0.039,0.199,0.232,0.326,0.419,0.286c0.184-0.045,0.287-0.237,0.245-0.436
-								c-0.292-1.437,0.058-2.837,0.615-3.948l-0.358-0.815C214.918,502.506,214.43,504.283,214.781,506.114z"/>
-							<path fill="#FEE676" d="M214.768,505.853c0.022,0.154,0.164,0.262,0.308,0.241c0.144-0.023,0.23-0.166,0.207-0.32
-								c-0.218-1.438,0.145-2.811,0.715-3.9l-0.274-0.625C214.989,502.484,214.521,504.123,214.768,505.853z"/>
-							<path fill="#FEE676" d="M214.754,505.591c0.008,0.11,0.096,0.197,0.199,0.195c0.102-0.002,0.177-0.096,0.168-0.204
-								c-0.111-1.439,0.231-2.784,0.813-3.853l-0.19-0.433C215.06,502.46,214.642,503.962,214.754,505.591z"/>
-						</g>
-						<g>
-							<path fill="#988A38" d="M217.431,506.528c-0.128-0.304-0.461-0.449-0.75-0.336c-0.285,0.115-0.426,0.454-0.311,0.76
-								c0.53,1.406,1.659,2.408,3.073,2.768l-0.707-1.61C218.168,507.736,217.711,507.197,217.431,506.528z"/>
-							<path fill="#988A38" d="M217.463,506.677c-0.119-0.269-0.414-0.396-0.673-0.292c-0.253,0.106-0.376,0.412-0.269,0.683
-								c0.514,1.29,1.574,2.206,2.877,2.55l-0.631-1.438C218.208,507.826,217.748,507.316,217.463,506.677z"/>
-							<path fill="#988A38" d="M217.495,506.823c-0.109-0.231-0.368-0.34-0.593-0.244c-0.223,0.096-0.328,0.367-0.229,0.603
-								c0.497,1.172,1.486,2.003,2.681,2.335l-0.556-1.266C218.25,507.915,217.784,507.436,217.495,506.823z"/>
-							<path fill="#988A38" d="M217.527,506.973c-0.099-0.197-0.322-0.287-0.515-0.2c-0.191,0.085-0.278,0.321-0.188,0.521
-								c0.482,1.058,1.398,1.807,2.485,2.122l-0.48-1.094C218.291,508.005,217.822,507.559,217.527,506.973z"/>
-							<path fill="#988A38" d="M217.557,507.119c-0.088-0.159-0.274-0.23-0.435-0.153c-0.159,0.074-0.228,0.275-0.146,0.438
-								c0.467,0.942,1.31,1.608,2.289,1.909l-0.404-0.92C218.333,508.096,217.861,507.677,217.557,507.119z"/>
-							<path fill="#988A38" d="M217.589,507.269c-0.077-0.128-0.228-0.179-0.355-0.11c-0.126,0.066-0.177,0.229-0.105,0.357
-								c0.455,0.826,1.217,1.409,2.091,1.695l-0.329-0.748C218.374,508.184,217.902,507.795,217.589,507.269z"/>
-							<path fill="#FEE676" d="M217.621,507.418c-0.063-0.093-0.181-0.126-0.275-0.067c-0.093,0.056-0.122,0.183-0.062,0.276
-								c0.45,0.706,1.125,1.209,1.892,1.482l-0.252-0.574C218.416,508.275,217.95,507.912,217.621,507.418z"/>
-							<path fill="#FEE676" d="M217.651,507.565c-0.048-0.06-0.133-0.068-0.192-0.022c-0.06,0.048-0.069,0.133-0.021,0.194
-								c0.452,0.574,1.033,1.014,1.692,1.269l-0.175-0.399C218.456,508.363,218.011,508.017,217.651,507.565z"/>
-						</g>
-					</g>
-					<g>
-						<path d="M214.784,507.44c0.796,2.507,2.893,4.282,5.575,4.363l-0.202-0.46C217.607,511.277,215.506,509.721,214.784,507.44z"
-							/>
-					</g>
-					<path d="M215.598,509.09c0.447,0.686,1.005,1.276,1.675,1.725c-0.713-1.027,0.608-1.897,1.665-2.246l-0.289-0.658
-						C217.649,508.351,216.63,508.741,215.598,509.09z"/>
-					<path fill="#988A38" d="M201.391,507.851c-0.76-0.028-1.429,0.592-1.497,1.417c-0.067,0.824,0.55,1.55,1.38,1.581
-						c5.881,0.223,11.84-0.783,17.273-3.168l-1.206-2.747C212.369,507.119,206.847,508.06,201.391,507.851z"/>
-					<path d="M201.284,510.6c-0.688-0.023-1.447-0.645-1.39-1.332c-0.077,0.96,0.397,1.792,1.371,1.831
-						c5.916,0.225,11.912-0.787,17.384-3.188l-0.201-0.458C213.052,509.82,207.129,510.821,201.284,510.6z"/>
-					<g>
-						<path fill="#988A38" d="M201.203,508.042c-0.408-0.016-0.763,0.317-0.792,0.761c-0.029,0.438,0.294,0.822,0.722,0.839
-							c5.772,0.248,11.625-0.729,16.933-3.06l-0.644-1.466C212.364,507.338,206.746,508.281,201.203,508.042z"/>
-						<path fill="#988A38" d="M201.625,508.118c-0.366-0.011-0.679,0.292-0.7,0.687c-0.021,0.392,0.271,0.729,0.653,0.74
-							c5.612,0.182,11.287-0.806,16.441-3.066l-0.574-1.307C212.508,507.339,207.038,508.294,201.625,508.118z"/>
-						<path fill="#988A38" d="M202.047,508.188c-0.322-0.007-0.595,0.265-0.609,0.612c-0.014,0.347,0.248,0.637,0.582,0.645
-							c5.453,0.115,10.951-0.873,15.955-3.069l-0.506-1.151C212.652,507.341,207.33,508.301,202.047,508.188z"/>
-						<path fill="#988A38" d="M202.473,508.254c-0.28-0.005-0.513,0.235-0.521,0.535c-0.008,0.301,0.22,0.547,0.509,0.552
-							c5.292,0.053,10.616-0.938,15.468-3.067l-0.437-0.995C212.796,507.34,207.622,508.307,202.473,508.254z"/>
-						<path fill="#988A38" d="M202.898,508.314c-0.235-0.001-0.43,0.205-0.433,0.457c-0.003,0.253,0.191,0.458,0.433,0.459
-							c5.133-0.004,10.283-0.995,14.986-3.06l-0.369-0.839C212.941,507.341,207.913,508.312,202.898,508.314z"/>
-						<path fill="#FEE676" d="M203.325,508.372c-0.191,0.002-0.347,0.169-0.347,0.374c0,0.207,0.16,0.371,0.355,0.369
-							c4.973-0.059,9.952-1.047,14.505-3.048l-0.298-0.68C213.088,507.345,208.205,508.315,203.325,508.372z"/>
-						<path fill="#FEE676" d="M203.754,508.423c-0.147,0.005-0.267,0.135-0.264,0.292c0.002,0.157,0.127,0.283,0.277,0.278
-							c4.813-0.107,9.621-1.096,14.026-3.03l-0.229-0.521C213.233,507.343,208.498,508.317,203.754,508.423z"/>
-						<path fill="#FEE676" d="M204.184,508.471c-0.104,0.005-0.186,0.096-0.183,0.206c0.003,0.11,0.091,0.198,0.196,0.194
-							c4.654-0.157,9.292-1.139,13.552-3.01l-0.161-0.366C213.38,507.344,208.791,508.316,204.184,508.471z"/>
-					</g>
-				</g>
-			</g>
-			<g>
-				<g>
-					<path fill="#988A38" d="M220.95,504.804c-0.414-0.718-1.278-0.995-1.95-0.654c-0.545,0.276-1.098,0.538-1.658,0.784
-						l1.207,2.747c0.612-0.27,1.217-0.556,1.813-0.858C221.109,506.443,221.363,505.521,220.95,504.804z"/>
-					<g>
-						<path fill="#988A38" d="M220.351,504.566c-0.216-0.389-0.676-0.541-1.037-0.355c-0.62,0.322-1.251,0.624-1.892,0.905
-							l0.644,1.466c0.672-0.295,1.334-0.611,1.986-0.95C220.434,505.436,220.564,504.951,220.351,504.566z"/>
-						<path fill="#988A38" d="M220.066,504.689c-0.187-0.346-0.6-0.488-0.923-0.322c-0.558,0.284-1.124,0.552-1.697,0.804
-							l0.574,1.308c0.599-0.264,1.189-0.543,1.772-0.84C220.132,505.464,220.255,505.037,220.066,504.689z"/>
-						<path fill="#988A38" d="M219.781,504.812c-0.162-0.308-0.524-0.435-0.812-0.291c-0.493,0.249-0.994,0.481-1.5,0.704
-							l0.506,1.151c0.525-0.231,1.046-0.473,1.558-0.732C219.834,505.494,219.944,505.119,219.781,504.812z"/>
-						<path fill="#988A38" d="M219.496,504.93c-0.137-0.267-0.451-0.381-0.701-0.258c-0.429,0.212-0.863,0.414-1.302,0.606
-							l0.437,0.994c0.453-0.198,0.902-0.407,1.345-0.626C219.535,505.519,219.635,505.197,219.496,504.93z"/>
-						<path fill="#988A38" d="M219.21,505.047c-0.115-0.228-0.378-0.327-0.59-0.224c-0.365,0.175-0.732,0.347-1.104,0.509
-							l0.369,0.839c0.381-0.167,0.759-0.343,1.134-0.523C219.238,505.541,219.323,505.272,219.21,505.047z"/>
-						<path fill="#FEE676" d="M218.922,505.158c-0.09-0.184-0.304-0.267-0.479-0.185l-0.902,0.413l0.299,0.681l0.923-0.422
-							C218.942,505.561,219.013,505.345,218.922,505.158z"/>
-						<path fill="#FEE676" d="M218.634,505.27c-0.07-0.146-0.235-0.21-0.369-0.146l-0.701,0.316l0.229,0.522l0.713-0.322
-							C218.644,505.575,218.703,505.414,218.634,505.27z"/>
-						<path fill="#FEE676" d="M218.345,505.378c-0.048-0.103-0.164-0.149-0.26-0.105l-0.496,0.223l0.161,0.366l0.502-0.226
-							C218.349,505.592,218.393,505.479,218.345,505.378z"/>
-					</g>
-					<path d="M220.95,504.804c0.346,0.597-0.083,1.485-0.701,1.796c-0.592,0.302-1.193,0.585-1.8,0.853l0.201,0.458
-						c0.616-0.271,1.226-0.56,1.827-0.864C221.351,506.598,221.429,505.639,220.95,504.804z"/>
-					<path fill="#988A38" d="M232.555,504.047c1.493-1.526,2.141-3.527,1.936-5.291c-0.19-1.775-1.201-3.299-2.738-4.269
-						c-1.543-0.975-3.288-1.284-4.821-1.206c-1.539,0.094-2.87,0.568-3.817,1.54c-1.663,1.702-3.589,3.155-5.716,4.325
-						c-0.751,0.411-1.347,1.018-1.823,1.761l1.62,3.688c0.095-1.21,0.666-2.279,1.65-2.82c2.373-1.306,4.541-2.938,6.415-4.857
-						c1.146-1.166,3.245-1.269,4.84-0.081c1.59,1.181,1.827,3.562,0.311,5.113c-2.38,2.438-5.13,4.505-8.093,6.133
-						c-1.244,0.685-2.675,0.538-3.676-0.195l1.62,3.688c1.148,0.037,2.369-0.243,3.503-0.865
-						C226.971,508.948,229.964,506.702,232.555,504.047z"/>
-					<path fill="#988A38" d="M231.37,502.89c1.048-1.073,1.517-2.481,1.399-3.765c-0.111-1.29-0.794-2.446-1.887-3.222
-						c-0.267-0.19-0.631-0.143-0.813,0.105c-0.186,0.246-0.127,0.61,0.132,0.812c1.628,1.202,1.875,3.642,0.317,5.237
-						c-2.391,2.449-5.154,4.524-8.129,6.159c-1.228,0.676-2.638,0.569-3.653-0.107l0.707,1.609c1.105,0.288,2.373,0.171,3.52-0.459
-						C226.036,507.571,228.895,505.425,231.37,502.89z"/>
-					<path fill="#988A38" d="M231.306,502.828c1.01-1.034,1.477-2.383,1.387-3.621c-0.083-1.243-0.71-2.365-1.738-3.131
-						c-0.235-0.174-0.56-0.132-0.728,0.088c-0.168,0.219-0.117,0.541,0.11,0.726c1.564,1.212,1.754,3.612,0.208,5.195
-						c-2.394,2.452-5.159,4.529-8.137,6.166c-1.227,0.675-2.623,0.583-3.641-0.071l0.631,1.438c1.099,0.307,2.368,0.199,3.522-0.436
-						C225.986,507.497,228.837,505.356,231.306,502.828z"/>
-					<path fill="#988A38" d="M231.243,502.765c0.971-0.994,1.435-2.282,1.372-3.474c-0.056-1.194-0.627-2.283-1.587-3.038
-						c-0.202-0.159-0.489-0.123-0.639,0.069c-0.153,0.188-0.11,0.471,0.083,0.635c1.501,1.227,1.635,3.584,0.102,5.153
-						c-2.396,2.454-5.165,4.534-8.146,6.173c-1.227,0.675-2.606,0.598-3.627-0.032l0.556,1.266c1.091,0.324,2.36,0.227,3.523-0.412
-						C225.936,507.424,228.781,505.287,231.243,502.765z"/>
-					<path fill="#988A38" d="M231.178,502.702c0.932-0.956,1.393-2.183,1.356-3.326c-0.03-1.146-0.546-2.201-1.438-2.948
-						c-0.169-0.141-0.417-0.112-0.551,0.05c-0.135,0.161-0.102,0.403,0.061,0.549c1.437,1.241,1.515,3.555-0.006,5.11
-						c-2.399,2.458-5.171,4.539-8.156,6.18c-1.227,0.675-2.591,0.61-3.615,0.004l0.48,1.095c1.083,0.343,2.354,0.253,3.524-0.39
-						C225.885,507.349,228.722,505.218,231.178,502.702z"/>
-					<path fill="#988A38" d="M231.114,502.639c0.894-0.918,1.349-2.083,1.339-3.177c-0.008-1.1-0.464-2.114-1.287-2.857
-						c-0.137-0.124-0.345-0.103-0.462,0.029c-0.115,0.135-0.094,0.335,0.035,0.46c1.37,1.269,1.396,3.524-0.113,5.068
-						c-2.401,2.46-5.176,4.545-8.164,6.187c-1.227,0.675-2.575,0.626-3.601,0.045l0.404,0.919c1.075,0.361,2.347,0.281,3.526-0.366
-						C225.834,507.273,228.664,505.148,231.114,502.639z"/>
-					<path fill="#988A38" d="M231.05,502.577c1.742-1.783,1.704-4.347,0.185-5.795c-0.104-0.104-0.274-0.094-0.373,0.009
-						c-0.099,0.105-0.086,0.269,0.014,0.371c1.293,1.3,1.273,3.497-0.222,5.027c-2.404,2.463-5.182,4.549-8.173,6.193
-						c-1.227,0.674-2.56,0.64-3.589,0.08l0.329,0.748c1.068,0.381,2.341,0.309,3.527-0.343
-						C225.784,507.199,228.607,505.079,231.05,502.577z"/>
-					<path fill="#FEE676" d="M230.986,502.515c1.661-1.703,1.667-4.087,0.318-5.553c-0.075-0.085-0.206-0.087-0.284-0.013
-						c-0.08,0.075-0.08,0.199-0.008,0.282c1.198,1.346,1.152,3.468-0.331,4.985c-2.407,2.466-5.188,4.555-8.182,6.2
-						c-1.227,0.674-2.543,0.652-3.576,0.118l0.252,0.574c1.06,0.397,2.335,0.336,3.529-0.319
-						C225.734,507.125,228.549,505.011,230.986,502.515z"/>
-					<path fill="#FEE676" d="M230.922,502.451c1.581-1.623,1.605-3.813,0.448-5.312c-0.047-0.063-0.132-0.075-0.193-0.031
-						c-0.061,0.045-0.072,0.131-0.03,0.19c1.072,1.417,1.03,3.439-0.439,4.945c-2.41,2.468-5.194,4.56-8.191,6.207
-						c-1.226,0.673-2.528,0.666-3.562,0.156l0.175,0.399c1.054,0.419,2.329,0.365,3.531-0.295
-						C225.683,507.051,228.491,504.941,230.922,502.451z"/>
-					<path d="M219.798,508.421c-0.414-0.104-0.796-0.271-1.137-0.516l-0.012,0.005l0.29,0.658c0.326-0.103,0.632-0.158,0.859-0.134
-						V508.421z"/>
-					<path fill="#988A38" d="M224.294,495.975c0.76-0.779,1.811-1.165,2.987-1.216c1.171-0.044,2.472,0.252,3.588,1.041
-						c0.357,0.256,0.856,0.178,1.099-0.185c0.245-0.36,0.13-0.852-0.243-1.088c-1.527-0.967-3.259-1.276-4.782-1.199
-						c-1.528,0.093-2.854,0.564-3.794,1.529c-1.667,1.706-3.596,3.162-5.729,4.334c-0.752,0.412-1.35,1.021-1.825,1.769l0.781,1.777
-						c0.388-0.922,0.978-1.686,1.815-2.145C220.456,499.349,222.515,497.796,224.294,495.975z"/>
-					<path fill="#988A38" d="M224.202,495.886c0.751-0.77,1.789-1.167,2.955-1.24c1.159-0.067,2.453,0.195,3.573,0.942
-						c0.325,0.222,0.764,0.152,0.974-0.171c0.212-0.319,0.106-0.757-0.231-0.961c-1.484-0.903-3.158-1.181-4.626-1.085
-						c-1.477,0.108-2.754,0.583-3.667,1.516c-1.669,1.709-3.603,3.167-5.738,4.342c-0.752,0.411-1.35,1.024-1.824,1.78l0.696,1.585
-						c0.397-0.909,0.988-1.659,1.817-2.113C220.383,499.242,222.432,497.698,224.202,495.886z"/>
-					<path fill="#988A38" d="M224.11,495.795c0.741-0.759,1.766-1.167,2.919-1.263c1.147-0.089,2.431,0.139,3.558,0.846
-						c0.29,0.185,0.673,0.125,0.849-0.158c0.18-0.281,0.083-0.664-0.216-0.838c-1.443-0.838-3.058-1.081-4.472-0.966
-						c-1.424,0.123-2.654,0.6-3.537,1.501c-1.673,1.712-3.609,3.173-5.749,4.35c-0.753,0.411-1.35,1.029-1.823,1.79l0.612,1.393
-						c0.407-0.896,0.998-1.633,1.818-2.081C220.311,499.135,222.349,497.599,224.11,495.795z"/>
-					<path fill="#988A38" d="M224.018,495.705c0.732-0.749,1.741-1.168,2.882-1.284c1.136-0.109,2.408,0.084,3.546,0.75
-						c0.255,0.151,0.579,0.096,0.725-0.147c0.147-0.244,0.06-0.572-0.202-0.717c-1.407-0.772-2.959-0.979-4.32-0.847
-						c-1.372,0.139-2.554,0.617-3.408,1.486c-1.675,1.716-3.616,3.179-5.758,4.357c-0.754,0.412-1.35,1.034-1.823,1.801l0.527,1.2
-						c0.417-0.88,1.009-1.606,1.82-2.05C220.238,499.027,222.266,497.499,224.018,495.705z"/>
-					<path fill="#988A38" d="M223.926,495.615c0.722-0.739,1.715-1.167,2.842-1.304c1.123-0.13,2.383,0.029,3.535,0.653
-						c0.218,0.119,0.486,0.068,0.603-0.139c0.116-0.205,0.035-0.479-0.188-0.594c-1.375-0.709-2.861-0.875-4.172-0.729
-						c-1.316,0.156-2.453,0.633-3.274,1.472c-1.678,1.719-3.622,3.185-5.768,4.364c-0.754,0.413-1.35,1.04-1.822,1.813l0.442,1.008
-						c0.427-0.867,1.02-1.58,1.821-2.019C220.166,498.921,222.183,497.399,223.926,495.615z"/>
-					<path fill="#988A38" d="M223.834,495.526c0.712-0.73,1.688-1.164,2.799-1.319c1.109-0.149,2.357-0.022,3.523,0.55
-						c0.184,0.092,0.396,0.043,0.48-0.127c0.087-0.17,0.015-0.387-0.171-0.475c-1.344-0.635-2.77-0.766-4.025-0.603
-						c-1.258,0.174-2.348,0.645-3.138,1.454c-1.682,1.721-3.629,3.189-5.778,4.372c-0.755,0.412-1.35,1.044-1.822,1.822l0.358,0.815
-						c0.436-0.855,1.03-1.553,1.823-1.986C220.093,498.814,222.101,497.301,223.834,495.526z"/>
-					<path fill="#FEE676" d="M223.743,495.437c0.703-0.72,1.657-1.155,2.754-1.326c1.095-0.164,2.331-0.068,3.517,0.442
-						c0.142,0.062,0.301,0.013,0.358-0.119c0.056-0.134-0.011-0.296-0.156-0.356c-1.316-0.548-2.684-0.646-3.883-0.468
-						c-1.201,0.188-2.24,0.648-2.999,1.428c-1.685,1.725-3.635,3.195-5.788,4.38c-0.755,0.412-1.351,1.05-1.823,1.83l0.275,0.625
-						c0.446-0.84,1.04-1.525,1.824-1.954C220.021,498.708,222.018,497.202,223.743,495.437z"/>
-					<path fill="#FEE676" d="M223.651,495.347c0.693-0.711,1.624-1.135,2.702-1.314c1.078-0.173,2.303-0.104,3.514,0.317
-						c0.103,0.035,0.209-0.015,0.236-0.113c0.028-0.098-0.034-0.204-0.138-0.239c-1.295-0.434-2.607-0.501-3.747-0.313
-						c-1.146,0.192-2.126,0.637-2.854,1.383c-1.688,1.729-3.642,3.202-5.798,4.388c-0.755,0.413-1.351,1.054-1.821,1.843
-						l0.189,0.432c0.456-0.826,1.051-1.5,1.825-1.923C219.949,498.601,221.935,497.104,223.651,495.347z"/>
-					<path d="M218.965,501.995c2.394-1.316,4.583-2.965,6.474-4.901c1.08-1.091,3.176-1.357,4.66-0.257
-						c-1.707-1.275-3.807-1.332-5.018-0.093c-1.856,1.901-4.004,3.52-6.357,4.813c-1.025,0.562-1.549,1.626-1.606,2.866l0.146,0.332
-						C217.401,503.58,218.026,502.514,218.965,501.995z"/>
-					<path d="M231.215,498.362c0.144,0.393,0.21,0.81,0.197,1.235l0.013,0.005c0.194-0.937,1.915-3.076,2.969-1.856
-						c-0.263-0.768-0.692-1.455-1.247-2.062C232.551,496.61,231.902,497.501,231.215,498.362z"/>
-					<path d="M232.734,504.222c1.557-1.594,2.181-3.629,1.931-5.407c-0.237-1.792-1.32-3.321-2.913-4.327
-						c1.477,0.931,2.415,2.451,2.559,4.212c0.159,1.749-0.509,3.718-1.935,5.173c-2.574,2.637-5.546,4.868-8.734,6.62
-						c-1.107,0.607-2.329,0.888-3.485,0.852l0.202,0.46c1.143,0.038,2.366-0.238,3.524-0.873
-						C227.112,509.156,230.125,506.895,232.734,504.222z"/>
-				</g>
-				<g>
-					<path fill="#988A38" d="M234.206,483.447l-3.998-0.488c0.959,1.374,2.665,3.214,5.075,3.971
-						c0.283-0.914,0.499-1.848,0.645-2.792C235.277,484.039,234.703,483.771,234.206,483.447z"/>
-					<path d="M235.927,484.138c-0.584-0.089-1.099-0.351-1.547-0.669l-0.36-0.044C234.567,483.756,235.207,484.028,235.927,484.138z
-						"/>
-					<path fill="#988A38" d="M235.927,484.138c-0.146,0.944-0.362,1.878-0.645,2.792c2.379,0.754,5.026,0.018,6.491-2.56
-						l-3.998-0.487C237.211,484.146,236.579,484.237,235.927,484.138z"/>
-					<g>
-						<g>
-							<path fill="#988A38" d="M235.297,486.884c0.424,0.132,0.866-0.105,0.982-0.53c0.112-0.427-0.159-0.853-0.592-0.947
-								c-1.513-0.349-2.691-1.293-3.495-2.205l-1.927-0.234C231.225,484.329,232.914,486.142,235.297,486.884z"/>
-							<path fill="#988A38" d="M235.078,486.74c0.375,0.123,0.771-0.085,0.877-0.456c0.102-0.373-0.133-0.747-0.517-0.841
-								c-1.484-0.387-2.62-1.356-3.402-2.261l-1.719-0.21C231.236,484.277,232.829,486.002,235.078,486.74z"/>
-							<path fill="#988A38" d="M234.857,486.596c0.327,0.114,0.677-0.062,0.775-0.382c0.09-0.321-0.109-0.646-0.442-0.739
-								c-1.453-0.424-2.55-1.415-3.311-2.312l-1.51-0.184C231.249,484.226,232.746,485.861,234.857,486.596z"/>
-							<path fill="#988A38" d="M234.635,486.455c0.281,0.104,0.585-0.041,0.675-0.312c0.083-0.271-0.088-0.548-0.371-0.638
-								c-1.422-0.464-2.476-1.472-3.217-2.361l-1.301-0.159C231.261,484.174,232.662,485.721,234.635,486.455z"/>
-							<path fill="#988A38" d="M234.415,486.315c0.232,0.092,0.492-0.021,0.572-0.242c0.075-0.223-0.061-0.454-0.296-0.538
-								c-1.39-0.509-2.406-1.527-3.125-2.41l-1.092-0.133C231.271,484.119,232.577,485.575,234.415,486.315z"/>
-							<path fill="#988A38" d="M234.193,486.178c0.186,0.08,0.399-0.003,0.472-0.177c0.065-0.174-0.037-0.363-0.225-0.438
-								c-1.353-0.559-2.334-1.581-3.029-2.456l-0.884-0.108C231.284,484.066,232.49,485.427,234.193,486.178z"/>
-							<path fill="#FEE676" d="M233.969,486.041c0.14,0.068,0.309,0.016,0.373-0.111c0.06-0.128-0.011-0.276-0.151-0.342
-								c-1.312-0.619-2.266-1.633-2.937-2.501l-0.678-0.083C231.3,484.014,232.405,485.27,233.969,486.041z"/>
-							<path fill="#FEE676" d="M233.746,485.906c0.095,0.055,0.217,0.033,0.272-0.049c0.055-0.083,0.019-0.194-0.076-0.246
-								c-1.254-0.705-2.198-1.681-2.844-2.543l-0.469-0.058C231.312,483.96,232.336,485.089,233.746,485.906z"/>
-						</g>
-						<g>
-							<path fill="#988A38" d="M235.985,484.227c-0.327-0.049-0.621,0.153-0.682,0.451c-0.054,0.297,0.15,0.597,0.47,0.669
-								c1.461,0.331,2.939-0.067,3.988-1.222l-1.746-0.213C237.403,484.231,236.701,484.339,235.985,484.227z"/>
-							<path fill="#988A38" d="M236.128,484.277c-0.293-0.039-0.551,0.143-0.604,0.412c-0.044,0.268,0.143,0.536,0.427,0.595
-								c1.358,0.278,2.72-0.118,3.699-1.173l-1.559-0.19C237.5,484.244,236.821,484.371,236.128,484.277z"/>
-							<path fill="#988A38" d="M236.27,484.327c-0.255-0.028-0.479,0.134-0.522,0.373c-0.039,0.238,0.133,0.474,0.384,0.519
-								c1.251,0.226,2.5-0.165,3.41-1.121l-1.372-0.167C237.598,484.255,236.942,484.404,236.27,484.327z"/>
-							<path fill="#988A38" d="M236.414,484.379c-0.221-0.019-0.411,0.124-0.444,0.332c-0.031,0.207,0.121,0.408,0.338,0.441
-								c1.146,0.17,2.285-0.208,3.124-1.067l-1.186-0.146C237.697,484.266,237.066,484.437,236.414,484.379z"/>
-							<path fill="#988A38" d="M236.554,484.431c-0.183-0.009-0.338,0.113-0.361,0.291c-0.023,0.174,0.109,0.342,0.291,0.362
-								c1.044,0.117,2.071-0.25,2.837-1.013l-0.997-0.122C237.795,484.277,237.187,484.466,236.554,484.431z"/>
-							<path fill="#988A38" d="M236.698,484.482c-0.149-0.002-0.27,0.103-0.282,0.247c-0.012,0.143,0.097,0.274,0.244,0.283
-								c0.94,0.062,1.851-0.286,2.55-0.955l-0.811-0.099C237.893,484.288,237.31,484.493,236.698,484.482z"/>
-							<path fill="#FEE676" d="M236.841,484.535c-0.113,0.006-0.201,0.091-0.204,0.202c-0.003,0.11,0.087,0.204,0.199,0.204
-								c0.837-0.003,1.631-0.324,2.263-0.897l-0.623-0.076C237.992,484.299,237.434,484.515,236.841,484.535z"/>
-							<path fill="#FEE676" d="M236.983,484.589c-0.077,0.01-0.128,0.079-0.123,0.155c0.008,0.078,0.074,0.132,0.152,0.124
-								c0.728-0.08,1.415-0.357,1.975-0.838l-0.433-0.053C238.089,484.31,237.555,484.518,236.983,484.589z"/>
-						</g>
-					</g>
-					<g>
-						<path d="M235.283,486.93c2.488,0.786,5.226,0.121,6.738-2.529l-0.499-0.061C240.101,486.843,237.545,487.648,235.283,486.93z"
-							/>
-					</g>
-					<path d="M237.101,487.207c0.819,0.02,1.629-0.114,2.382-0.453c-1.253,0.03-1.209-1.616-0.97-2.781l-0.714-0.087
-						C237.662,485.026,237.414,486.133,237.101,487.207z"/>
-					<path fill="#988A38" d="M227.5,497.706c-0.503,0.555-0.467,1.46,0.113,2.051c0.579,0.591,1.538,0.593,2.104-0.03
-						c2.008-2.2,3.696-4.686,5.029-7.337c1.305-2.644,2.417-5.445,2.804-8.534l-2.978-0.363c-0.311,2.552-1.302,5.119-2.506,7.549
-						C230.851,493.461,229.318,495.713,227.5,497.706z"/>
-					<path d="M229.533,499.559c-0.466,0.519-1.438,0.692-1.919,0.198c0.675,0.687,1.623,0.868,2.289,0.139
-						c2.023-2.218,3.726-4.723,5.068-7.393c1.313-2.662,2.435-5.483,2.829-8.617l-0.497-0.061c-0.381,3.044-1.483,5.826-2.779,8.452
-						C233.201,494.91,231.525,497.375,229.533,499.559z"/>
-					<g>
-						<path fill="#988A38" d="M227.526,497.972c-0.271,0.298-0.248,0.78,0.07,1.091c0.314,0.308,0.817,0.312,1.106-0.006
-							c1.967-2.127,3.622-4.539,4.929-7.122c1.288-2.582,2.365-5.319,2.728-8.225l-1.589-0.194
-							c-0.321,2.615-1.334,5.229-2.567,7.695C230.959,493.669,229.391,495.955,227.526,497.972z"/>
-						<path fill="#988A38" d="M227.854,497.701c-0.241,0.27-0.21,0.702,0.077,0.975c0.285,0.269,0.731,0.264,0.986-0.021
-							c1.88-2.094,3.459-4.452,4.714-6.968c1.23-2.519,2.27-5.179,2.616-7.99l-1.417-0.173c-0.311,2.561-1.293,5.107-2.477,7.526
-							C231.152,493.457,229.646,495.705,227.854,497.701z"/>
-						<path fill="#988A38" d="M228.177,497.426c-0.211,0.238-0.175,0.623,0.081,0.856c0.256,0.234,0.645,0.22,0.867-0.031
-							c1.791-2.06,3.304-4.359,4.502-6.812c1.181-2.451,2.177-5.037,2.509-7.757l-1.248-0.152c-0.302,2.505-1.248,4.988-2.39,7.355
-							C231.346,493.245,229.894,495.45,228.177,497.426z"/>
-						<path fill="#988A38" d="M228.495,497.143c-0.18,0.211-0.142,0.542,0.082,0.739c0.227,0.199,0.561,0.182,0.748-0.038
-							c1.711-2.02,3.148-4.268,4.297-6.65c1.134-2.386,2.083-4.896,2.403-7.524l-1.079-0.132c-0.295,2.448-1.201,4.869-2.303,7.184
-							C231.531,493.026,230.145,495.195,228.495,497.143z"/>
-						<path fill="#988A38" d="M228.81,496.855c-0.146,0.182-0.112,0.46,0.079,0.622c0.193,0.164,0.477,0.147,0.629-0.04
-							c1.628-1.981,3.001-4.17,4.099-6.489c1.088-2.318,1.988-4.756,2.297-7.293l-0.909-0.11c-0.29,2.391-1.153,4.748-2.216,7.011
-							C231.722,492.811,230.388,494.935,228.81,496.855z"/>
-						<path fill="#FEE676" d="M229.124,496.565c-0.116,0.15-0.09,0.371,0.071,0.503c0.158,0.13,0.39,0.114,0.51-0.04
-							c1.548-1.94,2.853-4.073,3.902-6.325c1.042-2.253,1.898-4.613,2.196-7.062l-0.737-0.09c-0.283,2.334-1.109,4.628-2.132,6.838
-							C231.908,492.591,230.633,494.672,229.124,496.565z"/>
-						<path fill="#FEE676" d="M229.432,496.268c-0.09,0.115-0.064,0.289,0.059,0.387c0.123,0.098,0.299,0.08,0.392-0.037
-							c1.471-1.898,2.711-3.974,3.71-6.159c0.996-2.188,1.808-4.471,2.095-6.83l-0.566-0.069c-0.277,2.275-1.065,4.506-2.047,6.662
-							C232.093,492.369,230.875,494.406,229.432,496.268z"/>
-						<path fill="#FEE676" d="M229.736,495.966c-0.062,0.082-0.042,0.203,0.046,0.27c0.088,0.067,0.21,0.054,0.273-0.029
-							c1.395-1.854,2.572-3.872,3.527-5.99c0.951-2.123,1.718-4.328,1.996-6.602l-0.397-0.048c-0.271,2.216-1.021,4.383-1.963,6.485
-							C232.274,492.145,231.113,494.136,229.736,495.966z"/>
-					</g>
-				</g>
-			</g>
-			<g>
-				<g>
-					<path fill="#988A38" d="M236.084,480.328c-0.818,0.116-1.414,0.756-1.41,1.445c0.005,0.568-0.031,1.143-0.101,1.718
-						l2.978,0.363c0.084-0.692,0.129-1.396,0.124-2.105C237.667,480.851,236.905,480.21,236.084,480.328z"/>
-					<g>
-						<path fill="#988A38" d="M235.628,480.779c-0.438,0.043-0.769,0.38-0.757,0.757c0.017,0.654-0.019,1.316-0.1,1.979l1.589,0.193
-							c0.089-0.733,0.13-1.476,0.111-2.217C236.458,481.055,236.07,480.735,235.628,480.779z"/>
-						<path fill="#988A38" d="M235.625,481.08c-0.397,0.028-0.69,0.324-0.692,0.664c0.006,0.589-0.031,1.184-0.103,1.778
-							l1.417,0.173c0.08-0.651,0.12-1.309,0.114-1.966C236.362,481.343,236.015,481.052,235.625,481.08z"/>
-						<path fill="#988A38" d="M235.614,481.381c-0.343,0.016-0.622,0.272-0.619,0.574c-0.005,0.522-0.042,1.049-0.106,1.575
-							l1.248,0.152c0.069-0.57,0.11-1.145,0.115-1.719C236.25,481.626,235.965,481.364,235.614,481.381z"/>
-						<path fill="#988A38" d="M235.594,481.681c-0.303,0.007-0.537,0.225-0.547,0.488c-0.008,0.454-0.045,0.911-0.101,1.368
-							l1.079,0.132c0.06-0.49,0.1-0.982,0.108-1.476C236.144,481.904,235.893,481.674,235.594,481.681z"/>
-						<path fill="#988A38" d="M235.572,481.98c-0.255-0.001-0.466,0.18-0.471,0.402c-0.019,0.387-0.047,0.773-0.097,1.161
-							l0.909,0.111c0.053-0.411,0.083-0.823,0.103-1.236C236.022,482.178,235.825,481.98,235.572,481.98z"/>
-						<path fill="#FEE676" d="M235.537,482.278c-0.206-0.005-0.375,0.139-0.388,0.322c-0.016,0.316-0.045,0.634-0.085,0.951
-							l0.738,0.09c0.042-0.332,0.072-0.666,0.089-0.999C235.905,482.446,235.744,482.284,235.537,482.278z"/>
-						<path fill="#FEE676" d="M235.5,482.576c-0.161-0.008-0.296,0.102-0.305,0.244c-0.021,0.246-0.039,0.492-0.073,0.738
-							l0.567,0.069c0.035-0.255,0.053-0.512,0.075-0.768C235.773,482.711,235.66,482.584,235.5,482.576z"/>
-						<path fill="#FEE676" d="M235.456,482.873c-0.111-0.008-0.209,0.067-0.219,0.169l-0.055,0.523l0.397,0.049l0.057-0.538
-							C235.646,482.972,235.569,482.881,235.456,482.873z"/>
-					</g>
-					<path d="M236.084,480.328c0.688-0.102,1.34,0.688,1.341,1.423c0.005,0.697-0.038,1.391-0.122,2.073l0.497,0.061
-						c0.085-0.702,0.131-1.417,0.125-2.138C237.911,480.683,237.036,480.195,236.084,480.328z"/>
-					<path fill="#988A38" d="M236.271,467.917c-1.903-1.653-4.137-2.073-5.884-1.637c-1.772,0.431-3.107,1.679-3.823,3.35
-						c-0.719,1.676-0.738,3.452-0.479,4.934c0.276,1.487,0.791,2.67,1.512,3.305c0.651,0.562,1.078,1.113,1.367,1.645
-						c0.28,0.536,0.452,1.079,0.514,1.79c0.045,0.493,0.312,1.031,0.729,1.656l3.998,0.487c-0.995-0.672-1.657-1.537-1.739-2.41
-						c-0.186-2.138-1.141-3.897-2.898-5.43c-0.538-0.471-0.854-1.188-0.958-2.032c-0.099-0.839,0.025-1.803,0.52-2.663
-						c0.494-0.859,1.293-1.445,2.243-1.61c0.941-0.167,2.038,0.103,2.926,0.878c1.415,1.225,2.707,2.747,3.656,4.517
-						c0.95,1.767,1.521,3.758,1.683,5.7c0.148,1.646-0.719,2.979-1.863,3.486l3.998,0.488c0.636-1.125,1-2.605,0.853-4.241
-						c-0.192-2.305-0.88-4.722-2.027-6.851C239.457,471.146,237.911,469.337,236.271,467.917z"/>
-					<path fill="#988A38" d="M235.182,469.166c-1.29-1.124-2.853-1.474-4.139-1.208c-1.3,0.262-2.343,1.116-2.948,2.312
-						c-0.148,0.294-0.045,0.642,0.222,0.788c0.27,0.144,0.618,0.039,0.782-0.245c0.502-0.881,1.319-1.484,2.292-1.653
-						c0.966-0.173,2.093,0.106,3.008,0.904c1.427,1.234,2.731,2.771,3.69,4.56c0.96,1.785,1.537,3.799,1.701,5.759
-						c0.147,1.633-0.672,2.975-1.775,3.529l1.746,0.213c0.821-0.892,1.357-2.268,1.216-3.848c-0.176-2.104-0.799-4.287-1.837-6.216
-						C238.104,472.129,236.698,470.478,235.182,469.166z"/>
-					<path fill="#988A38" d="M235.125,469.233c-1.24-1.08-2.74-1.438-3.985-1.207c-1.258,0.226-2.278,1.018-2.882,2.146
-						c-0.138,0.26-0.047,0.57,0.191,0.708c0.242,0.13,0.55,0.039,0.702-0.212c0.509-0.849,1.326-1.421,2.292-1.567
-						c0.959-0.15,2.074,0.141,2.984,0.935c1.429,1.237,2.737,2.778,3.699,4.57c0.962,1.789,1.54,3.809,1.705,5.773
-						c0.146,1.634-0.653,2.971-1.738,3.542l1.559,0.19c0.833-0.872,1.38-2.241,1.238-3.827c-0.175-2.094-0.794-4.263-1.826-6.181
-						C238.032,472.182,236.634,470.54,235.125,469.233z"/>
-					<path fill="#988A38" d="M235.065,469.3c-1.19-1.035-2.625-1.402-3.829-1.204c-1.215,0.193-2.21,0.919-2.816,1.979
-						c-0.127,0.224-0.049,0.501,0.161,0.623c0.21,0.12,0.481,0.043,0.617-0.171c0.52-0.819,1.336-1.36,2.295-1.483
-						c0.952-0.128,2.052,0.176,2.956,0.964c1.433,1.239,2.744,2.784,3.708,4.581c0.965,1.794,1.544,3.817,1.709,5.787
-						c0.146,1.636-0.631,2.967-1.697,3.555l1.372,0.168c0.844-0.853,1.401-2.212,1.259-3.806c-0.174-2.083-0.79-4.24-1.816-6.147
-						C237.958,472.234,236.568,470.601,235.065,469.3z"/>
-					<path fill="#988A38" d="M235.006,469.368c-1.141-0.993-2.51-1.363-3.67-1.201c-1.171,0.158-2.143,0.821-2.751,1.812
-						c-0.113,0.187-0.051,0.431,0.128,0.539c0.181,0.107,0.414,0.045,0.536-0.137c0.53-0.788,1.346-1.297,2.297-1.398
-						c0.945-0.104,2.031,0.21,2.929,0.994c1.436,1.242,2.75,2.79,3.716,4.592c0.967,1.799,1.548,3.828,1.713,5.803
-						c0.146,1.637-0.613,2.961-1.659,3.567l1.186,0.145c0.855-0.832,1.421-2.185,1.279-3.784c-0.173-2.071-0.786-4.216-1.807-6.112
-						C237.885,472.288,236.502,470.663,235.006,469.368z"/>
-					<path fill="#988A38" d="M234.947,469.437c-1.092-0.953-2.395-1.322-3.51-1.197c-1.125,0.124-2.073,0.728-2.687,1.646
-						c-0.103,0.153-0.053,0.36,0.096,0.455c0.151,0.093,0.345,0.046,0.452-0.099c0.547-0.757,1.362-1.235,2.304-1.314
-						c0.936-0.081,2.005,0.244,2.897,1.023c1.438,1.244,2.755,2.796,3.724,4.602c0.97,1.804,1.552,3.838,1.718,5.816
-						c0.146,1.639-0.59,2.957-1.617,3.581l0.997,0.121c0.866-0.812,1.442-2.156,1.299-3.763c-0.173-2.061-0.782-4.192-1.796-6.077
-						C237.811,472.342,236.436,470.725,234.947,469.437z"/>
-					<path fill="#988A38" d="M234.888,469.503c-1.043-0.911-2.277-1.281-3.348-1.188c-1.078,0.09-2.001,0.632-2.625,1.478
-						c-0.088,0.118-0.053,0.286,0.064,0.368c0.12,0.081,0.278,0.048,0.367-0.064c0.566-0.722,1.382-1.167,2.314-1.225
-						c0.926-0.059,1.977,0.274,2.863,1.049c1.441,1.247,2.762,2.802,3.733,4.613c0.973,1.808,1.557,3.848,1.723,5.831
-						c0.145,1.639-0.571,2.952-1.58,3.593l0.811,0.099c0.879-0.792,1.465-2.129,1.321-3.741c-0.171-2.05-0.777-4.169-1.786-6.043
-						C237.738,472.395,236.371,470.786,234.888,469.503z"/>
-					<path fill="#FEE676" d="M234.83,469.57c-0.996-0.868-2.155-1.234-3.177-1.171c-1.028,0.061-1.925,0.534-2.569,1.3
-						c-0.074,0.087-0.056,0.216,0.029,0.283c0.088,0.066,0.21,0.052,0.282-0.031c1.156-1.369,3.355-1.612,5.154-0.06
-						c1.444,1.249,2.768,2.809,3.742,4.624c0.975,1.812,1.561,3.858,1.727,5.846c0.145,1.641-0.551,2.947-1.541,3.606l0.623,0.076
-						c0.89-0.773,1.486-2.103,1.343-3.72c-0.171-2.04-0.772-4.146-1.775-6.01C237.666,472.447,236.305,470.848,234.83,469.57z"/>
-					<path fill="#FEE676" d="M234.771,469.638c-1.938-1.664-4.209-1.38-5.52-0.029c-0.057,0.056-0.056,0.142-0.003,0.195
-						c0.055,0.054,0.141,0.055,0.195,0.004c1.246-1.252,3.346-1.481,5.131,0.056c1.446,1.252,2.773,2.814,3.75,4.635
-						c0.978,1.816,1.564,3.868,1.731,5.86c0.145,1.642-0.53,2.942-1.501,3.619l0.433,0.053c0.903-0.754,1.509-2.074,1.365-3.698
-						c-0.17-2.028-0.769-4.122-1.766-5.976C237.592,472.5,236.24,470.909,234.771,469.638z"/>
-					<path d="M238.821,483.086c-0.294,0.34-0.632,0.607-1.02,0.786l-0.001,0.013l0.714,0.087c0.082-0.355,0.186-0.675,0.319-0.878
-						L238.821,483.086z"/>
-					<path fill="#988A38" d="M228.682,476.624c-0.65-0.57-1.062-1.503-1.232-2.644c-0.163-1.132-0.067-2.461,0.546-3.684
-						c0.201-0.391,0.047-0.874-0.351-1.059c-0.396-0.186-0.863,0.004-1.037,0.41c-0.715,1.66-0.736,3.423-0.48,4.895
-						c0.274,1.478,0.784,2.653,1.503,3.287c0.654,0.565,1.086,1.121,1.378,1.658c0.283,0.543,0.457,1.093,0.521,1.81
-						c0.046,0.497,0.313,1.04,0.735,1.668l1.927,0.235c-0.608-0.706-1.008-1.382-1.069-2.045
-						C230.95,479.355,230.231,477.997,228.682,476.624z"/>
-					<path fill="#988A38" d="M228.598,476.721c-0.637-0.559-1.054-1.476-1.243-2.601c-0.183-1.115-0.118-2.432,0.453-3.652
-						c0.17-0.354,0.034-0.778-0.318-0.936c-0.35-0.16-0.765,0.013-0.912,0.377c-0.661,1.61-0.66,3.307-0.397,4.722
-						c0.278,1.422,0.778,2.553,1.478,3.168c0.657,0.568,1.092,1.128,1.387,1.67c0.286,0.548,0.461,1.104,0.525,1.826
-						c0.046,0.5,0.317,1.046,0.746,1.678l1.719,0.21c-0.593-0.703-0.981-1.367-1.041-2.016
-						C230.824,479.399,230.127,478.079,228.598,476.721z"/>
-					<path fill="#988A38" d="M228.513,476.817c-0.624-0.547-1.044-1.447-1.252-2.555c-0.201-1.1-0.167-2.401,0.361-3.622
-						c0.14-0.316,0.021-0.683-0.285-0.814c-0.306-0.134-0.669,0.023-0.792,0.345c-0.604,1.56-0.58,3.189-0.311,4.548
-						c0.284,1.365,0.774,2.45,1.453,3.047c0.661,0.571,1.1,1.135,1.397,1.683c0.289,0.553,0.466,1.115,0.53,1.843
-						c0.046,0.503,0.322,1.052,0.755,1.688l1.51,0.185c-0.577-0.698-0.955-1.352-1.013-1.984
-						C230.697,479.443,230.023,478.16,228.513,476.817z"/>
-					<path fill="#988A38" d="M228.428,476.915c-0.611-0.536-1.036-1.419-1.26-2.51c-0.218-1.082-0.215-2.368,0.271-3.593
-						c0.112-0.277,0.007-0.585-0.255-0.693c-0.263-0.107-0.574,0.033-0.675,0.312c-0.545,1.512-0.498,3.075-0.224,4.377
-						c0.288,1.309,0.772,2.346,1.429,2.925c0.664,0.574,1.106,1.142,1.407,1.694c0.292,0.559,0.471,1.126,0.535,1.859
-						c0.046,0.506,0.327,1.058,0.765,1.698l1.301,0.158c-0.559-0.694-0.928-1.336-0.984-1.953
-						C230.57,479.486,229.918,478.242,228.428,476.915z"/>
-					<path fill="#988A38" d="M228.344,477.012c-0.598-0.524-1.026-1.388-1.266-2.461c-0.234-1.065-0.262-2.334,0.18-3.566
-						c0.085-0.234-0.006-0.488-0.227-0.572c-0.22-0.084-0.479,0.044-0.557,0.279c-0.484,1.466-0.414,2.963-0.135,4.21
-						c0.291,1.251,0.769,2.24,1.403,2.801c0.667,0.577,1.113,1.148,1.417,1.706c0.294,0.563,0.475,1.137,0.54,1.875
-						c0.047,0.509,0.332,1.064,0.776,1.708l1.092,0.134c-0.542-0.69-0.9-1.321-0.956-1.923
-						C230.443,479.53,229.814,478.324,228.344,477.012z"/>
-					<path fill="#988A38" d="M228.26,477.108c-0.585-0.514-1.014-1.356-1.268-2.41c-0.248-1.048-0.305-2.299,0.084-3.538
-						c0.062-0.194-0.019-0.395-0.198-0.453c-0.179-0.062-0.384,0.05-0.44,0.245c-0.416,1.423-0.326,2.854-0.041,4.042
-						c0.293,1.193,0.762,2.133,1.375,2.675c1.335,1.209,1.813,2.156,1.971,3.61c0.047,0.513,0.335,1.071,0.784,1.719l0.884,0.107
-						c-0.529-0.686-0.874-1.306-0.928-1.893C230.317,479.573,229.71,478.405,228.26,477.108z"/>
-					<path fill="#FEE676" d="M228.175,477.205c-0.573-0.503-0.997-1.323-1.261-2.358c-0.259-1.029-0.343-2.265-0.019-3.512
-						c0.04-0.148-0.032-0.299-0.17-0.335c-0.138-0.035-0.29,0.06-0.326,0.212c-0.338,1.384-0.227,2.748,0.061,3.878
-						c0.297,1.133,0.747,2.024,1.339,2.546c1.342,1.215,1.827,2.175,1.986,3.64c0.047,0.515,0.34,1.078,0.791,1.729l0.678,0.083
-						c-0.511-0.682-0.846-1.29-0.899-1.862C230.19,479.617,229.606,478.487,228.175,477.205z"/>
-					<path fill="#FEE676" d="M228.091,477.302c-0.561-0.491-0.97-1.289-1.236-2.304c-0.263-1.01-0.368-2.229-0.137-3.487
-						c0.02-0.106-0.044-0.204-0.145-0.216c-0.099-0.013-0.195,0.066-0.214,0.175c-0.232,1.349-0.099,2.645,0.181,3.718
-						c0.291,1.075,0.716,1.914,1.288,2.416c1.349,1.22,1.841,2.192,2,3.668c0.048,0.519,0.344,1.084,0.802,1.739l0.468,0.057
-						c-0.496-0.677-0.82-1.274-0.871-1.831C230.064,479.66,229.502,478.568,228.091,477.302z"/>
-					<path d="M232.717,481.014c-0.189-2.2-1.187-4.035-2.984-5.597c-0.51-0.449-0.849-1.15-0.99-1.968
-						c-0.138-0.812-0.072-1.737,0.388-2.538c-0.53,0.921-0.711,1.922-0.652,2.788c0.066,0.871,0.361,1.604,0.926,2.095
-						c1.719,1.503,2.63,3.188,2.814,5.264c0.082,0.894,0.759,1.716,1.8,2.366l0.361,0.044
-						C233.436,482.774,232.8,481.863,232.717,481.014z"/>
-					<path d="M230.501,469.606c0.375-0.189,0.79-0.304,1.228-0.329l0.002-0.014c-0.506-0.043-1.339-0.455-1.934-0.967
-						c-0.59-0.514-0.96-1.163-0.417-1.78c-0.728,0.371-1.344,0.903-1.855,1.546C228.539,468.51,229.536,469.019,230.501,469.606z"/>
-					<path d="M236.435,467.729c-1.998-1.729-4.263-2.113-6.019-1.63c-1.781,0.481-3.11,1.799-3.853,3.531
-						c0.687-1.605,2.031-2.781,3.794-3.163c1.74-0.389,3.942,0.066,5.748,1.639c1.622,1.403,3.146,3.188,4.273,5.291
-						c1.13,2.099,1.809,4.48,1.998,6.755c0.144,1.585-0.217,3.062-0.855,4.188l0.499,0.062c0.634-1.125,1.005-2.613,0.854-4.294
-						c-0.194-2.334-0.892-4.788-2.056-6.946C239.661,470.997,238.094,469.164,236.435,467.729z"/>
-				</g>
-				<g>
-					<path fill="#988A38" d="M214.991,468.459l-0.565,3.988c1.945-0.847,3.619-2.735,4.129-5.209
-						c-0.941-0.187-1.888-0.35-2.837-0.491C215.621,467.399,215.352,467.974,214.991,468.459z"/>
-					<path d="M215.718,466.747c-0.088,0.587-0.352,1.101-0.703,1.539l-0.051,0.359C215.337,468.109,215.61,467.47,215.718,466.747z"
-						/>
-					<path fill="#988A38" d="M215.718,466.747c0.949,0.142,1.896,0.305,2.837,0.491c0.51-2.467-0.48-4.92-2.495-6.327l-0.565,3.987
-						C215.724,465.465,215.815,466.097,215.718,466.747z"/>
-					<g>
-						<g>
-							<path fill="#988A38" d="M218.509,467.229c0.087-0.433-0.186-0.852-0.61-0.93c-0.422-0.079-0.827,0.219-0.899,0.652
-								c-0.265,1.529-1.167,2.76-2.293,3.518l-0.272,1.922C216.354,471.541,218.007,469.672,218.509,467.229z"/>
-							<path fill="#988A38" d="M218.386,467.461c0.084-0.385-0.157-0.762-0.531-0.834c-0.374-0.074-0.731,0.187-0.805,0.572
-								c-0.299,1.501-1.237,2.692-2.366,3.425l-0.243,1.714C216.274,471.523,217.871,469.769,218.386,467.461z"/>
-							<path fill="#988A38" d="M218.263,467.694c0.082-0.335-0.127-0.67-0.453-0.739c-0.325-0.069-0.638,0.155-0.712,0.491
-								c-0.335,1.473-1.305,2.625-2.435,3.334l-0.213,1.506C216.194,471.505,217.734,469.864,218.263,467.694z"/>
-							<path fill="#988A38" d="M218.143,467.928c0.077-0.289-0.099-0.581-0.377-0.646c-0.278-0.064-0.545,0.125-0.62,0.414
-								c-0.374,1.445-1.373,2.554-2.506,3.241l-0.184,1.298C216.113,471.487,217.595,469.962,218.143,467.928z"/>
-							<path fill="#988A38" d="M218.021,468.159c0.07-0.238-0.069-0.489-0.301-0.55c-0.231-0.06-0.457,0.091-0.526,0.331
-								c-0.42,1.421-1.441,2.488-2.577,3.151l-0.154,1.09C216.031,471.471,217.45,470.062,218.021,468.159z"/>
-							<path fill="#988A38" d="M217.901,468.393c0.065-0.191-0.041-0.4-0.226-0.456c-0.184-0.054-0.37,0.06-0.434,0.252
-								c-0.47,1.392-1.508,2.421-2.645,3.06l-0.125,0.882C215.949,471.452,217.299,470.159,217.901,468.393z"/>
-							<path fill="#FEE676" d="M217.782,468.626c0.057-0.145-0.014-0.31-0.151-0.362c-0.138-0.051-0.287,0.028-0.344,0.173
-								c-0.535,1.355-1.574,2.353-2.713,2.968l-0.096,0.676C215.868,471.433,217.136,470.25,217.782,468.626z"/>
-							<path fill="#FEE676" d="M217.663,468.859c0.048-0.099,0.014-0.219-0.078-0.267c-0.092-0.049-0.205-0.007-0.251,0.091
-								c-0.63,1.304-1.64,2.285-2.781,2.876l-0.066,0.467C215.785,471.416,216.945,470.327,217.663,468.859z"/>
-						</g>
-						<g>
-							<path fill="#988A38" d="M215.807,466.688c-0.048,0.324,0.162,0.627,0.472,0.675c0.307,0.05,0.602-0.172,0.657-0.494
-								c0.254-1.48-0.203-2.912-1.16-3.95l-0.247,1.74C215.805,465.274,215.915,465.973,215.807,466.688z"/>
-							<path fill="#988A38" d="M215.856,466.545c-0.037,0.29,0.149,0.557,0.427,0.598c0.274,0.04,0.536-0.159,0.581-0.448
-								c0.211-1.371-0.227-2.697-1.104-3.666l-0.22,1.556C215.818,465.178,215.945,465.852,215.856,466.545z"/>
-							<path fill="#988A38" d="M215.906,466.403c-0.027,0.253,0.138,0.484,0.381,0.517c0.242,0.031,0.471-0.146,0.504-0.4
-								c0.168-1.264-0.25-2.478-1.047-3.382l-0.194,1.369C215.83,465.08,215.977,465.73,215.906,466.403z"/>
-							<path fill="#988A38" d="M215.956,466.259c-0.018,0.219,0.126,0.415,0.336,0.438c0.209,0.022,0.403-0.133,0.427-0.352
-								c0.124-1.157-0.271-2.262-0.99-3.098l-0.167,1.183C215.842,464.981,216.007,465.606,215.956,466.259z"/>
-							<path fill="#988A38" d="M216.005,466.118c-0.008,0.181,0.114,0.341,0.292,0.356c0.174,0.017,0.335-0.12,0.349-0.303
-								c0.08-1.05-0.292-2.047-0.933-2.813l-0.141,0.994C215.854,464.883,216.035,465.484,216.005,466.118z"/>
-							<path fill="#988A38" d="M216.055,465.974c-0.001,0.148,0.102,0.271,0.245,0.278c0.142,0.007,0.268-0.107,0.272-0.254
-								c0.034-0.942-0.311-1.83-0.874-2.53l-0.115,0.81C215.865,464.786,216.061,465.361,216.055,465.974z"/>
-							<path fill="#FEE676" d="M216.105,465.83c0.005,0.111,0.088,0.202,0.198,0.2c0.108-0.001,0.197-0.095,0.194-0.206
-								c-0.022-0.836-0.332-1.613-0.816-2.246l-0.088,0.621C215.877,464.687,216.081,465.236,216.105,465.83z"/>
-							<path fill="#FEE676" d="M216.156,465.688c0.01,0.076,0.077,0.128,0.151,0.12c0.075-0.011,0.126-0.079,0.117-0.157
-								c-0.089-0.726-0.351-1.397-0.757-1.96l-0.062,0.432C215.889,464.59,216.083,465.115,216.156,465.688z"/>
-						</g>
-					</g>
-					<g>
-						<path d="M218.555,467.238c0.533-2.581-0.403-5.125-2.46-6.574l-0.07,0.497C217.996,462.532,219.041,464.893,218.555,467.238z"
-							/>
-					</g>
-					<path d="M218.661,465.403c-0.047-0.818-0.231-1.604-0.566-2.325c0.094,1.246-1.448,1.345-2.495,1.085l-0.101,0.712
-						C216.554,465.024,217.609,465.199,218.661,465.403z"/>
-					<path fill="#988A38" d="M230.256,473.366c0.563,0.431,1.443,0.327,2.012-0.271c0.572-0.602,0.504-1.581-0.194-2.115
-						c-2.469-1.864-5.251-3.067-8.055-3.97c-2.813-0.896-5.682-1.477-8.556-1.887l-0.421,2.971c2.748,0.391,5.465,0.945,8.064,1.773
-						C225.697,470.697,228.193,471.803,230.256,473.366z"/>
-					<path d="M231.922,471.178c0.577,0.436,0.826,1.414,0.345,1.917c0.662-0.699,0.779-1.68-0.042-2.314
-						c-2.502-1.89-5.309-3.101-8.13-4.01c-2.831-0.9-5.712-1.484-8.597-1.896l-0.07,0.495c2.864,0.408,5.72,0.987,8.515,1.877
-						C226.729,468.144,229.487,469.339,231.922,471.178z"/>
-					<g>
-						<path fill="#988A38" d="M230.512,473.313c0.305,0.236,0.776,0.17,1.067-0.162c0.292-0.333,0.26-0.835-0.083-1.101
-							c-2.312-1.792-5.007-2.98-7.752-3.869c-2.756-0.885-5.595-1.465-8.449-1.871l-0.225,1.586
-							c2.786,0.396,5.544,0.961,8.183,1.809C225.882,470.555,228.422,471.689,230.512,473.313z"/>
-						<path fill="#988A38" d="M230.226,473.021c0.282,0.206,0.7,0.137,0.951-0.171c0.247-0.303,0.208-0.75-0.103-0.977
-							c-2.279-1.676-4.902-2.801-7.574-3.651c-2.683-0.843-5.443-1.403-8.22-1.798l-0.2,1.413c2.718,0.387,5.408,0.935,7.991,1.746
-							C225.643,470.399,228.134,471.48,230.226,473.021z"/>
-						<path fill="#988A38" d="M229.929,472.735c0.255,0.175,0.622,0.105,0.834-0.173c0.209-0.273,0.164-0.666-0.113-0.856
-							c-2.243-1.567-4.794-2.633-7.395-3.442c-2.609-0.805-5.292-1.346-7.993-1.729l-0.176,1.244
-							c2.649,0.377,5.272,0.907,7.797,1.687C225.401,470.247,227.843,471.275,229.929,472.735z"/>
-						<path fill="#988A38" d="M229.623,472.456c0.226,0.146,0.542,0.078,0.717-0.167c0.174-0.243,0.127-0.58-0.115-0.737
-							c-2.205-1.463-4.683-2.476-7.212-3.245c-2.536-0.767-5.142-1.288-7.766-1.661l-0.152,1.075
-							c2.581,0.367,5.136,0.88,7.603,1.626C225.158,470.094,227.545,471.076,229.623,472.456z"/>
-						<path fill="#988A38" d="M229.309,472.182c0.194,0.12,0.46,0.055,0.602-0.155c0.14-0.209,0.093-0.491-0.111-0.618
-							c-2.164-1.365-4.57-2.323-7.028-3.057c-2.463-0.731-4.993-1.232-7.54-1.596l-0.128,0.907c2.512,0.357,5.001,0.852,7.408,1.566
-							C224.912,469.945,227.247,470.879,229.309,472.182z"/>
-						<path fill="#FEE676" d="M228.988,471.912c0.161,0.094,0.375,0.038,0.485-0.137c0.11-0.174,0.066-0.4-0.102-0.499
-							c-4.274-2.527-9.24-3.688-14.156-4.409l-0.104,0.735C219.971,468.312,224.868,469.472,228.988,471.912z"/>
-						<path fill="#FEE676" d="M228.658,471.649c0.126,0.07,0.292,0.022,0.373-0.113c0.08-0.135,0.042-0.309-0.088-0.381
-							c-4.174-2.36-8.976-3.477-13.742-4.175l-0.08,0.564C219.845,468.235,224.595,469.349,228.658,471.649z"/>
-						<path fill="#FEE676" d="M228.321,471.392c0.09,0.048,0.205,0.012,0.259-0.085c0.054-0.096,0.025-0.217-0.066-0.266
-							c-4.073-2.201-8.711-3.277-13.33-3.951l-0.056,0.396C219.719,468.154,224.321,469.229,228.321,471.392z"/>
-					</g>
-				</g>
-			</g>
-			<g>
-				<g>
-					<path fill="#988A38" d="M211.912,466.216c-0.075,0.825,0.493,1.552,1.265,1.64l1.866,0.238l0.421-2.971l-1.948-0.249
-						C212.706,464.782,211.987,465.392,211.912,466.216z"/>
-					<g>
-						<path fill="#988A38" d="M212.271,466.753c-0.043,0.44,0.258,0.83,0.671,0.876c0.711,0.078,1.42,0.167,2.127,0.268l0.224-1.586
-							c-0.724-0.103-1.45-0.193-2.176-0.273C212.694,465.99,212.314,466.313,212.271,466.753z"/>
-						<path fill="#988A38" d="M212.578,466.812c-0.04,0.392,0.229,0.741,0.599,0.783l1.902,0.242l0.2-1.413l-1.941-0.248
-							C212.959,466.134,212.619,466.418,212.578,466.812z"/>
-						<path fill="#988A38" d="M212.886,466.87c-0.037,0.345,0.198,0.654,0.524,0.692l1.677,0.216l0.176-1.245l-1.708-0.22
-							C213.223,466.274,212.923,466.523,212.886,466.87z"/>
-						<path fill="#988A38" d="M213.192,466.931c-0.033,0.298,0.17,0.567,0.452,0.601l1.451,0.189l0.152-1.075l-1.474-0.192
-							C213.486,466.419,213.225,466.631,213.192,466.931z"/>
-						<path fill="#988A38" d="M213.499,466.991c-0.029,0.253,0.141,0.479,0.378,0.51l1.226,0.162l0.128-0.907l-1.242-0.164
-							C213.748,466.562,213.528,466.74,213.499,466.991z"/>
-						<path fill="#FEE676" d="M213.805,467.055c-0.024,0.204,0.112,0.391,0.307,0.415l1,0.134l0.104-0.736l-1.011-0.136
-							C214.008,466.707,213.83,466.85,213.805,467.055z"/>
-						<path fill="#FEE676" d="M214.11,467.118c-0.02,0.16,0.086,0.303,0.236,0.322l0.773,0.105l0.08-0.565l-0.78-0.106
-							C214.268,466.854,214.13,466.959,214.11,467.118z"/>
-						<path fill="#FEE676" d="M214.415,467.184c-0.015,0.112,0.06,0.213,0.166,0.228l0.547,0.075l0.056-0.396l-0.55-0.075
-							C214.527,467,214.43,467.072,214.415,467.184z"/>
-					</g>
-					<path d="M211.912,466.216c0.062-0.687,0.904-1.171,1.576-1.094l1.94,0.248l0.07-0.495l-1.955-0.249
-						C212.597,464.52,212,465.256,211.912,466.216z"/>
-					<path fill="#988A38" d="M203.098,459.314c-1.884,0.013-3.585,0.823-4.763,2.105c-1.14,1.272-1.958,3.055-1.482,4.885
-						c0.41,1.72,1.006,3.315,2.147,4.425c1.108,1.118,2.574,1.797,4.19,1.785c2.825-0.018,5.669,0.103,8.444,0.377
-						c0.979,0.098,1.92-0.072,2.791-0.444l0.565-3.988c-0.73,0.978-1.854,1.567-3.059,1.448c-2.894-0.287-5.833-0.411-8.762-0.392
-						c-1.814,0.01-3.359-1.523-3.479-3.504c-0.122-1.981,1.432-3.686,3.429-3.697c3.176-0.02,6.345,0.113,9.525,0.427
-						c1.331,0.132,2.396,1.009,2.851,2.157l0.565-3.988c-0.886-0.624-1.954-1.038-3.118-1.154
-						C209.642,459.431,206.377,459.294,203.098,459.314z"/>
-					<path fill="#988A38" d="M203.109,460.972c-2.741,0.012-4.882,2.366-4.641,5.046c0.025,0.327,0.296,0.574,0.606,0.554
-						c0.311-0.02,0.551-0.299,0.535-0.626c-0.127-2.026,1.462-3.771,3.508-3.783c3.181-0.021,6.355,0.112,9.541,0.428
-						c1.311,0.13,2.376,0.962,2.869,2.069l0.247-1.741c-0.748-0.818-1.793-1.394-2.998-1.513
-						C209.544,461.085,206.332,460.951,203.109,460.972z"/>
-					<path fill="#988A38" d="M203.111,461.061c-2.646,0.012-4.728,2.228-4.556,4.79c0.017,0.292,0.258,0.514,0.537,0.501
-						c0.277-0.017,0.49-0.263,0.481-0.555c-0.078-1.982,1.515-3.662,3.545-3.674c3.183-0.02,6.357,0.113,9.544,0.428
-						c1.31,0.13,2.37,0.944,2.876,2.033l0.22-1.556c-0.734-0.831-1.777-1.414-2.991-1.535
-						C209.539,461.174,206.331,461.04,203.111,461.061z"/>
-					<path fill="#988A38" d="M203.111,461.149c-2.548,0.012-4.565,2.087-4.467,4.532c0.008,0.257,0.22,0.454,0.464,0.444
-						c0.244-0.008,0.432-0.222,0.429-0.475c-0.026-1.944,1.567-3.554,3.58-3.564c3.185-0.021,6.36,0.112,9.55,0.428
-						c1.308,0.13,2.363,0.922,2.883,1.992l0.194-1.369c-0.72-0.843-1.76-1.435-2.984-1.556
-						C209.533,461.263,206.327,461.129,203.111,461.149z"/>
-					<path fill="#988A38" d="M203.111,461.239c-2.45,0.013-4.401,1.946-4.378,4.276c0.001,0.219,0.182,0.394,0.392,0.39
-						c0.211-0.004,0.373-0.185,0.377-0.402c0.027-1.903,1.619-3.443,3.615-3.455c3.186-0.02,6.363,0.113,9.553,0.429
-						c1.308,0.13,2.356,0.903,2.891,1.954l0.167-1.183c-0.705-0.855-1.744-1.454-2.978-1.576
-						C209.528,461.354,206.325,461.22,203.111,461.239z"/>
-					<path fill="#988A38" d="M203.112,461.33c-2.355,0.012-4.23,1.804-4.289,4.02c-0.007,0.184,0.143,0.332,0.319,0.334
-						c0.177-0.001,0.314-0.146,0.325-0.325c0.089-1.87,1.669-3.336,3.65-3.347c3.187-0.021,6.365,0.112,9.558,0.428
-						c1.307,0.13,2.35,0.882,2.897,1.913l0.141-0.995c-0.69-0.866-1.727-1.473-2.971-1.596
-						C209.522,461.443,206.322,461.31,203.112,461.33z"/>
-					<path fill="#988A38" d="M203.113,461.419c-2.259,0.012-4.05,1.662-4.199,3.764c-0.012,0.146,0.105,0.271,0.246,0.278
-						c0.143,0.006,0.258-0.108,0.271-0.252c0.161-1.83,1.723-3.225,3.686-3.236c3.188-0.02,6.368,0.113,9.562,0.429
-						c1.306,0.13,2.343,0.862,2.904,1.876l0.115-0.81c-0.676-0.88-1.711-1.493-2.964-1.618
-						C209.517,461.532,206.32,461.398,203.113,461.419z"/>
-					<path fill="#FEE676" d="M203.113,461.508c-2.163,0.013-3.845,1.525-4.108,3.506c-0.017,0.113,0.066,0.213,0.172,0.225
-						c0.108,0.011,0.201-0.068,0.217-0.177c0.256-1.788,1.776-3.116,3.722-3.128c3.189-0.02,6.371,0.113,9.566,0.429
-						c1.305,0.13,2.335,0.843,2.912,1.837l0.088-0.621c-0.662-0.891-1.694-1.514-2.958-1.64
-						C209.512,461.621,206.317,461.488,203.113,461.508z"/>
-					<path fill="#FEE676" d="M203.114,461.598c-2.066,0.014-3.615,1.396-4.018,3.251c-0.018,0.077,0.027,0.149,0.1,0.167
-						c0.073,0.017,0.145-0.029,0.163-0.101c0.387-1.738,1.828-3.006,3.758-3.019c3.19-0.021,6.373,0.112,9.569,0.429
-						c1.305,0.13,2.33,0.822,2.919,1.797l0.062-0.432c-0.647-0.905-1.677-1.535-2.951-1.662
-						C209.507,461.711,206.315,461.578,203.114,461.598z"/>
-					<path d="M214.821,463.836c0.282,0.305,0.508,0.652,0.665,1.037l0.012,0.002l0.101-0.712c-0.32-0.081-0.598-0.198-0.771-0.339
-						L214.821,463.836z"/>
-					<path fill="#988A38" d="M203.179,470.865c-2.457,0.01-4.532-2.055-4.782-4.771c-0.033-0.44-0.409-0.772-0.848-0.727
-						c-0.443,0.042-0.767,0.469-0.664,0.91c0.413,1.741,1.005,3.303,2.145,4.411c1.102,1.11,2.558,1.785,4.161,1.774
-						c2.826-0.019,5.672,0.103,8.449,0.377c0.98,0.098,1.923-0.073,2.794-0.45l0.272-1.922c-0.86,0.575-1.855,0.886-2.908,0.781
-						C208.958,470.969,206.061,470.847,203.179,470.865z"/>
-					<path fill="#988A38" d="M203.181,470.992c-2.436,0.011-4.542-1.982-4.855-4.655c-0.039-0.393-0.372-0.684-0.757-0.638
-						c-0.388,0.042-0.655,0.419-0.564,0.807c0.189,0.847,0.456,1.635,0.781,2.353c0.344,0.714,0.803,1.349,1.346,1.878
-						c1.088,1.058,2.51,1.696,4.06,1.686c2.828-0.018,5.674,0.103,8.453,0.378c0.979,0.098,1.924-0.077,2.798-0.462l0.243-1.714
-						c-0.864,0.559-1.854,0.856-2.899,0.753C208.95,471.097,206.058,470.975,203.181,470.992z"/>
-					<path fill="#988A38" d="M203.181,471.121c-2.415,0.01-4.547-1.909-4.926-4.541c-0.044-0.343-0.335-0.594-0.666-0.55
-						c-0.335,0.043-0.551,0.372-0.472,0.71c0.192,0.812,0.428,1.573,0.776,2.255c0.351,0.682,0.804,1.285,1.341,1.787
-						c1.073,1.004,2.46,1.606,3.955,1.597c2.83-0.019,5.678,0.103,8.458,0.378c0.979,0.097,1.925-0.081,2.8-0.471l0.213-1.506
-						c-0.867,0.541-1.854,0.828-2.89,0.725C208.942,471.226,206.054,471.104,203.181,471.121z"/>
-					<path fill="#988A38" d="M203.182,471.25c-2.395,0.009-4.546-1.835-4.996-4.428c-0.046-0.295-0.297-0.504-0.576-0.463
-						c-0.283,0.045-0.455,0.328-0.385,0.618c0.601,3.121,3.061,5.369,5.964,5.358c2.831-0.019,5.681,0.103,8.462,0.378
-						c0.979,0.098,1.926-0.086,2.804-0.479l0.184-1.298c-0.871,0.522-1.854,0.799-2.88,0.697
-						C208.935,471.354,206.05,471.232,203.182,471.25z"/>
-					<path fill="#988A38" d="M203.183,471.379c-2.374,0.009-4.539-1.759-5.063-4.315c-0.048-0.244-0.259-0.416-0.488-0.374
-						c-0.232,0.044-0.364,0.285-0.303,0.527c0.633,2.989,3.072,5.087,5.861,5.078c2.833-0.019,5.684,0.102,8.467,0.378
-						c0.978,0.097,1.927-0.091,2.807-0.491l0.154-1.09c-0.874,0.506-1.853,0.771-2.871,0.67
-						C208.927,471.482,206.046,471.36,203.183,471.379z"/>
-					<path fill="#988A38" d="M203.184,471.507c-2.353,0.009-4.517-1.686-5.13-4.198c-0.047-0.198-0.22-0.33-0.4-0.287
-						c-0.185,0.042-0.282,0.239-0.228,0.437c0.692,2.853,3.085,4.802,5.763,4.792c2.833-0.018,5.686,0.103,8.472,0.379
-						c0.978,0.098,1.929-0.094,2.81-0.499l0.125-0.882c-0.878,0.491-1.852,0.741-2.861,0.641
-						C208.919,471.61,206.043,471.488,203.184,471.507z"/>
-					<path fill="#FEE676" d="M203.185,471.635c-2.33,0.01-4.472-1.622-5.195-4.084c-0.043-0.147-0.182-0.241-0.314-0.199
-						c-0.134,0.042-0.201,0.198-0.155,0.348c0.784,2.708,3.101,4.519,5.668,4.508c2.835-0.019,5.689,0.103,8.476,0.379
-						c0.977,0.097,1.931-0.099,2.813-0.506l0.096-0.676c-0.881,0.472-1.851,0.712-2.852,0.612
-						C208.912,471.738,206.04,471.617,203.185,471.635z"/>
-					<path fill="#FEE676" d="M203.186,471.764c-2.308,0.011-4.375-1.581-5.257-3.971c-0.037-0.103-0.143-0.153-0.229-0.11
-						c-0.088,0.041-0.122,0.157-0.084,0.259c0.925,2.541,3.118,4.234,5.573,4.223c2.837-0.019,5.692,0.103,8.481,0.379
-						c0.978,0.097,1.931-0.102,2.816-0.517l0.066-0.467c-0.885,0.456-1.851,0.684-2.843,0.585
-						C208.904,471.866,206.036,471.745,203.186,471.764z"/>
-					<path d="M211.956,469.656c-2.904-0.287-5.852-0.411-8.788-0.393c-1.694,0.008-3.365-1.41-3.478-3.252
-						c0.128,2.121,1.547,3.765,3.481,3.752c2.919-0.019,5.851,0.105,8.735,0.391c1.262,0.125,2.343-0.487,3.057-1.509l0.051-0.359
-						C214.268,469.214,213.101,469.77,211.956,469.656z"/>
-					<path d="M203.096,459.064c-1.958,0.014-3.664,0.856-4.826,2.185c-1.128,1.32-1.911,3.162-1.417,5.056
-						c-0.459-1.762,0.396-3.482,1.552-4.711c1.193-1.237,2.891-2.018,4.695-2.029c3.271-0.02,6.527,0.115,9.817,0.44
-						c1.14,0.113,2.212,0.529,3.107,1.156l0.07-0.497c-0.878-0.622-1.944-1.039-3.128-1.156
-						C209.657,459.181,206.385,459.044,203.096,459.064z"/>
-				</g>
-			</g>
-		</g>
-	</g>
-</g>
-<g>
-	<g>
-		<path d="M260.284,489.56l14.617-0.216v-4.176c0-2.593-0.864-4.177-2.952-5.113c-1.512-0.647-3.6-0.863-6.553-0.863
-			c-6.769,0-13.465,1.584-13.465,1.584l-0.648-3.601c0,0,5.904-1.656,14.041-1.656c4.68,0,8.064,0.433,10.369,1.729
-			c2.592,1.296,3.745,3.672,3.745,7.776v19.874c0,0.936,0.144,1.151,1.008,1.296l1.872,0.36v2.88h-7.056l-0.288-1.8
-			c-0.072-0.433-0.216-1.152-1.152-1.152c-0.576,0-1.08,0.36-1.512,0.648l-3.384,2.304c0,0-3.816,0.504-6.84,0.504
-			c-10.153,0-13.105-2.16-13.105-10.297C248.979,492.008,251.427,489.704,260.284,489.56z M274.901,492.729l-11.593,0.216
-			c-6.265,0.072-9.577-0.721-9.577,6.769c0,6.192,2.592,6.553,9.145,6.553c4.968,0,12.025-1.584,12.025-1.584V492.729z"/>
-		<g>
-			<path fill="#D5BF51" d="M259.917,490.14c4.873-0.072,9.745-0.144,14.617-0.216c0.348-0.005,1.335-0.173,1.335-0.691
-				c0-4.283,0.493-8.405-4.315-10.067c-5.792-2.003-13.973-0.236-19.763,1.09c0.369,0.087,0.737,0.175,1.105,0.262
-				c-0.216-1.2-0.432-2.4-0.648-3.601c-0.275,0.26-0.551,0.52-0.826,0.779c6.861-1.855,15.829-2.867,22.591-0.408
-				c5.376,1.954,4.455,8.413,4.455,13c0,4.585,0,9.171,0,13.756c0,0.542-0.174,2.048,0.385,2.285
-				c2.37,1.006,2.495,0.322,2.495,3.217c0.445-0.23,0.891-0.461,1.336-0.691c-1.646,0-3.291,0-4.936,0
-				c-3.229,0-0.413-0.438-2.134-2.346c-1.9-2.107-5.696,1.516-7.111,2.479c-1.113,0.758-6.649,0.387-8.887,0.302
-				c-3.43-0.131-7.076-0.784-8.76-4.095c-1.334-2.622-1.026-6.27-0.687-9.052C250.8,490.973,255.528,490.278,259.917,490.14
-				c0.755-0.023,2.037-1.201,0.734-1.16c-4.086,0.129-8.627,0.374-11.233,3.948c-2.539,3.483-1.934,10.828,0.355,14.24
-				c2.874,4.283,11.07,3.463,15.306,3.21c1.815-0.108,3.607-0.05,5.124-1.082c2.059-1.401,3.562-2.911,4.092,0.396
-				c0.039,0.24,0.409,0.321,0.599,0.321c2.352,0,4.704,0,7.056,0c0.348,0,1.336-0.176,1.336-0.691c0-0.96,0-1.92,0-2.88
-				c0-0.245-0.135-0.392-0.384-0.439c-2.364-0.455-2.496-1.243-2.496-3.283c0-2.624,0-5.247,0-7.87
-				c0-3.846,0.086-7.701-0.058-11.544c-0.459-12.295-21.856-8.639-29.205-6.651c-0.325,0.088-0.904,0.349-0.826,0.779
-				c0.216,1.2,0.432,2.4,0.648,3.601c0.078,0.432,0.836,0.323,1.105,0.262c4.787-1.097,9.723-1.578,14.628-1.497
-				c3.828,0.063,7.233,1.378,7.233,5.712c0,1.314,0,2.629,0,3.944c0.445-0.23,0.89-0.461,1.335-0.691
-				c-4.873,0.072-9.745,0.144-14.617,0.216C259.891,488.99,258.611,490.159,259.917,490.14z"/>
-			<path fill="#D5BF51" d="M275.268,492.148c-3.839,0.071-7.678,0.143-11.517,0.215c-3.099,0.058-8.686-0.765-10.229,2.94
-				c-1.746,4.191-1.274,10.114,3.745,11.24c5.634,1.265,12.248-0.125,17.773-1.342c0.245-0.054,0.829-0.304,0.829-0.632
-				c0-3.984,0-7.969,0-11.953c0-0.855-1.937-0.458-1.937,0.223c0,3.637,0,7.274,0,10.911c0,1.264-0.263,0.65,0.708,0.437
-				c-1.433,0.316-2.879,0.566-4.327,0.807c-4.069,0.675-8.555,1.193-12.625,0.28c-3.66-0.822-3.171-6.075-2.713-8.77
-				c0.654-3.844,5.405-2.933,8.262-2.986c3.766-0.069,7.532-0.14,11.297-0.21C275.293,493.295,276.573,492.124,275.268,492.148z"/>
-		</g>
-	</g>
-	<g>
-		<path d="M313.78,509.434l-0.287-1.8c-0.072-0.433-0.217-1.152-1.225-1.152c-0.504,0-1.008,0.36-1.44,0.648l-3.456,2.304
-			c0,0-2.736,0.504-6.12,0.504c-10.945,0-12.745-7.057-12.745-17.209c0-9.793,1.728-17.21,13.321-17.21
-			c5.472,0,10.585,0.937,11.521,1.152v-10.657c0-0.936-0.144-1.152-1.008-1.296l-1.872-0.36v-2.88h7.416v43.42
-			c0,0.936,0.145,1.151,1.009,1.296l1.872,0.36v2.88H313.78z M313.349,480.055c-0.864-0.144-5.688-0.863-10.009-0.863
-			c-8.353,0-10.081,2.808-10.081,13.537c0,10.801,1.729,13.465,10.081,13.465c5.04,0,10.009-1.44,10.009-1.44V480.055z"/>
-		<g>
-			<path fill="#D5BF51" d="M314.746,509.175c-0.581-3.641-2.268-4.065-5.256-2.073c-5.341,3.561-14.478,3.178-18.064-2.804
-				c-3.302-5.506-2.624-15.643-0.708-21.535c2.885-8.871,15.265-6.816,22.308-5.345c0.464,0.097,1.305-0.238,1.305-0.807
-				c0-3.553,0-7.105,0-10.657c0-1.535-0.792-1.814-2.153-2.076c-1.279-0.246-0.727,0.253-0.727-1.446
-				c0-1.191-1.684-0.197,0.104-0.197c2.037,0,4.074,0,6.111,0c-1.085,0-0.764-0.949-0.764,1.062c0,4.07,0,8.141,0,12.211
-				c0,9.082,0,18.165,0,27.248c0,1.658-0.459,3.774,1.669,4.184c1.409,0.271,1.212-0.318,1.212,0.688c0,0.522,0,1.045,0,1.567
-				c1.067-0.42,1.216-0.592,0.446-0.518c-2.126,0-4.251,0-6.377,0c-1.14,0-1.451,1.514-0.145,1.514c2.328,0,4.656,0,6.985,0
-				c0.407,0,1.055-0.341,1.055-0.816c0-0.96,0-1.92,0-2.88c0-0.404-0.287-0.615-0.66-0.688c-3.322-0.639-2.221-3.876-2.221-6.678
-				c0-5.005,0-10.01,0-15.015c0-7.565,0-15.131,0-22.696c0-0.517-0.46-0.697-0.91-0.697c-2.472,0-4.943,0-7.416,0
-				c-0.407,0-1.055,0.341-1.055,0.816c0,0.96,0,1.92,0,2.88c0,0.404,0.287,0.615,0.66,0.687c2.995,0.577,2.22,2.763,2.22,5.305
-				c0,2.107,0,4.215,0,6.322c0.436-0.269,0.87-0.537,1.305-0.807c-6.704-1.399-15.418-2.647-21.354,1.589
-				c-5.802,4.141-5.07,14.18-4.59,20.271c0.535,6.777,3.63,11.606,10.55,12.743c2.623,0.432,7.088,0.746,9.537-0.474
-				c2.108-1.051,4.402-4.11,5.001-0.36C312.968,510.653,314.903,510.157,314.746,509.175z"/>
-			<path fill="#D5BF51" d="M313.671,479.309c-5.312-0.831-12.02-2.008-17.098,0.389c-4.561,2.153-4.262,8.981-4.296,13.091
-				c-0.034,4.073-0.31,10.93,4.19,13.002c5.105,2.351,11.887,1.113,17.074-0.324c0.328-0.091,0.791-0.376,0.791-0.772
-				c0-8.233,0-16.466,0-24.698c0-1.067-1.965-0.819-1.965,0.119c0,4.441,0,8.883,0,13.323c0,2.851,0,5.701,0,8.553
-				c0,0.795,0,1.59,0,2.385c0,0.305,1.102-0.384-1.047,0.12c-4.116,0.967-11.642,2.439-15.054-1.145
-				c-2.693-2.829-2.041-8.676-2.012-12.161c0.032-3.849,0.046-9.228,4.372-10.738c4.331-1.512,9.976-0.342,14.401,0.35
-				C314.034,480.959,314.967,479.511,313.671,479.309z"/>
-		</g>
-	</g>
-	<g>
-		<path d="M341.14,498.488c-3.456,0-6.049-0.359-8.064-1.008c0,0-1.152,2.448-1.152,4.608c0,2.809,0.216,3.456,5.904,3.456h9.577
-			c8.856,0,11.521,1.44,11.521,9.001c0,7.057-2.592,9.217-17.569,9.217c-7.633,0-13.104-1.008-13.104-1.008l0.359-3.744
-			c0,0,6.265,1.008,12.602,1.008c2.376,0,6.696-0.072,8.784-0.576c3.169-0.648,4.393-1.656,4.393-4.824
-			c0-3.889-1.152-4.681-4.608-5.041c-1.368-0.144-3.096-0.144-4.536-0.144h-9.648c-4.104,0-8.354-0.288-8.137-5.545l0.144-1.943
-			l3.385-3.745c0.216-0.287,0.432-0.575,0.432-0.936c0-0.576-0.36-0.792-0.72-1.008c-2.593-1.585-3.601-3.457-3.601-9.289
-			c0-8.856,2.376-11.593,14.401-11.449l19.01,0.216v2.809l-6.625,0.432c-0.504,0.072-1.152,0-1.368,0.576
-			c-0.216,0.504,0,1.009,0.288,1.44l3.024,4.32c0,0,0.144,1.152,0.144,1.656C355.973,496.328,352.661,498.488,341.14,498.488z
-			 M350.429,478.975c0,0-5.329-0.216-8.929-0.216c-7.489,0-9.577,0.792-9.577,8.209c0,7.849,2.305,8.209,9.577,8.209
-			c7.2,0,9.721-0.505,9.721-8.354C351.221,484.447,351.005,481.279,350.429,478.975z"/>
-		<g>
-			<path fill="#D5BF51" d="M341.772,498.118c-2.703-0.048-5.299-0.257-7.899-0.994c-0.383-0.109-1.565,0.121-1.748,0.536
-				c-0.885,2.007-1.856,5.024-0.729,7.117c0.821,1.522,4.679,1.138,5.92,1.138c5.361,0,12.703-1.086,17.624,1.214
-				c3.064,1.432,3.05,5.674,3.005,8.499c-0.075,4.762-2.631,6.688-7.385,7.365c-5.815,0.827-12,0.495-17.821-0.091
-				c-1.197-0.121-2.392-0.286-3.578-0.484c0.083,0.014,0.381-3.31,0.415-3.666c-0.588,0.205-1.176,0.41-1.764,0.615
-				c6.447,1.002,13.014,1.27,19.519,0.769c4.87-0.375,8.194-1.776,7.968-7.095c-0.449-10.511-26.418,3.036-26.874-9.411
-				c-0.088-2.408,1.775-3.687,3.287-5.359c1.554-1.719-0.233-2.328-1.47-3.349c-2.738-2.258-2.25-7.298-2.119-10.387
-				c0.21-4.962,2.211-7.766,7.514-8.454c4.938-0.642,10.265-0.136,15.227-0.079c2.505,0.028,5.009,0.057,7.513,0.085
-				c2.016,0.023,1.168,0.645,1.168,2.715c0.464-0.206,0.928-0.412,1.393-0.617c-1.272,0.083-11.569-0.465-8.968,3.251
-				c2.132,3.045,3.164,4.263,2.96,8.073C354.522,497.065,348.362,498.062,341.772,498.118c-0.374,0.004-2.188,0.748-1.266,0.74
-				c6.418-0.056,14.427-0.265,16.09-7.813c0.947-4.3,0-6.276-2.354-9.638c-2.364-3.378,3.507-2.354,5.84-2.506
-				c0.277-0.018,1.393-0.207,1.393-0.617c0-0.937,0-1.872,0-2.809c0-0.111-0.288-0.11-0.333-0.111
-				c-8.16-0.093-16.514-0.712-24.653-0.001c-3.279,0.286-6.786,1.249-8.813,4.004c-2.713,3.686-2.301,12.035,0.137,15.545
-				c0.491,0.709,1.273,1.197,1.975,1.664c1.545,1.032-0.824,2.992-1.362,3.587c-1.511,1.672-1.769,1.78-1.932,3.984
-				c-0.416,5.625,5.445,5.656,9.473,5.656c3.97,0,14.502-1.449,16.91,1.864c0.942,1.297,0.566,4.021,0.246,5.357
-				c-0.76,3.164-8.651,2.61-11.277,2.623c-4.151,0.021-8.334-0.357-12.437-0.994c-0.37-0.058-1.715,0.108-1.764,0.615
-				c-0.12,1.248-0.239,2.496-0.359,3.744c0.036,0.062,0.092,0.095,0.168,0.098c7.939,1.405,16.665,1.676,24.608,0.222
-				c5.215-0.954,7.745-3.742,7.829-9.046c0.13-8.188-5.346-9.112-12.318-9.112c-2.997,0-5.993,0-8.99,0
-				c-1.896,0-5.055,0.455-5.625-1.907c-0.471-1.945,0.285-4.189,1.068-5.967c-0.583,0.179-1.166,0.357-1.748,0.536
-				c2.717,0.771,5.418,0.971,8.229,1.021C340.883,498.865,342.701,498.135,341.772,498.118z"/>
-			<path fill="#D5BF51" d="M351.062,478.604c-5.372-0.213-11.929-1.271-17.023,0.952c-2.016,0.88-2.647,2.863-2.943,4.88
-				c-0.376,2.559-0.699,7.359,1.396,9.377c1.707,1.645,4.196,1.648,6.432,1.717c2.957,0.091,6.096,0.168,8.96-0.688
-				c6.244-1.864,4.533-11.434,3.513-16.127c-0.08-0.368-2.021,0.104-1.932,0.518c0.854,3.927,2.257,11.462-1.06,14.635
-				c-1.709,1.635-6.023,0.952-8.216,0.923c-2.195-0.03-4.688-0.104-6.148-1.979c-2.089-2.682-1.611-9.184-0.074-11.938
-				c1.864-3.34,12.446-1.664,15.831-1.529C350.174,479.359,351.994,478.642,351.062,478.604z"/>
-		</g>
-	</g>
-	<g>
-		<path d="M381.387,506.193c6.553,0,11.809-1.152,11.809-1.152l0.648,3.456c0,0-4.681,1.44-12.169,1.44
-			c-11.018,0-16.562-2.016-16.562-17.209c0-4.537,0.647-8.785,1.728-11.306c1.944-4.536,6.265-5.904,13.97-5.904
-			c5.977,0,10.297,0.648,12.529,3.385c1.8,2.304,2.52,5.472,2.52,11.089c0,1.296-0.071,2.52-0.144,3.888
-			c-1.225,0.072-5.761,0.576-11.161,0.576c-5.185,0-9.937-0.288-9.937-0.288l-2.232-1.512c-0.433-0.288-0.72-0.504-1.296-0.504
-			c-1.368,0-1.225,1.296-1.225,2.232c0,3.023,0.072,6.84,2.16,9.145C373.754,505.474,376.922,506.193,381.387,506.193z
-			 M381.242,479.119c-10.585,0-10.873,1.872-11.305,11.161c4.969,0.792,10.801,0.647,10.801,0.647s5.977,0.145,10.297-0.288
-			C391.035,480.199,389.02,479.119,381.242,479.119z"/>
-		<g>
-			<path fill="#D5BF51" d="M381.023,506.789c4.132-0.069,8.265-0.359,12.316-1.21c-0.37-0.093-0.74-0.187-1.11-0.279
-				c0.216,1.152,0.433,2.304,0.648,3.456c0.273-0.266,0.548-0.531,0.821-0.797c-9.129,2.683-24.03,3.769-26.908-7.652
-				c-1.729-6.857-2.097-20.529,6.043-23.354c3.875-1.345,8.527-0.949,12.533-0.624c5.109,0.414,7.958,2.86,9.068,7.745
-				c0.438,1.927,0.425,4.092,0.454,6.025c0.02,1.316,0.577,3.219-0.336,3.295c-3.848,0.322-7.689,0.479-11.552,0.455
-				c-1.704-0.012-6.402,0.65-7.854-0.333c-1.106-0.749-2.291-1.883-3.694-1.959c-4.104-0.221-2.497,6.446-2.12,8.617
-				C370.332,505.934,376.209,506.686,381.023,506.789c0.791,0.017,2.036-1.163,0.727-1.191c-6.383-0.137-10.238-2.022-10.855-8.74
-				c-0.518-5.624,0.363-2.825,3.143-2.126c3.093,0.778,7.001,0.3,10.154,0.32c3.814,0.024,7.607-0.305,11.406-0.584
-				c0.328-0.024,1.065-0.268,1.088-0.694c0.223-4.27,0.733-9.685-1.409-13.582c-2.249-4.091-6.388-4.828-10.719-5.179
-				c-9.217-0.748-17.73,0.566-19.82,10.596c-1.57,7.54-1.768,20.119,6.734,23.647c6.452,2.678,16.02,1.689,22.517-0.221
-				c0.332-0.098,0.904-0.355,0.821-0.797c-0.216-1.152-0.433-2.304-0.648-3.456c-0.083-0.442-0.827-0.339-1.11-0.279
-				c-3.705,0.777-7.525,1.031-11.301,1.095C380.97,505.611,379.719,506.811,381.023,506.789z"/>
-			<path fill="#D5BF51" d="M381.605,478.523c-3.314,0.024-7.312-0.151-10.078,2.003c-2.745,2.139-2.413,6.805-2.56,9.86
-				c-0.018,0.367,0.305,0.446,0.606,0.489c6.934,0.979,14.367,0.95,21.343,0.352c0.308-0.026,1.092-0.27,1.088-0.694
-				c-0.028-3.296,0.172-7.27-2.057-9.962C388.101,478.34,384.153,478.541,381.605,478.523c-0.787-0.005-2.034,1.183-0.727,1.191
-				c2.452,0.016,5.372-0.16,7.283,1.642c2.2,2.074,1.881,6.736,1.903,9.39c0.362-0.231,0.725-0.463,1.088-0.694
-				c-5.939,0.51-12.15,0.48-18.098-0.021c-3.286-0.277-2.014-4.664-1.318-7.066c1.062-3.673,6.179-3.228,9.142-3.249
-				C381.662,479.709,382.912,478.514,381.605,478.523z"/>
-		</g>
-	</g>
-	<g>
-		<path d="M414.793,509.722c-3.672,0-5.544-0.576-6.696-1.729c-1.584-1.512-1.872-4.32-1.872-8.713v-19.801h-6.337v-3.528h6.337
-			v-8.93h4.536v8.93h9.577v3.528h-9.577v21.241c0,2.16,0.072,3.601,1.08,4.536c0.792,0.721,2.16,1.009,4.753,1.009
-			c1.656,0,3.672-0.36,3.672-0.36l0.433,3.168C420.698,509.073,417.098,509.722,414.793,509.722z"/>
-		<g>
-			<path fill="#D5BF51" d="M415.127,509.02c-6.973-0.198-7.862-3.569-7.924-9.812c-0.017-1.711,0-3.423,0-5.134
-				c0-4.89,0-9.778,0-14.667c0-0.507-0.467-0.634-0.893-0.634c-1.369,0-2.737,0-4.106,0c-0.685,0-1.369,0-2.053,0
-				c1.083,0,0.715,0.405,0.715-1.131c0-1.831,0.372-0.983-0.887-0.983c0.685,0,1.368,0,2.053,0c1.369,0,2.737,0,4.106,0
-				c0.379,0,1.064-0.322,1.064-0.78c0-1.806,0-3.61,0-5.415c0-1.007,0-2.014,0-3.021c0-0.134-1.543,0.286,0.721,0.286
-				c0.617,0,1.234,0,1.852,0c0.874,0.133,0.876-0.034,0.007-0.503c0,0.824,0,1.648,0,2.473c0,2.109,0,4.218,0,6.326
-				c0,0.507,0.467,0.634,0.893,0.634c1.756,0,3.512,0,5.267,0c1.105,0,2.21,0,3.314,0c1.267,0,0.104-0.791,0.104,1.131
-				c0,0.501,0,1.002,0,1.503c1.087-0.401,1.287-0.574,0.599-0.52c-0.968,0-1.937,0-2.905,0c-2.068,0-4.138,0-6.206,0
-				c-0.379,0-1.064,0.323-1.064,0.78c0,6.616,0,13.232,0,19.848c0,2.206-0.495,5.881,2.051,6.889
-				c1.902,0.754,3.931,0.717,5.947,0.627c0.395-0.041,0.79-0.082,1.185-0.124c1.033-0.009,1.16-0.108,0.38-0.299
-				c0.129,0.946,0.259,1.893,0.388,2.839c0.263-0.307,0.525-0.613,0.788-0.921c-1.87,0.324-3.745,0.53-5.642,0.604
-				c-1.097,0.043-1.488,1.465-0.172,1.414c2.077-0.081,4.122-0.34,6.169-0.693c0.431-0.075,0.851-0.467,0.788-0.921
-				c-0.145-1.056-0.288-2.112-0.433-3.168c-0.048-0.354-0.594-0.495-0.88-0.448c-2.488,0.411-7.364,1.411-8.463-2.009
-				c-0.344-1.07-0.149-3.231-0.149-4.503c0-3.87,0-7.741,0-11.612c0-1.688,0-3.376,0-5.063c0-0.564,0-1.128,0-1.692
-				c0-1.478,0.195-0.132-0.942-0.132c3.151,0,6.304,0,9.455,0c0.379,0,1.064-0.323,1.064-0.78c0-1.177,0-2.353,0-3.528
-				c0-0.507-0.467-0.634-0.893-0.634c-3.151,0-6.304,0-9.455,0c0.998,0,0.771,0.99,0.771-0.421c0-1.137,0-2.273,0-3.41
-				c0-1.488,0-2.977,0-4.465c0-0.507-0.467-0.634-0.893-0.634c-1.512,0-3.024,0-4.536,0c-0.379,0-1.064,0.322-1.064,0.78
-				c0,2.626,0,5.251,0,7.876c0,1.552-0.223,0.273,0.887,0.273c-0.685,0-1.368,0-2.053,0c-1.369,0-2.738,0-4.106,0
-				c-0.379,0-1.064,0.322-1.064,0.78c0,1.177,0,2.353,0,3.528c0,0.507,0.467,0.634,0.893,0.634c1.656,0,3.312,0,4.968,0
-				c1.607,0,0.477-1.158,0.477,0.833c0,4.4,0,8.801,0,13.201c0,3.202-0.073,6.412,0.188,9.607c0.436,5.339,4.293,6.461,9.025,6.596
-				C415.39,510.45,416.434,509.057,415.127,509.02z"/>
-		</g>
-	</g>
-	<g>
-		<path d="M427.32,475.951h6.625l0.287,1.8c0.072,0.432,0.217,1.152,1.152,1.152c0.576,0,1.08-0.36,1.513-0.648l3.456-2.304
-			c0,0,3.96-0.433,5.904-0.433c1.872,0,3.24,0.072,4.608,0.288l-0.288,4.32c0,0-2.664-0.36-5.616-0.36
-			c-6.049,0-10.225,0.721-10.225,0.721v24.481c0,0.864,0.144,1.08,1.008,1.297l1.872,0.359v2.809H427.32v-2.88l1.872-0.36
-			c0.864-0.145,1.008-0.36,1.008-1.296v-24.482c0-0.864-0.144-1.08-1.008-1.296l-1.872-0.36V475.951z"/>
-		<g>
-			<path fill="#D5BF51" d="M426.688,476.322c1.104,0,2.209,0,3.312,0c1.923,0,2.895-0.645,3.192,1.221
-				c0.318,2,1.763,1.882,3.328,1.332c0.653-0.229,1.224-0.69,1.796-1.071c0.695-0.464,1.391-0.927,2.086-1.391
-				c1.369-0.912-1.497,0,0.624-0.213c0.903-0.091,8.909-0.662,8.858,0.104c-0.091,1.36-0.182,2.721-0.272,4.081
-				c0.588-0.206,1.176-0.411,1.764-0.617c-5.411-0.674-11.068-0.49-16.451,0.393c-0.245,0.04-1.153,0.223-1.153,0.585
-				c0,5.847,0,11.692,0,17.539c0,2.296,0,4.593,0,6.89c0,1.18,0.665,1.351,1.661,1.542c1.735,0.333,1.219,1.445,1.219,2.976
-				c0.532-0.21,1.065-0.42,1.599-0.63c-2.347,0-4.693,0-7.039,0c-3.258,0-2.925,0.422-2.925-2.768
-				c-0.385,0.195-0.77,0.39-1.153,0.585c1.944-0.375,4.033-0.32,4.033-2.698c0-3.763,0-7.525,0-11.288c0-3.98,0-7.961,0-11.94
-				c0-1.112,0.05-1.611-1.062-2.154c-1.997-0.977-1.818-0.598-1.818-3.106c0-0.264-1.932-0.001-1.932,0.518c0,0.936,0,1.872,0,2.808
-				c0.036,0.062,0.092,0.096,0.168,0.1c4.728,0.91,2.712,9.659,2.712,13.302c0,3.267,0,6.532,0,9.799
-				c0,2.003,0.568,3.568-1.727,4.01c-0.245,0.048-1.153,0.219-1.153,0.585c0,0.96,0,1.92,0,2.88c0,0.112,0.288,0.112,0.333,0.112
-				c3.433,0,6.865,0,10.297,0c0.312,0,1.599-0.163,1.599-0.63c0-0.937,0-1.872,0-2.809c-0.036-0.062-0.092-0.096-0.168-0.1
-				c-4.611-0.885-2.712-8.746-2.712-12.312c0-4.576,0-9.151,0-13.727c-0.385,0.195-0.77,0.39-1.153,0.585
-				c4.979-0.817,10.229-0.952,15.23-0.328c0.364,0.045,1.729-0.102,1.764-0.617c0.096-1.44,0.192-2.88,0.288-4.32
-				c-0.036-0.062-0.092-0.096-0.168-0.1c-3.542-0.436-7.883-0.701-11.382,0.236c-2.62,0.702-4.633,4.634-5.371,0.008
-				c-0.019-0.114-0.27-0.112-0.333-0.112c-2.209,0-4.417,0-6.625,0C427.576,475.58,425.762,476.322,426.688,476.322z"/>
-		</g>
-	</g>
-	<g>
-		<path d="M469.01,509.938c-12.89,0-15.193-3.024-15.193-17.209c0-14.186,2.304-17.21,15.193-17.21
-			c12.889,0,15.193,3.024,15.193,17.21C484.203,506.913,481.898,509.938,469.01,509.938z M469.01,479.119
-			c-9.505,0-10.441,1.44-10.441,13.609s0.937,13.608,10.441,13.608s10.44-1.439,10.44-13.608S478.515,479.119,469.01,479.119z"/>
-		<g>
-			<path fill="#D5BF51" d="M469.629,509.516c-3.869-0.033-8.21,0.013-11.41-2.474c-2.979-2.313-3.155-7.126-3.354-10.52
-				c-0.271-4.654-0.304-9.634,0.82-14.18c1.376-5.566,6.627-6.249,11.619-6.392c4.114-0.116,9.086-0.187,12.496,2.463
-				c2.979,2.313,3.155,7.126,3.354,10.521c0.285,4.89,0.393,10.263-1.068,14.98c-1.707,5.512-7.836,5.56-12.667,5.601
-				c-0.557,0.005-2.106,0.854-0.818,0.844c4.206-0.036,8.649-0.177,12.306-2.496c3.656-2.32,3.954-7.48,4.18-11.341
-				c0.319-5.471,0.896-12.976-2.46-17.709c-2.716-3.831-8.896-3.682-12.997-3.717c-3.793-0.032-8.562-0.013-11.916,2.116
-				c-4.041,2.564-4.521,7.305-4.779,11.722c-0.319,5.47-0.896,12.975,2.459,17.708c2.717,3.832,8.897,3.682,12.998,3.717
-				C468.83,510.363,470.605,509.524,469.629,509.516z"/>
-			<path fill="#D5BF51" d="M469.419,478.697c-2.717,0.02-6.237-0.127-8.637,1.464c-2.905,1.926-2.971,5.573-3.121,8.748
-				c-0.215,4.551-1.069,11.401,1.598,15.445c1.779,2.7,6.406,2.385,9.132,2.404c2.756,0.02,6.41,0.151,8.847-1.464
-				c2.905-1.925,2.97-5.572,3.12-8.747c0.215-4.551,1.069-11.401-1.598-15.446c-1.779-2.699-6.405-2.385-9.131-2.404
-				c-0.439-0.003-2.215,0.836-1.238,0.844c2.719,0.02,5.941-0.186,8.115,1.72c1.921,1.683,1.812,5.376,1.92,7.648
-				c0.188,3.988,0.24,8.165-0.359,12.119c-0.617,4.067-3.766,4.782-7.503,4.879c-2.906,0.074-6.713,0.336-9.051-1.711
-				c-1.921-1.683-1.812-5.376-1.92-7.648c-0.188-3.988-0.24-8.165,0.359-12.119c0.695-4.584,4.685-4.859,8.648-4.888
-				C469.157,479.537,470.707,478.688,469.419,478.697z"/>
-		</g>
-	</g>
-	<g>
-		<path d="M491.327,475.951h6.625l0.287,1.8c0.072,0.432,0.217,1.152,1.152,1.152c0.576,0,1.08-0.36,1.513-0.648l3.456-2.304
-			c0,0,3.96-0.433,5.904-0.433c9.937,0,12.529,2.521,12.529,12.241v17.209c0,0.864,0.144,1.152,1.008,1.297l1.872,0.359v2.881
-			h-10.297v-2.881l1.872-0.359c0.864-0.145,1.009-0.433,1.009-1.297v-16.345c0-7.921-1.368-9.433-9.289-9.433
-			c-5.04,0-10.225,1.224-10.225,1.224v24.554c0,0.864,0.144,1.08,1.008,1.297l1.872,0.359v2.809h-10.297v-2.88l1.872-0.36
-			c0.864-0.145,1.008-0.36,1.008-1.296v-24.482c0-0.864-0.144-1.08-1.008-1.296l-1.872-0.36V475.951z"/>
-		<g>
-			<path fill="#D5BF51" d="M490.926,476.401c1.104,0,2.208,0,3.312,0c2.219,0,2.681-0.611,2.961,1.142
-				c0.307,1.922,1.746,2.051,3.355,1.433c2.157-0.829,3.35-2.579,5.627-2.79c4.666-0.433,11.415-0.998,14.255,3.6
-				c2.836,4.591,1.392,13.637,1.392,18.808c0,2.212,0,4.423,0,6.635c0,1.246,0.791,1.374,1.846,1.576
-				c1.606,0.309,1.034,1.528,1.034,2.961c0.456-0.236,0.912-0.473,1.368-0.709c-2.347,0-4.693,0-7.039,0c-1.048,0-2.096,0-3.143,0
-				c0.847,0,0.448-2.036,0.448-2.689c-0.376,0.229-0.751,0.456-1.127,0.685c3.057-0.587,4.008-0.891,4.008-4.041
-				c0-4.665,0-9.33,0-13.995c0-3.872,0.168-8.227-4.051-9.717c-4.911-1.734-11.641-0.357-16.528,0.747
-				c-0.283,0.064-0.866,0.264-0.866,0.629c0,5.863,0,11.727,0,17.591c0,2.303,0,4.606,0,6.91c0,1,0.367,1.345,1.36,1.536
-				c2.023,0.388,1.52,1.063,1.52,2.981c0.456-0.236,0.912-0.473,1.367-0.709c-2.346,0-4.692,0-7.038,0c-1.048,0-2.095,0-3.143,0
-				c0.847,0,0.448-2.035,0.448-2.688c-0.376,0.229-0.751,0.456-1.127,0.685c2.022-0.39,4.007-0.364,4.007-2.798
-				c0-3.763,0-7.525,0-11.288c0-3.98,0-7.961,0-11.94c0-1.238-0.012-1.668-1.189-2.244c-1.947-0.952-1.69-0.509-1.69-3.017
-				c0-0.429-1.932-0.115-1.932,0.518c0,0.936,0,1.872,0,2.808c0,0.141,0.274,0.171,0.353,0.187
-				c4.493,0.864,2.527,9.749,2.527,13.215c0,3.032,0,6.064,0,9.097c0,2.119,0.685,4.143-1.753,4.612
-				c-0.329,0.063-1.127,0.229-1.127,0.685c0,0.96,0,1.92,0,2.88c0,0.204,0.461,0.191,0.564,0.191c3.432,0,6.864,0,10.297,0
-				c0.368,0,1.367-0.205,1.367-0.709c0-0.937,0-1.872,0-2.809c0-0.141-0.274-0.171-0.353-0.187
-				c-4.387-0.842-2.527-8.864-2.527-12.257c0-4.589,0-9.178,0-13.767c-0.289,0.21-0.578,0.419-0.866,0.629
-				c4.669-1.055,12.363-2.802,16.59,0.269c2.398,1.744,1.858,6.377,1.858,8.95c0,3.157,0,6.313,0,9.471
-				c0,2.364,1.021,6.191-1.754,6.725c-0.329,0.063-1.127,0.229-1.127,0.685c0,0.96,0,1.921,0,2.881c0,0.204,0.461,0.191,0.563,0.191
-				c3.433,0,6.865,0,10.297,0c0.369,0,1.368-0.205,1.368-0.709c0-0.96,0-1.921,0-2.881c0-0.141-0.274-0.171-0.353-0.187
-				c-4.463-0.856-2.527-9.524-2.527-13c0-4.711,0.763-10.425-1.803-14.578c-2.77-4.483-10.627-3.669-14.972-3.317
-				c-1.185,0.097-2.543,0.032-3.569,0.716c-2.268,1.512-3.912,3.367-4.498-0.308c-0.033-0.208-0.432-0.191-0.564-0.191
-				c-2.208,0-4.416,0-6.625,0C491.135,475.501,489.631,476.401,490.926,476.401z"/>
-		</g>
-	</g>
-</g>
-</svg>
diff --git a/doc/manual/figs/architecture.png b/doc/manual/figs/architecture.png
deleted file mode 100644
index 1a2ec67..0000000
Binary files a/doc/manual/figs/architecture.png and /dev/null differ
diff --git a/doc/manual/figs/architecture.svg b/doc/manual/figs/architecture.svg
deleted file mode 100644
index f9923ed..0000000
--- a/doc/manual/figs/architecture.svg
+++ /dev/null
@@ -1,748 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<!-- Created with Inkscape (http://www.inkscape.org/) -->
-
-<svg
-   xmlns:dc="http://purl.org/dc/elements/1.1/"
-   xmlns:cc="http://creativecommons.org/ns#"
-   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-   xmlns:svg="http://www.w3.org/2000/svg"
-   xmlns="http://www.w3.org/2000/svg"
-   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
-   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
-   width="810"
-   height="810"
-   id="svg2"
-   version="1.1"
-   inkscape:version="0.48.2 r9819"
-   sodipodi:docname="architecture.svg">
-  <sodipodi:namedview
-     id="base"
-     pagecolor="#ffffff"
-     bordercolor="#666666"
-     borderopacity="1.0"
-     inkscape:pageopacity="0.0"
-     inkscape:pageshadow="2"
-     inkscape:zoom="1.1559256"
-     inkscape:cx="606.42175"
-     inkscape:cy="406.0602"
-     inkscape:document-units="px"
-     inkscape:current-layer="layer1"
-     showgrid="false"
-     inkscape:window-width="1920"
-     inkscape:window-height="1176"
-     inkscape:window-x="0"
-     inkscape:window-y="24"
-     inkscape:window-maximized="1"
-     units="in" />
-  <defs
-     id="defs4">
-    <marker
-       style="overflow:visible;"
-       id="Arrow1Mend"
-       refX="0.0"
-       refY="0.0"
-       orient="auto"
-       inkscape:stockid="Arrow1Mend">
-      <path
-         transform="scale(0.4) rotate(180) translate(10,0)"
-         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;marker-start:none;"
-         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
-         id="path4320" />
-    </marker>
-    <marker
-       style="overflow:visible"
-       id="Arrow1Mstart"
-       refX="0.0"
-       refY="0.0"
-       orient="auto"
-       inkscape:stockid="Arrow1Mstart">
-      <path
-         transform="scale(0.4) translate(10,0)"
-         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;marker-start:none"
-         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
-         id="path4317" />
-    </marker>
-    <marker
-       style="overflow:visible"
-       id="Arrow2Lstart"
-       refX="0"
-       refY="0"
-       orient="auto"
-       inkscape:stockid="Arrow2Lstart">
-      <path
-         inkscape:connector-curvature="0"
-         transform="matrix(1.1,0,0,1.1,1.1,0)"
-         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
-         style="font-size:12px;fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
-         id="path3846" />
-    </marker>
-    <marker
-       style="overflow:visible"
-       id="Arrow1Lstart"
-       refX="0"
-       refY="0"
-       orient="auto"
-       inkscape:stockid="Arrow1Lstart">
-      <path
-         inkscape:connector-curvature="0"
-         transform="matrix(0.8,0,0,0.8,10,0)"
-         style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none"
-         d="M 0,0 5,-5 -12.5,0 5,5 0,0 z"
-         id="path3828" />
-    </marker>
-    <marker
-       style="overflow:visible"
-       id="Arrow2Lend"
-       refX="0"
-       refY="0"
-       orient="auto"
-       inkscape:stockid="Arrow2Lend">
-      <path
-         inkscape:connector-curvature="0"
-         transform="matrix(-1.1,0,0,-1.1,-1.1,0)"
-         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
-         style="font-size:12px;fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
-         id="path3849" />
-    </marker>
-    <marker
-       style="overflow:visible"
-       id="TriangleOutL"
-       refX="0"
-       refY="0"
-       orient="auto"
-       inkscape:stockid="TriangleOutL">
-      <path
-         inkscape:connector-curvature="0"
-         transform="scale(0.8,0.8)"
-         style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none"
-         d="m 5.77,0 -8.65,5 0,-10 8.65,5 z"
-         id="path3971" />
-    </marker>
-    <marker
-       style="overflow:visible"
-       id="Arrow2Lend-0"
-       refX="0"
-       refY="0"
-       orient="auto"
-       inkscape:stockid="Arrow2Lend">
-      <path
-         transform="matrix(-1.1,0,0,-1.1,-1.1,0)"
-         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
-         style="font-size:12px;fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
-         id="path3849-8"
-         inkscape:connector-curvature="0" />
-    </marker>
-    <marker
-       style="overflow:visible"
-       id="Arrow2Lend-2"
-       refX="0"
-       refY="0"
-       orient="auto"
-       inkscape:stockid="Arrow2Lend">
-      <path
-         transform="matrix(-1.1,0,0,-1.1,-1.1,0)"
-         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
-         style="font-size:12px;fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
-         id="path3849-0"
-         inkscape:connector-curvature="0" />
-    </marker>
-    <marker
-       style="overflow:visible;"
-       id="Arrow1Mendy"
-       refX="0.0"
-       refY="0.0"
-       orient="auto"
-       inkscape:stockid="Arrow1Mendy">
-      <path
-         transform="scale(0.4) rotate(180) translate(10,0)"
-         style="marker-start:none;stroke:#000000;stroke-width:1.0pt;fill:#000000;fill-rule:evenodd"
-         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
-         id="path3104" />
-    </marker>
-    <marker
-       style="overflow:visible;"
-       id="Arrow1Mendyf"
-       refX="0.0"
-       refY="0.0"
-       orient="auto"
-       inkscape:stockid="Arrow1Mendyf">
-      <path
-         transform="scale(0.4) rotate(180) translate(10,0)"
-         style="fill-rule:evenodd;marker-start:none;stroke:#0f0fff;stroke-width:1.0pt;fill:#0f0fff"
-         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
-         id="path4334" />
-    </marker>
-    <marker
-       style="overflow:visible;"
-       id="Arrow1MendyfL"
-       refX="0.0"
-       refY="0.0"
-       orient="auto"
-       inkscape:stockid="Arrow1MendyfL">
-      <path
-         transform="scale(0.4) rotate(180) translate(10,0)"
-         style="marker-start:none;stroke:#dcdcdc;stroke-width:1.0pt;fill:#dcdcdc;fill-rule:evenodd"
-         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
-         id="path4543" />
-    </marker>
-    <marker
-       style="overflow:visible;"
-       id="Arrow1MendyfLA"
-       refX="0.0"
-       refY="0.0"
-       orient="auto"
-       inkscape:stockid="Arrow1MendyfLA">
-      <path
-         transform="scale(0.4) rotate(180) translate(10,0)"
-         style="fill-rule:evenodd;marker-start:none;stroke:#c0dcdc;stroke-width:1.0pt;fill:#c0dcdc"
-         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
-         id="path4741" />
-    </marker>
-    <marker
-       style="overflow:visible;"
-       id="Arrow1MendyfLT"
-       refX="0.0"
-       refY="0.0"
-       orient="auto"
-       inkscape:stockid="Arrow1MendyfLT">
-      <path
-         transform="scale(0.4) rotate(180) translate(10,0)"
-         style="fill-rule:evenodd;marker-start:none;stroke:#c0dcdc;stroke-width:1.0pt;fill:#c0dcdc"
-         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
-         id="path4744" />
-    </marker>
-    <marker
-       style="overflow:visible;"
-       id="Arrow1MendyfL2"
-       refX="0.0"
-       refY="0.0"
-       orient="auto"
-       inkscape:stockid="Arrow1MendyfL2">
-      <path
-         transform="scale(0.4) rotate(180) translate(10,0)"
-         style="fill-rule:evenodd;marker-start:none;stroke:#c0dcdc;stroke-width:1.0pt;fill:#c0dcdc"
-         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
-         id="path4747" />
-    </marker>
-  </defs>
-  <metadata
-     id="metadata7">
-    <rdf:RDF>
-      <cc:Work
-         rdf:about="">
-        <dc:format>image/svg+xml</dc:format>
-        <dc:type
-           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
-        <dc:title />
-      </cc:Work>
-    </rdf:RDF>
-  </metadata>
-  <g
-     transform="translate(0,-242.362)"
-     id="layer1"
-     inkscape:groupmode="layer"
-     inkscape:label="Layer 1">
-    <rect
-       y="525.13312"
-       x="443.63504"
-       height="83.438591"
-       width="84.852814"
-       id="rect2985"
-       style="opacity:1;fill:#c6eaea;fill-opacity:1;fill-rule:nonzero;stroke:#ffffff;stroke-width:0.69999999000000002;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text4821"
-       y="542.81079"
-       x="457.17929"
-       style="font-size:12.60000038px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-       xml:space="preserve"><tspan
-         y="542.81079"
-         x="457.17929"
-         id="tspan4823"
-         sodipodi:role="line">Gadget 1</tspan></text>
-    <rect
-       y="525.13312"
-       x="543.36841"
-       height="83.438591"
-       width="84.852814"
-       id="rect2985-7"
-       style="opacity:1;fill:#c6eaea;fill-opacity:1;fill-rule:nonzero;stroke:#ffffff;stroke-width:0.69999999000000002;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text4821-3"
-       y="542.75134"
-       x="556.96179"
-       style="font-size:12.60000038px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-       xml:space="preserve"><tspan
-         y="542.75134"
-         x="556.96179"
-         id="tspan4823-1"
-         sodipodi:role="line">Gadget 2</tspan></text>
-    <rect
-       y="525.13312"
-       x="643.10162"
-       height="83.438591"
-       width="84.852814"
-       id="rect2985-7-9"
-       style="opacity:1;fill:#c6eaea;fill-opacity:1;fill-rule:nonzero;stroke:#ffffff;stroke-width:0.69999999000000002;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text4821-0"
-       y="542.04425"
-       x="656.56897"
-       style="font-size:12.60000038px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-       xml:space="preserve"><tspan
-         y="542.04425"
-         x="656.56897"
-         id="tspan4823-8"
-         sodipodi:role="line">Gadget 3</tspan></text>
-    <rect
-       y="407.16122"
-       x="190.73268"
-       height="257.52689"
-       width="239.8537"
-       id="rect2985-0"
-       style="opacity:1;fill:#c6eaea;fill-opacity:1;fill-rule:nonzero;stroke:none" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text4821-1"
-       y="427.9028"
-       x="235.80434"
-       style="font-size:12.60000038px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-       xml:space="preserve"><tspan
-         y="427.9028"
-         x="235.80434"
-         id="tspan4823-2"
-         sodipodi:role="line">GadgetStreamController</tspan></text>
-    <rect
-       style="fill:#97d2ea;fill-opacity:1;fill-rule:nonzero;stroke:none"
-       id="rect14081"
-       width="84.852814"
-       height="27.909048"
-       x="325.49469"
-       y="439.86765" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text4821-4"
-       y="459.42972"
-       x="339.05682"
-       style="font-size:12.60000038px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-       xml:space="preserve"><tspan
-         y="459.42972"
-         x="339.05682"
-         id="tspan4823-4"
-         sodipodi:role="line">Reader 1</tspan></text>
-    <rect
-       y="476.23312"
-       x="324.98962"
-       height="27.909048"
-       width="84.852814"
-       id="rect2985-8-6"
-       style="opacity:1;fill:#97d2ea;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.40484342000000001;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text4821-4-1"
-       y="494.88498"
-       x="339.10602"
-       style="font-size:12.60000038px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-       xml:space="preserve"><tspan
-         y="494.88498"
-         x="339.10602"
-         id="tspan4823-4-3"
-         sodipodi:role="line">Reader 2</tspan></text>
-    <rect
-       y="512.26196"
-       x="324.98962"
-       height="27.909048"
-       width="84.852814"
-       id="rect2985-8-6-0"
-       style="opacity:1;fill:#97d2ea;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.40484342000000001;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text4821-4-1-1"
-       y="530.91382"
-       x="338.97992"
-       style="font-size:12.60000038000000089px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-       xml:space="preserve"><tspan
-         y="530.91382"
-         x="338.97992"
-         id="tspan4823-4-3-7"
-         sodipodi:role="line">Reader 3</tspan></text>
-    <rect
-       y="548.93774"
-       x="215.44989"
-       height="27.909048"
-       width="84.852814"
-       id="rect2985-8-8"
-       style="opacity:1;fill:#97d2ea;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.40484342000000001;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text4821-4-0"
-       y="567.5896"
-       x="233.00238"
-       style="font-size:12.60000038px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-       xml:space="preserve"><tspan
-         y="567.5896"
-         x="233.00238"
-         id="tspan4823-4-8"
-         sodipodi:role="line">Writer 1</tspan></text>
-    <rect
-       y="584.06647"
-       x="215.44989"
-       height="27.909048"
-       width="84.852814"
-       id="rect2985-8-6-9"
-       style="opacity:1;fill:#97d2ea;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.40484342000000001;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text4821-4-1-7"
-       y="602.71832"
-       x="233.05161"
-       style="font-size:12.60000038px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-       xml:space="preserve"><tspan
-         y="602.71832"
-         x="233.05161"
-         id="tspan4823-4-3-0"
-         sodipodi:role="line">Writer 2</tspan></text>
-    <rect
-       y="619.74835"
-       x="215.44989"
-       height="27.909048"
-       width="84.852814"
-       id="rect2985-8-6-0-8"
-       style="opacity:1;fill:#97d2ea;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.40484342000000001;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text4821-4-1-1-8"
-       y="638.40021"
-       x="232.92548"
-       style="font-size:12.60000038px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-       xml:space="preserve"><tspan
-         y="638.40021"
-         x="232.92548"
-         id="tspan4823-4-3-7-6"
-         sodipodi:role="line">Writer 3</tspan></text>
-    <rect
-       y="435.50378"
-       x="212.2749"
-       height="83.438591"
-       width="91.202812"
-       id="rect2985-71"
-       style="opacity:1;fill:#97d2ea;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.72599999999999998;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text4821-8"
-       y="452.6991"
-       x="230.46149"
-       style="font-size:12.60000038px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Bookshelf Symbol 7;-inkscape-font-specification:'Bookshelf Symbol 7,'"
-       xml:space="preserve"><tspan
-         y="452.6991"
-         x="230.46149"
-         id="tspan4823-41"
-         sodipodi:role="line">Message</tspan><tspan
-         id="tspan5132"
-         y="468.4491"
-         x="230.46149"
-         sodipodi:role="line">Dispatch</tspan></text>
-    <text
-       sodipodi:linespacing="125%"
-       id="text4821-8-8"
-       y="504.66107"
-       x="217.32932"
-       style="font-size:12.60000038px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Bookshelf Symbol 7;-inkscape-font-specification:'Bookshelf Symbol 7,'"
-       xml:space="preserve"><tspan
-         id="tspan5132-9"
-         y="504.66107"
-         x="217.32932"
-         sodipodi:role="line">Message ID?</tspan></text>
-    <rect
-       y="563.71161"
-       x="321.81464"
-       height="83.438591"
-       width="91.202812"
-       id="rect2985-71-6"
-       style="opacity:1;fill:#97d2ea;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.72571987000000004;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text4821-8-5"
-       y="580.90698"
-       x="345.36603"
-       style="font-size:12.60000038px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Bookshelf Symbol 7;-inkscape-font-specification:'Bookshelf Symbol 7,'"
-       xml:space="preserve"><tspan
-         id="tspan5132-6"
-         y="580.90698"
-         x="345.36603"
-         sodipodi:role="line">Output</tspan><tspan
-         id="tspan5417"
-         y="596.65698"
-         x="345.36603"
-         sodipodi:role="line">Queue</tspan></text>
-    <text
-       sodipodi:linespacing="125%"
-       id="text4821-8-8-6"
-       y="632.86896"
-       x="326.86902"
-       style="font-size:12.60000038px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Bookshelf Symbol 7;-inkscape-font-specification:'Bookshelf Symbol 7,'"
-       xml:space="preserve"><tspan
-         id="tspan5132-9-3"
-         y="632.86896"
-         x="326.86902"
-         sodipodi:role="line">Message ID?</tspan></text>
-    <rect
-       y="435.50381"
-       x="67.253731"
-       height="83.438591"
-       width="84.852814"
-       id="rect2985-87"
-       style="opacity:1;fill:#c662ea;fill-opacity:1;fill-rule:nonzero;stroke:#ffffff;stroke-width:0.69999999000000002;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text4821-7"
-       y="469.69919"
-       x="88.316109"
-       style="font-size:12.60000038px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-       xml:space="preserve"><tspan
-         y="469.69919"
-         x="88.316109"
-         id="tspan4823-3"
-         sodipodi:role="line">Socket</tspan></text>
-    <text
-       sodipodi:linespacing="125%"
-       id="text4821-7-4"
-       y="496.92044"
-       x="90.438667"
-       style="font-size:12.60000038px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-       xml:space="preserve"><tspan
-         y="496.92044"
-         x="90.438667"
-         id="tspan4823-3-3"
-         sodipodi:role="line">TCP/IP</tspan></text>
-    <rect
-       y="533.73926"
-       x="44.306839"
-       height="130.24045"
-       width="130.7466"
-       id="rect2985-87-3-5"
-       style="opacity:1;fill:#c662ea;fill-opacity:1;fill-rule:nonzero;stroke:#ffffff;stroke-width:1.08559929999999993;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text4821-7-5-2"
-       y="594.16174"
-       x="110.49312"
-       style="font-size:18px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-       xml:space="preserve"><tspan
-         y="594.16174"
-         x="110.49312"
-         id="tspan4823-3-1-3"
-         sodipodi:role="line">CLIENT</tspan><tspan
-         id="tspan7073"
-         y="616.66174"
-         x="110.49312"
-         sodipodi:role="line">APPLICATION</tspan></text>
-    <rect
-       style="opacity:1;fill:#c6eaea;fill-opacity:1;fill-rule:nonzero;stroke:#ffffff;stroke-width:1.11457253000000001;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
-       id="rect10590"
-       width="268.66602"
-       height="66.809937"
-       x="449.53723"
-       y="419.26859" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text4108"
-       y="455.50485"
-       x="462.42859"
-       style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         style="font-size:28px"
-         y="455.50485"
-         x="462.42859"
-         id="tspan4110"
-         sodipodi:role="line">Shared Toolboxes</tspan></text>
-    <path
-       inkscape:connector-curvature="3"
-       inkscape:connector-type="polyline"
-       id="path4112"
-       d="m 109.68014,533.73926 0,-14.79687"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-start:url(#Arrow1Mstart);marker-mid:none;marker-end:url(#Arrow1Mend)" />
-    <path
-       inkscape:connector-curvature="3"
-       inkscape:connector-type="polyline"
-       id="path4114"
-       d="m 151.0964,475.2028 60.16836,-1e-5"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-start:none;marker-end:url(#Arrow1Mend)" />
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path5948"
-       d="m 694.21429,634.21914 -279.73054,0"
-       style="fill:none;stroke:#000000;stroke-width:0.99685216px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)" />
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path5954"
-       d="m 693.65718,608.32883 0,23.94651"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)" />
-    <path
-       sodipodi:nodetypes="cc"
-       transform="translate(0,242.362)"
-       inkscape:connector-curvature="0"
-       id="path8722"
-       d="m 588.6664,365.53288 0,24.74873"
-       style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:1, 1;stroke-dashoffset:0;marker-end:url(#Arrow1Mend)" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:1, 1;stroke-dashoffset:0;marker-end:url(#Arrow1Mend)"
-       d="m 483.10546,608.65249 0,23.99112"
-       id="path8968"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       sodipodi:nodetypes="ccc"
-       transform="translate(0,242.362)"
-       inkscape:connector-curvature="0"
-       id="path10592"
-       d="m 421.48615,213.31551 0,124.86871 22.22336,0"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)" />
-    <path
-       sodipodi:nodetypes="cc"
-       transform="translate(0,242.362)"
-       inkscape:connector-curvature="0"
-       id="path10790"
-       d="m 409.61686,213.31551 11.86929,0"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
-    <path
-       sodipodi:nodetypes="cc"
-       transform="translate(0,242.362)"
-       inkscape:connector-curvature="0"
-       id="path10792"
-       d="m 409.61686,246.71369 11.86929,0"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
-    <path
-       sodipodi:nodetypes="cc"
-       transform="translate(0,242.362)"
-       inkscape:connector-curvature="0"
-       id="path10794"
-       d="m 409.8694,284.72068 11.61675,0"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
-    <path
-       sodipodi:nodetypes="cc"
-       transform="translate(0,242.362)"
-       inkscape:connector-curvature="0"
-       id="path10800"
-       d="m 303.55084,246.46115 9.97525,0"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
-    <path
-       sodipodi:nodetypes="cccc"
-       transform="translate(0,242.362)"
-       inkscape:connector-curvature="0"
-       id="path10796"
-       d="m 323.37508,282.82664 -12.43492,-0.21628 0,-72.73098 12.43492,0.21628"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-start:url(#Arrow1Mstart);marker-end:url(#Arrow1Mend)" />
-    <path
-       sodipodi:nodetypes="cc"
-       transform="translate(0,242.362)"
-       inkscape:connector-curvature="0"
-       id="path10798"
-       d="m 313.52609,246.46115 9.84899,0"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
-       d="m 321.24051,599.22561 -7.9703,0"
-       id="path11782"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)"
-       d="m 313.25336,599.22561 -11.06721,0"
-       id="path11786"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)"
-       d="m 204.27544,629.80732 0,-125.06952 -51.77032,0"
-       id="path11788"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="ccc" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
-       d="m 216.14473,629.80732 -11.86929,0"
-       id="path11790"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
-       d="m 216.14473,596.40914 -11.86929,0"
-       id="path11792"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
-       d="m 215.89219,558.40215 -11.61675,0"
-       id="path11794"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       sodipodi:nodetypes="cc"
-       transform="translate(0,242.362)"
-       inkscape:connector-curvature="0"
-       id="path11796"
-       d="m 528.21429,338.18422 13.57143,0"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)" />
-    <path
-       sodipodi:nodetypes="cc"
-       transform="translate(0,242.362)"
-       inkscape:connector-curvature="0"
-       id="path11798"
-       d="m 627.85714,338.18422 13.92858,0"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)" />
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path12390"
-       d="m 482.0953,485.43419 0,35.35534"
-       style="stroke-linejoin:miter;marker-end:url(#Arrow1MendyfLA);stroke-opacity:1;stroke:#c0dcdc;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;stroke-width:3.10000000000000009;fill:none" />
-    <path
-       style="stroke-linejoin:miter;marker-end:url(#Arrow1MendyfLT);stroke-opacity:1;stroke:#c0dcdc;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;stroke-width:3.10000000000000009;fill:none"
-       d="m 586.64609,485.4342 0,35.35533"
-       id="path4730"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path4732"
-       d="m 686.14612,485.43419 0,35.35534"
-       style="stroke-linejoin:miter;marker-end:url(#Arrow1MendyfL2);stroke-opacity:1;stroke:#c0dcdc;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;stroke-width:3.10000000000000009;fill:none" />
-    <path
-       sodipodi:nodetypes="ccc"
-       transform="translate(0,242.362)"
-       inkscape:connector-curvature="0"
-       id="path4753"
-       d="m 315.83503,393.22325 0,-71.72158 -13.64888,0"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)" />
-    <path
-       sodipodi:nodetypes="cc"
-       transform="translate(0,242.362)"
-       inkscape:connector-curvature="0"
-       id="path4957"
-       d="m 315.83503,393.22325 -13.64888,0"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)" />
-    <rect
-       y="335.89279"
-       x="180.4389"
-       height="348.21826"
-       width="560.12915"
-       id="rect4135"
-       style="fill:none;stroke:#000000;stroke-width:2.16550922;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:2.16550904, 2.16550904;stroke-dashoffset:0" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text4711"
-       y="382.76871"
-       x="356.42432"
-       style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         y="382.76871"
-         x="356.42432"
-         id="tspan4713"
-         sodipodi:role="line">Gadgetron</tspan></text>
-  </g>
-</svg>
diff --git a/doc/manual/figs/arrayfileformat.png b/doc/manual/figs/arrayfileformat.png
deleted file mode 100644
index c211c2f..0000000
Binary files a/doc/manual/figs/arrayfileformat.png and /dev/null differ
diff --git a/doc/manual/figs/arrayfileformat.svg b/doc/manual/figs/arrayfileformat.svg
deleted file mode 100644
index 6a587e2..0000000
--- a/doc/manual/figs/arrayfileformat.svg
+++ /dev/null
@@ -1,247 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<!-- Created with Inkscape (http://www.inkscape.org/) -->
-
-<svg
-   xmlns:dc="http://purl.org/dc/elements/1.1/"
-   xmlns:cc="http://creativecommons.org/ns#"
-   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-   xmlns:svg="http://www.w3.org/2000/svg"
-   xmlns="http://www.w3.org/2000/svg"
-   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
-   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
-   width="744.09448819"
-   height="1052.3622047"
-   id="svg2"
-   version="1.1"
-   inkscape:version="0.48.2 r9819"
-   sodipodi:docname="arrayfileformat.svg"
-   inkscape:export-filename="/home/hansenms/mrprogs/gadgetron/doc/manual/figs/arrayfileformat.png"
-   inkscape:export-xdpi="300"
-   inkscape:export-ydpi="300">
-  <defs
-     id="defs4" />
-  <sodipodi:namedview
-     id="base"
-     pagecolor="#ffffff"
-     bordercolor="#666666"
-     borderopacity="1.0"
-     inkscape:pageopacity="0.0"
-     inkscape:pageshadow="2"
-     inkscape:zoom="0.7"
-     inkscape:cx="-13.284581"
-     inkscape:cy="808.61822"
-     inkscape:document-units="px"
-     inkscape:current-layer="layer1"
-     showgrid="false"
-     inkscape:window-width="1746"
-     inkscape:window-height="967"
-     inkscape:window-x="64"
-     inkscape:window-y="24"
-     inkscape:window-maximized="0" />
-  <metadata
-     id="metadata7">
-    <rdf:RDF>
-      <cc:Work
-         rdf:about="">
-        <dc:format>image/svg+xml</dc:format>
-        <dc:type
-           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
-        <dc:title></dc:title>
-      </cc:Work>
-    </rdf:RDF>
-  </metadata>
-  <g
-     inkscape:label="Layer 1"
-     inkscape:groupmode="layer"
-     id="layer1">
-    <rect
-       style="fill:none;stroke:#000000;stroke-width:3.0999999;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
-       id="rect3775"
-       width="82.85714"
-       height="211.42857"
-       x="111.42857"
-       y="40.933613" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:5, 30;stroke-dashoffset:0"
-       d="m 153.57142,263.79075 0,192.85714"
-       id="path3777"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
-       d="m 112.85714,83.362147 80,0"
-       id="path3779"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <text
-       xml:space="preserve"
-       style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="140.27902"
-       y="75.505035"
-       id="text3781"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         id="tspan3783"
-         x="140.27902"
-         y="75.505035">4</tspan></text>
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path3785"
-       d="m 112.85714,125.64792 80,0"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
-       d="m 112.85714,167.93364 80,0"
-       id="path3787"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path3789"
-       d="m 112.85714,210.21929 80,0"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
-    <text
-       xml:space="preserve"
-       style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="113.8337"
-       y="119.79939"
-       id="text3781-9"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         id="tspan3783-4"
-         x="113.8337"
-         y="119.79939">128</tspan></text>
-    <text
-       sodipodi:linespacing="125%"
-       id="text3816"
-       y="164.08511"
-       x="113.8337"
-       style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         y="164.08511"
-         x="113.8337"
-         id="tspan3818"
-         sodipodi:role="line">128</tspan></text>
-    <text
-       sodipodi:linespacing="125%"
-       id="text3820"
-       y="202.6479"
-       x="139.78098"
-       style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         y="202.6479"
-         x="139.78098"
-         id="tspan3822"
-         sodipodi:role="line">1</tspan></text>
-    <text
-       xml:space="preserve"
-       style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="139.78098"
-       y="245.50505"
-       id="text3824"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         id="tspan3826"
-         x="139.78098"
-         y="245.50505">1</tspan></text>
-    <text
-       xml:space="preserve"
-       style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="-384.66272"
-       y="140.69617"
-       id="text3828"
-       sodipodi:linespacing="125%"
-       transform="matrix(0,-1,1,0,0,0)"><tspan
-         sodipodi:role="line"
-         id="tspan3830"
-         x="-384.66272"
-         y="140.69617">Data</tspan></text>
-    <text
-       transform="matrix(0,-1,1,0,0,0)"
-       sodipodi:linespacing="125%"
-       id="text3832"
-       y="99.267601"
-       x="-224.66272"
-       style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         y="99.267601"
-         x="-224.66272"
-         id="tspan3834"
-         sodipodi:role="line">Header</tspan></text>
-    <text
-       transform="matrix(0,-1,1,0,0,0)"
-       sodipodi:linespacing="125%"
-       id="text3836"
-       y="179.26761"
-       x="-442.37701"
-       style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         y="179.26761"
-         x="-442.37701"
-         id="tspan3838"
-         sodipodi:role="line"
-         style="font-size:20px">(16384 elements)</tspan></text>
-    <text
-       xml:space="preserve"
-       style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="208.87842"
-       y="69.505043"
-       id="text3859"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         id="tspan3861"
-         x="208.87842"
-         y="69.505043"
-         style="font-size:22px">N-dimensions</tspan></text>
-    <text
-       sodipodi:linespacing="125%"
-       id="text3863"
-       y="113.21933"
-       x="200.39746"
-       style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         style="font-size:22px"
-         y="113.21933"
-         x="200.39746"
-         id="tspan3865"
-         sodipodi:role="line">length of dim 1</tspan></text>
-    <text
-       xml:space="preserve"
-       style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="200.4834"
-       y="156.07648"
-       id="text3867"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         id="tspan3869"
-         x="200.4834"
-         y="156.07648"
-         style="font-size:22px">length of dim 2</tspan></text>
-    <text
-       sodipodi:linespacing="125%"
-       id="text3871"
-       y="196.6479"
-       x="200.26318"
-       style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         style="font-size:22px"
-         y="196.6479"
-         x="200.26318"
-         id="tspan3873"
-         sodipodi:role="line">length of dim 3</tspan></text>
-    <text
-       sodipodi:linespacing="125%"
-       id="text3879"
-       y="237.21933"
-       x="200"
-       style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         style="font-size:22px"
-         y="237.21933"
-         x="200"
-         id="tspan3881"
-         sodipodi:role="line">length of dim 4</tspan></text>
-  </g>
-</svg>
diff --git a/doc/manual/figs/cgsense.png b/doc/manual/figs/cgsense.png
deleted file mode 100644
index dcfe43d..0000000
Binary files a/doc/manual/figs/cgsense.png and /dev/null differ
diff --git a/doc/manual/figs/cgsense.svg b/doc/manual/figs/cgsense.svg
deleted file mode 100644
index 345d6c9..0000000
--- a/doc/manual/figs/cgsense.svg
+++ /dev/null
@@ -1,671 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<!-- Created with Inkscape (http://www.inkscape.org/) -->
-
-<svg
-   xmlns:dc="http://purl.org/dc/elements/1.1/"
-   xmlns:cc="http://creativecommons.org/ns#"
-   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-   xmlns:svg="http://www.w3.org/2000/svg"
-   xmlns="http://www.w3.org/2000/svg"
-   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
-   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
-   width="225"
-   height="540"
-   id="svg7000"
-   version="1.1"
-   inkscape:version="0.48.2 r9819"
-   sodipodi:docname="cgsense.svg">
-  <sodipodi:namedview
-     id="base"
-     pagecolor="#ffffff"
-     bordercolor="#666666"
-     borderopacity="1.0"
-     inkscape:pageopacity="0.0"
-     inkscape:pageshadow="2"
-     inkscape:zoom="1.9404392"
-     inkscape:cx="26.752607"
-     inkscape:cy="293.58724"
-     inkscape:document-units="px"
-     inkscape:current-layer="layer1"
-     showgrid="false"
-     units="in"
-     inkscape:window-width="1440"
-     inkscape:window-height="793"
-     inkscape:window-x="0"
-     inkscape:window-y="0"
-     inkscape:window-maximized="0" />
-  <defs
-     id="defs7002">
-    <marker
-       style="overflow:visible;"
-       id="Arrow2Mend"
-       refX="0.0"
-       refY="0.0"
-       orient="auto"
-       inkscape:stockid="Arrow2Mend">
-      <path
-         transform="scale(0.6) rotate(180) translate(0,0)"
-         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
-         style="font-size:12.0;fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
-         id="path5505" />
-    </marker>
-    <marker
-       inkscape:stockid="Arrow1Lend"
-       orient="auto"
-       refY="0.0"
-       refX="0.0"
-       id="Arrow1Lend"
-       style="overflow:visible;">
-      <path
-         id="path6118"
-         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
-         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;marker-start:none;"
-         transform="scale(0.8) rotate(180) translate(12.5,0)" />
-    </marker>
-    <marker
-       inkscape:stockid="Arrow2Lend"
-       orient="auto"
-       refY="0.0"
-       refX="0.0"
-       id="Arrow2Lend"
-       style="overflow:visible;">
-      <path
-         id="path6136"
-         style="font-size:12.0;fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
-         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
-         transform="scale(1.1) rotate(180) translate(1,0)" />
-    </marker>
-    <marker
-       inkscape:stockid="Arrow1Mend"
-       orient="auto"
-       refY="0.0"
-       refX="0.0"
-       id="Arrow1Mend"
-       style="overflow:visible;">
-      <path
-         id="path6124"
-         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
-         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;marker-start:none;"
-         transform="scale(0.4) rotate(180) translate(10,0)" />
-    </marker>
-    <marker
-       style="overflow:visible;"
-       id="Arrow2Mendw"
-       refX="0.0"
-       refY="0.0"
-       orient="auto"
-       inkscape:stockid="Arrow2Mendw">
-      <path
-         transform="scale(0.6) rotate(180) translate(0,0)"
-         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
-         style="stroke-linejoin:round;font-size:12.0;fill-rule:evenodd;stroke:#000000;stroke-width:0.62500000;fill:#000000"
-         id="path8552" />
-    </marker>
-    <marker
-       style="overflow:visible;"
-       id="Arrow2MendwQ"
-       refX="0.0"
-       refY="0.0"
-       orient="auto"
-       inkscape:stockid="Arrow2MendwQ">
-      <path
-         transform="scale(0.6) rotate(180) translate(0,0)"
-         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
-         style="stroke-linejoin:round;font-size:12.0;fill-rule:evenodd;stroke:#c8c8c8;stroke-width:0.62500000;fill:#c8c8c8"
-         id="path8732" />
-    </marker>
-    <marker
-       style="overflow:visible;"
-       id="Arrow2MendwQw"
-       refX="0.0"
-       refY="0.0"
-       orient="auto"
-       inkscape:stockid="Arrow2MendwQw">
-      <path
-         transform="scale(0.6) rotate(180) translate(0,0)"
-         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
-         style="stroke-linejoin:round;font-size:12.0;fill-rule:evenodd;stroke:#b4b4b4;stroke-width:0.62500000;fill:#b4b4b4"
-         id="path8915" />
-    </marker>
-    <marker
-       style="overflow:visible;"
-       id="Arrow2MendwQwi"
-       refX="0.0"
-       refY="0.0"
-       orient="auto"
-       inkscape:stockid="Arrow2MendwQwi">
-      <path
-         transform="scale(0.6) rotate(180) translate(0,0)"
-         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
-         style="stroke-linejoin:round;font-size:12.0;fill-rule:evenodd;stroke:#828282;stroke-width:0.62500000;fill:#828282"
-         id="path9106" />
-    </marker>
-    <marker
-       style="overflow:visible;"
-       id="Arrow2MendwQwK"
-       refX="0.0"
-       refY="0.0"
-       orient="auto"
-       inkscape:stockid="Arrow2MendwQwK">
-      <path
-         transform="scale(0.6) rotate(180) translate(0,0)"
-         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
-         style="stroke-linejoin:round;font-size:12.0;fill-rule:evenodd;stroke:#000000;stroke-width:0.62500000;fill:#000000"
-         id="path4159" />
-    </marker>
-    <marker
-       style="overflow:visible;"
-       id="Arrow2MendwQwif"
-       refX="0.0"
-       refY="0.0"
-       orient="auto"
-       inkscape:stockid="Arrow2MendwQwif">
-      <path
-         transform="scale(0.6) rotate(180) translate(0,0)"
-         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
-         style="stroke-linejoin:round;font-size:12.0;fill-rule:evenodd;stroke:#000000;stroke-width:0.62500000;fill:#000000"
-         id="path4162" />
-    </marker>
-    <marker
-       style="overflow:visible;"
-       id="Arrow2MendwQwo"
-       refX="0.0"
-       refY="0.0"
-       orient="auto"
-       inkscape:stockid="Arrow2MendwQwo">
-      <path
-         transform="scale(0.6) rotate(180) translate(0,0)"
-         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
-         style="stroke-linejoin:round;font-size:12.0;fill-rule:evenodd;stroke:#000000;stroke-width:0.62500000;fill:#000000"
-         id="path4165" />
-    </marker>
-  </defs>
-  <metadata
-     id="metadata7005">
-    <rdf:RDF>
-      <cc:Work
-         rdf:about="">
-        <dc:format>image/svg+xml</dc:format>
-        <dc:type
-           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
-        <dc:title />
-      </cc:Work>
-    </rdf:RDF>
-  </metadata>
-  <g
-     transform="translate(0,-512.35975)"
-     id="layer1"
-     inkscape:groupmode="layer"
-     inkscape:label="Layer 1">
-    <rect
-       y="523.48932"
-       x="66.106224"
-       height="31.714287"
-       width="104.11561"
-       id="rect5207"
-       style="fill:#97d2ea;fill-opacity:1;stroke:#dcdcdc;stroke-width:0.21841221000000000;stroke-linecap:butt;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text5209"
-       y="533.77509"
-       x="83.459534"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         y="533.77509"
-         x="83.459534"
-         id="tspan5211"
-         sodipodi:role="line">NoiseAdjustGadget</tspan></text>
-    <text
-       sodipodi:linespacing="125%"
-       id="text5721"
-       y="549.6322"
-       x="91.235802"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         style="font-size:6px"
-         y="549.6322"
-         x="91.235802"
-         id="tspan5723"
-         sodipodi:role="line">Noise Prewitening</tspan></text>
-    <rect
-       style="fill:#97d2ea;fill-opacity:1;stroke:#dcdcdc;stroke-width:0.21841221000000000;stroke-linecap:butt;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
-       id="rect5725"
-       width="104.11561"
-       height="31.714287"
-       x="66.106224"
-       y="570.19067" />
-    <text
-       xml:space="preserve"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="97.613441"
-       y="580.47638"
-       id="text5727"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         id="tspan5729"
-         x="97.613441"
-         y="580.47638">PCAGadget</tspan></text>
-    <text
-       xml:space="preserve"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="157.30838"
-       y="858.38953"
-       id="text5731"
-       sodipodi:linespacing="125%"><tspan
-         id="tspan5737"
-         sodipodi:role="line"
-         x="157.30838"
-         y="858.38953"
-         style="font-size:10px;text-align:center;text-anchor:middle">Downstream Gadgets</tspan><tspan
-         sodipodi:role="line"
-         x="157.30838"
-         y="870.88953"
-         style="font-size:10px;text-align:center;text-anchor:middle"
-         id="tspan11864">Image Scaling, etc.</tspan></text>
-    <rect
-       y="616.89203"
-       x="66.106224"
-       height="31.714287"
-       width="104.11561"
-       id="rect5768"
-       style="fill:#97d2ea;fill-opacity:1;stroke:#dcdcdc;stroke-width:0.21841221000000000;stroke-linecap:butt;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text5770"
-       y="625.89203"
-       x="80.130241"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         y="625.89203"
-         x="80.130241"
-         id="tspan5772"
-         sodipodi:role="line">CoilReductionGadget</tspan></text>
-    <text
-       sodipodi:linespacing="125%"
-       id="text5774"
-       y="637.03485"
-       x="118.01754"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         y="637.03485"
-         x="118.01754"
-         sodipodi:role="line"
-         id="tspan5778">Reduce Channels</tspan><tspan
-         id="tspan5782"
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         y="644.53485"
-         x="118.01754"
-         sodipodi:role="line">e.g 32 -> 16</tspan></text>
-    <text
-       xml:space="preserve"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="116.98685"
-       y="590.65363"
-       id="text11971"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         x="116.98685"
-         y="590.65363"
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         id="tspan11975">Virtual Channels</tspan><tspan
-         sodipodi:role="line"
-         x="116.98685"
-         y="598.15363"
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         id="tspan11979">Principal Component Analysis</tspan></text>
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path7323"
-       d="m 118.16403,555.55174 0,12.85714"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)"
-       d="m 118.16403,602.12825 0,12.85714"
-       id="path7511"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path7513"
-       d="m 118.16403,648.78769 0,12.85714"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)" />
-    <rect
-       style="fill:#97d2ea;fill-opacity:1;stroke:#dcdcdc;stroke-width:0.72486401000000000;stroke-linecap:butt;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
-       id="rect7752"
-       width="210.44579"
-       height="172.81833"
-       x="9.1387424"
-       y="664.21027" />
-    <rect
-       y="686.64551"
-       x="23.571426"
-       height="43.214283"
-       width="183.57141"
-       id="rect7770"
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none" />
-    <rect
-       y="699.21692"
-       x="27.5"
-       height="22.857143"
-       width="7.5"
-       id="rect8298"
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none" />
-    <text
-       xml:space="preserve"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="49.807228"
-       y="694.1322"
-       id="text8304"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         id="tspan8306"
-         x="49.807228"
-         y="694.1322"
-         style="font-size:6px">Circular Data Buffer</tspan></text>
-    <rect
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none"
-       id="rect8315"
-       width="7.5"
-       height="22.857143"
-       x="38.547619"
-       y="699.21692" />
-    <rect
-       y="699.21692"
-       x="49.595238"
-       height="22.857143"
-       width="7.5"
-       id="rect8317"
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none" />
-    <rect
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none"
-       id="rect8319"
-       width="7.5"
-       height="22.857143"
-       x="60.642849"
-       y="699.21692" />
-    <rect
-       y="699.21692"
-       x="71.690475"
-       height="22.857143"
-       width="7.5"
-       id="rect8321"
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none" />
-    <rect
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none"
-       id="rect8323"
-       width="7.5"
-       height="22.857143"
-       x="82.738098"
-       y="699.21692" />
-    <rect
-       y="699.21692"
-       x="93.785713"
-       height="22.857143"
-       width="7.5"
-       id="rect8325"
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none" />
-    <rect
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none"
-       id="rect8327"
-       width="7.5"
-       height="22.857143"
-       x="104.83333"
-       y="699.21692" />
-    <rect
-       y="699.21692"
-       x="115.88095"
-       height="22.857143"
-       width="7.5"
-       id="rect8329"
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none" />
-    <rect
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none"
-       id="rect8331"
-       width="7.5"
-       height="22.857143"
-       x="126.92857"
-       y="699.21692" />
-    <rect
-       y="699.21692"
-       x="137.9762"
-       height="22.857143"
-       width="7.5"
-       id="rect8333"
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none" />
-    <rect
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none"
-       id="rect8335"
-       width="7.5"
-       height="22.857143"
-       x="149.0238"
-       y="699.21692" />
-    <rect
-       y="699.21692"
-       x="160.07143"
-       height="22.857143"
-       width="7.5"
-       id="rect8337"
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none" />
-    <rect
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none"
-       id="rect8339"
-       width="7.5"
-       height="22.857143"
-       x="171.11905"
-       y="699.21692" />
-    <rect
-       y="699.21692"
-       x="182.16666"
-       height="22.857143"
-       width="7.5"
-       id="rect8341"
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none" />
-    <rect
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none"
-       id="rect8343"
-       width="7.5"
-       height="22.857143"
-       x="193.21428"
-       y="699.21692" />
-    <rect
-       y="690.43115"
-       x="146.42857"
-       height="35"
-       width="57.857143"
-       id="rect8347"
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none" />
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path8349"
-       d="m 41.071429,729.7169 -0.357143,21.60715"
-       style="stroke-linejoin:miter;marker-end:url(#Arrow2MendwQwK);stroke-opacity:1;stroke:#000000;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;stroke-width:2.50000000000000000;fill:none" />
-    <path
-       style="stroke-linejoin:miter;marker-end:url(#Arrow2MendwQwo);stroke-opacity:1;stroke:#000000;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;stroke-width:2.50000000000000000;fill:none"
-       d="m 94.071429,729.7169 -0.357143,21.60715"
-       id="path9084"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path9086"
-       d="m 177.5,725.43119 -0.35714,25.89286"
-       style="stroke-linejoin:miter;marker-end:url(#Arrow2MendwQwif);stroke-opacity:1;stroke:#000000;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;stroke-width:2.50000000000000000;fill:none" />
-    <rect
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none"
-       id="rect9283"
-       width="48.214287"
-       height="23.571428"
-       x="17.050394"
-       y="755.28833" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text9287"
-       y="765.06079"
-       x="41.078438"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         y="765.06079"
-         x="41.078438"
-         id="tspan9289"
-         sodipodi:role="line">Coil Sensitivity</tspan><tspan
-         id="tspan9291"
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         y="772.56079"
-         x="41.078438"
-         sodipodi:role="line">Map (B1 map)</tspan></text>
-    <rect
-       y="755.28833"
-       x="69.764679"
-       height="23.571428"
-       width="48.214287"
-       id="rect9301"
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none" />
-    <text
-       xml:space="preserve"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="93.792725"
-       y="765.06079"
-       id="text9303"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         x="93.792725"
-         y="765.06079"
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         id="tspan9307">Regularization</tspan><tspan
-         sodipodi:role="line"
-         x="93.792725"
-         y="772.56079"
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         id="tspan9311">Mask</tspan></text>
-    <rect
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none"
-       id="rect9313"
-       width="48.214287"
-       height="23.571428"
-       x="154.40753"
-       y="755.28833" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text9315"
-       y="765.56085"
-       x="178.47806"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         id="tspan9319"
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         y="765.56085"
-         x="178.47806"
-         sodipodi:role="line">Undersampled</tspan><tspan
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         y="773.06085"
-         x="178.47806"
-         sodipodi:role="line"
-         id="tspan10825">Data Frame</tspan></text>
-    <path
-       sodipodi:nodetypes="cccc"
-       inkscape:connector-curvature="0"
-       id="path10625"
-       d="m 118.21429,664.45068 0,18.30193 78.75,0 0,15.17856"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow2Mend)" />
-    <rect
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none"
-       id="rect10831"
-       width="85.000008"
-       height="23.571428"
-       x="45.836105"
-       y="801.21692" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text10833"
-       y="811.48944"
-       x="87.007004"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         id="tspan10837"
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         y="811.48944"
-         x="87.007004"
-         sodipodi:role="line">Conjugate Gradient Solver</tspan><tspan
-         id="tspan11996"
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         y="818.98944"
-         x="87.007004"
-         sodipodi:role="line">GPU Based</tspan></text>
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)"
-       d="m 42.321428,779.0026 0,9.68751 52.053572,0 0,11.38392"
-       id="path10841"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cccc" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
-       d="m 180.35714,779.0026 0,9.68751 -111.785711,0"
-       id="path11039"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="ccc" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
-       d="m 94.375,779.0026 0,9.82144"
-       id="path11041"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <rect
-       y="801.21692"
-       x="143.82033"
-       height="23.571428"
-       width="27.500006"
-       id="rect11049"
-       style="fill:#000000;fill-opacity:0.19607843;stroke:none" />
-    <text
-       xml:space="preserve"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="157.43556"
-       y="814.56561"
-       id="text11051"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         x="157.43556"
-         y="814.56561"
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         id="tspan11053">Image</tspan></text>
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path11065"
-       d="m 130.76209,813.00263 11.74695,0"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-mid:none;marker-end:url(#Arrow1Mend)" />
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path7521"
-       d="m 157.27953,825.07986 0,23.26505"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)" />
-    <text
-       transform="translate(0,512.35975)"
-       sodipodi:linespacing="125%"
-       id="text11990"
-       y="290.05658"
-       x="126.77542"
-       style="font-size:12px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         id="tspan11992"
-         sodipodi:role="line" /></text>
-    <text
-       sodipodi:linespacing="125%"
-       id="text11998"
-       y="678.21765"
-       x="12.88368"
-       style="font-size:12px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         y="678.21765"
-         x="12.88368"
-         id="tspan12000"
-         sodipodi:role="line">CGSenseGadget</tspan></text>
-  </g>
-</svg>
diff --git a/doc/manual/figs/examplecgsenseresult.png b/doc/manual/figs/examplecgsenseresult.png
deleted file mode 100644
index 6745b60..0000000
Binary files a/doc/manual/figs/examplecgsenseresult.png and /dev/null differ
diff --git a/doc/manual/figs/examplegrapparesult.png b/doc/manual/figs/examplegrapparesult.png
deleted file mode 100644
index b2d0ca3..0000000
Binary files a/doc/manual/figs/examplegrapparesult.png and /dev/null differ
diff --git a/doc/manual/figs/examplelibresult.png b/doc/manual/figs/examplelibresult.png
deleted file mode 100644
index 21cfd1e..0000000
Binary files a/doc/manual/figs/examplelibresult.png and /dev/null differ
diff --git a/doc/manual/figs/gadget.png b/doc/manual/figs/gadget.png
deleted file mode 100644
index 48548bb..0000000
Binary files a/doc/manual/figs/gadget.png and /dev/null differ
diff --git a/doc/manual/figs/gadget.svg b/doc/manual/figs/gadget.svg
deleted file mode 100644
index 7aafdca..0000000
--- a/doc/manual/figs/gadget.svg
+++ /dev/null
@@ -1,573 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<!-- Created with Inkscape (http://www.inkscape.org/) -->
-
-<svg
-   xmlns:dc="http://purl.org/dc/elements/1.1/"
-   xmlns:cc="http://creativecommons.org/ns#"
-   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-   xmlns:svg="http://www.w3.org/2000/svg"
-   xmlns="http://www.w3.org/2000/svg"
-   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
-   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
-   width="225"
-   height="540"
-   id="svg8632"
-   version="1.1"
-   inkscape:version="0.48.2 r9819"
-   sodipodi:docname="gadget.svg"
-   inkscape:export-filename="/home/hansenms/mrprogs/gadgetron/doc/manual/figs/gadget.png"
-   inkscape:export-xdpi="299.97357"
-   inkscape:export-ydpi="299.97357">
-  <sodipodi:namedview
-     id="base"
-     pagecolor="#ffffff"
-     bordercolor="#666666"
-     borderopacity="1.0"
-     inkscape:pageopacity="1"
-     inkscape:pageshadow="2"
-     inkscape:zoom="2.8"
-     inkscape:cx="76.089576"
-     inkscape:cy="420.2177"
-     inkscape:document-units="px"
-     inkscape:current-layer="layer1"
-     showgrid="false"
-     inkscape:window-width="1920"
-     inkscape:window-height="1176"
-     inkscape:window-x="0"
-     inkscape:window-y="24"
-     inkscape:window-maximized="1"
-     units="in" />
-  <defs
-     id="defs8634">
-    <marker
-       inkscape:stockid="Arrow2Lend"
-       orient="auto"
-       refY="0"
-       refX="0"
-       id="Arrow2Lend"
-       style="overflow:visible">
-      <path
-         id="path3849"
-         style="font-size:12px;fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
-         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
-         transform="matrix(-1.1,0,0,-1.1,-1.1,0)"
-         inkscape:connector-curvature="0" />
-    </marker>
-    <marker
-       inkscape:stockid="TriangleOutL"
-       orient="auto"
-       refY="0"
-       refX="0"
-       id="TriangleOutL-9"
-       style="overflow:visible">
-      <path
-         inkscape:connector-curvature="0"
-         id="path4614-0"
-         d="m 5.77,0 -8.65,5 0,-10 8.65,5 z"
-         style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none"
-         transform="scale(0.8,0.8)" />
-    </marker>
-    <marker
-       inkscape:stockid="Arrow1Mend"
-       orient="auto"
-       refY="0"
-       refX="0"
-       id="Arrow1Mend"
-       style="overflow:visible">
-      <path
-         inkscape:connector-curvature="0"
-         id="path3972"
-         d="M 0,0 5,-5 -12.5,0 5,5 0,0 z"
-         style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none"
-         transform="matrix(-0.4,0,0,-0.4,-4,0)" />
-    </marker>
-    <marker
-       inkscape:stockid="Arrow2Lend"
-       orient="auto"
-       refY="0"
-       refX="0"
-       id="Arrow2Lend-6"
-       style="overflow:visible">
-      <path
-         inkscape:connector-curvature="0"
-         id="path3849-4"
-         style="font-size:12px;fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
-         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
-         transform="matrix(-1.1,0,0,-1.1,-1.1,0)" />
-    </marker>
-    <marker
-       inkscape:stockid="TriangleOutL-9B"
-       orient="auto"
-       refY="0"
-       refX="0"
-       id="TriangleOutL-9B"
-       style="overflow:visible">
-      <path
-         inkscape:connector-curvature="0"
-         id="path5674"
-         d="m 5.77,0 -8.65,5 0,-10 8.65,5 z"
-         style="marker-start:none;stroke:#7dbbc1;stroke-width:1pt;fill:#7dbbc1;fill-rule:evenodd"
-         transform="scale(0.8,0.8)" />
-    </marker>
-    <marker
-       inkscape:stockid="TriangleOutL-9Bp"
-       orient="auto"
-       refY="0"
-       refX="0"
-       id="TriangleOutL-9Bp"
-       style="overflow:visible">
-      <path
-         inkscape:connector-curvature="0"
-         id="path5828"
-         d="m 5.77,0 -8.65,5 0,-10 8.65,5 z"
-         style="fill-rule:evenodd;marker-start:none;stroke:#053df0;stroke-width:1pt;fill:#053df0"
-         transform="scale(0.8,0.8)" />
-    </marker>
-  </defs>
-  <metadata
-     id="metadata8637">
-    <rdf:RDF>
-      <cc:Work
-         rdf:about="">
-        <dc:format>image/svg+xml</dc:format>
-        <dc:type
-           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
-        <dc:title />
-      </cc:Work>
-    </rdf:RDF>
-  </metadata>
-  <g
-     inkscape:label="Layer 1"
-     inkscape:groupmode="layer"
-     id="layer1"
-     transform="translate(0,-512.35975)">
-    <path
-       style="opacity:0.29999999999999999;stroke-linejoin:miter;marker-end:url(#TriangleOutL-9Bp);stroke-opacity:1;stroke:#053df0;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;stroke-width:1.16955005999999995;fill:none"
-       d="m 144.9696,545.92479 c 105.22119,20.68554 72.64169,110.52695 -4.16809,114.10406 C 53.424124,659.61535 9.1283266,578.98608 115.88548,545.62781"
-       id="path4372"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="ccc"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741" />
-    <rect
-       style="opacity:0.93999999000000001;fill:#0085cb;fill-opacity:1;stroke:#000000;stroke-width:0.42998171000000002;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:0.85996340000000004, 0.42998170000000002;stroke-dashoffset:0"
-       id="rect9203"
-       width="25.184643"
-       height="6.1425958"
-       x="16.957426"
-       y="571.45758" />
-    <rect
-       style="opacity:0.93999999000000001;fill:#0085cb;fill-opacity:1;stroke:#000000;stroke-width:0.42998171000000002;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:0.85996340000000004, 0.42998170000000002;stroke-dashoffset:0"
-       id="rect9203-1"
-       width="25.184643"
-       height="6.1425958"
-       x="16.957426"
-       y="580.67151" />
-    <rect
-       style="opacity:0.93999999000000001;fill:#0085cb;fill-opacity:1;stroke:#000000;stroke-width:0.42998171000000002;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:0.85996340000000004, 0.42998170000000002;stroke-dashoffset:0"
-       id="rect9203-1-9"
-       width="25.184643"
-       height="6.1425958"
-       x="16.957426"
-       y="591.72815" />
-    <rect
-       style="opacity:0.93999999000000001;fill:#0085cb;fill-opacity:1;stroke:#000000;stroke-width:0.42998171000000002;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:0.85996340000000004, 0.42998170000000002;stroke-dashoffset:0"
-       id="rect9203-1-9-1"
-       width="25.184643"
-       height="6.1425958"
-       x="16.957426"
-       y="601.55634" />
-    <rect
-       style="opacity:0.93999999000000001;fill:#0085cb;fill-opacity:1;stroke:#000000;stroke-width:0.42998171000000002;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:0.85996340000000004, 0.42998170000000002;stroke-dashoffset:0"
-       id="rect9203-1-9-6"
-       width="25.184643"
-       height="6.1425958"
-       x="16.957426"
-       y="610.7702" />
-    <rect
-       style="opacity:0.93999999000000001;fill:#0085cb;fill-opacity:1;stroke:#000000;stroke-width:0.42998171000000002;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:0.85996340000000004, 0.42998170000000002;stroke-dashoffset:0"
-       id="rect9203-1-9-7"
-       width="25.184643"
-       height="6.1425958"
-       x="16.957426"
-       y="620.59839" />
-    <rect
-       style="opacity:0.93999999000000001;fill:#0085cb;fill-opacity:1;stroke:#000000;stroke-width:0.42998171000000002;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:0.85996340000000004, 0.42998170000000002;stroke-dashoffset:0"
-       id="rect9203-1-9-7-4"
-       width="25.184643"
-       height="6.1425958"
-       x="16.957426"
-       y="631.65503" />
-    <rect
-       style="opacity:0.93999999000000001;fill:#0085cb;fill-opacity:1;stroke:#000000;stroke-width:0.42998171000000002;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:0.85996340000000004, 0.42998170000000002;stroke-dashoffset:0"
-       id="rect9203-1-9-7-4-1"
-       width="25.184643"
-       height="6.1425958"
-       x="16.957426"
-       y="643.94025" />
-    <rect
-       style="opacity:0.93999999;fill:none;stroke:#000000;stroke-width:0.51803416;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:1.03606836, 0.51803418;stroke-dashoffset:0"
-       id="rect9696"
-       width="39.345463"
-       height="90.463341"
-       x="9.8770142"
-       y="565.91284" />
-    <text
-       xml:space="preserve"
-       style="font-size:4.72979879px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans"
-       x="10.547411"
-       y="561.62946"
-       id="text9698"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         id="tspan9700"
-         x="10.547411"
-         y="561.62946">Message Queue</tspan></text>
-    <path
-       style="fill:none;stroke:#000000;stroke-width:0.42998168px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow2Lend)"
-       d="m 42.770819,574.65177 43.740043,0"
-       id="path9715"
-       inkscape:connector-curvature="0"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741"
-       sodipodi:nodetypes="cc" />
-    <text
-       xml:space="preserve"
-       style="font-size:4.72979832px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans"
-       x="56.739758"
-       y="571.4574"
-       id="text9905"
-       sodipodi:linespacing="125%"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741"><tspan
-         sodipodi:role="line"
-         id="tspan9907"
-         x="56.739758"
-         y="571.4574"
-         style="font-size:5.73308945px">Dequeue</tspan></text>
-    <text
-       xml:space="preserve"
-       style="font-size:3.29999995px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans"
-       x="-0.90061849"
-       y="609.21912"
-       id="text9939"
-       sodipodi:linespacing="125%"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741"><tspan
-         sodipodi:role="line"
-         id="tspan9941"
-         x="-0.90061849"
-         y="609.21912" /></text>
-    <text
-       xml:space="preserve"
-       style="font-size:13.75941467px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-       x="81.228851"
-       y="534.04047"
-       id="text9943"
-       sodipodi:linespacing="125%"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741"><tspan
-         sodipodi:role="line"
-         id="tspan9945"
-         x="81.228851"
-         y="534.04047">Gadget</tspan></text>
-    <rect
-       style="opacity:0.93999999000000001;fill:#97d2ea;fill-opacity:1;stroke:none"
-       id="rect9947-1"
-       width="27.363867"
-       height="13.464759"
-       x="88.395607"
-       y="585.8028"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741" />
-    <g
-       id="g9982"
-       transform="matrix(0.4299817,0,0,0.4299817,-85.56041,532.0746)"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741">
-      <text
-         sodipodi:linespacing="125%"
-         id="text9949-9"
-         y="137.66905"
-         x="436.13882"
-         style="font-size:11px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-         xml:space="preserve"><tspan
-           y="137.66905"
-           x="436.13882"
-           id="tspan9951-3"
-           sodipodi:role="line">Message</tspan><tspan
-           id="tspan9953-3"
-           y="151.41905"
-           x="436.13882"
-           sodipodi:role="line">Block</tspan></text>
-    </g>
-    <rect
-       style="opacity:0.93999999000000001;fill:#97d2ea;fill-opacity:1;stroke:none"
-       id="rect9947-1-8"
-       width="27.363867"
-       height="13.464759"
-       x="88.395607"
-       y="603.90387"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741" />
-    <g
-       id="g9982-9"
-       transform="matrix(0.4299817,0,0,0.4299817,-85.56041,550.17533)"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741">
-      <text
-         sodipodi:linespacing="125%"
-         id="text9949-9-5"
-         y="137.66905"
-         x="436.13882"
-         style="font-size:11px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-         xml:space="preserve"><tspan
-           y="137.66905"
-           x="436.13882"
-           id="tspan9951-3-5"
-           sodipodi:role="line">Message</tspan><tspan
-           id="tspan9953-3-1"
-           y="151.41905"
-           x="436.13882"
-           sodipodi:role="line">Block</tspan></text>
-    </g>
-    <rect
-       style="opacity:0.93999999000000001;fill:#97d2ea;fill-opacity:1;stroke:none"
-       id="rect9947-1-8-4"
-       width="27.363867"
-       height="13.464759"
-       x="88.395607"
-       y="622.00421"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741" />
-    <g
-       id="g9982-9-2"
-       transform="matrix(0.4299817,0,0,0.4299817,-85.56041,568.27603)"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741">
-      <text
-         sodipodi:linespacing="125%"
-         id="text9949-9-5-9"
-         y="137.66905"
-         x="436.13882"
-         style="font-size:11px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-         xml:space="preserve"><tspan
-           y="137.66905"
-           x="436.13882"
-           id="tspan9951-3-5-4"
-           sodipodi:role="line">Message</tspan><tspan
-           id="tspan9953-3-1-1"
-           y="151.41905"
-           x="436.13882"
-           sodipodi:role="line">Block</tspan></text>
-    </g>
-    <rect
-       style="opacity:0.93999999000000001;fill:#97d2ea;fill-opacity:1;stroke:none"
-       id="rect9947-1-8-41"
-       width="27.363867"
-       height="13.464759"
-       x="88.395607"
-       y="567.70215"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741" />
-    <g
-       id="g9982-9-8"
-       transform="matrix(0.4299817,0,0,0.4299817,-85.56041,513.97391)"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741">
-      <text
-         sodipodi:linespacing="125%"
-         id="text9949-9-5-1"
-         y="137.66905"
-         x="436.13882"
-         style="font-size:11px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-         xml:space="preserve"><tspan
-           y="137.66905"
-           x="436.13882"
-           id="tspan9951-3-5-7"
-           sodipodi:role="line">Message</tspan><tspan
-           id="tspan9953-3-1-9"
-           y="151.41905"
-           x="436.13882"
-           sodipodi:role="line">Block</tspan></text>
-    </g>
-    <rect
-       style="opacity:0.93999999000000001;fill:#97d2ea;fill-opacity:1;stroke:none"
-       id="rect9947-1-8-41-5"
-       width="53.42469"
-       height="13.464759"
-       x="136.60361"
-       y="593.30469"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741" />
-    <text
-       xml:space="preserve"
-       style="font-size:4.72979832px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans"
-       x="145.20862"
-       y="601.80029"
-       id="text9905-5"
-       sodipodi:linespacing="125%"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741"><tspan
-         sodipodi:role="line"
-         id="tspan9907-6"
-         x="145.20862"
-         y="601.80029">Data Validation</tspan></text>
-    <rect
-       style="opacity:0.93999999000000001;fill:#97d2ea;fill-opacity:1;stroke:none"
-       id="rect9947-1-8-41-5-1"
-       width="53.42469"
-       height="13.464759"
-       x="136.60361"
-       y="621.99536"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741" />
-    <text
-       xml:space="preserve"
-       style="font-size:4.72979832px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans"
-       x="144.2594"
-       y="630.03259"
-       id="text9905-5-8"
-       sodipodi:linespacing="125%"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741"><tspan
-         sodipodi:role="line"
-         id="tspan9907-6-9"
-         x="144.2594"
-         y="630.03259">Data Processing</tspan></text>
-    <path
-       style="fill:none;stroke:#000000;stroke-width:0.42998168px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow2Lend-6)"
-       d="m 115.75949,580.15476 31.4533,13.14992"
-       id="path10174"
-       inkscape:connector-type="polyline"
-       inkscape:connector-curvature="0"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:0.42998168px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow2Lend-6)"
-       d="m 115.75949,594.21141 20.84413,2.55339"
-       id="path10176"
-       inkscape:connector-type="polyline"
-       inkscape:connector-curvature="0"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:0.42998168px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow2Lend-6)"
-       d="m 115.75949,608.26805 20.84413,-3.6077"
-       id="path10178"
-       inkscape:connector-type="polyline"
-       inkscape:connector-curvature="0"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:0.42998168px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow2Lend-6)"
-       d="m 115.75949,622.32465 33.1912,-15.55524"
-       id="path10180"
-       inkscape:connector-type="polyline"
-       inkscape:connector-curvature="0"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:0.42998168px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow2Lend-6)"
-       d="m 163.31598,606.76941 0,15.22619"
-       id="path10182"
-       inkscape:connector-type="polyline"
-       inkscape:connector-curvature="0"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741" />
-    <text
-       xml:space="preserve"
-       style="font-size:6.87970734px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-       x="119.88577"
-       y="572.26294"
-       id="text10608"
-       sodipodi:linespacing="125%"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741"><tspan
-         sodipodi:role="line"
-         id="tspan10610"
-         x="119.88577"
-         y="572.26294">Execution Thread(s)</tspan></text>
-    <path
-       style="fill:none;stroke:#000000;stroke-width:0.42998168;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Lend-6)"
-       d="m 29.753802,671.11779 0,-20.47533"
-       id="path13563"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741" />
-    <text
-       xml:space="preserve"
-       style="font-size:17.19926834px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="44.491711"
-       y="672.14148"
-       id="text13753"
-       sodipodi:linespacing="125%"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741"><tspan
-         sodipodi:role="line"
-         x="44.491711"
-         y="672.14148"
-         id="tspan13757"
-         style="font-size:5.73308945px;text-align:center;text-anchor:middle">Enqueue</tspan><tspan
-         sodipodi:role="line"
-         x="44.491711"
-         y="679.30786"
-         id="tspan13761"
-         style="font-size:5.73308945px;text-align:center;text-anchor:middle">(From Upstream Gadget)</tspan></text>
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path13776"
-       d="m 163.48324,635.44182 0,26.36199"
-       style="fill:none;stroke:#000000;stroke-width:0.42998168;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow2Lend-6)"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text13778"
-       y="671.62988"
-       x="160.68909"
-       style="font-size:17.19926834px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741"><tspan
-         style="font-size:5.73308945px;text-align:center;text-anchor:middle"
-         id="tspan13782"
-         y="671.62988"
-         x="160.68909"
-         sodipodi:role="line">Pass on data</tspan><tspan
-         style="font-size:5.73308945px;text-align:center;text-anchor:middle"
-         y="678.79626"
-         x="160.68909"
-         sodipodi:role="line"
-         id="tspan13786">to downstream Gadget</tspan></text>
-  </g>
-</svg>
diff --git a/doc/manual/figs/grappa.png b/doc/manual/figs/grappa.png
deleted file mode 100644
index ca9e219..0000000
Binary files a/doc/manual/figs/grappa.png and /dev/null differ
diff --git a/doc/manual/figs/grappa.svg b/doc/manual/figs/grappa.svg
deleted file mode 100644
index 2835440..0000000
--- a/doc/manual/figs/grappa.svg
+++ /dev/null
@@ -1,594 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<!-- Created with Inkscape (http://www.inkscape.org/) -->
-
-<svg
-   xmlns:dc="http://purl.org/dc/elements/1.1/"
-   xmlns:cc="http://creativecommons.org/ns#"
-   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-   xmlns:svg="http://www.w3.org/2000/svg"
-   xmlns="http://www.w3.org/2000/svg"
-   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
-   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
-   width="270"
-   height="540"
-   id="svg5199"
-   version="1.1"
-   inkscape:version="0.48.2 r9819"
-   sodipodi:docname="grappa.svg"
-   inkscape:export-filename="/home/hansenms/mrprogs/gadgetron/doc/manual/figs/grappa.png"
-   inkscape:export-xdpi="299.97357"
-   inkscape:export-ydpi="299.97357">
-  <defs
-     id="defs5201">
-    <marker
-       inkscape:stockid="Arrow2Mend"
-       orient="auto"
-       refY="0.0"
-       refX="0.0"
-       id="Arrow2Mend"
-       style="overflow:visible;">
-      <path
-         id="path7268"
-         style="font-size:12.0;fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
-         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
-         transform="scale(0.6) rotate(180) translate(0,0)" />
-    </marker>
-    <marker
-       inkscape:stockid="Arrow1Mend"
-       orient="auto"
-       refY="0.0"
-       refX="0.0"
-       id="Arrow1Mend"
-       style="overflow:visible;">
-      <path
-         id="path6124"
-         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
-         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;marker-start:none;"
-         transform="scale(0.4) rotate(180) translate(10,0)" />
-    </marker>
-    <marker
-       inkscape:stockid="Arrow2Lend"
-       orient="auto"
-       refY="0.0"
-       refX="0.0"
-       id="Arrow2Lend"
-       style="overflow:visible;">
-      <path
-         id="path6136"
-         style="font-size:12.0;fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
-         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
-         transform="scale(1.1) rotate(180) translate(1,0)" />
-    </marker>
-    <marker
-       inkscape:stockid="Arrow1Lend"
-       orient="auto"
-       refY="0.0"
-       refX="0.0"
-       id="Arrow1Lend"
-       style="overflow:visible;">
-      <path
-         id="path6118"
-         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
-         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;marker-start:none;"
-         transform="scale(0.8) rotate(180) translate(12.5,0)" />
-    </marker>
-  </defs>
-  <sodipodi:namedview
-     id="base"
-     pagecolor="#ffffff"
-     bordercolor="#666666"
-     borderopacity="1.0"
-     inkscape:pageopacity="0.0"
-     inkscape:pageshadow="2"
-     inkscape:zoom="1.979899"
-     inkscape:cx="189.05381"
-     inkscape:cy="284.36943"
-     inkscape:document-units="px"
-     inkscape:current-layer="layer1"
-     showgrid="false"
-     units="in"
-     inkscape:window-width="1629"
-     inkscape:window-height="1147"
-     inkscape:window-x="474"
-     inkscape:window-y="109"
-     inkscape:window-maximized="0" />
-  <metadata
-     id="metadata5204">
-    <rdf:RDF>
-      <cc:Work
-         rdf:about="">
-        <dc:format>image/svg+xml</dc:format>
-        <dc:type
-           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
-        <dc:title></dc:title>
-      </cc:Work>
-    </rdf:RDF>
-  </metadata>
-  <g
-     inkscape:label="Layer 1"
-     inkscape:groupmode="layer"
-     id="layer1"
-     transform="translate(0,-512.35975)">
-    <rect
-       style="fill:#97d2ea;fill-opacity:1;stroke:#dcdcdc;stroke-width:0.21841221000000000;stroke-linecap:butt;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
-       id="rect5207"
-       width="104.11561"
-       height="31.714287"
-       x="19.316126"
-       y="532.71069" />
-    <text
-       xml:space="preserve"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="36.669441"
-       y="542.99646"
-       id="text5209"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         id="tspan5211"
-         x="36.669441"
-         y="542.99646">NoiseAdjustGadget</tspan></text>
-    <text
-       xml:space="preserve"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="44.445709"
-       y="558.85358"
-       id="text5721"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         id="tspan5723"
-         x="44.445709"
-         y="558.85358"
-         style="font-size:6px">Noise Prewitening</tspan></text>
-    <rect
-       y="627.30786"
-       x="19.316126"
-       height="31.714287"
-       width="104.11561"
-       id="rect5725"
-       style="fill:#97d2ea;fill-opacity:1;stroke:#dcdcdc;stroke-width:0.21841221000000000;stroke-linecap:butt;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text5727"
-       y="637.59357"
-       x="50.823345"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         y="637.59357"
-         x="50.823345"
-         id="tspan5729"
-         sodipodi:role="line">PCAGadget</tspan></text>
-    <text
-       sodipodi:linespacing="125%"
-       id="text5731"
-       y="647.45068"
-       x="71.22744"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         y="647.45068"
-         x="71.22744"
-         id="tspan5733"
-         sodipodi:role="line">Virtual Channels</tspan><tspan
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         y="654.95068"
-         x="71.22744"
-         sodipodi:role="line"
-         id="tspan5737">Principal Components</tspan></text>
-    <rect
-       style="fill:#97d2ea;fill-opacity:1;stroke:#dcdcdc;stroke-width:0.21841221000000000;stroke-linecap:butt;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
-       id="rect5768"
-       width="104.11561"
-       height="31.714287"
-       x="19.316126"
-       y="674.99121" />
-    <text
-       xml:space="preserve"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="33.340145"
-       y="683.99121"
-       id="text5770"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         id="tspan5772"
-         x="33.340145"
-         y="683.99121">CoilReductionGadget</tspan></text>
-    <text
-       xml:space="preserve"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="71.22744"
-       y="695.13403"
-       id="text5774"
-       sodipodi:linespacing="125%"><tspan
-         id="tspan5778"
-         sodipodi:role="line"
-         x="71.22744"
-         y="695.13403"
-         style="font-size:6px;text-align:center;text-anchor:middle">Reduce Channels</tspan><tspan
-         sodipodi:role="line"
-         x="71.22744"
-         y="702.63403"
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         id="tspan5782">e.g 32 -> 16</tspan></text>
-    <rect
-       y="580.81476"
-       x="19.316126"
-       height="31.714287"
-       width="104.11561"
-       id="rect5830"
-       style="fill:#97d2ea;fill-opacity:1;stroke:#dcdcdc;stroke-width:0.21841221000000000;stroke-linecap:butt;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text5832"
-       y="588.52905"
-       x="27.484425"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         y="588.52905"
-         x="27.484425"
-         id="tspan5834"
-         sodipodi:role="line"
-         style="font-size:5.4000001px">RemoveROOversamplingGadget</tspan></text>
-    <text
-       sodipodi:linespacing="125%"
-       id="text5836"
-       y="600.95764"
-       x="71.337303"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         id="tspan5840"
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         y="600.95764"
-         x="71.337303"
-         sodipodi:role="line">Reduce Readout Length</tspan><tspan
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         y="608.45764"
-         x="71.337303"
-         sodipodi:role="line"
-         id="tspan5895">e.g. 256 -> 128</tspan></text>
-    <rect
-       y="731.96539"
-       x="19.316126"
-       height="31.714287"
-       width="104.11561"
-       id="rect5869"
-       style="fill:#97d2ea;fill-opacity:1;stroke:#dcdcdc;stroke-width:0.21841221000000000;stroke-linecap:butt;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text5871"
-       y="740.10822"
-       x="44.801079"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         y="740.10822"
-         x="44.801079"
-         id="tspan5873"
-         sodipodi:role="line">GrappaGadget</tspan></text>
-    <text
-       sodipodi:linespacing="125%"
-       id="text5875"
-       y="752.10822"
-       x="71.353416"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         id="tspan5879"
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         y="752.10822"
-         x="71.353416"
-         sodipodi:role="line">Calculate Image Space</tspan><tspan
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         y="759.60822"
-         x="71.353416"
-         sodipodi:role="line"
-         id="tspan5899">GRAPPA Unmixing Coefficients</tspan></text>
-    <rect
-       style="fill:#97d2ea;fill-opacity:1;stroke:#dcdcdc;stroke-width:0.21841221000000000;stroke-linecap:butt;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
-       id="rect5881"
-       width="104.11561"
-       height="31.714287"
-       x="19.316126"
-       y="778.66675" />
-    <text
-       xml:space="preserve"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="27.672955"
-       y="787.66675"
-       id="text5883"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         id="tspan5885"
-         x="27.672955"
-         y="787.66675">GrappaUnmixingGadget</tspan></text>
-    <text
-       xml:space="preserve"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="71.477928"
-       y="798.80963"
-       id="text5887"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         x="71.477928"
-         y="798.80963"
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         id="tspan5891">FFT of Raw Data</tspan><tspan
-         sodipodi:role="line"
-         x="71.477928"
-         y="806.30963"
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         id="tspan5903">GRAPPA Unmixing</tspan></text>
-    <rect
-       y="825.3681"
-       x="19.316126"
-       height="31.714287"
-       width="104.11561"
-       id="rect5905"
-       style="fill:#97d2ea;fill-opacity:1;stroke:#dcdcdc;stroke-width:0.21841221000000000;stroke-linecap:butt;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text5907"
-       y="834.3681"
-       x="45.001469"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         y="834.3681"
-         x="45.001469"
-         id="tspan5909"
-         sodipodi:role="line">ExtractGadget</tspan></text>
-    <text
-       sodipodi:linespacing="125%"
-       id="text5911"
-       y="850.56177"
-       x="71.177635"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         id="tspan5915"
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         y="850.56177"
-         x="71.177635"
-         sodipodi:role="line">Extract Magnitude from Complex</tspan></text>
-    <rect
-       style="fill:#97d2ea;fill-opacity:1;stroke:#dcdcdc;stroke-width:0.21841221000000000;stroke-linecap:butt;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
-       id="rect5917"
-       width="104.11561"
-       height="31.714287"
-       x="19.316126"
-       y="872.06946" />
-    <text
-       xml:space="preserve"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="40.186821"
-       y="881.06946"
-       id="text5919"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         id="tspan5921"
-         x="40.186821"
-         y="881.06946">AutoScaleGadget</tspan></text>
-    <text
-       xml:space="preserve"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="71.174706"
-       y="892.21234"
-       id="text5923"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         x="71.174706"
-         y="892.21234"
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         id="tspan5927">Histogram based scaling</tspan><tspan
-         sodipodi:role="line"
-         x="71.174706"
-         y="899.71234"
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         id="tspan5945">prior to conversion to short</tspan></text>
-    <rect
-       y="918.77081"
-       x="19.316126"
-       height="31.714287"
-       width="104.11561"
-       id="rect5929"
-       style="fill:#97d2ea;fill-opacity:1;stroke:#dcdcdc;stroke-width:0.21841221000000000;stroke-linecap:butt;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text5931"
-       y="927.77081"
-       x="35.558502"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         y="927.77081"
-         x="35.558502"
-         id="tspan5933"
-         sodipodi:role="line">FloatToShortGadget</tspan></text>
-    <text
-       sodipodi:linespacing="125%"
-       id="text5935"
-       y="940.42896"
-       x="71.353416"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         id="tspan5939"
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         y="940.42896"
-         x="71.353416"
-         sodipodi:role="line">Generate 16-bit unsigned images</tspan></text>
-    <rect
-       style="fill:#97d2ea;fill-opacity:1;stroke:#dcdcdc;stroke-width:0.21841221000000000;stroke-linecap:butt;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
-       id="rect5975"
-       width="104.11561"
-       height="31.714287"
-       x="19.316126"
-       y="965.47217" />
-    <text
-       xml:space="preserve"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="36.374126"
-       y="974.47217"
-       id="text5977"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         id="tspan5979"
-         x="36.374126"
-         y="974.47217">ImageFinishGadget</tspan></text>
-    <text
-       xml:space="preserve"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="71.152733"
-       y="987.13031"
-       id="text5981"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         x="71.152733"
-         y="987.13031"
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         id="tspan5985">Return Images to Gadgetron</tspan><tspan
-         sodipodi:role="line"
-         x="71.152733"
-         y="994.63031"
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         id="tspan5989">Images are forwarded to client</tspan></text>
-    <rect
-       style="fill:#97d25d;fill-opacity:1;stroke:#dcdcdc;stroke-width:0.29617276999999997;stroke-linecap:butt;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
-       id="rect6068"
-       width="103.20275"
-       height="42.245083"
-       x="139.32896"
-       y="724.11749" />
-    <text
-       xml:space="preserve"
-       style="font-size:12px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="146.82634"
-       y="735.22919"
-       id="text6070"
-       sodipodi:linespacing="125%"><tspan
-         style="font-size:10px"
-         sodipodi:role="line"
-         id="tspan6072"
-         x="146.82634"
-         y="735.22919">GrappaCalculator</tspan></text>
-    <text
-       sodipodi:linespacing="125%"
-       id="text6074"
-       y="751.39166"
-       x="190.89909"
-       style="font-size:12px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         style="font-size:8px;text-align:center;text-anchor:middle"
-         y="751.39166"
-         x="190.89909"
-         id="tspan6076"
-         sodipodi:role="line">GPU Based</tspan><tspan
-         id="tspan6104"
-         style="font-size:8px;text-align:center;text-anchor:middle"
-         y="761.39166"
-         x="192.17253"
-         sodipodi:role="line">Active Object </tspan></text>
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path6110"
-       d="m 123.92093,746.95015 14.30769,0"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)" />
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path6930"
-       d="m 190.93035,766.96331 0,13.8896"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)" />
-    <rect
-       y="782.74701"
-       x="137.76913"
-       height="32.324883"
-       width="106.32242"
-       id="rect7118"
-       style="fill:#97d25d;fill-opacity:1;stroke:#dcdcdc;stroke-width:0.28393653000000002;stroke-linecap:butt;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text7120"
-       y="798.90942"
-       x="142.00163"
-       style="font-size:12px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         style="font-size:9px"
-         y="798.90942"
-         x="142.00163"
-         id="tspan7122"
-         sodipodi:role="line">Image Space Weights</tspan></text>
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path7515"
-       d="m 71.37393,707.32695 0,22.85714"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)"
-       d="m 71.37393,763.79886 0,12.85714"
-       id="path7517"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)"
-       d="m 71.37393,810.43664 0,12.85714"
-       id="path7521"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path7523"
-       d="m 71.37393,857.28363 0,12.85714"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)"
-       d="m 71.37393,903.96474 0,12.85714"
-       id="path7525"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path7527"
-       d="m 71.37393,950.64584 0,12.85714"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)" />
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path4301"
-       d="m 71.37393,565.4261 0,12.85714"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)"
-       d="m 138.97862,798.95015 -14.30769,0"
-       id="path6496"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)"
-       d="m 71.37393,613.40834 0,12.85714"
-       id="path8376"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path8378"
-       d="m 71.37393,660.38044 0,12.85714"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)" />
-    <rect
-       style="fill:none;stroke:#000000;stroke-width:0.92284429;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:1.84568849, 0.92284424;stroke-dashoffset:0"
-       id="rect8981"
-       width="119.12972"
-       height="119.48805"
-       x="131.82491"
-       y="702.52356" />
-    <text
-       xml:space="preserve"
-       style="font-size:6px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="135.86551"
-       y="713.95862"
-       id="text8983"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         id="tspan8985"
-         x="135.86551"
-         y="713.95862">Managed by threads outside Gadget</tspan></text>
-  </g>
-</svg>
diff --git a/doc/manual/figs/hdfview_image_view.png b/doc/manual/figs/hdfview_image_view.png
deleted file mode 100644
index 05248e3..0000000
Binary files a/doc/manual/figs/hdfview_image_view.png and /dev/null differ
diff --git a/doc/manual/figs/hdfview_image_view_setting.png b/doc/manual/figs/hdfview_image_view_setting.png
deleted file mode 100644
index 0595227..0000000
Binary files a/doc/manual/figs/hdfview_image_view_setting.png and /dev/null differ
diff --git a/doc/manual/figs/hdfview_mri_testdata.png b/doc/manual/figs/hdfview_mri_testdata.png
deleted file mode 100644
index 64faeb3..0000000
Binary files a/doc/manual/figs/hdfview_mri_testdata.png and /dev/null differ
diff --git a/doc/manual/figs/math/HOWTO.txt b/doc/manual/figs/math/HOWTO.txt
deleted file mode 100644
index e746453..0000000
--- a/doc/manual/figs/math/HOWTO.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-Images converted using this process:
-http://infohost.nmt.edu/tcc/help/pubs/docbook/web/math-procedure.html
-
-Sadly I could not get thr PDF/EPS/SVG accepted in fo. 
-Hence reverting to jpeg for ro also.
diff --git a/doc/manual/figs/math/lls.jpg b/doc/manual/figs/math/lls.jpg
deleted file mode 100644
index 2d20028..0000000
Binary files a/doc/manual/figs/math/lls.jpg and /dev/null differ
diff --git a/doc/manual/figs/math/lls.tex b/doc/manual/figs/math/lls.tex
deleted file mode 100644
index bea6fdc..0000000
--- a/doc/manual/figs/math/lls.tex
+++ /dev/null
@@ -1,11 +0,0 @@
-% lamath.tex: Sample of LaTeX math for inclusion in DocBook
-%
-\documentclass[leqno]{article}
-\usepackage{bm} 
-\pagestyle{empty}
-\setlength{\textwidth}{6in}
-\begin{document}
-\[
-\min_{\mathbf{u}} \left( \left\| \mathbf{E}\mathbf{u}-\mathbf{m} \right\|_2^2 + \lambda \left\| \mathbf{R} \mathbf{u} -\mathbf{p} \right\|_2^2 \right)
-\]
-\end{document}
diff --git a/doc/manual/figs/math/lls_form.jpg b/doc/manual/figs/math/lls_form.jpg
deleted file mode 100644
index 9e7b76f..0000000
Binary files a/doc/manual/figs/math/lls_form.jpg and /dev/null differ
diff --git a/doc/manual/figs/math/lls_form.tex b/doc/manual/figs/math/lls_form.tex
deleted file mode 100644
index 9aa4231..0000000
--- a/doc/manual/figs/math/lls_form.tex
+++ /dev/null
@@ -1,11 +0,0 @@
-% lamath.tex: Sample of LaTeX math for inclusion in DocBook
-%
-\documentclass[leqno]{article}
-\usepackage{bm} 
-\pagestyle{empty}
-\setlength{\textwidth}{6in}
-\begin{document}
-\[
-\left( \mathbf{E}^H \mathbf{E} + \lambda \mathbf{R}^H \mathbf{R} \right) \mathbf{u} =  \mathbf{E}^H \mathbf{m} + \mathbf{R}^H \mathbf{p} 
-\]
-\end{document}
diff --git a/doc/manual/figs/math/sb.jpg b/doc/manual/figs/math/sb.jpg
deleted file mode 100644
index 18a4fdb..0000000
Binary files a/doc/manual/figs/math/sb.jpg and /dev/null differ
diff --git a/doc/manual/figs/math/sb.tex b/doc/manual/figs/math/sb.tex
deleted file mode 100644
index 11f679e..0000000
--- a/doc/manual/figs/math/sb.tex
+++ /dev/null
@@ -1,14 +0,0 @@
-% lamath.tex: Sample of LaTeX math for inclusion in DocBook
-%
-\documentclass[leqno]{article}
-\usepackage{bm} 
-\pagestyle{empty}
-\setlength{\textwidth}{6in}
-\begin{document}
-\[
-\min_{\mathbf{u}} \left| \mathbf{u} \right|_{TV} + \lambda \left\| \mathbf{E}\mathbf{u}-\mathbf{m} \right\|_2^2
-\]
-\[
-\min_{\mathbf{u}} \left| \mathbf{u} \right|_{TV} s.t. \left\| \mathbf{E}\mathbf{u}-\mathbf{m} \right\|_2^2<\sigma^2
-\]
-\end{document}
diff --git a/doc/manual/figs/python.png b/doc/manual/figs/python.png
deleted file mode 100644
index ddaa8ed..0000000
Binary files a/doc/manual/figs/python.png and /dev/null differ
diff --git a/doc/manual/figs/python.svg b/doc/manual/figs/python.svg
deleted file mode 100644
index 3cf82a3..0000000
--- a/doc/manual/figs/python.svg
+++ /dev/null
@@ -1,635 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<!-- Created with Inkscape (http://www.inkscape.org/) -->
-
-<svg
-   xmlns:dc="http://purl.org/dc/elements/1.1/"
-   xmlns:cc="http://creativecommons.org/ns#"
-   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-   xmlns:svg="http://www.w3.org/2000/svg"
-   xmlns="http://www.w3.org/2000/svg"
-   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
-   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
-   width="225"
-   height="540"
-   id="svg8632"
-   version="1.1"
-   inkscape:version="0.48.2 r9819"
-   sodipodi:docname="python.svg"
-   inkscape:export-filename="/Users/hansenms/Documents/mrprogs/gadgetron/doc/manual/figs/python.png"
-   inkscape:export-xdpi="299.784"
-   inkscape:export-ydpi="299.784">
-  <sodipodi:namedview
-     id="base"
-     pagecolor="#ffffff"
-     bordercolor="#666666"
-     borderopacity="1.0"
-     inkscape:pageopacity="1"
-     inkscape:pageshadow="2"
-     inkscape:zoom="3.959798"
-     inkscape:cx="175.64027"
-     inkscape:cy="411.47029"
-     inkscape:document-units="px"
-     inkscape:current-layer="layer1"
-     showgrid="false"
-     inkscape:window-width="2560"
-     inkscape:window-height="1305"
-     inkscape:window-x="0"
-     inkscape:window-y="0"
-     inkscape:window-maximized="0"
-     units="in" />
-  <defs
-     id="defs8634">
-    <marker
-       inkscape:stockid="Arrow2Lend"
-       orient="auto"
-       refY="0"
-       refX="0"
-       id="Arrow2Lend"
-       style="overflow:visible">
-      <path
-         id="path3849"
-         style="font-size:12px;fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
-         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
-         transform="matrix(-1.1,0,0,-1.1,-1.1,0)"
-         inkscape:connector-curvature="0" />
-    </marker>
-    <marker
-       inkscape:stockid="TriangleOutL"
-       orient="auto"
-       refY="0"
-       refX="0"
-       id="TriangleOutL-9"
-       style="overflow:visible">
-      <path
-         inkscape:connector-curvature="0"
-         id="path4614-0"
-         d="m 5.77,0 -8.65,5 0,-10 8.65,5 z"
-         style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none"
-         transform="scale(0.8,0.8)" />
-    </marker>
-    <marker
-       inkscape:stockid="Arrow1Mend"
-       orient="auto"
-       refY="0"
-       refX="0"
-       id="Arrow1Mend"
-       style="overflow:visible">
-      <path
-         inkscape:connector-curvature="0"
-         id="path3972"
-         d="M 0,0 5,-5 -12.5,0 5,5 0,0 z"
-         style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none"
-         transform="matrix(-0.4,0,0,-0.4,-4,0)" />
-    </marker>
-    <marker
-       inkscape:stockid="Arrow2Lend"
-       orient="auto"
-       refY="0"
-       refX="0"
-       id="Arrow2Lend-6"
-       style="overflow:visible">
-      <path
-         inkscape:connector-curvature="0"
-         id="path3849-4"
-         style="font-size:12px;fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
-         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
-         transform="matrix(-1.1,0,0,-1.1,-1.1,0)" />
-    </marker>
-    <marker
-       inkscape:stockid="TriangleOutL-9B"
-       orient="auto"
-       refY="0"
-       refX="0"
-       id="TriangleOutL-9B"
-       style="overflow:visible">
-      <path
-         inkscape:connector-curvature="0"
-         id="path5674"
-         d="m 5.77,0 -8.65,5 0,-10 8.65,5 z"
-         style="marker-start:none;stroke:#7dbbc1;stroke-width:1pt;fill:#7dbbc1;fill-rule:evenodd"
-         transform="scale(0.8,0.8)" />
-    </marker>
-    <marker
-       inkscape:stockid="TriangleOutL-9Bp"
-       orient="auto"
-       refY="0"
-       refX="0"
-       id="TriangleOutL-9Bp"
-       style="overflow:visible">
-      <path
-         inkscape:connector-curvature="0"
-         id="path5828"
-         d="m 5.77,0 -8.65,5 0,-10 8.65,5 z"
-         style="fill-rule:evenodd;marker-start:none;stroke:#053df0;stroke-width:1pt;fill:#053df0"
-         transform="scale(0.8,0.8)" />
-    </marker>
-  </defs>
-  <metadata
-     id="metadata8637">
-    <rdf:RDF>
-      <cc:Work
-         rdf:about="">
-        <dc:format>image/svg+xml</dc:format>
-        <dc:type
-           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
-        <dc:title></dc:title>
-      </cc:Work>
-    </rdf:RDF>
-  </metadata>
-  <g
-     inkscape:label="Layer 1"
-     inkscape:groupmode="layer"
-     id="layer1"
-     transform="translate(0,-512.35975)">
-    <text
-       xml:space="preserve"
-       style="font-size:3.29999995px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans"
-       x="-0.90061849"
-       y="609.21912"
-       id="text9939"
-       sodipodi:linespacing="125%"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741"><tspan
-         sodipodi:role="line"
-         id="tspan9941"
-         x="-0.90061849"
-         y="609.21912" /></text>
-    <g
-       id="g5829"
-       transform="translate(0,-1.2626953)">
-      <rect
-         inkscape:export-ydpi="599.78741"
-         inkscape:export-xdpi="599.78741"
-         inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-         y="526.03345"
-         x="30.816908"
-         height="46.799793"
-         width="42.263618"
-         id="rect9947-1-8-41"
-         style="opacity:0.93999999;fill:#97d25d;fill-opacity:1;stroke:none" />
-      <text
-         sodipodi:linespacing="125%"
-         id="text9949-9-5-1"
-         y="533.77313"
-         x="51.774353"
-         style="font-size:4.72979879px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-         xml:space="preserve"><tspan
-           id="tspan9953-3-1-9"
-           y="533.77313"
-           x="51.774353"
-           sodipodi:role="line">Python Gadget</tspan></text>
-    </g>
-    <rect
-       style="opacity:0.93999999000000001;fill:#978bea;fill-opacity:1;stroke:none"
-       id="rect9947-1-8-41-5"
-       width="132.46913"
-       height="13.464759"
-       x="47.457645"
-       y="604.66888"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741" />
-    <text
-       xml:space="preserve"
-       style="font-size:4.72979832px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans"
-       x="91.711807"
-       y="612.70612"
-       id="text9905-5"
-       sodipodi:linespacing="125%"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-ydpi="599.78741"><tspan
-         sodipodi:role="line"
-         id="tspan9907-6"
-         x="91.711807"
-         y="612.70612">Python Interpreter</tspan></text>
-    <g
-       id="g5585"
-       transform="translate(-2.5253814,-23.233439)">
-      <rect
-         inkscape:export-ydpi="599.78741"
-         inkscape:export-xdpi="599.78741"
-         inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-         y="655.14764"
-         x="31.62396"
-         height="86.195747"
-         width="71.607445"
-         id="rect9947-1-8-41-5-1"
-         style="opacity:0.93999999;fill:#97d25d;fill-opacity:1;stroke:none" />
-      <text
-         inkscape:export-ydpi="599.78741"
-         inkscape:export-xdpi="599.78741"
-         inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-         sodipodi:linespacing="125%"
-         id="text9905-5-8"
-         y="662.67981"
-         x="47.997078"
-         style="font-size:4.72979832px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans"
-         xml:space="preserve"><tspan
-           y="662.67981"
-           x="47.997078"
-           id="tspan9907-6-9"
-           sodipodi:role="line">Python Module 1</tspan></text>
-      <g
-         transform="translate(-33.587572,39.718279)"
-         id="g5455">
-        <text
-           xml:space="preserve"
-           style="font-size:4px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:American Typewriter;-inkscape-font-specification:American Typewriter"
-           x="72.225906"
-           y="641.48022"
-           id="text4597"
-           sodipodi:linespacing="125%"><tspan
-             sodipodi:role="line"
-             id="tspan4599"
-             x="72.225906"
-             y="641.48022">def set_gadget_ref:</tspan><tspan
-             sodipodi:role="line"
-             x="72.225906"
-             y="646.48022"
-             id="tspan4615">     #Code for setting reference</tspan><tspan
-             sodipodi:role="line"
-             x="72.225906"
-             y="651.48022"
-             id="tspan4613"> </tspan><tspan
-             sodipodi:role="line"
-             x="72.225906"
-             y="656.48022"
-             id="tspan4601" /><tspan
-             sodipodi:role="line"
-             x="72.225906"
-             y="661.48022"
-             id="tspan5863">def config_function:</tspan><tspan
-             sodipodi:role="line"
-             x="72.225906"
-             y="666.48022"
-             id="tspan4617">    #Code for processing conf</tspan><tspan
-             sodipodi:role="line"
-             x="72.225906"
-             y="671.48022"
-             id="tspan4607" /><tspan
-             sodipodi:role="line"
-             x="72.225906"
-             y="676.48022"
-             id="tspan4609" /><tspan
-             sodipodi:role="line"
-             x="72.225906"
-             y="681.48022"
-             id="tspan4611">def recon_function:</tspan><tspan
-             sodipodi:role="line"
-             x="72.225906"
-             y="686.48022"
-             id="tspan6257" /><tspan
-             sodipodi:role="line"
-             x="72.225906"
-             y="691.48022"
-             id="tspan4619"
-             style="font-weight:bold;-inkscape-font-specification:American Typewriter Bold">       #Actual recon code</tspan></text>
-        <rect
-           style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
-           id="rect5425"
-           width="65.154839"
-           height="70.409737"
-           x="68.437836"
-           y="627.33807" />
-        <text
-           xml:space="preserve"
-           style="font-size:4px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-           x="70.458138"
-           y="631.88373"
-           id="text5427"
-           sodipodi:linespacing="125%"><tspan
-             sodipodi:role="line"
-             id="tspan5429"
-             x="70.458138"
-             y="631.88373">MyPythonScript1.py</tspan></text>
-      </g>
-    </g>
-    <g
-       id="g5606"
-       transform="translate(-0.5253814,-12.374369)">
-      <rect
-         style="opacity:0.93999999;fill:#97d2ea;fill-opacity:1;stroke:none"
-         id="rect5472"
-         width="71.607445"
-         height="86.195747"
-         x="122.03262"
-         y="644.28857"
-         inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-         inkscape:export-xdpi="599.78741"
-         inkscape:export-ydpi="599.78741" />
-      <text
-         xml:space="preserve"
-         style="font-size:4.72979832px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans"
-         x="140.17349"
-         y="651.82074"
-         id="text5474"
-         sodipodi:linespacing="125%"
-         inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-         inkscape:export-xdpi="599.78741"
-         inkscape:export-ydpi="599.78741"><tspan
-           sodipodi:role="line"
-           id="tspan5476"
-           x="140.17349"
-           y="651.82074">Python Module 2</tspan></text>
-      <g
-         id="g5478"
-         transform="translate(56.821082,28.859139)">
-        <text
-           sodipodi:linespacing="125%"
-           id="text5480"
-           y="641.48022"
-           x="72.225906"
-           style="font-size:4px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:American Typewriter;-inkscape-font-specification:American Typewriter"
-           xml:space="preserve"><tspan
-             y="641.48022"
-             x="72.225906"
-             id="tspan5482"
-             sodipodi:role="line">def set_gadget_ref:</tspan><tspan
-             id="tspan5484"
-             y="646.48022"
-             x="72.225906"
-             sodipodi:role="line">     #Code for setting reference</tspan><tspan
-             id="tspan5486"
-             y="651.48022"
-             x="72.225906"
-             sodipodi:role="line"> </tspan><tspan
-             id="tspan5488"
-             y="656.48022"
-             x="72.225906"
-             sodipodi:role="line" /><tspan
-             id="tspan5492"
-             y="661.48022"
-             x="72.225906"
-             sodipodi:role="line">def config_function:</tspan><tspan
-             id="tspan5494"
-             y="666.48022"
-             x="72.225906"
-             sodipodi:role="line">    #Code for processing conf</tspan><tspan
-             id="tspan5496"
-             y="671.48022"
-             x="72.225906"
-             sodipodi:role="line" /><tspan
-             id="tspan5498"
-             y="676.48022"
-             x="72.225906"
-             sodipodi:role="line" /><tspan
-             id="tspan5500"
-             y="681.48022"
-             x="72.225906"
-             sodipodi:role="line">def recon_function:</tspan><tspan
-             y="686.48022"
-             x="72.225906"
-             sodipodi:role="line"
-             id="tspan6459" /><tspan
-             id="tspan5502"
-             y="691.48022"
-             x="72.225906"
-             sodipodi:role="line"
-             style="font-weight:bold;-inkscape-font-specification:American Typewriter Bold">       #Actual recon code</tspan></text>
-        <rect
-           y="627.33807"
-           x="68.437836"
-           height="70.409737"
-           width="65.154839"
-           id="rect5504"
-           style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-        <text
-           sodipodi:linespacing="125%"
-           id="text5506"
-           y="631.88373"
-           x="70.458138"
-           style="font-size:4px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-           xml:space="preserve"><tspan
-             y="631.88373"
-             x="70.458138"
-             id="tspan5508"
-             sodipodi:role="line">MyPythonScript2.py</tspan></text>
-      </g>
-    </g>
-    <rect
-       inkscape:export-ydpi="599.78741"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       y="582.69806"
-       x="46.700031"
-       height="13.464759"
-       width="132.46913"
-       id="rect5535"
-       style="opacity:0.93999999000000001;fill:#978bea;fill-opacity:1;stroke:none" />
-    <text
-       inkscape:export-ydpi="599.78741"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       sodipodi:linespacing="125%"
-       id="text5537"
-       y="590.73529"
-       x="86.279831"
-       style="font-size:4.72979832px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans"
-       xml:space="preserve"><tspan
-         y="590.73529"
-         x="86.279831"
-         id="tspan5539"
-         sodipodi:role="line">Python Communicator</tspan></text>
-    <g
-       id="g5839">
-      <rect
-         style="opacity:0.93999999;fill:#97d2ea;fill-opacity:1;stroke:none"
-         id="rect5548"
-         width="42.263618"
-         height="46.799793"
-         x="141.93369"
-         y="524.77075"
-         inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-         inkscape:export-xdpi="599.78741"
-         inkscape:export-ydpi="599.78741" />
-      <text
-         xml:space="preserve"
-         style="font-size:4.72979879px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-         x="162.89113"
-         y="532.51044"
-         id="text5550"
-         sodipodi:linespacing="125%"><tspan
-           sodipodi:role="line"
-           x="162.89113"
-           y="532.51044"
-           id="tspan5552">Python Gadget</tspan></text>
-    </g>
-    <g
-       id="g5579">
-      <rect
-         style="opacity:0.93999999;fill:#97d25d;fill-opacity:1;stroke:none"
-         id="rect5557"
-         width="36.455242"
-         height="19.273136"
-         x="4.805479"
-         y="591.94592"
-         inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-         inkscape:export-xdpi="599.78741"
-         inkscape:export-ydpi="599.78741" />
-      <text
-         xml:space="preserve"
-         style="font-size:4.72979879px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-         x="22.926865"
-         y="600.38965"
-         id="text5561"
-         sodipodi:linespacing="125%"><tspan
-           sodipodi:role="line"
-           x="22.926865"
-           y="600.38965"
-           id="tspan5563">Gadget</tspan><tspan
-           id="tspan5565"
-           sodipodi:role="line"
-           x="22.926865"
-           y="606.30188">Reference</tspan></text>
-    </g>
-    <g
-       id="g5573">
-      <rect
-         inkscape:export-ydpi="599.78741"
-         inkscape:export-xdpi="599.78741"
-         inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-         y="591.44086"
-         x="182.84486"
-         height="19.273136"
-         width="36.455242"
-         id="rect5559"
-         style="opacity:0.93999999000000001;fill:#97d2ea;fill-opacity:1;stroke:none" />
-      <text
-         sodipodi:linespacing="125%"
-         id="text5567"
-         y="599.88458"
-         x="200.96625"
-         style="font-size:4.72979879px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-         xml:space="preserve"><tspan
-           id="tspan5569"
-           y="599.88458"
-           x="200.96625"
-           sodipodi:role="line">Gadget</tspan><tspan
-           y="605.79681"
-           x="200.96625"
-           sodipodi:role="line"
-           id="tspan5571">Reference</tspan></text>
-    </g>
-    <rect
-       inkscape:export-ydpi="599.78741"
-       inkscape:export-xdpi="599.78741"
-       inkscape:export-filename="/Users/hansenms/Documents/Publications/WIP/Gadgetron/manuscript/figures/gadget.pdf.png"
-       y="524.77075"
-       x="86.375298"
-       height="46.799793"
-       width="42.263618"
-       id="rect5627"
-       style="opacity:0.93999999000000001;fill:#97d25d;fill-opacity:0.23555556;stroke:none" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text5629"
-       y="532.51044"
-       x="107.43204"
-       style="font-size:4.72979879px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
-       xml:space="preserve"><tspan
-         id="tspan5631"
-         y="532.51044"
-         x="107.43204"
-         sodipodi:role="line">C/C++ Gadget</tspan></text>
-    <path
-       style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
-       d="m 6.8185297,546.14706 23.2335093,0"
-       id="path5633"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       inkscape:connector-curvature="0"
-       id="path5827"
-       d="m 73.23606,546.14706 12.626907,0"
-       style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
-       sodipodi:nodetypes="cc" />
-    <path
-       sodipodi:nodetypes="cc"
-       style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
-       d="m 128.54191,546.14706 12.62691,0"
-       id="path5844"
-       inkscape:connector-curvature="0" />
-    <path
-       inkscape:connector-curvature="0"
-       id="path5846"
-       d="m 184.1003,546.14706 23.23351,-0.18941"
-       style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
-       sodipodi:nodetypes="cc" />
-    <path
-       sodipodi:nodetypes="cc"
-       style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
-       d="m 63.639611,571.14834 0,11.11168"
-       id="path5857"
-       inkscape:connector-curvature="0" />
-    <path
-       inkscape:connector-curvature="0"
-       id="path5859"
-       d="m 63.639611,595.89708 0,8.08122"
-       style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
-       sodipodi:nodetypes="cc" />
-    <path
-       sodipodi:nodetypes="cc"
-       style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
-       d="m 63.639611,618.12043 0,85.86298"
-       id="path5861"
-       inkscape:connector-curvature="0" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
-       d="m 39.39595,194.6541 -23.359779,0 -0.505076,-95.333148"
-       id="path5867"
-       inkscape:connector-curvature="0"
-       transform="translate(0,512.35975)"
-       sodipodi:nodetypes="ccc" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
-       d="m 15.531095,78.865363 0,-13.51079 29.673231,0 0.252539,-6.187184"
-       id="path6061"
-       inkscape:connector-curvature="0"
-       transform="translate(0,512.35975)"
-       sodipodi:nodetypes="cccc" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:0.50000000000000000;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;stroke-miterlimit:4;stroke-dasharray:0.50000000000000000, 0.50000000000000000;stroke-dashoffset:0;marker-end:url(#Arrow1Mend)"
-       d="m 45.204326,57.147084 0,-23.233509 27.021581,0"
-       id="path6259"
-       inkscape:connector-curvature="0"
-       transform="translate(0,512.35975)" />
-    <path
-       inkscape:connector-curvature="0"
-       id="path6453"
-       d="m 168.44294,571.40088 0,11.11168"
-       style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
-       sodipodi:nodetypes="cc" />
-    <path
-       sodipodi:nodetypes="cc"
-       style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
-       d="m 168.44294,596.14962 0,8.08122"
-       id="path6455"
-       inkscape:connector-curvature="0" />
-    <path
-       inkscape:connector-curvature="0"
-       id="path6457"
-       d="m 168.44294,618.37297 0,85.86298"
-       style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
-       sodipodi:nodetypes="cc" />
-    <path
-       sodipodi:nodetypes="ccc"
-       inkscape:connector-curvature="0"
-       id="path6461"
-       d="m 174.50386,706.88758 26.76903,0 0,-95.20688"
-       style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow1Mend)" />
-    <path
-       sodipodi:nodetypes="cccc"
-       inkscape:connector-curvature="0"
-       id="path6463"
-       d="m 201.27289,591.73018 0,-13.51079 -27.27411,0 0,-6.18718"
-       style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;marker-end:url(#Arrow1Mend)" />
-    <path
-       inkscape:connector-curvature="0"
-       id="path6465"
-       d="m 173.99878,570.26445 0,-24.3068 8.58629,0"
-       style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:0.5, 0.5;stroke-dashoffset:0;marker-end:url(#Arrow1Mend)"
-       sodipodi:nodetypes="ccc" />
-  </g>
-</svg>
diff --git a/doc/manual/figs/sense_cg.png b/doc/manual/figs/sense_cg.png
deleted file mode 100644
index 67e60ae..0000000
Binary files a/doc/manual/figs/sense_cg.png and /dev/null differ
diff --git a/doc/manual/figs/sense_sbc.png b/doc/manual/figs/sense_sbc.png
deleted file mode 100644
index 974d90c..0000000
Binary files a/doc/manual/figs/sense_sbc.png and /dev/null differ
diff --git a/doc/manual/figs/shepp.png b/doc/manual/figs/shepp.png
deleted file mode 100644
index b0c5137..0000000
Binary files a/doc/manual/figs/shepp.png and /dev/null differ
diff --git a/doc/manual/figs/shepp_blurred.png b/doc/manual/figs/shepp_blurred.png
deleted file mode 100644
index e12159e..0000000
Binary files a/doc/manual/figs/shepp_blurred.png and /dev/null differ
diff --git a/doc/manual/figs/shepp_deblurred_cg.png b/doc/manual/figs/shepp_deblurred_cg.png
deleted file mode 100644
index 9b03331..0000000
Binary files a/doc/manual/figs/shepp_deblurred_cg.png and /dev/null differ
diff --git a/doc/manual/figs/shepp_deblurred_sb.png b/doc/manual/figs/shepp_deblurred_sb.png
deleted file mode 100644
index 9dc4360..0000000
Binary files a/doc/manual/figs/shepp_deblurred_sb.png and /dev/null differ
diff --git a/doc/manual/figs/shepp_denoised.png b/doc/manual/figs/shepp_denoised.png
deleted file mode 100644
index 30b9f0b..0000000
Binary files a/doc/manual/figs/shepp_denoised.png and /dev/null differ
diff --git a/doc/manual/figs/shepp_iteration.png b/doc/manual/figs/shepp_iteration.png
deleted file mode 100644
index 2700719..0000000
Binary files a/doc/manual/figs/shepp_iteration.png and /dev/null differ
diff --git a/doc/manual/figs/shepp_noisy.png b/doc/manual/figs/shepp_noisy.png
deleted file mode 100644
index 52ccb42..0000000
Binary files a/doc/manual/figs/shepp_noisy.png and /dev/null differ
diff --git a/doc/manual/figs/simple2dft.png b/doc/manual/figs/simple2dft.png
deleted file mode 100644
index b60f27f..0000000
Binary files a/doc/manual/figs/simple2dft.png and /dev/null differ
diff --git a/doc/manual/figs/simple2dft.svg b/doc/manual/figs/simple2dft.svg
deleted file mode 100644
index 388f772..0000000
--- a/doc/manual/figs/simple2dft.svg
+++ /dev/null
@@ -1,355 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<!-- Created with Inkscape (http://www.inkscape.org/) -->
-
-<svg
-   xmlns:dc="http://purl.org/dc/elements/1.1/"
-   xmlns:cc="http://creativecommons.org/ns#"
-   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-   xmlns:svg="http://www.w3.org/2000/svg"
-   xmlns="http://www.w3.org/2000/svg"
-   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
-   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
-   width="744.09448819"
-   height="1052.3622047"
-   id="svg8987"
-   version="1.1"
-   inkscape:version="0.48.2 r9819"
-   sodipodi:docname="simple2dft.svg"
-   inkscape:export-filename="/home/hansenms/mrprogs/gadgetron/doc/manual/figs/simple2dft.png"
-   inkscape:export-xdpi="299.97357"
-   inkscape:export-ydpi="299.97357">
-  <defs
-     id="defs8989">
-    <marker
-       inkscape:stockid="Arrow1Mend"
-       orient="auto"
-       refY="0"
-       refX="0"
-       id="Arrow1Mend"
-       style="overflow:visible">
-      <path
-         inkscape:connector-curvature="0"
-         id="path6124"
-         d="M 0,0 5,-5 -12.5,0 5,5 0,0 z"
-         style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none"
-         transform="matrix(-0.4,0,0,-0.4,-4,0)" />
-    </marker>
-    <marker
-       inkscape:stockid="Arrow1Mend"
-       orient="auto"
-       refY="0"
-       refX="0"
-       id="marker9053"
-       style="overflow:visible">
-      <path
-         inkscape:connector-curvature="0"
-         id="path9055"
-         d="M 0,0 5,-5 -12.5,0 5,5 0,0 z"
-         style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none"
-         transform="matrix(-0.4,0,0,-0.4,-4,0)" />
-    </marker>
-    <marker
-       inkscape:stockid="Arrow1Mend"
-       orient="auto"
-       refY="0"
-       refX="0"
-       id="marker9057"
-       style="overflow:visible">
-      <path
-         inkscape:connector-curvature="0"
-         id="path9059"
-         d="M 0,0 5,-5 -12.5,0 5,5 0,0 z"
-         style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt;marker-start:none"
-         transform="matrix(-0.4,0,0,-0.4,-4,0)" />
-    </marker>
-  </defs>
-  <sodipodi:namedview
-     id="base"
-     pagecolor="#ffffff"
-     bordercolor="#666666"
-     borderopacity="1.0"
-     inkscape:pageopacity="0.0"
-     inkscape:pageshadow="2"
-     inkscape:zoom="2.8"
-     inkscape:cx="456.5887"
-     inkscape:cy="530.90344"
-     inkscape:document-units="px"
-     inkscape:current-layer="layer1"
-     showgrid="false"
-     inkscape:window-width="1587"
-     inkscape:window-height="891"
-     inkscape:window-x="64"
-     inkscape:window-y="24"
-     inkscape:window-maximized="0" />
-  <metadata
-     id="metadata8992">
-    <rdf:RDF>
-      <cc:Work
-         rdf:about="">
-        <dc:format>image/svg+xml</dc:format>
-        <dc:type
-           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
-        <dc:title></dc:title>
-      </cc:Work>
-    </rdf:RDF>
-  </metadata>
-  <g
-     inkscape:label="Layer 1"
-     inkscape:groupmode="layer"
-     id="layer1">
-    <rect
-       style="fill:#97d2ea;fill-opacity:1;stroke:#dcdcdc;stroke-width:0.21841221;stroke-linecap:butt;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
-       id="rect5207"
-       width="104.11561"
-       height="31.714287"
-       x="322.9422"
-       y="445.36478" />
-    <text
-       xml:space="preserve"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="340.2955"
-       y="455.65054"
-       id="text5209"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         id="tspan5211"
-         x="340.2955"
-         y="455.65054">AccumulatorGadget</tspan></text>
-    <text
-       xml:space="preserve"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="348.07178"
-       y="463.72195"
-       id="text5721"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         id="tspan5723"
-         x="348.07178"
-         y="463.72195"
-         style="font-size:6px">Collecting k-space</tspan></text>
-    <rect
-       y="539.96198"
-       x="322.9422"
-       height="31.714287"
-       width="104.11561"
-       id="rect5725"
-       style="fill:#97d2ea;fill-opacity:1;stroke:#dcdcdc;stroke-width:0.21841221;stroke-linecap:butt;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text5727"
-       y="550.24768"
-       x="339.80655"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         y="550.24768"
-         x="339.80655"
-         id="tspan5729"
-         sodipodi:role="line">CropCombineGadget</tspan></text>
-    <text
-       sodipodi:linespacing="125%"
-       id="text5731"
-       y="560.1048"
-       x="374.85352"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         y="560.1048"
-         x="374.85352"
-         sodipodi:role="line"
-         id="tspan5737">Remove Readout Oversampling</tspan><tspan
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         y="567.6048"
-         x="374.85352"
-         sodipodi:role="line"
-         id="tspan9162">Combine Channels (RMS)</tspan></text>
-    <rect
-       style="fill:#97d2ea;fill-opacity:1;stroke:#dcdcdc;stroke-width:0.21841221;stroke-linecap:butt;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
-       id="rect5768"
-       width="104.11561"
-       height="31.714287"
-       x="322.9422"
-       y="587.64532" />
-    <text
-       xml:space="preserve"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="340.53766"
-       y="598.43103"
-       id="text5770"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         id="tspan5772"
-         x="340.53766"
-         y="598.43103">ImageFinishGadget</tspan></text>
-    <text
-       xml:space="preserve"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="374.85352"
-       y="611.35956"
-       id="text5774"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         x="374.85352"
-         y="611.35956"
-         style="font-size:6px;text-align:center;text-anchor:middle"
-         id="tspan5782">Return Images to Client</tspan></text>
-    <rect
-       y="493.46884"
-       x="322.9422"
-       height="31.714287"
-       width="104.11561"
-       id="rect5830"
-       style="fill:#97d2ea;fill-opacity:1;stroke:#dcdcdc;stroke-width:0.21841221;stroke-linecap:butt;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path4301"
-       d="m 375,478.08019 0,12.85714"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)"
-       d="m 375,526.06243 0,12.85714"
-       id="path8376"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path8378"
-       d="m 375,573.03452 0,12.85714"
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Mend)" />
-    <text
-       sodipodi:linespacing="125%"
-       id="text9156"
-       y="511.72198"
-       x="354.58121"
-       style="font-size:7.19999981px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       xml:space="preserve"><tspan
-         y="511.72198"
-         x="354.58121"
-         id="tspan9158"
-         sodipodi:role="line">FFTGadget</tspan></text>
-    <rect
-       style="fill:none;stroke:#000000;stroke-width:0.75691628;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
-       id="rect9185"
-       width="82.409531"
-       height="74.859619"
-       x="457.15237"
-       y="448.80029" />
-    <text
-       xml:space="preserve"
-       style="font-size:6px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="475.35715"
-       y="444.50507"
-       id="text9713"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         id="tspan9715"
-         x="475.35715"
-         y="444.50507">k-space buffer</tspan></text>
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:1, 2;stroke-dashoffset:0"
-       d="m 460.71429,452.3315 77.85714,0"
-       id="path9717"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path9719"
-       d="m 460.71429,457.88466 77.85714,0"
-       style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:1, 2;stroke-dashoffset:0" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:1, 2;stroke-dashoffset:0"
-       d="m 460.71429,463.43782 77.85714,0"
-       id="path9721"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path9723"
-       d="m 460.71429,468.99092 77.85714,0"
-       style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:1, 2;stroke-dashoffset:0" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:1, 2;stroke-dashoffset:0"
-       d="m 460.71429,474.54408 77.85714,0"
-       id="path9725"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path9727"
-       d="m 460.71429,480.09725 77.85714,0"
-       style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:1, 2;stroke-dashoffset:0" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:1, 2;stroke-dashoffset:0"
-       d="m 460.71429,485.65041 77.85714,0"
-       id="path9729"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path9731"
-       d="m 460.71429,491.20357 77.85714,0"
-       style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:1, 2;stroke-dashoffset:0" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:1, 2;stroke-dashoffset:0"
-       d="m 460.71429,496.75673 77.85714,0"
-       id="path9733"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path9735"
-       d="m 460.71429,502.30983 77.85714,0"
-       style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:1, 2;stroke-dashoffset:0" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:1, 2;stroke-dashoffset:0"
-       d="m 460.71429,507.86299 77.85714,0"
-       id="path9737"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       sodipodi:nodetypes="cc"
-       inkscape:connector-curvature="0"
-       id="path9739"
-       d="m 460.71429,513.41615 77.85714,0"
-       style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:1, 2;stroke-dashoffset:0" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:1, 2;stroke-dashoffset:0"
-       d="m 461.07143,518.96932 77.85714,0"
-       id="path9741"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-mid:none;marker-end:url(#marker9057)"
-       d="m 427.14286,461.6479 c 23.07443,-32.01801 38.58246,-18.63764 53.21428,0"
-       id="path9743"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cc" />
-    <text
-       xml:space="preserve"
-       style="font-size:6px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
-       x="472.85715"
-       y="563.79077"
-       id="text10121"
-       sodipodi:linespacing="125%"><tspan
-         sodipodi:role="line"
-         id="tspan10123"></tspan></text>
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-start:none;marker-end:url(#marker9057)"
-       d="m 457.14286,485.75504 -17.5,0 0,-18.03571 -12.14286,0"
-       id="path10125"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="cccc" />
-    <path
-       style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;stroke-miterlimit:4;stroke-dasharray:1,1;stroke-dashoffset:0"
-       d="m 425,467.71933 -50,0 0,9.28571"
-       id="path10687"
-       inkscape:connector-curvature="0"
-       sodipodi:nodetypes="ccc" />
-  </g>
-</svg>
diff --git a/doc/manual/gadgetron_manual.xml b/doc/manual/gadgetron_manual.xml
deleted file mode 100644
index 3ce9d5d..0000000
--- a/doc/manual/gadgetron_manual.xml
+++ /dev/null
@@ -1,6472 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<book version="5.0" xmlns="http://docbook.org/ns/docbook"
-      xmlns:xlink="http://www.w3.org/1999/xlink"
-      xmlns:xi="http://www.w3.org/2001/XInclude"
-      xmlns:ns5="http://www.w3.org/2000/svg"
-      xmlns:ns4="http://www.w3.org/1998/Math/MathML"
-      xmlns:ns3="http://www.w3.org/1999/xhtml"
-      xmlns:ns="http://docbook.org/ns/docbook">
-  <info>
-    <title>Gadgetron Users Guide</title>
-
-    <subtitle>A Medical Image Reconstruction Framework</subtitle>
-
-    <author>
-      <personname><honorific>Dr</honorific> <firstname>Michael
-      Schacht</firstname> <surname>Hansen</surname></personname>
-
-      <email>michael.hansen at nih.gov</email>
-    </author>
-
-    <address>National Heart, Lung, and Blood Institute
-National Institutes of Health, Bethesda, MD, USA</address>
-
-    <author>
-      <personname><honorific>Dr</honorific> <firstname>Thomas
-      Sangild</firstname> <surname>Sørensen</surname></personname>
-
-      <email>sangild at cs.au.dk</email>
-    </author>
-
-    <address>Department of Computer Science and Department of Clinical Medicine
-Aarhus University, Denmark</address>
-    
-    <author>
-      <personname><honorific>Dr</honorific> <firstname>Hui
-      </firstname> <surname>Xue</surname></personname>
-
-      <email>hui.xue at nih.gov</email>
-    </author>
-
-    <address>National Heart, Lung, and Blood Institute
-National Institutes of Health, Bethesda, MD, USA</address>
-
-    <author>
-      <personname><firstname>Revision</firstname>
-      <surname>1.2</surname></personname>
-
-      <email/>
-    </author>
-
-    <edition>1.2</edition>
-  </info>
-
-  <chapter>
-    <title>Introduction</title>
-
-    <sect1>
-      <title>What is the Gadgetron</title>
-
-      <para>The Gadgetron is a streaming data processing framework for medical
-      image reconstruction. It has been developed to make it easier to
-      prototype, test, and deploy new image reconstruction algorithms.</para>
-
-      <para>The framework features a number of reconstruction applications
-      that can be employed directly. Moreover, it contains a wide range of
-      toolboxes with common data structures and algorithms designed for a much
-      broader use. These toolboxes can be used within the streaming framework
-      to create new dedicated reconstruction components or used as shared
-      libraries in standalone (or third party) applications.</para>
-
-      <para>This document serves as an introduction to the Gadgetron framework
-      and provides some "getting started" examples of using it. A scientific
-      paper is also available <xref linkend="hansen12"/>.</para>
-
-      <para>Although the Gadgetron is a generic, multi-modality image
-      reconstruction framework, it was initially developed to support the work
-      of the authors in the field of advanced MRI reconstruction. Specifically
-      to support work on fast image reconstruction, not only on traditional
-      CPU architectures, but also using commodity graphics hardware (GPUs).
-      Some examples that are made publicly available through the Gadgetron
-      framework include fast (re)gridding on the GPU <citation><xref
-      linkend="sorensen08"/></citation>, Cartesian parallel imaging on the GPU
-      <citation><xref linkend="hansen08"/></citation>, and non-Cartesian
-      parallel imaging on the GPU <citation><xref
-      linkend="sorensen09"/></citation>.</para>
-    </sect1>
-
-    <sect1>
-      <title>Revision History</title>
-
-    <sect2>
-        <title>Version 2.5</title>
-
-        <para>Version 2.5 contains a number of extension and enhancements to the Gadgetron. 
-            In particular, a toolbox, named Gadgetron Plus or GtPlus is added to the package.
-            GtPlus toolbox implements the complete reconstruction workflow for ISMRMRD data format 
-            and different parallel imaging modes (embedded, interleaved, seperate etc.). Multiple 
-            linear and non-linear reconstruction algorithms are implemented in this toolbox. The data 
-            accumulation and reconstruction triggering scheme are extended to support on-the-fly reconstruction.
-            Another major extension is to extend the Gadgetron to support cloud computing. This feature is named as 
-            GtPlus Cloud. 
-            A non-exhaustive list of changes can be found below:</para>
-
-        <para><itemizedlist>
-            <listitem>
-              <para>The <classname>GadgetMessageAcquisition</classname>,
-              <classname>GadgetMessageImage</classname>, etc. (previously used
-              to describe MRI raw data and images) have been replaced with the
-              corresponding classes from the ISMRMRD library.</para>
-            </listitem>
-
-            <listitem>
-              <para>There is now a Gadgetron configuration file
-              (<filename>gadgetron.xml</filename>) used to control the port
-              number of the Gadgetron when starting. That makes it easier to
-              maintain the same port for a given installation without
-              supplying it on the command line.</para>
-            </listitem>
-
-            <listitem>
-              <para>The dependency on TinyXML has been almost entirely
-              removed. We are now using a class representations of headers and
-              configuration generated with CodeSynthesis XSD
-              (<uri>http://www.codesynthesis.com/products/xsd/</uri>).</para>
-            </listitem>
-
-            <listitem>
-              <para>All XML representations now have schema definitions to
-              make it easier to validate configuration files etc.</para>
-            </listitem>
-
-            <listitem>
-              <para>New toolbox functionality.</para>
-            </listitem>
-
-            <listitem>
-              <para>Various bug fixes.</para>
-            </listitem>
-          </itemizedlist></para>
-      </sect2>
-        
-      <sect2>
-        <title>Version 1.1</title>
-
-        <para>Version 1.1 contains multiple bug fixes, optimizations and some
-        structural changes. Most notably, the Gadgetron now uses the proposed
-        ISMRM Raw Data format (<uri>http://ismrmrd.sourceforge.net</uri>)
-        throughout the MRI specific Gadgets. A non-exhaustive list of changes
-        can be found below:</para>
-
-        <para><itemizedlist>
-            <listitem>
-              <para>The <classname>GadgetMessageAcquisition</classname>,
-              <classname>GadgetMessageImage</classname>, etc. (previously used
-              to describe MRI raw data and images) have been replaced with the
-              corresponding classes from the ISMRMRD library.</para>
-            </listitem>
-
-            <listitem>
-              <para>There is now a Gadgetron configuration file
-              (<filename>gadgetron.xml</filename>) used to control the port
-              number of the Gadgetron when starting. That makes it easier to
-              maintain the same port for a given installation without
-              supplying it on the command line.</para>
-            </listitem>
-
-            <listitem>
-              <para>The dependency on TinyXML has been almost entirely
-              removed. We are now using a class representations of headers and
-              configuration generated with CodeSynthesis XSD
-              (<uri>http://www.codesynthesis.com/products/xsd/</uri>).</para>
-            </listitem>
-
-            <listitem>
-              <para>All XML representations now have schema definitions to
-              make it easier to validate configuration files etc.</para>
-            </listitem>
-
-            <listitem>
-              <para>New toolbox functionality.</para>
-            </listitem>
-
-            <listitem>
-              <para>Various bug fixes.</para>
-            </listitem>
-          </itemizedlist></para>
-      </sect2>
-
-      <sect2>
-        <title>Version 1.0</title>
-
-        <para>First release of the Gadgetron</para>
-      </sect2>
-    </sect1>
-
-    <sect1>
-      <title>Obtaining Gadgetron</title>
-
-      <para>The Gadgetron is made available as a cross-platform source code
-      distribution, which compiles and has been tested to run on Linux, Mac OS
-      X, and Windows 7. Compilation instructions for these platforms are
-      provided below.</para>
-
-      <para>Generally speaking, the Gadgetron is easiest set up on Linux since
-      all dependencies are readily available. If you want to get started
-      quickly with the Gadgetron and happen to not be using Linux, it is easy
-      to install Ubuntu (our preferred Linux distribution) in a virtual
-      machine (e.g. VirtualBox, <uri type="website"
-      xlink:href="https://www.virtualbox.org/">https://www.virtualbox.org/</uri>)
-      and follow the Linux compilation instructions below.</para>
-
-      <para>The Gadgetron is available from the project Sourceforge
-      website:</para>
-
-      <para><uri type="website"
-      xlink:href="http://sourceforge.net/projects/gadgetron">http://sourceforge.net/projects/gadgetron</uri></para>
-
-      <para>This manual is available in HTML form at:</para>
-
-      <para><uri type="website"
-      xlink:href="http://gadgetron.sourceforge.net/latest/manual/gadgetron_manual.html">http://gadgetron.sourceforge.net/latest/manual/gadgetron_manual.html</uri></para>
-
-      <para>Or in PDF form at:</para>
-
-      <para><uri type="website"
-      xlink:href="http://gadgetron.sourceforge.net/latest/manual/gadgetron_manual.pdf">http://gadgetron.sourceforge.net/latest/manual/gadgetron_manual.pdf</uri></para>
-
-      <para>API documentation (generated with Doxygen) is available
-      from:</para>
-
-      <para><uri type="website"
-      xlink:href="http://gadgetron.sourceforge.net/latest/api/">http://gadgetron.sourceforge.net/latest/api/</uri></para>
-
-      <sect2 xml:id="sect.dependencies">
-        <title>Dependencies</title>
-
-        <para>The Gadgetron depends on a number of libraries that can either
-        be downloaded for free or that may already be part of the installation
-        on your workstation. If you are working on a Linux platform you should
-        be able to install all dependencies without compiling anything. The
-        following is a list of the components that you will need. Some are
-        optional.</para>
-
-        <para>To install these components please follow the platform specific
-        installation instructions provided below (<xref
-        linkend="sect.installation"/>).</para>
-
-        <sect3 xml:id="sect.required">
-          <title>Required libraries</title>
-
-          <itemizedlist>
-            <listitem>
-              <para><emphasis>CMake</emphasis>. Available from <uri
-              type="website"
-              xlink:href="http://www.cmake.org/cmake/resources/software.html">http://www.cmake.org/cmake/resources/software.html</uri>.</para>
-            </listitem>
-
-            <listitem>
-              <para><emphasis>ADAPTIVE Computing Environment (ACE)</emphasis>.
-              Available from <uri type="website"
-              xlink:href="http://www.cs.wustl.edu/~schmidt/ACE.html">http://www.cs.wustl.edu/~schmidt/ACE.html</uri>.</para>
-            </listitem>
-
-            <listitem>
-              <para><emphasis>Boost</emphasis>. Available from <uri
-              type="website"
-              xlink:href="http://www.boost.org">http://www.boost.org</uri>.</para>
-            </listitem>
-
-            <listitem>
-              <para><emphasis>FFT3W</emphasis>. Available from <uri
-              type="website"
-              xlink:href="http://www.fftw.org">http://www.fftw.org</uri>.</para>
-            </listitem>
-
-            <listitem>
-              <para><emphasis>CodeSynthesis XSD</emphasis>. Available from
-              <uri type="website"
-              xlink:href="http://www.codesynthesis.com/products/xsd">http://www.codesynthesis.com/products/xsd</uri>.</para>
-            </listitem>
-          </itemizedlist>
-        </sect3>
-
-        <sect3 xml:id="sect.optional">
-          <title>Optional libraries</title>
-
-          <itemizedlist>
-            <listitem>
-              <para><emphasis>ISMRM Raw Data format</emphasis> (optional).
-              This is the MRI raw data format used in the streaming framework.
-              Without this library installed you will not be able to
-              reconstruct the provided MRI examples. The toolboxes can however
-              still be used. Available from <uri type="website"
-              xlink:href="http://ismrmrd.sourceforge.net">http://ismrmrd.sourceforge.net</uri>.</para>
-            </listitem>
-
-            <listitem>
-              <para><emphasis>HDF5</emphasis> (optional). The ISMRM Raw Data
-              Format (see above) uses the HDF5 file format for storing raw
-              data and images. Available from <uri
-              xlink:href="http://www.hdfgroup.org/HDF5">http://www.hdfgroup.org/HDF5</uri>.</para>
-            </listitem>
-
-            <listitem>
-              <para><emphasis>BLAS</emphasis> and <emphasis>LAPACK</emphasis>
-              (optional). Most Linux distributions come with these libraries
-              and they are included on Mac OS X as well, but the vendor
-              depends on your distribution and platform. See specific
-              instructions below for Windows. Without these libraries present
-              some gadgets and toolbox functionality are disabled.</para>
-            </listitem>
-
-            <listitem>
-              <para><emphasis>CUDA</emphasis> (optional). For GPU support you
-              need to install CUDA from Nvidia. You will need a CUDA driver
-              for your graphics card too. Available from <uri type="website"
-              xlink:href="http://developer.nvidia.com/cuda-downloads">http://developer.nvidia.com/cuda-downloads</uri>.</para>
-            </listitem>
-
-            <listitem>
-              <para><emphasis>CULA</emphasis> (optional). We use CULA for
-              LAPACK routines on the GPU. This is the only dependency which is
-              not Open Source. You can however download a free (registration
-              required) version of CULA. Available from <uri type="website"
-              xlink:href="http://www.culatools.com/downloads/dense">http://www.culatools.com/downloads/dense</uri>.</para>
-            </listitem>
-
-            <listitem>
-              <para><emphasis>QT4</emphasis> (optional). A few standalone
-              applications use QT for creating user interfaces. Available from
-              <uri type="website"
-              xlink:href="http://qt.nokia.com">http://qt.nokia.com</uri>.</para>
-            </listitem>
-
-            <listitem>
-              <para><emphasis>Doxygen</emphasis> (optional). Required if you
-              would like to build the API documentation. Available from <uri
-              type="website"
-              xlink:href="http://www.stack.nl/~dimitri/doxygen">http://www.stack.nl/~dimitri/doxygen</uri>.</para>
-            </listitem>
-
-            <listitem>
-              <para><emphasis>Docbook</emphasis> (optional). Required if you
-              would like to build the manual (this document). A number of
-              corresponding tools such as <application>xsltproc</application>
-              and <application>fop</application> (for the PDF version of the
-              library) are also needed. Additionally you need the Docbook
-              stylesheets. Available from <uri type="website"
-              xlink:href="http://docbook.sourceforge.net">http://docbook.sourceforge.net</uri>.</para>
-            </listitem>
-
-            <listitem>
-              <para><emphasis>Git (optional)</emphasis>. We use
-              <application>git</application> to manage our source code
-              archives. You can use any source code management system you
-              prefer (or none at all), but if you would like to stay in line
-              with the Gadgetron team, use <application>git</application>.
-              Available from <uri type="website"
-              xlink:href="http://git-scm.com">http://git-scm.com</uri>.</para>
-            </listitem>
-          </itemizedlist>
-        </sect3>
-      </sect2>
-    </sect1>
-
-    <sect1 xml:id="sect.installation">
-      <title>Compiling and Installing Gadgetron</title>
-
-      <sect2 xml:id="sect.linuxinstall">
-        <title>Linux Installation Instructions</title>
-
-        <para>Linux is the preferred operating system to get started using the
-        Gadgetron. All of the required dependencies are included in most major
-        Linux distributions and can be installed easily and without having to
-        compile anything. In the following sections we walk you through the
-        required steps to set up a full Gadgetron installation. We assume that
-        you are starting with a freshly installed Ubuntu 12.04 available from
-        the Ubuntu website (<uri
-        xlink:href="http://www.ubuntu.com">http://www.ubuntu.com</uri>). If
-        you don't have a machine available for installing Ubuntu, you can
-        always try it out in a virtual machine using virtualization software
-        such as VirtualBox (<uri type="website"
-        xlink:href="https://www.virtualbox.org">https://www.virtualbox.org</uri>).</para>
-
-        <para>If you would like to use the GPU components included in the
-        Gadgetron and you have an Nvidia GPU available on your system, please
-        complete the CUDA/CULA installations as described in <xref
-        linkend="section.linuxgpuinstall"/>.</para>
-
-        <para>First install all dependencies for Gadgetron. The following will
-        install everything you need:</para>
-
-        <screen><prompt>user@mycomputer:~$</prompt> <userinput>sudo apt-get install doxygen cmake \
- libqt4-dev libglew1.6-dev \
- docbook5-xml docbook-xsl-doc-pdf \
- docbook-xsl-doc-html docbook-xsl-ns xsltproc \
- fop git-core libboost-dev libboost-python-dev \
- libfftw3-dev libace-dev python-dev python-numpy \
- freeglut3-dev libxi-dev liblapack-dev build-essential \
- libhdf5-serial-dev h5utils hdf5-tools hdfview \
- libboost-system-dev libboost-thread-dev xsdcxx \
- libxerces-c-dev </userinput></screen>
-
-        <para>Next (optional but recommended) download, compile, and install
-        ISMRMRD (there are more detailed instructions on the
-        <uri>http://ismrmrd.sourceforge.net</uri> website):</para>
-
-        <screen>  git clone git://git.code.sf.net/p/ismrmrd/code ismrmrd-code
-  cd ismrmrd-code/
-  mkdir build
-  cd build
-  cmake ../
-  make
-  sudo make install</screen>
-
-        <para>Last command will install the library in
-        <filename>/usr/local/ismrmrd</filename>.</para>
-
-        <para>Now download the Gadgetron archive and compile it. If you have
-        access to a git repository, you can get the code with:</para>
-
-        <screen><userinput>  git clone git://git.code.sf.net/p/gadgetron/gadgetron gadgetron</userinput></screen>
-
-        <para>Configure and build the Gadgetron:</para>
-
-        <screen>  cd gadgetron/
-  mkdir build
-  cd build
-  cmake ../
-  make  </screen>
-
-        <para>Install (default location is
-        <filename>/usr/local/gadgetron</filename>):</para>
-
-        <screen><prompt>user@mycomputer:~/gadgetron/build$</prompt> <userinput>sudo make install</userinput> </screen>
-
-        <para>The final step is to add/modify a few environment variables in
-        your <filename>~/.bashrc</filename> file.</para>
-
-        <programlisting>export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/gadgetron/lib:/usr/local/ismrmrd/lib
-export PATH=$PATH:/usr/local/gadgetron/bin:/usr/local/ismrmrd/bin
-export GADGETRON_HOME=/usr/local/gadgetron     </programlisting>
-
-        <para>Rename the example configuration file
-        <filename>GADGETRON_HOME/config/gadgetron.xml.example</filename> to
-        <filename>GADGETRON_HOME/config/gadgetron.xml</filename></para>
-
-        <para>You are now set up to run a simple example reconstruction as
-        outlined in <xref linkend="sect.simpleexample"/>.</para>
-
-        <sect3 xml:id="section.linuxgpuinstall">
-          <title>Installing GPU components (CUDA and CULA) on Linux</title>
-
-          <para>First install the Nvidia driver. The Ubuntu distribution comes
-          with a driver that will work with CUDA in some instances, but we
-          recommend that you install the latest developer driver from the
-          Nvidia website, e.g.:</para>
-
-          <para>Download (to the current directory)
-          <filename>devdriver_4.2_linux_64_295.41.run</filename> from <uri
-          xlink:href="http://developer.nvidia.com/cuda/cuda-downloads">http://developer.nvidia.com/cuda/cuda-downloads</uri>
-          and install the driver:</para>
-
-          <para><screen><userinput>sudo sh ./devdriver_4.2_linux_64_295.41.run</userinput></screen></para>
-
-          <para>The process of getting this driver installed may vary from
-          installation to installation. Specifically, you may need to remove
-          any existing Nvidia driver before installing and you will have to
-          shut down the display manager before installing.</para>
-
-          <para>The display manager can be shut down with:</para>
-
-          <para><screen><userinput>sudo service lightdm stop
-	  </userinput></screen></para>
-
-          <para><remark>Important notice</remark>: Unfortunately we have
-          experienced on several Ubuntu installations that the machine hangs
-          at the splash screen after installation of the Nvidia graphics
-          driver. If you experience this problem, or if you just want to be on
-          the safe side before rebooting, open a terminal (boot in recovery
-          mode) and edit <filename>/etc/default/grub</filename>. Locate the
-          line defining <varname>GRUB_CMDLINE_LINUX_DEFAULT</varname> and
-          change <envar>splash</envar> to <envar>nosplash</envar> (or add
-          <envar>nosplash</envar> if <envar>splash</envar> is not present).
-          Furthermore add <envar>nomodeset</envar>. E.g.
-          <code>GRUB_CMDLINE_LINUX_DEFAULT="quiet nosplash nomodeset"</code>.
-          Finally update the boot manager with the new settings:
-          <userinput>sudo update-grub</userinput></para>
-
-          <para>Next we need to install gcc 4.4 since Ubuntu comes
-          preconfigured with gcc 4.6, which is not compatible with the current
-          versions of the CUDA nvcc compiler.</para>
-
-          <screen><userinput>sudo apt-get install gcc-4.4 g++-4.4 build-essential</userinput></screen>
-
-          <para>Set up alternative systems to allow easy switching between the
-          two versions of gcc/g++</para>
-
-          <screen><userinput>sudo update-alternatives --install /usr/bin/gcc gcc \
- /usr/bin/gcc-4.6 40 --slave /usr/bin/g++ g++ /usr/bin/g++-4.6
-
-sudo update-alternatives --install /usr/bin/gcc \
- gcc /usr/bin/gcc-4.4 60 --slave /usr/bin/g++ g++ /usr/bin/g++-4.4</userinput></screen>
-
-          <para>Check your gcc compiler (should now be version 4.4.7):</para>
-
-          <screen><userinput>gcc -v</userinput></screen>
-
-          <para>When you want to switch between the two compiler
-          versions:</para>
-
-          <screen><userinput>sudo update-alternatives --config gcc</userinput></screen>
-
-          <para>The final step is to actually install CUDA and CULA. Download
-          the following files (for CUDA release 4.2):</para>
-
-          <itemizedlist>
-            <listitem>
-              <para><filename>cudatoolkit_4.2.9_linux_64_ubuntu11.04.run</filename>
-              from <uri type="website"
-              xlink:href="http://developer.nvidia.com/cuda/cuda-downloads">http://developer.nvidia.com/cuda/cuda-downloads</uri></para>
-            </listitem>
-
-            <listitem>
-              <para>cula_dense_free_R15-linux64.run from <uri type="website"
-              xlink:href="http://www.culatools.com/downloads/dense">http://www.culatools.com/downloads/dense</uri>
-              (free registration required)</para>
-            </listitem>
-          </itemizedlist>
-
-          <para>Go to the folder where the files were downloaded and
-          type:</para>
-
-          <screen><prompt>user@mycomputer:</prompt><userinput>sudo sh ./cudatoolkit_4.2.9_linux_64_ubuntu11.04.run
-sudo sh ./cula_dense_free_R15-linux64.run
-	  </userinput></screen>
-
-          <para>Follow the instructions. When you are done with the
-          installation you may want to add the following to your
-          <filename>~/.bashrc</filename> file.</para>
-
-          <programlisting>export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib
-export CULA_ROOT="/usr/local/cula"
-export CULA_INC_PATH="$CULA_ROOT/include"
-export CULA_BIN_PATH_32="$CULA_ROOT/bin"
-export CULA_BIN_PATH_64="$CULA_ROOT/bin64"
-export CULA_LIB_PATH_32="$CULA_ROOT/lib"
-export CULA_LIB_PATH_64="$CULA_ROOT/lib64"
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CULA_LIB_PATH_64    </programlisting>
-
-          <para>You are now ready to compile and run CUDA (and CULA)
-          applications. You may want to download the CUDA SDK from Nvidia to
-          validate your installation but this is not required.</para>
-        </sect3>
-      </sect2>
-
-      <sect2>
-        <title>Mac OS X Installation Instructions</title>
-
-        <para>The following instructions assume that you are starting on a Mac
-        with OS X 10.6.8 (Snow Leopard) installed. Additionally it assumes
-        that you have Xcode (3.2.6) installed. If you have upgraded to Lion or
-        are on an older release, you should still be able to make it all
-        compile, but you may have to make some adjustments.</para>
-
-        <para>We use MacPorts (<uri
-        xlink:href="http://www.macports.org/">http://www.macports.org/</uri>)
-        to install the required dependencies. You may use a different package
-        management system or prefer to install packages manually. In that
-        case, please look at the list of dependencies (<xref
-        linkend="sect.dependencies"/>) and install the required dependencies
-        for the components you would like to use.</para>
-
-        <para>MacPorts is not the fastest way to install packages as they are
-        compiled locally. We use this method here nonetheless to make it
-        easier to follow the instructions. Please be patient when running the
-        <command>port</command> commands.</para>
-
-        <itemizedlist>
-          <listitem>
-            <para>Install MacPorts.</para>
-
-            <para>Download <filename>MacPorts-2.1.2.pkg</filename> from <uri
-            xlink:href="http://www.macports.org/">http://www.macports.org/</uri>.</para>
-
-            <para>Run <command><userinput>sudo port -v
-            selfupdate</userinput></command> to make sure you are up to
-            date.</para>
-          </listitem>
-
-          <listitem>
-            <para>Get your Python installation up to date. Mac OS X ships with
-            Python installed, but it is not a complete distribution. You need
-            to update it if you would like to do Python development with the
-            Gadgetron. If you already have <package>numpy</package> and
-            <package>SciPy</package> installed, you may be able to skip this
-            step. If you do not wish to use Python, you can also skip this
-            step.</para>
-
-            <screen><userinput>sudo port install python27 py27-numpy py27-scipy py27-libxml2</userinput></screen>
-
-            <para>This should install Python 2.7. Now select Python 2.7 as
-            the active Python installation:</para>
-
-            <screen><userinput>sudo port select python python27</userinput></screen>
-
-            <para>To make sure the build system finds the right version of
-            Python we need to edit a couple of symbolic links manually:</para>
-
-            <screen><userinput>cd /System/Library/Frameworks/Python.framework/Versions
-sudo ln -s /opt/local/Library/Frameworks/Python.framework/Versions/2.7
-sudo rm Current
-sudo ln -s 2.7 Current</userinput></screen>
-          </listitem>
-
-          <listitem>
-            <para>Install Boost. Boost gets special treatment here. Depending
-            on whether you would like to do Python development, you need to
-            install Boost with or without boost_python. If you would like
-            Python:</para>
-
-            <screen><userinput>sudo port install boost +python27</userinput></screen>
-
-            <para>If you don't need Python support:</para>
-
-            <screen><userinput>sudo port install boost</userinput></screen>
-          </listitem>
-
-          <listitem>
-            <para>Now we can install the rest of the packages:</para>
-
-            <screen><userinput>sudo port install git-core cmake libACE \
-fftw-3-single fftw-3 qt4-mac-devel hdf5-18 \
-libxml2 xercesc3
-	    </userinput></screen>
-
-            <para>This may take quite a long time (hours).</para>
-          </listitem>
-
-          <listitem>
-            <para>Install CodeSynthesis XSD</para>
-
-            <para><screen><userinput>wget http://www.codesynthesis.com/download/xsd/3.3/macosx/i686/xsd-3.3.0-i686-macosx.tar.bz2
-tar -xjf xsd-3.3.0-i686-macosx.tar.bz2
-cd xsd-3.3.0-i686-macosx
-sudo cp bin/xsd /usr/local/bin/
-sudo cp -r libxsd/xsd /usr/local/include/</userinput></screen></para>
-          </listitem>
-
-          <listitem>
-            <para>Download, compile, and install ISMRMRD. Detailed
-            instructions can be found at <uri
-            xlink:href="http://ismrmrd.sourceforge.net">http://ismrmrd.sourceforge.net</uri>.</para>
-
-            <para><screen><userinput>git clone git://git.code.sf.net/p/ismrmrd/code ismrmrd-code
-cd ismrmrd-code/
-mkdir build
-cd build/
-cmake ../
-make
-sudo make install</userinput></screen>Last command will install the library in
-            <filename>/usr/local/ismrmrd</filename>.</para>
-
-            <para>Make sure that <filename>/usr/local/ismrmrd/lib</filename>
-            is in your <varname>DYLD_LIBRARY_PATH</varname> environment
-            variable (see below).</para>
-          </listitem>
-
-          <listitem>
-            <para>To visualize HDF5 files you may also want to install HDFView
-            from <uri
-            xlink:href="http://www.hdfgroup.org/ftp/HDF5/hdf-java/hdfview/hdfview_install_macosx_intel64.zip">http://www.hdfgroup.org/ftp/HDF5/hdf-java/hdfview/hdfview_install_macosx_intel64.zip</uri></para>
-          </listitem>
-
-          <listitem>
-            <para>Install CUDA and CULA. If you would like to use the GPU
-            components, you need to install the following:</para>
-
-            <itemizedlist>
-              <listitem>
-                <para>The Nvidia development driver
-                (<filename>devdriver_4.2.10_macos.dmg</filename>) from <uri
-                xlink:href="http://developer.nvidia.com/cuda/cuda-downloads">http://developer.nvidia.com/cuda/cuda-downloads</uri>.</para>
-              </listitem>
-
-              <listitem>
-                <para>The CUDA Toolkit
-                (<filename>cudatoolkit_4.2.9_macos.pkg</filename>) from <uri
-                xlink:href="http://developer.nvidia.com/cuda/cuda-downloads">http://developer.nvidia.com/cuda/cuda-downloads</uri>.</para>
-              </listitem>
-
-              <listitem>
-                <para>The CULA Dense Libraries
-                (<filename>cula_dense_free_R15-osx.dmg</filename>) from <uri
-                xlink:href="http://www.culatools.com/downloads/dense">http://www.culatools.com/downloads/dense</uri>.</para>
-              </listitem>
-            </itemizedlist>
-          </listitem>
-
-          <listitem>
-            <para>Compiling the Gadgetron:</para>
-
-            <screen><prompt>$</prompt> <userinput>cd gadgetron</userinput>
-$ <userinput>mkdir build</userinput>
-$ <userinput>cd build</userinput>
-$ <userinput>make</userinput>
-$ <userinput>sudo make install</userinput></screen>
-
-            <para>The long path for the <package>numpy</package> header files
-            is only needed if you want Python support. You can avoid this by
-            creating a symbolic link:</para>
-
-            <screen>$ <userinput>cd /opt/local/Library/Frameworks/Python.framework</userinput>
-$ <userinput>cd Versions/2.7/include/python2.7</userinput>
-$ <userinput>sudo ln -s ../../lib/python2.7/site-packages/numpy/core/include/numpy</userinput></screen>
-
-            <para>After creating this link you should be able to compile with
-            the following:</para>
-
-            <screen><prompt>$</prompt> <userinput>cd gadgetron</userinput>
-$ <userinput>mkdir build</userinput>
-$ <userinput>cd build</userinput>
-$ <userinput>cmake ../</userinput>
-$ <userinput>make</userinput>
-$ <userinput>sudo make install</userinput></screen>
-          </listitem>
-
-          <listitem>
-            <para>Set environment variables:</para>
-
-            <screen>$ <userinput>export GADGETRON_HOME=/usr/local/gadgetron</userinput>
-$ <userinput>export PATH=$PATH:/usr/local/gadgetron/bin:/usr/local/ismrmrd/bin</userinput>
-$ <userinput>export DYLD_LIBRARY_PATH=$DYLD_LIBRARY_PATH:
-/usr/local/gadgetron/lib:/usr/local/ismrmrd/lib</userinput></screen>
-
-            <para>You may wish to add these lines to
-            <filename>~/.bash_profile</filename>. You may also want to add
-            paths to CUDA and CULA libraries if you are using those:</para>
-
-            <screen>$ <userinput>export DYLD_LIBRARY_PATH=$DYLD_LIBRARY_PATH:/usr/local/cula/lib64</userinput>
-$ <userinput>export DYLD_LIBRARY_PATH=$DYLD_LIBRARY_PATH:/usr/local/cuda/lib</userinput></screen>
-          </listitem>
-
-          <listitem>
-            <para>After compiling and installing, please rename the file
-            <filename>GADGETRON_HOME/config/gadgetron.xml.example</filename>
-            to <filename>GADGETRON_HOME/config/gadgetron.xml</filename></para>
-          </listitem>
-
-          <listitem>
-            <para>Test your Gadgetron by following the instructions in <xref
-            linkend="sect.simpleexample"/>.</para>
-          </listitem>
-        </itemizedlist>
-      </sect2>
-
-      <sect2>
-        <title>Windows Installation Instructions</title>
-
-        <para>It is probably appropriate to start this section with a warning:
-        Windows is not the easiest environment in which to work with the
-        Gadgetron. As indicated in <xref linkend="sect.dependencies"/>, the
-        Gadgetron relies on multiple external libraries. Many of those
-        libraries are not available as easy install packages and must be
-        compiled separately. If you are uncomfortable setting up development
-        tools on Windows, or if you are just looking for a fast and easy way
-        to get started with the Gadgetron, we recommend installing on Ubuntu
-        Linux - possibly using a virtual machine inside Windows (see <xref
-        linkend="sect.linuxinstall"/>).</para>
-
-        <para>The following is a list of steps we have used to install the
-        Gadgetron on a clean Windows 7 (64-bit) machine. It has also been
-        tested successfully on a 32 bit machine (but in this case you choose
-        32 bit packages/configuration where appropriate).</para>
-
-        <para>The Gadgetron distribution also includes a Windows Powershell
-        Script in
-        <uri>doc/windows_installation/GadgetronWindowsInstallation.ps1</uri>,
-        which describes the command line steps for installing the
-        dependencies. You cannot complete the installation by simply running
-        the script. Most likely your Windows machine will not allow you to run
-        the script directly without changing security settings and the
-        download of some of the dependencies cannot be automated since you
-        have to log in (or provide an email address) when downloading. The
-        script can serve as a guide and we recommend (if you would like to use
-        the script) to open it in the Powershell ISE and execute it
-        line-by-line (or section by section). The general installation steps
-        are written out here too, you may need to make some adjustments for
-        your particular setup.</para>
-
-        <itemizedlist>
-          <listitem>
-            <para>Install Visual Studio 2010 (with Service Pack 1)</para>
-          </listitem>
-
-          <listitem>
-            <para>Install CUDA/CULA (optional, but required for GPU
-            support).</para>
-
-            <para>Download the CUDA drivers/toolkit from</para>
-
-            <para><uri
-            xlink:href="http://developer.nvidia.com/cuda/cuda-downloads">http://developer.nvidia.com/cuda/cuda-downloads</uri>.</para>
-
-            <para>Install Nvidia Developer Driver (Version 301.32)</para>
-
-            <para>Install Nvidia Toolkit (4.2)</para>
-
-            <para>Install gpucomputingsdk</para>
-
-            <para>Install <filename>cula_dense_free_R15-win64.exe</filename>
-            from</para>
-
-            <para><uri
-            xlink:href="http://www.culatools.com/downloads/dense">http://www.culatools.com/downloads/dense</uri>.</para>
-
-            <para>Assuming CULA was installed in <filename>C:\Program
-            Files\CULA\R15</filename>, add</para>
-
-            <para><filename>C:\Program Files\CULA\R15\bin64</filename> to your
-            <varname>PATH</varname> environment variable.</para>
-          </listitem>
-
-          <listitem>
-            <para>Create a folder for external libraries, say
-            <filename>C:\Libraries</filename>.</para>
-          </listitem>
-
-          <listitem>
-            <para>Install FFTW3 (<uri
-            xlink:href="http://www.fftw.org/install/windows.html">http://www.fftw.org/install/windows.html</uri>)</para>
-
-            <para>Copy FFTW3 binaries to
-            <filename>C:\Libraries\FFTW3</filename></para>
-
-            <para>Create *.lib files, on the command line type:</para>
-
-            <programlisting>c:\Libraries\FFTW3>lib /machine:x64 /def:libfftw3f-3.def
-c:\Libraries\FFTW3>lib /machine:x64 /def:libfftw3-3.def
-c:\Libraries\FFTW3>lib /machine:x64 /def:libfftw3l-3.def
-</programlisting>
-
-            <para>Add <filename>C:\Libraries\FFTW3</filename> to your
-            <envar>PATH</envar> environment variable.</para>
-
-            <para>On 32 bit Windows remember to remove the /machine:x64
-            argument, the default is 32 bit.</para>
-          </listitem>
-
-          <listitem>
-            <para>Install ACE (<uri
-            xlink:href="http://download.dre.vanderbilt.edu/">http://download.dre.vanderbilt.edu/</uri>)</para>
-
-            <para>Unpack ACE into C:\Libraries\ACE-6.1.0\ACE_wrappers</para>
-
-            <para>Add <filename>config.h</filename> in
-            <filename>ACE_ROOT/ace/</filename> with the following
-            content:</para>
-
-            <programlisting>//We are on Windows
-#include "ace/config-win32.h" 
-
-//This ensures that INLINE settings 
-//do not vary between Debug and Release modes
-#define ACE_NO_INLINE </programlisting>
-
-            <para>Open the VS 2010 project in the source code archive</para>
-
-            <para>Set build type to Release/x64</para>
-
-            <para>Build (this takes a while)</para>
-
-            <para>Add to <varname>PATH</varname> environment variable:
-            <filename>C:\Libraries\ACE-6.1.0\ACE_wrappers\lib</filename></para>
-          </listitem>
-
-          <listitem>
-            <para>Install Python (optional).</para>
-
-            <para>Regrettably, the off-the-shelf Python header files cannot be
-            compiled in debug mode on Windows. This forces the Gadgetron
-            framework to be compiled in release mode only, if you enable the
-            Python components.</para>
-
-            <para>Install python-2.7.3.amd64 (<uri
-            xlink:href="http://www.python.org">http://www.python.org</uri>)</para>
-
-            <para>Add install folder (e.g. <filename>C:\Python27</filename>)
-            to PATH environment variable</para>
-
-            <para>Add <varname>PYTHON_ROOT</varname> environment
-            variable</para>
-
-            <para>From <uri
-            xlink:href="http://www.lfd.uci.edu/~gohlke/pythonlibs/">http://www.lfd.uci.edu/~gohlke/pythonlibs/</uri>
-            download and install the following (+ additional libraries that
-            you may need for your Python development):</para>
-
-            <para><itemizedlist>
-                <listitem>
-                  <para><filename>numpy-MKL-1.6.2.win-amd64-py2.7</filename></para>
-                </listitem>
-
-                <listitem>
-                  <para><filename>scipy-0.10.1.win-amd64-py2.7</filename></para>
-                </listitem>
-
-                <listitem>
-                  <para><filename>libxml2-python-2.7.8.win-amd64-py2.7</filename></para>
-                </listitem>
-              </itemizedlist></para>
-          </listitem>
-
-          <listitem>
-            <para>Install ACML (BLAS and LAPACK)</para>
-
-            <para>Download <filename>acml4.4.0-win64.exe</filename> from: <uri
-            xlink:href="http://developer.amd.com/downloads/acml4.4.0-win64.exe">http://developer.amd.com/downloads/acml4.4.0-win64.exe</uri></para>
-
-            <para>Install Library in say
-            <filename>C:\Libraries\acml4.4.0</filename></para>
-
-            <para>Add
-            <filename>C:\Libraries\acml4.4.0\win64\lib;C:\Libraries\acml4.4.0\win64_mp\lib</filename>
-            to your <varname>PATH</varname> environment variable.</para>
-
-            <para>Notice. Newer versions of the ACML-library are available
-            (version 5.2.0 at the time of preparing this manual) - however,
-            these libraries are distributed without required dependencies and
-            will not work out of the box. We recommend sticking to the earlier
-            version 4.4.0 until these issues have been resolved.</para>
-          </listitem>
-
-          <listitem>
-            <para>Install the newest Boost release (<uri
-            xlink:href="http://www.boost.org">http://www.boost.org</uri>)</para>
-
-            <para>We recommend using the precompiled binaries from BoostPro
-            (e.g. <uri
-            xlink:href="http://boostpro.com/download/x64/boost_1_51_setup.exe">http://boostpro.com/download/x64/boost_1_51_setup.exe</uri>)</para>
-
-            <para>Just install everything, you might need other components
-            later.</para>
-          </listitem>
-
-          <listitem>
-            <para>Install <application>git</application> (if you are using
-            source code management):</para>
-
-            <para>Run the newest installation package named something like
-            Git-*-preview*.exe from: <uri
-            xlink:href="http://code.google.com/p/msysgit/">http://code.google.com/p/msysgit/</uri></para>
-
-            <para>Select the option to use Git from Git Bash only</para>
-
-            <para>Select the option to check out Windows-style and commit Unix-style line endings</para>
-          </listitem>
-
-          <listitem>
-            <para>Install CMake (<uri
-            xlink:href="http://www.cmake.org/cmake/resources/software.html">http://www.cmake.org/cmake/resources/software.html</uri>)</para>
-
-            <para>Install the latest release (e.g. <uri
-            xlink:href="http://www.cmake.org/files/v2.8/cmake-2.8.9-win32-x86.exe">http://www.cmake.org/files/v2.8/cmake-2.8.9-win32-x86.exe</uri>)</para>
-          </listitem>
-
-          <listitem>
-            <para>Install <application>HDF5</application>:</para>
-
-            <para>You will need the HDFView application to view data files
-            used by the Gadgetron, it can be downloaded from <uri
-            xlink:href="http://www.hdfgroup.org/HDF5/">http://www.hdfgroup.org/HDF5/</uri>
-            install HDFView :
-            <filename>hdfview_install_win64.exe</filename></para>
-
-            <para>The precompiled binaries (e.g. <uri
-            xlink:href="http://www.hdfgroup.org/ftp/HDF5/current/bin/windows/HDF5189-win64-vs10-shared.zip">http://www.hdfgroup.org/ftp/HDF5/current/bin/windows/HDF5189-win64-vs10-shared.zip</uri>)
-            should work fine with the Gadgetron. Remember to add the path
-            (e.g. <filename>C:\Program Files\HDF
-            Group\HDF5\1.8.9\bin</filename>) to the HDF libraries to your
-            <varname>PATH</varname> environment variable.</para>
-          </listitem>
-
-          <listitem>
-            <para>Install CodeSynthesis XSD (<uri
-            xlink:href="http://www.codesynthesis.com/download/xsd/3.3/windows/i686/xsd-3.3.msi">http://www.codesynthesis.com/download/xsd/3.3/windows/i686/xsd-3.3.msi</uri>).</para>
-
-            <para>Remember to add the path to the XSD binaries to your
-            <varname>PATH</varname> environment variable. E.g.
-            <filename>C:\Program Files (x86)\CodeSynthesis XSD
-            3.3\bin\</filename> and <filename>C:\Program Files
-            (x86)\CodeSynthesis XSD 3.3\bin64\</filename>.</para>
-          </listitem>
-
-          <listitem>
-            <para>Download, compile, and install the ISMRM Raw Data format.
-            Detailed instructions are available at (<uri
-            xlink:href="http://ismrmrd.sourceforge.net">http://ismrmrd.sourceforge.net</uri>).</para>
-
-            <para>From a git bash shell:</para>
-
-            <para><screen>git clone git://git.code.sf.net/p/ismrmrd/code ismrmrd-code
-cd ismrmrd-code/
-mkdir build
-cd build/
-cmake-gui.exe
-</screen>Last command will open CMake's graphical user interface. Hit the
-            configure button and deal with the dependencies that CMake is
-            unable to find. Hit configure again and repeat the process until
-            CMake has enough information to configure. Once the configuration
-            is complete, you can hit generate to generate a Visual Studio
-            project, which you can open and use to build ISMRMRD.</para>
-          </listitem>
-
-          <listitem>
-            <para>Download and unpack Gadgetron source code.</para>
-          </listitem>
-
-          <listitem>
-            <para>Create Visual Studio project (your process may vary):</para>
-
-            <para>Start <application>cmake-gui</application></para>
-
-            <para>Select source (<envar>$GADGETRON_HOME</envar>) and target
-            directories
-            (<envar>$GADGETRON_HOME</envar><filename>/build</filename>)</para>
-
-            <para>Hit configure (first time) -- "ok" the dialogue box.</para>
-
-            <para>Add PATH variable BOOST_ROOT to point to BOOST folder (use
-            GUI button "+Add Entry" to do this)</para>
-
-            <para>Hit configure (again)</para>
-
-            <para>Specify location of FFTW and FFTWf libraries</para>
-
-            <para>Hit configure (again)</para>
-
-            <para>Specify the locations of GLEW and GLUT:</para>
-
-            <para>Header files in <filename>C:\ProgramData\NVIDIA
-            Corporation\NVIDIA GPU COMPUTING SDK
-            4.1\shared\inc</filename></para>
-
-            <para>Library files in <filename>C:\ProgramData\NVIDIA
-            Corporation\NVIDIA GPU COMPUTING SDK 4.1\lib/x64</filename></para>
-
-            <para>Specify location of CULA (include path and core/lapack
-            library filepaths)</para>
-
-            <para>Specify location of ACE include directory</para>
-
-            <para>Hit configure (again)</para>
-
-            <para>Specify <varname>NUMPY_INCLUDE_DIRS</varname> =
-            <filename>C:/Python27/Lib/site-packages/numpy/core/include</filename></para>
-
-            <para>Hit configure (again)</para>
-
-            <para>Specify the following <application>CMake</application> FILEPATH
-            variables:</para>
-
-            <programlisting>
-BLAS_acml_LIBRARY= \
-  C:/Libraries/acml4.4.0/win64/lib/libacml_dll.lib
-
-BLAS_acml_mp_LIBRARY= \
-  C:/Libraries/acml4.4.0/win64_mp/lib/libacml_mp_dll.lib
-
-</programlisting>
-
-            <para>Hit configure</para>
-
-            <para>Make sure that the <envar>HDF5_C_LIBRARY</envar> and
-            <envar>HDF5_CXX_LIBRARY</envar> FILEPATH variables are set
-            correctly (we have observed that they might be incorrectly set to
-            point to the dll instead of the lib files by default).</para>
-
-            <para>Hit configure</para>
-
-            <para>Hit generate</para>
-          </listitem>
-
-          <listitem>
-            <para>You should now have a visual studio project that you can
-            open and build (try Release/x64 mode and try the install target).
-            If you are lacking sufficient write permission to install in the
-            default location, run Visual Studio as Administrator or change
-            <envar>CMAKE_INSTALL_PREFIX</envar> to a folder to which you have
-            write permissions. Notice that /gadgetron is automatically
-            appended to the path you specify.</para>
-          </listitem>
-
-          <listitem>
-            <para>After compiling and installing, please rename the file
-            <filename>GADGETRON_HOME/config/gadgetron.xml.example</filename>
-            to <filename>GADGETRON_HOME/config/gadgetron.xml</filename></para>
-          </listitem>
-        </itemizedlist>
-
-        <para>Before attempting to run any reconstructions, please set the
-        environment variable <varname>GADGETRON_HOME</varname> to point to the
-        installation folder of your Gadgetron installation and make sure that
-        the paths of all dependencies are in your <varname>PATH</varname>
-        environment variable.</para>
-
-        <para>You now have a working installation of the Gadgetron in Windows.
-        Follow the instructions below to run a simple reconstruction example
-        (<xref linkend="sect.simpleexample"/>).</para>
-      </sect2>
-    </sect1>
-
-    <sect1 xml:id="sect.simpleexample">
-      <title>Hello Gadgetron: Your First Image Reconstruction</title>
-
-      <para>Some basic sample datasets are available from the Sourceforge
-      website:</para>
-
-      <para><uri type="website"
-      xlink:href="https://sourceforge.net/projects/gadgetron/files/testdata/">https://sourceforge.net/projects/gadgetron/files/testdata/</uri></para>
-
-      <para>You will generally encounter two types of data in this manual: a)
-      Simple array format described in <xref linkend="simplearrayfiles"/> and
-      b) ISMRMRD HDF5 files which are described in more detail at <uri
-      xlink:href="http://ismrmrd.sourceforge.net">http://ismrmrd.sourceforge.net</uri>.
-      It is beyond the scope of this manual to explain the HDF5 file format,
-      but we have added a small introductory section in the appendix (<xref
-      linkend="section.hdf5"/>).</para>
-
-      <para>Download the file <filename>simple_gre.h5</filename> from the
-      website (on Linux simply type):</para>
-
-      <screen>wget http://sourceforge.net/projects/gadgetron/files/
-      testdata/ismrmrd/simple_gre.h5</screen>
-
-      <para>Open two terminal windows to observe both client and Gadgetron
-      communication output. In the Gadgetron terminal window simply
-      type:</para>
-
-      <screen><prompt>user at mycomputer:~/temp/gadgetron_out$</prompt> <userinput>gadgetron</userinput>  </screen>
-
-      <para>In the client window (in the folder where you just downloaded the
-      data) type:</para>
-
-      <screen><prompt>user at mycomputer:~/temp/test_data$</prompt> <userinput><command>mriclient \
-    -d simple_gre.h5 \
-    -c default.xml</command></userinput>
-</screen>
-
-      <para>You should now see some logging information both in the Gadgetron
-      window and in the client window. Specifically, you should see that a
-      connection is being made and when the reconstruction is done the client
-      should shut down:</para>
-
-      <screen><prompt>user at mycomputer:~/temp/test_data$</prompt> <userinput>mriclient \
-    -d simple_gre.h5 \  
-    -c default.xml</userinput>
-
-Gadgetron MRI Data Sender
-  -- host            :      localhost
-  -- port            :      9002
-  -- hdf5 file  in   :      gadgetron_testdata.h5
-  -- hdf5 group in   :      simple_gre
-  -- conf            :      default.xml
-  -- loop            :      1
-  -- hdf5 file out   :      ./out.h5
-  -- hdf5 group out  :      2012-05-11 12:52:14
-(31540|140170355443520) Connection from 127.0.0.1:9002
-31540, 81, GadgetronConnector, Close Message received
-(31540|140170283570944) Handling close...
-(31540|140170283570944) svc done...
-(31540|140170283570944) Handling close...
-</screen>
-
-      <para>The images are saved in the folder in which you started the
-      <application>mriclient</application>. The client appends the result to
-      an HDF5 file called <filename>out.h5</filename> (if no other file name
-      is specified). A group is created with the current date and time and the
-      images are stored in that group. If you run multiple reconstructions one
-      after another, the results will be added to the same file, but a new
-      group is created for each run. That makes it easy to compare results
-      from different reconstructions. The images are stored in a single
-      precision format as specified by the <filename>default.xml</filename>
-      configuration file. Please see <xref linkend="section.hdf5"/> for
-      details on how to read the output file. Briefly you could read and
-      display the data in Matlab with:</para>
-
-      <programlisting>images = h5read('out.h5','/<INSERT CORRECT DATE HERE>/image_0.img');
-imagesc(images(:,:,1,1));colormap(gray);</programlisting>
-    </sect1>
-  </chapter>
-
-  <chapter>
-    <title>Framework Overview</title>
-
-    <sect1>
-      <title>Gadgetron Streaming Architecture</title>
-
-      <para>The Gadgetron consists of a streaming processing architecture and
-      a set of toolboxes. The toolboxes are used within the streaming
-      components but come as individual shared libraries and can thus also be
-      used in standalone applications. The architecture is outlined in <xref
-      linkend="fig.gadgetron.architecture"/>.</para>
-
-      <figure xml:id="fig.gadgetron.architecture">
-        <title>Gadgetron Architecture</title>
-
-        <mediaobject>
-          <imageobject role="html">
-            <imagedata align="left" fileref="figs/architecture.png"
-                       format="PNG" width="10in"/>
-          </imageobject>
-
-          <imageobject role="fo">
-            <imagedata align="left" fileref="figs/architecture.png"
-                       format="PNG" width="5in"/>
-          </imageobject>
-
-          <textobject>
-            <phrase>Gadgetron Architecture</phrase>
-          </textobject>
-        </mediaobject>
-      </figure>
-
-      <para>The Gadgetron receives connections from clients through a TCP/IP
-      connection. A client can be any application from which you can open a
-      TCP/IP socket and send data. Once a connection to a client has been
-      established (see <xref linkend="sect.communicationprotocol"/>), the
-      Gadgetron will read data from the socket and pass it on down a chain of
-      processing steps. The responsibility of reading and writing packages on
-      the socket is dispatched to a set of Readers and Writers (see <xref
-      linkend="sect.readerswriters"/>). Each step in the processing chain is
-      implemented in a module or Gadget (see <xref linkend="sect.gadgets"/>).
-      A reconstruction process is defined by defining a chain of Gadgets. The
-      assembly of Gadgets is done dynamically at run-time (see <xref
-      linkend="sect.streamconfiguration"/>).</para>
-
-      <sect2 xml:id="sect.gadgets">
-        <title>Gadgets</title>
-
-        <para>A Gadget is the functional unit of the Gadgetron. You can think
-        of the Gadget as a device with an input and output. Data passes
-        through the device and is modified and/or transformed between input
-        and output. By wiring multiple Gadgets together you create a
-        reconstruction program. A schematic outline of a Gadget is seen in
-        <xref linkend="fig.gadgetron.gadget"/></para>
-
-        <figure xml:id="fig.gadgetron.gadget">
-          <title>Gadget</title>
-
-          <mediaobject>
-            <imageobject role="html">
-              <imagedata align="left" fileref="figs/gadget.png" format="PNG"
-                         width="6in"/>
-            </imageobject>
-
-            <imageobject role="fo">
-              <imagedata align="left" fileref="figs/gadget.png" format="PNG"
-                         width="3in"/>
-            </imageobject>
-
-            <textobject>
-              <phrase>Gadget</phrase>
-            </textobject>
-          </mediaobject>
-        </figure>
-
-        <para>The Gadget is an active object based on the
-        <classname>ACE_Task</classname> from the ACE library. It has its own
-        thread (or threads) of execution and an input queue where data is
-        placed for processing by either the Gadgetron framework or an upstream
-        Gadget.</para>
-
-        <para>The active thread(s) in the Gadget will pick up a data package
-        from the queue, and then pass it on to a virtual
-        <function>process</function>. An abbreviated version of the header
-        <filename>Gadget.h</filename> is seen below:</para>
-
-        <programlisting>class Gadget : public ACE_Task<ACE_MT_SYNCH>
-{
-
-public:
-   virtual int svc(void)
-   {
-      //Pick up package from queue
-     
-      //Call process
-      if (this->process(m) == -1) {
-         //Handle error
-      }
-      return 0;
-   }
-
-   //More function (left out for simplicity)
-
-protected:
-   virtual int process(ACE_Message_Block * m) = 0;
-
-   virtual int process_config(ACE_Message_Block * m) {
-      return 0;
-   }
-
-};</programlisting>
-
-        <para>The data package used by the <classname>ACE_Task</classname> is
-        the <classname>ACE_Message_Block</classname>, which is a very basic
-        block of data (essentially just a byte array). To allow the Gadgets to
-        check if the data blocks on the message queue are of the expected
-        type, the Gadgetron uses a modified
-        <classname>ACE_Message_Block</classname> called
-        <classname>GadgetContainerMessage</classname>, which can contain any
-        class with a no-argument constructor. It is possible to check if the
-        <classname>GadgetContainerMessage</classname> contains a specific type
-        of data, and if so, access that object. Suppose we want to store a
-        class named <classname>MyClass</classname>:</para>
-
-        <programlisting>GadgetContainerMessage<MyClass>* m = 
-  new GadgetContainerMessage<MyClass>();
-
-MyClass* mc = m->getObjectPtr();
-
-//Do something with mc
-
-m->release(); //Delete the message block and containing data</programlisting>
-
-        <para>When a function receives an
-        <classname>ACE_Message_Block</classname> it is possible to check if it
-        is of a certain type:</para>
-
-        <programlisting>int process(ACE_Message_Block* mb)
-{
-  
-  GadgetContainerMessage<MyClass>* m = 
-    AsContainerMessage<MyClass>(mb);
-
-  if (m) {
-    MyClass* mc = m->getObjectPtr();
-    
-    //Do something with mc
-
-  } else {
-    //Something went wrong, deal with error
-    return -1;
-  }
-
-  mb->release();
-
-  return 0;
-}</programlisting>
-
-        <para>It is possible to chain more than one
-        <classname>ACE_Message_Block</classname> together using the
-        <function>cont</function> function. This effectively provides a way to
-        pass multiple arguments into a Gadget and to check whether they have the
-        appropriate types:</para>
-
-        <programlisting>int process(ACE_Message_Block* mb)
-{
-  
-  GadgetContainerMessage<MyClass>* m1 = 
-    AsContainerMessage<MyClass>(mb);
-
-  GadgetContainerMessage<MyOtherClass>* m2 = 
-    AsContainerMessage<MyOtherClass>(mb->cont());
-
-  if (m1 && m2) {
-    MyClass* mc = m1->getObjectPtr();
-    MyOtherClass* moc = m2->getObjectPtr();
-    
-    //Do something with mc
-
-  } else {
-    //Something went wrong, deal with error
-    return -1;
-  }
-
-  mb->release(); //This deletes both message blocks
-
-  return 0;
-}</programlisting>
-
-        <para>It gets a bit tedious and error-prone to repeat code like the
-        above in every Gadget. To overcome this, the Gadgetron comes with a
-        set of templated classes to automate the steps. Say we would like to
-        make a Gadget which takes a single input argument, we would inherit
-        from <classname>Gadget1</classname>. If you need two arguments, you
-        inherit from <classname>Gadget2</classname>:</para>
-
-        <programlisting>template <class P1, class P2> class Gadget2 : public Gadget
-{
-protected:
-   int process(ACE_Message_Block* mb)
-   {
-     //Do type checking 
-   }
-
-   virtual int process(GadgetContainerMessage<P1>* m1, 
-     GadgetContainerMessage<P2>* m2) = 0;
-};</programlisting>
-
-        <para>The base class performs the type checking for you and only when
-        the arguments have been verified, it will call the virtual
-        <function>process</function> above. So, all you need to do in order to
-        implement a Gadget that takes two arguments is to implement this
-        function. As an example, let's look at a very simple Gadget, which
-        receives an image header (in ISMRM Raw Data format) and some image
-        data and does a Fourier transform of the first 3 dimensions. First the
-        header file <filename>FFTGadget.h</filename></para>
-
-        <programlisting>#include "gadgetroncore_export.h"
-#include "Gadget.h"
-#include "ismrmrd.h"
-#include "hoNDArray.h"
-#include <complex>
-
-class EXPORTGADGETSCORE FFTGadget : 
-public Gadget2<ISMRMRD::ImageHeader, hoNDArray< std::complex<float> > >
-{
- public:
-  GADGET_DECLARE(FFTGadget)
-
- protected:
-  virtual int process( 
-     GadgetContainerMessage< ISMRMRD::ImageHeader >* m1,
-     GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2);
-
-};</programlisting>
-
-        <para>Let us walk through the code step by step. The Gadget takes two
-        arguments: 1) <classname>ISMRMRD::ImageHeader</classname>, which is just
-        a struct with some image header information (it comes from the ISMRM
-        Raw Data header <filename>ismrmrd.h</filename> included above), and 2) a
-        <classname>hoNDArray</classname>, which is a multidimensional array
-        (see <xref linkend="sect.ndarray"/>) storage container. In this case
-        the <classname>hoNDArray</classname> contains complex floating point
-        data.</para>
-
-        <para>There are a couple of other things to notice. One is the
-        <function>EXPORTGADGETSCORE</function> macro in the class definition.
-        This is needed to make things work properly on Windows. It is defined
-        in <filename>gadgetroncore_export.h</filename> and is used (on
-        Windows) to indicate if the class is being imported or exported from a
-        DLL. It translates into <function>__declspec(dllexport)</function> or
-        <function>__declspec(dllimport)</function> in Windows and is empty in
-        Linux/OSX. It is beyond the scope of this manual to go into why such a
-        declaration is needed, but keep this in mind when you start creating
-        your own Gadgets. Each shared library (DLL) has its own export
-        declaration macro.</para>
-
-        <para>The other thing to notice is the
-        <function>GADGET_DECLARE(FFTGadget)</function> macro. This macro is
-        required for Windows to correctly handle shared libraries and is
-        needed whenever you create a new Gadget to make things work properly
-        on Windows.</para>
-
-        <para>The actual implementation looks like this:</para>
-
-        <programlisting>#include "FFTGadget.h"
-#include "FFT.h"
-
-int FFTGadget::process( 
-   GadgetContainerMessage< ISMRMRD::ImageHeader >* m1,
-   GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
-{
-  FFT<float>::instance()->ifft(m2->getObjectPtr(),0);
-  FFT<float>::instance()->ifft(m2->getObjectPtr(),1);
-  FFT<float>::instance()->ifft(m2->getObjectPtr(),2);
-
-  if (this->next()->putq(m1) < 0) {
-     return GADGET_FAIL;
-  }
-
-  return GADGET_OK;
-}
-
-GADGET_FACTORY_DECLARE(FFTGadget)</programlisting>
-
-        <para>Once we are inside the <function>process</function> function,
-        the data has already been converted to the appropriate container
-        messages and we can start processing the data. This function uses an
-        FFT toolbox (more on toolboxes in <xref linkend="sect.toolboxes"/>).
-        After the data has been Fourier transformed along the first 3
-        dimensions it is placed on the next Gadget's queue. Remember the two
-        <classname>GadgetContainerMessage</classname> objects were originally
-        picked up from the message queue as a chain of
-        <classname>ACE_Message_Block</classname> objects. They are still
-        chained together, i.e. when passing <varname>m1</varname> on to the
-        next Gadget we are effectively passing on both arguments.</para>
-
-        <para>Another couple of macros to notice are the
-        <varname>GADGET_OK</varname> and <varname>GADGET_FAIL</varname>. They
-        are defined as 0 and -1 respectively. The convention in the Gadgetron
-        is to return 0 when a function succeeds and < 0 when it fails -
-        unless the function returns a pointer.</para>
-
-        <para>Last thing to notice is the
-        <function>GADGET_FACTORY_DECLARE(FFTGadget)</function> statement. This
-        is a macro which declares functions for loading a Gadget of this type
-        out of a shared library and destroying it again when we are done. It
-        ensures that we can load the Gadget on all platforms. When you create
-        your own gadgets you should use this macro to declare the factory
-        function for the Gadget.</para>
-
-        <para>For a tutorial on how to make your own Gadget library see <xref
-        linkend="sect.makingnewgadgetlibrary"/>.</para>
-
-        <sect3 xml:id="sect.xmlparameters">
-          <title>Gadget XML Configuration</title>
-
-          <para>In addition to defining a Gadget's behavior in response to a
-          data package, it is also possible for the Gadgets to receive
-          configuration information or parameters. The user can define the
-          Gadget's behavior in response to configuration information by
-          implementing the <function>process_config</function> function in the
-          Gadget header file. The configuration information or parameters are
-          typically transmitted at the beginning of the reconstruction process
-          from the client (see <xref linkend="sect.communicationprotocol"/>).
-          The configuration information can in principle be in any format (a
-          given application can use a binary format or a text format defined
-          for the specific purpose), but conventionally the parameters are
-          transmitted in XML format and for the MRI Gadgets, the XML
-          configuration is the XML header from the ISMRM Raw Data file. More
-          details on this format and how to easily parse it with the included
-          C++ XML data binding classes can be found at
-          <uri>http://ismrmrd.sourceforge.net</uri>.</para>
-
-          <para>An example of a parameter XML file for an MRI data set is
-          shown here:</para>
-
-          <programlisting><?xml version="1.0"?>
-
-<ismrmrdHeader xmlns="http://www.ismrm.org/ISMRMRD" 
-  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
-  xmlns:xs="http://www.w3.org/2001/XMLSchema" 
-  xsi:schemaLocation="http://www.ismrm.org/ISMRMRD ismrmrd.xsd">
-
-  <subjectInformation>
-    <patientName>phantom</patientName>
-    <patientWeight_kg>72.5748</patientWeight_kg>
-  </subjectInformation>
-  <acquisitionSystemInformation>
-    <systemVendor>SIEMENS</systemVendor>
-    <systemModel>Avanto</systemModel>
-    <systemFieldStrength_T>1.494</systemFieldStrength_T>
-    <receiverChannels>32</receiverChannels>
-    <relativeReceiverNoiseBandwidth>0.79</relativeReceiverNoiseBandwidth>
-  </acquisitionSystemInformation>
-  <experimentalConditions>
-    <H1resonanceFrequency_Hz>63620740</H1resonanceFrequency_Hz>
-  </experimentalConditions>
-  <encoding>
-    <trajectory>cartesian</trajectory>
-    <encodedSpace>
-      <matrixSize>
-        <x>256</x>
-        <y>128</y>
-        <z>1</z>
-      </matrixSize>
-      <fieldOfView_mm>
-        <x>600</x>
-        <y>300</y>
-        <z>5</z>
-      </fieldOfView_mm>
-    </encodedSpace>
-    <reconSpace>
-      <matrixSize>
-        <x>128</x>
-        <y>128</y>
-        <z>1</z>
-      </matrixSize>
-      <fieldOfView_mm>
-        <x>300</x>
-        <y>300</y>
-        <z>5</z>
-      </fieldOfView_mm>
-    </reconSpace>
-    <encodingLimits>
-      <kspace_encoding_step_1>
-        <minimum>0</minimum>
-        <maximum>127</maximum>
-        <center>64</center>
-      </kspace_encoding_step_1>
-      <kspace_encoding_step_2>
-        <minimum>0</minimum>
-        <maximum>0</maximum>
-        <center>0</center>
-      </kspace_encoding_step_2>
-      <slice>
-        <minimum>0</minimum>
-        <maximum>0</maximum>
-        <center>0</center>
-      </slice>
-      <set>
-        <minimum>0</minimum>
-        <maximum>0</maximum>
-        <center>0</center>
-      </set>
-    </encodingLimits>
-  </encoding>
-  <sequenceTiming>
-    <TR>5.86</TR>
-    <TE>2.96</TE>
-  </sequenceTiming>
-</ismrmrdHeader></programlisting>
-
-          <para>The user/developer can use any XML parsing technique to
-          extract parameters from this XML header, but we encourage developers
-          to use the C++ XML Data Binding classes that are included with the
-          ISMRM Raw Data C++ library. For example, to parse encoding limits
-          (example from <filename>AccumulatorGadget.cpp</filename>):</para>
-
-          <programlisting>int AccumulatorGadget::process_config(ACE_Message_Block* mb)
-{
- 
- //Calling parsing convenience function found in GadgetIsmrmrdReadWrite.cpp
- boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg = parseIsmrmrdXMLHeader(std::string(mb->rd_ptr()));
-
- ISMRMRD::ismrmrdHeader::encoding_sequence e_seq = cfg->encoding(); 
- if (e_seq.size() != 1) {
-  GADGET_DEBUG2("Number of encoding spaces: %d\n", e_seq.size());
-  GADGET_DEBUG1("Only supports one encoding space supported\n");
-  return GADGET_FAIL;
- }
-
- ISMRMRD::encodingSpaceType e_space = (*e_seq.begin()).encodedSpace();
- ISMRMRD::encodingSpaceType r_space = (*e_seq.begin()).reconSpace();
- ISMRMRD::encodingLimitsType e_limits = (*e_seq.begin()).encodingLimits();
-
- GADGET_DEBUG2("Matrix size: %d, %d, %d\n", 
-   e_space.matrixSize().x(), 
-   e_space.matrixSize().y(), 
-   e_space.matrixSize().z());
-
- dimensions_.push_back(e_space.matrixSize().x());
- dimensions_.push_back(e_space.matrixSize().y());
- dimensions_.push_back(e_space.matrixSize().z());
-
- slices_ = e_limits.slice().present() ? 
-   e_limits.slice().get().maximum()+1 : 1;
-
-  return GADGET_OK;
-}</programlisting>
-        </sect3>
-      </sect2>
-
-      <sect2 xml:id="sect.readerswriters">
-        <title>Readers and Writers</title>
-
-        <para>As illustrated in <xref linkend="fig.gadgetron.architecture"/>
-        the Gadgetron uses a set of Readers and Writers to deal with the
-        incoming communication on the TCP/IP socket. Readers are responsible
-        for deserialization of packages and Writers are responsible for
-        serialization of packages. All packages that arrive on the socket will
-        start with a message ID. Based on this ID, the Gadgetron delegates the
-        responsibility of reading the package off the socket to a particular
-        instance of a <classname>GadgetMessageReader</classname> defined by
-        the following abstract class:</para>
-
-        <programlisting>class GadgetMessageReader
-{
- public:
-  virtual ACE_Message_Block* read(ACE_SOCK_Stream* stream) = 0;
-};</programlisting>
-
-        <para>In order to be able to read a specific type of data, the
-        <function>read</function> function must be implemented for that data
-        type. As an example here is the
-        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>, which
-        reads an MRI data acquisition from the socket.</para>
-
-        <programlisting>class GadgetIsmrmrdAcquisitionMessageReader 
-: public GadgetMessageReader
-{
- public:
-  GADGETRON_READER_DECLARE(GadgetIsmrmrdAcquisitionMessageReader);
-  virtual ACE_Message_Block* read(ACE_SOCK_Stream* socket);
-};</programlisting>
-
-        <para>Note the
-        <function>GADGETRON_READER_DECLARE(GadgetIsmrmrdAcquisitionMessageReader)</function>
-        declaration. This is equivalent to the declaration needed for the
-        Gadgets (see <xref linkend="sect.gadgets"/>) in order to make them
-        load properly from shared libraries.</para>
-
-        <para>The implementation of this particular reader is as follows (this
-        is an abbreviated version without error checking, etc.):</para>
-
-        <programlisting>ACE_Message_Block* GadgetIsmrmrdAcquisitionMessageReader::read(ACE_SOCK_Stream* sock)
-{
- GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1 =
-   new GadgetContainerMessage<ISMRMRD::AcquisitionHeader>();
-
- GadgetContainerMessage<hoNDArray< std::complex<float> > >* m2 =
-   new GadgetContainerMessage< hoNDArray< std::complex<float> > >();
-
- m1->cont(m2);
-
- ssize_t recv_count = 0;
-
- if ((recv_count = stream->recv_n(m1->getObjectPtr(), sizeof(ISMRMRD::AcquisitionHeader))) <= 0) {
-  m1->release();
-  return 0;
- }
-
- if (m1->getObjectPtr()->trajectory_dimensions) {
-  GadgetContainerMessage<hoNDArray< float > >* m3 =
-    new GadgetContainerMessage< hoNDArray< float > >();
-
- m2->cont(m3);
-
- std::vector<unsigned int> tdims;
- tdims.push_back(m1->getObjectPtr()->trajectory_dimensions);
- tdims.push_back(m1->getObjectPtr()->number_of_samples);
-
- if (!m3->getObjectPtr()->create(&tdims)) {
-  m1->release();
-  return 0;
- }
-
- if ((recv_count =
-   stream->recv_n
-    (m3->getObjectPtr()->get_data_ptr(),
-     sizeof(float)*tdims[0]*tdims[1])) <= 0) {
-
-     m1->release();
-
-   return 0;
- }
-
- std::vector<unsigned int> adims;
- adims.push_back(m1->getObjectPtr()->number_of_samples);
- adims.push_back(m1->getObjectPtr()->active_channels);
-
- if (!m2->getObjectPtr()->create(&adims)) {
-   m1->release();
-   return 0;
- }
-
- if ((recv_count =
-      stream->recv_n
-      (m2->getObjectPtr()->get_data_ptr(),
-      sizeof(std::complex<float>)*adims[0]*adims[1])) <= 0) {
-
-    m1->release();
-
-    return 0;
- }
-
-return m1;
-}
-
-GADGETRON_READER_FACTORY_DECLARE(GadgetIsmrmrdAcquisitionMessageReader)</programlisting>
-
-        <para>The Reader allocates two
-        <classname>GadgetContainerMessage</classname> data blocks to contain
-        the incoming data. First an MRI acquisition header (defined in
-        <filename>GadgetMRIHeaders.h</filename>) is read. Based hereon the
-        length of each acquisition (number of samples) and the number of
-        acquisition channels are determined. A
-        <classname>hoNDArray</classname> is allocated to store the data read
-        from the socket. Notice that the two
-        <classname>GadgetContainerMessage</classname> objects are chained together
-        using the <function>cont</function> function.</para>
-
-        <para>A final important statement to notice is:</para>
-
-        <programlisting>GADGETRON_READER_FACTORY_DECLARE(GadgetIsmrmrdAcquisitionMessageReader)</programlisting>
-
-        <para>This macro declares create and destroy functions to load the
-        reader from a shared library on all platforms supported.</para>
-
-        <para>Whereas the Readers are responsible for deserialization, the
-        <classname>GadgetMessageWriter</classname> is responsible for the
-        opposite operation (serialization). In practice, Gadgets that produce
-        an output for the client application can hand that data back to the
-        Gadgetron framework where it is placed on the output queue along with
-        a message ID. This is for instance done in this (abbreviated) code
-        from an <classname>ImageFinishGadget</classname>:</para>
-
-        <programlisting>template <typename T>
-int ImageFinishGadget<T>
-::process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1,
-   GadgetContainerMessage< hoNDArray< T > >* m2)
-{
-  if (!this->controller_) {
-    return -1;
-  }
-
-  GadgetContainerMessage<GadgetMessageIdentifier>* mb =
-    new GadgetContainerMessage<GadgetMessageIdentifier>();
-
-  switch (sizeof(T)) {
-  case 2: //Unsigned short
-   mb->getObjectPtr()->id = 
-      GADGET_MESSAGE_IMAGE_REAL_USHORT;
-   break;
-  case 4: //Float
-   mb->getObjectPtr()->id = 
-      GADGET_MESSAGE_IMAGE_REAL_FLOAT;
-   break;
-  case 8: //Complex float
-   mb->getObjectPtr()->id = 
-      GADGET_MESSAGE_IMAGE_CPLX_FLOAT;
-   break;
-  default:
-   GADGET_DEBUG2("Wrong data size detected: %d\n", sizeof(T));
-   mb->release();
-   m1->release();
-   return GADGET_FAIL;
-  }
-
-  mb->cont(m1);
-
-  int ret =  this->controller_->output_ready(mb);
-
-  if ( (ret < 0) ) {
-   GADGET_DEBUG1("Failed to return massage to controller\n");
-   return GADGET_FAIL;
-  }
-
-  return GADGET_OK;
-}</programlisting>
-
-        <para>Notice that the Gadget has a reference to the Gadgetron
-        framework through the <varname>controller_</varname> member variable,
-        which is set during initialization.</para>
-
-        <para>In the framework (more specifically in the
-        <classname>GadgetStreamController</classname>) there is an active
-        thread responsible for writing messages that are put on to the output
-        queue. This is done by investigating the message ID and then picking
-        the <classname>GadgetMessageWriter</classname> associated with this
-        ID. A Writer must implement the following abstract class:</para>
-
-        <programlisting>class GadgetMessageWriter
-{
- public:
-  virtual int write(ACE_SOCK_Stream* stream, 
-                    ACE_Message_Block* mb) = 0;
-};</programlisting>
-
-        <para>The Writer is handed control of the socket along with the
-        message block. A Writer declaration could look like:</para>
-
-        <programlisting>class MRIImageWriter 
-  : public GadgetMessageWriter
-{
-
-public:
-   GADGETRON_WRITER_DECLARE(MRIImageWriter);
-   virtual int write(ACE_SOCK_Stream* sock, 
-                     ACE_Message_Block* mb);
-};</programlisting>
-
-        <para>Notice again the
-        <function>GADGETRON_WRITER_DECLARE(MRIImageWriter)</function> which
-        ensures proper run-time linking behavior. The implementation could
-        look like (abbreviated with no error checking, etc.):</para>
-
-        <programlisting>int MRIImageWriter
-     ::write(ACE_SOCK_Stream* sock, 
-             ACE_Message_Block* mb)
-{
-
-   GadgetContainerMessage<ISMRMRD::ImageHeader>* imagemb = 
-      AsContainerMessage<ISMRMRD::ImageHeader>(mb);
-  
-   GadgetContainerMessage< hoNDArray< float > >* datamb =
-      AsContainerMessage< hoNDArray< float > >(imagemb->cont());
-  
-   if (!datamb || !imagemb) {
-      //Deal with errors
-   }
-   
-   GadgetMessageIdentifier id;
-   //Example for real float image.
-   id.id = GADGET_MESSAGE_ISMRMRD_IMAGE_REAL_FLOAT; 
- 
-   sock->send_n (&id, sizeof(GadgetMessageIdentifier));
-
-   sock->send_n (imagemb->getObjectPtr(), sizeof(ISMRMRD::ImageHeader));
-
-   sock->send_n (datamb->getObjectPtr()->get_data_ptr(), 
-      sizeof(float)*datamb->getObjectPtr()->get_number_of_elements());
-
-   return 0;
-}
-
-GADGETRON_WRITER_FACTORY_DECLARE(MRIImageWriter)</programlisting>
-
-        <para>Once again notice the required
-        <function>GADGETRON_WRITER_FACTORY_DECLARE(MRIImageWriter)</function>
-        macro. Also notice that the message ID is transmitted to the client.
-        The client is expected to follow the same communication model as the
-        Reader, but it is determined entirely by the Writer implementation how
-        the message is transmitted.</para>
-
-        <para>Readers and Writers are loaded dynamically at run-time along
-        with the Gadgets (see <xref linkend="sect.streamconfiguration"/>). The
-        input and output behaviour can be adapted by manipulating which
-        Readers and Writers are associated with which message IDs.</para>
-      </sect2>
-
-      <sect2 xml:id="sect.streamconfiguration">
-        <title>Stream Configuration</title>
-
-        <para>A Gadgetron reconstruction is made up of modules, i.e. Readers,
-        Writers, and Gadgets. New reconstruction programs can be created by
-        simply assembling existing components in a new way. The configuration
-        of the Gadgetron stream is done at run-time and new configuration
-        chains can be created without recompiling any of the underlying
-        Gadgets. More specifically, the configuration is specified in an XML
-        file that the Gadgetron will read before receiving data. The best way
-        to explain the format is by looking at a (simplified) example:</para>
-
-        <programlisting><?xml version="1.0" encoding="UTF-8"?>
-<gadgetronStreamConfiguration 
-  xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-  xmlns="http://gadgetron.sf.net/gadgetron"
-  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-        
-    <reader>
-      <slot>1008</slot>
-      <dll>gadgetroncore</dll>
-      <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-    </reader>
-  
-    <writer>
-      <slot>1004</slot>
-      <dll>gadgetroncore</dll>
-      <classname>MRIImageWriterCPLX</classname>
-    </writer>
-    <writer>
-      <slot>1005</slot>
-      <dll>gadgetroncore</dll>
-      <classname>MRIImageWriterFLOAT</classname>
-    </writer>
-    <writer>
-      <slot>1006</slot>
-      <dll>gadgetroncore</dll>
-      <classname>MRIImageWriterUSHORT</classname>
-    </writer>
-  
-    <gadget>
-      <name>Acc</name>
-      <dll>gadgetroncore</dll>
-      <classname>AccumulatorGadget</classname>
-    </gadget>
-    <gadget>
-      <name>FFT</name>
-      <dll>gadgetroncore</dll>
-      <classname>FFTGadget</classname>
-    </gadget>
-    <gadget>
-      <name>Extract</name>
-      <dll>gadgetroncore</dll>
-      <classname>ExtractGadget</classname>
-    </gadget>  
-    <gadget>
-      <name>ImageFinishFLOAT</name>
-      <dll>gadgetroncore</dll>
-      <classname>ImageFinishGadgetFLOAT</classname>
-    </gadget>
-
-</gadgetronStreamConfiguration></programlisting>
-
-        <para>The stream configuration XML layout is defined in the
-        <filename>GADGETRON_HOME/schema/gadgetron.xsd</filename>. A stream
-        configuration must conform to this schema definition or an error will
-        be generated when the Gadgetron attempts to load the
-        configuration.</para>
-
-        <para>The configuration file format contains 3 sections: 1) Readers,
-        2) Writers, 3) Stream (with Gadgets) corresponding to the 3 different
-        types of components that can be assembled in the Gadgetron.</para>
-
-        <para>In the example above, the Readers section contains only one
-        reader, which is the
-        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname> mentioned
-        previously. The message ID associated with this Reader is 1008. Every
-        time a message with ID 1008 arrives on the socket, responsibility for
-        reading the message will be delegated to the
-        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>. When the
-        Gadgetron configuration is loaded, the framework will load the
-        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname> from the
-        DLL (shared library) <filename>gadgetroncore</filename>. On the Linux
-        platform this would be a shared library called
-        <filename>libgadgetroncore.so</filename> and on the Windows platform
-        it would be called <filename>gadgetroncore.dll</filename>.</para>
-
-        <para>The Gadgetron framework knows how to load the components from
-        the DLLs assuming that they have been declared properly as described
-        in <xref linkend="sect.readerswriters"/> and <xref
-        linkend="sect.gadgets"/>.</para>
-
-        <para>The example Gadgetron configuration has three Writers, i.e. it is
-        capable of outputting three different types of data. Again the
-        declarations cause the Gadgetron framework to load specific instances
-        of <classname>GadgetMessageWriter</classname> and associate them with
-        specific ID numbers.</para>
-
-        <para>There are certain built-in Readers and Writers in addition to
-        those specified in the configuration file. As an example, there are
-        Readers for receiving configurations to be used by the Gadgetron and
-        for receiving the parameters that will be passed to all Gadgets (see
-        <xref linkend="sect.communicationprotocol"/>). If the Gadgetron
-        receives a message with an ID for which there is no associated Reader
-        or encounters a message on the output queue for which there is no
-        associated Writer an error will be generated, the Gadgetron stream
-        shuts down, and the connection to the client will be closed.</para>
-
-        <para>In the example above, we have 4 Gadgets in the reconstruction
-        chain. The first Gadget is an
-        <classname>AccumulatorGadget</classname>, which collects individual
-        lines and inserts them in k-space. When the k-space image is complete
-        it is sent to the next Gadget in the chain, the
-        <classname>FFTGadget</classname>, which is responsible for Fourier
-        transforming the data into image space. The next Gadget
-        (<classname>ExtractGadget</classname>) will extract the magnitude of
-        the complex image. Finally the last Gadget in the chain
-        (<classname>ImageFinishGadgetFLOAT</classname>) sends the
-        reconstructed image back to the Gadgetron framework where it is added
-        to the output queue.</para>
-
-        <para>It is also possible to send configuration parameters to Gadgets
-        using the XML file. For example, to set a parameter in a Gadget, one
-        could write:</para>
-
-        <programlisting>  <gadget>
-   <name>Accumulator</name>
-   <dll>gadgetroncore</dll>
-   <classname>AccumulatorGadget</classname>
-   <property><name>MyTestProperty</name>
-   <value>Blah Blah</value></property>
-   <property><name>MyTestProperty2</name>
-   <value>98776.862187</value></property>
-  </gadget>
-</programlisting>
-
-        <para>The two properties will now be accessible inside the Gadget
-        using the parameter access functions defined in
-        <filename>Gadget.h</filename>:</para>
-
-        <programlisting>class Gadget : public ACE_Task<ACE_MT_SYNCH>
-{
-
-//Other definitions
-
-int get_bool_value(const char* name);
-int get_int_value(const char* name);
-double get_double_value(const char* name);
-
-};</programlisting>
-
-        <para>Additionally it is also possible to specify how many active
-        threads there should be in a Gadget. This is specified with:</para>
-
-        <programlisting>  <gadget>
-   <name>Accumulator</name>
-   <dll>gadgetroncore</dll>
-   <classname>AccumulatorGadget</classname>
-   <property><name>threads</name><value>5</value></property>
-  </gadget></programlisting>
-
-        <para>Which would make the <classname>AccumulatorGadget</classname>
-        have 5 threads.</para>
-      </sect2>
-
-      <sect2 xml:id="sect.communicationprotocol">
-        <title>Communication Sequence</title>
-
-        <para>Communication between a client and the Gadgetron follows a
-        straightforward communication protocol. When the Gadgetron is started
-        it will be expecting a connection on a specific port (port 9002 is the
-        default). The communication sequence is as follows:</para>
-
-        <orderedlist>
-          <listitem>
-            <para>The client makes connection</para>
-          </listitem>
-
-          <listitem>
-            <para>The Gadgetron accepts the connection and creates a new
-            instance of a <classname>GadgetStreamController</classname> (see
-            <xref linkend="fig.gadgetron.architecture"/>). After creating the
-            <classname>GadgetStreamController</classname> the Gadgetron
-            returns to accept connections on the socket such that multiple
-            clients can be connected simultaneously.</para>
-          </listitem>
-
-          <listitem>
-            <para>The <classname>GadgetStreamController</classname> takes
-            control of the socket and expects to read a specific type of
-            message, which either contains the filename of a specific stream
-            configuration (see <xref linkend="sect.streamconfiguration"/>) or
-            alternatively it can receive the actual XML stream specification
-            directly on the socket. These two types of messages are read with
-            Readers that are always registered for the Gadgetron (see <xref
-            linkend="sect.readerswriters"/>). If the Gadgetron receives the
-            filename of a Gadget stream it expects to be able to find that
-            configuration file in the <filename>gadgetron/config</filename>
-            folder (see <xref linkend="sect.fileorganization"/>).</para>
-          </listitem>
-
-          <listitem>
-            <para>The <classname>GadgetStreamController</classname> is then
-            expecting to receive parameters that will be transmitted to each
-            individual Gadget. In principle the "parameters" is just a raw
-            buffer of characters that will be transmitted as such to each
-            individual Gadget. It is the convention however to send the
-            parameters in an XML format. It is up to each individual Gadget to
-            interpret the parameters. The user can implement any behavior in
-            response to the parameters by implementing the
-            <function>process_config</function> function (see <xref
-            linkend="sect.gadgets"/>). The client can send parameters at any
-            time during a reconstruction and they will always be transmitted
-            to all Gadgets through the <function>process_config</function>
-            function.</para>
-          </listitem>
-
-          <listitem>
-            <para>The client then starts transmitting data packages that the
-            Gadgetron processes. Images are returned to the client.</para>
-          </listitem>
-
-          <listitem>
-            <para>When the client has no more data it will send a closure
-            package. This package causes all Gadgets (in order) to process all
-            remaining data on their input queue and then shut down.</para>
-          </listitem>
-
-          <listitem>
-            <para>Once the final Gadget has shut down, the connection with the
-            client is terminated.</para>
-          </listitem>
-        </orderedlist>
-
-        <para>To make it easier to create a new client, the Gadgetron comes
-        with a <classname>GadgetronConnector</classname> class:</para>
-
-        <programlisting>class GadgetronConnector: 
-  public ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_MT_SYNCH> {
-
-public:
-
- int open (std::string hostname, std::string port);   
- int putq  (ACE_Message_Block * mb ,  
-     ACE_Time_Value *  timeout = 0);
-
- int register_reader(unsigned int slot, 
-     GadgetMessageReader* reader);
-
- int register_writer(unsigned int slot, 
-     GadgetMessageWriter* writer);
-
- int send_gadgetron_configuration_file(std::string config_xml_name);   
- int send_gadgetron_configuration_script(std::string config_xml_name);
- int send_gadgetron_parameters(std::string xml_string);
-};</programlisting>
-
-        <para>This class can be used to create simple clients that open a
-        connection with the Gadgetron using the <function>open</function>
-        function and then communicate with the Gadgetron through the Readers
-        and Writers registered with the connector. See the
-        <application>mriclient</application> example application
-        (<filename>gadgetron/apps/clients/mriclient</filename> in the source
-        code archive) for a simple example of how to build a Gadgetron
-        client.</para>
-      </sect2>
-
-      <sect2 xml:id="sect.fileorganization">
-        <title>File Organization</title>
-
-        <para>This section provides a brief overview of the file organization
-        in the Gadgetron installation. Once you have compiled the Gadgetron
-        and installed it (see <xref linkend="sect.installation"/>), it will
-        reside in its designated installation folder
-        (<varname>GADGETRON_HOME</varname>). For the purposes of this
-        description, we will assume that the Gadgetron was installed in
-        <filename>/usr/local/gadgetron</filename>.</para>
-
-        <para>In <varname>GADGETRON_HOME</varname> you should find the
-        following folders:</para>
-
-        <para><itemizedlist>
-            <listitem>
-              <para><filename>bin</filename>: Contains all executables from
-              the Gadgetron framework including the
-              <application>gadgetron</application> executable itself and all
-              clients and standalone applications.</para>
-            </listitem>
-
-            <listitem>
-              <para><filename>config</filename>: Contains Gadgetron XML
-              configuration files (see <xref
-              linkend="sect.streamconfiguration"/>). This is where the
-              Gadgetron searches for the configurations requested by the
-              clients during initialization of the Gadget chain (see <xref
-              linkend="sect.communicationprotocol"/>). It also contains the
-              global <filename>gadgetron.xml</filename> configuration file,
-              which is used to set global configuration parameters such as the
-              port number for the Gadgetron.</para>
-            </listitem>
-
-            <listitem>
-              <para><filename>lib</filename>: Contains all shared libraries
-              (Gadgets and toolboxes). Additionally, this is the default path
-              where Python Gadgets look for Python modules.</para>
-            </listitem>
-
-            <listitem>
-              <para><filename>include</filename>: Contains all header files
-              for the Gadgets and Toolboxes in order that they can be linked
-              into external applications and Gadget libraries compiled outside
-              the Gadgetron source tree.</para>
-            </listitem>
-
-            <listitem>
-              <para><filename>schema</filename>: Contains all the XML schema
-              definitions used by the Gadgetron (e.g.
-              <filename>gadgetron.xsd</filename>) and also serves as a
-              container for schema files used by client applications and
-              copied to this folder during installation.</para>
-            </listitem>
-
-            <listitem>
-              <para><filename>cmake</filename>: Contains a set of helpful
-              CMake scripts that can be used if you wish to build applications
-              or Gadget libraries outside the Gadgetron source tree. Among
-              other things it contains a
-              <filename>FindGadgetron.cmake</filename> script, which can be
-              used to localize and set paths for the Gadgetron using
-              CMake.</para>
-            </listitem>
-          </itemizedlist></para>
-      </sect2>
-    </sect1>
-
-    <sect1 xml:id="sect.toolboxes">
-      <title>Gadgetron Toolboxes</title>
-
-      <para>The core reconstruction data structures and algorithms are made
-      available through a set of toolboxes in shared libraries. The toolboxes
-      implement the functionality of the various Gadgets, but they can also be
-      used in standalone applications. A non-exhaustive overview of key
-      functionality is covered in the following sections.</para>
-
-      <sect2 xml:id="sect.ndarray">
-        <title><classname>NDArray</classname></title>
-
-        <para>Most image processing operations involve multi-dimensional
-        arrays. Although the Gadgetron framework does not impose any specific
-        array structure on the user, it does come with an abstract
-        multi-dimensional array used throughout: the
-        <classname>NDArray</classname>. It has a specific implementation for
-        the CPU (<classname>hoNDArray</classname>) and GPU
-        (<classname>cuNDArray</classname>). The abstract class definition
-        looks like (abbreviated version):</para>
-
-        <programlisting>template <class T> class NDArray
-{
- public:
-  
-  NDArray ();
-
-  virtual ~NDArray();
-  
-  virtual T* create(std::vector<unsigned int> *dimensions); 
-
-  virtual T* create(std::vector<unsigned int> *dimensions, 
-                    T* data, bool delete_data_on_destruct = false);
-
-  virtual int permute(std::vector<unsigned int> *dim_order,
-                      NDArray<T> *out = 0, int shift_mode = 0);
-  
-  inline unsigned int get_number_of_dimensions() const {
-    return dimensions_->size();
-  }
-
-  unsigned int get_size(unsigned int dimension);
-
-  boost::shared_ptr< std::vector<unsigned int> > get_dimensions();
-  
-  inline T* get_data_ptr() const { 
-    return data_; 
-  }
-  
-  inline unsigned long int get_number_of_elements() const {
-    return elements_;
-  }
-
-  //Other public functions...
-
-protected:
-
-  virtual int allocate_memory() = 0;
-  virtual int deallocate_memory() = 0;
-
-  //Other private functions
-  
-};</programlisting>
-
-        <para>The CPU (host) definition would look like (abbreviated):</para>
-
-        <programlisting>template <class T> class hoNDArray : public NDArray<T>
-{
-
-public:
-   //Public functions...
-
-protected:
-   virtual int allocate_memory();
-   virtual int deallocate_memory();
-};</programlisting>
-
-        <para>As is seen from the <classname>NDArray</classname> header file,
-        this class has a no-argument constructor, which makes it suited for
-        encapsulating in the <classname>GadgetContainerMessage</classname>
-        mentioned in <xref linkend="sect.gadgets"/>. The procedure for
-        creating an array with complex float values would look something like
-        this:</para>
-
-        <programlisting>#include <hoNDArray.h>
-#include <complex>
-
-hoNDArray< std::complex<float> > myArray;
-
-std::vector<unsigned int> dimensions;
-dimensions.push_back(128);
-dimensions.push_back(128);
-
-if(!myArray.create(&dimensions)) {
-   //Deal with errors
-}
-
-//process data</programlisting>
-
-        <para>To create an <classname>NDArray</classname> contained in a
-        <classname>GadgetContainerMessage</classname> would look something
-        like this:</para>
-
-        <programlisting>GadgetContainerMessage< hoNDArray< std::complex<float> > >* m = 
-  new GadgetContainerMessage< hoNDArray< std::complex<float> > >();
-
-std::vector<unsigned int> dimensions;
-dimensions.push_back(128);
-dimensions.push_back(128);
-
-if(!m->getObjectPtr()->create(&dimensions)) {
-   //Deal with errors
-}
-
-//Process data or pass on to other Gadget, etc. 
-
-m->release(); //Delete the message block and containing data
-</programlisting>
-
-        <para>As mentioned in <xref linkend="sect.gadgets"/>, the
-        <classname>GadgetContainerMessage</classname> is a specialized version
-        of the <classname>ACE_Message_Block</classname> class from the ACE
-        framework. Data is passed between Gadgets in the form of
-        <classname>ACE_Message_Block</classname>s and Gadgets have access to
-        utility functions that allow them to test if a given
-        <classname>ACE_Message_Block</classname> is in fact a particular type
-        of <classname>GadgetContainerMessage</classname>.</para>
-
-        <sect3>
-          <title>GPU Support</title>
-
-          <para>The <classname>NDArray</classname> data structure also has a
-          GPU implementation (abbreviated version of header below):</para>
-
-          <programlisting>template <class T> class cuNDArray : public NDArray<T>
-{
- public:
-  cuNDArray();
-
-  cuNDArray(const cuNDArray<T>& a);
-
-  // Constructor from hoNDArray
-  cuNDArray(hoNDArray<T> *a);
-
-  // Assignment operator
-  cuNDArray& operator=(const cuNDArray<T>& rhs);
-  
-  virtual ~cuNDArray();
-
-  virtual T* create(std::vector<unsigned int> *dimensions);
-
-  virtual T* create(std::vector<unsigned int> *dimensions, 
-                    int device_no);
-
-  virtual T* create(std::vector<unsigned int> *dimensions, 
-                    T* data, bool delete_data_on_destruct = false);
-
-  virtual boost::shared_ptr< hoNDArray<T> > to_host() const;
-  
-  virtual int set_device(int device_no);
-  inline int get_device() { return device_; }
-  
- protected:
-  
-  int device_; 
-  virtual int allocate_memory();
-  virtual int deallocate_memory();
-  
-};</programlisting>
-
-          <para>It has a few extra <function>create</function> functions
-          compared to the host (CPU) version of this array. Specifically, it
-          is possible to provide the array with the device number that the
-          array should be allocated on. This is important when working on
-          systems with multiple GPU processors. The default is to allocate it
-          on the current device (device 0 unless specifically set otherwise).
-          It is possible to query on which device the data is allocated and to
-          effectively move the data from one device to another through
-          operators. Similarly, one copy constructor takes a
-          <classname>hoNDArray</classname> and transparently copies the host
-          data to the GPU.</para>
-        </sect3>
-      </sect2>
-
-      <sect2>
-        <title><classname>vector_td</classname></title>
-
-        <para>The class <classname>vector_td</classname> provides a basic
-        representation of one-, two-, three-, or four-dimensional vectors
-        (positions). It is templatized with the datatype <varname>T</varname>
-        and dimensionality <varname>D</varname>. For convenience we provide a
-        set of typedefs to commonly encountered instances. A subset of the
-        definitions provided in <filename>vector_td.h</filename> is provided
-        here (users should check the actual file e.g. for additional often
-        used constructors):</para>
-
-        <programlisting>
-template< class T, unsigned int D > class vector_td
-{
-public:
-
-  T vec[D];
-
-  __inline__ __host__ __device__ T& operator[](const int i) 
-  {
-    return vec[i];
-  }
-
-  __inline__ __host__ __device__ const T& operator[](const int i) const
-  { 
-    return vec[i];
-  }
-};
-
-
-// Some typedefs for convenience
-
-template< class REAL, unsigned int D > struct reald{
-  typedef vector_td< REAL, D > Type;
-};
-
-template< unsigned int D > struct intd{
-  typedef vector_td< int, D > Type;
-};
-
-template< unsigned int D > struct uint64d{
-  typedef vector_td< unsigned int, D > Type;
-};
-
-template< unsigned int D > struct floatd{
- typedef typename reald< float, D >::Type Type;
-};
-
-template< unsigned int D > struct doubled{
-  typedef typename reald< double, D >::Type Type;
-};
-
-template< class T > struct complext{
-  typedef vector_td< T, 2 > Type;
-};
-
-</programlisting>
-
-        <para>A number of arithmetic and conditional operators on the
-        <classname>vector_td</classname> are defined in
-        <filename>vector_td_operators.h</filename>. Similarly, the header
-        <filename>vector_td_utilities.h</filename> wraps common math
-        functionality for the <classname>vector_td</classname> class. Many
-        common operations that take one or more
-        <classname>cuNDArray</classname> instances with element type
-        <classname>vector_td</classname> are defined in
-        <filename>ndarray_vector_utilities.h</filename>. We encourage the
-        reader to explore these utilities on his own.</para>
-
-        <para>The <classname>vector_td</classname> can be used in both host
-        and device code. As an example of use it is contained in the interface
-        of the non-Cartesian FFT described in <xref
-        linkend="sect.NFFT"/>.</para>
-      </sect2>
-
-      <sect2>
-        <title>complext</title>
-
-        <para>A complex number class that can be used in both host and device
-        code is found in <filename>complext.h</filename>. It contains a
-        substantial set of useful operators and functions.</para>
-      </sect2>
-
-      <sect2 xml:id="sect.ffttoolbox">
-        <title>Fourier Transforms</title>
-
-        <sect3>
-          <title>Cartesian FFT</title>
-
-          <sect4>
-            <title>FFT of a <classname>hoNDArray</classname></title>
-
-            <para>The Gadgetron uses the FFTW library for Fourier transform of
-            <classname>hoNDArray</classname> structures. Users can call the
-            FFTW directly from their code, but to make things a little easier,
-            we provide a simple wrapper class defined in
-            <filename>toolboxes/ndarray/FFT.h</filename>. Here is an
-            abbreviated version:</para>
-
-            <programlisting>template <typename T> class EXPORTNDARRAY FFT
-{
-
-public:
- static FFT<T>* instance(); 
-
- void fft(hoNDArray< std::complex<T> >* input, 
-          unsigned int dim_to_transform);
-
- void ifft(hoNDArray< std::complex<T> >* input, 
-          unsigned int dim_to_transform);
-
- void fft(hoNDArray< std::complex<T> >* input);
-
- void ifft(hoNDArray< std::complex<T> >* input);
-
-protected:
- FFT();
- virtual ~FFT();
-};</programlisting>
-
-            <para>The <classname>FFT</classname> class provides simple wrapper
-            functionality to perform FFTs of <classname>hoNDArray</classname>s
-            along a specific dimension or along all dimensions. It performs
-            <emphasis>in place</emphasis> FFTs and works on complex arrays of
-            single or double precision.</para>
-
-            <para>An important feature of this class is that it is a process
-            wide singleton for the Gadgetron. As outlined in the definition
-            above, the constructor and destructor are protected and it is not
-            possible to allocate a new <classname>FFT</classname> object. The
-            way to use the class is through the
-            <function>instance</function> function:</para>
-
-            <programlisting>#include "FFT.h"
-
-FFT<float>::instance()->fft(...);</programlisting>
-
-            <para>The reason for this is that the FFTW planning routines are
-            not thread safe. Multiple Gadgets (that each have their own thread
-            of execution) may need to use FFTs and consequently the planning
-            routines need to be protected with a mutex. All of this is handled
-            inside the <classname>FFT</classname> class and since it is a
-            singleton only one thread can run the planning routines at any
-            given time.</para>
-
-            <para>As mentioned it is possible for the users to call FFTW
-            routines directly, and there may be some performance reasons for
-            doing so (as opposed to using this wrapper), but please be aware
-            of this thread safety issue when you design your Gadgets. If you
-            want to be on the safe side, use the wrapper.</para>
-          </sect4>
-
-          <sect4>
-            <title>FFT of a <classname>cuNDArray</classname></title>
-
-            <para>Cartesian Fast Fourier Transform on the GPU is supported by
-            wrapping Cuda's FFT routines as defined in
-            <filename>cuNDFFT.h</filename>.</para>
-
-            <programlisting>template<class T> class EXPORTGPUCORE cuNDFFT
-{
- public:
-
-  cuNDFFT() {}
-  virtual ~cuNDFFT() {}
-
-  int fft ( cuNDArray<T> *input, 
-    std::vector<unsigned int> *dims_to_transform );
-
-  int ifft( cuNDArray<T> *input, 
-    std::vector<unsigned int> *dims_to_transform, 
-    bool do_scale = true );
-
-  int fft ( cuNDArray<T> *input, 
-    unsigned int dim_to_transform);
-
-  int ifft( cuNDArray<T> *input, 
-    unsigned int dim_to_transform, 
-    bool do_scale = true );
-
-  int fft ( cuNDArray<T> *input );
-  int ifft( cuNDArray<T> *input, 
-    bool do_scale = true );
-
- protected:
-  int fft_int( cuNDArray<T> *input, 
-    std::vector<unsigned int> *dims_to_transform, 
-    int direction, 
-    bool do_scale = true );
-};
- </programlisting>
-
-            <para>The interface defines forwards and inverse transforms of a
-            single array dimension, all dimensions of the array, or a subset
-            of dimensions.</para>
-          </sect4>
-        </sect3>
-
-        <sect3 xml:id="sect.NFFT">
-          <title>Non-Cartesian FFT</title>
-
-          <para>A dedicated GPU-implementation of the NUFFT - often referred
-          to as gridding - is provided. The interface is defined in
-          <filename>NFFT.h</filename>, provided below in abbreviated
-          form:</para>
-
-          <programlisting>template< class REAL, unsigned int D, bool ATOMICS=false > 
-class EXPORTGPUNFFT NFFT_plan
-{
- public: // Main interface
-    
-  // Constructors
-  NFFT_plan();
-  NFFT_plan( typename uint64d<D>::Type matrix_size, 
-             typename uint64d<D>::Type matrix_size_os, 
-             REAL W, int device = -1 );
-
-  // Destructor
-  virtual ~NFFT_plan();
-
-  // Clear internal storage in plan
-  enum NFFT_wipe_mode { NFFT_WIPE_ALL, NFFT_WIPE_PREPROCESSING };
-  bool wipe( NFFT_wipe_mode mode );
-
-  // Replan 
-  bool setup( typename uint64d<D>::Type matrix_size, 
-              typename uint64d<D>::Type matrix_size_os, 
-              REAL W, int device = -1 );
-    
-  // Preprocess trajectory 
-  // Cartesian to non-Cartesian / non-Cartesian to Cartesian / both
-  enum NFFT_prep_mode { NFFT_PREP_C2NC, 
-                        NFFT_PREP_NC2C, 
-                        NFFT_PREP_ALL };
-
-  bool preprocess
-    ( cuNDArray<typename reald<REAL,D>::Type> *trajectory, 
-      NFFT_prep_mode mode );
-    
-  // Execute NFFT 
-  // ( Cartesian to non-Cartesian or non-Cartesian to Cartesian)  
-  enum NFFT_comp_mode { NFFT_FORWARDS_C2NC, 
-                        NFFT_FORWARDS_NC2C, 
-                        NFFT_BACKWARDS_C2NC, 
-                        NFFT_BACKWARDS_NC2C };
-
-  bool compute( cuNDArray<complext<REAL> > *in, 
-                cuNDArray<complext<REAL> > *out, 
-                cuNDArray<REAL> *dcw, NFFT_comp_mode mode );
-
-  // Execute NFFT iteration 
-  // (Cartesian to non-Cartesian and back to Cartesian space)
-  bool mult_MH_M( cuNDArray<complext<REAL> > *in, 
-                  cuNDArray<complext<REAL> > *out, 
-                  cuNDArray<REAL> *dcw, 
-                  std::vector<unsigned int> halfway_dims );
-  
- public: // Utilities
-  
-  // NFFT convolution 
-  // (Cartesian to non-Cartesian or non-Cartesian to Cartesian)
-  enum NFFT_conv_mode { NFFT_CONV_C2NC, NFFT_CONV_NC2C };
-  bool convolve( cuNDArray<complext<REAL> > *in, 
-                 cuNDArray<complext<REAL> > *out, 
-                 cuNDArray<REAL> *dcw, 
-                 NFFT_conv_mode mode, bool accumulate = false );
-    
-  // NFFT FFT
-  enum NFFT_fft_mode { NFFT_FORWARDS, NFFT_BACKWARDS };
-  bool fft( cuNDArray<complext<REAL> > *data, 
-            NFFT_fft_mode mode, bool do_scale = true );
-  
-  // NFFT deapodization
-  bool deapodize( cuNDArray<complext<REAL> > *image );
-
- public: // Setup queries
-
-  typename uint64d<D>::Type get_matrix_size();
-  typename uint64d<D>::Type get_matrix_size_os();
-  REAL get_W();
-  unsigned int get_device();
-  
-...
-};</programlisting>
-
-          <para>After a <classname>NFFT_plan</classname> is constructed the
-          <function>preprocess</function> function should be called with the
-          desired trajectory. In the special case of radial sampling the
-          header <filename>radial_utilities.h</filename> defines some
-          convenient functions to compute radial trajectories and
-          corresponding density compensation weights. After preprocessing the
-          NFFT can be executed through the <function>compute</function>
-          function. The individual building blocks of the NFFT - convolution,
-          FFT, and deapodization - are exposed in the public interface and
-          hence available for use in custom algorithms.</para>
-
-          <para>It is often required to perform the NFFT on a number of
-          different inputs. Particularly in 1D and 2D the best performance is
-          obtained if many transforms are executed concurrently in order to
-          keep the device fully occupied. Two strategies can be
-          combined:<itemizedlist>
-              <listitem>
-                <para>The trajectory passed to the preprocess method is
-                normally a one-dimensional cuNDArray containing normalized (to
-                the range [-0.5;0.5]) non-Cartesian positions as
-                <classname>reald<REAL,D></classname> elements of
-                precision <classname>REAL</classname> and dimensionality
-                <classname>D</classname>. However, if the cuNDArray is
-                two-dimensional, the latter dimension specifies that we wish
-                to transform a number of frames with different trajectories
-                concurrently.</para>
-              </listitem>
-
-              <listitem>
-                <para>If a number of transformations with identical
-                trajectories are to be transformed, the input and output
-                arrays to the compute methods can be any multiple of the
-                Cartesian and non-Cartesian dimensions configured from the
-                <classname>setup</classname> and
-                <classname>preprocess</classname> methods. The images provided
-                are consequently batch transformed.</para>
-              </listitem>
-            </itemizedlist></para>
-
-          <para><remark>Please note</remark>. The NFFT performs significantly
-          better on GPUs supporting Cuda's shader model 2.0 or newer compared
-          to devices supporting only shader models 1.x. The reason being that
-          we rely on the inherent caching of global memory - available only on
-          hardware supporting at least shader model 2.0.</para>
-
-          <para>As of the Gadgetron release 1.1 a version of the NFFT
-          implemented using atomic operations is available. It is enabled
-          through the ATOMICS boolean template argument (and defaults to
-          false, i.e. disabled). At the time of the Gadgetron 1.1 release the
-          current generation hardware showed inferior performance using the
-          atomic version over the non-atomic version. However, using the
-          atomic version significantly reduces the memory requirements, and
-          could thus be the only viable option, particularly for three- or
-          four-dimensional reconstructions, on GPUs lacking sufficient
-          memory.</para>
-        </sect3>
-      </sect2>
-
-      <sect2 xml:id="sect.matrix_operators">
-        <title>Linear (Matrix) Operators</title>
-
-        <para>A fundamental building block of most image reconstruction
-        algorithms is the abstract class
-        <classname>linearOperator</classname>. A range of linear imaging and
-        regularization operators are inherited from this pure virtual base
-        class (abbreviated):<programlisting>template < class REAL, class ARRAY_TYPE > class linearOperator
-{
- public:
-
-  linearOperator() { weight_ = REAL(1); }
-
-  virtual ~linearOperator() {}
-
-  virtual void set_weight( REAL weight ){ weight_ = weight; }
-  virtual REAL get_weight(){ return weight_; }
-
-  virtual bool set_domain_dimensions
-    ( std::vector<unsigned int> *dims ) { ... }
-  virtual bool set_codomain_dimensions
-    ( std::vector<unsigned int> *dims ) { ... }
-
-  virtual boost::shared_ptr< std::vector<unsigned int> > 
-    get_domain_dimensions() { ... }
-
-  virtual boost::shared_ptr< std::vector<unsigned int> > 
-    get_codomain_dimensions() { ... }
-
-  virtual int mult_M( ARRAY_TYPE* in, ARRAY_TYPE* out, 
-                      bool accumulate = false) = 0;
-  virtual int mult_MH( ARRAY_TYPE* in, ARRAY_TYPE* out, 
-                       bool accumulate = false) = 0;
-  virtual int mult_MH_M( ARRAY_TYPE* in, ARRAY_TYPE* out, 
-                       bool accumulate = false)
-  {
-    // Perform mult_M followed by mult_MH
-    ...
-  }
-  
-  virtual boost::shared_ptr< 
-    linearOperator< REAL, ARRAY_TYPE > > clone() = 0;
-
-  ...
-};</programlisting>The <classname>linearOperator</classname> is templated by
-        two arguments: 1) the basic precision <classname>REAL</classname>
-        (<classname>e.g. float</classname> or <classname>double</classname>)
-        and 2) the <classname>ARRAY_TYPE</classname> (e.g.
-        <classname>hoNDArray<T></classname> or
-        <classname>cuNDArray<T></classname>) representing the expected
-        vector format for the matrix-vector multiplication the operator
-        implements.</para>
-
-        <para>Every <classname>MatrixOperator</classname> has an associated
-        weight that is used to balance multiple matrix terms when added to a
-        cost function (see <xref linkend="sect.linear_solvers"/>).</para>
-
-        <para>The main functionality is provided in the two pure virtual
-        functions <function>mult_M</function> and <function>mult_MH</function>
-        denoting multiplication with the matrix operator
-        (<varname>M</varname>) and multiplication with the adjoint (i.e.
-        conjugate transpose) of the matrix operator
-        (<varname>M<superscript>H</superscript></varname>) respectively. The
-        default implementation of <function>mult_MH_M</function> computes an
-        "iteration" of the two
-        (<varname>M<superscript>H</superscript>M</varname>) by invoking
-        <function>mult_M</function> and <function>mult_MH</function> in turn.
-        Specialized operators can redefine the virtual
-        <function>mult_MH_M</function> to increase performance when
-        appropriate.</para>
-
-        <para>The <classname>clone</classname> method is required by some
-        solvers to make a clone (copy) of a given
-        <classname>linearOperator</classname>. Similarly, some solvers require
-        knowledge of the <varname>domain</varname> and
-        <varname>codomain</varname> dimensions on which the operator can be
-        applied. The <classname>mult_M</classname> method converts the input
-        vector of <varname>domain_size</varname> to one of
-        <varname>codomain_size</varname> - and vice versa for
-        <classname>mult_MH</classname>.</para>
-
-        <para>The <classname>linearOperator</classname> is used to model a
-        linear imaging modality's encoding operation (Fourier transform for
-        MRI, Radon transform for CT, convolution for Microscopy etc.) but also
-        common regularization operators such as the identity matrix, the partial
-        derivatives etc.</para>
-
-        <para>Here follows a list that briefly describes the linear operators
-        that are used for the reconstruction examples discussed later in this
-        document (<xref linkend="sect.exampleapplications"/>, <xref
-        linkend="sect.standalone_applications"/>).</para>
-
-        <sect3>
-          <title>List of linear operators</title>
-
-          <para>The section provides a non-exhaustive list of available linear
-          operators in Gadgetron toolboxes.</para>
-
-          <para>A two-level implementation strategy is used for most of the
-          operators the Gadgetron provides. We first derive a class, say
-          <classname>identityOperator</classname>, from the
-          <classname>linearOperator</classname> base class. In this derived
-          class we implement the pure virtual functions of the base class,
-          e.g. <function>mult_M</function>, <function>mult_MH</function>, and
-          <function>mult_MH_M</function>. The overall algorithm and
-          functionality of the operator is implemented at this level. Like its
-          superclass, the <classname>identityOperator</classname> is however
-          templated on the underlying <classname>ARRAY_TYPE</classname> and
-          thus cannot contain dedicated implementation code to a specific
-          array implementation. The implementation of
-          <function>mult_M</function>, <function>mult_MH</function>, and
-          <function>mult_MH_M</function> is consequently based on a new set of
-          pure virtual functions of the templated
-          <classname>ARRAY_TYPE</classname>. We provide another level of
-          inheritance, e.g. <classname>cuIdentityOperator</classname>, which
-          in this case provides the <classname>cuNDArray</classname>-specific
-          implementation of the pure virtual function in
-          <classname>identityOperator</classname>. This hierarchy has the
-          desired design goal, that the core algorithm implementation is
-          shared in the base class of the operator. Only the host/device
-          specific sub-components are defined individually. It is thus fairly
-          straightforward to derive both a <classname>cuNDArray</classname>
-          and an <classname>hoNDArray</classname> version of an
-          operator.</para>
-
-          <para>As an example we provide a simplified declaration of the
-          <classname>identityOperator</classname> and
-          <classname>cuIdentityOperator</classname> below. Without specific
-          mentioning for the subsequent operators, many follow a similar
-          inheritance hierarchy.</para>
-
-          <itemizedlist>
-            <listitem>
-              <para><classname>identityOperator</classname></para>
-
-              <para>Implements multiplication of a vector with the identity
-              matrix.</para>
-
-              <para><programlisting>// Notice: simplified code without error-checking
-
-template <class REAL, class ARRAY_TYPE> class identityOperator
- : public linearOperator<REAL, ARRAY_TYPE>
-{
- public:
-
-  identityOperator() : linearOperator<REAL, ARRAY_TYPE>() {}
-  virtual ~identityOperator() {}
-  
-  // operator_xpy computes "x+y" and stores the result in y
-  virtual bool operator_xpy( ARRAY_TYPE *x, ARRAY_TYPE *y ) = 0;
-
-  virtual int mult_M( ARRAY_TYPE *in, ARRAY_TYPE *out, 
-                      bool accumulate = false )
-  {
-    if( accumulate )
-      operator_xpy( in, out );
-    else 
-      *out = *in;
-  }
-
-  ... // Similar code for mult_MH and mult_MH_M
-};
-
-</programlisting><parameter>The Cuda specific
-              implementation:</parameter><programlisting>// Notice: 
-// Simplified code without error checking and multi-device support
-
-template <class REAL, class T> 
-class cuIdentityOperator 
-: public identityOperator< REAL, cuNDArray<T> >
-{
- public:
-
-  cuIdentityOperator() : 
-    identityOperator< REAL, cuNDArray<T> >() {}
-  
-  virtual ~cuIdentityOperator() {}
-  
-  virtual bool operator_xpy( cuNDArray<T> *x, cuNDArray<T> *y )
-  { 
-    return cuNDA_axpy( T(1), x, y );
-  }
-
- ...
-};
-
-</programlisting>Notice that the template arguments to the
-              <classname>cuIdentityOperator</classname> differ from its base
-              class. <classname>REAL</classname> specifies the desired
-              precision (<classname>float</classname> or
-              <classname>double</classname>) and <classname>T</classname>
-              specifies the desired type - which could be identical to
-              <classname>REAL</classname> or e.g. a
-              <classname>complext<REAL></classname>. Also notice how the
-              <classname>cuIdentityOperator</classname> class definition
-              directly specifies the ARRAY_TYPE of its superclass (in this
-              case to be of type
-              <classname>cuNDArray<T></classname>).</para>
-            </listitem>
-
-            <listitem>
-              <para><classname>partialDerivativeOperator</classname></para>
-
-              <para>Provides the partial derivative of an image in a given
-              spatial dimension.</para>
-            </listitem>
-
-            <listitem>
-              <para><classname>laplaceOperator</classname></para>
-
-              <para>Computes the Laplacian of an image.</para>
-            </listitem>
-
-            <listitem>
-              <para><classname>imageOperator</classname></para>
-
-              <para>Performs multiplication with a diagonal matrix of the
-              element-wise reciprocal of a given image.</para>
-            </listitem>
-
-            <listitem>
-              <para><classname>convolutionOperator</classname></para>
-
-              <para>Performs convolution of an image with a given
-              kernel.</para>
-            </listitem>
-
-            <listitem>
-              <para><classname>nfftOperator</classname></para>
-
-              <para>Implements the non-Cartesian Fast Fourier Transform</para>
-            </listitem>
-
-            <listitem>
-              <para><classname>senseOperator</classname></para>
-
-              <para>Implements the encoding operator for the parallel MRI
-              imaging technique Sense. Comes in two flavours for 1) Cartesian
-              and 2) non-Cartesian reconstruction.</para>
-            </listitem>
-
-            <listitem>
-              <para><classname>multiplicationOperatorContainer</classname></para>
-
-              <para>An operator can often be considered the result of
-              multiplicative concatenation of a sequence of simpler linear
-              operators. The
-              <classname>multiplicationOperatorContainer</classname> defines a
-              convenient interface to ease the construction of such
-              concatenations.</para>
-            </listitem>
-
-            <listitem>
-              <para><classname>encodingOperatorContainer</classname></para>
-
-              <para>As we require exactly one encoding operator (but allow
-              multiple regularization operators) to be added to our solvers
-              (see <xref linkend="sect.linear_solvers"/> below), this operator
-              acts as a container when multiple encoding operators are
-              desired. For example: The cost function right below (<xref
-              linkend="sect.cg_solver"/>) has two terms in its general form.
-              Most often the vector <emphasis role="bold">p</emphasis> is
-              <emphasis role="bold">0</emphasis> and consequently the operator
-              <emphasis role="bold">R</emphasis> is considered a
-              regularization operator while the operator <emphasis
-              role="bold">E</emphasis> the single encoding operator. However,
-              if <emphasis role="bold">p</emphasis> is non-zero, both
-              <emphasis role="bold">E</emphasis> and <emphasis
-              role="bold">R</emphasis> must be added to an
-              <classname>encodingOperatorContainer</classname> that takes in
-              both <emphasis role="bold">m</emphasis> and <emphasis
-              role="bold">p</emphasis> during multiplication. A single
-              <classname>encodingOperatorContainer</classname> is then added
-              to the solver.</para>
-            </listitem>
-          </itemizedlist>
-        </sect3>
-      </sect2>
-
-      <sect2 xml:id="sect.linear_solvers">
-        <title>Linear Solvers</title>
-
-        <para>The Gadgetron's solvers toolbox contains both a generic
-        conjugate gradient solver to solve linear least squares reconstruction
-        problems (see <xref linkend="sect.cg_solver"/>) and two
-        flavors of a Split Bregman solver for non-linear problems using
-        l1-norms for regularization (see <xref
-        linkend="sect.nonlinear_solvers"/>). More solvers can be expected in
-        upcoming releases.</para>
-
-        <sect3 xml:id="sect.cg_solver">
-          <title>Conjugate Gradient Method for Linear Least Squares</title>
-
-          <para>The conjugate gradient solver is used to reconstruct an image
-          posed as a minimizer to an l2-based optimization problem:</para>
-
-          <informalequation>
-            <mediaobject>
-              <imageobject role="html">
-                <imagedata fileref="figs/math/lls.jpg" format="JPEG"
-                           width="3in"/>
-              </imageobject>
-
-              <imageobject role="fo">
-                <imagedata fileref="figs/math/lls.jpg" format="JPEG"
-                           width="3in"/>
-              </imageobject>
-            </mediaobject>
-          </informalequation>
-
-          <para>The unknown image to be reconstructed is denoted here by
-          <emphasis role="bold">u</emphasis> and the measured data by
-          <emphasis role="bold">m</emphasis>. <emphasis
-          role="bold">E</emphasis> is a linear operator modelling the encoding
-          of the imaging modality (e.g. a Fourier transform for MRI, a Radon
-          transform for CT etc.). <emphasis role="bold">R</emphasis> is a
-          regularization operator often required to ensure uniqueness of the
-          solution. Lambda is a scalar weight (with a default value of one)
-          associated to each matrix operator and used to balance the various
-          terms in the cost function. Finally <emphasis
-          role="bold">p</emphasis> denotes some (possibly blank) prior image
-          in the regularization term. Any number of terms can be added.</para>
-
-          <para>The closed form solution to the optimization problem is given
-          by the linear system of equations:</para>
-
-          <informalequation>
-            <mediaobject>
-              <imageobject role="html">
-                <imagedata fileref="figs/math/lls_form.jpg" format="JPEG"
-                           width="3in"/>
-              </imageobject>
-
-              <imageobject role="fo">
-                <imagedata fileref="figs/math/lls_form.jpg" format="JPEG"
-                           width="3in"/>
-              </imageobject>
-            </mediaobject>
-          </informalequation>
-
-          <para>Put very briefly: you set up and run a solver by 1) adding
-          the corresponding linear operators to the solver, and 2) invoking
-          the <function>solve</function> function in the solver providing
-          <emphasis role="bold">m</emphasis> (and <emphasis
-          role="bold">p</emphasis> if non-zero) as input arguments.</para>
-
-          <para>An abbreviated version of the interface to the conjugate
-          gradient solver is shown here</para>
-
-          <para><programlisting>// Defined in solver.h
-
-template <class ARRAY_TYPE_IN, class ARRAY_TYPE_OUT> 
-class solver
-{
-public:
-
-  // Constructor/destructor
-  //
-
-  solver() { output_mode_ = OUTPUT_SILENT; }
-  virtual ~solver() {}
-  
-  // Output modes
-  //
-
-  enum solverOutputModes { OUTPUT_SILENT = 0, 
-                           OUTPUT_WARNINGS = 1, 
-                           OUTPUT_VERBOSE = 2, 
-                           OUTPUT_MAX = 3 };
-  
-  // Set/get output mode
-  //
-
-  virtual int get_output_mode() { return output_mode_; }
-
-  virtual void set_output_mode( int output_mode ) {
-      output_mode_ = output_mode;
-  }
-  
-  // Set/get starting solution/estimate for solver
-  //
-
-  virtual void set_x0( boost::shared_ptr<ARRAY_TYPE_OUT> x0 )
-    { x0_ = x0; }
-
-  virtual boost::shared_ptr<ARRAY_TYPE_OUT> get_x0()
-    { return x0_; }
-
-  // Default error output
-  //
-
-  virtual void solver_error( std::string msg ) { ... }
-
-  // Default warning output
-  //
-
-  virtual void solver_warning( std::string msg ) { ... }
-
-  // Invoke solver
-  //
-
-  virtual boost::shared_ptr<ARRAY_TYPE_OUT> solve
-    ( ARRAY_TYPE_IN* ) = 0;
- 
-protected:
-  int output_mode_;
-  boost::shared_ptr<ARRAY_TYPE_OUT> x0_;
-};</programlisting></para>
-
-          <para>The abstract <classname>cgSolver</classname> class:</para>
-
-          <para><programlisting>// Defined in cgSolver.h
-
-template <class REAL, 
-          class ELEMENT_TYPE, 
-          class ARRAY_TYPE> 
-class cgSolver : public linearSolver
-  <REAL, ELEMENT_TYPE, ARRAY_TYPE>
-{
-public:
-
-  // Class defining the termination criterium
-  //
-
-  friend class cgTerminationCallback
-    <REAL, ELEMENT_TYPE, ARRAY_TYPE>;
-
-  // Constructor / destructor
-  //
-
-  cgSolver() : linearSolver<REAL, ELEMENT_TYPE, ARRAY_TYPE>() 
-   {...}
- 
-  virtual ~cgSolver() {}
-
-  // Set preconditioner
-  //
-
-  virtual void set_preconditioner( 
-    boost::shared_ptr< cgPreconditioner<ARRAY_TYPE> > precond ) {
-      precond_ = precond;
-  }
-  
-  // Set termination callback
-  //
-
-  virtual void set_termination_callback(
-    boost::shared_ptr< cgTerminationCallback
-      <REAL, ELEMENT_TYPE, ARRAY_TYPE> > cb ){
-      cb_ = cb;
-  }
-
-  // Set/get maximally allowed number of iterations
-  //
-
-  virtual void set_max_iterations( unsigned int iterations ) { 
-    iterations_ = iterations; }
-
-  virtual unsigned int get_max_iterations() { return iterations_; }  
-
-  // Set/get tolerance threshold for termination criterium
-  //
-
-  virtual void set_tc_tolerance( REAL tolerance ) 
-    { tc_tolerance_ = tolerance; }
-
-  virtual REAL get_tc_tolerance() { return tc_tolerance_; }
-  
-  // Pre/post solver callbacks
-  //
-
-  virtual bool pre_solve( ARRAY_TYPE** ) { return true; }
-  virtual bool post_solve( boost::shared_ptr<ARRAY_TYPE>& ) 
-    { return true; }
-
-  // Pure virtual functions defining core solver functionality
-  // Implemented on the host/device respectively in a derived class
-  //
-
-  virtual ELEMENT_TYPE solver_dot( ARRAY_TYPE*, ARRAY_TYPE* ) = 0;
-  virtual bool solver_clear( ARRAY_TYPE* ) = 0;
-  virtual bool solver_scal( ELEMENT_TYPE, ARRAY_TYPE* ) = 0;
-  virtual bool solver_dump( ARRAY_TYPE* ) { return true; }
-  virtual bool solver_axpy
-    ( ELEMENT_TYPE, ARRAY_TYPE*, ARRAY_TYPE* ) = 0;
-
-  //
-  // Main solver interfaces
-  //
-
-  virtual boost::shared_ptr<ARRAY_TYPE> solve( ARRAY_TYPE *_d ) 
-    { ... }
-
-  virtual boost::shared_ptr<ARRAY_TYPE> solve_from_rhs
-    ( ARRAY_TYPE *_rhs ) { ... }
-
-  ...
-};
-</programlisting><parameter>The Cuda specific
-          implementation:</parameter><programlisting>// Defined in cuCGSolver.h
-
-template <class REAL, class T> class cuCgSolver 
-  : public cgSolver< REAL, T, cuNDArray<T> >
-{
-public:
-
-  cuCgSolver() : cgSolver< REAL, T, cuNDArray<T> >() { ... }
-  virtual ~cuCgSolver() {}
-
-  cuCGSolver() : cgSolver< REAL, T, cuNDArray<T> >() {}
-  virtual ~cuCGSolver() {}
-
-  virtual bool pre_solve(cuNDArray<T>**)
-   { ... }
-
-  virtual bool post_solve(cuNDArray<T>**)
-   { ... }
-
-  virtual void solver_error( std::string err )
-   { ... }
-
-  virtual T solver_dot( cuNDArray<T> *x, 
-   cuNDArray<T> *y ){ ... }
-
-  virtual bool solver_clear( cuNDArray<T> *x )
-   { ... }
-
-  virtual bool solver_scal( T a, 
-   cuNDArray<T> *x ){ ... }
-
-  virtual bool solver_axpy( T a, cuNDArray<T> *x, 
-   cuNDArray<T> *y ){ ... }
-
-  ...
-};</programlisting>The overall inheritance hierarchy is modelled and
-          implemented similarly to the <classname>linearOperator</classname>
-          class hierarchy described above (see <xref
-          linkend="sect.matrix_operators"/>). To use the solver the user
-          creates an instance of the solver for either the host or device
-          (e.g. the <classname>cuCGSolver</classname> above for a GPU-based
-          solver). The solver is configured using the functions in the
-          <classname>cgSolver</classname> base class. The core solve function
-          itself is found in the root of the hierarchy; the
-          <classname>solver</classname>.</para>
-
-          <para>Note that any number of terms (linear operators) can be added
-          to the solver (or cost function).</para>
-
-          <para>The following code listing provides a short example of how to
-          define a conjugate gradient solver for GPU-based image deblurring
-          given an image and an estimate of the point spread function that
-          degraded the image. It uses the
-          <classname>convolutionOperator</classname> to model the blurring and
-          a <classname>partialDerivativeOperator</classname> in each spatial
-          dimension for regularization. The full code can be found in
-          <envar>$(GADGETRON_SOURCE)</envar><filename>/apps/standalone/gpu/deblurring/2d/deblur_2d_cg.cpp</filename>.<programlisting>{
-  << Code that parses the command line 
-     and loads the image and kernel from disk >>
-
-  // Define the desired precision
-  typedef float _real; 
-  typedef complext<_real>::Type _complext;
-
-  // Upload host data to device
-  cuNDArray<_complext> data(host_data.get());
-  cuNDArray<_complext> kernel(host_kernel.get());
-    
-  // Setup regularization operators
-
-  boost::shared_ptr
-    < cuPartialDerivativeOperator<_real,_complext,2> > 
-      Rx( new cuPartialDerivativeOperator<_real,_complext,2>(0) ); 
-
-  boost::shared_ptr
-    < cuPartialDerivativeOperator<_real,_complext,2> > 
-      Ry( new cuPartialDerivativeOperator<_real,_complext,2>(1) ); 
-
-  Rx->set_weight( lambda );
-  Ry->set_weight( lambda );
-     
-  //
-  // Setup conjugate gradients solver
-  //
-
-  // Define encoding matrix
-  boost::shared_ptr< cuConvolutionOperator<_real,2> > 
-    E( new cuConvolutionOperator<_real,2>() );
-
-  E->set_kernel( &kernel );
-  E->set_domain_dimensions(data.get_dimensions().get());
-    
-  // Setup conjugate gradient solver
-  cuCGSolver<_real, _complext> cg;
-
-  // encoding matrix
-  cg.set_encoding_operator( E );
-
-  // regularization matrix                   
-  if( kappa>0.0 ) cg.add_regularization_operator( Rx );
-  
-  // regularization matrix
-  if( kappa>0.0 ) cg.add_regularization_operator( Ry ); 
-
-  cg.set_max_iterations( num_iterations );
-  cg.set_tc_tolerance( 1e-8 );
-  cg.set_output_mode( cuCGSolver<_real, _complext>::OUTPUT_VERBOSE );
-                
-  //
-  // Conjugate gradient solver
-  //
-  
-  boost::shared_ptr< cuNDArray<_complext> > 
-    cgresult = cg.solve(&data);
-
-  // All done, write out the result
-  
-  boost::shared_ptr< hoNDArray<_complext> > 
-    host_result = cgresult->to_host();
-  
-  write_nd_array<_complext>(host_result.get(), 
-    (char*)parms.get_parameter('r')->get_string_value());
-}</programlisting></para>
-
-          <para>For an overview of the various standalone applications the
-          Gadgetron provides - and instructions on how to run them - we refer
-          to <xref linkend="sect.standalone_applications"/>.</para>
-        </sect3>
-      </sect2>
-
-      <sect2 xml:id="sect.nonlinear_solvers">
-        <title>Non-linear Solvers</title>
-
-        <sect3>
-          <title>Split Bregman Solver for L1-regularized Problems</title>
-
-          <para>The Gadgetron includes two Split Bregman solvers to solve
-          respectively</para>
-
-          <informalequation>
-            <mediaobject>
-              <imageobject role="html">
-                <imagedata fileref="figs/math/sb.jpg" format="JPEG"
-                           width="3in"/>
-              </imageobject>
-
-              <imageobject role="fo">
-                <imagedata fileref="figs/math/sb.jpg" format="JPEG"
-                           width="3in"/>
-              </imageobject>
-            </mediaobject>
-          </informalequation>
-
-          <para>where |.|<subscript>TV</subscript> denotes the Total Variation
-          norm. The solver to the upper (unconstrained) optimization problem is
-          defined in <filename>sbSolver.h</filename> while the solver to the
-          latter constrained problem is declared in
-          <filename>sbcSolver.h</filename>. The Split Bregman solver was
-          chosen as it integrates nicely with the linear conjugate solver
-          described above (<xref linkend="sect.linear_solvers"/>). In fact,
-          most of the work in the two Split Bregman solvers is performed by a
-          linear inner solver (e.g. a conjugate gradient solver), but the
-          input (right hand side) to the inner solver varies from iteration to
-          iteration.</para>
-
-          <para>The interface to the unconstrained Split Bregman solver is
-          given here. We have seen the overall inheritance hierarchy several
-          times already, so it should suffice to provide only very abbreviated
-          headers here:<programlisting>// Defined in sbSolver.h
-
-
-template< class REAL, 
-          class ELEMENT_TYPE, 
-          class ARRAY_TYPE_REAL, 
-          class ARRAY_TYPE_ELEMENT, 
-          class INNER_SOLVER,
-          class OPERATOR_CONTAINER > class sbSolver 
-
- : public linearSolver<REAL, ELEMENT_TYPE, ARRAY_TYPE_ELEMENT>
-{
-public:
-
-  // Constructor
-  //
-
-  sbSolver() : linearSolver<REAL, ELEMENT_TYPE, ARRAY_TYPE_ELEMENT>() 
-   { ... }
-  
-  // Destructor
-  //
-
-  virtual ~sbSolver() {}
-   
-
-  // Add regularization group operator 
-  // (isotropic, multiple operators per group allowed)
-  //
-
-  virtual bool add_regularization_group_operator( 
-    boost::shared_ptr< linearOperator<REAL, ARRAY_TYPE_ELEMENT> > op ) 
-  { ... }
-
-  // Add isotropic regularization group (multiple groups allowed)
-  //
-
-  virtual bool add_group() { ... }
-
-  // Get regularization group operator
-  //
- 
-  < omitted for brevity>
-  
-  // Set/get prior image (PICCS style). 
-  // I.e. for every regularization operator (group) 
-  // R that is added we minimize:
-  // alpha|R(x-prior)|_{l1} + (1-alpha)|R(x)|_{l1}
-  //
-
-  virtual bool set_prior_image( 
-    boost::shared_ptr<ARRAY_TYPE_ELEMENT> prior, REAL alpha )
-  { ... }
- 
-  // Get the prior image and corresponding weighing factor
-  //
-  
-  virtual boost::shared_ptr<ARRAY_TYPE_ELEMENT> get_prior_image() 
-    { return prior_; }
-
-  virtual REAL get_prior_alpha() { return alpha_; }
-
-
-  // Set termination criterium tolerance
-  //
-
-  virtual void set_tc_tolerance( REAL tolerance ) 
-  { ... }
-
-  // Set/get maximum number of outer Split-Bregman iterations
-  //
-
-  virtual void set_max_outer_iterations( 
-    unsigned int iterations ) { outer_iterations_ = iterations; }
- 
-  virtual unsigned int get_max_outer_iterations() { 
-    return outer_iterations_; }
-
-  // Set/get maximum number of inner Split-Bregman iterations
-  //
-
-  virtual void set_max_inner_iterations( 
-    unsigned int iterations ) { inner_iterations_ = iterations; }
-
-  virtual unsigned int get_max_inner_iterations() 
-   { return inner_iterations_; }
-
-  // Get the inner solver
-  //
-
-  virtual boost::shared_ptr<INNER_SOLVER> get_inner_solver() 
-   { return inner_solver_; }
-  
-
-  // Core solver functionality to be implemented
-  // in a derived class (host/device specific implementations)
-  //
-
-  virtual bool solver_clear_real( ARRAY_TYPE_REAL* ) = 0;
-
-  virtual bool solver_clear_element( ARRAY_TYPE_ELEMENT* ) = 0;
-
-  virtual bool solver_sqrt( ARRAY_TYPE_REAL* ) = 0;
-
-  virtual bool solver_scal( ELEMENT_TYPE, 
-                            ARRAY_TYPE_ELEMENT* ) = 0;
-
-  virtual bool solver_axpy_real( REAL, ARRAY_TYPE_REAL*, 
-                                 ARRAY_TYPE_REAL* ) = 0;
-
-  virtual bool solver_axpy_element( ELEMENT_TYPE, 
-                                    ARRAY_TYPE_ELEMENT*, 
-                                    ARRAY_TYPE_ELEMENT* ) = 0;
-
-  virtual REAL solver_asum( ARRAY_TYPE_ELEMENT* ) = 0;
-
-  virtual boost::shared_ptr<ARRAY_TYPE_REAL> solver_abs
-    ( ARRAY_TYPE_ELEMENT* ) = 0;
-
-  virtual boost::shared_ptr<ARRAY_TYPE_REAL> solver_norm
-    ( ARRAY_TYPE_ELEMENT* ) = 0;
-
-  virtual bool solver_shrink1( REAL, ARRAY_TYPE_ELEMENT*, 
-                               ARRAY_TYPE_ELEMENT* ) = 0;
-
-  virtual bool solver_shrinkd( REAL, ARRAY_TYPE_REAL*, 
-                               ARRAY_TYPE_ELEMENT*, 
-                               ARRAY_TYPE_ELEMENT* ) = 0;
-
-  //
-  // Main solver interface
-  //
-
-  virtual boost::shared_ptr<ARRAY_TYPE_ELEMENT> solve
-   ( ARRAY_TYPE_ELEMENT *f ) { ... }
-
- ...
-};
-</programlisting><programlisting>// Defined in cuSbCgSolver.h
-
-template <class REAL, class T> class cuSbCgSolver 
-  : public sbSolver< REAL, T, cuNDArray<REAL>, 
-                     cuNDArray<T>, cuCgSolver<REAL,T>, 
-                     cuEncodingOperatorContainer<REAL,T> >
-{
-public:
-  
-  cuSbCgSolver() : sbSolver< REAL, T, cuNDArray<REAL>, 
-                             cuNDArray<T>, cuCgSolver<REAL,T>, 
-                             cuEncodingOperatorContainer<REAL,T> >() 
-  { ... }
-  
-  virtual ~cuSbCgSolver() {}
-
-  // Implementation of pure virtual functions
-  ...
-};</programlisting></para>
-
-          <para>To run the algorithm on the GPU the user would create an
-          instance of a <classname>cuSbCgSolver</classname> providing the two
-          template arguments; the desired precision and data type. Prior to
-          running the <function>solve</function> function with the measured
-          data <emphasis role="bold">m</emphasis>, the user should provide 1)
-          the encoding operator, 2) the regularization operators, and 3) the
-          desired domain and codomain dimensions as these cannot in general be
-          deduced from the measured data.</para>
-
-          <para>We outline the code required to set up the solver for TV-based
-          image denoising. The full code can be found in
-          <envar>$(GADGETRON_SOURCE)</envar><filename>/apps/standalone/gpu/denoising/2d/denoise_TV.cpp</filename>.<programlisting>{
-  << Command line parsing and data loading >>
-  
-  //
-  // Setup regularization operators
-  // 
-
-  boost::shared_ptr< cuPartialDerivativeOperator<_real,_real,2> > 
-    Rx( new cuPartialDerivativeOperator<_real,_real,2>(0) ); 
-
-  boost::shared_ptr< cuPartialDerivativeOperator<_real,_real,2> > 
-    Ry( new cuPartialDerivativeOperator<_real,_real,2>(1) ); 
-
-  Rx->set_weight( lambda );
-  Rx->set_domain_dimensions(data.get_dimensions().get());
-  Rx->set_codomain_dimensions(data.get_dimensions().get());
-
-  Ry->set_weight( lambda );
-  Ry->set_domain_dimensions(data.get_dimensions().get());
-  Ry->set_codomain_dimensions(data.get_dimensions().get());
-
-  // Define encoding operator (identity)
-  boost::shared_ptr< cuIdentityOperator<_real,_real> > 
-    E( new cuIdentityOperator<_real,_real>() );
-
-  E->set_weight( mu );
-  E->set_domain_dimensions(data.get_dimensions().get());
-  E->set_codomain_dimensions(data.get_dimensions().get());
-
-  // Setup split-Bregman solver
-  //
-
-  cuSbCgSolver<_real,_real> sb;
-
-  sb.set_encoding_operator( E );
-
-  sb.add_regularization_group_operator( Rx );
-  sb.add_regularization_group_operator( Ry);
-  sb.add_group();
-
-  sb.set_max_outer_iterations(num_outer_iterations);
-  sb.set_max_inner_iterations(num_inner_iterations);
-
-  sb.set_output_mode( cuCgSolver<_real,_real>::OUTPUT_VERBOSE );
-  
-  // Setup inner conjugate gradient solver
-  //
-
-  sb.get_inner_solver()->set_max_iterations
-   ( num_cg_iterations );
-  sb.get_inner_solver()->set_tc_tolerance( 1e-4 );
-  sb.get_inner_solver()->set_output_mode
-   ( cuCgSolver<_real,_real>::OUTPUT_WARNINGS );  
-
-  //
-  // Run split-Bregman solver
-  //
-
-  boost::shared_ptr< cuNDArray<_real> > 
-   sbresult = sb.solve(&data);
-
-  << do something with the result >>
-}</programlisting></para>
-
-          <para>The constrained Split Bregman solver inherits from the
-          unconstrained Split Bregman solver and is thus defined with an
-          identical interface.</para>
-        </sect3>
-      </sect2>
-    </sect1>
-
-    <sect1>
-      <title>Gadgetron Gadgets</title>
-
-      <para>Gadgets wrap the functionality of the toolboxes and provide
-      generic building blocks for configuring the streaming reconstruction in
-      the Gadgetron.</para>
-
-      <sect2 xml:id="sect.mrigadgets">
-        <title>MRI Gadgets</title>
-
-        <para>One of the original motivations for creating the Gadgetron was
-        to make a high throughput MRI reconstruction engine that could be
-        interfaced to different MRI vendor systems. Consequently, a lot of the
-        functionality present in the initial release toolboxes and Gadgets is
-        focused on MRI reconstruction. In this section we review the basic
-        data structures used to describe MRI data and list some of the MRI
-        Gadgets that are available. These Gadgets are used in several of the
-        example applications in <xref
-        linkend="sect.exampleapplications"/>.</para>
-
-        <sect3 xml:id="sect.mridatastructures">
-          <title>MRI Data Structures</title>
-
-          <para>MRI data is processed in two different phases. In the first
-          phase individual data (k-space) acquisitions are processed while in
-          the second phase these acquisitions have been combined into images
-          (which may still be in k-space). Correspondingly, there are two
-          different types of Gadgets that dominate the MRI Gadgets; those who
-          operate on individual acquisitions and those who operate on images.
-          Naturally, there are also transitional Gadgets that operate on
-          acquisitions but output images.</para>
-
-          <para>The data header structures used by these MRI Gadgets are
-          defined by the ISMRM Raw Data format (<uri
-          xlink:href="http://ismrmrd.sourceforge.net">http://ismrmrd.sourceforge.net</uri>).</para>
-
-          <para>Most MRI Gadgets inherit from <classname>Gadget2</classname>
-          as described in <xref linkend="sect.gadgets"/>, i.e. they operate on
-          two argument types, the main two base classes used are:</para>
-
-          <programlisting>Gadget2< ISMRMRD::AcquisitionHeader, hoNDArray< std::complex<float> > >
-Gadget2< ISMRMRD::ImageHeader, hoNDArray< std::complex<float> > ></programlisting>
-
-          <para>As seen, they take a data array (which is typically of complex
-          float type) and a header describing either the acquisition or the
-          image. These headers are defined in <filename>ismrmrd.h</filename>
-          (from the ISMRM Raw Data format). The definition of
-          <classname>ISMRMRD::AcquisitionHeader</classname> looks like
-          (abbreviated):</para>
-
-          <programlisting>struct EncodingCounters {
- uint16_t kspace_encode_step_1; 
- uint16_t kspace_encode_step_2; 
- uint16_t average;              
- uint16_t slice;                
- uint16_t contrast;             
- uint16_t phase;                
- uint16_t repetition;           
- uint16_t set;                  
- uint16_t segment;              
- uint16_t user[8];              
-};
-
-struct AcquisitionHeader
-{
- uint16_t           version;                        
- uint64_t           flags;                          
- uint32_t           measurement_uid;                
- uint32_t           scan_counter;                   
- uint32_t           acquisition_time_stamp;         
- uint32_t           physiology_time_stamp[3];       
- uint16_t           number_of_samples;              
- uint16_t           available_channels;             
- uint16_t           active_channels;                
- uint64_t           channel_mask[16];               
- uint16_t           discard_pre;                    
- uint16_t           discard_post;                   
- uint16_t           center_sample;                  
- uint16_t           encoding_space_ref;             
- uint16_t           trajectory_dimensions;          
- float              sample_time_us;                 
- float              position[3];                    
- float              read_dir[3];                    
- float              phase_dir[3];                    
- float              slice_dir[3];                    
- float              patient_table_position[3];      
- EncodingCounters   idx;                            
- int32_t            user_int[8];                    
- float              user_float[8];                 
-};</programlisting>
-
-          <para>It is a simple struct, which mainly serves the purpose of
-          keeping track of a) the encoding properties of a given acquisition
-          (phase encoding number, etc.) and b) the spatial position and
-          orientation that the data was acquired from. Different MRI systems
-          have different conventions for how to label data, but in most cases
-          one would be able to convert to this format.</para>
-
-          <para>The <classname>ISMRMRD::ImageHeader</classname> data structure
-          is also just a struct for keeping track of image labels, position,
-          and orientation:</para>
-
-          <programlisting>struct ImageHeader
-{
-uint16_t            version;                        
- uint64_t            flags;                         
- uint32_t            measurement_uid;               
- uint16_t            matrix_size[3];                
- float               field_of_view[3];              
- uint16_t            channels;                      
- float               position[3];                   
- float               read_dir[3];                    
- float               phase_dir[3];                    
- float               slice_dir[3];                    
- float               patient_table_position[3];     
- uint16_t            average;                       
- uint16_t            slice;                         
- uint16_t            contrast;                      
- uint16_t            phase;                         
- uint16_t            repetition;                    
- uint16_t            set;                           
- uint32_t            acquisition_time_stamp;        
- uint32_t            physiology_time_stamp[3];      
- uint16_t            image_data_type;               
- uint16_t            image_type;                    
- uint16_t            image_index;  
- uint16_t            image_series_index;
- int32_t             user_int[8];       
- float               user_float[8];     
-};</programlisting>
-
-          <para/>
-        </sect3>
-
-        <sect3>
-          <title>List of available MRI Gadgets</title>
-
-          <para>This section contains a non-exhaustive list of available MRI
-          Gadgets with a few brief comments on their function. The purpose is
-          to make it easier to read the XML configuration files provided with
-          the Gadgetron and to give some ideas of what modules can be reused
-          in new reconstruction programs.</para>
-
-          <itemizedlist>
-            <listitem>
-              <para><classname>AccumulatorGadget</classname>
-              (<filename>gadgetroncore</filename>):</para>
-
-              <para>Simple Gadget for accumulating k-space profiles in an
-              array and passing it on to next Gadget. Used for simple
-              Cartesian FT MRI reconstructions.</para>
-            </listitem>
-
-            <listitem>
-              <para><classname>AutoScaleGadget</classname>
-              (<filename>gadgetroncore</filename>):</para>
-
-              <para>Does simple histogram analysis of floating point images
-              passing through and scales them. This is typically used upstream
-              of conversion from floating point to unsigned short
-              images.</para>
-            </listitem>
-
-            <listitem>
-              <para><classname>CoilReductionGadget</classname>
-              (<filename>gadgetroncore</filename>):</para>
-
-              <para>Used to reduce the number of coils in a dataset. Typically
-              used to tune the performance of a given reconstruction by
-              eliminating data. This Gadget is commonly used in conjunction
-              with the <classname>PCACoilGadget</classname> which generates
-              virtual coils based on principal component analysis. The coil
-              reduction can be specified with either a mask or the number of
-              target coils as illustrated below</para>
-
-              <programlisting><gadget>
- <name>CoilReduction</name>
- <dll>gadgetroncore</dll>
- <class>CoilReductionGadget</class>
- <!-- Keep a max of 16 coils -->
- <property><name>coils_out</name><value>16</value></property>
-</gadget>
-
-<gadget>
- <name>CoilReduction</name>
- <dll>gadgetroncore</dll>
- <class>CoilReductionGadget</class>
- <!-- Keep only coils 2, 3 and 4 and discard the rest -->
- <property>
-  <name>coil_mask</name>
-  <value>0 1 1 1 0 0 0 0</value>
- </property>
-</gadget>
-</programlisting>
-            </listitem>
-
-            <listitem>
-              <para><classname>CropAndCombineGadget</classname>
-              (<filename>gadgetroncore</filename>):</para>
-
-              <para>This Gadget is used to do a simple RMS coil combination in
-              the image domain and remove 2x oversampling in the first
-              dimension of the image as is commonly used in MRI. This Gadget
-              is intended to be used after FFT of the data.</para>
-            </listitem>
-
-            <listitem>
-              <para><classname>ExtractGadget</classname>
-              (<filename>gadgetroncore</filename>):</para>
-
-              <para>This Gadget is used to extract a given component
-              (magnitude, real, imaginary, phase) from complex images, i.e. it
-              converts complex images to real images containing specific
-              components. The Gadget can be used to extract multiple
-              components using a mask. The bit fields used to define the
-              components are defined as:</para>
-
-              <programlisting>#define GADGET_EXTRACT_MAGNITUDE              (1 << 0) //1
-#define GADGET_EXTRACT_REAL                   (1 << 1) //2
-#define GADGET_EXTRACT_IMAG                   (1 << 2) //4
-#define GADGET_EXTRACT_PHASE                  (1 << 3) //8
-</programlisting>
-
-              <para>To specify the components, you just specify the mask, for
-              example, the following specification would extract magnitude (1)
-              and phase (8):</para>
-
-              <programlisting><gadget>
- <name>Extract</name>
- <dll>gadgetroncore</dll>
- <class>ExtractGadget</class>
- <property><name>extract_mask</name><value>9</value></property>
-</gadget>
-</programlisting>
-
-              <para>Default behavior is to extract magnitude.</para>
-            </listitem>
-
-            <listitem>
-              <para><classname>FFTGadget</classname>
-              (<filename>gadgetroncore</filename>):</para>
-
-              <para>This Gadget Fourier transforms along the first 3
-              dimensions of the dataset (frequency, phase, partition encoding
-              directions) and passes on the data to the next Gadget.</para>
-            </listitem>
-
-            <listitem>
-              <para><classname>FloatToUShortGadget</classname>
-              (<filename>gadgetroncore</filename>):</para>
-
-              <para>Converts floating point images to unsigned short images.
-              This Gadget would often be used in conjunction with a scaling
-              step (e.g. <classname>AutoScaleGadget</classname>) upstream to
-              ensure that the values will not get clipped or overflow during
-              the conversion to unsigned short. This Gadget does not make any
-              attempt to scale the data, it is assumed to be scaled upon
-              entry.</para>
-            </listitem>
-
-            <listitem>
-              <para><classname>GPUCGGoldenRadial</classname>,
-              <classname>GPUCGFixedRadial</classname>
-              (<filename>gadgetroncgsense</filename>):</para>
-
-              <para>These Gadgets perform conjugate gradient based
-              non-Cartesian SENSE reconstruction (<xref
-              linkend="sect.cgsense"/>). The reconstruction behavior can be
-              controlled with a number of properties:</para>
-
-              <programlisting><gadget>
- <name>GPUCGRadial0</name>
- <dll>gadgetroncgsense</dll>
- <classname>GPUCGGoldenRadialGadget</classname>
-
- <property>
-  <name>deviceno</name>
-  <value>0</value>
- </property>
- 
- <property>
-  <name>sliceno</name>
-  <value>0</value>
- </property>
- 
- <property>
-  <name>profiles_per_frame</name>
-  <value>32</value>
- </property>
- 
- <property>
-  <name>shared_profiles</name>
-  <value>0</value>
- </property>
-
- <property>
-  <name>number_of_iterations</name>
-  <value>10</value>
- </property>
-
- <property>
-  <name>cg_limit</name>
-  <value>1e-6</value>
- </property>
-
- <property>
-  <name>oversampling</name>
-  <value>1.5</value>
- </property>
-
- <property>
-  <name>kernel_width</name>
-  <value>5.5</value>
- </property>
-
- <property>
-  <name>kappa</name>
-  <value>0.1</value>
- </property>
-
- <property>
-  <name>pass_on_undesired_data</name>
-  <value>true</value>
- </property>
-
-</gadget>
-</programlisting>
-            </listitem>
-
-            <listitem>
-              <para><classname>GrappaGadget</classname>,
-              <classname>GrappaUnmixingGadget</classname>
-              (<filename>gadgetrongrappa</filename>):</para>
-
-              <para>These Gadgets are used together to perform 2D Cartesian
-              parallel imaging on the GPU. The
-              <classname>GrappaGadget</classname> is responsible for
-              calculating GRAPPA coefficients and the
-              <classname>GrappaUnmixingGadget</classname> Fourier transforms
-              the raw data and applies the coefficients. The
-              <classname>GrappaGadget</classname> has the ability to use
-              target channel compression, i.e. it can reconstruct using fewer
-              target channels than input channels to improve performance. See
-              <xref linkend="sect.grappa"/> for details. The target channel
-              compression is specified like this:</para>
-
-              <programlisting><gadget>
- <name>Grappa</name>
- <dll>gadgetrongrappa</dll>
- <class>GrappaGadget</class>
- <property><name>target_coils</name><value>8</value></property>
-</gadget>
-</programlisting>
-            </listitem>
-
-            <listitem>
-              <para><classname>ImageFinishGadgetSHORT</classname>,
-              <classname>ImageFinishFLOAT</classname>,
-              <classname>ImageFinishCPLX</classname>
-              (<filename>gadgetroncore</filename>):</para>
-
-              <para>These 3 Gadgets are all template instances of the same
-              <classname>ImageFinishGadget</classname>. The only difference
-              between them is that they operate on different types of image
-              data types as indicated by their names. Their purpose is to
-              return the reconstructed images to the output queue of the
-              Gadgetron so that they can be returned to the client.</para>
-            </listitem>
-
-            <listitem>
-              <para><classname>MRINoiseAdjustGadget</classname>
-              (<filename>gadgetronmricore</filename>):</para>
-
-              <para>The Gadgetron has two noise pre-whitening Gadgets with
-              similar names <classname>MRINoiseAdjustGadget</classname> and
-              <classname>NoiseAdjustGadget</classname>. They both perform the
-              same operation, which is a) to collect noise adjust data when
-              present, calculate the noise decorrelation matrix, and perform
-              noise decorrelation (when the noise adjustment data is
-              available). The difference between the two Gadgets is that
-              <classname>MRINoiseAdjustGadget</classname> uses BLAS and LAPACK
-              routines to perform the operation, which makes it much faster
-              than the <classname>NoiseAdjustGadget</classname>. The latter
-              Gadget is provided to enable reconstruction on systems where
-              those libraries are not available.</para>
-            </listitem>
-
-            <listitem>
-              <para><classname>NoiseAdjustGadget</classname>
-              (<filename>gadgetroncore</filename>):</para>
-
-              <para>See description of
-              <classname>MRINoiseAdjustGadget</classname>.</para>
-            </listitem>
-
-            <listitem>
-              <para><classname>PCACoilGadget</classname>
-              (<filename>gadgetronmricore</filename>):</para>
-
-              <para>This Gadget is used to create virtual channels based on
-              principal component analysis of a portion of the data.
-              Specifically, data is accumulated for the first frame (for each
-              location, i.e. slice) and a principal component analysis is done
-              of this data. Once the PCA coefficients are available, all
-              subsequent data will be transformed into the virtual channel
-              domain and passed on down the Gadget chain. This Gadget is often
-              combined with the
-              <classname>CoilReductionGadget</classname>.</para>
-            </listitem>
-
-            <listitem>
-              <para><classname>RemoveROOversamplingGadget</classname>
-              (<filename>gadgetroncore</filename>):</para>
-
-              <para>Removes the 2x oversampling often used in the readout
-              direction for (Cartesian) MRI.</para>
-            </listitem>
-          </itemizedlist>
-        </sect3>
-      </sect2>
-
-      <sect2>
-        <title>Python Gadgets</title>
-
-        <para>The Gadgetron provides a mechanism to do prototype development
-        in Python. Again, we use MRI as the example application.</para>
-
-        <para>The Python layer is accessed through a set of Python Gadgets
-        that can encapsulate a Python module. This is seen in <xref
-        linkend="fig.pythonoverview"/>, which illustrates a part of a Gadget
-        chain with two Python Gadgets and one C/C++ Gadget. A Gadget chain can
-        have any number of Python Gadgets and Python Gadgets can be mixed with
-        C++ Gadgets.</para>
-
-        <figure xml:id="fig.pythonoverview">
-          <title>Overview of Python Prototyping</title>
-
-          <mediaobject>
-            <imageobject>
-              <imagedata fileref="figs/python.png" format="PNG" width="5in"/>
-            </imageobject>
-          </mediaobject>
-        </figure>
-
-        <para>The Python modules that are encapsulated in the Python Gadgets
-        are expected to have certain characteristics. Specifically, the
-        Gadgets must have at least 3 functions and these functions will be
-        called by the Gadgetron framework at certain specific times:</para>
-
-        <orderedlist>
-          <listitem>
-            <para><emphasis>Gadget reference function</emphasis>. A specific
-            function will be called when the Python Gadget is created. This
-            function is expected to receive a
-            <classname>GadgetReference</classname> which is a class (wrapped
-            in a Python module), which holds a reference to the Gadget, which
-            owns the Python module. The purpose of passing this reference is
-            to allow the Python module to return data to the Gadget when
-            reconstruction outputs are ready. See below for details.</para>
-          </listitem>
-
-          <listitem>
-            <para><emphasis>Configuration function</emphasis>. This function
-            is used to receive the configuration (usually in XML format), when
-            it is passed to the Gadget, i.e. it is the Python equivalent of
-            <function>process_config</function> in the Gadget (see <xref
-            linkend="sect.gadgets"/>).</para>
-          </listitem>
-
-          <listitem>
-            <para><emphasis>Reconstruction function</emphasis>. This function
-            is called when the Gadget receives data, i.e. it is the Python
-            equivalent of the <function>process</function> function in the
-            Gadget (see <xref linkend="sect.gadgets"/>).</para>
-          </listitem>
-        </orderedlist>
-
-        <para>The user can choose the names of these functions freely in the
-        Python module, but the function names must be specified when the
-        Gadget is inserted in the XML configuration:</para>
-
-        <programlisting><gadget>
- <name>AccReconPython</name>
- <dll>gadgetronpython</dll>
- <class>AcquisitionPythonGadget</class>
-
- <property>
-  <name>python_path</name>
-  <value>/home/myuser/scripts/python</value>
- </property>
-
- <property>
-  <name>python_module</name>
-  <value>accumulate_and_recon</value>
- </property>
-
- <property>
-  <name>gadget_reference_function</name>
-  <value>set_gadget_reference</value>
- </property>
-
- <property>
-  <name>input_function</name>
-  <value>recon_function</value>
- </property>
-
- <property>
-  <name>config_function</name>
-  <value>config_function</value>
- </property>
-</gadget>
-
-</programlisting>
-
-        <para>Notice how the 3 function names are specified through the
-        <varname>gadget_reference_function</varname>,
-        <varname>input_function</varname>, and
-        <varname>config_function</varname> parameter names. Also notice that
-        it is possible to specify a <varname>python_path</varname> to let the
-        Python interpreter know where to search for scripts. By default, the
-        <filename>gadgetron/lib</filename> is added to the search path.
-        Multiple pathnames can be added by separating the paths with
-        "<filename>;</filename>".</para>
-
-        <para>The Python script referenced in the XML configuration above
-        could look like this:</para>
-
-        <programlisting>import numpy as np
-import GadgetronPythonMRI as g
-import kspaceandimage as ki
-import libxml2
-
-myLocalGadgetReference = g.GadgetReference()
-myBuffer = 0
-myParameters = 0
-myCounter = 1;
-mySeries = 1;
-
-def set_gadget_reference(gadref):
-    global myLocalGadgetReference
-    myLocalGadgetReference = gadref
-
-def config_function(conf):
-    global myBuffer
-    global myParameters
-
-    myParameters = dict()
-
-    doc = libxml2.parseDoc(str(conf))
-    context = doc.xpathNewContext()
-    context.xpathRegisterNs("ismrm", "http://www.ismrm.org/ISMRMRD")
-    myParameters["matrix_x"] = int((context.xpathEval("/ismrm:ismrmrdHeader/ismrm:encoding/ismrm:encodedSpace/ismrm:matrixSize/ismrm:x")[0]).content)
-    myParameters["matrix_y"] = int((context.xpathEval("/ismrm:ismrmrdHeader/ismrm:encoding/ismrm:encodedSpace/ismrm:matrixSize/ismrm:y")[0]).content)
-    myParameters["matrix_z"] = int((context.xpathEval("/ismrm:ismrmrdHeader/ismrm:encoding/ismrm:encodedSpace/ismrm:matrixSize/ismrm:z")[0]).content)
-    myParameters["channels"] = int((context.xpathEval("/ismrm:ismrmrdHeader/ismrm:acquisitionSystemInformation/ismrm:receiverChannels")[0]).content)
-    myParameters["slices"] = int((context.xpathEval("/ismrm:ismrmrdHeader/ismrm:encoding/ismrm:encodingLimits/ismrm:slice/ismrm:maximum")[0]).content)+1
-    myParameters["center_line"] = int((context.xpathEval("/ismrm:ismrmrdHeader/ismrm:encoding/ismrm:encodingLimits/ismrm:kspace_encoding_step_1/ismrm:center")[0]).content)
-
-    myBuffer = (np.zeros((myParameters["channels"],myParameters["slices"],myParameters["matrix_z"],myParameters["matrix_y"],(myParameters["matrix_x"]>>1)))).astype('complex64')
-
-def recon_function(acq, data):
-    global myLocalGadgetReference
-    global myBuffer
-    global myParameters
-    global myCounter
-    global mySeries
-
-    line_offset = (myParameters["matrix_y"]>>1)-myParameters["center_line"];
-    myBuffer[:,acq.idx.slice,acq.idx.kspace_encode_step_2,acq.idx.kspace_encode_step_1+line_offset,:] = data
-    
-    if (acq.flags & (1<<7)): #Is this the last scan in slice
-        image = ki.ktoi(myBuffer,(2,3,4))
-        image = image * np.product(image.shape)*100 #Scaling for the scanner
-        #Create a new image header and transfer value
-        img_head = g.ImageHeader()
-        img_head.channels = acq.active_channels
-        img_head.slice = acq.idx.slice
-        g.img_set_matrix_size(img_head, 0, myBuffer.shape[4])
-        g.img_set_matrix_size(img_head, 1, myBuffer.shape[3])
-        g.img_set_matrix_size(img_head, 2, myBuffer.shape[2])
-        g.img_set_position(img_head, 0,g.acq_get_position(acq,0))
-        g.img_set_position(img_head, 1,g.acq_get_position(acq,1))
-        g.img_set_position(img_head, 2,g.acq_get_position(acq,2))
-        g.img_set_read_dir(img_head, 0,g.acq_get_read_dir(acq,0))
-        g.img_set_read_dir(img_head, 1,g.acq_get_read_dir(acq,1))
-        g.img_set_read_dir(img_head, 2,g.acq_get_read_dir(acq,2))
-        g.img_set_phase_dir(img_head, 0,g.acq_get_phase_dir(acq,0))
-        g.img_set_phase_dir(img_head, 1,g.acq_get_phase_dir(acq,1))
-        g.img_set_phase_dir(img_head, 2,g.acq_get_phase_dir(acq,2))
-        g.img_set_slice_dir(img_head, 0,g.acq_get_slice_dir(acq,0))
-        g.img_set_slice_dir(img_head, 1,g.acq_get_slice_dir(acq,1))
-        g.img_set_slice_dir(img_head, 2,g.acq_get_slice_dir(acq,2))
-        g.img_set_patient_table_position(img_head, 0, g.acq_get_patient_table_position(acq,0))
-        g.img_set_patient_table_position(img_head, 1, g.acq_get_patient_table_position(acq,1))
-        g.img_set_patient_table_position(img_head, 2, g.acq_get_patient_table_position(acq,2))
-        img_head.acquisition_time_stamp = acq.acquisition_time_stamp
-        img_head.image_index = myCounter;
-        img_head.image_series_index = mySeries;
-
-        myCounter = myCounter + 1
-        if (myCounter > 5):
-            mySeries = mySeries + 1
-            myCounter = 1
-
-        #Return image to Gadgetron
-        return myLocalGadgetReference.return_image(img_head,image.astype('complex64'))
-
-        #print "Returning to Gadgetron"
-        return 0 #Everything OK
-
-</programlisting>
-
-        <para>There is a lot going on in this script. Let us walk through the
-        different parts and add some explanation. First look at the
-        imports:</para>
-
-        <programlisting>import numpy as np
-import GadgetronPythonMRI as g
-import GadgetronXML
-import kspaceandimage as ki</programlisting>
-
-        <para>All the Python Gadget modules must include
-        <filename>numpy</filename>. The arrays
-        (<classname>NDArray</classname>) are passed to the Python module as
-        <filename>numpy</filename> arrays. The second module
-        <filename>GadgetronPythonMRI</filename> is a Python wrapped version of
-        some of the data structures used in the MRI part of the Gadgetron (see
-        <xref linkend="sect.mrigadgets"/>). Specifically, the
-        <classname>ISMRMRD::AcquisitionHeader</classname> and
-        <classname>ISMRMRD::ImageHeader</classname> headers are wrapped as
-        Python types (using Boost Python). The
-        <filename>GadgetronPythonMRI</filename> also contains a wrapped
-        version of the <classname>GadgetReference</classname> class:</para>
-
-        <programlisting>class GadgetReference
-{
-
- public:
-  GadgetReference();
-  ~GadgetReference();
-  
-  int set_gadget(Gadget* g)
-  {
-    gadget_ = g;
-    return 0;
-  }
-
-  template<class T> int return_data(T header, 
-          boost::python::numeric::array arr);
-
-  int return_acquisition(ISMRMRD::AcquisitionHeader acq, 
-          boost::python::numeric::array arr);
-
-  int return_image(ISMRMRD::ImageHeader img, 
-          boost::python::numeric::array arr);
-
- protected:
-  Gadget* gadget_;
-
-};
-</programlisting>
-
-        <para>Using the return functions in this class interface, it is
-        possible for the Python module to return data to the Gadget.
-        <filename>GadgetronXML</filename> is a Python module provided with the
-        Gadgetron, which contains some XML helper functions that can (it is
-        not a requirement) be used to parse the XML parameters that the module
-        will receive from <function>process_config</function>.
-        <filename>kspaceandimage</filename> is also a python module provided
-        with the Gadgetron, it contains some simple wrapper functions for
-        performing Fourier transforms (to and from k-space) of MRI data. The
-        following section contains some initialization of global variables in
-        the Python module;</para>
-
-        <programlisting>myRef = g.GadgetReference()
-myBuffer = 0
-myParameters = 0
-myCounter = 1;
-mySeries = 1;</programlisting>
-
-        <para>As described above, each Python module must contain at least 3
-        functions corresponding to the 3 entry points from the Gadgetron
-        framework. The first one of these functions captures the
-        <classname>GadgetReference</classname>:</para>
-
-        <programlisting>def set_gadget_reference(gadref):
-    global myLocalGadgetReference
-    myLocalGadgetReference = gadref
-</programlisting>
-
-        <para>Using this reference, the Python module will be able to return
-        images (or acquisitions) to the Gadget. The next function
-        (<function>config_function</function>) processes the configuration data
-        and finally, the <function>recon_function</function> simply takes the
-        data as it comes in and stores it in a buffer. Based on the
-        <varname>flags</varname> field in the header, it is determined when
-        the last acquisition in each slice has arrived. As this happens the
-        buffer is Fourier transformed, an image header is populated, and the
-        result is returned (via the <classname>GadgetReference</classname>) to
-        the Gadgetron where it will be processed by the next Gadget in the
-        chain.</para>
-
-        <para>The Gadgetron distribution comes with a simple Python-based 2D
-        FT MRI reconstruction. The Gadget chain configuration for this
-        reconstruction can be found in
-        <filename>gadgets/python/python.xml</filename>.</para>
-      </sect2>
-
-      <sect2 xml:id="sect.makingnewgadgetlibrary">
-        <title>Making a new Gadget Library</title>
-
-        <para>The easiest way to get started making a new Gadget library is to
-        follow an example. In this example we create a new Gadget library
-        containing a single Gadget; <classname>ThresholdGadget</classname>.
-        Its purpose is to set all values below a certain fraction of the max
-        value to zero.</para>
-
-        <para>New Gadget libraries can either be created in the Gadgetron
-        source tree, which allows easy access to all the other files in the
-        Gadgetron, or they can be made as external libraries that link against
-        an installed Gadgetron system. In this example we do the latter since
-        this creates a new library that does not "taint" the Gadgetron source
-        tree. It is trivial to move the library inside the Gadgetron source
-        tree at some later point in time if desired. We assume that the
-        Gadgetron is installed on the machine that you are working on. The
-        command line entries, etc. correspond to a Linux console. If you are
-        using Windows you have to adjust a bit.</para>
-
-        <para>Start by creating a new folder for the library:</para>
-
-        <screen><prompt>user at mycomputer:~/temp$</prompt> <userinput>mkdir gadgetron_examplelib</userinput>
-user at mycomputer:~/temp$ <userinput>cd gadgetron_examplelib</userinput></screen>
-
-        <para>We start by creating the class
-        <classname>ThresholdGadget</classname>. Create the following 3 files:
-        <filename>ThresholdGadget.h</filename>,
-        <filename>ThresholdGadget.cpp</filename>,
-        <filename>examplelib_export.h</filename> (the last file is just to
-        help us make sure that things work on Windows) with the following
-        content:</para>
-
-        <programlisting>//ThresholdGadget.h
-
-#ifndef THRESHOLDGADGET_H
-#define THRESHOLDGADGET_H
-
-#include "examplelib_export.h"
-#include "Gadget.h"
-#include "GadgetMRIHeaders.h"
-#include "hoNDArray.h"
-#include <complex>
-
-class EXPORTGADGETSEXAMPLE ThresholdGadget : 
-public Gadget2<ISMRMRD::ImageHeader, hoNDArray< std::complex<float> > >
-{
- public:
-  GADGET_DECLARE(ThresholdGadget)
-
- protected:
-  virtual int process( GadgetContainerMessage< ISMRMRD::ImageHeader>* m1,
-       GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2);
-
-  virtual int process_config(ACE_Message_Block* mb);
-
-  float threshold_level_;
-
-};
-
-#endif //THRESHOLDGADGET_H</programlisting>
-
-        <programlisting>//ThresholdGadget.cpp
-
-#include "ThresholdGadget.h"
-
-int ThresholdGadget::process_config(ACE_Message_Block* mb) 
-{
-  threshold_level_ = get_double_value("level");
-  if (threshold_level_ == 0.0) {
-    threshold_level_ = 1.0;
-  }
-
-  return GADGET_OK;
-}
-
-int ThresholdGadget::process( 
-   GadgetContainerMessage< ISMRMRD::ImageHeader>* m1,
-   GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
-{
-
-  std::complex<float>* d = 
-    m2->getObjectPtr()->get_data_ptr();
-
-  unsigned long int elements =  
-    m2->getObjectPtr()->get_number_of_elements();
-
-  //First find max
-  float max = 0.0;
-  for (unsigned long int i = 0; i < elements; i++) {
-    if (abs(d[i]) > max) {
-      max = abs(d[i]);
-    }
-  }
-
-  //Now threshold
-  for (unsigned long int i = 0; i < elements; i++) {
-    if (abs(d[i]) < threshold_level_*max) {
-      d[i] = std::complex<float>(0.0,0.0);
-    }
-  }
-
-  //Now pass on image
-  if (this->next()->putq(m1) < 0) {
-     return GADGET_FAIL;
-  }
-
-  return GADGET_OK;
-}
-
-GADGET_FACTORY_DECLARE(ThresholdGadget)</programlisting>
-
-        <programlisting>//examplelib_export.h
-
-#ifndef EXAMPLE_EXPORT_H_
-#define EXAMPLE_EXPORT_H_
-
-
-#if defined (WIN32)
-#if defined (gadgetronexamplelib_EXPORTS)
-#define EXPORTGADGETSEXAMPLE __declspec(dllexport)
-#else
-#define EXPORTGADGETSEXAMPLE __declspec(dllimport)
-#endif
-#else
-#define EXPORTGADGETSEXAMPLE
-#endif
-
-#endif /* EXAMPLE_EXPORT_H_ */
-</programlisting>
-
-        <para>Now that we have the files for the Gadget we need to set up the
-        build environment. In the folder
-        <filename>gadgetron_examplelib</filename> create a file called
-        <filename>CMakeLists.txt</filename> with the following content:</para>
-
-        <programlisting>cmake_minimum_required(VERSION 2.6)
-
-project(examplelib)
-
-if (WIN32)
-ADD_DEFINITIONS(-DWIN32 -D_WIN32 -D_WINDOWS)
-ADD_DEFINITIONS(-DUNICODE -D_UNICODE)
-SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc")
-SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W3")
-endif (WIN32)
-
-###############################################################
-#Bootstrap search for libraries 
-# (We need to find cmake modules in Gadgetron)
-###############################################################
-find_path(GADGETRON_CMAKE_MODULES FindGadgetron.cmake HINTS
-$ENV{GADGETRON_HOME}/cmake
-/usr/local/gadgetron)
-
-if (NOT GADGETRON_CMAKE_MODULES)
-  MESSAGE(FATAL_ERROR "GADGETRON_CMAKE_MODULES cannot be found. 
-   Try to set GADGETRON_HOME environment variable.")
-endif(NOT GADGETRON_CMAKE_MODULES)
-
-set(CMAKE_MODULE_PATH ${GADGETRON_CMAKE_MODULES})
-###############################################################
-
-find_package(Gadgetron REQUIRED)
-find_package(Boost REQUIRED)
-find_package(ACE REQUIRED)
-
-set(CMAKE_INSTALL_PREFIX ${GADGETRON_HOME})
-
-INCLUDE_DIRECTORIES(${ACE_INCLUDE_DIR} 
-     ${Boost_INCLUDE_DIR}
-     ${GADGETRON_INCLUDE_DIR})
-
-LINK_DIRECTORIES(${GADGETRON_LIB_DIR})
-
-ADD_LIBRARY(gadgetronexamplelib SHARED ThresholdGadget.cpp)
-
-TARGET_LINK_LIBRARIES(gadgetronexamplelib 
-                      hondarray 
-                      optimized ${ACE_LIBRARIES} 
-                      debug ${ACE_DEBUG_LIBRARY})
-
-INSTALL (FILES ThresholdGadget.h
-         examplelib_export.h
-         DESTINATION include)
-
-INSTALL(TARGETS gadgetronexamplelib DESTINATION lib)
-
-INSTALL(FILES threshold.xml DESTINATION config)
-</programlisting>
-
-        <para>The last thing we need is the XML configuration file to use when
-        running our new <classname>ThresholdGadget</classname>. In the same
-        folder create the <filename>threshold.xml</filename> file:</para>
-
-        <programlisting><?xml version="1.0" ?>
-<gadgetronStreamConfiguration 
-  xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-  xmlns="http://gadgetron.sf.net/gadgetron"
-  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-        
-    <reader>
-      <slot>1008</slot>
-      <dll>gadgetroncore</dll>
-      <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-    </reader>
-  
-    <writer>
-      <slot>1004</slot>
-      <dll>gadgetroncore</dll>
-      <classname>MRIImageWriterCPLX</classname>
-    </writer>
-    <writer>
-      <slot>1005</slot>
-      <dll>gadgetroncore</dll>
-      <classname>MRIImageWriterFLOAT</classname>
-    </writer>
-    <writer>
-      <slot>1006</slot>
-      <dll>gadgetroncore</dll>
-      <classname>MRIImageWriterUSHORT</classname>
-    </writer>
-  
-    <gadget>
-      <name>Acc</name>
-      <dll>gadgetroncore</dll>
-      <classname>AccumulatorGadget</classname>
-    </gadget>
-    <gadget>
-      <name>FFT</name>
-      <dll>gadgetroncore</dll>
-      <classname>FFTGadget</classname>
-    </gadget>
-
-    <gadget>
-      <name>CropCombine</name>
-      <dll>gadgetroncore</dll>
-      <classname>CropAndCombineGadget</classname>
-    </gadget>
-
-    <!-- This is where we insert our new Gadget -->
-    <gadget>
-      <name>Threshold</name>
-      <dll>gadgetronexamplelib</dll>
-      <classname>ThresholdGadget</classname>
-      <property><name>level</name><value>0.25</value></property>
-    </gadget>
-
-    <gadget>
-      <name>Extract</name>
-      <dll>gadgetroncore</dll>
-      <classname>ExtractGadget</classname>
-    </gadget>  
-    <gadget>
-      <name>ImageFinishFLOAT</name>
-      <dll>gadgetroncore</dll>
-      <classname>ImageFinishGadgetFLOAT</classname>
-    </gadget>
-
-</gadgetronStreamConfiguration>
-</programlisting>
-
-        <para>Check that you have 5 files in your folder:</para>
-
-        <screen><prompt>user at mycomputer:gadgetron_examplelib$</prompt> <userinput>ls</userinput>
-CMakeLists.txt
-ThresholdGadget.cpp
-ThresholdGadget.h
-examplelib_export.h
-threshold.xml
-</screen>
-
-        <para>Next, let us create a <filename>build</filename> directory and
-        compile:</para>
-
-        <screen><prompt>user at mycomputer:gadgetron_examplelib$</prompt> <userinput>mkdir build; cd build</userinput></screen>
-
-        <para>In the <filename>build</filename> folder</para>
-
-        <screen><prompt>user at mycomputer:build$</prompt> <userinput>cmake ../</userinput></screen>
-
-        <para>Assuming the <application>cmake</application> process was
-        successful:</para>
-
-        <screen><prompt>user at mycomputer:build$</prompt> <userinput>make</userinput> 
-Scanning dependencies of target gadgetronexamplelib
-
-[100%] Building CXX object \
-    CMakeFiles/gadgetronexamplelib.dir/ThresholdGadget.cpp.o
-
-Linking CXX shared library libgadgetronexamplelib.dylib
-[100%] Built target gadgetronexamplelib
-
-<prompt>user at mycomputer:build$</prompt> <userinput>make install</userinput>
-[100%] Built target gadgetronexamplelib
-Install the project...
--- Install configuration: ""
--- Up-to-date: /usr/local/gadgetron/include/ThresholdGadget.h
--- Up-to-date: /usr/local/gadgetron/include/examplelib_export.h
--- Installing: /usr/local/gadgetron/lib/libgadgetronexamplelib.so
--- Up-to-date: /usr/local/gadgetron/config/threshold.xml</screen>
-
-        <para>You may have to use <application>sudo</application> for the
-        <command>make install</command> command depending on your
-        setup.</para>
-
-        <para>You should now be able to run a reconstruction using your new
-        reconstruction chain. Follow the instructions in <xref
-        linkend="sect.simpleexample"/> if you have not yet tried to run a
-        simple reconstruction. After having started up the Gadgetron, run the
-        <application>mriclient</application>:</para>
-
-        <screen>user at mycomputer:~/temp/test_data$ <userinput>mriclient \
-    -d gadgetron_testdata.h5 \ 
-    -c threshold.xml</userinput>
-
-Gadgetron MRI Data Sender
-  -- host            :      localhost
-  -- port            :      9002
-  -- hdf5 file  in   :      gadgetron_testdata.h5
-  -- hdf5 group in   :      simple_gre
-  -- conf            :      threshold.xml
-  -- loop            :      1
-  -- hdf5 file out   :      ./out.h5
-  -- hdf5 group out  :      2012-05-11 12:52:14
-(31540|140170355443520) Connection from 127.0.0.1:9002
-31540, 81, GadgetronConnector, Close Message received
-(31540|140170283570944) Handling close...
-(31540|140170283570944) svc done...
-(31540|140170283570944) Handling close...</screen>
-
-        <para>If you run it again with the <varname>level</varname> parameter
-        set to 0.00000001 (remember to re-install the
-        <filename>threshold.xml</filename> file in
-        <filename>gadgetron/config</filename> by running <command>make
-        install</command>):</para>
-
-        <programlisting>    <gadget>
-      <name>Threshold</name>
-      <dll>gadgetronexamplelib</dll>
-      <classname>ThresholdGadget</classname>
-      <property><name>level</name><value>0.00000001</value></property>
-    </gadget>
-</programlisting>
-
-        <para>You should get two different results that look something like
-        <xref linkend="fig.examplelib"/>.</para>
-
-        <figure xml:id="fig.examplelib">
-          <title>Result from <classname>ThresholdGadget</classname>
-          experiment</title>
-
-          <mediaobject>
-            <imageobject>
-              <imagedata fileref="figs/examplelibresult.png" format="PNG"
-                         width="6in"/>
-            </imageobject>
-          </mediaobject>
-        </figure>
-
-        <para>If you create interesting Gadget libraries please consider
-        publishing them online to the benefit of the reconstruction community.
-        An easy way to do this is by sending them to the Gadgetron team for us
-        to publish right away on the web and possibly include in a future
-        release of the Gadgetron.</para>
-      </sect2>
-    </sect1>
-
-    <sect1>
-      <title>Gadgetron Clients</title>
-
-      <sect2>
-        <title>Available Clients</title>
-
-        <para>The purpose of this section is to maintain a list of the
-        available clients that are included in the Gadgetron distribution. The
-        current available clients are:</para>
-
-        <itemizedlist>
-          <listitem>
-            <para><application>mriclient</application>:</para>
-
-            <para>This is the standard client for sending MRI data to the
-            Gadgetron using the ISMRM Raw Data format. In order to get usage
-            information for the client, simply run the client with no
-            arguments.</para>
-          </listitem>
-        </itemizedlist>
-      </sect2>
-
-      <sect2>
-        <title>Making a new Client</title>
-
-        <para>The Gadgetron distribution comes with a
-        <classname>GadgetronConnector</classname> class, which can be used to
-        create clients. An example <filename>main.cpp</filename> file for a
-        client could look like:</para>
-
-        <programlisting>
-#include "GadgetMessageInterface.h"
-#include "GadgetronConnector.h"
-
-int main(int argc, char** argv)
-{
-
-  std::string host_name("localhost");
-  std::string port("9002");
-  std::string config_file("threshold.xml");
-  std::string xml_config;
-
-  //Generate some XML configuration in xml_config
-
-  GadgetronConnector con;
-
-  //Register Readers and Writers
-  con.register_writer(....);
-  con.register_reader(....);
-  con.register_reader(....);
-
-  //Open a connection with the gadgetron
-  if (con.open(host_name, port) != 0) {
-    //Deal with errors
-  }
-
-  //Tell Gadgetron which XML configuration to run.
-  if (con.send_gadgetron_configuration_file(config_file) != 0) {
-    //Deal with errors
-  }
-
-  if (con.send_gadgetron_parameters(xml_config) != 0) {
-     //Deal with errors
-  }
-
-
-  //Send data
-  while ( .... ) { //some condition
-    GadgetContainerMessage<GadgetMessageIdentifier>* m1 =
-      new GadgetContainerMessage<GadgetMessageIdentifier>();
-      
-      //Create data and add to m1
-
-      if (con.putq(m1) == -1) {
-         //Deal with errors
-      }
-  }
-
-  //Put a close package on the queue
-
-  GadgetContainerMessage<GadgetMessageIdentifier>* m1 =
-    new GadgetContainerMessage<GadgetMessageIdentifier>();
-
-  m1->getObjectPtr()->id = GADGET_MESSAGE_CLOSE;
-
-  if (con.putq(m1) == -1) {
-   //Deal with errors
-  }
-
-  con.wait(); //Wait for recon to finish
-
-  return 0;
-}</programlisting>
-
-        <para>To compile this client, create a
-        <application>cmake</application> file:</para>
-
-        <programlisting>cmake_minimum_required(VERSION 2.6)
-
-project(exampleclient)
-
-if (WIN32)
-ADD_DEFINITIONS(-DWIN32 -D_WIN32 -D_WINDOWS)
-ADD_DEFINITIONS(-DUNICODE -D_UNICODE)
-SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc")
-SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W3")
-endif (WIN32)
-
-###############################################################
-#Bootstrap search for libraries 
-# (We need to find cmake modules in Gadgetron)
-###############################################################
-find_path(GADGETRON_CMAKE_MODULES FindGadgetron.cmake HINTS
-$ENV{GADGETRON_HOME}/cmake
-/usr/local/gadgetron)
-
-if (NOT GADGETRON_CMAKE_MODULES)
-  MESSAGE(FATAL_ERROR "GADGETRON_CMAKE_MODULES cannot be found. 
-   Try to set GADGETRON_HOME environment variable.")
-endif(NOT GADGETRON_CMAKE_MODULES)
-
-set(CMAKE_MODULE_PATH ${GADGETRON_CMAKE_MODULES})
-###############################################################
-
-find_package(Gadgetron REQUIRED)
-find_package(Boost REQUIRED)
-find_package(ACE REQUIRED)
-
-set(CMAKE_INSTALL_PREFIX ${GADGETRON_HOME})
-
-INCLUDE_DIRECTORIES(${ACE_INCLUDE_DIR} 
-     ${Boost_INCLUDE_DIR}
-     ${GADGETRON_INCLUDE_DIR})
-
-LINK_DIRECTORIES(${GADGETRON_LIB_DIR})
-
-add_executable(mygadgetronclient main.cpp)
-
-target_link_libraries(mygadgetronclient 
-      optimized gadgettools debug gadgettools${CMAKE_DEBUG_SUFFIX}
-      tinyxml 
-      optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY})
-
-install(TARGETS mygadgetronclient DESTINATION bin)
-</programlisting>
-
-        <para>Run <application>cmake</application> and follow the normal
-        <command>make</command> and <command>make install</command>
-        instructions (see <xref
-        linkend="sect.makingnewgadgetlibrary"/>).</para>
-      </sect2>
-    </sect1>
-  </chapter>
-
-  <chapter xml:id="sect.exampleapplications">
-    <title>Gadgetron Applications</title>
-
-    <sect1 xml:id="sect.2dftexample">
-      <title>Basic 2D FFT MRI</title>
-
-      <para>A basic example application in the Gadgetron is a simple 2D FT MRI
-      reconstruction. It receives 2D MRI data, collects it into k-space
-      arrays, performs FFT of the data, combines channels (if there are
-      multiple), and returns the images to the client. This example is
-      included in the Gadgetron for testing and demonstration purposes only.
-      It was not intended to be fast or otherwise optimal in any sense.</para>
-
-      <para>The Gadgets for this reconstruction are in the
-      <filename>core</filename> folder and the configuration file to use to
-      run this reconstruction is <filename>default.xml</filename>. The section
-      <xref linkend="sect.simpleexample"/> describes how to run a simple
-      reconstruction using this Gadget chain and how to download data to test
-      it.</para>
-
-      <para>In this section we will take a closer look at the Gadgets in this
-      chain and how they are implemented. The Gadgetron XML configuration file
-      (<filename>default.xml</filename>) looks like this:</para>
-
-      <programlisting><?xml version="1.0" ?>  
-<gadgetronStreamConfiguration 
-  xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-  xmlns="http://gadgetron.sf.net/gadgetron"
-  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-        
-    <reader>
-      <slot>1008</slot>
-      <dll>gadgetroncore</dll>
-      <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-    </reader>
-  
-    <writer>
-      <slot>1004</slot>
-      <dll>gadgetroncore</dll>
-      <classname>MRIImageWriterCPLX</classname>
-    </writer>
-    <writer>
-      <slot>1005</slot>
-      <dll>gadgetroncore</dll>
-      <classname>MRIImageWriterFLOAT</classname>
-    </writer>
-    <writer>
-      <slot>1006</slot>
-      <dll>gadgetroncore</dll>
-      <classname>MRIImageWriterUSHORT</classname>
-    </writer>
-  
-    <gadget>
-      <name>Acc</name>
-      <dll>gadgetroncore</dll>
-      <classname>AccumulatorGadget</classname>
-    </gadget>
-    <gadget>
-      <name>FFT</name>
-      <dll>gadgetroncore</dll>
-      <classname>FFTGadget</classname>
-    </gadget>
-
-    <gadget>
-      <name>CropCombine</name>
-      <dll>gadgetroncore</dll>
-      <classname>CropAndCombineGadget</classname>
-    </gadget>
-
-    <gadget>
-      <name>Extract</name>
-      <dll>gadgetroncore</dll>
-      <classname>ExtractGadget</classname>
-    </gadget>
-  
-    <gadget>
-      <name>ImageFinishFLOAT</name>
-      <dll>gadgetroncore</dll>
-      <classname>ImageFinishGadgetFLOAT</classname>
-    </gadget>
-
-</gadgetronStreamConfiguration>
-</programlisting>
-
-      <para>The resulting Gadget chain is illustrated in <xref
-      linkend="fig.simple2dft"/>. As described in <xref
-      linkend="sect.streamconfiguration"/> the Gadgetron configuration
-      contains 3 sections: Readers, Writers, and the Stream. In this
-      particular case, there is only one Reader, which receives MRI
-      Acquisitions. This data format is described in <xref
-      linkend="sect.mrigadgets"/>. There are 3 Writers registered with this
-      configuration. They are all used to write MRI images, but responsible
-      for the different data types (complex float, float, or unsigned short).
-      In principle this means that this reconstruction is capable of returning
-      3 different types of images, but as is seen from the stream
-      configuration, the only output from this reconstruction will be float
-      format images. However, many reconstructions will have all 3 Writers
-      registered to make it easy to switch formats, i.e. it would be trivial
-      to turn this reconstruction into one that outputs unsigned short images
-      (have a look at the file <filename>default_short.xml</filename> for an
-      example of how this is done).</para>
-
-      <figure xml:id="fig.simple2dft">
-        <title>Simple 2D FT Reconstruction Chain</title>
-
-        <mediaobject>
-          <imageobject>
-            <imagedata fileref="figs/simple2dft.png" width="4in"/>
-          </imageobject>
-        </mediaobject>
-      </figure>
-
-      <para>As is seen in the Gadgets section of the configuration, this
-      reconstruction uses 5 Gadgets. The first Gadget is responsible for
-      accumulating MRI acquisitions. To accomplish this, it uses an
-      accumulation buffer. When a k-space line arrives at the Gadget, it will
-      be inserted into the k-space buffer and when the last acquisition in a
-      slice/repetition has arrived, it will copy the entire buffer and pass it
-      on to the next Gadget.</para>
-
-      <para>Let's have a look at the definition of the
-      <classname>AccumulatorGadget</classname> class:</para>
-
-      <programlisting>class EXPORTGADGETSCORE AccumulatorGadget : 
-public Gadget2< ISMRMRD::AcquisitionHeader, 
-                hoNDArray< std::complex<float> > >
-{
-  
- public:
-  GADGET_DECLARE(AccumulatorGadget);
-
-  AccumulatorGadget();
-  ~AccumulatorGadget();
-
- protected:
-  virtual int process_config(ACE_Message_Block* mb);
-  virtual int process(
-    GadgetContainerMessage< ISMRMRD::AcquisitionHeader >* m1,
-    GadgetContainerMessage< hoNDArray< std::complex<float> > > * m2);
-
-  hoNDArray< std::complex<float> >* buffer_;
-  std::vector<unsigned int> dimensions_;
-
-  int image_counter_;
-  int image_series_;
-
-};
-</programlisting>
-
-      <para>There are a few member variables to help us keep track of the
-      buffer and the data dimensions and the core functionality is implemented
-      in two functions: <function>process_config</function>, is used to set up
-      the buffer, and <function>process</function>, which is responsible for
-      the accumulation of data. Let us examine the
-      <function>process_config</function> function (abbreviated):</para>
-
-      <programlisting linenumbering="numbered">int AccumulatorGadget::process_config(ACE_Message_Block* mb)
-{
- boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg = 
-    parseIsmrmrdXMLHeader(std::string(mb->rd_ptr()));
-
- ISMRMRD::ismrmrdHeader::encoding_sequence e_seq = cfg->encoding();
-
- ISMRMRD::encodingSpaceType e_space = (*e_seq.begin()).encodedSpace();
- ISMRMRD::encodingSpaceType r_space = (*e_seq.begin()).reconSpace();
- ISMRMRD::encodingLimitsType e_limits = (*e_seq.begin()).encodingLimits();
-
- GADGET_DEBUG2("Matrix size: %d, %d, %d\n", 
-                e_space.matrixSize().x(), 
-                e_space.matrixSize().y(), 
-                e_space.matrixSize().z());
-
- dimensions_.push_back(e_space.matrixSize().x());
- dimensions_.push_back(e_space.matrixSize().y());
- dimensions_.push_back(e_space.matrixSize().z());
-
- slices_ = e_limits.slice().present() ? 
-             e_limits.slice().get().maximum()+1 : 1;
-
-  return GADGET_OK;
-}</programlisting>
-
-      <para>The main purpose of this function is to pull parameters out of the
-      XML portion of the ISMRM Raw Data header in order to set up the buffer.
-      As mentioned in <xref linkend="sect.xmlparameters"/>, the convention is
-      to pass parameters into the Gadgets in XML format. To enable convenient
-      parsing of these parameters, the ISMRMRD library includes a C++ class
-      representation of the header. See
-      <uri>http://ismrmrd.sourceforge.net</uri> for more details.</para>
-
-      <para>Now we are ready to receive and buffer data, which is done by the
-      <function>process</function> function:</para>
-
-      <programlisting>int AccumulatorGadget::
-process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
- GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
-{
-
-  if (!buffer_) {
-   dimensions_.push_back(m1->getObjectPtr()->active_channels);
-   dimensions_.push_back(slices_);
-
-   if (!(buffer_ = new hoNDArray< std::complex<float> >())) {
-    GADGET_DEBUG1("Failed create buffer\n");
-    return GADGET_FAIL;
-   }
-
-   if (!buffer_->create(&dimensions_)) {
-    GADGET_DEBUG1("Failed allocate buffer array\n");
-    return GADGET_FAIL;
-   }
-
-   image_series_ = this->get_int_value("image_series");
-
-  }
-
-
-  std::complex<float>* b =
-    buffer_->get_data_ptr();
-
-  std::complex<float>* d =
-    m2->getObjectPtr()->get_data_ptr();
-
-  int samples =  m1->getObjectPtr()->number_of_samples;
-  int line = m1->getObjectPtr()->idx.kspace_encode_step_1;
-  int partition = m1->getObjectPtr()->idx.kspace_encode_step_2;
-  int slice = m1->getObjectPtr()->idx.slice;
-
-  if (samples > static_cast<int>(dimensions_[0])) {
-   GADGET_DEBUG1("Wrong number of samples received\n");
-   return GADGET_FAIL;
-  }
-
-  size_t offset= 0;
-  //Copy the data for all the channels
-  for (int c = 0; c < m1->getObjectPtr()->active_channels; c++) {
-    offset = 
-      slice*dimensions_[0]*dimensions_[1]*dimensions_[2]*dimensions_[3] +
-      c*dimensions_[0]*dimensions_[1]*dimensions_[2] +
-      partition*dimensions_[0]*dimensions_[1] +
-      line*dimensions_[0] + (dimensions_[0]>>1)-m1->getObjectPtr()->center_sample;
-    
-    memcpy(b+offset,
-     d+c*samples,
-     sizeof(std::complex<float>)*samples);
-  }
-  
-  bool is_last_scan_in_slice = 
-     ISMRMRD::FlagBit(ISMRMRD::ACQ_LAST_IN_SLICE).isSet(m1->getObjectPtr()->flags);
-  
-  if (is_last_scan_in_slice) {
-    GadgetContainerMessage<ISMRMRD::ImageHeader>* cm1 = 
-      new GadgetContainerMessage<ISMRMRD::ImageHeader>();
-    
-    cm1->getObjectPtr()->flags = 0;
-
-    GadgetContainerMessage< hoNDArray< std::complex<float> > >* cm2 = 
-      new GadgetContainerMessage<hoNDArray< std::complex<float> > >();
-    
-    cm1->cont(cm2);
-    
-    std::vector<unsigned int> img_dims(4);
-    img_dims[0] = dimensions_[0];
-    img_dims[1] = dimensions_[1];
-    img_dims[2] = dimensions_[2];
-    img_dims[3] = dimensions_[3];
-    
-    if (!cm2->getObjectPtr()->create(&img_dims)) {
-      GADGET_DEBUG1("Unable to allocate new image array\n");
-      cm1->release();
-      return -1;
-    }
-    
-    size_t data_length = dimensions_[0]*dimensions_[1]*
-      dimensions_[2]*dimensions_[3];
-    
-    offset = slice*data_length;
-    
-    memcpy(cm2->getObjectPtr()->get_data_ptr(),b+offset,
-    sizeof(std::complex<float>)*data_length);
-    
-    cm1->getObjectPtr()->matrix_size[0]     = img_dims[0];
-    cm1->getObjectPtr()->matrix_size[1]     = img_dims[1];
-    cm1->getObjectPtr()->matrix_size[2]     = img_dims[2];
-    cm1->getObjectPtr()->channels           = img_dims[3];
-    cm1->getObjectPtr()->slice   = m1->getObjectPtr()->idx.slice;
-
-    memcpy(cm1->getObjectPtr()->position,
-      m1->getObjectPtr()->position,
-    sizeof(float)*3);
-
-    memcpy(cm1->getObjectPtr()->read_dir,
-      m1->getObjectPtr()->read_dir,
-    sizeof(float)*3);
-
-    memcpy(cm1->getObjectPtr()->phase_dir,
-      m1->getObjectPtr()->phase_dir,
-    sizeof(float)*3);
-
-    memcpy(cm1->getObjectPtr()->slice_dir,
-      m1->getObjectPtr()->slice_dir,
-    sizeof(float)*3);
- 
-    memcpy(cm1->getObjectPtr()->patient_table_position,
-      m1->getObjectPtr()->patient_table_position, sizeof(float)*3);
-
-    cm1->getObjectPtr()->image_data_type = ISMRMRD::DATA_COMPLEX_FLOAT;
-    cm1->getObjectPtr()->image_index = ++image_counter_;
-    cm1->getObjectPtr()->image_series_index = image_series_;
-
-    if (this->next()->putq(cm1) < 0) {
-     return GADGET_FAIL;
-    }
-  } 
-
-  m1->release();
-  return GADGET_OK;
-}
-</programlisting>
-
-      <para>This function has two basic tasks: insert data into the buffer and
-      when enough data is present, copy the buffer and pass it on to next
-      gadget. Additionally, the data buffer is created in this function if it
-      is not already allocated. In this example we choose to allocate the
-      buffer after the first data elements arrive. This allows us to respond
-      to changes in data sizes introduced by upstream Gadgets, e.g. readout
-      downsampling, coil reduction, etc.</para>
-
-      <para>In this case the copying of data is done with a very simple
-      <function>memcpy</function> command. There is a basic check for the
-      image dimensions, but a more robust application may have more checks of
-      the incoming data.</para>
-
-      <para>Once the data is in the buffer, we check to see if we should put
-      out an image. This is done with the <varname>flags</varname> field on
-      the acquisition. Specifically we check if a specific bit
-      (<varname>ISMRMRD::ACQ_LAST_IN_SLICE</varname>) is set.</para>
-
-      <para>If it is determined that this is the last acquisition for this
-      slice, we create a copy of the buffer and pass it on to the next Gadget.
-      Instead of a <classname>ISMRMRD::AcquisitionHeader</classname> we now
-      need an ISMRMRD::ImageHeader to pass along with the data. This header
-      structure is created and populated with fields (orientation, etc.) from
-      the acquisition header before it is passed on to the Gadget in the
-      stream.</para>
-
-      <para>Next Gadget is the <classname>FFTGadget</classname>. Since the
-      k-space buffering has been taken care of, the Fourier transform is a
-      relatively simple task. The <function>process</function> function uses
-      the FFTW wrapper class (<xref linkend="sect.ffttoolbox"/>) to perform
-      the FFT along the first 3 dimensions of the array:</para>
-
-      <programlisting>int FFTGadget::process( 
-GadgetContainerMessage< ISMRMRD::ImageHeader>* m1,
-GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
-{
-  FFT<float>::instance()->ifft(m2->getObjectPtr(),0);
-  FFT<float>::instance()->ifft(m2->getObjectPtr(),1);
-  FFT<float>::instance()->ifft(m2->getObjectPtr(),2);
-
-  if (this->next()->putq(m1) < 0) {
-     return GADGET_FAIL;
-  }
-
-  return GADGET_OK;
-}</programlisting>
-
-      <para>Now that the images have been Fourier transformed, we need to
-      remove the oversampling that is done in the readout dimensions and we
-      need to combine the receiver channels. In this case, we are making some
-      assumptions, i.e. we assume two-fold oversampling in the readout and we
-      are doing a simple RMS coil combination to obtain combined magnitude
-      images. We will not repeat the source code here, it can be found in
-      <filename>gadgets/core/CropAndCombineGadget.cpp</filename>.</para>
-
-      <para>The last two remaining steps after the coil combination are to extract
-      the magnitude of the data and return the floating point images to the
-      Gadgetron so that they can be returned to the client. This is
-      accomplished in the <classname>ExtractGadget</classname> and the
-      <classname>ImageFinishGadgetFLOAT</classname>. Both of these Gadgets are
-      described in <xref linkend="sect.mrigadgets"/>.</para>
-    </sect1>
-
-    <sect1 xml:id="sect.grappa">
-      <title>Cartesian 2D Parallel MRI (GRAPPA)</title>
-
-      <para>The Gadgetron contains a high-throughput real-time 2D Cartesian
-      parallel imaging reconstruction (GRAPPA) implemented on the GPU. It is
-      beyond the scope of this manual to review all the algorithmic details of
-      this application, but we will give an overview here as an example of a
-      more complicated reconstruction chain.</para>
-
-      <para>The Gadget chain is defined in the <filename>grappa.xml</filename>
-      and the resulting chain is illustrated in <xref
-      linkend="fig.grappachain"/>.</para>
-
-      <para>To test this configuration, please download the GRAPPA test
-      datasets from <uri type="website"
-      xlink:href="https://sourceforge.net/projects/gadgetron/files/testdata/">https://sourceforge.net/projects/gadgetron/files/testdata/ismrmrd</uri>,
-      where you will find the dataset <filename>grappa_rate2</filename>. It is
-      a Cartesian parallel imaging dataset with rate 2 TSENSE type
-      acquisition. Data were acquired with a 32 channel coil.</para>
-
-      <para>In order to run the GRAPPA reconstruction you have to have a CUDA
-      enabled GPU on your system and your Gadgetron distribution should be
-      compiled with CUDA and CULA enabled. Please see <xref
-      linkend="sect.installation"/> for details for your specific
-      platform.</para>
-
-      <para>To run the reconstruction, start up your Gadgetron (in its own
-      terminal window) and use the <application>mriclient</application> to
-      send the data from another terminal:</para>
-
-      <screen>user at host:~/temp$ wget http://sourceforge.net/projects/gadgetron/files/testdata/ismrmrd/grappa_rate2.h5
-
-user at host:~/temp$ mriclient \
-    -d grappa_rate2.h5 \
-    -c grappa.xml
-Gadgetron MRI Data Sender
-  -- host            :      localhost
-  -- port            :      9002
-  -- hdf5 file  in   :      gadgetron_testdata.h5
-  -- hdf5 group in   :      gre_tgrappa_rate4
-  -- conf            :      grappa.xml
-  -- loop            :      1
-  -- hdf5 file out   :      ./out.h5
-  -- hdf5 group out  :      2012-05-11 15:43:03
-(32580|140398140757824) Connection from 127.0.0.1:9002
-32580, 81, GadgetronConnector, Close Message received
-(32580|140398068885248) Handling close...
-(32580|140398068885248) svc done...
-(32580|140398068885248) Handling close...
-</screen>
-
-      <para>You should get example images that look similar to the ones in
-      <xref linkend="fig.examplegrapparesult"/>.</para>
-
-      <figure xml:id="fig.grappachain">
-        <title>GRAPPA Reconstruction Chain</title>
-
-        <mediaobject>
-          <imageobject>
-            <imagedata fileref="figs/grappa.png" width="4in"/>
-          </imageobject>
-        </mediaobject>
-      </figure>
-
-      <figure xml:id="fig.examplegrapparesult">
-        <title>GRAPPA Reconstruction Results</title>
-
-        <mediaobject>
-          <imageobject>
-            <imagedata fileref="figs/examplegrapparesult.png" format="PNG"
-                       width="5in"/>
-          </imageobject>
-        </mediaobject>
-      </figure>
-
-      <para>Let's take a closer look at some of the components of this
-      reconstruction application.</para>
-
-      <para>The first Gadget is the <classname>NoiseAdjustGadget</classname>.
-      As described in <xref linkend="sect.mrigadgets"/>, the purpose of this
-      Gadget is to decorrelate the noise in the receiver channels. This
-      improves the parallel imaging performance, especially in cases where
-      there is a large amount of noise in just a few receiver elements. There
-      are two versions of this Gadget, one that uses the BLAS/LAPACK routines
-      for performance improvements and one that implements the same
-      functionality without these optimizations. When you call the included
-      <filename>grappa.xml</filename> configuration, you will use the
-      optimized version. If you do not have BLAS and LAPACK on your system,
-      you can modify the XML configuration to use the one from the
-      <filename>gadgets/core</filename> library.</para>
-
-      <para>Second step is removing the oversampling. This step could also be
-      performed after the reconstruction (as it is done in <xref
-      linkend="sect.2dftexample"/>), but here we opt to remove this excess
-      data to improve downstream performance.</para>
-
-      <para>The purpose of the next two Gadgets
-      (<classname>PCAGadget</classname> and
-      <classname>CoilReductionGadget</classname>) is to a) transform the
-      receiver coils into PCA virtual coils ordered by their information
-      content and b) remove some of the coils to improve downstream
-      performance. The first step is achieved by buffering the first frame of
-      data and then performing a principal component analysis (PCA) on the
-      first frame of data. Based on the determined PCA transformation all data
-      is then subsequently transformed into virtual coils. In the coil
-      reduction gadget we can now simply eliminate the channels that are above
-      a certain number. See <xref linkend="sect.mrigadgets"/> for details on
-      how to control the channel compression.</para>
-
-      <para>The next two Gadgets are responsible for the actual GRAPPA
-      reconstruction. The <classname>GrappaGadget</classname> calculates the
-      GRAPPA coefficients and <classname>GrappaUnmixingGadget</classname>
-      performs the Fourier transform of the raw data and applies the GRAPPA
-      coefficients to the aliased images to obtain unaliased images.</para>
-
-      <para>In general it is assumed that the data is acquired in such a way
-      that a set of neighboring frames can be averaged to yield a fully
-      sampled k-space; the data is acquired with a time-interleaved sampling
-      pattern. When enough calibration data is available to calculate GRAPPA
-      coefficients, i.e. when a fully sampled region of k-space is available,
-      the calibration data is sent to a grappa coefficient calculation object
-      (<classname>GrappaWeightsCalculator</classname>).</para>
-
-      <para>The <classname>GrappaWeightsCalculator</classname> is an active
-      object, which picks up weight calculation jobs from an input queue and
-      passes them on to the GPU where it uses toolbox functions to calculate
-      GRAPPA unmixing coefficients. These coefficients are Fourier transformed
-      to image space where they are combined for all coils and stored in a
-      <classname>GrappaWeights</classname> object.</para>
-
-      <para>When the <classname>GrappaGadget</classname> passes on the raw
-      data to the <classname>GrappaUnmixingGadget</classname> it passes a
-      reference to the <classname>GrappaWeights</classname> object which is to
-      be used when performing the unmixing operation. Let's have a closer look
-      at the <filename>GrappaUnmixingGadget.h</filename> file:</para>
-
-      <programlisting>struct GrappaUnmixingJob
-{
- boost::shared_ptr< GrappaWeights<float> > weights_;
-};
-
-class GrappaUnmixingGadget: 
-public Gadget3<GrappaUnmixingJob, 
-               ISMRMRD::ImageHeader, 
-               hoNDArray<std::complex<float> > > 
-{
-public:
- GADGET_DECLARE(GrappaUnmixingGadget);
-
- GrappaUnmixingGadget();
- virtual ~GrappaUnmixingGadget();
-protected:
- virtual int process(GadgetContainerMessage<GrappaUnmixingJob>* m1,
-   GadgetContainerMessage<ISMRMRD::ImageHeader>* m2, 
-   GadgetContainerMessage<hoNDArray<std::complex<float> > >* m3);
-
-};
-</programlisting>
-
-      <para>We can see that the <classname>GrappaUnmixingGadget</classname> is
-      an example of a Gadget, which takes 3 arguments and the additional
-      argument in this case holds a reference to the unmixing
-      coefficients.</para>
-
-      <para>The <classname>GrappaWeightsCalculator</classname> will update the
-      coefficients as often as it is instructed to do so and the
-      <classname>GrappaGadget</classname> is in charge of determining when an
-      update should be done. Specifically, it monitors the incoming data and
-      when the slice orientation changes, a job will be submitted to update
-      the coefficients. If the slice is not changing, it is in principle OK to
-      continue with the current coefficients, but if data is available and the
-      <classname>GrappaWeightsCalculator</classname> is idle (the queue is
-      empty) a job will be submitted.</para>
-
-      <para>With this design, the data passes through the
-      <classname>GrappaGadget</classname> very quickly and the
-      <classname>GrappaUnmixingGadget</classname> can reconstruct the
-      images very quickly, i.e. it is simply a Fourier transform and an
-      element wise multiplication and sum over the coils. It is in other words
-      designed for very high throughput.</para>
-
-      <para>If the slice orientation changes, new coefficients will be
-      calculated, but this calculation will not be done by the time the data
-      reaches the <classname>GrappaUnmixingGadget</classname> and
-      consequently, the images will be reconstructed with the "old"
-      coefficients until the coefficients are ready. This design ensures low
-      latency, but when the slice changes, aliasing may occur for a few frames
-      until coefficients are updated.</para>
-
-      <para>After the unmixing, the images are scaled and magnitude is
-      extracted before returning images to the client. The
-      <classname>AutoScaleGadget</classname> has been added in this case to
-      ensure that images are in a reasonable range before converting to
-      unsigned short as the output in this case. Automatic image scaling can
-      be problematic, especially when doing quantitative imaging, but it was
-      added in this case to make the reconstruction more robust to data from
-      different sources. A better solution is to only use data where noise
-      calibration data is available and reconstruct SNR scaled images. Based
-      on typical SNR values for MRI images, it is fairly trivial to keep the
-      images in the appropriate range and perform a proper conversion to
-      unsigned short.</para>
-
-      <para>A final comment about the GRAPPA reconstruction is that it allows
-      a second step of channel compression. More specifically, it is possible
-      to reconstruct to a limited number of target channels to further improve
-      performance. Between the upstream and downstream channel compression
-      steps, it is possible to tune the performance of the reconstruction to
-      enable real-time reconstruction on the available hardware.</para>
-    </sect1>
-
-    <sect1 xml:id="sect.cgsense">
-      <title>Non-Cartesian 2D Parallel MRI (SENSE)</title>
-
-      <para>The Gadgetron includes a real-time implementation of a GPU-based
-      real-time non-Cartesian Sense reconstruction published in <citation
-      linkend="sorensen09"><xref linkend="sorensen09"/></citation>. One of the
-      keys to obtaining real-time performance is an efficient GPU
-      implementation of the non-Cartesian Fast Fourier Transform
-      <citation><xref linkend="sorensen08"/></citation>. The application
-      reuses several of the gadgets we have seen in use already for the
-      Cartesian Grappa implementation above (<xref linkend="sect.grappa"/>).
-      An overview of the non-Cartesian Sense gadget chain is given in figure
-      <xref linkend="fig.cgsense"/>. <figure xml:id="fig.cgsense">
-          <title>Gadgetron Chain for Non-Cartesian Sense</title>
-
-          <mediaobject>
-            <imageobject role="html">
-              <imagedata align="left" fileref="figs/cgsense.png" format="PNG"
-                         width="3in"/>
-            </imageobject>
-
-            <imageobject role="fo">
-              <imagedata align="left" fileref="figs/cgsense.png" format="PNG"
-                         width="3in"/>
-            </imageobject>
-
-            <textobject>
-              <phrase>Gadgetron chain for non-Cartesian Sense</phrase>
-            </textobject>
-          </mediaobject>
-        </figure></para>
-
-      <para>The <classname>CGSenseGadget</classname> implements the
-      non-Cartesian Sense reconstruction. It contains a conjugate gradient
-      solver (<xref linkend="sect.linear_solvers"/>) set up with a
-      <classname>nonCartesianSense</classname> image encoding matrix and an
-      <classname>imageOperator</classname> for regularization. Internally it
-      maintains a cyclic buffer of a few seconds of imaging data. It uses this
-      buffer to maintain a fully sampled (i.e. unaliased but blurred) k-space
-      image from which coil sensitivities and regularization images are
-      dynamically estimated. The combination of parallel imaging and image
-      regularization operators allows for alias-suppressed image
-      reconstruction using significant undersampling hereby achieving
-      real-time data acquisition rates per frame. The conjugate gradient
-      solver is able to reconstruct faster than the acquisition time e.g. a
-      192x192 image from 32 coils using 10 solver iterations on newer graphics
-      hardware.</para>
-
-      <para>To test this configuration use the 32 channel radial MRI test
-      dataset (<filename>golden_angle.h5</filename>), which you can download
-      from <uri
-      xlink:href="https://sourceforge.net/projects/gadgetron/files/testdata/mri/">
-      https://sourceforge.net/projects/gadgetron/files/testdata/ismrmrd/</uri>.
-      We assume that you have added <envar>$(GADGETRON_HOME)/bin</envar> to
-      your <envar>PATH</envar> environment variable. You need a CUDA enabled
-      GPU on your system and your Gadgetron distribution should be compiled
-      with CUDA and CULA enabled. Please see <xref
-      linkend="sect.installation"/> for details for your specific
-      platform.</para>
-
-      <para>To run the reconstruction, start up
-      <application>gadgetron</application> (in its own terminal window) and
-      use the <application>mriclient</application> to send the data from
-      another terminal. First start
-      <application>gadgetron</application>:</para>
-
-      <screen>user at host$ <userinput>gadgetron</userinput> 
-Configuring services</screen>
-
-      <para>If asked, allow the gadgetron application to accept incoming
-      network connections. Next start the
-      <application>mriclient</application>:</para>
-
-      <screen>user at host:~/temp$ wget http://sourceforge.net/projects/gadgetron/files/testdata/ismrmrd/golden_angle.h5
-
-user at host:~/temp$ mriclient \
-       -d golden_angle.h5 \
-       -c radial_single.xml
-
-Gadgetron MRI Data Sender
-  -- host            :      localhost
-  -- port            :      9002
-  -- hdf5 file  in   :      gadgetron_testdata.h5
-  -- hdf5 group in   :      gre_golden_angle
-  -- conf            :      radial_single.xml
-  -- loop            :      1
-  -- hdf5 file out   :      ./out.h5
-  -- hdf5 group out  :      2012-05-11 15:47:22
-(32608|139797448419136) Connection from 127.0.0.1:9002
-32608, 81, GadgetronConnector, Close Message received
-(32608|139797376546560) Handling close...
-(32608|139797376546560) svc done...
-(32608|139797376546560) Handling close...
-
-</screen>
-
-      <para>Your current folder now holds the reconstructed images in the
-      <filename>out.h5</filename> HDF5 file. They will look something like the
-      one depicted in <xref linkend="fig.examplecgsenseresult"/>. <figure
-          xml:id="fig.examplecgsenseresult">
-          <title>Non-Cartesian Sense Reconstruction Results</title>
-
-          <mediaobject>
-            <imageobject>
-              <imagedata fileref="figs/examplecgsenseresult.png" format="PNG"
-                         width="3in"/>
-            </imageobject>
-          </mediaobject>
-        </figure></para>
-    </sect1>
-  </chapter>
-
-  <chapter xml:id="sect.standalone_applications">
-    <title>Standalone Applications</title>
-
-    <para>This chapter demonstrates through a few examples how to use the
-    Gadgetron toolboxes (<xref linkend="sect.toolboxes"/>) to build standalone
-    applications outside the streaming framework. You need a CUDA enabled GPU
-    on your system and your Gadgetron distribution should be compiled with
-    CUDA (and CULA) enabled. Then the examples are automatically built with
-    the Gadgetron and binaries should consequently be available in
-    <envar>$GADGETRON_HOME/bin</envar>.</para>
-
-    <sect1 xml:id="sect.image_denoising">
-      <title>Image Denoising</title>
-
-      <para>This example uses the unconstrained Split Bregman solver for total
-      variation based 2D image denoising. The encoding matrix is defined as an
-      <classname>identityOperator</classname> and a
-      <classname>partialDerivativeOperator</classname> is used for each of the
-      two spatial directions to implement the total variation regularization
-      term. The two partial derivatives are added as a "group" of
-      regularization operators to implement isotropic denoising.
-      Alternatively, by changing a few lines of code they can be added as
-      individual regularization operators instead to implement anisotropic
-      denoising.</para>
-
-      <para>The full source code for the example can be found at
-      <envar>$(GADGETRON_SOURCE)</envar><filename>/apps/standalone/gpu/denoising/2d/denoise_TV.cpp</filename>.</para>
-
-      <para>You can download some noisy Shepp-Logan phantom test datasets from
-      <uri
-      xlink:href="https://sourceforge.net/projects/gadgetron/files/testdata/phantom/shepp_logan/shepp.tar.gz">https://sourceforge.net/projects/gadgetron/files/testdata/phantom/shepp.tar.gz</uri></para>
-
-      <para>In a terminal, go to the folder in which you unpacked the data. We
-      assume that you have added <envar>$(GADGETRON_HOME)/bin</envar> to your
-      <envar>PATH</envar> environment variable.</para>
-
-      <para>Try</para>
-
-      <screen>user at host$ <userinput>denoise_TV -d shepp_logan_256_256_med_noise.real -O 250 -m 1</userinput>
-Running denoising with the following parameters: 
----------------------------------------------------- 
-  Noisy image file name (.real)  : shepp_logan_256_256_med_noise.real 
-  Result file name               : denoised_image_TV.real 
-  Number of cg iterations        : 20 
-  Number of sb inner iterations  : 1 
-  Number of sb outer iterations  : 250 
-  Regularization weight (mu)     : 1 
----------------------------------------------------- 
-...
-user at host$</screen>
-
-      <para>which runs 250 iterations of the solver with a regularization
-      weight of 1. The output is saved in the current folder in the file
-      <filename>denoised_image_TV.real</filename>.</para>
-
-      <para>The noisy and denoised phantom is depicted below.</para>
-
-      <figure xml:id="fig.noisy2d">
-        <title>A noisy version of the Shepp-Logan phantom</title>
-
-        <mediaobject>
-          <imageobject>
-            <imagedata fileref="figs/shepp_noisy.png" format="PNG" width="2in"/>
-          </imageobject>
-        </mediaobject>
-      </figure>
-
-      <figure>
-        <title>Result after total variation denoising</title>
-
-        <mediaobject>
-          <imageobject>
-            <imagedata fileref="figs/shepp_denoised.png" format="PNG"
-                       width="2in"/>
-          </imageobject>
-        </mediaobject>
-      </figure>
-
-      <para>Running <application>denoise_TV</application> with no arguments
-      prints out a brief usage description. We leave it as an exercise to run
-      the algorithm with various settings. The data file you downloaded
-      contains two further datasets (with lower and higher noise levels
-      respectively) to try out as well.</para>
-    </sect1>
-
-    <sect1>
-      <title>Image Deblurring</title>
-
-      <para>This example uses 1) the linear least squares solver, and 2) the
-      constrained Split Bregman solver for image deblurring. The encoding
-      matrix is defined as a <classname>convolutionOperator</classname>. A
-      <classname>partialDerivativeOperator</classname> is added for each of
-      the two spatial directions as regularization terms.</para>
-
-      <para>We reuse the Shepp-Logan data from the image denoising experiment
-      above (<xref linkend="sect.image_denoising"/>).</para>
-
-      <para>First we generate a blurry Shepp-Logan phantom by convolution with
-      a Gaussian kernel. This is easily achieved using the method
-      <function>mult_M</function> in the
-      <classname>convolutionOperator</classname>. Source code is provided at
-      <envar>$(GADGETRON_SOURCE)</envar><filename>/apps/standalone/gpu/deblurring/2d/blur_2d.cpp</filename></para>
-
-      <para>In a terminal, go to the folder in which you unpacked the
-      Shepp-Logan phantom.</para>
-
-      <para>Try</para>
-
-      <screen>user at host$ <userinput>blur_2d -d shepp_logan_256_256_no_noise.real</userinput></screen>
-
-      <para>which generates two complex images;
-      <filename>blurred_image.cplx</filename> and
-      <filename>kernel_image.cplx</filename>. For convenience a corresponding
-      magnitudes image is also saved as
-      <filename>blurred_image.real</filename>.</para>
-
-      <para>Next run the conjugate gradient solver. The source code for the
-      example can be found in
-      <envar>$(GADGETRON_SOURCE)</envar><filename>/apps/standalone/gpu/deblurring/2d/deblur_2d_cg.cpp</filename>.</para>
-
-      <screen>user at host$ <userinput>deblur_2d_cg -K 1e-4</userinput>
- Running deblurring with the following parameters: 
----------------------------------------------------- 
-  Blurred image file name (.cplx)  : blurred_image.cplx 
-  Kernel image file name (.cplx)   : kernel_image.cplx 
-  Result file name                 : cg_deblurred_image.cplx 
-  Number of iterations             : 25 
-  Regularization weight            : 1e-4 
----------------------------------------------------- 
-Iterating...
-...
-user at host$</screen>
-
-      <para>The result is saved in the current folder in the file
-      <filename>cg_deblurred_image.cplx</filename>. A magnitudes image is also
-      saved as <filename>cg_deblurred_image.real</filename>.</para>
-
-      <para>Next run the constrained Split Bregman solver. The source code for
-      the example can be found in
-      <envar>$(GADGETRON_SOURCE)</envar><filename>/apps/standalone/gpu/deblurring/2d/deblur_2d_sb.cpp</filename>.</para>
-
-      <screen>user at host$ <userinput>deblur_2d_sb -O 100 -L 0.5 -M 0.5</userinput>
- Running deblurring with the following parameters: 
----------------------------------------------------- 
-  Blurred image file name (.cplx)  : blurred_image.cplx 
-  Kernel image file name (.cplx)   : kernel_image.cplx 
-  Result file name                 : sb_deblurred_image.cplx 
-  Number of cg iterations          : 20 
-  Number of sb inner iterations    : 1 
-  Number of sb outer iterations    : 100 
-  Mu                               : 0.5 
-  Lambda                           : 0.5 
----------------------------------------------------- 
-...
-user at host$</screen>
-
-      <para>The result is saved as
-      <filename>sb_deblurred_image.cplx</filename>. A magnitudes image is also
-      saved as <filename>sb_deblurred_image.real</filename>.</para>
-
-      <para>The blurred and deblurred phantoms are depicted below.</para>
-
-      <figure>
-        <title>Blurry Shepp-Logan phantom</title>
-
-        <mediaobject>
-          <imageobject>
-            <imagedata fileref="figs/shepp_blurred.png" format="PNG"
-                       width="2in"/>
-          </imageobject>
-        </mediaobject>
-      </figure>
-
-      <figure>
-        <title>Deblurred phantom from the Conjugate Gradient solver</title>
-
-        <mediaobject>
-          <imageobject>
-            <imagedata fileref="figs/shepp_deblurred_cg.png" format="PNG"
-                       width="2in"/>
-          </imageobject>
-        </mediaobject>
-      </figure>
-
-      <figure>
-        <title>Deblurred phantom from the constrained Split Bregman
-        solver</title>
-
-        <mediaobject>
-          <imageobject>
-            <imagedata fileref="figs/shepp_deblurred_sb.png" format="PNG"
-                       width="2in"/>
-          </imageobject>
-        </mediaobject>
-      </figure>
-
-      <para>In the present examples no noise was added to the blurred images
-      before the deconvolution. Consequently for the conjugate gradient
-      solver, a very low weight of the regularization term was "sufficient".
-      We leave it as an exercise to run the algorithms with various settings.
-      In particular, try to add noise to the blurred image before the
-      deconvolution to observe the very ill-posed nature of the
-      problem.</para>
-
-      <para><remark>Notice</remark>. If the dimensions of the provided
-      convolution kernel are exactly double those of the provided image, the
-      convolution operator zero-pads the image before the convolution and
-      removes the padding again after. As the convolution operator utilizes
-      FFTs in its implementation, this oversampling is a way of avoiding
-      cyclic boundary conditions during the convolution.</para>
-    </sect1>
-
-    <sect1>
-      <title>Non-Cartesian FFT</title>
-
-      <para>This example shows how to use the forwards and adjoint
-      non-Cartesian Fast Fourier Transform (NFFT and
-      NFFT<superscript>H</superscript> respectively) on a 2D image. The source
-      code can be found at
-      <envar>$(GADGETRON_SOURCE)</envar><filename>/apps/standalone/gpu/MRI/nfft/2d/main_nfft.cpp</filename>
-      and
-      <envar>$(GADGETRON_SOURCE)</envar><filename>/apps/standalone/gpu/MRI/nfft/2d/main_nffth.cpp</filename>.</para>
-
-      <para>We reuse the Shepp-Logan data downloaded for the previous
-      experiments (<xref linkend="sect.image_denoising"/>).</para>
-
-      <para>In the following we run the NFFT followed by the
-      NFFT<superscript>H</superscript>. The image matrix size is
-      256<superscript>2</superscript>. We use an oversampled matrix size of
-      384<superscript>2</superscript>, 128 profiles in k-space (<emphasis
-      role="underline">undersampling</emphasis>) with 384 samples each. The
-      NFFT Kaiser-Bessel convolution kernel width is set to
-      5.5<superscript>2</superscript> (see <xref
-      linkend="sorensen08"/>).</para>
-
-      <screen>user at host$ <userinput>nfft -d shepp_logan_256_256_no_noise.real \ 
-   -o 384 -p 128 -s 384 -k 5.5
-</userinput>
- Running reconstruction with the following parameters: 
----------------------------------------------------- 
-  Input image file name (.real)  : shepp_logan_256_256_no_noise.real 
-  Result file name               : samples.cplx 
-  Oversampled matrix size        : 384 
-  Number of profiles             : 128 
-  Samples per profiles           : 384 
-  Kernel width                   : 5.5 
----------------------------------------------------- 
-Loading image from disk
-Uploading, normalizing and converting to complex
-Initializing plan
-Computing golden ratio radial trajectories
-NFFT preprocessing
-Computing density compensation weights
-Computing nfft (inverse gridding)
-Output result to disk
-user at host$</screen>
-
-      <screen>user at host$ <userinput>nffth -d samples.cplx -m 256 -o 384 -k 5.5</userinput>
- Running reconstruction with the following parameters: 
----------------------------------------------------- 
-  Input samples file name (.cplx)  : samples.cplx 
-  Output image file name (.cplx)   : result.cplx 
-  Matrix size                      : 256 
-  Oversampled matrix size          : 384 
-  Kernel width                     : 5.5 
----------------------------------------------------- 
-Loading samples from disk
-Uploading samples to device
-Initializing plan
-Computing golden ratio radial trajectories
-NFFT preprocessing
-Computing density compensation weights
-Computing nffth (gridding)
-Output result to disk
-user at host$</screen>
-
-      <para>The result is saved in file <filename>result.cplx</filename>. A
-      magnitudes image is saved as <filename>result.real</filename>. As an
-      exercise, experiment with the settings to reduce (or increase) the
-      aliasing.</para>
-
-      <para>The <envar>
-      $(GADGETRON_SOURCE)</envar><filename>/apps/standalone/gpu/MRI/nfft/2d/</filename>
-      folder also contains examples of using the
-      <classname>nfftOperator</classname> in a Conjugate Gradient solver and a
-      Split Bregman solver respectively.</para>
-    </sect1>
-
-    <sect1>
-      <title>Non-Cartesian parallel MRI (SENSE)</title>
-
-      <para>This section demonstrates how to run a standalone non-Cartesian
-      parallel MRI reconstruction similar to the one that was previously shown
-      using the streaming framework infrastructure in section<xref
-      linkend="sect.cgsense"/>. More details can be found in <xref
-      linkend="sorensen09"/>.</para>
-
-      <para>In addition to a regularized linear least squares solution to the
-      reconstruction problem, we furthermore use the Split Bregman solver to
-      obtain the solution with minimum total variation subject to the
-      constraint of the encoding operator (compressed sensing).</para>
-
-      <para>Download a free-breathing cardiac MRI sample dataset from <uri
-      xlink:href="https://sourceforge.net/projects/gadgetron/files/testdata/mri/fb_data.zip">http://sourceforge.net/projects/gadgetron/files/testdata/mri/fb_data.zip</uri></para>
-
-      <para>Source code is found in the files <filename>main_cg.cpp</filename>
-      and <filename>main_sbc.cpp</filename> in directory</para>
-
-      <para><envar>$(GADGETRON_SOURCE)</envar><filename>/apps/standalone/gpu/MRI/sense/noncartesian/radial/2d_golden_ratio</filename>.</para>
-
-      <para>Both command lines below produce a 2D image sequence, each image
-      with a matrix size of 192<superscript>2</superscript>
-      (<varname>-m</varname>). 32 projections are used for each frame
-      (<varname>-p</varname>) for a frame rate of roughly 32 profiles/frame *
-      2.5 ms/profile = 80 ms ms/frame or 12.5 frames/s. The reconstruction
-      results are written out in both complex and magnitude data as
-      <filename>result.cplx</filename> and <filename>result.real</filename>
-      respectively.</para>
-
-      <para>If sufficient device memory is available on your GPU (i.e. you are
-      in possession of a high-end card) all frames in the sequence can be
-      reconstructed concurrently (as a 3D volume). On systems that do not hold
-      enough device memory to reconstruct all frames in parallel, they can
-      instead be reconstructed in several batches. The <varname>-f</varname>
-      option to the command lines below indicate the number of frames that are
-      reconstructed per batch. A negative value indicates "all". If the
-      command below fails to complete due to lack of device memory, try
-      running with argument <varname>-f 8</varname> (or an even smaller
-      number) instead.</para>
-
-      <para>The following output was obtained on a Geforce GTX 480 GPU.</para>
-
-      <screen>user at host$ radial_sense_cg -d fb_data.cplx -m 192 -o 256 -p 32 -K 0.01
-
-  Running reconstruction with the following parameters: 
----------------------------------------------------- 
-  Sample data file name                             : fb_data.cplx 
-  Result file name                                  : result.cplx 
-  Matrix size                                       : 192 
-  Oversampled matrix size                           : 256 
-  Profiles per frame                                : 32 
-  Frames per reconstruction (negative meaning all)  : -1 
-  Number of iterations                              : 10 
-  Kernel width                                      : 5.5 
-  Kappa                                             : 0.01 
----------------------------------------------------- 
-
-Loading data: 18.339 ms
-
-#samples/profile: 256
-#profiles/frame: 32
-#profiles: 2560
-#coils: 4
-#frames/reconstruction: 80
-#profiles/reconstruction: 2560
-#samples/reconstruction: 655360
-
-Filling rhs buffer: 283.675 ms
-Estimating csm: 3.435 ms
-Computing regularization: 0.319 ms
-Computing preconditioning weights: 0.081 ms
-Iterating...
-Iteration 0. rq/rq_0 = 0.453177
-Iteration 1. rq/rq_0 = 0.132643
-Iteration 2. rq/rq_0 = 0.0413432
-Iteration 3. rq/rq_0 = 0.0144378
-Iteration 4. rq/rq_0 = 0.00681063
-Iteration 5. rq/rq_0 = 0.00450857
-Iteration 6. rq/rq_0 = 0.00342872
-Iteration 7. rq/rq_0 = 0.00240418
-Iteration 8. rq/rq_0 = 0.00146108
-Iteration 9. rq/rq_0 = 0.000903398
-GPU Conjugate Gradient solve: 2115.7 ms
-Full SENSE reconstruction.: 2188.68 ms
-Writing out result: 50.111 ms
-
-user at host$</screen>
-
-      <screen>user at host$ radial_sense_sbc -d fb_data.cplx -m 192 -o 256 -p 32 
-
-  Running reconstruction with the following parameters: 
----------------------------------------------------- 
-  Sample data file name                             : fb_data.cplx 
-  Result file name                                  : result.cplx 
-  Matrix size                                       : 192 
-  Oversampled matrix size                           : 256 
-  Profiles per frame                                : 32 
-  Frames per reconstruction (negative meaning all)  : -1 
-  Number of cg iterations                           : 20 
-  Number of sb inner iterations                     : 1 
-  Number of sb outer iterations                     : 20 
-  Kernel width                                      : 5.5 
-  Mu                                                : 1.0 
-  Lambda                                            : 2.0 
----------------------------------------------------- 
-
-Loading data: 16.082 ms
-
-#samples/profile: 256
-#profiles/frame: 32
-#profiles: 2560
-#coils: 4
-#frames/reconstruction 80
-#profiles/reconstruction 2560
-#samples/reconstruction 655360
-
-CSM and regularization estimation: 288.983 ms
-
-...
-
-GPU constrained Split Bregman solve: 57257.4 ms
-Full SENSE reconstruction with TV regularization.: 57330.3 ms
-Writing out result: 50.421 ms
-
-user at host$</screen>
-
-      <para>As all 80 frames are reconstructed in parallel it is
-      straightforward to add temporal regularization to the reconstructions.
-      We leave this as a suggested exercise for the reader.</para>
-
-      <para>For the interested reader, an implementation of
-      <emphasis>kt</emphasis>-Sense can be found in directory</para>
-
-      <para><envar>$(GADGETRON_SOURCE)</envar><filename>/apps/standalone/gpu/MRI/sense/noncartesian/radial/2d_golden_ratio_kt</filename>.</para>
-
-      <para>Additionally, the source code for the user interface demonstrated
-      in the <uri
-      xlink:href="http://www.cs.au.dk/~sangild/non-Cartesian_interactive_reconstruction.wmv">movie</uri>
-      accompanying <xref linkend="sorensen09"/> can be found (if you
-      configured <command>cmake</command> to inlude Qt support) in
-      direcotry</para>
-
-      <para><envar>$(GADGETRON_SOURCE)</envar><filename>/apps/standalone/gpu/MRI/sense/noncartesian/radial/2d_golden_ratio_gui</filename>.</para>
-    </sect1>
-  </chapter>
-
-  <chapter>
-    <title>Frequently Asked Questions (FAQ)</title>
-
-    <itemizedlist>
-      <listitem>
-        <para><emphasis>Can I make a branching Gadget chain?</emphasis></para>
-
-        <para>The short answer is no. We plan on supporting this in a future
-        release, but it is not quite ready yet.</para>
-      </listitem>
-
-      <listitem>
-        <para><emphasis>How can I help?</emphasis></para>
-
-        <para>We are always looking for people who are interested in helping
-        with the continuing development of the Gadgetron. There are many
-        things you can do:</para>
-
-        <itemizedlist>
-          <listitem>
-            <para>Use it.</para>
-          </listitem>
-
-          <listitem>
-            <para>When you develop new Gadgets or Toolboxes, please consider
-            submitting them to us so that we can include them in the
-            archive.</para>
-          </listitem>
-
-          <listitem>
-            <para>Help us implement some of the future features in <xref
-            linkend="futurefeatures"/>. It is probably a good idea to get in
-            touch with us before you start coding, just in case somebody is
-            already working on it.</para>
-          </listitem>
-        </itemizedlist>
-      </listitem>
-    </itemizedlist>
-  </chapter>
-
-  <appendix xml:id="simplearrayfiles">
-    <title>Simple Array File Format</title>
-
-    <para>When working with the Gadgetron it is often necessary to write files
-    with reconstructed images to disk, either as part of debugging or as the
-    final reconstruction result. We have adopted a very simple
-    multidimensional array file format for this purpose. The main advantage of
-    this file format is its simplicity but there are a number of disadvantages
-    and caveats as well as described in this section.</para>
-
-    <para>The simple array files are made up of a) a header followed by b) the
-    data itself. This layout of data and header is illustrated in <xref
-    linkend="fig.gadgetron.fileformat"/>. The header has a single 32-bit
-    integer to indicate the number of dimensions of the dataset followed by
-    one integer for each dimension to indicate the length of that dimension.
-    The data follows immediately after the header. The data is stored such
-    that the first dimension is the fastest moving dimension, second dimension
-    is second fastest, etc. The header contains no information about the size
-    of each individual data element and consequently the user needs to know
-    what type of data is contained in the array. In general, the Gadgetron
-    uses 3 different types of data and the convention is to use the file
-    extension to indicate the data type in the file:</para>
-
-    <itemizedlist>
-      <listitem>
-        <para>16-bit unsigned short. File extension:
-        <filename>*.short</filename></para>
-      </listitem>
-
-      <listitem>
-        <para>32-bit float. File extension: <filename>*.real</filename></para>
-      </listitem>
-
-      <listitem>
-        <para>32-bit complex float. Two 32-bit floating point values per data
-        element. File extension: <filename>*.cplx</filename></para>
-      </listitem>
-    </itemizedlist>
-
-    <figure xml:id="fig.gadgetron.fileformat">
-      <title>Simple Array File Format</title>
-
-      <mediaobject>
-        <imageobject condition="print">
-          <imagedata align="left" fileref="figs/arrayfileformat.png"
-                     format="PNG" width="2in"/>
-        </imageobject>
-
-        <textobject>
-          <phrase>Simple Array</phrase>
-        </textobject>
-      </mediaobject>
-
-      <caption>
-        <para>The simple array file format has a header followed by the data.
-        The header consists of one 32-bit integer defining the number of
-        dimensions (N-dimensions) followed by N-dimensions 32-bit unsigned
-        integers each defining the length of each dimensions. In the example,
-        the dataset has 4 dimensions and the size of those dimensions is
-        128x128x1x1, i.e. 16384 elements.</para>
-      </caption>
-    </figure>
-
-    <para>The Gadgetron framework provides function for reading these files in
-    C++. The functions are located in
-    <filename>toolboxes/ndarray/hoNDArray_fileio.h</filename> in the Gadgetron
-    source code distribution.</para>
-
-    <para>It is also trivial to read the files into Matlab. Below is a
-    function which detects the data type based on the file extension and reads
-    the file into Matlab.</para>
-
-    <programlisting>
-
-function data = read_gadgetron_array(filename)
-%  data = read_gadgetron_array(filename)
-%  
-%  Reads simplified array format output from the Gadgetron
-%
-%  The datatype is determined by the file extension.
-%     - *.short : 16-bit unsigned integer
-%     - *.real  : 32-bit float
-%     - *.cplx  : 32-bit complex (two 32-bit values per data element)
-%
-%
-if (~exist(filename,'file')),
-    error('File not found.');
-end
-
-[path,name,ext] = fileparts(filename);
-
-ext = lower(ext);
-
-if (~strcmp(ext,'.short') && ~strcmp(ext,'.real') && ~strcmp(ext,'.cplx')),
-   error('Unknown file extension'); 
-end
-
-f = fopen(filename);
-ndims = fread(f,1,'int32'); 
-dims = fread(f,ndims,'int32'); 
-
-switch ext
-    case '.short'
-        data = fread(f,prod(dims),'uint16'); 
-    case '.real'
-        data = fread(f,prod(dims),'float32'); 
-    case '.cplx'
-        data = fread(f,2*prod(dims),'float32'); 
-        data = complex(data(1:2:end),data(2:2:end));
-    otherwise     
-end
-
-fclose(f);
-
-data = reshape(data,dims');
-
-end
-
-  </programlisting>
-  </appendix>
-
-  <appendix xml:id="section.hdf5">
-    <title>HDF5 Files</title>
-
-    <para>The Gadgetron framework is used to process many different types of
-    data and it is cumbersome to add specific read and write routines for all
-    these different kinds of data. Consequently we have chosen to use the
-    generic HDF5 file format. A detailed description of this format can be
-    found at <uri>http://www.hdfgroup.org/HDF5/</uri>.</para>
-
-    <para>The HDF5 file format is much like a file system. Data can be
-    organized hierarchically into groups (like folders in a filesystem) and
-    each file can contain multiple groups and datasets. Each dataset can be an
-    array of any type, e.g. an array of images. There is a generic tool
-    <command>hdfview</command> which can be used to view the files. It is
-    available on all the platforms supported by the Gadgetron framework. HDF5
-    files can also be read easily in newer versions of Matlab.</para>
-
-    <para>As an example of a HDF5 file with MRI raw data can be found at <uri
-    type="website"
-    xlink:href="https://sourceforge.net/projects/gadgetron/files/testdata/">https://sourceforge.net/projects/gadgetron/files/testdata/</uri>.
-    Download the file <filename>gadgetron_testdata.h5</filename>. When opened
-    with <command>hdfview</command>, it should look like <xref
-    linkend="fig.hdfview_testdata"/>. As seen, the file contains 4 groups of
-    data. Each group consists of some data and an XML configuration for the
-    Gadgetron.</para>
-
-    <figure xml:id="fig.hdfview_testdata">
-      <title>Examining Data with HDFView</title>
-
-      <screenshot>
-        <mediaobject>
-          <imageobject>
-            <imagedata fileref="figs/hdfview_mri_testdata.png" width="6in"/>
-          </imageobject>
-        </mediaobject>
-      </screenshot>
-    </figure>
-
-    <para>HDF5 Files can also be used to store images. Several of the
-    Gadgetron clients included with the framework save images in HDF5 files.
-    An example of viewing the output of a reconstruction can be seen in <xref
-    linkend="fig.hdfview_image"/>.</para>
-
-    <figure xml:id="fig.hdfview_image">
-      <title>Viewing Images in HDF5 Files</title>
-
-      <mediaobject>
-        <imageobject>
-          <imagedata fileref="figs/hdfview_image_view.png" width="5.5in"/>
-        </imageobject>
-      </mediaobject>
-    </figure>
-
-    <para>Images saved by Gadgetron clients are saved as arrays in the HDF5
-    files. Due to the array storage conventions in the Gadgetron environment,
-    the first dimension is the slowest varying dimension in the arrays and the
-    last dimension is the fastest varying dimension. That means that an array
-    with 10 images with dimensions 128x128 would be stored in a variable in
-    the HDF5 file with dimensions 10x1x128x128 as seen in <xref
-    linkend="fig.hdfview_image"/>. To display the images, right click on the
-    data and choose settings as illustrated in <xref
-    linkend="fig.settings_hdfview"/>.</para>
-
-    <figure xml:id="fig.settings_hdfview">
-      <title>Setting for viewing HDF5 output images.</title>
-
-      <mediaobject>
-        <imageobject>
-          <imagedata fileref="figs/hdfview_image_view_setting.png" width="3in"/>
-        </imageobject>
-      </mediaobject>
-    </figure>
-
-    <para>The HDF5 files can also be read with Matlab. The images in the file
-    above could be read with:<programlisting>>> images = h5read('out.h5','/2012-05-11 10:57:48/data_0');
->> size(images)
-
-ans =
-
-   128   128     1    10
-
->> imagesc(images(:,:,1,1));colormap(gray) 
-</programlisting></para>
-  </appendix>
-
-  <appendix xml:id="futurefeatures">
-    <title>Future Features</title>
-
-    <para>The Gadgetron is evolving continuously and there are many things
-    still that we would like to include but have not yet had the time to do.
-    This appendix serves as a to-do list of features to be implement as we go
-    along.</para>
-
-    <itemizedlist>
-      <listitem>
-        <para>Branching Gadget chains. There is currently no ability to branch
-        and collect in the Gadgetron.</para>
-      </listitem>
-
-      <listitem>
-        <para>Persistent memory storage across Gadget chains.</para>
-      </listitem>
-
-      <listitem>
-        <para>Matlab Gadgets. It would be great to have a way to encapsulate
-        Matlab code in a Gadget similar to the way that the Python Gadgets
-        work.</para>
-      </listitem>
-    </itemizedlist>
-  </appendix>
-
-  <bibliography>
-    <biblioentry role="article" xml:id="hansen12">
-      <abbrev>HANSEN12</abbrev>
-
-      <biblioset role="article">
-        <authorgroup>
-          <author>
-            <personname><firstname>M. S.</firstname>
-            <surname>Hansen</surname></personname>
-          </author>
-
-          <author>
-            <personname><firstname>T. S.</firstname>
-            <surname>Sørensen</surname></personname>
-          </author>
-        </authorgroup>
-
-        <title>Gadgetron: An Open Source Framework for Medical Image
-        Reconstruction</title>
-      </biblioset>
-
-      <biblioset role="journal">
-        <title>Magnetic Resonance in Medicine</title>
-
-        <volumenum>Submitted</volumenum>
-
-        <pubdate>2012</pubdate>
-      </biblioset>
-    </biblioentry>
-
-    <biblioentry role="article" xml:id="hansen08">
-      <abbrev>HANSEN08</abbrev>
-
-      <biblioset role="article">
-        <authorgroup>
-          <author>
-            <personname><firstname>M. S.</firstname>
-            <surname>Hansen</surname></personname>
-          </author>
-
-          <author>
-            <personname><firstname>D.</firstname>
-            <surname>Atkinson</surname></personname>
-          </author>
-
-          <author>
-            <personname><firstname>T.
-            S.</firstname><surname>Sørensen</surname></personname>
-          </author>
-        </authorgroup>
-
-        <title>Cartesian SENSE and k-t SENSE reconstruction using commodity
-        graphics hardware</title>
-      </biblioset>
-
-      <biblioset role="journal">
-        <title>Magnetic Resonance in Medicine</title>
-
-        <volumenum>59</volumenum>
-
-        <issuenum>3</issuenum>
-
-        <pagenums>463-468</pagenums>
-
-        <pubdate>2008</pubdate>
-      </biblioset>
-    </biblioentry>
-
-    <biblioentry role="article" xml:id="sorensen08">
-      <abbrev>SANGILD08</abbrev>
-
-      <biblioset role="article">
-        <authorgroup>
-          <author>
-            <personname><firstname>T. S.</firstname>
-            <surname>Sørensen</surname></personname>
-          </author>
-
-          <author>
-            <personname><firstname>T.</firstname>
-            <surname>Schaeffter</surname></personname>
-          </author>
-
-          <author>
-            <personname><firstname>K. O.</firstname>
-            <surname>Noe</surname></personname>
-          </author>
-
-          <author>
-            <personname><firstname>M. S.</firstname>
-            <surname>Hansen</surname></personname>
-          </author>
-        </authorgroup>
-
-        <title>Accelerating the nonequispaced fast fourier transform on
-        commodity graphics hardware</title>
-      </biblioset>
-
-      <biblioset role="journal">
-        <title>IEEE Trans Med Imaging</title>
-
-        <volumenum>27</volumenum>
-
-        <issuenum>4</issuenum>
-
-        <pagenums>538-47</pagenums>
-
-        <pubdate>2008</pubdate>
-      </biblioset>
-    </biblioentry>
-
-    <biblioentry role="article" xml:id="sorensen09">
-      <abbrev>SANGILD09</abbrev>
-
-      <biblioset role="article">
-        <authorgroup>
-          <author>
-            <personname><firstname>T. S.</firstname>
-            <surname>Sørensen</surname></personname>
-          </author>
-
-          <author>
-            <personname><firstname>D.</firstname>
-            <surname>Atkinson</surname></personname>
-          </author>
-
-          <author>
-            <personname><firstname>T.</firstname>
-            <surname>Schaeffter</surname></personname>
-          </author>
-
-          <author>
-            <personname><firstname>M. S.</firstname>
-            <surname>Hansen</surname></personname>
-          </author>
-        </authorgroup>
-
-        <title>Real-time reconstruction of sensitivity encoded radial magnetic
-        resonance imaging using a graphics processing unit</title>
-      </biblioset>
-
-      <biblioset role="journal">
-        <title>IEEE Trans Med Imaging</title>
-
-        <volumenum>28</volumenum>
-
-        <issuenum>12</issuenum>
-
-        <pagenums>1974-85</pagenums>
-
-        <pubdate>2009</pubdate>
-      </biblioset>
-    </biblioentry>
-  </bibliography>
-</book>
diff --git a/gadgets/CMakeLists.txt b/gadgets/CMakeLists.txt
index 3e2154d..b08e1c2 100644
--- a/gadgets/CMakeLists.txt
+++ b/gadgets/CMakeLists.txt
@@ -3,54 +3,51 @@ if (WIN32)
   link_directories(${Boost_LIBRARY_DIRS})
 endif (WIN32)
 
-include_directories(   
+# These dependencies should have been found already to even get in here, but we play safe
+find_package(ACE)
+find_package(FFTW3 COMPONENTS single double)
+find_package(Ismrmrd)
+
+include_directories(
+  ${CMAKE_BINARY_DIR}/apps/gadgetron
   ${ACE_INCLUDE_DIR} 
   ${Boost_INCLUDE_DIR}
   ${FFTW3_INCLUDE_DIR}
   ${ISMRMRD_INCLUDE_DIR}
-  ${ISMRMRD_SCHEMA_DIR}
-  ${ISMRMRD_XSD_INCLUDE_DIR}
-  ${XSD_INCLUDE_DIR}
-  ${XERCESC_INCLUDE_DIR}
   ${CMAKE_SOURCE_DIR}/dependencies/tinyxml
   ${CMAKE_SOURCE_DIR}/apps/gadgetron 
   ${CMAKE_SOURCE_DIR}/toolboxes/core
   ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
   ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/hostutils
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/algorithm
   ${CMAKE_SOURCE_DIR}/toolboxes/gadgettools
   )
 
-if (MKL_FOUND)
-    MESSAGE("MKL Found for gtPlus ... ")
-    list(APPEND EXTRA_MKL_LIBRARIES mkl_core)
-    if ( USE_OPENMP )
-        list(APPEND EXTRA_MKL_LIBRARIES mkl_intel_thread)
-    endif ( USE_OPENMP )
-
-    INCLUDE_DIRECTORIES( ${MKL_INCLUDE_DIR} )
-    LINK_DIRECTORIES( ${MKL_LIB_DIR} ${MKL_COMPILER_LIB_DIR} )
-endif (MKL_FOUND)
-
 add_subdirectory(mri_core)
+add_subdirectory(interventional_mri)
+add_subdirectory(util)
+
+if (ARMADILLO_FOUND)
+    add_subdirectory(epi)
+else (ARMADILLO_FOUND)  # fallback branch: reached only when Armadillo is missing
+    MESSAGE("Armadillo not found, NOT compiling EPI Gadgets")
+endif (ARMADILLO_FOUND)
 
-if (MKL_FOUND)
+if (MKL_FOUND OR ARMADILLO_FOUND)
     add_subdirectory(gtPlus)
-endif (MKL_FOUND)
+endif (MKL_FOUND OR ARMADILLO_FOUND)
 
 if (CUDA_FOUND)
   message("Cuda found, compiling gpu accelerated gadgets")
+  add_subdirectory(pmri)
+  add_subdirectory(grappa)
   add_subdirectory(radial)
   add_subdirectory(spiral)
-  add_subdirectory(sense)
-  add_subdirectory(grappa)
 else (CUDA_FOUND)
   message("Cuda NOT found, NOT compiling gpu accelerated gadgets")
 endif(CUDA_FOUND)
 
-#find_package(Octave)
-#if (OCTAVE_FOUND)
-#  add_subdirectory(octave)
-#endif(OCTAVE_FOUND)
 
 if (MATLAB_FOUND)
   message("Compilng MATLAB gadgets")
@@ -58,7 +55,8 @@ if (MATLAB_FOUND)
 endif(MATLAB_FOUND)
 
 # nest the find_package checks to provide more useful error messages
-find_package(Boost COMPONENTS python)
+find_package(Boost COMPONENTS python system thread REQUIRED)
+
 if (Boost_PYTHON_FOUND AND PYTHONLIBS_FOUND AND NUMPY_FOUND)
     MESSAGE("PYTHON_INCLUDE_DIRS: ${PYTHON_INCLUDE_DIRS}")
     MESSAGE("PYTHON_LIBRARIES: ${PYTHON_LIBRARIES}")
@@ -86,4 +84,7 @@ else(DCMTK_FOUND)
 endif(DCMTK_FOUND)
 
 add_subdirectory(cartesian)
-add_subdirectory(moco)
+
+if(ARMADILLO_FOUND)
+    add_subdirectory(moco)
+endif(ARMADILLO_FOUND)
diff --git a/gadgets/cartesian/CMakeLists.txt b/gadgets/cartesian/CMakeLists.txt
index 1960193..bbeb717 100644
--- a/gadgets/cartesian/CMakeLists.txt
+++ b/gadgets/cartesian/CMakeLists.txt
@@ -3,21 +3,31 @@ IF (WIN32)
 ENDIF (WIN32)
 
 find_package(Ismrmrd REQUIRED)
-find_package(XSD REQUIRED)
-find_package(XercesC REQUIRED)
 
 include_directories(
   ${CMAKE_SOURCE_DIR}/gadgets/mri_core
-  ${ISMRMRD_XSD_INCLUDE_DIR}
   )
 
 add_library(gadgetron_cartesian SHARED 
+  gadgetron_cartesian_export.h
+  CartesianToGenericGadget.h
   CartesianToGenericGadget.cpp
-  ${ISMRMRD_XSD_SOURCE})
+)
 
-target_link_libraries(gadgetron_cartesian cpucore 
-  ${ISMRMRD_LIBRARIES} ${XERCESC_LIBRARIES}
+set_target_properties(gadgetron_cartesian PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+
+target_link_libraries(gadgetron_cartesian gadgetron_toolbox_cpucore 
+  gadgetron_gadgetbase
+  ${ISMRMRD_LIBRARIES}
   optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY}
   )
 
-install (TARGETS gadgetron_cartesian DESTINATION lib)
+if(ARMADILLO_FOUND)
+    target_link_libraries(gadgetron_cartesian gadgetron_toolbox_cpucore_math )
+endif(ARMADILLO_FOUND)
+
+install (FILES  gadgetron_cartesian_export.h
+                CartesianToGenericGadget.h
+                DESTINATION include COMPONENT main)
+
+install (TARGETS gadgetron_cartesian DESTINATION lib COMPONENT main)
diff --git a/gadgets/cartesian/CartesianToGenericGadget.cpp b/gadgets/cartesian/CartesianToGenericGadget.cpp
index f926922..6f16d5f 100644
--- a/gadgets/cartesian/CartesianToGenericGadget.cpp
+++ b/gadgets/cartesian/CartesianToGenericGadget.cpp
@@ -1,5 +1,5 @@
 #include "CartesianToGenericGadget.h"
-#include "GadgetIsmrmrdReadWrite.h"
+#include "ismrmrd/xml.h"
 
 namespace Gadgetron{
 
@@ -12,31 +12,29 @@ namespace Gadgetron{
   
   int CartesianToGenericGadget::process_config(ACE_Message_Block* mb)
   {
-    boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg = parseIsmrmrdXMLHeader(std::string(mb->rd_ptr()));
-
-    if( cfg.get() == 0x0 ){
-      GADGET_DEBUG1("Unable to parse Ismrmrd header\n");
-      return GADGET_FAIL;
-    }
-
-    ISMRMRD::ismrmrdHeader::encoding_sequence e_seq = cfg->encoding();
-
-    if (e_seq.size() != 1) {
-      GADGET_DEBUG2("Number of encoding spaces: %d\n", e_seq.size());
+    // Get the Ismrmrd header
+    //
+    ISMRMRD::IsmrmrdHeader h;
+    ISMRMRD::deserialize(mb->rd_ptr(),h);
+    
+    
+    if (h.encoding.size() != 1) {
       GADGET_DEBUG1("This Gadget only supports one encoding space\n");
       return GADGET_FAIL;
     }
+    
+    // Get the encoding space and trajectory description
+    ISMRMRD::EncodingSpace e_space = h.encoding[0].encodedSpace;
+    ISMRMRD::EncodingSpace r_space = h.encoding[0].reconSpace;
+    ISMRMRD::EncodingLimits e_limits = h.encoding[0].encodingLimits;
 
     // Enforcement of the matrix size being a multiple of the "warp size"
     warp_size_ = get_int_value(std::string("matrix_size_as_a_multipluple_of").c_str());
 
-    ISMRMRD::encodingSpaceType e_space = (*e_seq.begin()).encodedSpace();
-    ISMRMRD::encodingLimitsType e_limits = (*e_seq.begin()).encodingLimits();
-
-    matrix_size_.push_back( (e_space.matrixSize().x()+warp_size_-1)/warp_size_*warp_size_);
-    matrix_size_.push_back( (e_space.matrixSize().y()+warp_size_-1)/warp_size_*warp_size_);
+    matrix_size_.push_back( (e_space.matrixSize.x+warp_size_-1)/warp_size_*warp_size_);
+    matrix_size_.push_back( (e_space.matrixSize.y+warp_size_-1)/warp_size_*warp_size_);
 
-    center_phase_ = e_limits.kspace_encoding_step_1().get().center();
+    center_phase_ = e_limits.kspace_encoding_step_1 ? e_limits.kspace_encoding_step_1->center : 0;
 
     return GADGET_OK;
   }
@@ -48,7 +46,7 @@ namespace Gadgetron{
     // Noise should have been consumed by the noise adjust, but just in case...
     //
 
-    bool is_noise = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_NOISE_MEASUREMENT).isSet(m1->getObjectPtr()->flags);
+    bool is_noise = m1->getObjectPtr()->isFlagSet(ISMRMRD::ISMRMRD_ACQ_IS_NOISE_MEASUREMENT);
     if (is_noise) {
       m1->release();
       return GADGET_OK;
diff --git a/gadgets/cartesian/CartesianToGenericGadget.h b/gadgets/cartesian/CartesianToGenericGadget.h
index e6f92e5..163e600 100644
--- a/gadgets/cartesian/CartesianToGenericGadget.h
+++ b/gadgets/cartesian/CartesianToGenericGadget.h
@@ -7,7 +7,7 @@
 #include "GadgetMRIHeaders.h"
 #include "hoNDArray.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <vector>
 #include <complex>
 #include <boost/shared_ptr.hpp>
@@ -20,7 +20,6 @@ namespace Gadgetron{
 
   public:
     GADGET_DECLARE(CartesianToGenericGadget);
-
     CartesianToGenericGadget();
     virtual ~CartesianToGenericGadget();
 
diff --git a/gadgets/dicom/CMakeLists.txt b/gadgets/dicom/CMakeLists.txt
index 0639800..4192a70 100644
--- a/gadgets/dicom/CMakeLists.txt
+++ b/gadgets/dicom/CMakeLists.txt
@@ -1,15 +1,36 @@
 # DCMTK-necessary preprocessor flags
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_CONFIG_H -D_REENTRANT -D_OSF_SOURCE")
 
-set(GT_DICOM_LIBRARIES
-    z
-    ${DCMTK_dcmdata_LIBRARY}
-    ${DCMTK_oflog_LIBRARY}
-    ${DCMTK_ofstd_LIBRARY}
-    m
-    #rt
-    #nsl
-    pthread)
+set(Boost_NO_BOOST_CMAKE ON)
+find_package(Boost COMPONENTS date_time REQUIRED)
+
+if(WIN32)
+  link_directories(${Boost_LIBRARY_DIRS})
+endif(WIN32)
+
+if (WIN32)
+    set(GT_DICOM_LIBRARIES
+        #z
+        ${DCMTK_dcmdata_LIBRARY}
+        ${DCMTK_oflog_LIBRARY}
+        ${DCMTK_ofstd_LIBRARY}
+        #m
+        #rt
+        #nsl
+        #pthread
+        )
+else (WIN32)
+    set(GT_DICOM_LIBRARIES
+        z
+        ${DCMTK_dcmdata_LIBRARY}
+        ${DCMTK_oflog_LIBRARY}
+        ${DCMTK_ofstd_LIBRARY}
+        m
+        #rt
+        #nsl
+        pthread
+        )
+endif (WIN32)
 
 # sanity check:
 #message("DCMTK ${DCMTK_HOME}")
@@ -17,27 +38,67 @@ set(GT_DICOM_LIBRARIES
 #message("Libraries: ${GT_DICOM_LIBRARIES}")
 
 include_directories(
-    ${CMAKE_SOURCE_DIR}/gadgets/mri_core    # for GadgetIsmrmrdReadWrite.h
-    ${DCMTK_INCLUDE_DIRS})
+        ${CMAKE_SOURCE_DIR}/gadgets/mri_core    # for GadgetIsmrmrdReadWrite.h
+        ${DCMTK_INCLUDE_DIRS}
+        ${CMAKE_SOURCE_DIR}/toolboxes/mri/pmri/gpu
+        ${CMAKE_SOURCE_DIR}/toolboxes/nfft/gpu
+        ${CMAKE_SOURCE_DIR}/toolboxes/core
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/hostutils
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/algorithm
+        ${CMAKE_SOURCE_DIR}/toolboxes/operators
+        ${CMAKE_SOURCE_DIR}/toolboxes/operators/cpu
+        ${CMAKE_SOURCE_DIR}/toolboxes/solvers
+        ${CMAKE_SOURCE_DIR}/toolboxes/solvers/cpu
+        ${CMAKE_SOURCE_DIR}/gadgets/core
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
+        ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow
+        ${CMAKE_SOURCE_DIR}/toolboxes/gtplus
+        ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/util
+        ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/workflow
+        ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/algorithm
+        ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/solver
+        ${CMAKE_SOURCE_DIR}/toolboxes/gadgettools
+        ${CMAKE_SOURCE_DIR}/apps/gadgetron
+        ${CMAKE_SOURCE_DIR}/apps/matlab
+        ${CMAKE_SOURCE_DIR}/gadgets/mri_core 
+        ${CMAKE_SOURCE_DIR}/gadgets/gtPlus
+        ${ARMADILLO_INCLUDE_DIRS}
+        )
+
+add_library(gadgetron_dicom SHARED
+    DicomFinishGadget.h DicomFinishGadget.cpp
+    DicomFinishAttribGadget.h DicomFinishAttribGadget.cpp
+    DicomImageWriter.h DicomImageWriter.cpp)
 
-set(GT_DICOM_LIB gadgetron_dicom)
-add_library(${GT_DICOM_LIB} SHARED
-    DicomFinishGadget.cpp
-    DicomImageWriter.cpp
-    ${ISMRMRD_XSD_SOURCE})
+set_target_properties(gadgetron_dicom PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
 
 target_link_libraries(
-    ${GT_DICOM_LIB}
+  gadgetron_dicom
+  gadgetron_gadgetbase
+    gadgetron_toolbox_cpucore 
     ${ISMRMRD_LIBRARIES}
     optimized ${ACE_LIBRARIES}
     debug ${ACE_DEBUG_LIBRARY}
-    ${XERCESC_LIBRARIES}
-    ${GT_DICOM_LIBRARIES})
+    ${Boost_LIBRARIES}
+    ${GT_DICOM_LIBRARIES} )
+
+if(ARMADILLO_FOUND)
+    target_link_libraries(gadgetron_dicom gadgetron_toolbox_cpucore_math )
+endif(ARMADILLO_FOUND)
+
+if(MKL_FOUND)
+    target_link_libraries(gadgetron_dicom gadgetron_toolbox_gtplus )
+endif(MKL_FOUND)
 
 install(
     FILES DicomFinishGadget.h DicomImageWriter.h gadgetron_dicom_export.h
-    DESTINATION include)
+    DESTINATION include COMPONENT main)
 
-install(TARGETS ${GT_DICOM_LIB} DESTINATION lib)
+install(TARGETS gadgetron_dicom DESTINATION lib COMPONENT main)
 
-install(FILES dicom.xml DESTINATION config)
+install(FILES dicom.xml DESTINATION ${GADGETRON_INSTALL_CONFIG_PATH} COMPONENT main)
diff --git a/gadgets/dicom/DicomFinishAttribGadget.cpp b/gadgets/dicom/DicomFinishAttribGadget.cpp
new file mode 100644
index 0000000..cf9a43b
--- /dev/null
+++ b/gadgets/dicom/DicomFinishAttribGadget.cpp
@@ -0,0 +1,897 @@
+#include <vector>
+#include "boost/date_time/gregorian/gregorian.hpp"            
+
+#include "DicomFinishAttribGadget.h"
+#include "GtPlusDefinition.h"
+#include "gtPlusIOAnalyze.h"
+#include "ismrmrd/xml.h"
+
+// Limits of a signed 16-bit pixel value. NOTE(review): neither macro is referenced in this file - confirm before relying on them.
+#define PIX_RANGE_MAX    (+32767)
+#define PIX_RANGE_MIN    (-32768)
+
+
+// Writes string s into DICOM element k of the local `dataset`, storing the result in the local `status`.
+// On failure it logs the tag and source line and makes the ENCLOSING function return GADGET_FAIL.
+#define WRITE_DCM_STRING(k, s)    \
+    do {                                                                    \
+        status = dataset->putAndInsertString(k, s);            \
+        if (!status.good()) {                                               \
+            GADGET_DEBUG2("Failed to insert DICOM field (0x%04X,0x%04X) at "\
+                "line %u\n", k.getGroup(), k.getElement(), __LINE__);       \
+            return GADGET_FAIL;                                             \
+        }                                                                   \
+    } while (0)
+
+namespace Gadgetron {
+
+template <typename T>
+int DicomFinishAttribGadget<T>::process_config(ACE_Message_Block* mb)
+{
+    OFCondition status;
+    DcmTagKey key;
+    long BUFSIZE = 1024;
+    char *buf = new char[BUFSIZE];  // used for writing numbers as strings in DCMTK
+
+    ISMRMRD::IsmrmrdHeader h;
+    deserialize(mb->rd_ptr(), h);
+
+    // Ensure DICOM dictionary is loaded
+    if (!dcmDataDict.isDictionaryLoaded()) {
+        GADGET_DEBUG1("Dictionary not loaded!  Set DCMDICTPATH\n");
+        return GADGET_FAIL;
+    }
+
+    ISMRMRD::ExperimentalConditions exp_cond = h.experimentalConditions;
+
+    if (h.subjectInformation) {
+        GADGET_DEBUG1("Header missing SubjectInformation parameters\n");
+        return GADGET_FAIL;
+    }
+
+    ISMRMRD::SubjectInformation patient_info = *h.subjectInformation;
+
+    if (h.studyInformation) {
+      GADGET_DEBUG1("Header missing StudyInformation parameters\n");
+      return GADGET_FAIL;
+    }
+
+    ISMRMRD::StudyInformation study_info = *h.studyInformation;
+
+    if (h.measurementInformation) {
+        GADGET_DEBUG1("Header missing MeasurementInformation parameters\n");
+        return GADGET_FAIL;
+    }
+
+    ISMRMRD::MeasurementInformation meas_info = *h.measurementInformation;
+
+    if (h.acquisitionSystemInformation) {
+        GADGET_DEBUG1("Header missing AcquisitionSystemInformation parameters\n");
+        return GADGET_FAIL;
+    }
+
+    ISMRMRD::AcquisitionSystemInformation sys_info = *h.acquisitionSystemInformation;
+
+    if (h.sequenceParameters) {
+        GADGET_DEBUG1("Header missing SequenceTiming parameters\n");
+        return GADGET_FAIL;
+    }
+
+    ISMRMRD::SequenceParameters seq_info = *h.sequenceParameters;
+
+    if (h.encoding.size() == 0) {
+      GADGET_DEBUG2("Number of encoding spaces: %d\n", h.encoding.size());
+      GADGET_DEBUG1("This Gadget needs an encoding description\n");
+      return GADGET_FAIL;
+    }
+
+
+    ISMRMRD::EncodingSpace e_space = h.encoding[0].encodedSpace;
+    ISMRMRD::EncodingSpace r_space = h.encoding[0].reconSpace;
+    ISMRMRD::EncodingLimits e_limits = h.encoding[0].encodingLimits;
+
+    DcmDataset *dataset = dcmFile.getDataset();
+    DcmMetaInfo *metainfo = dcmFile.getMetaInfo();
+
+
+    // Store initial Series Number for later
+    if (meas_info.initialSeriesNumber) {
+      this->initialSeriesNumber = (long)*meas_info.initialSeriesNumber;
+    } else {
+      this->initialSeriesNumber = 0;
+    }
+
+
+    // Set the Application Entity Title in the DICOM Meta Info section
+    // The rest of the Meta Info will be automatically populated by DCMTK
+    if (sys_info.stationName) {
+        status = metainfo->putAndInsertString(DcmTagKey(0x0002,0x0016),
+                sys_info.stationName->c_str());
+        if (!status.good()) {
+            GADGET_DEBUG1("Failed to set AET in MetaInfo\n");
+            return GADGET_FAIL;
+        }
+    } else {
+        status = metainfo->putAndInsertString(DcmTagKey(0x0002,0x0016), "none");
+        if (!status.good()) {
+            GADGET_DEBUG1("Failed to set AET in MetaInfo\n");
+            return GADGET_FAIL;
+        }
+    }
+
+    // Group Length
+    key.set(0x0008, 0x0000);
+    status = dataset->insertEmptyElement(key);
+    if (status.bad()) {
+        GADGET_DEBUG1("Failed to write 0x0008 Group Length\n");
+        return GADGET_FAIL;
+    }
+
+    // Specific Character Set
+    key.set(0x0008, 0x0005);
+    WRITE_DCM_STRING(key, "ISO_IR 100");
+
+    // Image Type
+    // ORIGINAL or DERIVED describes origin of pixel data
+    // PRIMARY or SECONDARY describes image creation time (during or after exam)
+    // OTHER, etc. are implementation-specific
+    key.set(0x0008, 0x0008);
+    WRITE_DCM_STRING(key, "ORIGINAL\\PRIMARY\\OTHER");
+
+    // SOPClassUID
+    key.set(0x0008, 0x0016);
+    WRITE_DCM_STRING(key, UID_MRImageStorage);
+
+    // Study Date
+    key.set(0x0008, 0x0020);
+    // ACE_OS::snprintf(buf, BUFSIZE, "%04d%02d%02d", study_info.studyDate().year(), study_info.studyDate().month(), study_info.studyDate().day());
+    ACE_OS::snprintf(buf, BUFSIZE, "%04d%02d%02d", 2014, 3, 21);
+    WRITE_DCM_STRING(key, buf);
+
+    // Series Date
+    key.set(0x0008, 0x0021);
+    // ACE_OS::snprintf(buf, BUFSIZE, "%04d%02d%02d", meas_info.seriesDate().year(), meas_info.seriesDate().month(), meas_info.seriesDate().day());
+    ACE_OS::snprintf(buf, BUFSIZE, "%04d%02d%02d", 2014, 3, 21);
+    WRITE_DCM_STRING(key, buf);
+    // Acquisition Date
+    key.set(0x0008, 0x0022);
+    WRITE_DCM_STRING(key, buf);
+    // Content Date
+    key.set(0x0008, 0x0023);
+    WRITE_DCM_STRING(key, buf);
+
+    // Study Time
+    key.set(0x0008, 0x0030);
+    //ACE_OS::snprintf(buf, BUFSIZE, "%02d%02d%02d", study_info.studyTime().hours(), study_info.studyTime().minutes(), (int)study_info.studyTime().seconds());
+    ACE_OS::snprintf(buf, BUFSIZE, "%02d%02d%02d", 12, 12, 12);
+    WRITE_DCM_STRING(key, buf);
+
+    // Series Time
+    key.set(0x0008, 0x0031);
+    // ACE_OS::snprintf(buf, BUFSIZE, "%02d%02d%02d", meas_info.seriesTime().hours(), meas_info.seriesTime().minutes(), (int)meas_info.seriesTime().seconds());
+    ACE_OS::snprintf(buf, BUFSIZE, "%02d%02d%02d", 12, 12, 12);
+    WRITE_DCM_STRING(key, buf);
+
+    // Acquisition Time
+    key.set(0x0008, 0x0032);
+    WRITE_DCM_STRING(key, buf);
+
+    // Content Time
+    key.set(0x0008, 0x0033);
+    WRITE_DCM_STRING(key, buf);
+
+    // Accession Number
+    key.set(0x0008, 0x0050);
+    if (study_info.accessionNumber) {
+        ACE_OS::snprintf(buf, BUFSIZE, "%ld", *study_info.accessionNumber);
+        WRITE_DCM_STRING(key, buf);
+    } else {
+        WRITE_DCM_STRING(key, 0);
+    }
+
+    // Modality
+    // TODO: this is hardcoded!!
+    key.set(0x0008, 0x0060);
+    WRITE_DCM_STRING(key, "MR");
+
+    // Manufacturer
+    key.set(0x0008, 0x0070);
+    if (sys_info.systemVendor) {
+        WRITE_DCM_STRING(key, sys_info.systemVendor->c_str());
+    } else {
+        WRITE_DCM_STRING(key, "UNKNOWN");
+    }
+
+    // Institution Name
+    key.set(0x0008, 0x0080);
+    if (sys_info.institutionName) {
+        WRITE_DCM_STRING(key, sys_info.institutionName->c_str());
+    } else {
+        WRITE_DCM_STRING(key, "UNKNOWN");
+    }
+
+    // Referring Physician's Name
+    key.set(0x0008, 0x0090);
+    if (study_info.referringPhysicianName) {
+        WRITE_DCM_STRING(key, study_info.referringPhysicianName->c_str());
+    } else {
+        WRITE_DCM_STRING(key, "");
+    }
+
+    // Station Name
+    key.set(0x0008, 0x1010);
+    if (sys_info.stationName) {
+        WRITE_DCM_STRING(key, sys_info.stationName->c_str());
+    } else {
+        WRITE_DCM_STRING(key, "");
+    }
+
+    // Study Description
+    key.set(0x0008, 0x1030);
+    if (study_info.studyDescription) {
+      WRITE_DCM_STRING(key, study_info.studyDescription->c_str());
+    } else {
+      WRITE_DCM_STRING(key, "");
+    }
+
+    // Series Description
+    key.set(0x0008, 0x103E);
+    if (meas_info.seriesDescription) {
+        WRITE_DCM_STRING(key, meas_info.seriesDescription->c_str());
+    } else {
+        WRITE_DCM_STRING(key, "");
+    }
+
+    // Manufacturer's Model Name
+    key.set(0x0008, 0x1090);
+    if (sys_info.systemModel) {
+        WRITE_DCM_STRING(key, sys_info.systemModel->c_str());
+    } else {
+        WRITE_DCM_STRING(key, "");
+    }
+
+    // Referenced SOP Instance UIDs
+    std::vector<ISMRMRD::ReferencedImageSequence> refs(meas_info.referencedImageSequence);
+    if (refs.size() > 0) {
+        DcmItem *ref_sequence;
+        std::vector<ISMRMRD::ReferencedImageSequence>::iterator it;
+        for (it = refs.begin(); it != refs.end(); ++it) {
+            std::string ref_uid(it->referencedSOPInstanceUID);
+            if (ref_uid.length() > 0) {   // Only write non-empty strings
+                if (dataset->findOrCreateSequenceItem(key, ref_sequence, -2).good()) {
+                    // Write the Referenced SOPClassUID (MRImageStorage)
+                    key.set(0x0008, 0x1150);
+                    ((DcmDataset *)ref_sequence)->putAndInsertString(key, UID_MRImageStorage);
+                    // Write the Referenced SOPInstanceUID
+                    key.set(0x0008, 0x1155);
+                    ((DcmDataset *)ref_sequence)->putAndInsertString(key, ref_uid.c_str());
+                }
+            }
+        }
+    }
+
+    // Group Length
+    key.set(0x0010, 0x0000);
+    status = dataset->insertEmptyElement(key);
+    if (!status.good()) {
+        GADGET_DEBUG1("Failed to write 0x0010 Group Length\n");
+        return GADGET_FAIL;
+    }
+
+    // Patient Name
+    key.set(0x0010, 0x0010);
+    if (patient_info.patientName) {
+        WRITE_DCM_STRING(key, patient_info.patientName->c_str());
+    } else {
+        WRITE_DCM_STRING(key, "None");
+    }
+
+    // Patient ID
+    key.set(0x0010, 0x0020);
+    if (patient_info.patientID) {
+        WRITE_DCM_STRING(key, patient_info.patientID->c_str());
+    } else {
+        WRITE_DCM_STRING(key, "0");
+    }
+
+    // Patient Birthdate
+    key.set(0x0010, 0x0030);
+    if (patient_info.patientBirthdate) {
+        //ACE_OS::snprintf(buf, BUFSIZE, "%04d%02d%02d", patient_info.patientBirthdate().get().year(),
+        //        patient_info.patientBirthdate().get().month(), patient_info.patientBirthdate().get().day());
+        //WRITE_DCM_STRING(key, buf);
+    } else {
+        status = dataset->insertEmptyElement(key);
+    }
+
+    // Patient Sex
+    key.set(0x0010, 0x0040);
+    if (patient_info.patientGender) {
+        if (*patient_info.patientGender == "O") {
+            status = dataset->insertEmptyElement(key);
+        }
+        else {
+            WRITE_DCM_STRING(key, patient_info.patientGender->c_str());
+        }
+    } else {
+        WRITE_DCM_STRING(key, "");
+    }
+
+    // Patient Age
+    key.set(0x0010, 0x1010);
+    if (patient_info.patientBirthdate && meas_info.seriesDate) {
+        boost::gregorian::date bday(boost::gregorian::from_simple_string(patient_info.patientBirthdate.get()));
+        boost::gregorian::date seriesDate(boost::gregorian::from_simple_string(meas_info.seriesDate.get()));
+
+        boost::gregorian::days age = seriesDate - bday;
+
+        long age_in_years = age.days() / 365;
+
+        ACE_OS::snprintf(buf, BUFSIZE, "%03ldY", age_in_years);
+        WRITE_DCM_STRING(key, buf);
+    } else {
+        WRITE_DCM_STRING(key, "000Y");
+    }
+
+    // Patient Weight
+    key.set(0x0010, 0x1030);
+    if (patient_info.patientWeight_kg) {
+        ACE_OS::snprintf(buf, BUFSIZE, "%f", *patient_info.patientWeight_kg);
+        WRITE_DCM_STRING(key, buf);
+    } else {
+        WRITE_DCM_STRING(key, "0.0");
+    }
+
+    // Group Length
+    key.set(0x0018, 0x0000);
+    status = dataset->insertEmptyElement(key);
+    if (!status.good()) {
+        GADGET_DEBUG1("Failed to write 0x0018 Group Length\n");
+        return GADGET_FAIL;
+    }
+
+    // Scanning Sequence, Sequence Variant, Scan Options, Acquisition Type
+    std::string scanningSequence("RM");
+    std::string sequenceVariant("NONE");
+    std::string scanOptions("NONE");
+    std::string mrAcquisitionType("2D");
+    if (h.userParameters) {
+        ISMRMRD::UserParameters user_params = h.userParameters.get();
+        std::vector<ISMRMRD::UserParameterString> strings = user_params.userParameterString;
+        std::vector<ISMRMRD::UserParameterString>::iterator it;
+
+        for (it = strings.begin(); it != strings.end(); ++it) {
+            if (it->name == "scanningSequence") {
+                scanningSequence = it->value;
+            } else if (it->name == "sequenceVariant") {
+                sequenceVariant = it->value;
+            } else if (it->name == "scanOptions") {
+                scanOptions = it->value;
+            } else if (it->name == "mrAcquisitionType") {
+                mrAcquisitionType = it->value;
+            }
+        }
+    }
+    key.set(0x0018, 0x0020);
+    WRITE_DCM_STRING(key, scanningSequence.c_str());
+    key.set(0x0018, 0x0021);
+    WRITE_DCM_STRING(key, sequenceVariant.c_str());
+    key.set(0x0018, 0x0022);
+    WRITE_DCM_STRING(key, scanOptions.c_str());
+    key.set(0x0018, 0x0023);
+    WRITE_DCM_STRING(key, mrAcquisitionType.c_str());
+
+    // Angio Flag
+    // TODO: hardcoded
+    key.set(0x0018, 0x0025);
+    WRITE_DCM_STRING(key, "N");
+
+    // Slice Thickness
+    // This will need updated if the "reconSpace.fieldOfView_mm.z" field
+    // is changed in the ISMRMRD populating code (client)
+    key.set(0x0018, 0x0050);
+    ACE_OS::snprintf(buf, BUFSIZE, "%f", r_space.fieldOfView_mm.z);
+    WRITE_DCM_STRING(key, buf);
+
+    // Repetition Time
+    key.set(0x0018, 0x0080);
+    ACE_OS::snprintf(buf, BUFSIZE, "%f", seq_info.TR.front());
+    WRITE_DCM_STRING(key, buf);
+
+    // Echo Time
+    key.set(0x0018, 0x0081);
+    ACE_OS::snprintf(buf, BUFSIZE, "%f", seq_info.TE.front());
+    WRITE_DCM_STRING(key, buf);
+
+    // Inversion Time
+    key.set(0x0018, 0x0082);
+    ACE_OS::snprintf(buf, BUFSIZE, "%f", seq_info.TI.front());
+    WRITE_DCM_STRING(key, buf);
+
+    // Flip Angle
+    key.set(0x0018, 0x1314);
+    ACE_OS::snprintf(buf, BUFSIZE, "%ld", (long)seq_info.flipAngle_deg.front());
+    WRITE_DCM_STRING(key, buf);
+
+    // Imaging Frequency in tenths of MHz ???
+    key.set(0x0018, 0x0084);
+    ACE_OS::snprintf(buf, BUFSIZE, "%f", (float)exp_cond.H1resonanceFrequency_Hz / 10000000.);
+    WRITE_DCM_STRING(key, buf);
+
+    // Magnetic Field Strength (T)
+    key.set(0x0018, 0x0087);
+    if (sys_info.systemFieldStrength_T) {
+        ACE_OS::snprintf(buf, BUFSIZE, "%f", *sys_info.systemFieldStrength_T);
+        WRITE_DCM_STRING(key, buf);
+    } else {
+        WRITE_DCM_STRING(key, "3.0");
+    }
+
+    // Spacing Between Slices
+    key.set(0x0018, 0x0088);
+    ACE_OS::snprintf(buf, BUFSIZE, "%f", r_space.fieldOfView_mm.z);
+    WRITE_DCM_STRING(key, buf);
+
+    // Echo Train Length
+    if (h.encoding[0].echoTrainLength) {
+        key.set(0x0018, 0x0091);
+        ACE_OS::snprintf(buf, BUFSIZE, "%ld", (long)*h.encoding[0].echoTrainLength);
+        WRITE_DCM_STRING(key, buf);
+    } else {
+        WRITE_DCM_STRING(key, "1");
+    }
+
+    // Percent Sampling
+    // TODO: hardcoded
+    key.set(0x0018, 0x0093);
+    WRITE_DCM_STRING(key, "100");
+
+    // Percent Phase FOV
+    // TODO: hardcoded
+    key.set(0x0018, 0x0094);
+    WRITE_DCM_STRING(key, "100");
+
+    // Protocol Name
+    if (meas_info.protocolName) {
+        key.set(0x0018, 0x1030);
+        WRITE_DCM_STRING(key, meas_info.protocolName.get().c_str());
+    } else {
+        WRITE_DCM_STRING(key, "");
+    }
+
+    // Trigger Time - TODO: use Image Meta Data
+    key.set(0x0018, 0x1060);
+    WRITE_DCM_STRING(key, "0.0");
+
+    // Reconstruction Diameter (FOV) - TODO: ?
+    key.set(0x0018, 0x1100);
+
+    // Frequency Encoding Direction - TODO: use Image Meta Data
+    key.set(0x0018, 0x1312);
+    WRITE_DCM_STRING(key, "ROW");
+
+    // Patient Position
+    key.set(0x0018, 0x5100);
+    WRITE_DCM_STRING(key, meas_info.patientPosition.c_str());
+
+    /****************************************/
+    // Group Length
+    key.set(0x0020, 0x0000);
+    status = dataset->insertEmptyElement(key);
+    if (!status.good()) {
+        GADGET_DEBUG1("Failed to write 0x0020 Group Length\n");
+        return GADGET_FAIL;
+    }
+
+    // Study Instance UID
+    key.set(0x0020, 0x000D);
+    if (study_info.studyInstanceUID) {
+      WRITE_DCM_STRING(key, study_info.studyInstanceUID->c_str());
+    }
+
+    // Study ID
+    if (study_info.studyID) {
+        key.set(0x0020, 0x0010);
+        WRITE_DCM_STRING(key, study_info.studyID->c_str());
+    } else {
+        WRITE_DCM_STRING(key, "0");
+    }
+
+    // Store Series Instance UID for later
+    if (meas_info.seriesInstanceUIDRoot) {
+      seriesIUIDRoot = *meas_info.seriesInstanceUIDRoot;
+    }
+
+    // Frame of Reference UID
+    if (meas_info.frameOfReferenceUID) {
+        key.set(0x0020, 0x0052);
+        WRITE_DCM_STRING(key, meas_info.frameOfReferenceUID->c_str());
+    }
+
+    /****************************************/
+    // Group Length
+    key.set(0x0028, 0x0000);
+    status = dataset->insertEmptyElement(key);
+    if (!status.good()) {
+        GADGET_DEBUG1("Failed to write 0x0028 Group Length\n");
+        return GADGET_FAIL;
+    }
+
+    // Samples Per Pixel
+    key.set(0x0028, 0x0002);
+    // TODO: hardcoded
+    WRITE_DCM_STRING(key, "1");
+
+    // Photometric Interpretation
+    key.set(0x0028, 0x0004);
+    // TODO: hardcoded
+    WRITE_DCM_STRING(key, "MONOCHROME2");
+
+    // Pixel Spacing (Array of len 2)
+    key.set(0x0028, 0x0030);
+    float pixel_spacing_X = r_space.fieldOfView_mm.x / r_space.matrixSize.x;
+    float pixel_spacing_Y = r_space.fieldOfView_mm.y / r_space.matrixSize.y;
+    ACE_OS::snprintf(buf, BUFSIZE, "%.3f\\%.3f", pixel_spacing_X, pixel_spacing_Y);
+    WRITE_DCM_STRING(key, buf);
+
+    // Bits Allocated
+    key.set(0x0028, 0x0100);
+    WRITE_DCM_STRING(key, "16");
+    // Bits Stored
+    key.set(0x0028, 0x0101);
+    WRITE_DCM_STRING(key, "16");
+    // High Bit
+    key.set(0x0028, 0x0102);
+    WRITE_DCM_STRING(key, "15");
+    // Pixel Representation
+    key.set(0x0028, 0x0103);
+    WRITE_DCM_STRING(key, "1");
+
+    //GADGET_DEBUG1("Finished populating DICOM fields\n");
+
+    /* clean up the buffer we created for ACE_OS::snprintf */
+    delete[] buf;
+
+    return GADGET_OK;
+}
+// Casts one image to 16-bit pixels, writes the per-image DICOM fields into dcmFile, and hands a copy (with file name and meta attributes) to the controller.
+template <typename T>
+int DicomFinishAttribGadget<T>::process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1, GadgetContainerMessage< hoNDArray< T > >* m2, GadgetContainerMessage<ISMRMRD::MetaContainer>* m3)
+{
+    if (!this->controller_) {
+        ACE_DEBUG( (LM_DEBUG,
+                    ACE_TEXT("Cannot return result to controller, no controller set")) );
+        return -1;
+    }
+
+    // --------------------------------------------------
+
+    ISMRMRD::MetaContainer* img_attrib = m3->getObjectPtr();
+
+    // image data role
+    size_t n;
+
+    size_t num = img_attrib->length(GTPLUS_DATA_ROLE);
+
+    std::vector<std::string> dataRole;
+    if ( num == 0 )
+    {
+        dataRole.push_back("Image");
+    }
+    else
+    {
+        dataRole.resize(num);
+        for ( n=0; n<num; n++ )
+        {
+            dataRole[n] = std::string( img_attrib->as_str(GTPLUS_DATA_ROLE, n) );
+        }
+    }
+
+    long imageNumber = img_attrib->as_long(GTPLUS_IMAGENUMBER, 0);
+
+    long cha, slc, e2, con, phs, rep, set, ave;
+    cha = img_attrib->as_long(GTPLUS_CHA,        0);
+    slc = img_attrib->as_long(GTPLUS_SLC,        0);
+    e2  = img_attrib->as_long(GTPLUS_E2,         0);
+    con = img_attrib->as_long(GTPLUS_CONTRAST,   0);
+    phs = img_attrib->as_long(GTPLUS_PHASE,      0);
+    rep = img_attrib->as_long(GTPLUS_REP,        0);
+    set = img_attrib->as_long(GTPLUS_SET,        0);
+    ave = img_attrib->as_long(GTPLUS_AVERAGE,    0);
+
+    std::ostringstream ostr;
+
+    for ( n=0; n<dataRole.size(); n++ )
+    {
+        ostr << dataRole[n] << "_";
+    }
+
+    ostr << "SLC" << slc << "_"
+         << "E2" << e2 << "_"
+         << "CON" << con << "_"
+         << "PHS" << phs << "_"
+         << "REP" << rep << "_"
+         << "SET" << set << "_"
+         << "AVE" << ave << std::ends;
+
+    std::string filename = ostr.str();
+
+    GadgetContainerMessage<std::string>* mfilename = new GadgetContainerMessage<std::string>();
+    *(mfilename->getObjectPtr()) = filename;
+    // NOTE(review): mfilename (and pixels below) are not released on the early GADGET_FAIL returns - confirm ownership
+    //typedef ISMRMRD::MetaContainer::size_t_type size_t_type;
+    //char* meta_buf = NULL;
+    //size_t_type len(0);
+    //{
+    //    if ( !m3->getObjectPtr()->serialize(meta_buf, len) )
+    //    {
+    //        ACE_DEBUG ((LM_ERROR, ACE_TEXT ("(%P|%t) Unable to serialize dicom image meta attributes \n")));
+    //        return -1;
+    //    }
+    //}
+
+    // --------------------------------------------------
+
+    GadgetContainerMessage<hoNDArray< ACE_INT16 > > *pixels =
+            new GadgetContainerMessage<hoNDArray< ACE_INT16 > >();
+    boost::shared_ptr< std::vector<size_t> > dims = m2->getObjectPtr()->get_dimensions();
+
+    try {
+        pixels->getObjectPtr()->create(dims.get());
+    } catch (bad_alloc& ) {
+        GADGET_DEBUG1("Unable to create short storage in DicomFinishAttribGadget");
+        return GADGET_FAIL;
+    }
+
+    /* create ImageHeader and hoNDArray pointers for better readability */
+    ISMRMRD::ImageHeader *img = m1->getObjectPtr();
+    hoNDArray<ACE_INT16>* data = pixels->getObjectPtr();
+
+    /* grab pointers to both the original and new data arrays
+     * The original is of type T
+     * The new is of type ACE_INT16 */
+    T *src = m2->getObjectPtr()->get_data_ptr();
+    ACE_INT16 *dst = data->get_data_ptr();
+
+    /* Convert/cast each element in the data array and simultaneously find the
+     * min/max/mean pixel value, which will be used later for some crude windowing */
+    double sum_pix_val = 0.0;   // accumulated in double: a sum of type T wraps around for 16-bit T
+    T min_pix_val = 0, max_pix_val = 0;
+    const unsigned long num_pixels = (unsigned long)pixels->getObjectPtr()->get_number_of_elements();
+    if (num_pixels > 0) {
+        min_pix_val = max_pix_val = src[0];
+    }
+    for (unsigned long i = 0; i < num_pixels; i++) {
+        T pix_val = src[i];
+        // search for minimum and maximum pixel values
+        if (pix_val < min_pix_val) min_pix_val = pix_val;
+        if (pix_val > max_pix_val) max_pix_val = pix_val;
+        sum_pix_val += pix_val;
+
+        // copy/cast the pixel value to a short int
+        dst[i] = static_cast<ACE_INT16>(pix_val);
+    }
+    T mean_pix_val = (num_pixels > 0) ? (T)(sum_pix_val / num_pixels) : (T)0;
+
+    // export data
+    /*Gadgetron::gtPlus::gtPlusIOAnalyze gt_io;
+    gt_io.export2DArray(*data, "data");*/
+
+    /* replace the old 'message2' with the new data */
+    m1->cont(pixels);
+    /* release the old data array */
+    m2->cont(NULL);
+    m2->release();
+    /* update the image data_type.
+     * There is currently no SIGNED SHORT type so this will have to suffice */
+    m1->getObjectPtr()->data_type = ISMRMRD::ISMRMRD_USHORT;
+
+    const unsigned int BUFSIZE = 1024;
+    char buf[BUFSIZE];  // stack scratch buffer for snprintf; cannot leak on the early returns below
+    OFCondition status;
+    DcmTagKey key;
+    DcmDataset *dataset = dcmFile.getDataset();
+
+    // Echo Number
+    // TODO: it is often the case the img->contrast is not properly set
+    // likely due to the allocated ISMRMRD::ImageHeader being uninitialized
+    key.set(0x0018, 0x0086);
+    ACE_OS::snprintf(buf, BUFSIZE, "%d", img->contrast);
+    WRITE_DCM_STRING(key, buf);
+
+    // Acquisition Matrix ... Image Dimensions
+    // Defined as: [frequency rows, frequency columns, phase rows, phase columns]
+    // But at this point in the gadget I don't know the frequency encode direction
+    key.set(0x0018, 0x1310);
+    ACE_UINT16 im_dim[4] = {0,0,0,0};
+    /* if (frequence_encode_dir == "ROW")) {
+        // frequency encoding direction is ROW
+        im_dim[1] = img->matrix_size[0];
+        im_dim[2] = img->matrix_size[1];
+    } */
+    // frequency encoding direction is COLUMN
+    /*im_dim[0] = img->matrix_size[0];
+    im_dim[3] = img->matrix_size[1];*/
+
+    im_dim[1] = img->matrix_size[0];
+    im_dim[2] = img->matrix_size[1];
+
+    status = dataset->putAndInsertUint16Array(key, im_dim, 4);
+    if (!status.good()) {
+        GADGET_DEBUG1("Failed to stuff image dimensions\n");
+        return GADGET_FAIL;
+    }
+
+    // Series Number
+    // Always written as initialSeriesNumber * 100 + image_series_index
+    key.set(0x0020, 0x0011);
+    ACE_OS::snprintf(buf, BUFSIZE, "%ld", this->initialSeriesNumber * 100 + img->image_series_index);
+    WRITE_DCM_STRING(key, buf);
+
+    // Image Number
+    key.set(0x0020, 0x0013);
+    ACE_OS::snprintf(buf, BUFSIZE, "%d", img->image_index + 1);
+    WRITE_DCM_STRING(key, buf);
+
+    // Image Position (Patient)
+    float corner[3];
+
+    corner[0] = img->position[0] -
+            (img->field_of_view[0] / 2.0f) * img->read_dir[0] -
+            (img->field_of_view[1] / 2.0f) * img->phase_dir[0];
+    corner[1] = img->position[1] -
+            (img->field_of_view[0] / 2.0f) * img->read_dir[1] -
+            (img->field_of_view[1] / 2.0f) * img->phase_dir[1];
+    corner[2] = img->position[2] -
+            (img->field_of_view[0] / 2.0f) * img->read_dir[2] -
+            (img->field_of_view[1] / 2.0f) * img->phase_dir[2];
+
+    key.set(0x0020, 0x0032);
+    ACE_OS::snprintf(buf, BUFSIZE, "%.4f\\%.4f\\%.4f", corner[0], corner[1], corner[2]);
+    WRITE_DCM_STRING(key, buf);
+
+    // Image Orientation
+    // read_dir, phase_dir, and slice_dir were calculated in
+    // a DICOM/patient coordinate system, so just plug them in
+    key.set(0x0020, 0x0037);
+    ACE_OS::snprintf(buf, BUFSIZE, "%.4f\\%.4f\\%.4f\\%.4f\\%.4f\\%.4f",
+            img->read_dir[0], img->read_dir[1], img->read_dir[2],
+            img->phase_dir[0], img->phase_dir[1], img->phase_dir[2]);
+    WRITE_DCM_STRING(key, buf);
+
+    // Slice Location
+    key.set(0x0020, 0x1041);
+    ACE_OS::snprintf(buf, BUFSIZE, "%f", img->position[2]);
+    WRITE_DCM_STRING(key, buf);
+
+    // Columns
+    key.set(0x0028, 0x0010);
+    ACE_OS::snprintf(buf, BUFSIZE, "%d", img->matrix_size[1]);
+    WRITE_DCM_STRING(key, buf);
+
+    // Rows
+    key.set(0x0028, 0x0011);
+    ACE_OS::snprintf(buf, BUFSIZE, "%d", img->matrix_size[0]);
+    WRITE_DCM_STRING(key, buf);
+
+    // Simple windowing using pixel values calculated earlier...
+    int mid_pix_val = (int)(max_pix_val + min_pix_val) / 2;
+    int window_center = (int)(mid_pix_val + mean_pix_val) / 2;
+    int window_width_left = (int)(window_center - min_pix_val);
+    int window_width_right = (int)(max_pix_val - window_center);
+    int window_width = (window_width_right > window_width_left) ?
+            window_width_right : window_width_left;
+
+    // Window Center
+    key.set(0x0028, 0x1050);
+    ACE_OS::snprintf(buf, BUFSIZE, "%d", window_center);
+    WRITE_DCM_STRING(key, buf);
+
+    // Window Width
+    key.set(0x0028, 0x1051);
+    ACE_OS::snprintf(buf, BUFSIZE, "%d", window_width);
+    WRITE_DCM_STRING(key, buf);
+
+    // ACR_NEMA_2C_VariablePixelDataGroupLength
+    key.set(0x7fe0, 0x0000);
+    status = dataset->insertEmptyElement(key);
+    if (!status.good()) {
+        GADGET_DEBUG1("Failed to write 0x7fe0 Group Length\n");
+        return GADGET_FAIL;
+    }
+
+    // Pixel Data
+    if ((unsigned long)img->matrix_size[0] * (unsigned long)img->matrix_size[1] !=
+                data->get_number_of_elements()) {
+        GADGET_DEBUG1("Mismatch in image dimensions and available data\n");
+        return GADGET_FAIL;
+    }
+    key.set(0x7fe0, 0x0010);
+    status = dataset->putAndInsertUint16Array(key, (unsigned short *)data->get_data_ptr(), (unsigned long)data->get_number_of_elements());
+    if (!status.good()) {
+        GADGET_DEBUG1("Failed to stuff Pixel Data\n");
+        return GADGET_FAIL;
+    }
+
+    // Series Instance UID = generated here
+    key.set(0x0020, 0x000E);
+    unsigned short series_number = img->image_series_index + 1;
+
+    // Try to find an already-generated Series Instance UID in our map
+    std::map<unsigned int, std::string>::iterator it = seriesIUIDs.find(series_number);
+
+    if (it == seriesIUIDs.end()) {
+        // Didn't find a Series Instance UID for this series number
+        char prefix[32];
+        char newuid[96];
+        if (seriesIUIDRoot.length() > 20) {
+            memcpy(prefix, seriesIUIDRoot.c_str(), 20);
+            prefix[20] = '\0';
+            dcmGenerateUniqueIdentifier(newuid, prefix);
+        } else {
+            dcmGenerateUniqueIdentifier(newuid);
+        }
+        seriesIUIDs[series_number] = std::string(newuid);
+    }
+    WRITE_DCM_STRING(key, seriesIUIDs[series_number].c_str());
+
+    // At a minimum, to put the DICOM image back into the database,
+    // you must change the SOPInstanceUID.
+    key.set(0x0008, 0x0018);        // SOPInstanceUID
+    // The prefix lives in a named std::string: c_str() of a temporary would dangle after the statement.
+    std::string root("1.2.840.113619.2.156");
+    if (seriesIUIDRoot.length() > 0) {
+        root.assign(seriesIUIDRoot, 0, 20);
+    }
+
+    char newuid[65];
+    dcmGenerateUniqueIdentifier(newuid, root.c_str());
+    WRITE_DCM_STRING(key, newuid);
+
+    //// set the private fields to store meta attributes
+    //key.set(0x0051, 0x0000);
+    //status = dataset->insertEmptyElement(key);
+    //if (!status.good())
+    //{
+    //    GADGET_DEBUG1("Failed to write 0x0051 Group Length\n");
+    //    return GADGET_FAIL;
+    //}
+
+    //key.set(0x0051, 0x0019);
+    //WRITE_DCM_STRING(key, buf+sizeof(size_t_type));
+
+    //delete [] meta_buf;
+
+    // buf is an automatic array, so there is nothing to free here
+    // or on any of the early GADGET_FAIL returns above.
+
+    GadgetContainerMessage<DcmFileFormat>* mdcm = new GadgetContainerMessage<DcmFileFormat>();
+
+    *mdcm->getObjectPtr() = dcmFile;
+
+    GadgetContainerMessage<GadgetMessageIdentifier>* mb =
+        new GadgetContainerMessage<GadgetMessageIdentifier>();
+
+    mb->getObjectPtr()->id = GADGET_MESSAGE_DICOM_WITHNAME;
+
+    mb->cont(mdcm);
+    mdcm->cont(mfilename);
+    mfilename->cont(m3);
+
+    int ret =  this->controller_->output_ready(mb);
+
+    if ( (ret < 0) )
+    {
+        GADGET_DEBUG1("Failed to return message to controller\n");
+        return GADGET_FAIL;
+    }
+
+    return GADGET_OK;
+}
+
+
+//Declare factories for the various template instances
+GADGET_FACTORY_DECLARE(DicomFinishAttribGadgetFLOAT)
+GADGET_FACTORY_DECLARE(DicomFinishAttribGadgetUSHORT)
+
+} /* namespace Gadgetron */
diff --git a/gadgets/dicom/DicomFinishAttribGadget.h b/gadgets/dicom/DicomFinishAttribGadget.h
new file mode 100644
index 0000000..f8fb902
--- /dev/null
+++ b/gadgets/dicom/DicomFinishAttribGadget.h
@@ -0,0 +1,76 @@
+/** \file       DicomFinishAttribGadget.h
+    \brief      Assemble the DICOM images and send them out
+
+                Each DICOM image is sent out as the message chain: message id -> DICOM image -> DICOM image name -> meta attributes
+    \author     Hui Xue
+*/
+
+#ifndef DICOMFINISHATTRIBGADGET_H
+#define DICOMFINISHATTRIBGADGET_H
+
+#include "gadgetron_dicom_export.h"
+
+#include "Gadget.h"
+#include "hoNDArray.h"
+#include "ismrmrd/meta.h"
+#include "GadgetMRIHeaders.h"
+#include "ismrmrd/ismrmrd.h"
+#include "GadgetStreamController.h"
+
+#include "dcmtk/config/osconfig.h"
+#include "dcmtk/ofstd/ofstdinc.h"
+#define INCLUDE_CSTDLIB
+#define INCLUDE_CSTDIO
+#define INCLUDE_CSTRING
+#include "dcmtk/dcmdata/dctk.h"
+#include "dcmtk/dcmdata/dcostrmb.h"
+
+#include <string>
+#include <map>
+#include <complex>
+
+namespace Gadgetron
+{
+
+template <typename T>
+class EXPORTGADGETSDICOM DicomFinishAttribGadget : public Gadget3<ISMRMRD::ImageHeader, hoNDArray< T >, ISMRMRD::MetaContainer >
+{
+    public:
+        // Gadget taking (image header, pixel array of T, meta attributes) message triples; T is the pixel type.
+        typedef Gadget3<ISMRMRD::ImageHeader, hoNDArray< T >, ISMRMRD::MetaContainer > BaseClass;
+        // Gives every member a defined initial value (initialSeriesNumber would otherwise be indeterminate).
+        DicomFinishAttribGadget()
+            : BaseClass()
+            , dcmFile()
+            , seriesIUIDRoot()
+            , initialSeriesNumber(0), seriesIUIDs() { }
+
+    protected:
+        // Both handlers are only declared here; their definitions live out of line.
+        virtual int process_config(ACE_Message_Block * mb);
+        virtual int process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1, GadgetContainerMessage< hoNDArray< T > >* m2, GadgetContainerMessage<ISMRMRD::MetaContainer>* m3);
+
+    private:
+        DcmFileFormat dcmFile;                              // DICOM file object copied into each outgoing message
+        std::string seriesIUIDRoot;                         // optional root prefix for generated UIDs; empty if unset
+        long initialSeriesNumber;                           // starts at 0 until assigned
+        std::map <unsigned int, std::string> seriesIUIDs;   // per-series UID strings, keyed by series number
+};
+
+class EXPORTGADGETSDICOM DicomFinishAttribGadgetUSHORT :  // concrete gadget: ACE_UINT16 pixel data
+    public DicomFinishAttribGadget<ACE_UINT16>
+{
+    public:
+        GADGET_DECLARE(DicomFinishAttribGadgetUSHORT);  // a GADGET_FACTORY_DECLARE for this class appears earlier in this patch
+};
+
+class EXPORTGADGETSDICOM DicomFinishAttribGadgetFLOAT :  // concrete gadget: float pixel data
+    public DicomFinishAttribGadget<float>
+{
+    public:
+        GADGET_DECLARE(DicomFinishAttribGadgetFLOAT);  // a GADGET_FACTORY_DECLARE for this class appears earlier in this patch
+};
+
+} /* namespace Gadgetron */
+
+#endif // DICOMFINISHATTRIBGADGET_H
diff --git a/gadgets/dicom/DicomFinishGadget.cpp b/gadgets/dicom/DicomFinishGadget.cpp
index 25154b5..279166d 100644
--- a/gadgets/dicom/DicomFinishGadget.cpp
+++ b/gadgets/dicom/DicomFinishGadget.cpp
@@ -1,19 +1,10 @@
-// DICOM includes
-#include "dcmtk/config/osconfig.h"
-#include "dcmtk/ofstd/ofstdinc.h"
-#define INCLUDE_CSTDLIB
-#define INCLUDE_CSTDIO
-#define INCLUDE_CSTRING
-#include "dcmtk/dcmdata/dctk.h"
-#include "dcmtk/dcmdata/dcostrmb.h"
 
 #include <vector>
+#include "boost/date_time/gregorian/gregorian.hpp"            
 
-#include "GadgetIsmrmrdReadWrite.h"
 #include "DicomFinishGadget.h"
-
-using namespace std;
-
+#include "ismrmrd/xml.h"
+#include <iostream>
 // Used for windowing using short ints
 #define PIX_RANGE_MAX    (+32767)
 #define PIX_RANGE_MIN    (-32768)
@@ -41,10 +32,8 @@ int DicomFinishGadget<T>::process_config(ACE_Message_Block* mb)
     long BUFSIZE = 1024;
     char *buf = new char[BUFSIZE];  // used for writing numbers as strings in DCMTK
 
-    // Parse ISMRMRD XML header
-    boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg = parseIsmrmrdXMLHeader(string(mb->rd_ptr()));
-
-    //GADGET_DEBUG1("Processing XML config in DicomFinishGadget\n");
+    ISMRMRD::IsmrmrdHeader h;
+    deserialize(mb->rd_ptr(), h);
 
     // Ensure DICOM dictionary is loaded
     if (!dcmDataDict.isDictionaryLoaded()) {
@@ -52,69 +41,70 @@ int DicomFinishGadget<T>::process_config(ACE_Message_Block* mb)
         return GADGET_FAIL;
     }
 
-    ISMRMRD::experimentalConditionsType exp_cond = cfg->experimentalConditions();
+    ISMRMRD::ExperimentalConditions exp_cond = h.experimentalConditions;
 
-    if (!cfg->subjectInformation().present()) {
+    if (!h.subjectInformation) {
         GADGET_DEBUG1("Header missing SubjectInformation parameters\n");
         return GADGET_FAIL;
     }
-    ISMRMRD::subjectInformationType patient_info = cfg->subjectInformation().get();
 
-    if (!cfg->studyInformation().present()) {
-        GADGET_DEBUG1("Header missing StudyInformation parameters\n");
-        return GADGET_FAIL;
+    ISMRMRD::SubjectInformation patient_info = *h.subjectInformation;
+
+    if (!h.studyInformation) {
+      GADGET_DEBUG1("Header missing StudyInformation parameters\n");
+      return GADGET_FAIL;
     }
-    ISMRMRD::studyInformationType study_info = cfg->studyInformation().get();
 
-    if (!cfg->measurementInformation().present()) {
+    ISMRMRD::StudyInformation study_info = *h.studyInformation;
+
+    if (!h.measurementInformation) {
         GADGET_DEBUG1("Header missing MeasurementInformation parameters\n");
         return GADGET_FAIL;
     }
-    ISMRMRD::measurementInformationType meas_info = cfg->measurementInformation().get();
 
-    if (!cfg->acquisitionSystemInformation().present()) {
+    ISMRMRD::MeasurementInformation meas_info = *h.measurementInformation;
+
+    if (!h.acquisitionSystemInformation) {
         GADGET_DEBUG1("Header missing AcquisitionSystemInformation parameters\n");
         return GADGET_FAIL;
     }
-    ISMRMRD::acquisitionSystemInformationType sys_info = cfg->acquisitionSystemInformation().get();
 
-    if (!cfg->sequenceParameters().present()) {
+    ISMRMRD::AcquisitionSystemInformation sys_info = *h.acquisitionSystemInformation;
+
+    if (!h.sequenceParameters) {
         GADGET_DEBUG1("Header missing SequenceTiming parameters\n");
         return GADGET_FAIL;
     }
-    ISMRMRD::sequenceParametersType seq_info = cfg->sequenceParameters().get();
 
-    // Ensure that the XML header contains the DICOM parameters
-    if (!cfg->dicomParameters().present()) {
-        GADGET_DEBUG1("Header missing DICOM parameters\n");
-        return GADGET_OK;
+    ISMRMRD::SequenceParameters seq_info = *h.sequenceParameters;
+
+    if (h.encoding.size() == 0) {
+      GADGET_DEBUG2("Number of encoding spaces: %d\n", h.encoding.size());
+      GADGET_DEBUG1("This Gadget needs an encoding description\n");
+      return GADGET_FAIL;
     }
 
-    ISMRMRD::ismrmrdHeader::encoding_sequence e_seq = cfg->encoding();
-    ISMRMRD::encodingSpaceType e_space = (*e_seq.begin()).encodedSpace();
-    ISMRMRD::encodingSpaceType r_space = (*e_seq.begin()).reconSpace();
-    ISMRMRD::encodingLimitsType e_limits = (*e_seq.begin()).encodingLimits();
 
-    ISMRMRD::dicomParametersType dcm_params = cfg->dicomParameters().get();
-    ISMRMRD::MRImageModule mr_image(dcm_params.MRImageModule().get());
+    ISMRMRD::EncodingSpace e_space = h.encoding[0].encodedSpace;
+    ISMRMRD::EncodingSpace r_space = h.encoding[0].reconSpace;
+    ISMRMRD::EncodingLimits e_limits = h.encoding[0].encodingLimits;
 
     DcmDataset *dataset = dcmFile.getDataset();
     DcmMetaInfo *metainfo = dcmFile.getMetaInfo();
 
-
     // Store initial Series Number for later
-    if (meas_info.initialSeriesNumber().present()) {
-        this->initialSeriesNumber = meas_info.initialSeriesNumber().get();
+    if (meas_info.initialSeriesNumber) {
+      this->initialSeriesNumber = (long)*meas_info.initialSeriesNumber;
     } else {
-        this->initialSeriesNumber = 0;
+      this->initialSeriesNumber = 0;
     }
 
 
     // Set the Application Entity Title in the DICOM Meta Info section
     // The rest of the Meta Info will be automatically populated by DCMTK
-    if (sys_info.stationName().present()) {
+    if (sys_info.stationName) {
         status = metainfo->putAndInsertString(DcmTagKey(0x0002,0x0016),
-                sys_info.stationName().get().c_str());
+                sys_info.stationName->c_str());
         if (!status.good()) {
             GADGET_DEBUG1("Failed to set AET in MetaInfo\n");
             return GADGET_FAIL;
@@ -140,59 +130,64 @@ int DicomFinishGadget<T>::process_config(ACE_Message_Block* mb)
     WRITE_DCM_STRING(key, "ISO_IR 100");
 
     // Image Type
+    // ORIGINAL or DERIVED describes origin of pixel data
+    // PRIMARY or SECONDARY describes image creation time (during or after exam)
+    // OTHER, etc. are implementation-specific
     key.set(0x0008, 0x0008);
-    if (mr_image.imageType().present()) {
-        WRITE_DCM_STRING(key, mr_image.imageType().get().c_str());
-    } else {
-        WRITE_DCM_STRING(key, "ORIGINAL\\PRIMARY\\OTHER");
-    }
+    WRITE_DCM_STRING(key, "ORIGINAL\\PRIMARY\\OTHER");
 
     // SOPClassUID
     key.set(0x0008, 0x0016);
     WRITE_DCM_STRING(key, UID_MRImageStorage);
 
     // Study Date
-    key.set(0x0008, 0x0020);
-    snprintf(buf, BUFSIZE, "%04d%02d%02d", study_info.studyDate().year(),
-            study_info.studyDate().month(), study_info.studyDate().day());
-    WRITE_DCM_STRING(key, buf);
+    if (study_info.studyDate) {
+        key.set(0x0008, 0x0020);
+        std::string d(study_info.studyDate.get());
+        d.erase(std::remove(d.begin(), d.end(), '-'), d.end());     // erase all occurrences of '-'
+        WRITE_DCM_STRING(key, d.c_str());
+    }
 
-    // Series Date
-    key.set(0x0008, 0x0021);
-    snprintf(buf, BUFSIZE, "%04d%02d%02d", meas_info.seriesDate().year(),
-            meas_info.seriesDate().month(), meas_info.seriesDate().day());
-    WRITE_DCM_STRING(key, buf);
-    // Acquisition Date
-    key.set(0x0008, 0x0022);
-    WRITE_DCM_STRING(key, buf);
-    // Content Date
-    key.set(0x0008, 0x0023);
-    WRITE_DCM_STRING(key, buf);
+    // Series, Acquisition, Content Date
+    if (meas_info.seriesDate) {
+        key.set(0x0008, 0x0021);
+        std::string d(meas_info.seriesDate.get());
+        d.erase(std::remove(d.begin(), d.end(), '-'), d.end());
+        WRITE_DCM_STRING(key, d.c_str());
+
+        key.set(0x0008, 0x0022);
+        WRITE_DCM_STRING(key, d.c_str());
+
+        key.set(0x0008, 0x0023);
+        WRITE_DCM_STRING(key, d.c_str());
+    }
 
     // Study Time
-    key.set(0x0008, 0x0030);
-    snprintf(buf, BUFSIZE, "%02d%02d%02d", study_info.studyTime().hours(),
-            study_info.studyTime().minutes(), (int)study_info.studyTime().seconds());
-    WRITE_DCM_STRING(key, buf);
+    if (study_info.studyTime) {
+        key.set(0x0008, 0x0030);
+        std::string t(study_info.studyTime.get());
+        t.erase(std::remove(t.begin(), t.end(), ':'), t.end());
+        WRITE_DCM_STRING(key, t.c_str());
+    }
 
-    // Series Time
-    key.set(0x0008, 0x0031);
-    snprintf(buf, BUFSIZE, "%02d%02d%02d", meas_info.seriesTime().hours(),
-            meas_info.seriesTime().minutes(), (int)meas_info.seriesTime().seconds());
-    WRITE_DCM_STRING(key, buf);
+    // Series, Acquisition, Content Time
+    if (meas_info.seriesTime) {
+        key.set(0x0008, 0x0031);
+        std::string t(meas_info.seriesTime.get());
+        t.erase(std::remove(t.begin(), t.end(), ':'), t.end());
+        WRITE_DCM_STRING(key, t.c_str());
 
-    // Acquisition Time
-    key.set(0x0008, 0x0032);
-    WRITE_DCM_STRING(key, buf);
+        key.set(0x0008, 0x0032);
+        WRITE_DCM_STRING(key, t.c_str());
 
-    // Content Time
-    key.set(0x0008, 0x0033);
-    WRITE_DCM_STRING(key, buf);
+        key.set(0x0008, 0x0033);
+        WRITE_DCM_STRING(key, t.c_str());
+    }
 
     // Accession Number
     key.set(0x0008, 0x0050);
-    if (study_info.accessionNumber().present()) {
-        snprintf(buf, BUFSIZE, "%d", (int)study_info.accessionNumber().get());
+    if (study_info.accessionNumber) {
+        ACE_OS::snprintf(buf, BUFSIZE, "%ld", *study_info.accessionNumber);
         WRITE_DCM_STRING(key, buf);
     } else {
         WRITE_DCM_STRING(key, 0);
@@ -205,70 +200,69 @@ int DicomFinishGadget<T>::process_config(ACE_Message_Block* mb)
 
     // Manufacturer
     key.set(0x0008, 0x0070);
-    if (sys_info.systemVendor().present()) {
-        WRITE_DCM_STRING(key, sys_info.systemVendor().get().c_str());
+    if (sys_info.systemVendor) {
+        WRITE_DCM_STRING(key, sys_info.systemVendor->c_str());
     } else {
         WRITE_DCM_STRING(key, "UNKNOWN");
     }
 
     // Institution Name
     key.set(0x0008, 0x0080);
-    if (sys_info.institutionName().present()) {
-        WRITE_DCM_STRING(key, sys_info.institutionName().get().c_str());
+    if (sys_info.institutionName) {
+        WRITE_DCM_STRING(key, sys_info.institutionName->c_str());
     } else {
         WRITE_DCM_STRING(key, "UNKNOWN");
     }
 
     // Referring Physician's Name
     key.set(0x0008, 0x0090);
-    if (study_info.referringPhysicianName().present()) {
-        WRITE_DCM_STRING(key, study_info.referringPhysicianName().get().c_str());
+    if (study_info.referringPhysicianName) {
+        WRITE_DCM_STRING(key, study_info.referringPhysicianName->c_str());
     } else {
         WRITE_DCM_STRING(key, "");
     }
 
     // Station Name
     key.set(0x0008, 0x1010);
-    if (sys_info.stationName().present()) {
-        WRITE_DCM_STRING(key, sys_info.stationName().get().c_str());
+    if (sys_info.stationName) {
+        WRITE_DCM_STRING(key, sys_info.stationName->c_str());
     } else {
         WRITE_DCM_STRING(key, "");
     }
 
     // Study Description
     key.set(0x0008, 0x1030);
-    if (study_info.studyDescription().present()) {
-        WRITE_DCM_STRING(key, study_info.studyDescription().get().c_str());
+    if (study_info.studyDescription) {
+      WRITE_DCM_STRING(key, study_info.studyDescription->c_str());
     } else {
-        WRITE_DCM_STRING(key, "");
+      WRITE_DCM_STRING(key, "");
     }
 
     // Series Description
     key.set(0x0008, 0x103E);
-    if (meas_info.seriesDescription().present()) {
-        WRITE_DCM_STRING(key, meas_info.seriesDescription().get().c_str());
+    if (meas_info.seriesDescription) {
+        WRITE_DCM_STRING(key, meas_info.seriesDescription->c_str());
     } else {
         WRITE_DCM_STRING(key, "");
     }
 
     // Manufacturer's Model Name
     key.set(0x0008, 0x1090);
-    if (sys_info.systemModel().present()) {
-        WRITE_DCM_STRING(key, sys_info.systemModel().get().c_str());
+    if (sys_info.systemModel) {
+        WRITE_DCM_STRING(key, sys_info.systemModel->c_str());
     } else {
         WRITE_DCM_STRING(key, "");
     }
 
     // Referenced SOP Instance UIDs
-    if (dcm_params.referencedImageSequence().present()) {
-        ISMRMRD::referencedImageSequence refs = dcm_params.referencedImageSequence().get();
+    std::vector<ISMRMRD::ReferencedImageSequence> refs(meas_info.referencedImageSequence);
+    if (refs.size() > 0) {
         DcmItem *ref_sequence;
-        string ref_uid;
-        for (unsigned int i = 0; i < refs.referencedSOPInstanceUID().size(); i++) {
-            ref_uid = refs.referencedSOPInstanceUID()[i];
-
+        std::vector<ISMRMRD::ReferencedImageSequence>::iterator it;
+        for (it = refs.begin(); it != refs.end(); ++it) {
+            std::string ref_uid(it->referencedSOPInstanceUID);
             if (ref_uid.length() > 0) {   // Only write non-empty strings
-                if (dataset->findOrCreateSequenceItem(key, ref_sequence, -2 /* append */).good()) {
+                if (dataset->findOrCreateSequenceItem(key, ref_sequence, -2).good()) {
                     // Write the Referenced SOPClassUID (MRImageStorage)
                     key.set(0x0008, 0x1150);
                     ((DcmDataset *)ref_sequence)->putAndInsertString(key, UID_MRImageStorage);
@@ -290,38 +284,38 @@ int DicomFinishGadget<T>::process_config(ACE_Message_Block* mb)
 
     // Patient Name
     key.set(0x0010, 0x0010);
-    if (patient_info.patientName().present()) {
-        WRITE_DCM_STRING(key, patient_info.patientName().get().c_str());
+    if (patient_info.patientName) {
+        WRITE_DCM_STRING(key, patient_info.patientName->c_str());
     } else {
         WRITE_DCM_STRING(key, "None");
     }
 
     // Patient ID
     key.set(0x0010, 0x0020);
-    if (patient_info.patientID().present()) {
-        WRITE_DCM_STRING(key, patient_info.patientID().get().c_str());
+    if (patient_info.patientID) {
+        WRITE_DCM_STRING(key, patient_info.patientID->c_str());
     } else {
         WRITE_DCM_STRING(key, "0");
     }
 
     // Patient Birthdate
     key.set(0x0010, 0x0030);
-    if (patient_info.patientBirthdate().present()) {
-        snprintf(buf, BUFSIZE, "%04d%02d%02d", patient_info.patientBirthdate().get().year(),
-                patient_info.patientBirthdate().get().month(), patient_info.patientBirthdate().get().day());
-        WRITE_DCM_STRING(key, buf);
+    if (patient_info.patientBirthdate) {
+        //ACE_OS::snprintf(buf, BUFSIZE, "%04d%02d%02d", patient_info.patientBirthdate().get().year(),
+        //        patient_info.patientBirthdate().get().month(), patient_info.patientBirthdate().get().day());
+        //WRITE_DCM_STRING(key, buf);
     } else {
         status = dataset->insertEmptyElement(key);
     }
 
     // Patient Sex
     key.set(0x0010, 0x0040);
-    if (patient_info.patientGender().present()) {
-        if (patient_info.patientGender().get() == "O") {
+    if (patient_info.patientGender) {
+        if (*patient_info.patientGender == "O") {
             status = dataset->insertEmptyElement(key);
         }
         else {
-            WRITE_DCM_STRING(key, patient_info.patientGender().get().c_str());
+            WRITE_DCM_STRING(key, patient_info.patientGender->c_str());
         }
     } else {
         WRITE_DCM_STRING(key, "");
@@ -329,9 +323,15 @@ int DicomFinishGadget<T>::process_config(ACE_Message_Block* mb)
 
     // Patient Age
     key.set(0x0010, 0x1010);
-    if (patient_info.patientBirthdate().present()) {
-        snprintf(buf, BUFSIZE, "%03uY", meas_info.seriesDate().year() -
-                patient_info.patientBirthdate().get().year());
+    if (patient_info.patientBirthdate && meas_info.seriesDate) {
+        boost::gregorian::date bday(boost::gregorian::from_simple_string(patient_info.patientBirthdate.get()));
+        boost::gregorian::date seriesDate(boost::gregorian::from_simple_string(meas_info.seriesDate.get()));
+
+        boost::gregorian::days age = seriesDate - bday;
+
+        long age_in_years = age.days() / 365;
+
+        ACE_OS::snprintf(buf, BUFSIZE, "%03ldY", age_in_years);
         WRITE_DCM_STRING(key, buf);
     } else {
         WRITE_DCM_STRING(key, "000Y");
@@ -339,8 +339,8 @@ int DicomFinishGadget<T>::process_config(ACE_Message_Block* mb)
 
     // Patient Weight
     key.set(0x0010, 0x1030);
-    if (patient_info.patientWeight_kg().present()) {
-        snprintf(buf, BUFSIZE, "%f", patient_info.patientWeight_kg().get());
+    if (patient_info.patientWeight_kg) {
+        ACE_OS::snprintf(buf, BUFSIZE, "%f", *patient_info.patientWeight_kg);
         WRITE_DCM_STRING(key, buf);
     } else {
         WRITE_DCM_STRING(key, "0.0");
@@ -354,37 +354,36 @@ int DicomFinishGadget<T>::process_config(ACE_Message_Block* mb)
         return GADGET_FAIL;
     }
 
-    // Scanning Sequence
-    if (mr_image.scanningSequence().present()) {
-        key.set(0x0018, 0x0020);
-        WRITE_DCM_STRING(key, mr_image.scanningSequence().get().c_str());
-    } else {
-        WRITE_DCM_STRING(key, "RM");
-    }
-
-    // Sequence Variant
-    if (mr_image.sequenceVariant().present()) {
-        key.set(0x0018, 0x0021);
-        WRITE_DCM_STRING(key, mr_image.sequenceVariant().get().c_str());
-    } else {
-        WRITE_DCM_STRING(key, "NONE");
-    }
-
-    // Scan Options
-    if (mr_image.scanOptions().present()) {
-        key.set(0x0018, 0x0022);
-        WRITE_DCM_STRING(key, mr_image.scanOptions().get().c_str());
-    } else {
-        WRITE_DCM_STRING(key, "NONE");
-    }
-
-    // Acquisition Type
-    if (mr_image.mrAcquisitionType().present()) {
-        key.set(0x0018, 0x0023);
-        WRITE_DCM_STRING(key, mr_image.mrAcquisitionType().get().c_str());
-    } else {
-        WRITE_DCM_STRING(key, "2D");
+    // Scanning Sequence, Sequence Variant, Scan Options, Acquisition Type
+    std::string scanningSequence("RM");
+    std::string sequenceVariant("NONE");
+    std::string scanOptions("NONE");
+    std::string mrAcquisitionType("2D");
+    if (h.userParameters) {
+        ISMRMRD::UserParameters user_params = h.userParameters.get();
+        std::vector<ISMRMRD::UserParameterString> strings = user_params.userParameterString;
+        std::vector<ISMRMRD::UserParameterString>::iterator it;
+
+        for (it = strings.begin(); it != strings.end(); ++it) {
+            if (it->name == "scanningSequence") {
+                scanningSequence = it->value;
+            } else if (it->name == "sequenceVariant") {
+                sequenceVariant = it->value;
+            } else if (it->name == "scanOptions") {
+                scanOptions = it->value;
+            } else if (it->name == "mrAcquisitionType") {
+                mrAcquisitionType = it->value;
+            }
+        }
     }
+    key.set(0x0018, 0x0020);
+    WRITE_DCM_STRING(key, scanningSequence.c_str());
+    key.set(0x0018, 0x0021);
+    WRITE_DCM_STRING(key, sequenceVariant.c_str());
+    key.set(0x0018, 0x0022);
+    WRITE_DCM_STRING(key, scanOptions.c_str());
+    key.set(0x0018, 0x0023);
+    WRITE_DCM_STRING(key, mrAcquisitionType.c_str());
 
     // Angio Flag
     // TODO: hardcoded
@@ -395,47 +394,55 @@ int DicomFinishGadget<T>::process_config(ACE_Message_Block* mb)
     // This will need updated if the "reconSpace.fieldOfView_mm.z" field
     // is changed in the ISMRMRD populating code (client)
     key.set(0x0018, 0x0050);
-    snprintf(buf, BUFSIZE, "%f", cfg->encoding().front().reconSpace().fieldOfView_mm().z());
+    ACE_OS::snprintf(buf, BUFSIZE, "%f", r_space.fieldOfView_mm.z/std::max(r_space.matrixSize.z,(unsigned short)1));
     WRITE_DCM_STRING(key, buf);
 
+
     // Repetition Time
     key.set(0x0018, 0x0080);
-    snprintf(buf, BUFSIZE, "%f", seq_info.TR().front());
+    ACE_OS::snprintf(buf, BUFSIZE, "%f", seq_info.TR.front());
     WRITE_DCM_STRING(key, buf);
 
     // Echo Time
     key.set(0x0018, 0x0081);
-    snprintf(buf, BUFSIZE, "%f", seq_info.TE().front());
+    ACE_OS::snprintf(buf, BUFSIZE, "%f", seq_info.TE.front());
     WRITE_DCM_STRING(key, buf);
 
     // Inversion Time
-    key.set(0x0018, 0x0082);
-    snprintf(buf, BUFSIZE, "%f", seq_info.TI().front());
-    WRITE_DCM_STRING(key, buf);
+    if (seq_info.TI.size()>0)
+    {
+        key.set(0x0018, 0x0082);
+        ACE_OS::snprintf(buf, BUFSIZE, "%f", seq_info.TI.front());
+        WRITE_DCM_STRING(key, buf);
+    }
+
+    // Flip Angle
+    if (seq_info.flipAngle_deg.size()>0)
+    {
+        key.set(0x0018, 0x1314);
+        ACE_OS::snprintf(buf, BUFSIZE, "%ld", (long)seq_info.flipAngle_deg.front());
+        WRITE_DCM_STRING(key, buf);
+    }
 
     // Imaging Frequency in tenths of MHz ???
     key.set(0x0018, 0x0084);
-    snprintf(buf, BUFSIZE, "%f", (float)exp_cond.H1resonanceFrequency_Hz() / 10000000.);
+    ACE_OS::snprintf(buf, BUFSIZE, "%f", (float)exp_cond.H1resonanceFrequency_Hz / 10000000.);
     WRITE_DCM_STRING(key, buf);
 
     // Magnetic Field Strength (T)
     key.set(0x0018, 0x0087);
-    if (sys_info.systemFieldStrength_T().present()) {
-        snprintf(buf, BUFSIZE, "%f", sys_info.systemFieldStrength_T().get());
+    if (sys_info.systemFieldStrength_T) {
+        ACE_OS::snprintf(buf, BUFSIZE, "%f", *sys_info.systemFieldStrength_T);
         WRITE_DCM_STRING(key, buf);
     } else {
         WRITE_DCM_STRING(key, "3.0");
     }
 
-    // Spacing Between Slices
-    key.set(0x0018, 0x0088);
-    snprintf(buf, BUFSIZE, "%f", cfg->encoding().front().reconSpace().fieldOfView_mm().z());
-    WRITE_DCM_STRING(key, buf);
 
     // Echo Train Length
-    if (mr_image.echoTrainLength().present()) {
+    if (h.encoding[0].echoTrainLength) {
         key.set(0x0018, 0x0091);
-        snprintf(buf, BUFSIZE, "%ld", (long)mr_image.echoTrainLength().get());
+        ACE_OS::snprintf(buf, BUFSIZE, "%ld", (long)*h.encoding[0].echoTrainLength);
         WRITE_DCM_STRING(key, buf);
     } else {
         WRITE_DCM_STRING(key, "1");
@@ -452,47 +459,29 @@ int DicomFinishGadget<T>::process_config(ACE_Message_Block* mb)
     WRITE_DCM_STRING(key, "100");
 
     // Protocol Name
-    if (meas_info.protocolName().present()) {
+    if (meas_info.protocolName) {
         key.set(0x0018, 0x1030);
-        WRITE_DCM_STRING(key, meas_info.protocolName().get().c_str());
+        WRITE_DCM_STRING(key, meas_info.protocolName.get().c_str());
     } else {
         WRITE_DCM_STRING(key, "");
     }
 
-    // Trigger Time
-    if (mr_image.triggerTime().present()) {
-        key.set(0x0018, 0x1060);
-        snprintf(buf, BUFSIZE, "%f", mr_image.triggerTime().get());
-        WRITE_DCM_STRING(key, buf);
-    } else {
-        WRITE_DCM_STRING(key, "0.0");
-    }
+    // Trigger Time - TODO: use Image Meta Data
+    key.set(0x0018, 0x1060);
+    WRITE_DCM_STRING(key, "0.0");
 
-    // Reconstruction Diameter (FOV)
-    // TODO: hmm
+    // Reconstruction Diameter (FOV) - TODO: ?
     key.set(0x0018, 0x1100);
 
-    // Frequency Encoding Direction
-    if (mr_image.freqEncodingDirection().present()) {
-        key.set(0x0018, 0x1312);
-        WRITE_DCM_STRING(key, mr_image.freqEncodingDirection().get().c_str());
-    } else {
-        WRITE_DCM_STRING(key, "ROW");
-    }
-
-    // Flip Angle
-    if (mr_image.flipAngle_deg().present()) {
-        key.set(0x0018, 0x1314);
-        snprintf(buf, BUFSIZE, "%d", (int)mr_image.flipAngle_deg().get());
-        WRITE_DCM_STRING(key, buf);
-    } else {
-        WRITE_DCM_STRING(key, "0");
-    }
+    // Frequency Encoding Direction - TODO: use Image Meta Data
+    key.set(0x0018, 0x1312);
+    WRITE_DCM_STRING(key, "ROW");
 
     // Patient Position
     key.set(0x0018, 0x5100);
-    WRITE_DCM_STRING(key, meas_info.patientPosition().c_str());
+    WRITE_DCM_STRING(key, meas_info.patientPosition.c_str());
 
+    /****************************************/
     // Group Length
     key.set(0x0020, 0x0000);
     status = dataset->insertEmptyElement(key);
@@ -503,25 +492,27 @@ int DicomFinishGadget<T>::process_config(ACE_Message_Block* mb)
 
     // Study Instance UID
     key.set(0x0020, 0x000D);
-    WRITE_DCM_STRING(key, dcm_params.studyInstanceUID().c_str());
+    if (study_info.studyInstanceUID) {
+      WRITE_DCM_STRING(key, study_info.studyInstanceUID->c_str());
+    }
 
     // Study ID
-    if (study_info.studyID().present()) {
+    if (study_info.studyID) {
         key.set(0x0020, 0x0010);
-        WRITE_DCM_STRING(key, study_info.studyID().get().c_str());
+        WRITE_DCM_STRING(key, study_info.studyID->c_str());
     } else {
         WRITE_DCM_STRING(key, "0");
     }
 
     // Store Series Instance UID for later
-    if (dcm_params.seriesInstanceUIDRoot().present()) {
-        seriesIUIDRoot = dcm_params.seriesInstanceUIDRoot().get();
+    if (meas_info.seriesInstanceUIDRoot) {
+      seriesIUIDRoot = *meas_info.seriesInstanceUIDRoot;
     }
 
     // Frame of Reference UID
-    if (dcm_params.frameOfReferenceUID().present()) {
+    if (meas_info.frameOfReferenceUID) {
         key.set(0x0020, 0x0052);
-        WRITE_DCM_STRING(key, dcm_params.frameOfReferenceUID().get().c_str());
+        WRITE_DCM_STRING(key, meas_info.frameOfReferenceUID->c_str());
     }
 
     /****************************************/
@@ -545,9 +536,9 @@ int DicomFinishGadget<T>::process_config(ACE_Message_Block* mb)
 
     // Pixel Spacing (Array of len 2)
     key.set(0x0028, 0x0030);
-    float pixel_spacing_X = r_space.fieldOfView_mm().x() / r_space.matrixSize().x();
-    float pixel_spacing_Y = r_space.fieldOfView_mm().y() / r_space.matrixSize().y();
-    snprintf(buf, BUFSIZE, "%.3f\\%.3f", pixel_spacing_X, pixel_spacing_Y);
+    float pixel_spacing_X = r_space.fieldOfView_mm.x / r_space.matrixSize.x;
+    float pixel_spacing_Y = r_space.fieldOfView_mm.y / r_space.matrixSize.y;
+    ACE_OS::snprintf(buf, BUFSIZE, "%.3f\\%.3f", pixel_spacing_X, pixel_spacing_Y);
     WRITE_DCM_STRING(key, buf);
 
     // Bits Allocated
@@ -565,17 +556,17 @@ int DicomFinishGadget<T>::process_config(ACE_Message_Block* mb)
 
     //GADGET_DEBUG1("Finished populating DICOM fields\n");
 
-    /* clean up the buffer we created for snprintf */
+    /* clean up the buffer we created for ACE_OS::snprintf */
     delete[] buf;
 
     return GADGET_OK;
 }
 
 template <typename T>
-int DicomFinishGadget<T>::process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1,
-        GadgetContainerMessage< hoNDArray< T > >* m2)
+int DicomFinishGadget<T>::process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1, GadgetContainerMessage< hoNDArray< T > >* m2)
 {
-    if (!this->controller_) {
+    if (!this->controller_)
+    {
         ACE_DEBUG( (LM_DEBUG,
                     ACE_TEXT("Cannot return result to controller, no controller set")) );
         return -1;
@@ -626,9 +617,9 @@ int DicomFinishGadget<T>::process(GadgetContainerMessage<ISMRMRD::ImageHeader>*
     m1->cont(pixels);
     /* release the old data array */
     m2->release();
-    /* update the image_data_type.
+    /* update the image data_type.
      * There is currently no SIGNED SHORT type so this will have to suffice */
-    m1->getObjectPtr()->image_data_type = ISMRMRD::DATA_UNSIGNED_SHORT;
+    m1->getObjectPtr()->data_type = ISMRMRD::ISMRMRD_USHORT;
 
     unsigned int BUFSIZE = 1024;
     char *buf = new char[BUFSIZE];
@@ -640,20 +631,14 @@ int DicomFinishGadget<T>::process(GadgetContainerMessage<ISMRMRD::ImageHeader>*
     // TODO: it is often the case the img->contrast is not properly set
     // likely due to the allocated ISMRMRD::ImageHeader being uninitialized
     key.set(0x0018, 0x0086);
-    snprintf(buf, BUFSIZE, "%d", img->contrast);
+    ACE_OS::snprintf(buf, BUFSIZE, "%d", img->contrast);
     WRITE_DCM_STRING(key, buf);
 
     // Acquisition Matrix ... Image Dimensions
     // Defined as: [frequency rows, frequency columns, phase rows, phase columns]
-    // But at this point in the gadget I don't know the frequency encode direction
     key.set(0x0018, 0x1310);
     ACE_UINT16 im_dim[4] = {0,0,0,0};
-    /* if (frequence_encode_dir == "ROW")) {
-        // frequency encoding direction is ROW
-        im_dim[1] = img->matrix_size[0];
-        im_dim[2] = img->matrix_size[1];
-    } */
-    // frequency encoding direction is COLUMN
+    // frequency encoding direction is always the COLUMN
     im_dim[0] = img->matrix_size[0];
     im_dim[3] = img->matrix_size[1];
     status = dataset->putAndInsertUint16Array(key, im_dim, 4);
@@ -665,12 +650,12 @@ int DicomFinishGadget<T>::process(GadgetContainerMessage<ISMRMRD::ImageHeader>*
     // Series Number
     // Only write a number if the image_series_index is positive and non-zero
     key.set(0x0020, 0x0011);
-    snprintf(buf, BUFSIZE, "%ld", this->initialSeriesNumber * 100 + img->image_series_index);
+    ACE_OS::snprintf(buf, BUFSIZE, "%ld", this->initialSeriesNumber * 100 + img->image_series_index);
     WRITE_DCM_STRING(key, buf);
 
     // Image Number
     key.set(0x0020, 0x0013);
-    snprintf(buf, BUFSIZE, "%d", img->image_index + 1);
+    ACE_OS::snprintf(buf, BUFSIZE, "%d", img->image_index + 1);
     WRITE_DCM_STRING(key, buf);
 
     // Image Position (Patient)
@@ -687,31 +672,37 @@ int DicomFinishGadget<T>::process(GadgetContainerMessage<ISMRMRD::ImageHeader>*
             (img->field_of_view[1] / 2.0) * img->phase_dir[2];
 
     key.set(0x0020, 0x0032);
-    snprintf(buf, BUFSIZE, "%.4f\\%.4f\\%.4f", corner[0], corner[1], corner[2]);
+    ACE_OS::snprintf(buf, BUFSIZE, "%.4f\\%.4f\\%.4f", corner[0], corner[1], corner[2]);
     WRITE_DCM_STRING(key, buf);
 
     // Image Orientation
     // read_dir, phase_dir, and slice_dir were calculated in
     // a DICOM/patient coordinate system, so just plug them in
     key.set(0x0020, 0x0037);
-    snprintf(buf, BUFSIZE, "%.4f\\%.4f\\%.4f\\%.4f\\%.4f\\%.4f",
+    ACE_OS::snprintf(buf, BUFSIZE, "%.4f\\%.4f\\%.4f\\%.4f\\%.4f\\%.4f",
             img->read_dir[0], img->read_dir[1], img->read_dir[2],
             img->phase_dir[0], img->phase_dir[1], img->phase_dir[2]);
     WRITE_DCM_STRING(key, buf);
 
     // Slice Location
     key.set(0x0020, 0x1041);
-    snprintf(buf, BUFSIZE, "%f", img->position[2]);
+    ACE_OS::snprintf(buf, BUFSIZE, "%f", img->position[2]);
     WRITE_DCM_STRING(key, buf);
 
+    //Number of frames
+    if (img->matrix_size[2] > 1){ //Only write if we have more than 1 frame
+		key.set(0x0028,0x0008);
+		ACE_OS::snprintf(buf,BUFSIZE,"%d",img->matrix_size[2]);
+		WRITE_DCM_STRING(key,buf);
+    }
     // Columns
     key.set(0x0028, 0x0010);
-    snprintf(buf, BUFSIZE, "%d", img->matrix_size[0]);
+    ACE_OS::snprintf(buf, BUFSIZE, "%d", img->matrix_size[0]);
     WRITE_DCM_STRING(key, buf);
 
     // Rows
     key.set(0x0028, 0x0011);
-    snprintf(buf, BUFSIZE, "%d", img->matrix_size[1]);
+    ACE_OS::snprintf(buf, BUFSIZE, "%d", img->matrix_size[1]);
     WRITE_DCM_STRING(key, buf);
 
     // Simple windowing using pixel values calculated earlier...
@@ -724,12 +715,12 @@ int DicomFinishGadget<T>::process(GadgetContainerMessage<ISMRMRD::ImageHeader>*
 
     // Window Center
     key.set(0x0028, 0x1050);
-    snprintf(buf, BUFSIZE, "%d", window_center);
+    ACE_OS::snprintf(buf, BUFSIZE, "%d", window_center);
     WRITE_DCM_STRING(key, buf);
 
     // Window Width
     key.set(0x0028, 0x1051);
-    snprintf(buf, BUFSIZE, "%d", window_width);
+    ACE_OS::snprintf(buf, BUFSIZE, "%d", window_width);
     WRITE_DCM_STRING(key, buf);
 
     // ACR_NEMA_2C_VariablePixelDataGroupLength
@@ -741,7 +732,7 @@ int DicomFinishGadget<T>::process(GadgetContainerMessage<ISMRMRD::ImageHeader>*
     }
 
     // Pixel Data
-    if ((unsigned long)img->matrix_size[0] * (unsigned long)img->matrix_size[1] !=
+    if ((unsigned long)img->matrix_size[0] * (unsigned long)img->matrix_size[1] * (unsigned long) img->matrix_size[2] !=
                 data->get_number_of_elements()) {
         GADGET_DEBUG1("Mismatch in image dimensions and available data\n");
         return GADGET_FAIL;
@@ -759,7 +750,7 @@ int DicomFinishGadget<T>::process(GadgetContainerMessage<ISMRMRD::ImageHeader>*
     unsigned short series_number = img->image_series_index + 1;
 
     // Try to find an already-generated Series Instance UID in our map
-    std::map<unsigned int, string>::iterator it = seriesIUIDs.find(series_number);
+    std::map<unsigned int, std::string>::iterator it = seriesIUIDs.find(series_number);
 
     if (it == seriesIUIDs.end()) {
         // Didn't find a Series Instance UID for this series number
@@ -772,7 +763,7 @@ int DicomFinishGadget<T>::process(GadgetContainerMessage<ISMRMRD::ImageHeader>*
         } else {
             dcmGenerateUniqueIdentifier(newuid);
         }
-        seriesIUIDs[series_number] = string(newuid);
+        seriesIUIDs[series_number] = std::string(newuid);
     }
     WRITE_DCM_STRING(key, seriesIUIDs[series_number].c_str());
 
@@ -781,7 +772,7 @@ int DicomFinishGadget<T>::process(GadgetContainerMessage<ISMRMRD::ImageHeader>*
     key.set(0x0008, 0x0018);        // SOPInstanceUID
     const char *root;
     if (seriesIUIDRoot.length() > 0) {
-        root = string(seriesIUIDRoot, 0, 20).c_str();
+        root = std::string(seriesIUIDRoot, 0, 20).c_str();
     } else {
        root = "1.2.840.113619.2.156";
     }
@@ -789,7 +780,7 @@ int DicomFinishGadget<T>::process(GadgetContainerMessage<ISMRMRD::ImageHeader>*
     dcmGenerateUniqueIdentifier(newuid, root);
     WRITE_DCM_STRING(key, newuid);
 
-    /* clean up the char[] we created for snprintf */
+    /* clean up the char[] we created for ACE_OS::snprintf */
     delete[] buf;
 
     GadgetContainerMessage<DcmFileFormat>* mdcm = new GadgetContainerMessage<DcmFileFormat>();
@@ -815,10 +806,8 @@ int DicomFinishGadget<T>::process(GadgetContainerMessage<ISMRMRD::ImageHeader>*
     return GADGET_OK;
 }
 
-
 //Declare factories for the various template instances
 GADGET_FACTORY_DECLARE(DicomFinishGadgetFLOAT)
 GADGET_FACTORY_DECLARE(DicomFinishGadgetUSHORT)
-//GADGET_FACTORY_DECLARE(DicomFinishGadgetCPLX)
 
 } /* namespace Gadgetron */
diff --git a/gadgets/dicom/DicomFinishGadget.h b/gadgets/dicom/DicomFinishGadget.h
index f03540c..8f1053a 100644
--- a/gadgets/dicom/DicomFinishGadget.h
+++ b/gadgets/dicom/DicomFinishGadget.h
@@ -2,12 +2,21 @@
 #define DICOMFINISHGADGET_H
 
 #include "gadgetron_dicom_export.h"
+
 #include "Gadget.h"
 #include "hoNDArray.h"
 #include "GadgetMRIHeaders.h"
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
 #include "GadgetStreamController.h"
 
+#include "dcmtk/config/osconfig.h"
+#include "dcmtk/ofstd/ofstdinc.h"
+#define INCLUDE_CSTDLIB
+#define INCLUDE_CSTDIO
+#define INCLUDE_CSTRING
+#include "dcmtk/dcmdata/dctk.h"
+#include "dcmtk/dcmdata/dcostrmb.h"
+
 #include <string>
 #include <map>
 #include <complex>
diff --git a/gadgets/dicom/DicomImageWriter.cpp b/gadgets/dicom/DicomImageWriter.cpp
index 0009082..8756a85 100644
--- a/gadgets/dicom/DicomImageWriter.cpp
+++ b/gadgets/dicom/DicomImageWriter.cpp
@@ -7,6 +7,7 @@
 #include "DicomImageWriter.h"
 #include "GadgetContainerMessage.h"
 #include "hoNDArray.h"
+#include "ismrmrd/meta.h"
 
 // DCMTK includes
 #include "dcmtk/config/osconfig.h"
@@ -17,40 +18,96 @@
 #include "dcmtk/dcmdata/dctk.h"
 #include "dcmtk/dcmdata/dcostrmb.h"
 
-
 namespace Gadgetron {
 
+// -------------------------------------------------------
+// DicomImageWriter
+// -------------------------------------------------------
+
 int DicomImageWriter::write(ACE_SOCK_Stream* sock, ACE_Message_Block* mb)
 {
     GadgetContainerMessage<DcmFileFormat>* dcm_file_message = AsContainerMessage<DcmFileFormat>(mb);
-    if (!dcm_file_message) {
+    if (!dcm_file_message)
+    {
         ACE_DEBUG( (LM_ERROR, ACE_TEXT("(%P,%l), DicomImageWriter::write, invalid image message objects, 1\n")) );
         return -1;
     }
 
     DcmFileFormat *dcmFile = dcm_file_message->getObjectPtr();
 
-/* BEGIN DEBUG
+    // Initialize transfer state of DcmDataset
+    dcmFile->transferInit();
+
+    // Calculate size of DcmFileFormat and create a SUFFICIENTLY sized buffer
+    long buffer_length = dcmFile->calcElementLength(EXS_LittleEndianExplicit, EET_ExplicitLength) * 2;
+    std::vector<char> bufferChar(buffer_length);
+    char* buffer = &bufferChar[0];
+
+    DcmOutputBufferStream out_stream(buffer, buffer_length);
+
+    OFCondition status;
 
-    OFString modality;
-    DcmTagKey key(0x0008, 0x0060);
-    OFCondition s = dcmFile->getDataset()->findAndGetOFString(key, modality);
-    if (s.bad()) {
-        GADGET_DEBUG1("Failed to set Modality\n");
+    status = dcmFile->write(out_stream, EXS_LittleEndianExplicit, EET_ExplicitLength, NULL);
+    if (!status.good()) {
+        GADGET_DEBUG2("Failed to write DcmFileFormat to DcmOutputStream(%s)\n", status.text());
         return GADGET_FAIL;
     }
 
-    GADGET_DEBUG2("Verifying that DcmDataset is valid... Modality: %s\n", modality.c_str());
+    void *serialized = NULL;
+    offile_off_t serialized_length = 0;
+    out_stream.flushBuffer(serialized, serialized_length);
+
+    // finalize transfer state of DcmDataset
+    dcmFile->transferEnd();
+
+    ssize_t send_cnt = 0;
+
+    GadgetMessageIdentifier id;
+    id.id = GADGET_MESSAGE_DICOM;
+
+    if ((send_cnt = sock->send_n (&id, sizeof(GadgetMessageIdentifier))) <= 0) {
+        ACE_DEBUG ((LM_ERROR, ACE_TEXT ("(%P|%t) Unable to send DICOM message identifier\n")));
+        return -1;
+    }
+
+    uint32_t nbytes = (uint32_t)serialized_length;
+    if ((send_cnt = sock->send_n (&nbytes, sizeof(nbytes))) <= 0) {
+        ACE_DEBUG ((LM_ERROR, ACE_TEXT ("(%P|%t) Unable to send DICOM bytes length\n")));
+        return -1;
+    }
+
+    if ((send_cnt = sock->send_n (serialized, serialized_length)) <= 0) {
+        ACE_DEBUG ((LM_ERROR, ACE_TEXT ("(%P|%t) Unable to send DICOM bytes\n")));
+        return -1;
+    }
+
+    return 0;
+}
+
+GADGETRON_WRITER_FACTORY_DECLARE(DicomImageWriter)
+
+// -------------------------------------------------------
+// DicomImageAttribWriter
+// -------------------------------------------------------
+
+int DicomImageAttribWriter::write(ACE_SOCK_Stream* sock, ACE_Message_Block* mb)
+{
+    GadgetContainerMessage<DcmFileFormat>* dcm_file_message = AsContainerMessage<DcmFileFormat>(mb);
+    if (!dcm_file_message)
+    {
+        ACE_DEBUG( (LM_ERROR, ACE_TEXT("(%P,%l), DicomImageWriter::write, invalid image message objects, 1\n")) );
+        return -1;
+    }
 
-END DEBUG */
+    DcmFileFormat *dcmFile = dcm_file_message->getObjectPtr();
 
-    //GADGET_DEBUG1("Initializing transfer state for DICOM file\n");
     // Initialize transfer state of DcmDataset
     dcmFile->transferInit();
 
     // Calculate size of DcmFileFormat and create a SUFFICIENTLY sized buffer
     long buffer_length = dcmFile->calcElementLength(EXS_LittleEndianExplicit, EET_ExplicitLength) * 2;
-    char buffer[buffer_length];
+    std::vector<char> bufferChar(buffer_length);
+    char* buffer = &bufferChar[0];
 
     DcmOutputBufferStream out_stream(buffer, buffer_length);
 
@@ -72,34 +129,89 @@ END DEBUG */
     ssize_t send_cnt = 0;
 
     GadgetMessageIdentifier id;
-    id.id = GADGET_MESSAGE_DICOM;
-    //GADGET_DEBUG2("Sending GadgetMessageIdentifier %d\n", id.id);
-    if ((send_cnt = sock->send_n (&id, sizeof(GadgetMessageIdentifier))) <= 0) {
+    id.id = GADGET_MESSAGE_DICOM_WITHNAME;
+
+    if ((send_cnt = sock->send_n (&id, sizeof(GadgetMessageIdentifier))) <= 0)
+    {
         ACE_DEBUG ((LM_ERROR, ACE_TEXT ("(%P|%t) Unable to send DICOM message identifier\n")));
         return -1;
     }
-    //GADGET_DEBUG2("Sent GadgetMessageIdentifier %d\n", id.id);
-
 
     uint32_t nbytes = (uint32_t)serialized_length;
-    //GADGET_DEBUG2("Sending bytes length %d\n", serialized_length);
-    if ((send_cnt = sock->send_n (&nbytes, sizeof(nbytes))) <= 0) {
+    if ((send_cnt = sock->send_n (&nbytes, sizeof(nbytes))) <= 0)
+    {
         ACE_DEBUG ((LM_ERROR, ACE_TEXT ("(%P|%t) Unable to send DICOM bytes length\n")));
         return -1;
     }
-    //GADGET_DEBUG2("Sent bytes length %d\n", serialized_length);
-
 
-    //GADGET_DEBUG1("Begin sending DICOM image bytes\n");
-    if ((send_cnt = sock->send_n (serialized, serialized_length)) <= 0) {
+    if ((send_cnt = sock->send_n (serialized, serialized_length)) <= 0)
+    {
         ACE_DEBUG ((LM_ERROR, ACE_TEXT ("(%P|%t) Unable to send DICOM bytes\n")));
         return -1;
     }
-    //GADGET_DEBUG1("Finished sending DICOM image bytes\n");
+
+    // check whether the image filename is attached
+    GadgetContainerMessage<std::string>* dcm_filename_message = AsContainerMessage<std::string>(mb->cont());
+    if (dcm_filename_message)
+    {
+        unsigned long long len = dcm_filename_message->getObjectPtr()->length();
+        if ((send_cnt = sock->send_n (&len, sizeof(unsigned long long))) <= 0)
+        {
+            ACE_DEBUG ((LM_ERROR, ACE_TEXT ("(%P|%t) Unable to send DICOM filename length\n")));
+            return -1;
+        }
+
+        const char* filename = dcm_filename_message->getObjectPtr()->c_str();
+        if ((send_cnt = sock->send_n (filename, len)) <= 0)
+        {
+            ACE_DEBUG ((LM_ERROR, ACE_TEXT ("(%P|%t) Unable to send DICOM filename\n")));
+            return -1;
+        }
+
+        GadgetContainerMessage<ISMRMRD::MetaContainer>* dcm_meta_message = AsContainerMessage<ISMRMRD::MetaContainer>(dcm_filename_message->cont());
+        if (dcm_meta_message)
+        {
+            typedef unsigned long long size_t_type;
+
+            char* buf = NULL;
+            size_t_type len(0);
+
+            try
+            {
+                std::stringstream str;
+                ISMRMRD::serialize( *dcm_meta_message->getObjectPtr(), str);
+                std::string attribContent = str.str();
+                len = attribContent.length()+1;
+
+                buf = new char[len];
+                GADGET_CHECK_THROW(buf != NULL);
+
+                memset(buf, '\0', sizeof(char)*len);
+                memcpy(buf, attribContent.c_str(), len-1);
+            }
+            catch(...)
+            {
+                ACE_DEBUG ((LM_ERROR, ACE_TEXT ("(%P|%t) Unable to serialize dicom image meta attributes \n")));
+                return -1;
+            }
+
+            if ( (send_cnt = sock->send_n (buf, len)) <= 0 )
+            {
+                ACE_DEBUG ((LM_ERROR,
+                        ACE_TEXT ("(%P|%t) Unable to send dicom image meta attributes\n")));
+
+                if ( buf != NULL ) delete [] buf;
+
+                return -1;
+            }
+
+            if ( buf != NULL ) delete [] buf;
+        }
+    }
 
     return 0;
 }
 
-GADGETRON_WRITER_FACTORY_DECLARE(DicomImageWriter)
+GADGETRON_WRITER_FACTORY_DECLARE(DicomImageAttribWriter)
 
 } /* namespace Gadgetron */
diff --git a/gadgets/dicom/DicomImageWriter.h b/gadgets/dicom/DicomImageWriter.h
index 3a5c275..5f5fe74 100644
--- a/gadgets/dicom/DicomImageWriter.h
+++ b/gadgets/dicom/DicomImageWriter.h
@@ -4,7 +4,7 @@
 #include "gadgetron_dicom_export.h"
 #include "GadgetMessageInterface.h"
 #include "GadgetMRIHeaders.h"
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
 
 
 namespace Gadgetron {
@@ -17,6 +17,14 @@ class EXPORTGADGETSDICOM DicomImageWriter : public GadgetMessageWriter
   GADGETRON_WRITER_DECLARE(DicomImageWriter);
 };
 
+class EXPORTGADGETSDICOM DicomImageAttribWriter : public GadgetMessageWriter
+{
+ public:
+  // Sends the DICOM file with id GADGET_MESSAGE_DICOM_WITHNAME, then any chained file name and meta attributes.
+  virtual int write(ACE_SOCK_Stream* sock, ACE_Message_Block* mb);
+  GADGETRON_WRITER_DECLARE(DicomImageAttribWriter);
+};
+
 } /* namespace Gadgetron */
 
 #endif
diff --git a/gadgets/dicom/dicom.xml b/gadgets/dicom/dicom.xml
index 8d7f4d8..d3687d9 100644
--- a/gadgets/dicom/dicom.xml
+++ b/gadgets/dicom/dicom.xml
@@ -5,7 +5,7 @@
 
     <reader>
         <slot>1008</slot>
-        <dll>gadgetroncore</dll>
+        <dll>gadgetron_mricore</dll>
         <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
     </reader>
 
@@ -16,37 +16,68 @@
     </writer>
 
     <gadget>
-        <name>Acc</name>
-        <dll>gadgetroncore</dll>
-        <classname>AccumulatorGadget</classname>
+      <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+        <property>
+            <name>constant_noise_variance</name>
+            <value>false</value>
+        </property>
+    </gadget>
+
+    <gadget>
+        <name>AccTrig</name>
+        <dll>gadgetron_mricore</dll>
+	<classname>AcquisitionAccumulateTriggerGadget</classname>
+        <property>
+            <name>trigger_dimension</name>
+            <value>repetition</value>
+        </property>
+        <property>
+          <name>sorting_dimension</name>
+          <value>slice</value>
+        </property>
+    </gadget>
+
+    <gadget>
+        <name>Buff</name>
+        <dll>gadgetron_mricore</dll>
+	<classname>BucketToBufferGadget</classname>
+        <property>
+            <name>N_dimension</name>
+            <value></value>
+        </property>
+        <property>
+          <name>S_dimension</name>
+          <value></value>
+        </property>
+        <property>
+          <name>split_slice</name>
+          <value>true</value>
+        </property>
     </gadget>
 
     <gadget>
         <name>FFT</name>
-        <dll>gadgetroncore</dll>
+        <dll>gadgetron_mricore</dll>
         <classname>FFTGadget</classname>
     </gadget>
 
     <gadget>
-        <name>CoilCombinePython</name>
-        <dll>gadgetronpython</dll>
-        <classname>ImagePythonGadget</classname>
-        <property><name>python_path</name>                  <value>/home/myuser/scripts/python</value></property>
-        <property><name>python_module</name>                <value>rms_coil_combine</value></property>
-        <property><name>gadget_reference_function</name>    <value>set_gadget_reference</value></property>
-        <property><name>input_function</name>               <value>recon_function</value></property>
-        <property><name>config_function</name>              <value>config_function</value></property>
+        <name>Combine</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>CombineGadget</classname>
     </gadget>
 
     <gadget>
         <name>Extract</name>
-        <dll>gadgetroncore</dll>
+        <dll>gadgetron_mricore</dll>
         <classname>ExtractGadget</classname>
     </gadget>
 
     <gadget>
         <name>AutoScale</name>
-        <dll>gadgetroncore</dll>
+        <dll>gadgetron_mricore</dll>
         <classname>AutoScaleGadget</classname>
     </gadget>
 
diff --git a/gadgets/epi/CMakeLists.txt b/gadgets/epi/CMakeLists.txt
new file mode 100644
index 0000000..3ba07a1
--- /dev/null
+++ b/gadgets/epi/CMakeLists.txt
@@ -0,0 +1,49 @@
+IF (WIN32)
+  ADD_DEFINITIONS(-D__BUILD_GADGETRON_EPI__)  # presumably switches gadgetron_epi_export.h to dllexport - confirm
+ENDIF (WIN32)
+
+find_package(Ismrmrd REQUIRED)
+
+include_directories(
+  ${CMAKE_SOURCE_DIR}/gadgets/mri_core
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
+  ${CMAKE_SOURCE_DIR}/toolboxes/fft/cpu
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
+  ${CMAKE_SOURCE_DIR}/toolboxes/gadgettools
+  ${CMAKE_SOURCE_DIR}/toolboxes/mri/epi
+  ${ARMADILLO_INCLUDE_DIRS}  # NOTE(review): not set in this file; presumably found by a parent CMakeLists - confirm
+)
+# Shared library with the EPI gadgets; the XML configuration files are listed next to the sources.
+add_library(gadgetron_epi SHARED 
+  EPIReconXGadget.h EPIReconXGadget.cpp
+  EPICorrGadget.h EPICorrGadget.cpp
+  FFTXGadget.h FFTXGadget.cpp
+  CutXGadget.h CutXGadget.cpp
+  epi.xml
+  epi_gtplus_grappa.xml
+)
+
+set_target_properties(gadgetron_epi PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+# ACE is linked per build configuration (optimized / debug).
+target_link_libraries(
+  gadgetron_epi
+  gadgetron_gadgetbase
+  gadgetron_toolbox_cpucore
+  gadgetron_toolbox_cpufft
+  gadgetron_toolbox_cpucore_math
+  optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY} 
+)
+# Installed headers. NOTE(review): CutXGadget.h is compiled above but not listed here - confirm intent.
+install(FILES 
+  EPIReconXGadget.h
+  EPICorrGadget.h
+  FFTXGadget.h
+  gadgetron_epi_export.h
+  DESTINATION include COMPONENT main)
+
+install(TARGETS gadgetron_epi DESTINATION lib COMPONENT main)
+# The XML configuration files are installed to ${GADGETRON_INSTALL_CONFIG_PATH}.
+install(FILES
+  epi.xml
+  epi_gtplus_grappa.xml
+  DESTINATION ${GADGETRON_INSTALL_CONFIG_PATH} COMPONENT main)
diff --git a/gadgets/epi/CutXGadget.cpp b/gadgets/epi/CutXGadget.cpp
new file mode 100644
index 0000000..d87f36e
--- /dev/null
+++ b/gadgets/epi/CutXGadget.cpp
@@ -0,0 +1,91 @@
+#include "CutXGadget.h"
+#include "hoNDFFT.h"
+#include "hoNDArray_utils.h"
+#include "ismrmrd/xml.h"
+
+namespace Gadgetron{
+
+    CutXGadget::CutXGadget() {}   // members are assigned later, in process_config()
+    CutXGadget::~CutXGadget() {}  // no explicit cleanup
+
+    /**
+     * Read the ISMRMRD XML header and cache the readout (x) geometry.
+     *
+     * @param mb message block whose read pointer holds the serialized header
+     * @return GADGET_FAIL when the header has no encoding section, otherwise 0
+     */
+    int CutXGadget::process_config(ACE_Message_Block* mb)
+    {
+      ISMRMRD::IsmrmrdHeader h;
+      ISMRMRD::deserialize(mb->rd_ptr(),h);
+
+      if (h.encoding.size() == 0) {
+        // size() does not return an int; convert it to match the %d conversion.
+        GADGET_DEBUG2("Number of encoding spaces: %d\n", static_cast<int>(h.encoding.size()));
+        GADGET_DEBUG1("This Gadget needs an encoding description\n");
+        return GADGET_FAIL;
+      }
+
+      // Only the x (readout) extents of the first encoding are needed.
+      encodeNx_  = h.encoding[0].encodedSpace.matrixSize.x;
+      encodeFOV_ = h.encoding[0].encodedSpace.fieldOfView_mm.x;
+      reconNx_   = h.encoding[0].reconSpace.matrixSize.x;
+      reconFOV_  = h.encoding[0].reconSpace.fieldOfView_mm.x;
+
+      // process() trims longer readouts down to the encoded matrix size.
+      cutNx_ = encodeNx_;
+      return 0;
+    }
+
+    // Keep only the cutNx_ samples centred on center_sample of a longer readout
+    // and rewrite number_of_samples / center_sample in the header to match.
+    // Returns GADGET_OK, or GADGET_FAIL on a bad window, queue error or exception.
+    int CutXGadget::process( GadgetContainerMessage< ISMRMRD::AcquisitionHeader>* m1,
+        GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
+    {
+        try
+        {
+            // Readouts with at most cutNx_ samples are forwarded untouched.
+            if ( m1->getObjectPtr()->number_of_samples > cutNx_ )
+            {
+                const size_t RO = m1->getObjectPtr()->number_of_samples;
+                const size_t center = m1->getObjectPtr()->center_sample;
+                const size_t halfCut = cutNx_/2;
+                // A window starting before sample 0 or ending past the readout
+                // would make the copy below read outside the source array.
+                if ( center < halfCut || center - halfCut + cutNx_ > RO ) {
+                    GADGET_DEBUG1("CutXGadget: cut window lies outside the readout\n");
+                    return GADGET_FAIL;
+                }
+                const size_t startX = center - halfCut;
+
+                const float ratio = RO / (float)cutNx_;
+                m1->getObjectPtr()->number_of_samples = cutNx_;
+                m1->getObjectPtr()->center_sample = (uint16_t)(center / ratio );
+
+                GadgetContainerMessage< hoNDArray< std::complex<float> > >* m3 = new GadgetContainerMessage< hoNDArray< std::complex<float> > >();
+                std::vector<size_t> dim(2);
+                dim[0] = cutNx_;
+                dim[1] = m2->getObjectPtr()->get_size(1);
+                m3->getObjectPtr()->create(&dim);
+
+                // Copy the window channel by channel (m2 is indexed [readout, channel]).
+                for ( size_t cha=0; cha<dim[1]; cha++ ) {
+                    memcpy(m3->getObjectPtr()->begin()+cha*cutNx_,
+                            m2->getObjectPtr()->begin()+cha*RO+startX,
+                            sizeof( std::complex<float> )*cutNx_);
+                }
+                // Swap the trimmed array in as the payload chained to the header.
+                m1->cont(m3);
+                m2->release();
+            }
+
+            if (this->next()->putq(m1) < 0) return GADGET_FAIL;
+        }
+        catch(...) { return GADGET_FAIL; }
+
+        return GADGET_OK;
+    }
+
+    GADGET_FACTORY_DECLARE(CutXGadget)
+}
diff --git a/gadgets/epi/CutXGadget.h b/gadgets/epi/CutXGadget.h
new file mode 100644
index 0000000..37a59eb
--- /dev/null
+++ b/gadgets/epi/CutXGadget.h
@@ -0,0 +1,33 @@
+#ifndef CutXGADGET_H
+#define CutXGADGET_H
+
+#include "Gadget.h"
+#include "hoNDArray.h"
+#include "gadgetron_epi_export.h"
+
+#include <ismrmrd/ismrmrd.h>
+#include <complex>
+
+namespace Gadgetron{
+
+  class   EXPORTGADGETS_EPI CutXGadget : // trims readouts longer than cutNx_ samples
+  public Gadget2<ISMRMRD::AcquisitionHeader, hoNDArray< std::complex<float> > >
+  {
+    public:
+      CutXGadget();
+      virtual ~CutXGadget();
+
+    protected:
+      virtual int process_config(ACE_Message_Block* mb);
+      virtual int process( GadgetContainerMessage< ISMRMRD::AcquisitionHeader>* m1,
+                       GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2);
+      // Readout (x) geometry copied from the header's first encoding in process_config().
+      size_t encodeNx_;   // encodedSpace.matrixSize.x
+      float encodeFOV_;   // encodedSpace.fieldOfView_mm.x
+      size_t reconNx_;    // reconSpace.matrixSize.x
+      float reconFOV_;    // reconSpace.fieldOfView_mm.x
+      // Number of samples kept per readout by process(); set to encodeNx_ in process_config().
+      size_t cutNx_;
+  };
+}
+#endif //CutXGADGET_H
diff --git a/gadgets/epi/EPICorrGadget.cpp b/gadgets/epi/EPICorrGadget.cpp
new file mode 100644
index 0000000..f23dfdf
--- /dev/null
+++ b/gadgets/epi/EPICorrGadget.cpp
@@ -0,0 +1,197 @@
+#include "EPICorrGadget.h"
+#include "Gadgetron.h"
+#include "ismrmrd/xml.h"
+
+namespace Gadgetron{
+
+  EPICorrGadget::EPICorrGadget() {}   // state is set up later, in process_config()
+  EPICorrGadget::~EPICorrGadget() {}  // no explicit cleanup
+
+// Parse the ISMRMRD header: require a 'ConventionalEPI' trajectory, read its
+// numberOfNavigators / etl parameters and reset the per-shot state.
+int EPICorrGadget::process_config(ACE_Message_Block* mb)
+{
+  ISMRMRD::IsmrmrdHeader h;
+  ISMRMRD::deserialize(mb->rd_ptr(),h);
+
+  if (h.encoding.size() == 0) {
+    GADGET_DEBUG2("Number of encoding spaces: %d\n", static_cast<int>(h.encoding.size()));
+    GADGET_DEBUG1("This Gadget needs an encoding description\n");
+    return GADGET_FAIL;
+  }
+
+  if (!h.encoding[0].trajectoryDescription) {
+    GADGET_DEBUG1("Trajectory description missing");
+    return GADGET_FAIL;
+  }
+  ISMRMRD::TrajectoryDescription traj_desc = *h.encoding[0].trajectoryDescription;
+  if (traj_desc.identifier != "ConventionalEPI") {
+    GADGET_DEBUG1("Expected trajectory description identifier 'ConventionalEPI', not found.");
+    return GADGET_FAIL;
+  }
+
+  // Neither parameter is guaranteed to be present, so start from known values.
+  numNavigators_ = 0;
+  etl_ = 0;
+  for (std::vector<ISMRMRD::UserParameterLong>::iterator i (traj_desc.userParameterLong.begin()); i != traj_desc.userParameterLong.end(); ++i) {
+    if (i->name == "numberOfNavigators") {
+      numNavigators_ = i->value;
+    } else if (i->name == "etl") {
+      etl_ = i->value;
+    }
+  }
+
+  // process() sizes its navigator storage from this count.
+  if (numNavigators_ < 1) {
+    GADGET_DEBUG1("Trajectory description has no positive 'numberOfNavigators'\n");
+    return GADGET_FAIL;
+  }
+
+  verboseMode_ = this->get_bool_value("verboseMode");
+  corrComputed_ = false;
+  navNumber_ = -1;
+  epiEchoNumber_ = -1;
+  return 0;
+}
+
+// Handle one acquisition: navigator (phase-correction) lines are stored and,
+// once the last navigator of a shot is in, turned into per-sample phase
+// corrections; imaging lines are multiplied by them and passed downstream.
+int EPICorrGadget::process(
+          GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
+      GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
+{
+  // Get a reference to the acquisition header
+  ISMRMRD::AcquisitionHeader &hdr = *m1->getObjectPtr();
+
+  // Pass on the non-EPI data (e.g. FLASH Calibration)
+  if (hdr.encoding_space_ref > 0) {
+    // It is enough to put the first one, since they are linked
+    if (this->next()->putq(m1) == -1) {
+      m1->release();
+      ACE_ERROR_RETURN( (LM_ERROR,
+             ACE_TEXT("%p\n"),
+             ACE_TEXT("EPICorrGadget::process, passing data on to next gadget")),
+            -1);
+    }
+    return 0;
+  }
+
+  // We have data from encoding space 0: view the samples as an armadillo matrix.
+  // NOTE(review): adata is modified in place below and never copied back, so it
+  // presumably aliases m2's storage - confirm against as_arma_matrix().
+  arma::cx_fmat adata = as_arma_matrix(m2->getObjectPtr());
+
+  // Check to see if the data is a navigator line or an imaging line
+  if (hdr.isFlagSet(ISMRMRD::ISMRMRD_ACQ_IS_PHASECORR_DATA)) {
+
+    // Increment the navigator counter
+    navNumber_ += 1;
+
+    // If the number of navigators per shot is exceeded, then
+    // we are at the beginning of the next shot
+    if (navNumber_ == numNavigators_) {
+      corrComputed_ = false;
+      navNumber_ = 0;
+      epiEchoNumber_ = -1;
+    }
+    
+    // If we are at the beginning of a shot, then initialize
+    if (navNumber_==0) {
+      // Set the size of the corrections and storage arrays
+      corrpos_.set_size( adata.n_rows);
+      corrneg_.set_size( adata.n_rows );
+      navdata_.set_size( adata.n_rows, hdr.active_channels, numNavigators_);
+      // Store the first navigator's polarity
+      startNegative_ = hdr.isFlagSet(ISMRMRD::ISMRMRD_ACQ_IS_REVERSE);
+    }
+
+    // Store the navigator data
+    navdata_.slice(navNumber_) = adata;
+
+    // If this is the last of the navigators for this shot, then
+    // compute the correction operator
+    if (navNumber_ == (numNavigators_-1)) {
+      arma::cx_fvec ctemp =  arma::zeros<arma::cx_fvec>(adata.n_rows);    // temp column complex
+      arma::fvec tvec = arma::zeros<arma::fvec>(adata.n_rows);            // temp column real
+      arma::fvec x = arma::linspace<arma::fvec>(-0.5, 0.5, adata.n_rows); // Evenly spaced x-space locations
+      int p; // counter
+      
+      // Accumulate over navigator triplets and sum over coils
+      // this is the average phase difference between odd and even navigators
+      for (p=0; p<numNavigators_-2; p=p+2) {
+    ctemp += arma::sum(arma::conj(navdata_.slice(p)+navdata_.slice(p+2)) % navdata_.slice(p+1),1);
+      }
+      
+      // TODO: Add a configuration toggle to switch between correction types
+
+      // Point-wise phase estimate
+      //for (p=0; p<adata.n_rows; p++) {
+      //  tvec[p] = std::arg(ctemp[p]);
+      //}
+
+      // Robust fit to a straight line
+      float slope = ctemp.n_rows * std::arg(arma::cdot(ctemp.rows(0,ctemp.n_rows-2), ctemp.rows(1,ctemp.n_rows-1)));
+      ctemp = ctemp % arma::exp(arma::cx_fvec(arma::zeros<arma::fvec>(x.n_rows), -slope*x));
+      float intercept = std::arg(arma::sum(ctemp));
+      //std::cout << "Slope = " << slope << std::endl;
+      //std::cout << "Intercept = " << intercept << std::endl;
+      tvec = slope*x + intercept;
+      
+      // Odd and even phase corrections
+      if (!startNegative_) {
+    // if the first navigator is a positive readout, we need to flip the sign of our correction
+    tvec = -1.0*tvec;
+      }
+      corrpos_ = arma::exp(arma::cx_fvec(arma::zeros<arma::fvec>(x.n_rows), -0.5*tvec));
+      corrneg_ = arma::exp(arma::cx_fvec(arma::zeros<arma::fvec>(x.n_rows), +0.5*tvec));
+      corrComputed_ = true;
+    }
+
+  }
+  else {
+    // Increment the echo number
+    epiEchoNumber_ += 1;
+    // TODO: use this to apply the B0 correction
+    // NOTE(review): corrComputed_ is not checked before the corrections are used.
+    // Apply the correction
+    // We use the armadillo notation that loops over all the columns
+    if (hdr.isFlagSet(ISMRMRD::ISMRMRD_ACQ_IS_REVERSE)) {
+      // Negative readout
+      for (int p=0; p<adata.n_cols; p++) {
+    adata.col(p) %= corrneg_;
+      }
+      // Now that we have corrected we set the readout direction to positive
+      hdr.clearFlag(ISMRMRD::ISMRMRD_ACQ_IS_REVERSE);
+    } 
+    else {
+      // Positive readout
+      for (int p=0; p<adata.n_cols; p++) {
+    adata.col(p) %= corrpos_;
+      }
+    }
+  }
+
+  // Pass on the imaging data; navigator lines are released instead of forwarded
+  // TODO: this should be controlled by a flag
+  if (hdr.isFlagSet(ISMRMRD::ISMRMRD_ACQ_IS_PHASECORR_DATA)) {
+    m1->release();
+  } 
+  else {
+    // It is enough to put the first one, since they are linked
+    if (this->next()->putq(m1) == -1) {
+      m1->release();
+      ACE_ERROR_RETURN( (LM_ERROR,
+             ACE_TEXT("%p\n"),
+             ACE_TEXT("EPICorrGadget::process, passing data on to next gadget")),
+            -1);
+    }
+  }
+
+  return 0;
+}
+
+GADGET_FACTORY_DECLARE(EPICorrGadget)
+}
+
+
diff --git a/gadgets/epi/EPICorrGadget.h b/gadgets/epi/EPICorrGadget.h
new file mode 100644
index 0000000..decf13f
--- /dev/null
+++ b/gadgets/epi/EPICorrGadget.h
@@ -0,0 +1,48 @@
+#ifndef EPICORRGADGET_H
+#define EPICORRGADGET_H
+
+#include "Gadget.h"
+#include "hoNDArray.h"
+#include "hoArmadillo.h"
+#include "gadgetron_epi_export.h"
+
+#include <ismrmrd/ismrmrd.h>
+#include <complex>
+
+#define _USE_MATH_DEFINES
+#include <math.h>
+
+namespace Gadgetron{
+
+  class  EXPORTGADGETS_EPI EPICorrGadget : // odd/even phase correction of EPI lines from navigator lines
+  public Gadget2<ISMRMRD::AcquisitionHeader,hoNDArray< std::complex<float> > >
+    {
+    public:
+      EPICorrGadget();
+      virtual ~EPICorrGadget();
+
+    protected:
+      virtual int process_config(ACE_Message_Block* mb);
+      virtual int process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
+              GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2);
+
+      // value of the "verboseMode" property, read in process_config()
+      bool verboseMode_;
+
+      arma::cx_fvec corrpos_;   // per-sample factor multiplied into lines without the REVERSE flag
+      arma::cx_fvec corrneg_;   // per-sample factor multiplied into lines with the REVERSE flag
+      arma::cx_fcube navdata_;  // stored navigator lines, one slice per navigator
+
+      // epi parameters (long user parameters of the trajectory description)
+      int numNavigators_;       // "numberOfNavigators"
+      int etl_;                 // "etl"
+
+      // for a given shot
+      bool corrComputed_;       // true once corrpos_/corrneg_ were computed for the shot
+      int navNumber_;           // index of the latest navigator, -1 before the first
+      int epiEchoNumber_;       // index of the latest imaging line, -1 before the first
+      bool startNegative_;      // REVERSE flag of the shot's first navigator
+
+    };
+}
+#endif //EPICORRGADGET_H
diff --git a/gadgets/epi/EPIReconXGadget.cpp b/gadgets/epi/EPIReconXGadget.cpp
new file mode 100644
index 0000000..b9c87b1
--- /dev/null
+++ b/gadgets/epi/EPIReconXGadget.cpp
@@ -0,0 +1,133 @@
+#include "EPIReconXGadget.h"
+#include "Gadgetron.h"
+#include "ismrmrd/xml.h"
+
+namespace Gadgetron{
+
+  EPIReconXGadget::EPIReconXGadget() {}
+  EPIReconXGadget::~EPIReconXGadget() {}
+
+int EPIReconXGadget::process_config(ACE_Message_Block* mb)
+{
+  // Configure the recon-X objects from the ISMRMRD XML header carried in mb.
+  // Returns GADGET_FAIL when no encoding or no ConventionalEPI trajectory is described.
+  ISMRMRD::IsmrmrdHeader h;
+  ISMRMRD::deserialize(mb->rd_ptr(),h);
+  verboseMode_ = this->get_bool_value("verboseMode");
+
+  if (h.encoding.empty()) {
+    // The %d conversion expects an int, so the size_t count is cast explicitly
+    GADGET_DEBUG2("Number of encoding spaces: %d\n", static_cast<int>(h.encoding.size()));
+    GADGET_DEBUG1("This Gadget needs an encoding description\n");
+    return GADGET_FAIL;
+  }
+
+  GADGET_DEBUG2("Number of encoding spaces = %d\n", static_cast<int>(h.encoding.size()));
+
+  // Get the encoding space and trajectory description
+  ISMRMRD::EncodingSpace e_space = h.encoding[0].encodedSpace;
+  ISMRMRD::EncodingSpace r_space = h.encoding[0].reconSpace;
+  ISMRMRD::TrajectoryDescription traj_desc;
+
+  if (h.encoding[0].trajectoryDescription) {
+    traj_desc = *h.encoding[0].trajectoryDescription;
+  } else {
+    GADGET_DEBUG1("Trajectory description missing");
+    return GADGET_FAIL;
+  }
+
+  if (traj_desc.identifier != "ConventionalEPI") {
+    GADGET_DEBUG1("Expected trajectory description identifier 'ConventionalEPI', not found.");
+    return GADGET_FAIL;
+  }
+
+  // Primary encoding space is for EPI
+  reconx.encodeNx_  = e_space.matrixSize.x;
+  reconx.encodeFOV_ = e_space.fieldOfView_mm.x;
+  reconx.reconNx_   = r_space.matrixSize.x;
+  reconx.reconFOV_  = r_space.fieldOfView_mm.x;
+
+  // TODO: we need a flag that says it's a balanced readout.
+  for (std::vector<ISMRMRD::UserParameterLong>::iterator i (traj_desc.userParameterLong.begin()); i != traj_desc.userParameterLong.end(); ++i) {
+    if (i->name == "rampUpTime") {
+      reconx.rampUpTime_ = i->value;
+    } else if (i->name == "rampDownTime") {
+      reconx.rampDownTime_ = i->value;
+    } else if (i->name == "flatTopTime") {
+      reconx.flatTopTime_ = i->value;
+    } else if (i->name == "acqDelayTime") {
+      reconx.acqDelayTime_ = i->value;
+    } else if (i->name == "numSamples") {
+      reconx.numSamples_ = i->value;
+    } else {
+      GADGET_DEBUG2("WARNING: unused trajectory parameter %s found\n", i->name.c_str());
+    }
+  }
+
+  // Double-valued trajectory parameters: only dwellTime is consumed
+  for (std::vector<ISMRMRD::UserParameterDouble>::iterator i (traj_desc.userParameterDouble.begin()); i != traj_desc.userParameterDouble.end(); ++i) {
+    if (i->name == "dwellTime") {
+      reconx.dwellTime_ = i->value;
+    } else {
+      GADGET_DEBUG2("WARNING: unused trajectory parameter %s found\n", i->name.c_str());
+    }
+  }
+
+  // Compute the trajectory
+  reconx.computeTrajectory();
+
+  // Second encoding space is an even readout for PAT REF e.g. FLASH
+  if ( h.encoding.size() > 1 ) {
+    ISMRMRD::EncodingSpace e_space2 = h.encoding[1].encodedSpace;
+    ISMRMRD::EncodingSpace r_space2 = h.encoding[1].reconSpace;
+    reconx_other.encodeNx_  = r_space2.matrixSize.x;
+    reconx_other.encodeFOV_ = r_space2.fieldOfView_mm.x;
+    reconx_other.reconNx_   = r_space2.matrixSize.x;
+    reconx_other.reconFOV_  = r_space2.fieldOfView_mm.x;
+    reconx_other.numSamples_ = e_space2.matrixSize.x;
+    reconx_other.dwellTime_ = 1.0;
+    reconx_other.computeTrajectory();
+  }
+
+  return 0;
+}
+
+int EPIReconXGadget::process(
+          GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
+      GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
+{
+  // Run one acquisition through the matching recon-X object and forward the result downstream.
+  ISMRMRD::AcquisitionHeader* const header = m1->getObjectPtr();
+  hoNDArray< std::complex<float> >* const samples = m2->getObjectPtr();
+  ISMRMRD::AcquisitionHeader new_header;
+  hoNDArray<std::complex<float> > resampled;
+  // NOTE(review): the output is sized from reconx.reconNx_ for both encoding spaces - confirm intended
+  resampled.create(reconx.reconNx_, samples->get_size(1));
+  // Pick the reconstruction object by encoding space (e.g. FLASH calibration uses the other one)
+  if (header->encoding_space_ref == 0) {
+    reconx.apply(*header, *samples, new_header, resampled);
+  }
+  else {
+    reconx_other.apply(*header, *samples, new_header, resampled);
+  }
+
+  // Overwrite the contents of m1 with the new header and the contents of m2 with the new data
+  *header = new_header;
+  *samples = resampled;
+
+  // Queueing m1 is sufficient because m2 is linked to it
+  if (this->next()->putq(m1) == -1) {
+    m1->release();
+    ACE_ERROR_RETURN( (LM_ERROR,
+               ACE_TEXT("%p\n"),
+               ACE_TEXT("EPIReconXGadget::process, passing data on to next gadget")),
+              -1);
+  }
+
+  return 0;
+}
+
+GADGET_FACTORY_DECLARE(EPIReconXGadget)
+}
+
+
diff --git a/gadgets/epi/EPIReconXGadget.h b/gadgets/epi/EPIReconXGadget.h
new file mode 100644
index 0000000..4a3402d
--- /dev/null
+++ b/gadgets/epi/EPIReconXGadget.h
@@ -0,0 +1,38 @@
+#ifndef EPIRECONXGADGET_H
+#define EPIRECONXGADGET_H
+
+#include "Gadget.h"
+#include "hoNDArray.h"
+#include "gadgetron_epi_export.h"
+#include "hoArmadillo.h"
+
+#include <ismrmrd/ismrmrd.h>
+#include <complex>
+
+#include "EPIReconXObjectFlat.h"
+#include "EPIReconXObjectTrapezoid.h"
+
+namespace Gadgetron{
+  // Gadget that applies a recon-X object to each (AcquisitionHeader, complex<float> array) pair and passes the result on.
+  class EXPORTGADGETS_EPI EPIReconXGadget : 
+  public Gadget2<ISMRMRD::AcquisitionHeader,hoNDArray< std::complex<float> > >
+    {
+    public:
+      EPIReconXGadget();
+      virtual ~EPIReconXGadget();
+      
+    protected:
+      virtual int process_config(ACE_Message_Block* mb);
+      virtual int process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
+			  GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2);
+
+      // in verbose mode, more info is printed out (read from the "verboseMode" property in process_config)
+      bool verboseMode_;
+
+      // Reconstruction objects: reconx serves encoding_space_ref == 0 (EPI), reconx_other serves every other value
+      EPI::EPIReconXObjectTrapezoid<std::complex<float> > reconx;
+      EPI::EPIReconXObjectFlat<std::complex<float> > reconx_other;
+
+    };
+}
+#endif //EPIRECONXGADGET_H
diff --git a/gadgets/epi/FFTXGadget.cpp b/gadgets/epi/FFTXGadget.cpp
new file mode 100644
index 0000000..b66ac68
--- /dev/null
+++ b/gadgets/epi/FFTXGadget.cpp
@@ -0,0 +1,27 @@
+#include "GadgetIsmrmrdReadWrite.h"
+#include "FFTXGadget.h"
+#include "hoNDFFT.h"
+#include "hoNDArray_utils.h"
+
+namespace Gadgetron{
+
+  FFTXGadget::FFTXGadget() {}
+  FFTXGadget::~FFTXGadget() {}
+
+  int FFTXGadget::process( GadgetContainerMessage< ISMRMRD::AcquisitionHeader>* m1,
+                GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
+  {
+    // Transform the acquisition data along its first dimension (x) with
+    // hoNDFFT's fft1c, then pass the message on to the next gadget.
+    hoNDArray< std::complex<float> >& samples = *m2->getObjectPtr();
+    hoNDFFT<float>::instance()->fft1c( samples );
+
+    // Only m1 is queued (m2 is presumably chained to it, as in the sibling
+    // gadgets); a negative putq result is reported as a failure.
+    const int queued = this->next()->putq(m1);
+
+    return (queued < 0) ? GADGET_FAIL : GADGET_OK;
+  }
+
+  GADGET_FACTORY_DECLARE(FFTXGadget)
+}
diff --git a/gadgets/epi/FFTXGadget.h b/gadgets/epi/FFTXGadget.h
new file mode 100644
index 0000000..2c8ca98
--- /dev/null
+++ b/gadgets/epi/FFTXGadget.h
@@ -0,0 +1,25 @@
+#ifndef FFTXGADGET_H
+#define FFTXGADGET_H
+
+#include "Gadget.h"
+#include "hoNDArray.h"
+#include "gadgetron_epi_export.h"
+
+#include <ismrmrd/ismrmrd.h>
+#include <complex>
+
+namespace Gadgetron{
+  // Gadget that runs hoNDFFT's fft1c along the first (x) dimension of each acquisition's data and passes the message on.
+  class   EXPORTGADGETS_EPI FFTXGadget : 
+  public Gadget2<ISMRMRD::AcquisitionHeader, hoNDArray< std::complex<float> > >
+  {
+    public:
+      FFTXGadget();
+      virtual ~FFTXGadget();
+
+    protected:
+      virtual int process( GadgetContainerMessage< ISMRMRD::AcquisitionHeader>* m1,
+                       GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2);
+  };
+}
+#endif //FFTXGADGET_H
diff --git a/gadgets/epi/epi.xml b/gadgets/epi/epi.xml
new file mode 100644
index 0000000..b36c2f6
--- /dev/null
+++ b/gadgets/epi/epi.xml
@@ -0,0 +1,138 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+        
+    <reader>
+      <slot>1008</slot>
+      <dll>gadgetron_mricore</dll>
+      <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+  
+    <writer>
+      <slot>1004</slot>
+      <dll>gadgetron_mricore</dll>
+      <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+      <slot>1005</slot>
+      <dll>gadgetron_mricore</dll>
+      <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+      <slot>1006</slot>
+      <dll>gadgetron_mricore</dll>
+      <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+  
+    <gadget>
+      <name>NoiseAdjust</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <gadget>
+      <name>ReconX</name>
+      <dll>gadgetron_epi</dll>
+      <classname>EPIReconXGadget</classname>
+    </gadget>
+
+    <gadget>
+      <name>EPICorr</name>
+      <dll>gadgetron_epi</dll>
+      <classname>EPICorrGadget</classname>
+    </gadget>
+
+    <!-- FFT in X back to k -->
+    <gadget>
+        <name>FFTX</name>
+        <dll>gadgetron_epi</dll>
+        <classname>FFTXGadget</classname>
+    </gadget>
+
+<!--
+    <gadget>
+      <name>IsmrmrdDump</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>IsmrmrdDumpGadget</classname>
+      <property><name>file_prefix</name><value>ISMRMRD_DUMP</value></property>
+      <property><name>append_timestamp</name><value>1</value></property>
+    </gadget>
+-->
+
+    <gadget>
+        <name>AccTrig</name>
+        <dll>gadgetron_mricore</dll>
+	<classname>AcquisitionAccumulateTriggerGadget</classname>
+        <property>
+            <name>trigger_dimension</name>
+            <value>repetition</value>
+        </property>
+        <property>
+          <name>sorting_dimension</name>
+          <value>slice</value>
+        </property>
+    </gadget>
+
+    <gadget>
+        <name>Buff</name>
+        <dll>gadgetron_mricore</dll>
+	<classname>BucketToBufferGadget</classname>
+        <property>
+            <name>N_dimension</name>
+            <value></value>
+        </property>
+        <property>
+          <name>S_dimension</name>
+          <value></value>
+        </property>
+        <property>
+          <name>split_slice</name>
+          <value>true</value>
+        </property>
+    </gadget>
+
+    <gadget>
+      <name>FFT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>FFTGadget</classname>
+    </gadget>
+    
+    <gadget>
+      <name>Combine</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>CombineGadget</classname>
+    </gadget>
+
+    <gadget>
+      <name>Extract</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ExtractGadget</classname>
+    </gadget>  
+
+   <gadget>
+      <name>AutoScale</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>AutoScaleGadget</classname>
+    </gadget>
+
+<!--
+     <gadget>
+      <name>ImageFinishFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishGadgetFLOAT</classname>
+    </gadget>
+-->
+
+    <gadget>
+      <name>FloatToShort</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>FloatToUShortGadget</classname>
+    </gadget>
+ 
+     <gadget>
+      <name>ImageFinishUSHORT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishGadgetUSHORT</classname>
+    </gadget>
+</gadgetronStreamConfiguration>
diff --git a/gadgets/epi/epi_gtplus_grappa.xml b/gadgets/epi/epi_gtplus_grappa.xml
new file mode 100644
index 0000000..9f3dbd6
--- /dev/null
+++ b/gadgets/epi/epi_gtplus_grappa.xml
@@ -0,0 +1,525 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!-- 
+        GT Plus configuration file for general 2D epi reconstruction
+
+        Author: Souheil Inati
+        Email: souheil.inati at nih.gov
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <!-- Recon in X -->
+    <gadget>
+        <name>ReconX</name>
+        <dll>gadgetron_epi</dll>
+        <classname>EPIReconXGadget</classname>
+    </gadget>
+
+    <!-- EPI Corr -->
+    <gadget>
+        <name>EPICorr</name>
+        <dll>gadgetron_epi</dll>
+        <classname>EPICorrGadget</classname>
+    </gadget>
+
+    <!-- FFT in X back to k -->
+    <gadget>
+        <name>FFTX</name>
+        <dll>gadgetron_epi</dll>
+        <classname>FFTXGadget</classname>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- If 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!--Recon computation for 2DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon2DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_4th</name>
+            <value>DIM_Contrast</value>
+        </property>
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>separate_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allS</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>-1</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>-1</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_GRAPPA</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>recon_auto_parameters</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>-1.0</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>cloudNodeFile</name>
+            <value>myCloud_2DT.txt</value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+        <!-- parameters for recon job split -->
+        <property>
+            <name>job_split_by_S</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>job_num_of_N</name>
+            <value>32</value>
+        </property>
+        <property>
+            <name>job_max_Megabytes</name>
+            <value>10240</value>
+        </property>
+        <property>
+            <name>job_overlap</name>
+            <value>2</value>
+        </property>
+        <property>
+            <name>job_perform_on_control_node</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for the cloud computation 
+             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
+        -->
+        <property>
+            <name>CloudComputing</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>CloudSize</name>
+            <value>1</value>
+        </property>
+
+        <!-- node 0 -->
+        <property>
+            <name>CloudNode0_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_Port</name>
+            <value>9003</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_XMLConfiguration</name>
+            <value>GtProg_2DT_Cartesian_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/epi/gadgetron_epi_export.h b/gadgets/epi/gadgetron_epi_export.h
new file mode 100644
index 0000000..b7b3b8e
--- /dev/null
+++ b/gadgets/epi/gadgetron_epi_export.h
@@ -0,0 +1,14 @@
+#ifndef GADGETRON_EPI_EXPORT_H_
+#define GADGETRON_EPI_EXPORT_H_
+// EXPORTGADGETS_EPI: on WIN32 expands to dllexport when __BUILD_GADGETRON_EPI__ is defined and dllimport otherwise; empty on other platforms.
+#if defined (WIN32)
+#if defined (__BUILD_GADGETRON_EPI__)
+#define EXPORTGADGETS_EPI __declspec(dllexport)
+#else
+#define EXPORTGADGETS_EPI __declspec(dllimport)
+#endif
+#else
+#define EXPORTGADGETS_EPI
+#endif
+
+#endif /* GADGETRON_EPI_EXPORT_H_ */
diff --git a/gadgets/grappa/CMakeLists.txt b/gadgets/grappa/CMakeLists.txt
index d242c5f..696cf51 100644
--- a/gadgets/grappa/CMakeLists.txt
+++ b/gadgets/grappa/CMakeLists.txt
@@ -1,35 +1,57 @@
 if (WIN32)
-  ADD_DEFINITIONS(-D__BUILD_GADGETRON_GRAPPA__)
+    ADD_DEFINITIONS(-D__BUILD_GADGETRON_GRAPPA__)
 endif (WIN32)
 
 find_package(Ismrmrd REQUIRED)
-find_package(XSD REQUIRED)
-find_package(XercesC REQUIRED)
 
 include_directories(
-  ${CMAKE_SOURCE_DIR}/gadgets/mri_core
-  ${CMAKE_SOURCE_DIR}/toolboxes/mri/pmri/gpu
-  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
-  ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
+    ${CMAKE_SOURCE_DIR}/gadgets/mri_core
+    ${CMAKE_SOURCE_DIR}/toolboxes/mri/pmri/gpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/core
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/fft/cpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/fft/gpu
+    ${ARMADILLO_INCLUDE_DIRS}
 )
 
 add_library(gadgetron_grappa SHARED 
-	GrappaGadget.cpp
-	GrappaCalibrationBuffer.cpp
-	GrappaWeights.cpp
-	GrappaWeightsCalculator.cpp
-	GrappaUnmixingGadget.cpp
-	${ISMRMRD_XSD_SOURCE}
+    gadgetron_grappa_export.h
+    GrappaCalibrationBuffer.h
+    GrappaGadget.h
+    GrappaUnmixingGadget.h
+    GrappaWeights.h
+    GrappaWeightsCalculator.h
+    GrappaGadget.cpp
+    GrappaCalibrationBuffer.cpp
+    GrappaWeights.cpp
+    GrappaWeightsCalculator.cpp
+    GrappaUnmixingGadget.cpp
     )
 
+set_target_properties(gadgetron_grappa PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+
 target_link_libraries(gadgetron_grappa 
-  gpuparallelmri cpucore
+  gadgetron_gadgetbase
+  gadgetron_toolbox_gpuparallelmri
+  gadgetron_toolbox_cpucore
+  gadgetron_toolbox_cpufft
+  gadgetron_toolbox_gpufft
   ${Boost_LIBRARIES}
   ${ISMRMRD_LIBRARIES} ${FFTW3_LIBRARIES} 
   optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY} 
-  ${XERCESC_LIBRARIES} 
   )
 
-install (TARGETS gadgetron_grappa DESTINATION lib)
+install (FILES  gadgetron_grappa_export.h
+                GrappaCalibrationBuffer.h
+                GrappaGadget.h
+                GrappaUnmixingGadget.h
+                GrappaWeights.h
+                GrappaWeightsCalculator.h 
+                DESTINATION include COMPONENT main)
+
+install (TARGETS gadgetron_grappa DESTINATION lib COMPONENT main)
 
 add_subdirectory(config)
diff --git a/gadgets/grappa/GrappaCalibrationBuffer.cpp b/gadgets/grappa/GrappaCalibrationBuffer.cpp
index d102cf6..e073d78 100644
--- a/gadgets/grappa/GrappaCalibrationBuffer.cpp
+++ b/gadgets/grappa/GrappaCalibrationBuffer.cpp
@@ -16,14 +16,17 @@ namespace Gadgetron{
     , weights_invalid_(true)
   {
     dimensions_ = dimensions;
-    try {buffer_.create(&dimensions_);}
-    catch (std::runtime_error & err){
+    try {
+      buffer_.create(&dimensions_);
+      buffer_.fill(std::complex<float>(0.0,0.0));
+    } catch (std::runtime_error & err){
       GADGET_DEBUG_EXCEPTION(err,"Unable to allocate memory for GRAPPA buffer");
     }
   
   }
 
-  int GrappaCalibrationBuffer::add_data(ISMRMRD::AcquisitionHeader* m1, hoNDArray< std::complex<float> >* m2)
+  int GrappaCalibrationBuffer::add_data(ISMRMRD::AcquisitionHeader* m1, hoNDArray< std::complex<float> >* m2,
+					 unsigned short line_offset, unsigned short partition_offset)
   {
     if (!buffer_.get_data_ptr()) {
       GADGET_DEBUG1("Buffer not allocated, cannot add data");
@@ -31,8 +34,8 @@ namespace Gadgetron{
     }
   
     unsigned int samples =  m1->number_of_samples;
-    unsigned int line = m1->idx.kspace_encode_step_1;
-    unsigned int partition = m1->idx.kspace_encode_step_2;
+    unsigned int line = m1->idx.kspace_encode_step_1 + line_offset;
+    unsigned int partition = m1->idx.kspace_encode_step_2 + partition_offset;
     unsigned int slice = m1->idx.slice; //We should probably check this
 
     if (samples != dimensions_[0]) {
@@ -68,7 +71,7 @@ namespace Gadgetron{
       weights_invalid_ = true;
     }
 
-    bool is_first_scan_in_slice = ISMRMRD::FlagBit(ISMRMRD::ACQ_FIRST_IN_SLICE).isSet(m1->flags);
+    bool is_first_scan_in_slice = m1->isFlagSet(ISMRMRD::ISMRMRD_ACQ_FIRST_IN_SLICE);
 
 
     //Depending on the sequence used, we could get into trouble if the sequence switches slice acquisition scheme before finishing a slice.
@@ -85,7 +88,7 @@ namespace Gadgetron{
     last_line_ = line;
 
 
-    bool is_last_scan_in_slice = ISMRMRD::FlagBit(ISMRMRD::ACQ_LAST_IN_SLICE).isSet(m1->flags);
+    bool is_last_scan_in_slice = m1->isFlagSet(ISMRMRD::ISMRMRD_ACQ_LAST_IN_SLICE);
 
     if (is_last_scan_in_slice && acquiring_sequentially) {
       unsigned int min_ky, max_ky;
diff --git a/gadgets/grappa/GrappaCalibrationBuffer.h b/gadgets/grappa/GrappaCalibrationBuffer.h
index ea69b21..84e4b8e 100644
--- a/gadgets/grappa/GrappaCalibrationBuffer.h
+++ b/gadgets/grappa/GrappaCalibrationBuffer.h
@@ -2,7 +2,7 @@
 #define GRAPPACALIBRATIONBUFFER_H
 
 #include "gadgetron_grappa_export.h"
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
 #include "hoNDArray.h"
 #include "GrappaWeights.h"
 #include "GrappaWeightsCalculator.h"
@@ -129,7 +129,8 @@ class EXPORTGADGETSGRAPPA GrappaCalibrationBuffer
 			  GrappaWeightsCalculator<float>* weights_calculator);
   virtual ~GrappaCalibrationBuffer() {}
 
-  int add_data(ISMRMRD::AcquisitionHeader* m1, hoNDArray< std::complex<float> >* m2);
+  int add_data(ISMRMRD::AcquisitionHeader* m1, hoNDArray< std::complex<float> >* m2, 
+	       unsigned short line_offset = 0, unsigned short partition_offset = 0);
 
  private:
   hoNDArray< std::complex<float> > buffer_;
diff --git a/gadgets/grappa/GrappaGadget.cpp b/gadgets/grappa/GrappaGadget.cpp
index 5d9193e..7c1fbfa 100644
--- a/gadgets/grappa/GrappaGadget.cpp
+++ b/gadgets/grappa/GrappaGadget.cpp
@@ -1,13 +1,13 @@
-#include "../mri_core/GadgetIsmrmrdReadWrite.h"
 #include "Gadgetron.h"
 #include "GrappaGadget.h"
 #include "GrappaUnmixingGadget.h"
-#include "GadgetIsmrmrdReadWrite.h"
+#include "ismrmrd/xml.h"
 
 #include <ace/OS_NS_stdlib.h>
 #include <boost/algorithm/string.hpp>
 #include <boost/algorithm/string/split.hpp>
 
+
 namespace Gadgetron{
 
   GrappaGadget::GrappaGadget()
@@ -46,32 +46,51 @@ namespace Gadgetron{
 
   int GrappaGadget::process_config(ACE_Message_Block* mb)
   {
-    boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg = parseIsmrmrdXMLHeader(std::string(mb->rd_ptr()));
+    ISMRMRD::IsmrmrdHeader h;
+    ISMRMRD::deserialize(mb->rd_ptr(),h);
 
-    ISMRMRD::ismrmrdHeader::encoding_sequence e_seq = cfg->encoding();
-    if (e_seq.size() != 1) {
-      GADGET_DEBUG2("Number of encoding spaces: %d\n", e_seq.size());
+    if (h.encoding.size() != 1) {
+      GADGET_DEBUG2("Number of encoding spaces: %d\n", h.encoding.size());
       GADGET_DEBUG1("This Gadget only supports one encoding space\n");
       return GADGET_FAIL;
     }
 
-    ISMRMRD::encodingSpaceType e_space = (*e_seq.begin()).encodedSpace();
-    ISMRMRD::encodingSpaceType r_space = (*e_seq.begin()).reconSpace();
-    ISMRMRD::encodingLimitsType e_limits = (*e_seq.begin()).encodingLimits();
+    ISMRMRD::EncodingSpace e_space = h.encoding[0].encodedSpace;
+    ISMRMRD::EncodingSpace r_space = h.encoding[0].reconSpace;
+    ISMRMRD::EncodingLimits e_limits = h.encoding[0].encodingLimits;
 
-    unsigned int slices = e_limits.slice().present() ? e_limits.slice().get().maximum() + 1 : 1;
-    dimensions_.push_back(e_space.matrixSize().x());
-    dimensions_.push_back(e_space.matrixSize().y());
-    dimensions_.push_back(e_space.matrixSize().z());
-    dimensions_.push_back((cfg->acquisitionSystemInformation().present() && cfg->acquisitionSystemInformation().get().receiverChannels().present()) ?
-                          cfg->acquisitionSystemInformation().get().receiverChannels().get() : 1);
+    unsigned int slices = e_limits.slice ? e_limits.slice->maximum + 1 : 1;
+    dimensions_.push_back(e_space.matrixSize.x);
+    dimensions_.push_back(e_space.matrixSize.y);
+    dimensions_.push_back(e_space.matrixSize.z);
+    dimensions_.push_back((h.acquisitionSystemInformation && h.acquisitionSystemInformation->receiverChannels) ?
+                          *(h.acquisitionSystemInformation->receiverChannels) : 1);
     dimensions_.push_back(slices);
 
-    fov_.push_back(r_space.fieldOfView_mm().x());
-    fov_.push_back(r_space.fieldOfView_mm().y());
-    fov_.push_back(r_space.fieldOfView_mm().z());
 
-    line_offset_ = (dimensions_[1]>>1)-e_limits.kspace_encoding_step_1().get().center();
+    GADGET_DEBUG2("Dimensions %d, %d, %d, %d, %d\n", dimensions_[0], dimensions_[1], dimensions_[2], dimensions_[3], dimensions_[4]);
+
+    image_dimensions_.push_back(r_space.matrixSize.x); 
+    image_dimensions_.push_back(r_space.matrixSize.y);
+    image_dimensions_.push_back(r_space.matrixSize.z);
+    image_dimensions_.push_back(dimensions_[3]);
+
+    fov_.push_back(r_space.fieldOfView_mm.x);
+    fov_.push_back(r_space.fieldOfView_mm.y);
+    fov_.push_back(r_space.fieldOfView_mm.z);
+
+    line_offset_ = (dimensions_[1]>>1)-e_limits.kspace_encoding_step_1->center;
+
+    if (h.userParameters) {
+      for (size_t i = 0; i < h.userParameters->userParameterString.size(); i++) {
+	std::string name = h.userParameters->userParameterString[i].name;
+	std::string value = h.userParameters->userParameterString[i].value;
+	if (name.substr(0,5) == std::string("COIL_")) {
+	  int coil_num = std::atoi(name.substr(5,name.size()-5).c_str());
+	  channel_map_[value] = coil_num;
+	}
+      }
+    }
 
     return GADGET_OK;
   }
@@ -80,13 +99,6 @@ namespace Gadgetron{
   int GrappaGadget::initial_setup()
   {
 
-    GADGET_DEBUG2("Dimensions %d, %d, %d, %d, %d\n", dimensions_[0], dimensions_[1], dimensions_[2], dimensions_[3], dimensions_[4]);
-
-    image_dimensions_.push_back(dimensions_[0] / 2); //TODO: fix this in general
-    image_dimensions_.push_back(dimensions_[1]);
-    image_dimensions_.push_back(dimensions_[2]);
-    image_dimensions_.push_back(dimensions_[3]);
-
 
     weights_ = std::vector< boost::shared_ptr<GrappaWeights<float> > >(dimensions_[4]);
 
@@ -103,17 +115,48 @@ namespace Gadgetron{
 
     weights_calculator_.set_number_of_target_coils(target_coils_);
 
-    //Let's figure out if we have channels that are supposed to be uncombined
-    boost::shared_ptr<std::string> uncomb_str = this->get_string_value("uncombined_channels");
-    std::vector<std::string> uncomb;
-    boost::split(uncomb, *uncomb_str, boost::is_any_of(","));
-    for (unsigned int i = 0; i < uncomb.size(); i++) {
-      std::string ch = boost::algorithm::trim_copy(uncomb[i]);
-      if (ch.size() > 0) {
-        unsigned int channel_id = static_cast<unsigned int>(ACE_OS::atoi(ch.c_str()));
-        weights_calculator_.add_uncombined_channel(channel_id);
+
+    int device_channels = this->get_int_value("device_channels");
+    if (device_channels) {
+      GADGET_DEBUG2("We got the number of device channels from other gadget: %d\n", device_channels);
+      for (int i = 0; i < device_channels; i++) {
+	weights_calculator_.add_uncombined_channel((unsigned int)i);
+      }
+    } else {
+      //Let's figure out if we have channels that are supposed to be uncombined
+      boost::shared_ptr<std::string> uncomb_str = this->get_string_value("uncombined_channels");
+      std::vector<std::string> uncomb;
+      boost::split(uncomb, *uncomb_str, boost::is_any_of(","));
+      for (unsigned int i = 0; i < uncomb.size(); i++) {
+	std::string ch = boost::algorithm::trim_copy(uncomb[i]);
+	if (ch.size() > 0) {
+	  unsigned int channel_id = static_cast<unsigned int>(ACE_OS::atoi(ch.c_str()));
+	  weights_calculator_.add_uncombined_channel(channel_id);
+	}
+      }
+      
+      uncomb_str = this->get_string_value("uncombined_channels_by_name");
+      if (uncomb_str->size()) {
+	GADGET_DEBUG2("uncomb_str: %s\n",  uncomb_str->c_str());
+	boost::split(uncomb, *uncomb_str, boost::is_any_of(","));
+	for (unsigned int i = 0; i < uncomb.size(); i++) {
+	std::string ch = boost::algorithm::trim_copy(uncomb[i]);
+	map_type_::iterator it = channel_map_.find(ch);
+	if (it != channel_map_.end()) {
+	  unsigned int channel_id = static_cast<unsigned int>(it->second);
+	  GADGET_DEBUG2("Device channel: %s (%d)\n",  uncomb[i].c_str(), channel_id);
+	  weights_calculator_.add_uncombined_channel(channel_id);
+	}
+	/*
+	  if (ch.size() > 0) {
+	  unsigned int channel_id = static_cast<unsigned int>(ACE_OS::atoi(ch.c_str()));
+	  weights_calculator_.add_uncombined_channel(channel_id);
+	  }
+	*/
+	}
       }
     }
+    
 
     for (unsigned int i = 0; i < buffers_.size(); i++) {
       weights_[i] = boost::shared_ptr<GrappaWeights<float> >(new GrappaWeights<float>());
@@ -163,6 +206,14 @@ namespace Gadgetron{
   process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
           GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
   {
+      bool is_noise = m1->getObjectPtr()->isFlagSet(ISMRMRD::ISMRMRD_ACQ_IS_NOISE_MEASUREMENT);
+
+      //We should not be receiving noise here
+      if (is_noise) {
+	m1->release();
+	return GADGET_OK;
+      }
+
 
     if (first_call_) {
       if (m1->getObjectPtr()->active_channels != dimensions_[3]) {
@@ -186,7 +237,7 @@ namespace Gadgetron{
     unsigned int slice = acq_head->idx.slice;
 
     if (samples != image_dimensions_[0]) {
-      GADGET_DEBUG1("GrappaGadget: wrong number of samples received\n");
+      GADGET_DEBUG2("GrappaGadget: wrong number of samples received %d, expected %d\n", samples, image_dimensions_[0]);
       return GADGET_FAIL;
     }
 
@@ -217,9 +268,9 @@ namespace Gadgetron{
     }
 
 
-    bool is_last_scan_in_slice = ISMRMRD::FlagBit(ISMRMRD::ACQ_LAST_IN_SLICE).isSet(m1->getObjectPtr()->flags);
+    bool is_last_scan_in_slice = m1->getObjectPtr()->isFlagSet(ISMRMRD::ISMRMRD_ACQ_LAST_IN_SLICE);
 
-    bool is_first_scan_in_slice = ISMRMRD::FlagBit(ISMRMRD::ACQ_FIRST_IN_SLICE).isSet(m1->getObjectPtr()->flags);
+    bool is_first_scan_in_slice = m1->getObjectPtr()->isFlagSet(ISMRMRD::ISMRMRD_ACQ_FIRST_IN_SLICE);
 
     if (is_first_scan_in_slice) {
       time_stamps_[slice] = m1->getObjectPtr()->acquisition_time_stamp;
@@ -322,7 +373,7 @@ namespace Gadgetron{
       */
     }
 
-    if (buffers_[slice]->add_data(m1->getObjectPtr(),m2->getObjectPtr()) < 0) {
+    if (buffers_[slice]->add_data(m1->getObjectPtr(),m2->getObjectPtr(), line_offset_) < 0) {
       GADGET_DEBUG1("Failed to add incoming data to grappa calibration buffer\n");
       return GADGET_FAIL;
     }
diff --git a/gadgets/grappa/GrappaGadget.h b/gadgets/grappa/GrappaGadget.h
index 64d8160..ea1042e 100644
--- a/gadgets/grappa/GrappaGadget.h
+++ b/gadgets/grappa/GrappaGadget.h
@@ -7,8 +7,9 @@
 #include "GrappaCalibrationBuffer.h"
 #include "gadgetron_grappa_export.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <complex>
+#include <map>
 
 namespace Gadgetron{
 struct EXPORTGADGETSGRAPPA GrappaBufferInfo
@@ -31,6 +32,7 @@ public Gadget2< ISMRMRD::AcquisitionHeader, hoNDArray< std::complex<float> > >
   virtual ~GrappaGadget();
 
  protected:
+
   virtual int process_config(ACE_Message_Block* mb);
   virtual int process( GadgetContainerMessage< ISMRMRD::AcquisitionHeader >* m1,
 		  GadgetContainerMessage< hoNDArray< std::complex<float> > > * m2 );
@@ -44,6 +46,8 @@ public Gadget2< ISMRMRD::AcquisitionHeader, hoNDArray< std::complex<float> > >
 
   bool first_call_;
  private:
+  typedef std::map< std::string, int > map_type_;
+
   std::vector< GrappaCalibrationBuffer* > buffers_;
   std::vector<unsigned int> fov_;
   std::vector<size_t> dimensions_;
@@ -57,6 +61,8 @@ public Gadget2< ISMRMRD::AcquisitionHeader, hoNDArray< std::complex<float> > >
   int target_coils_;
   float phase_encoding_resolution_;
   unsigned int line_offset_;
+  
+  map_type_ channel_map_;
 };
 }
 #endif //GRAPPAGADGET_H
diff --git a/gadgets/grappa/GrappaUnmixingGadget.cpp b/gadgets/grappa/GrappaUnmixingGadget.cpp
index 28853af..e5a3ab2 100644
--- a/gadgets/grappa/GrappaUnmixingGadget.cpp
+++ b/gadgets/grappa/GrappaUnmixingGadget.cpp
@@ -37,9 +37,12 @@ namespace Gadgetron{
     m1->cont(0);
     m2->cont(cm2);
 
+    hoNDFFT<float>::instance()->ifft3c(*m3->getObjectPtr());
+    /*
     hoNDFFT<float>::instance()->ifft(m3->getObjectPtr(),0);
     hoNDFFT<float>::instance()->ifft(m3->getObjectPtr(),1);
     hoNDFFT<float>::instance()->ifft(m3->getObjectPtr(),2);
+    */
 
     if (!m1->getObjectPtr()->weights_) {
       GADGET_DEBUG1("Weights are a NULL\n");
diff --git a/gadgets/grappa/GrappaUnmixingGadget.h b/gadgets/grappa/GrappaUnmixingGadget.h
index 2fe525f..a6258fc 100644
--- a/gadgets/grappa/GrappaUnmixingGadget.h
+++ b/gadgets/grappa/GrappaUnmixingGadget.h
@@ -4,7 +4,7 @@
 #include "gadgetron_grappa_export.h"
 #include "Gadget.h"
 #include "hoNDArray.h"
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
 #include "GrappaWeights.h"
 
 #include <complex>
diff --git a/gadgets/grappa/GrappaWeightsCalculator.cpp b/gadgets/grappa/GrappaWeightsCalculator.cpp
index 67b86a4..0008fbd 100644
--- a/gadgets/grappa/GrappaWeightsCalculator.cpp
+++ b/gadgets/grappa/GrappaWeightsCalculator.cpp
@@ -77,19 +77,21 @@ template <class T> int GrappaWeightsCalculator<T>::svc(void)  {
         size_t ks = 5;
         size_t power = 3;
 
-        cuNDArray<complext<float> > D(RO*E1, ks*ks, CHA);
+        /*cuNDArray<complext<float> > D(RO*E1, ks*ks, CHA);
         cuNDArray<complext<float> > DH_D(RO*E1, CHA, CHA); 
         cuNDArray<complext<float> > V1(RO*E1, CHA);
-        cuNDArray<complext<float> > U1(RO*E1, ks*ks);
+        cuNDArray<complext<float> > U1(RO*E1, ks*ks);*/
 
 		// Compute CSM
-		cuNDArray<float_complext> csm;
-        csm.create(device_data.get_dimensions());
+		/*cuNDArray<float_complext> csm;
+        csm.create(device_data.get_dimensions());*/
+
+        boost::shared_ptr< cuNDArray<float_complext> > csm;
 		{
         	//GPUTimer timer("GRAPPA CSM");
-			// csm = estimate_b1_map<float,2>( &device_data, target_coils_ );
+			csm = estimate_b1_map<float,2>( &device_data, target_coils_ );
 
-            estimate_b1_map_2D_NIH_Souheil( &device_data, &csm, ks, power, D, DH_D, V1, U1 );
+            // estimate_b1_map_2D_NIH_Souheil( &device_data, &csm, ks, power, D, DH_D, V1, U1 );
 
 			//GADGET_DEBUG2("Coils in csm: %d\n", csm->get_size(2));
 		}
@@ -118,7 +120,7 @@ template <class T> int GrappaWeightsCalculator<T>::svc(void)  {
 			kernel_size.push_back(5);
 			kernel_size.push_back(4);
 			if ( htgrappa_calculate_grappa_unmixing(reinterpret_cast< cuNDArray<complext<float> >* >(&device_data),
-					&csm,
+					csm.get(),
 					(unsigned int)(mb1->getObjectPtr()->acceleration_factor),
 					&kernel_size,
 					&unmixing_dev,
diff --git a/gadgets/grappa/config/CMakeLists.txt b/gadgets/grappa/config/CMakeLists.txt
index a06dc16..709f979 100644
--- a/gadgets/grappa/config/CMakeLists.txt
+++ b/gadgets/grappa/config/CMakeLists.txt
@@ -1,5 +1,6 @@
-install (FILES grappa_unoptimized.xml grappa_unoptimized_float.xml DESTINATION config)
+
+install (FILES grappa_unoptimized.xml grappa_unoptimized_float.xml DESTINATION ${GADGETRON_INSTALL_CONFIG_PATH} COMPONENT main)
 
 if(ARMADILLO_FOUND)
-  install (FILES grappa.xml grappa_float.xml DESTINATION config)
+  install (FILES grappa.xml grappa_float.xml DESTINATION ${GADGETRON_INSTALL_CONFIG_PATH} COMPONENT main)
 endif(ARMADILLO_FOUND)
diff --git a/gadgets/gtPlus/CMakeLists.txt b/gadgets/gtPlus/CMakeLists.txt
index 51eb9a9..9e6cc33 100644
--- a/gadgets/gtPlus/CMakeLists.txt
+++ b/gadgets/gtPlus/CMakeLists.txt
@@ -1,21 +1,20 @@
 
-include_directories(   
+include_directories( 
     ${CMAKE_SOURCE_DIR}/gadgets/core
     ${ACE_INCLUDE_DIR} 
     ${Boost_INCLUDE_DIR}
     ${ISMRMRD_INCLUDE_DIR}
-    ${ISMRMRD_SCHEMA_DIR}
-    ${ISMRMRD_XSD_INCLUDE_DIR}
-    ${XSD_INCLUDE_DIR}
     ${FFTW3_INCLUDE_DIR}
     ${ARMADILLO_INCLUDE_DIRS}
+    ${CMAKE_SOURCE_DIR}/toolboxes/cloudbus
     ${CMAKE_SOURCE_DIR}/toolboxes/mri/pmri/gpu
     ${CMAKE_SOURCE_DIR}/toolboxes/nfft/gpu
     ${CMAKE_SOURCE_DIR}/toolboxes/core
     ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
     ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
     ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/hostutils
-    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/arma_math
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
+    ${CMAKE_SOURCE_DIR}/toolboxes/fft/cpu
     ${CMAKE_SOURCE_DIR}/toolboxes/operators
     ${CMAKE_SOURCE_DIR}/toolboxes/operators/cpu
     ${CMAKE_SOURCE_DIR}/toolboxes/solvers
@@ -25,15 +24,19 @@ include_directories(
     ${HDF5_INCLUDE_DIR}
     ${HDF5_INCLUDE_DIR}/cpp
     ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow
+    ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow/cpu
     ${CMAKE_SOURCE_DIR}/toolboxes/gtplus
     ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/util
     ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/workflow
     ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/algorithm
     ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/solver
+    ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/application
     ${CMAKE_SOURCE_DIR}/toolboxes/gadgettools
+    ${CMAKE_SOURCE_DIR}/toolboxes/gadgettools/ismrmrd
     ${CMAKE_SOURCE_DIR}/apps/gadgetron
     ${CMAKE_SOURCE_DIR}/gadgets/mri_core 
     ${CMAKE_SOURCE_DIR}/gadgets/gtPlus 
+    ${CMAKE_SOURCE_DIR}/apps/gadgetron
   )
 
 IF (WIN32)
@@ -46,54 +49,129 @@ if(WIN32)
 endif(WIN32)
 
 find_package(Ismrmrd REQUIRED)
-find_package(XSD REQUIRED)
-find_package(XercesC REQUIRED)
+
+set( gtCloud_files 
+        config/gtCloud/myCloud_2DT.txt 
+        config/gtCloud/myCloud_3DT.txt 
+        config/gtCloud/myCloud_2DT_DualLayer.txt 
+        config/gtCloud/myCloud_2DT_DualLayer_FirstLayer.txt )
+
+set( config_files 
+
+    config/GT_2DT_Cartesian.xml
+    config/GT_2DT_Cartesian_CloudNode.xml
+    config/GT_2DT_Cartesian_Dicom.xml
+    config/GT_2DT_Cartesian_DualLayer_Gateway_L1SPIRIT.xml
+    config/GT_2DT_Cartesian_DualLayer_Gateway_SPIRIT.xml
+    config/GT_2DT_Cartesian_FirstLayer_CloudNode.xml
+    config/GT_2DT_Cartesian_GFactor.xml
+    config/GT_2DT_Cartesian_ImageTrigger_Dicom.xml
+    config/GT_2DT_Cartesian_L1SPIRIT.xml
+    config/GT_2DT_Cartesian_PseudoReplica_SNRUnitRecon.xml
+    config/GT_2DT_Cartesian_SingleLayer_CloudNode.xml
+    config/GT_2DT_Cartesian_SPIRIT.xml
+    config/GT_2DT_FatWater.xml
+
+    config/GT_2DT_HASTE.xml
+    config/GT_2DT_HASTE_MOCO_AVE.xml
+
+    config/GT_2DT_T2W.xml
+
+    config/GT_2DT_LGE.xml
+
+    config/GT_2DT_MOLLI.xml
+    config/GT_2DT_MOLLI_Offline.xml
+
+    config/GT_2DT_Perfusion.xml
+
+    config/GT_2DT_PseudoReplica_SNRUnitRecon_DataExport.xml
+
+    config/GT_2DT_RealTimeCine.xml
+    config/GT_2DT_RealTimeFlow.xml
+
+    config/GT_2DT_RTCine_L1SPIRIT_PhysioInterp.xml
+    config/GT_2DT_RTCine_L1SPIRIT_PhysioInterp_DualLayer_Gateway.xml
+
+    config/GT_3DT_Cartesian.xml
+    config/GT_3DT_Cartesian_CloudNode.xml
+    config/GT_3DT_Cartesian_GFactor.xml
+    config/GT_3DT_Cartesian_L1SPIRIT.xml
+    config/GT_3DT_Cartesian_SingleLayer_L1SPIRIT.xml )
+
+set( gadgetronPlus_header_files GtPlusGadgetImageArray.h
+                                GtPlusAccumulatorWorkOrderTriggerGadget.h
+                                GtPlusAccumulatorImageTriggerGadget.h
+                                GtPlusGadgetOpenMP.h
+                                GtPlusReconGadget.h
+                                GtPlusRecon2DTGadget.h
+                                GtPlusRecon3DTGadget.h
+                                GtPlusRecon2DTGadgetCloud.h
+                                GtPlusRecon2DTCloudPackage.h
+                                GadgetCloudJobMessageReadWrite.h
+                                GtPlusReconJob2DTGadget.h 
+                                GtPlusReconJob3DTGadget.h 
+                                GtPlusReconJob2DTGadgetCloud.h 
+                                GtPlusImageReconGadget.h 
+                                GtPlusReconGadgetUtil.h 
+                                )
+
+set( gadgetronPlus_src_files GtPlusGadgetImageArray.cpp
+                            GtPlusAccumulatorWorkOrderTriggerGadget.cpp
+                            GtPlusAccumulatorImageTriggerGadget.cpp
+                            GtPlusGadgetOpenMP.cpp
+                            GtPlusReconGadget.cpp
+                            GtPlusRecon2DTGadget.cpp
+                            GtPlusRecon3DTGadget.cpp
+                            GtPlusRecon2DTGadgetCloud.cpp
+                            GadgetCloudJobMessageReadWrite.cpp
+                            GtPlusReconJob2DTGadget.cpp 
+                            GtPlusReconJob3DTGadget.cpp 
+                            GtPlusReconJob2DTGadgetCloud.cpp 
+                            GtPlusImageReconGadget.cpp 
+                            GtPlusReconGadgetUtil.cpp 
+                            )
+
+set( config_gtCloud_files ${config_files} ${gtCloud_files} )
+source_group(config FILES ${config_gtCloud_files})
 
 add_library(gadgetronPlus SHARED
-            GtPlusGadgetExport.h
-            GtPlusGadgetImageArray.h
-            GtPlusGadgetImageArray.cpp
-            GtPlusAccumulatorWorkOrderTriggerGadget.h
-            GtPlusAccumulatorWorkOrderTriggerGadget.cpp
-            GtPlusGadgetOpenMP.h
-            GtPlusGadgetOpenMP.cpp
-            GtPlusReconGadget.h
-            GtPlusReconGadget.cpp
-            GtPlusRecon2DTGadget.h
-            GtPlusRecon2DTGadget.cpp
-            GtPlusRecon3DTGadget.h
-            GtPlusRecon3DTGadget.cpp
-            GtPlusRecon2DTGadgetCloud.h
-            GtPlusRecon2DTGadgetCloud.cpp
-            GtPlusRecon2DTCloudPackage.h
-            GadgetCloudJobMessageReadWrite.h
-            GadgetCloudJobMessageReadWrite.cpp
-            GtPlusReconJob2DTGadget.h 
-            GtPlusReconJob2DTGadget.cpp 
-            GtPlusReconJob3DTGadget.h 
-            GtPlusReconJob3DTGadget.cpp 
-            GtPlusReconJob2DTGadgetCloud.h 
-            GtPlusReconJob2DTGadgetCloud.cpp 
-            ${ISMRMRD_XSD_SOURCE} )
+            GtPlusGadgetExport.h 
+            ${gadgetronPlus_header_files} 
+            ${gadgetronPlus_src_files} 
+            ${config_gtCloud_files} 
+        )
+
+set_target_properties(gadgetronPlus PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
 
 target_link_libraries(gadgetronPlus 
-    cpucore 
-    cpucore_math 
-    gtplus 
-    gadgettools 
-    ${MKL_LIBRARIES} 
+    gadgetron_gadgetbase
+    gadgetron_toolbox_cpucore 
+    gadgetron_toolbox_cpucore_math 
+    gadgetron_toolbox_cpufft
+    gadgetron_toolbox_gtplus 
+    gadgetron_toolbox_gadgettools 
+    gadgetron_toolbox_cloudbus
     ${Boost_LIBRARIES}
     ${ISMRMRD_LIBRARIES} ${FFTW3_LIBRARIES} 
-    optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY}
-    ${XERCESC_LIBRARIES} )
+    optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY} 
+    )
 
-    if (CUDA_FOUND)
-        target_link_libraries(gadgetronPlus gpuparallelmri)
-    endif (CUDA_FOUND)
+if (CUDA_FOUND)  
+  include_directories( 
+    ${CUDA_INCLUDE_DIRS}
+    )
+  target_link_libraries(gadgetronPlus gadgetron_toolbox_gpuparallelmri)
+endif(CUDA_FOUND)
 
 install (FILES 
-        GtPlusGadgetExport.h
-        DESTINATION include)
+        GtPlusGadgetExport.h 
+        ${gadgetronPlus_header_files} 
+        DESTINATION include COMPONENT main)
+
+install (FILES  ${config_files} 
+        DESTINATION ${GADGETRON_INSTALL_CONFIG_PATH} COMPONENT main)
+
+install (FILES  ${gtCloud_files} 
+        DESTINATION ${GADGETRON_INSTALL_CONFIG_PATH}/gtCloud COMPONENT main)
 
-install(TARGETS gadgetronPlus DESTINATION lib)
-# install(FILES default.xml default_short.xml DESTINATION config)
+install(TARGETS gadgetronPlus DESTINATION lib COMPONENT main)
diff --git a/gadgets/gtPlus/GadgetCloudJobMessageReadWrite.h b/gadgets/gtPlus/GadgetCloudJobMessageReadWrite.h
index 16a946a..1c7993e 100644
--- a/gadgets/gtPlus/GadgetCloudJobMessageReadWrite.h
+++ b/gadgets/gtPlus/GadgetCloudJobMessageReadWrite.h
@@ -62,7 +62,7 @@ namespace Gadgetron
                 return 0;
             }
 
-            size_t maxBytesPerSend = 512.0*1024*1024;
+            size_t maxBytesPerSend = (size_t)(512.0*1024*1024);
 
             if ( sizeOfJob > maxBytesPerSend )
             {
@@ -164,7 +164,7 @@ namespace Gadgetron
 
             GADGET_DEBUG2("--> send job, size of job : %f MBytes ... \n", sizeOfJob/1024.0/1024);
 
-            size_t maxBytesPerSend = 512.0*1024*1024;
+            size_t maxBytesPerSend = (size_t)(512.0*1024*1024);
 
             if ( sizeOfJob > maxBytesPerSend )
             {
diff --git a/gadgets/gtPlus/GadgetMRIHeadersExt.h b/gadgets/gtPlus/GadgetMRIHeadersExt.h
deleted file mode 100644
index cf66c55..0000000
--- a/gadgets/gtPlus/GadgetMRIHeadersExt.h
+++ /dev/null
@@ -1,231 +0,0 @@
-#ifndef GADGETMRIHEADERSEXT_H
-#define GADGETMRIHEADERSEXT_H
-
-#include "gadgetronMrRecon_export.h"
-#include "GadgetronMrReconCommon.h"
-#include "GadgetMRIHeaders.h"
-#include "ismrmrd.h"
-#include "core/basic/Common.h"
-#include <algorithm/MrRecon/basic/MrReconNDArray.h>
-
-#include <vector>
-
-/** @name OS and compiler version */
-//@{
-#ifdef _WIN32
-    // assume microsft visual c++ compiler if on windows
-    #define GADGETRON_FTK_VISUAL_CPP
-#elif defined WIN32
-    #define GADGETRON_FTK_VISUAL_CPP
-#elif defined WINDOWS
-    #define GADGETRON_FTK_VISUAL_CPP
-#else
-    // not the visual studio, maybe gcc
-    #define NOT_WIN32
-    #define GADGETRON_FTK_DEPRECATED
-#endif
-
-#ifdef GADGETRON_FTK_VISUAL_CPP
-    #if _MSC_VER >= 1300 // vc 7 or higher, only vc6 does not support template very well
-        #define GADGETRON_FTK_TEMPLATE_SUPPORT
-    #else
-        #ifndef GADGETRON_FTK_OLD_VC_FLAG
-            #define GADGETRON_FTK_OLD_VC_FLAG // vc 6 flag
-        #endif
-    #endif
-#elif defined NOT_WIN32 // gcc or others
-    #define GADGETRON_FTK_TEMPLATE_SUPPORT
-#endif
-
-// settings specific for microsoft compiler
-#ifdef GADGETRON_FTK_VISUAL_CPP
-    // disable warnings on 255 char debug symbols
-    #pragma warning (disable : 4786)
-
-    // disable warnings on exporting classes in DLL which has STL members
-    #pragma warning (disable : 4251)
-
-    // disable warnings on using 'this' in initializer list
-    #pragma warning (disable : 4355)
-
-    // disable warnings when specifying functions with a throw specifier
-    #pragma warning( disable : 4290 )
-
-    // disable warnings for implicit conversions
-    //#pragma warning( disable : 4244 )
-
-    // disable warnings for unknown pragma
-    #pragma warning( disable : 4068 )
-    
-    // disable warnings for unsafe functions
-    #pragma warning( disable : 4996 )
-
-    // disable warnings for warning C4275: non dll-interface class 
-    // 'std::_Complex_base<float>' used as base for dll-interface 
-    //class 'std::complex<float>'
-    #pragma warning( disable : 4275 )
-
-    /// disable warning for constant conditional expression
-    #pragma warning( disable : 4127)
-
-    /// disable warning for unreachable code
-    #pragma warning( disable : 4702)
-
-    /// 'identifier' : decorated name length exceeded, name was truncated
-    /// The decorated name was longer than the maximum the compiler allows (247), 
-    /// and was truncated. To avoid this warning and the truncation, reduce the number of arguments or name length of identifiers used.
-    #pragma warning( disable : 4503)
-
-    #pragma warning( disable : 4267)
-    #pragma warning( disable : 4244)
-    #pragma warning( disable : 4996)
-
-    // debug functionality
-    // #include <crtdbg.h>
-
-    // make code portable between VSS 6.0 and .NET
-    #if _MSC_VER >= 1300 // check for .NET
-    #define GADGETRON_FTK_DEPRECATED __declspec(deprecated)
-    #else
-    #define GADGETRON_FTK_DEPRECATED
-    #endif
-
-#endif
-//@}
-
-// -----------------------------------------------------------------
-// info zone
-
-enum PATRefScanMode
-{
-    PAT_REF_SCAN_UNDEFINED      = 0x01, // e.g. if no PAT is selected
-    PAT_REF_SCAN_INPLACE        = 0x02, // sequence supplies inplace reference lines
-    PAT_REF_SCAN_EXTRA          = 0x04, // sequence supplies extra reference lines
-    PAT_REF_SCAN_PRESCAN        = 0x08, // sequence does not supply reference lines, the data must have been acquired with a previous measurement
-    PAT_REF_SCAN_INTRINSIC_AVE  = 0x10, // The sequence contains intrinsic ref.lines due to sharing e.g. in the averages dimension
-    PAT_REF_SCAN_INTRINSIC_REP  = 0x20, // The sequence contains intrinsic ref.lines due to sharing e.g. in the repetition or phases dimension (i.e., TSENSE)
-    PAT_REF_SCAN_INTRINSIC_PHS  = 0x40, // The sequence contains intrinsic ref.lines due to sharing e.g. in the repetition or phases dimension (i.e., TSENSE)
-    PAT_REF_SCAN_INPLACE_LET    = 0x80  // A single (L)ong (E)cho (T)rain acquires reference lines and imaging lines
-};
-
-struct LoopCounters
-{
-    ACE_UINT16 line;
-    ACE_UINT16 acquisition;
-    ACE_UINT16 slice;
-    ACE_UINT16 partition;
-    ACE_UINT16 echo;
-    ACE_UINT16 phase;
-    ACE_UINT16 repetition;
-    ACE_UINT16 set;
-    ACE_UINT16 segment;
-    ACE_UINT16 channel;
-};
-
-#define MDH_FREEHDRPARA         4
-#define MDH_FREEHDRPARAOFFSET   4
-
-// aushIceProgramPara
-
-// in the user_int
-#define WIP_INDEX_TR                                MDH_FREEHDRPARAOFFSET+0
-#define WIP_INDEX_TE                                MDH_FREEHDRPARAOFFSET+1
-#define WIP_INDEX_FOV                               MDH_FREEHDRPARAOFFSET+2
-#define WIP_INDEX_SliceThickness                    MDH_FREEHDRPARAOFFSET+3
-
-// in the user_float
-#define WIP_INDEX_BaseResolution                    0
-#define WIP_INDEX_KernelSelection                   1
-#define WIP_INDEX_MoCoRecon                         2
-#define WIP_INDEX_AcceFactor                        3
-#define WIP_INDEX_NumRepForMoCo                     4
-#define WIP_INDEX_NumOfLine                         5
-
-// -----------------------------------------------------------------
-
-// [Col Line Cha Slice Partition Echo Phase Rep Set Seg]
-#ifdef VDimMrRecon
-    #undef VDimMrRecon
-#endif // VDimMrRecon
-#define  VDimMrRecon 10
-
-#ifdef BufferLengthMrRecon
-    #undef BufferLengthMrRecon
-#endif // BufferLengthMrRecon
-#define  BufferLengthMrRecon 2048
-
-struct  EXPORTGADGETSMRRECON GadgetMessageImageExt : public ISMRMRD::ImageHeader
-{
-    // fields added to store the time_stamp and pmu_time_stamp for every incoming read-out line
-    // if one line is not acquried, the corresponding time is -1
-    std::vector<int>     time_stamps;
-    std::vector<int>     pmu_time_stamps;
-
-    GadgetMessageImageExt();
-    ~GadgetMessageImageExt();
-
-    void copy(GadgetMessageImageExt& aMessageImage);
-    void set_matrix_size(unsigned int index, ACE_UINT16 size);
-    void dump();
-}; 
-
-// [Col Line Cha Slice Partition Echo Phase Rep Set Seg]
-//   0   1    2   3     4         5    6     7   8   9
-// store a scan with 10 dimensions
-struct  EXPORTGADGETSMRRECON GadgetMessageImageArray
-{
-    // size of the image array
-    ACE_UINT16 matrix_size[10];
-
-    // kspace center column number
-    ACE_UINT16 kSpace_centre_col_no;
-    // kspace max acquired col number
-    ACE_UINT16 kSpace_max_acquired_col_no;
-
-    // kspace center line number
-    ACE_UINT16 kSpace_centre_line_no;
-    // kspace max acquired line number
-    ACE_UINT16 kSpace_max_acquired_line_no;
-
-    // kspace center partition number
-    ACE_UINT16 kSpace_centre_partition_no;
-    // kspace max acquired partition number
-    ACE_UINT16 kSpace_max_acquired_partition_no;
-
-    // message information for every 2D image [Slice Partition Echo Phase Rep Set Seg]
-    GadgetMessageImageExt* imageArray_;
-
-    GadgetMessageImageArray();
-    GadgetMessageImageArray(int aSize[10]);
-    ~GadgetMessageImageArray();
-
-    void resize(int aSize[10]);
-    void copy(GadgetMessageImageArray& imageArray);
-    int get_offset(int slc, int par, int eco, int phs, int rep, int set, int seg);
-    void extractMessageImageArrayForSLC(int slc, GadgetMessageImageArray& imageArray);
-    void extractMessageImageArrayForREP(int rep, GadgetMessageImageArray& imageArray);
-
-    void dump();
-};
-
-struct EXPORTGADGETSMRRECON KSpaceBuffer
-{
-    typedef FTK_NAMESPACE_NAME::MrReconNDArray< std::complex<float>, VDimMrRecon > MrReconBufferType;
-
-    // kspace data
-    MrReconBufferType buffer_;
-
-    // reference ACS data
-    MrReconBufferType ref_;
-
-    // other data, e.g. AIF data
-    MrReconBufferType other_;
-
-    // whether it is ipat or pat with seperate ref
-    bool isIPAT;
-
-    KSpaceBuffer();
-    ~KSpaceBuffer();
-};
-
-#endif  //GADGETMRIHEADERSEXT_H
diff --git a/gadgets/gtPlus/GadgetronMrReconCommon.h b/gadgets/gtPlus/GadgetronMrReconCommon.h
deleted file mode 100644
index 853d61f..0000000
--- a/gadgets/gtPlus/GadgetronMrReconCommon.h
+++ /dev/null
@@ -1,90 +0,0 @@
-#ifndef GADGETRONMRRECONCOMMON_H
-#define GADGETRONMRRECONCOMMON_H
-
-/** @name OS and compiler version */
-//@{
-#ifdef _WIN32
-    // assume microsft visual c++ compiler if on windows
-    #define GADGETRON_FTK_VISUAL_CPP
-#elif defined WIN32
-    #define GADGETRON_FTK_VISUAL_CPP
-#elif defined WINDOWS
-    #define GADGETRON_FTK_VISUAL_CPP
-#else
-    // not the visual studio, maybe gcc
-    #define NOT_WIN32
-    #define GADGETRON_FTK_DEPRECATED
-#endif
-
-#ifdef GADGETRON_FTK_VISUAL_CPP
-    #if _MSC_VER >= 1300 // vc 7 or higher, only vc6 does not support template very well
-        #define GADGETRON_FTK_TEMPLATE_SUPPORT
-    #else
-        #ifndef GADGETRON_FTK_OLD_VC_FLAG
-            #define GADGETRON_FTK_OLD_VC_FLAG // vc 6 flag
-        #endif
-    #endif
-#elif defined NOT_WIN32 // gcc or others
-    #define GADGETRON_FTK_TEMPLATE_SUPPORT
-#endif
-
-// settings specific for microsoft compiler
-#ifdef GADGETRON_FTK_VISUAL_CPP
-    // disable warnings on 255 char debug symbols
-    #pragma warning (disable : 4786)
-
-    // disable warnings on exporting classes in DLL which has STL members
-    #pragma warning (disable : 4251)
-
-    // disable warnings on using 'this' in initializer list
-    #pragma warning (disable : 4355)
-
-    // disable warnings when specifying functions with a throw specifier
-    #pragma warning( disable : 4290 )
-
-    // disable warnings for implicit conversions
-    //#pragma warning( disable : 4244 )
-
-    // disable warnings for unknown pragma
-    #pragma warning( disable : 4068 )
-    
-    // disable warnings for unsafe functions
-    #pragma warning( disable : 4996 )
-
-    // disable warnings for warning C4275: non dll-interface class 
-    // 'std::_Complex_base<float>' used as base for dll-interface 
-    //class 'std::complex<float>'
-    #pragma warning( disable : 4275 )
-
-    /// disable warning for constant conditional expression
-    #pragma warning( disable : 4127)
-
-    /// disable warning for unreachable code
-    #pragma warning( disable : 4702)
-
-    /// 'identifier' : decorated name length exceeded, name was truncated
-    /// The decorated name was longer than the maximum the compiler allows (247), 
-    /// and was truncated. To avoid this warning and the truncation, reduce the number of arguments or name length of identifiers used.
-    #pragma warning( disable : 4503)
-
-    #pragma warning( disable : 4267)
-    #pragma warning( disable : 4244)
-    #pragma warning( disable : 4996)
-
-    // warning C4305: 'argument' : truncation
-    #pragma warning( disable : 4305)
-
-    // debug functionality
-    // #include <crtdbg.h>
-
-    // make code portable between VSS 6.0 and .NET
-    #if _MSC_VER >= 1300 // check for .NET
-    #define GADGETRON_FTK_DEPRECATED __declspec(deprecated)
-    #else
-    #define GADGETRON_FTK_DEPRECATED
-    #endif
-
-#endif
-//@}
-
-#endif  // GADGETRONMRRECONCOMMON_H
diff --git a/gadgets/gtPlus/GtPlusAccumulatorGadget.cpp b/gadgets/gtPlus/GtPlusAccumulatorGadget.cpp
deleted file mode 100644
index 2b0c813..0000000
--- a/gadgets/gtPlus/GtPlusAccumulatorGadget.cpp
+++ /dev/null
@@ -1,1168 +0,0 @@
-#include "GtPlusAccumulatorGadget.h"
-
-namespace Gadgetron
-{
-
-// --------------------------------------------------------------------
-
-GadgetMessageImageExt::GadgetMessageImageExt() : ISMRMRD::ImageHeader()
-{
-    time_stamps.clear();
-    pmu_time_stamps.clear();
-}
-
-GadgetMessageImageExt::~GadgetMessageImageExt() { }
-
-void GadgetMessageImageExt::set_matrix_size(unsigned int index, ACE_UINT16 size)
-{
-    if (index < 3) 
-    {
-        matrix_size[index] = size;
-    }
-
-    if ( index == 1 )
-    {
-        time_stamps.clear();
-        time_stamps.resize(matrix_size[1], -1);
-        pmu_time_stamps.clear();
-        pmu_time_stamps.resize(matrix_size[1], -1);
-    }
-}
-
-void GadgetMessageImageExt::copy(GadgetMessageImageExt& aMessageImage)
-{
-    flags = aMessageImage.flags;
-
-    matrix_size[0] = aMessageImage.matrix_size[0];
-    matrix_size[1] = aMessageImage.matrix_size[1];
-    matrix_size[2] = aMessageImage.matrix_size[2];
-
-    channels = aMessageImage.channels;
-
-    position[0] = aMessageImage.position[0];
-    position[1] = aMessageImage.position[1];
-    position[2] = aMessageImage.position[2];
-
-    read_dir[0] = aMessageImage.read_dir[0];
-    read_dir[1] = aMessageImage.read_dir[1];
-    read_dir[2] = aMessageImage.read_dir[2];
-
-    phase_dir[0] = aMessageImage.phase_dir[0];
-    phase_dir[1] = aMessageImage.phase_dir[1];
-    phase_dir[2] = aMessageImage.phase_dir[2];
-
-    slice_dir[0] = aMessageImage.slice_dir[0];
-    slice_dir[1] = aMessageImage.slice_dir[1];
-    slice_dir[2] = aMessageImage.slice_dir[2];
-
-    patient_table_position[0] = aMessageImage.patient_table_position[0];
-    patient_table_position[1] = aMessageImage.patient_table_position[1];
-    patient_table_position[2] = aMessageImage.patient_table_position[2];
-
-    acquisition_time_stamp = aMessageImage.acquisition_time_stamp;
-
-    physiology_time_stamp[0] = aMessageImage.physiology_time_stamp[0];
-    physiology_time_stamp[1] = aMessageImage.physiology_time_stamp[1];
-    physiology_time_stamp[2] = aMessageImage.physiology_time_stamp[2];
-
-    image_data_type = aMessageImage.image_data_type;
-    image_type = aMessageImage.image_type;
-    image_index = aMessageImage.image_index;
-    image_series_index = aMessageImage.image_series_index;
-
-    memcpy(user_int, aMessageImage.user_int, sizeof(int32_t)*ISMRMRD_USER_INTS);
-    memcpy(user_float, aMessageImage.user_float, sizeof(float)*ISMRMRD_USER_FLOATS);
-
-    time_stamps = aMessageImage.time_stamps;
-    pmu_time_stamps = aMessageImage.pmu_time_stamps;
-}
-
-void GadgetMessageImageExt::dump()
-{
-    std::cout << "GadgetMessageImageExt" << std::endl;
-    std::cout << "----------------------------------------------------------" << std::endl;
-    //dumpInfo();
-    std::cout << "----------------------------------------------------------" << std::endl;
-}
-
-// --------------------------------------------------------------------
-
-GadgetMessageImageArray::GadgetMessageImageArray() 
-:   imageArray_(0)
-{
-
-}
-
-GadgetMessageImageArray::GadgetMessageImageArray(int aSize[10])
-{
-    try
-    {
-        unsigned int ii;
-        for ( ii=0; ii<10; ii++ )
-        {
-            matrix_size[ii] = aSize[ii];
-        }
-
-        unsigned int len = 1;
-        for ( ii=3; ii<10; ii++ )
-        {
-            len *= matrix_size[ii];
-        }
-
-        if ( len > 0 )
-        {
-            imageArray_ = new GadgetMessageImageExt[len];
-        }
-    }
-    catch(...)
-    {
-        std::cout << "Failed in allocate imageArray_" << std::endl;
-    }
-}
-
-GadgetMessageImageArray::~GadgetMessageImageArray()
-{
-    if (imageArray_)
-    {
-        delete [] imageArray_;
-    }
-}
-
-void GadgetMessageImageArray::resize(int aSize[10])
-{
-    try
-    {
-        unsigned int ii;
-        for ( ii=0; ii<10; ii++ )
-        {
-            matrix_size[ii] = aSize[ii];
-        }
-
-        unsigned int len = 1;
-        for ( ii=3; ii<10; ii++ )
-        {
-            len *= matrix_size[ii];
-        }
-
-        if ( imageArray_ ) 
-        {
-            delete [] imageArray_;
-            imageArray_ = NULL;
-        }
-
-        if ( len > 0 )
-        {
-            imageArray_ = new GadgetMessageImageExt[len];
-        }
-    }
-    catch(...)
-    {
-        std::cout << "Failed in resize GadgetMessageImageArray " << std::endl;
-    }
-}
-
-void GadgetMessageImageArray::copy(GadgetMessageImageArray& imageArray)
-{
-    if (imageArray_) delete [] imageArray_;
-
-    unsigned int ii;
-    for ( ii=0; ii<10; ii++ )
-    {
-        matrix_size[ii] = imageArray.matrix_size[ii];
-    }
-
-    unsigned int len = 1;
-    for ( ii=3; ii<10; ii++ )
-    {
-        len *= matrix_size[ii];
-    }
-
-    if ( len > 0 )
-    {
-        imageArray_ = new GadgetMessageImageExt[len];
-    }
-
-    for ( unsigned int i=0; i<len; i++ )
-    {
-        imageArray_[i] = imageArray.imageArray_[i];
-    }
-}
-
-int GadgetMessageImageArray::get_offset(int slc, int e2, int con, int phs, int rep, int set, int seg)
-{
-    int offset = seg*matrix_size[8]*matrix_size[7]*matrix_size[6]*matrix_size[5]*matrix_size[4]*matrix_size[3]
-                    + set*matrix_size[7]*matrix_size[6]*matrix_size[5]*matrix_size[4]*matrix_size[3]
-                    + rep*matrix_size[6]*matrix_size[5]*matrix_size[4]*matrix_size[3]
-                    + phs*matrix_size[5]*matrix_size[4]*matrix_size[3]
-                    + con*matrix_size[4]*matrix_size[3]
-                    + e2*matrix_size[3]
-                    + slc;
-    return offset;
-}
-
-void GadgetMessageImageArray::extractMessageImageArrayForSLC(int slc, GadgetMessageImageArray& imageArray)
-{
-    if ( slc >= matrix_size[3] )
-    {
-        std::cout << "extractMessageImageArrayForSLC error - slc >= matrix_size[3] " << std::endl;
-        return;
-    }
-
-    int aSize[10];
-
-    unsigned int ii;
-    for ( ii=0; ii<10; ii++ )
-    {
-        aSize[ii] = matrix_size[ii];
-    }
-
-    aSize[3] = 1;
-
-    imageArray.resize(aSize);
-
-    int e2, con, phs, rep, set, seg;
-
-    int E2 = matrix_size[4];
-    int CON = matrix_size[5];
-    int PHS = matrix_size[6];
-    int REP = matrix_size[7];
-    int SET = matrix_size[8];
-    int SEG = matrix_size[9];
-
-    for ( seg=0; seg<SEG; seg++ )
-    {
-        for ( set=0; set<SET; set++ )
-        {
-            for ( rep=0; rep<REP; rep++ )
-            {
-                for ( phs=0; phs<PHS; phs++ )
-                {
-                    for ( con=0; con<CON; con++ )
-                    {
-                        for ( e2=0; e2<E2; e2++ )
-                        {
-                            int offset = this->get_offset(slc, e2, con, phs, rep, set, seg);
-                            int offsetSLC = imageArray.get_offset(0, e2, con, phs, rep, set, seg);
-
-                            imageArray.imageArray_[offsetSLC] = imageArray_[offset];
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
-void GadgetMessageImageArray::extractMessageImageArrayForREP(int rep, GadgetMessageImageArray& imageArray)
-{
-    if ( rep >= matrix_size[7] )
-    {
-        std::cout << "extractMessageImageArrayForSLC error - rep >= matrix_size[7] " << std::endl;
-        return;
-    }
-
-    int aSize[10];
-
-    unsigned int ii;
-    for ( ii=0; ii<10; ii++ )
-    {
-        aSize[ii] = matrix_size[ii];
-    }
-
-    aSize[7] = 1;
-
-    imageArray.resize(aSize);
-
-    int e2, con, phs, slc, set, seg;
-
-    int SLC = matrix_size[3];
-    int E2 = matrix_size[4];
-    int CON = matrix_size[5];
-    int PHS = matrix_size[6];
-    int SET = matrix_size[8];
-    int SEG = matrix_size[9];
-
-    for ( seg=0; seg<SEG; seg++ )
-    {
-        for ( set=0; set<SET; set++ )
-        {
-            for ( slc=0; slc<SLC; slc++ )
-            {
-                for ( phs=0; phs<PHS; phs++ )
-                {
-                    for ( con=0; con<CON; con++ )
-                    {
-                        for ( e2=0; e2<E2; e2++ )
-                        {
-                            int offset = this->get_offset(slc, e2, con, phs, rep, set, seg);
-                            int offsetREP = imageArray.get_offset(slc, e2, con, phs, 0, set, seg);
-
-                            imageArray.imageArray_[offsetREP] = imageArray_[offset];
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
-void GadgetMessageImageArray::dump()
-{
-    unsigned int ii;
-    std::cout << "GadgetMessageImageArray" << std::endl;
-    std::cout << "==========================================================" << std::endl;
-    std::cout << "matrix_size           : ";
-    for ( ii=0; ii<10; ii++ )
-    {
-        std::cout << matrix_size[ii] << " ";
-    }
-    std::cout << std::endl;
-    std::cout << "----------------------------------------------------------" << std::endl;
-    if ( imageArray_ )
-    {
-        int slc, e2, con, phs, rep, set, seg;
-        for ( seg=0; seg<matrix_size[9]; seg++ )
-        {
-            for ( set=0; set<matrix_size[8]; set++ )
-            {
-                for ( rep=0; rep<matrix_size[7]; rep++ )
-                {
-                    for ( phs=0; phs<matrix_size[6]; phs++ )
-                    {
-                        for ( con=0; con<matrix_size[5]; con++ )
-                        {
-                            for ( e2=0; e2<matrix_size[4]; e2++ )
-                            {
-                                for ( slc=0; slc<matrix_size[3]; slc++ )
-                                {
-                                    int offset = get_offset(slc, e2, con, phs, rep, set, seg);
-                                    std::cout << "[Slice E2 Contrast Phase Rep Set Seg] = [" 
-                                                << " " << slc 
-                                                << " " << e2 
-                                                << " " << con 
-                                                << " " << phs 
-                                                << " " << rep 
-                                                << " " << set 
-                                                << " " << seg << "]" << std::endl;
-
-                                    imageArray_[offset].dump();
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-
-    }
-    std::cout << "==========================================================" << std::endl;
-}
-
-// --------------------------------------------------------------------
-
-KSpaceBuffer::KSpaceBuffer() 
-{
-
-}
-
-KSpaceBuffer::~KSpaceBuffer()
-{
-
-}
-
-// --------------------------------------------------------------------
-
-GtPlusAccumulatorGadget::GtPlusAccumulatorGadget()
-    : messageImage_(0)
-    , kspaceBuffer_(0)
-    , image_counter_(0)
-    , image_series_(0)
-    , triggered_(false)
-{
-
-}
-
-GtPlusAccumulatorGadget::~GtPlusAccumulatorGadget()
-{
-    if (messageImage_) delete messageImage_;
-    if (kspaceBuffer_) delete kspaceBuffer_;
-}
-
-// extract necessary configuration information from the xml
-int GtPlusAccumulatorGadget::process_config(ACE_Message_Block* mb)
-{
-
-    // allocate the kspace buffer
-    if ( kspaceBuffer_ == NULL )
-    {
-        if (!(kspaceBuffer_ = new KSpaceBuffer)) 
-        {
-            GADGET_DEBUG1("Failed create buffer\n");
-            return GADGET_FAIL;
-        }
-    }
-
-    // image series
-    image_series_ = this->get_int_value("image_series");
-
-    // pass the xml file
-    boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg = parseIsmrmrdXMLHeader(std::string(mb->rd_ptr()));
-
-    // seq object
-    ISMRMRD::ismrmrdHeader::encoding_sequence e_seq = cfg->encoding();
-    if (e_seq.size() != 1)
-    {
-        GADGET_DEBUG2("Number of encoding spaces: %d\n", e_seq.size());
-        GADGET_DEBUG1("This simple GtPlusAccumulatorGadget only supports one encoding space\n");
-        return GADGET_FAIL;
-    }
-
-    ISMRMRD::encodingSpaceType e_space = (*e_seq.begin()).encodedSpace();
-    ISMRMRD::encodingSpaceType r_space = (*e_seq.begin()).reconSpace();
-    ISMRMRD::encodingLimitsType e_limits = (*e_seq.begin()).encodingLimits();
-
-    GADGET_MSG("Matrix size: " << e_space.matrixSize().x() << " " << e_space.matrixSize().y() << " " << e_space.matrixSize().z());
-    GADGET_MSG("Recon size: " << r_space.matrixSize().x() << " " << r_space.matrixSize().y() << " " << r_space.matrixSize().z());
-
-    meas_max_ro_ = e_space.matrixSize().x()/2;
-
-    field_of_view_[0] = r_space.fieldOfView_mm().x();
-    field_of_view_[1] = r_space.fieldOfView_mm().y();
-    field_of_view_[2] = r_space.fieldOfView_mm().z();
-    GADGET_MSG("field_of_view_ is " << field_of_view_[0] << " " << field_of_view_[1] << " " << field_of_view_[2]);
-
-    int newE1_ = field_of_view_[1]/(field_of_view_[0]/meas_max_ro_);
-
-    if (e_limits.kspace_encoding_step_1().present()) 
-    {
-        meas_max_idx_.kspace_encode_step_1 = e_limits.kspace_encoding_step_1().get().maximum();
-    }
-    else
-    {
-        meas_max_idx_.kspace_encode_step_1 = 0;
-        std::cout << "Setting number of kspace_encode_step_1 to 0" << std::endl;
-        return GADGET_FAIL;
-    }
-
-    kspaceBuffer_->kSpaceCentreEncode1_ = e_limits.kspace_encoding_step_1().get().center();
-    GADGET_MSG("kSpaceCentreEncode1_ is " << kspaceBuffer_->kSpaceCentreEncode1_);
-
-    kspaceBuffer_->kSpaceCentreEncode2_ = e_limits.kspace_encoding_step_2().get().center();
-    GADGET_MSG("kSpaceCentreEncode2_ is " << kspaceBuffer_->kSpaceCentreEncode2_);
-
-    kspaceBuffer_->kSpaceMaxEncode1_ = e_limits.kspace_encoding_step_1().get().maximum()+1;
-    GADGET_MSG("kSpaceMaxEncode1_ is " << kspaceBuffer_->kSpaceMaxEncode1_);
-
-    kspaceBuffer_->kSpaceMaxEncode2_ = e_limits.kspace_encoding_step_2().get().maximum()+1;
-    GADGET_MSG("kSpaceMaxEncode2_ is " << kspaceBuffer_->kSpaceMaxEncode2_);
-
-    if (e_limits.set().present())
-    {
-        meas_max_idx_.set = e_limits.set().get().maximum() - 1;
-        if ( meas_max_idx_.set < 0 ) meas_max_idx_.set = 0;
-    }
-    else
-    {
-        meas_max_idx_.set = 0;
-    }
-
-    if (e_limits.phase().present())
-    {
-        meas_max_idx_.phase = e_limits.phase().get().maximum()-1;
-        if ( meas_max_idx_.phase < 0 ) meas_max_idx_.phase = 0;
-    }
-    else
-    {
-        meas_max_idx_.phase = 0;
-    }
-
-    if (e_limits.kspace_encoding_step_2().present())
-    {
-        meas_max_idx_.kspace_encode_step_2 = e_limits.kspace_encoding_step_2().get().maximum();
-    }
-    else
-    {
-        meas_max_idx_.kspace_encode_step_2 = 0;
-    }
-
-    if (e_limits.contrast().present())
-    {
-        meas_max_idx_.contrast = e_limits.contrast().get().maximum()-1;
-        if ( meas_max_idx_.contrast < 0 ) meas_max_idx_.contrast = 0;
-    }
-    else
-    {
-        meas_max_idx_.contrast = 0;
-    }
-
-    if (e_limits.slice().present())
-    {
-        meas_max_idx_.slice = e_limits.slice().get().maximum();
-    }
-    else
-    {
-        meas_max_idx_.slice = 0;
-    }
-
-    if (e_limits.repetition().present())
-    {
-        meas_max_idx_.repetition = e_limits.repetition().get().maximum();
-    }
-    else
-    {
-        meas_max_idx_.repetition = 0;
-    }
-
-    if (e_limits.segment().present())
-    {
-        // meas_max_idx_.segment = e_limits.segment().get().maximum()-1;
-        meas_max_idx_.segment = 0;
-    }
-    else
-    {
-        meas_max_idx_.segment = 0;
-    }
-
-    // find out the PAT mode
-    ISMRMRD::ismrmrdHeader::parallelImaging_optional p_imaging_type = cfg->parallelImaging();
-    ISMRMRD::parallelImagingType p_imaging = *p_imaging_type;
-
-    kspaceBuffer_->AccelFactE1_ = (unsigned int)(p_imaging.accelerationFactor().kspace_encoding_step_1());
-    kspaceBuffer_->AccelFactE2_ = (unsigned int)(p_imaging.accelerationFactor().kspace_encoding_step_2());
-    GADGET_MSG("AccelFactE1 is " << kspaceBuffer_->AccelFactE1_);
-    GADGET_MSG("AccelFactE2 is " << kspaceBuffer_->AccelFactE2_);
-
-    ISMRMRD::calibrationModeType calib = *(p_imaging.calibrationMode());
-    kspaceBuffer_->CalibMode_ = calib;
-
-    // find out the calibration mode
-    if ( kspaceBuffer_->CalibMode_ == ISMRMRD::calibrationModeType::separate )
-    {
-        GADGET_MSG("Calibration mode is separate");
-    }
-
-    if ( kspaceBuffer_->CalibMode_ == ISMRMRD::calibrationModeType::embedded )
-    {
-        GADGET_MSG("Calibration mode is embedded");
-    }
-
-    if ( kspaceBuffer_->CalibMode_ == ISMRMRD::calibrationModeType::interleaved )
-    {
-        GADGET_MSG("Calibration mode is interleaved");
-
-        if ( p_imaging.interleavingDimension().present() )
-        {
-            kspaceBuffer_->InterleaveDim_ = *(p_imaging.interleavingDimension());
-            GADGET_MSG("InterleaveDim is " << kspaceBuffer_->InterleaveDim_);
-        }
-    }
-
-    return GADGET_OK;
-}
-
-int GtPlusAccumulatorGadget::process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
-{
-    // logic to control whether to store kspace and ref data
-    bool bIsKSpace, bIsRef, bIsNoise, bIsPhaseCorr, bIsReflect, bIsOther;
-    if ( !checkStatus(m1->getObjectPtr()->flags, m1->getObjectPtr()->number_of_samples, bIsKSpace, bIsRef, bIsNoise, bIsPhaseCorr, bIsReflect, bIsOther) )
-    {
-        GADGET_DEBUG1("Failed check readout status\n");
-        return GADGET_FAIL;
-    }
-
-    // store kspace read out
-    if ( bIsKSpace )
-    {
-        if ( !storeImageData(m1, m2, bIsReflect) )
-        {
-            GADGET_DEBUG1("Failed check readout status\n");
-            return GADGET_FAIL;
-        }
-    }
-
-    // store ref read out
-    if ( bIsRef )
-    {
-        ISMRMRD::AcquisitionHeader* pMDH = m1->getObjectPtr();
-        hoNDArray< ValueType >* pRefLine = m2->getObjectPtr();
-
-        ReadOutBuffer item;
-        item.acqHead_ = *pMDH;
-        item.data_ = *pRefLine;
-        item.isReflect_ = bIsReflect;
-        refBuffer_.push_back(item);
-    }
-
-    // store phaseCorr read out
-    if ( bIsPhaseCorr )
-    {
-        ISMRMRD::AcquisitionHeader* pMDH = m1->getObjectPtr();
-        hoNDArray< ValueType >* pRefLine = m2->getObjectPtr();
-
-        ReadOutBuffer item;
-        item.acqHead_ = *pMDH;
-        item.data_ = *pRefLine;
-        item.isReflect_ = bIsReflect;
-        phaseCorrBuffer_.push_back(item);
-    }
-
-    // store noise read out
-    if ( bIsNoise )
-    {
-        ISMRMRD::AcquisitionHeader* pMDH = m1->getObjectPtr();
-        hoNDArray< ValueType >* pRefLine = m2->getObjectPtr();
-
-        ReadOutBuffer item;
-        item.acqHead_ = *pMDH;
-        item.data_ = *pRefLine;
-        item.isReflect_ = bIsReflect;
-        noiseBuffer_.push_back(item);
-    }
-
-    // store other read out
-    if ( bIsOther )
-    {
-        ISMRMRD::AcquisitionHeader* pMDH = m1->getObjectPtr();
-        hoNDArray< ValueType >* pRefLine = m2->getObjectPtr();
-
-        ReadOutBuffer item;
-        item.acqHead_ = *pMDH;
-        item.data_ = *pRefLine;
-        item.isReflect_ = bIsReflect;
-        otherBuffer_.push_back(item);
-    }
-
-    m1->release();
-    return GADGET_OK;
-}
-
-bool GtPlusAccumulatorGadget::checkStatus(uint64_t flag, int samples, bool& bIsKSpace, bool& bIsRef, bool& bIsNoise, bool& bIsPhaseCorr, bool& bIsReflect, bool& bIsOther)
-{
-    bIsNoise = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_NOISE_MEASUREMENT).isSet(flag);
-    bool is_ref = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_PARALLEL_CALIBRATION).isSet(flag);
-    bool is_ref_kspace = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_PARALLEL_CALIBRATION_AND_IMAGING).isSet(flag);
-    bIsReflect = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_REVERSE).isSet(flag);
-    bIsPhaseCorr = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_PHASECORR_DATA).isSet(flag);
-
-    bIsKSpace = false;
-    bIsRef = false;
-    bIsOther = false;
-
-    if ( bIsNoise || bIsPhaseCorr )
-    {
-        return true;
-    }
-
-    // in interleaved mode, only store the image data
-    if ( kspaceBuffer_->CalibMode_==ISMRMRD::calibrationModeType::interleaved )
-    {
-        bIsKSpace = true;
-        bIsRef = false;
-    }
-
-    // in embedded, kspace stores only the undersampled lines
-    // ref stores all lines used for references
-    if ( kspaceBuffer_->CalibMode_==ISMRMRD::calibrationModeType::embedded )
-    {
-        if ( is_ref && !is_ref_kspace )
-        {
-            bIsKSpace = false;
-            bIsRef = true;
-        }
-
-        if ( !is_ref && is_ref_kspace )
-        {
-            bIsKSpace = true;
-            bIsRef = true;
-        }
-
-        if ( is_ref && is_ref_kspace )
-        {
-            bIsKSpace = true;
-            bIsRef = true;
-        }
-
-        if ( !is_ref && !is_ref_kspace )
-        {
-            bIsKSpace = true;
-            bIsRef = false;
-        }
-    }
-
-    // in separate mode
-    if ( kspaceBuffer_->CalibMode_==ISMRMRD::calibrationModeType::separate 
-    || kspaceBuffer_->CalibMode_==ISMRMRD::calibrationModeType::external )
-    {
-        if ( is_ref )
-        {
-            bIsKSpace = false;
-            bIsRef = true;
-        }
-
-        if ( !is_ref )
-        {
-            bIsKSpace = true;
-            bIsRef = false;
-        }
-    }
-
-    // store other data, e.g. AIF
-    // only for tpat
-    if ( !is_ref && !is_ref_kspace && (samples != meas_max_ro_) )
-    {
-        bIsOther = true;
-        bIsKSpace = false;
-        bIsRef = false;
-    }
-
-    return true;
-}
-
-bool GtPlusAccumulatorGadget::storeImageData(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2, bool isReflect)
-{
-    try
-    {
-        unsigned int ii;
-        int samples =  m1->getObjectPtr()->number_of_samples;
-        ISMRMRD::EncodingCounters idx = m1->getObjectPtr()->idx;
-
-        if ( kspaceBuffer_->buffer_.get_number_of_elements() <= 0 )
-        {
-            meas_max_channel_ = m1->getObjectPtr()->active_channels;
-
-            int E1 = 2*kspaceBuffer_->kSpaceCentreEncode1_;
-            int E2 = 2*kspaceBuffer_->kSpaceCentreEncode2_;
-
-            // find the loop counter boundary and allocate the buffer
-            GADGET_MSG("[RO E1 Cha Slice E2 Con Phase Rep Set Seg] = [" 
-                               << meas_max_ro_ 
-                               << " " << E1 
-                               << " " << meas_max_channel_ 
-                               << " " << meas_max_idx_.slice+1 
-                               << " " << E2 
-                               << " " << meas_max_idx_.contrast+1 
-                               << " " << meas_max_idx_.phase+1 
-                               << " " << meas_max_idx_.repetition+1 
-                               << " " << meas_max_idx_.set+1 
-                               << " " << meas_max_idx_.segment+1 << "]");
-
-            dimensions_.push_back(meas_max_ro_);
-            dimensions_.push_back(E1);
-            dimensions_.push_back(meas_max_channel_);
-            dimensions_.push_back(meas_max_idx_.slice+1);
-            dimensions_.push_back(E2);
-            dimensions_.push_back(meas_max_idx_.contrast+1);
-            dimensions_.push_back(meas_max_idx_.phase+1);
-            dimensions_.push_back(meas_max_idx_.repetition+1);
-            dimensions_.push_back(meas_max_idx_.set+1);
-            dimensions_.push_back(meas_max_idx_.segment+1);
-
-            unsigned int N = dimensions_.size();
-            for ( ii=0; ii<N; ii++ )
-            {
-                GADGET_MSG("dimensions_[" << ii << "] = " << dimensions_[ii]);
-            }
-
-            // allocate data buffer
-            try
-            {
-                kspaceBuffer_->buffer_.create(&dimensions_);
-
-                std::vector<unsigned int> reflect_dimensions_(dimensions_);
-                reflect_dimensions_[0] = 1;
-                reflect_dimensions_[2] = 1;
-                kspaceBuffer_->reflect_.create(&reflect_dimensions_);
-            }
-            catch(...)
-            {
-                GADGET_DEBUG1("Failed create buffer\n");
-                return false;
-            }
-
-            // allocate message buffer
-            int matrix_size[10];
-            for ( ii=0; ii<10; ii++ )
-            {
-                matrix_size[ii] = dimensions_[ii];
-            }
-
-            if (!(messageImage_ = new GadgetMessageImageArray(matrix_size))) 
-            {
-                GADGET_DEBUG1("Failed create buffer\n");
-                return false;
-            }
-        }
-
-        std::complex<float>* b = kspaceBuffer_->buffer_.begin();
-        std::complex<float>* d = m2->getObjectPtr()->get_data_ptr();
-        if (samples != static_cast<int>(dimensions_[0])) 
-        {
-            GADGET_DEBUG1("Wrong number of samples received\n");
-            return false;
-        }
-
-        //Copy the data for all the channels
-        std::vector<unsigned int> pos(10);
-        for (int c = 0; c < m1->getObjectPtr()->active_channels; c++) 
-        {
-            pos[0] = 0;
-            pos[1] = idx.kspace_encode_step_1;
-            pos[2] = c;
-            pos[3] = idx.slice;
-            pos[4] = idx.kspace_encode_step_2;
-            pos[5] = idx.contrast;
-            pos[6] = idx.phase;
-            pos[7] = idx.repetition;
-            pos[8] = idx.set;
-            pos[9] = idx.segment;
-            int offsetBuffer = kspaceBuffer_->buffer_.calculate_offset(pos);
-
-            memcpy(b+offsetBuffer, d+c*samples, sizeof(std::complex<float>)*samples);
-
-            pos[2] = 0;
-            offsetBuffer = kspaceBuffer_->reflect_.calculate_offset(pos);
-            kspaceBuffer_->reflect_.at(offsetBuffer) = isReflect;
-        }
-
-        if ( !fillImageInfo(m1, messageImage_, m1->getObjectPtr()->idx) )
-        {
-            GADGET_DEBUG1("Failed in fillImageInfo(m1, messageImage_, m1->getObjectPtr()->idx)\n");
-            return false;
-        }
-    }
-    catch(...)
-    {
-        GADGET_DEBUG1("Errors in GtPlusAccumulatorGadget::storeImageData(...) ... \n");
-        return false;
-    }
-
-    return true;
-}
-
-bool GtPlusAccumulatorGadget::
-fillBuffer(ReadOutBufferType& readOutBuffer, BufferType& buf, ReflectBufferType& reflectBuf)
-{
-    try
-    {
-        // find the maximal dimension of all buffered ICE readout
-        unsigned int numOfReadOuts = readOutBuffer.size();
-        ISMRMRD::EncodingCounters max_idx;
-        max_idx.kspace_encode_step_1 = 0;
-        max_idx.average = 0;
-        max_idx.slice = 0;
-        max_idx.kspace_encode_step_2 = 0;
-        max_idx.contrast = 0;
-        max_idx.phase = 0;
-        max_idx.repetition = 0;
-        max_idx.set = 0;
-        max_idx.segment = 0;
-        int max_channel = 0;
-        int max_col = 0;
-
-        unsigned int a;
-        for (a = 0; a < numOfReadOuts; a++) 
-        {
-            ISMRMRD::EncodingCounters idx = readOutBuffer[a].acqHead_.idx;
-
-            if ( readOutBuffer[a].acqHead_.number_of_samples > max_col ) 
-                max_col=readOutBuffer[a].acqHead_.number_of_samples;
-
-            if ( idx.kspace_encode_step_1 > max_idx.kspace_encode_step_1 ) 
-                max_idx.kspace_encode_step_1=idx.kspace_encode_step_1;
-
-            if ( idx.slice > max_idx.slice ) 
-                max_idx.slice = idx.slice;
-
-            if ( idx.kspace_encode_step_2 > max_idx.kspace_encode_step_2 ) 
-                max_idx.kspace_encode_step_2 = idx.kspace_encode_step_2;
-
-            if ( idx.contrast > max_idx.contrast ) 
-                max_idx.contrast = idx.contrast;
-
-            if ( idx.phase > max_idx.phase ) 
-                max_idx.phase = idx.phase;
-
-            if ( idx.repetition > max_idx.repetition ) 
-                max_idx.repetition = idx.repetition;
-
-            if ( idx.set > max_idx.set ) 
-                max_idx.set = idx.set;
-
-            if ( idx.segment > max_idx.segment ) 
-                max_idx.segment = idx.segment;
-
-            if ( readOutBuffer[a].acqHead_.active_channels > max_channel ) 
-                max_channel = readOutBuffer[a].acqHead_.active_channels;
-        }
-
-        GADGET_MSG("[RO E1 Cha Slice E2 Contrast Phase Rep Set Seg] = [" 
-                               << max_col 
-                               << " " << max_idx.kspace_encode_step_1+1 
-                               << " " << max_channel 
-                               << " " << max_idx.slice+1 
-                               << " " << max_idx.kspace_encode_step_2+1 
-                               << " " << max_idx.contrast+1 
-                               << " " << max_idx.phase+1 
-                               << " " << max_idx.repetition+1 
-                               << " " << max_idx.set+1 
-                               << " " << max_idx.segment+1 << "]");
-
-        // alloate buffer for data
-        std::vector<unsigned int> dims(10);
-        dims[0] = max_col;
-        dims[1] = max_idx.kspace_encode_step_1+1;
-        dims[2] = max_channel;
-        dims[3] = max_idx.slice+1;
-        dims[4] = max_idx.kspace_encode_step_2+1;
-        dims[5] = max_idx.contrast+1;
-        dims[6] = max_idx.phase+1;
-        dims[7] = max_idx.repetition+1;
-        dims[8] = max_idx.set+1;
-        dims[9] = max_idx.segment+1;
-
-        try
-        {
-            buf.create(&dims);
-
-            std::vector<unsigned int> reflect_dims(dims);
-            reflect_dims[0] = 1;
-            reflect_dims[2] = 1;
-            reflectBuf.create(&reflect_dims);
-        }
-        catch(...)
-        {
-            GADGET_DEBUG1("Failed create buffer\n");
-            return false;
-        }
-
-        std::complex<float>* b = buf.begin();
-
-        // copy the data
-        int c;
-        std::vector<unsigned int> pos(10);
-
-        for ( a=0; a<numOfReadOuts; a++) 
-        {
-            ISMRMRD::EncodingCounters idx = readOutBuffer[a].acqHead_.idx;
-            std::complex<float>* d = const_cast<std::complex<float>*>(readOutBuffer[a].data_.begin());
-
-            for ( c=0; c<readOutBuffer[a].acqHead_.active_channels; c++) 
-            {
-                pos[0] = 0;
-                pos[1] = idx.kspace_encode_step_1;
-                pos[2] = c;
-                pos[3] = idx.slice;
-                pos[4] = idx.kspace_encode_step_2;
-                pos[5] = idx.contrast;
-                pos[6] = idx.phase;
-                pos[7] = idx.repetition;
-                pos[8] = idx.set;
-                pos[9] = idx.segment;
-                int offsetBuffer = buf.calculate_offset(pos);
-
-                memcpy(b+offsetBuffer, d+c*readOutBuffer[a].acqHead_.number_of_samples, sizeof(std::complex<float>)*readOutBuffer[a].acqHead_.number_of_samples);
-
-                pos[2] = 0;
-                offsetBuffer = reflectBuf.calculate_offset(pos);
-                reflectBuf.at(offsetBuffer) = readOutBuffer[a].isReflect_;
-            }
-        }
-    }
-    catch(...)
-    {
-        GADGET_DEBUG1("Errors in GtPlusAccumulatorGadget::fillBuffer(...) ... \n");
-        return false;
-    }
-
-    return true;
-}
-
-bool GtPlusAccumulatorGadget::fillImageInfo(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, GadgetMessageImageArray* messageImage, const ISMRMRD::EncodingCounters& idx)
-{
-    try
-    {
-        // fill the message info
-        int offset = messageImage->get_offset(idx.slice, idx.kspace_encode_step_2, idx.contrast, idx.phase, idx.repetition, idx.set, idx.segment);
-
-        // if it is the first acq in a slice, fill in all information
-        bool is_first_acq_in_slice = ISMRMRD::FlagBit(ISMRMRD::ACQ_FIRST_IN_SLICE).isSet(m1->getObjectPtr()->flags);
-
-        if ( is_first_acq_in_slice )
-        {
-            messageImage->imageArray_[offset].version = m1->getObjectPtr()->version;
-            messageImage->imageArray_[offset].flags = m1->getObjectPtr()->flags;
-            messageImage->imageArray_[offset].measurement_uid = m1->getObjectPtr()->measurement_uid;
-
-            //messageImage->imageArray_[offset].matrix_size[0] = dimensions_[0];
-            //messageImage->imageArray_[offset].matrix_size[1] = dimensions_[1];
-            //messageImage->imageArray_[offset].matrix_size[2] = dimensions_[2];
-
-            messageImage->imageArray_[offset].set_matrix_size(0, dimensions_[0]);
-            messageImage->imageArray_[offset].set_matrix_size(1, dimensions_[1]);
-            messageImage->imageArray_[offset].set_matrix_size(2, dimensions_[2]);
-
-            messageImage->imageArray_[offset].field_of_view[0] = field_of_view_[0];
-            messageImage->imageArray_[offset].field_of_view[1] = field_of_view_[1];
-            messageImage->imageArray_[offset].field_of_view[2] = field_of_view_[2];
-
-            messageImage->imageArray_[offset].channels = m1->getObjectPtr()->active_channels;
-
-            messageImage->imageArray_[offset].position[0] = m1->getObjectPtr()->position[0];
-            messageImage->imageArray_[offset].position[1] = m1->getObjectPtr()->position[1];
-            messageImage->imageArray_[offset].position[2] = m1->getObjectPtr()->position[2];
-
-            //messageImage->imageArray_[offset].quaternion[0] = m1->getObjectPtr()->quaternion[0];
-            //messageImage->imageArray_[offset].quaternion[1] = m1->getObjectPtr()->quaternion[1];
-            //messageImage->imageArray_[offset].quaternion[2] = m1->getObjectPtr()->quaternion[2];
-            //messageImage->imageArray_[offset].quaternion[3] = m1->getObjectPtr()->quaternion[3];
-
-            messageImage->imageArray_[offset].read_dir[0] = m1->getObjectPtr()->read_dir[0];
-            messageImage->imageArray_[offset].read_dir[1] = m1->getObjectPtr()->read_dir[1];
-            messageImage->imageArray_[offset].read_dir[2] = m1->getObjectPtr()->read_dir[2];
-
-            messageImage->imageArray_[offset].phase_dir[0] = m1->getObjectPtr()->phase_dir[0];
-            messageImage->imageArray_[offset].phase_dir[1] = m1->getObjectPtr()->phase_dir[1];
-            messageImage->imageArray_[offset].phase_dir[2] = m1->getObjectPtr()->phase_dir[2];
-
-            messageImage->imageArray_[offset].slice_dir[0] = m1->getObjectPtr()->slice_dir[0];
-            messageImage->imageArray_[offset].slice_dir[1] = m1->getObjectPtr()->slice_dir[1];
-            messageImage->imageArray_[offset].slice_dir[2] = m1->getObjectPtr()->slice_dir[2];
-
-            messageImage->imageArray_[offset].patient_table_position[0] = m1->getObjectPtr()->patient_table_position[0];
-            messageImage->imageArray_[offset].patient_table_position[1] = m1->getObjectPtr()->patient_table_position[1];
-            messageImage->imageArray_[offset].patient_table_position[2] = m1->getObjectPtr()->patient_table_position[2];
-
-            messageImage->imageArray_[offset].average = m1->getObjectPtr()->idx.average;
-            messageImage->imageArray_[offset].slice = m1->getObjectPtr()->idx.slice;
-            messageImage->imageArray_[offset].contrast = m1->getObjectPtr()->idx.contrast;
-            messageImage->imageArray_[offset].phase = m1->getObjectPtr()->idx.phase;
-            messageImage->imageArray_[offset].repetition = m1->getObjectPtr()->idx.repetition;
-            messageImage->imageArray_[offset].set = m1->getObjectPtr()->idx.set;
-
-            messageImage->imageArray_[offset].acquisition_time_stamp = m1->getObjectPtr()->acquisition_time_stamp;
-
-            messageImage->imageArray_[offset].physiology_time_stamp[0] = m1->getObjectPtr()->physiology_time_stamp[0];
-            messageImage->imageArray_[offset].physiology_time_stamp[1] = m1->getObjectPtr()->physiology_time_stamp[1];
-            messageImage->imageArray_[offset].physiology_time_stamp[2] = m1->getObjectPtr()->physiology_time_stamp[2];
-
-            messageImage->imageArray_[offset].image_data_type = ISMRMRD::DATA_COMPLEX_FLOAT;
-
-            messageImage->imageArray_[offset].image_type = ISMRMRD::TYPE_MAGNITUDE;
-
-            messageImage->imageArray_[offset].image_index = ++image_counter_;
-            messageImage->imageArray_[offset].image_series_index = image_series_;
-
-            // need to store the free user parameters
-            memcpy(messageImage->imageArray_[offset].user_int, m1->getObjectPtr()->user_int, sizeof(int32_t)*8);
-            memcpy(messageImage->imageArray_[offset].user_float, m1->getObjectPtr()->user_float, sizeof(float)*8);
-        }
-
-        // whether or not this acq is the first in a slice, we need to fill the TimeStamps and PMUTimeStamps
-        messageImage->imageArray_[offset].time_stamps[idx.kspace_encode_step_1] = m1->getObjectPtr()->acquisition_time_stamp;
-        messageImage->imageArray_[offset].pmu_time_stamps[idx.kspace_encode_step_1] = m1->getObjectPtr()->physiology_time_stamp[0];
-    }
-    catch(...)
-    {
-        GADGET_DEBUG1("Errors in GtPlusAccumulatorGadget::fillImageInfo(...) ... \n");
-        return false;
-    }
-
-    return true;
-}
-
-int GtPlusAccumulatorGadget::close(unsigned long flags)
-{
-    if ( !triggered_ )
-    {
-        triggered_ = true;
-
-        GADGET_MSG("GtPlusAccumulatorGadget - trigger next gadget ... ");
-
-        GadgetContainerMessage<GadgetMessageImageArray>* cm1 = 
-            new GadgetContainerMessage<GadgetMessageImageArray>();
-
-        GadgetContainerMessage< KSpaceBuffer >* cm2 = 
-            new GadgetContainerMessage< KSpaceBuffer >();
-
-        cm1->cont(cm2);
-
-        // copy the image content
-        cm2->getObjectPtr()->buffer_ = kspaceBuffer_->buffer_;
-        cm2->getObjectPtr()->reflect_ = kspaceBuffer_->reflect_;
-
-        // copy the message image array
-        cm1->getObjectPtr()->copy(*messageImage_);
-
-        if (!refBuffer_.empty())
-        {
-            GADGET_MSG("GtPlusAccumulatorGadget - ref signal found : " << refBuffer_.size());
-
-            if ( !fillBuffer(refBuffer_, kspaceBuffer_->ref_, kspaceBuffer_->refReflect_) )
-            {
-                GADGET_DEBUG1("fillBuffer(refBuffer_) failed ... \n");
-                cm1->release();
-                return GADGET_FAIL;
-            }
-
-            cm2->getObjectPtr()->ref_ = kspaceBuffer_->ref_;
-            cm2->getObjectPtr()->refReflect_ = kspaceBuffer_->refReflect_;
-        }
-
-        if (!phaseCorrBuffer_.empty())
-        {
-            GADGET_MSG("GtPlusAccumulatorGadget - phase correction signal found : " << phaseCorrBuffer_.size());
-
-            if ( !fillBuffer(phaseCorrBuffer_, kspaceBuffer_->phaseCorr_, kspaceBuffer_->phaseCorrReflect_) )
-            {
-                GADGET_DEBUG1("fillBuffer(phaseCorrBuffer_) failed ... \n");
-                cm1->release();
-                return GADGET_FAIL;
-            }
-
-            cm2->getObjectPtr()->phaseCorr_ = kspaceBuffer_->phaseCorr_;
-            cm2->getObjectPtr()->phaseCorrReflect_ = kspaceBuffer_->phaseCorrReflect_;
-        }
-
-        if (!noiseBuffer_.empty())
-        {
-            GADGET_MSG("GtPlusAccumulatorGadget - noise signal found : " << noiseBuffer_.size());
-
-            ReflectBufferType tmpBuf;
-            if ( !fillBuffer(noiseBuffer_, kspaceBuffer_->noise_, tmpBuf) )
-            {
-                GADGET_DEBUG1("fillBuffer(noiseBuffer_) failed ... \n");
-                cm1->release();
-                return GADGET_FAIL;
-            }
-
-            cm2->getObjectPtr()->noise_ = kspaceBuffer_->noise_;
-        }
-
-        if (!otherBuffer_.empty())
-        {
-            GADGET_MSG("GtPlusAccumulatorGadget - other signal found : " << otherBuffer_.size());
-
-            ReflectBufferType tmpBuf;
-            if ( !fillBuffer(otherBuffer_, kspaceBuffer_->other_, tmpBuf) )
-            {
-                GADGET_DEBUG1("fillBuffer(otherBuffer_) failed ... \n");
-                cm1->release();
-                return GADGET_FAIL;
-            }
-
-            cm2->getObjectPtr()->other_ = kspaceBuffer_->other_;
-        }
-
-        // send to next gadget
-        if (this->next()->putq(cm1) < 0) 
-        {
-            return GADGET_FAIL;
-        }
-    }
-
-    return BaseClass::close(flags);
-}
-
-GADGET_FACTORY_DECLARE(GtPlusAccumulatorGadget)
-
-}
diff --git a/gadgets/gtPlus/GtPlusAccumulatorGadget.h b/gadgets/gtPlus/GtPlusAccumulatorGadget.h
deleted file mode 100644
index e45ecee..0000000
--- a/gadgets/gtPlus/GtPlusAccumulatorGadget.h
+++ /dev/null
@@ -1,198 +0,0 @@
-#pragma once
-
-#include <complex>
-#include "GtPlusExport.h"
-#include "Gadget.h"
-#include "hoNDArray.h"
-#include "ismrmrd.h"
-#include "GadgetIsmrmrdReadWrite.h"
-
-// the buffered kspace is defined by the ISMRM 10 dimensions
-// readout
-// kspace_encode_step_1
-// kspace_encode_step_2
-// average
-// slice
-// contrast
-// phase
-// repetition
-// set
-// segment
-// in the order of [RO E1 CHA AVE SLC E2 CON PHS REP SET SEG]
-
-namespace Gadgetron
-{
-
-struct  EXPORTGTPLUS GadgetMessageImageExt : public ISMRMRD::ImageHeader
-{
-    // fields added to store the time_stamp and pmu_time_stamp for every incoming read-out line
-    // if one line is not acquried, the corresponding time is -1
-    std::vector<int>     time_stamps;
-    std::vector<int>     pmu_time_stamps;
-
-    GadgetMessageImageExt();
-    ~GadgetMessageImageExt();
-
-    void copy(GadgetMessageImageExt& aMessageImage);
-    void set_matrix_size(unsigned int index, ACE_UINT16 size);
-    void dump();
-}; 
-
-// [Ro E1 Cha Slice E2 Con Phase Rep Set Seg]
-//   0  1  2   3  4  5    6     7   8   9
-// store a scan with 10 dimensions
-struct  EXPORTGTPLUS GadgetMessageImageArray
-{
-    // size of the image array
-    ACE_UINT16 matrix_size[10];
-
-    // message information for every 2D image [Slice E2 Contrast Phase Rep Set Seg]
-    GadgetMessageImageExt* imageArray_;
-
-    GadgetMessageImageArray();
-    GadgetMessageImageArray(int aSize[10]);
-    ~GadgetMessageImageArray();
-
-    void resize(int aSize[10]);
-    void copy(GadgetMessageImageArray& imageArray);
-    int get_offset(int slc, int par, int eco, int phs, int rep, int set, int seg);
-    void extractMessageImageArrayForSLC(int slc, GadgetMessageImageArray& imageArray);
-    void extractMessageImageArrayForREP(int rep, GadgetMessageImageArray& imageArray);
-
-    void dump();
-};
-
-struct EXPORTGTPLUS KSpaceBuffer
-{
-    typedef hoNDArray< std::complex<float> > BufferType;
-    typedef hoNDArray< unsigned short > ReflectBufferType;
-
-    // reflect buffer shows whether a readouline is reflected or not
-
-    // kspace data
-    BufferType buffer_;
-    ReflectBufferType reflect_;
-
-    // reference ACS data
-    BufferType ref_;
-    ReflectBufferType refReflect_;
-
-    // noise data
-    BufferType noise_;
-
-    // phase correction data
-    BufferType phaseCorr_;
-    ReflectBufferType phaseCorrReflect_;
-
-    // other data, e.g. AIF data
-    BufferType other_;
-
-    // properties of kspace
-    // kspace center readout number
-    unsigned int kSpaceCentreRO_;
-    // kspace center number for the first encoding dimension
-    unsigned int kSpaceCentreEncode1_;
-    // kspace center number for the second encoding dimension
-    unsigned int kSpaceCentreEncode2_;
-
-    // kspace max acquired readout number
-    unsigned int kSpaceMaxRO_;
-    // kspace max acquired number for the first encoding dimension
-    unsigned int kSpaceMaxEncode1_;
-    // kspace max acquired number for the second encoding dimension
-    unsigned int kSpaceMaxEncode2_;
-
-    // acceleration rate along the E1 and E2 dimensions
-    unsigned int AccelFactE1_;
-    unsigned int AccelFactE2_;
-
-    // mode of calibration
-    ISMRMRD::calibrationModeType::value CalibMode_;
-    ISMRMRD::interleavingDimensionType::value InterleaveDim_;
-
-    KSpaceBuffer();
-    ~KSpaceBuffer();
-};
-
-// -----------------------------------------------------------------------------------------------------------
-
-struct ReadOutBuffer
-{
-    ISMRMRD::AcquisitionHeader acqHead_;
-    hoNDArray< std::complex<float> > data_;
-    bool isReflect_;
-};
-
-class EXPORTGTPLUS GtPlusAccumulatorGadget : public Gadget2< ISMRMRD::AcquisitionHeader, hoNDArray< std::complex<float> > >
-{
-public:
-    GADGET_DECLARE(GtPlusAccumulatorGadget);
-
-    typedef std::complex<float> ValueType;
-
-    typedef Gadget2< ISMRMRD::AcquisitionHeader, hoNDArray< ValueType > > BaseClass;
-
-    typedef std::vector< ReadOutBuffer > ReadOutBufferType;
-    typedef hoNDArray< std::complex<float> > BufferType;
-    typedef hoNDArray< unsigned short > ReflectBufferType;
-
-    GtPlusAccumulatorGadget();
-    ~GtPlusAccumulatorGadget();
-
-    virtual int close(unsigned long flags);
-
-protected:
-
-    virtual int process_config(ACE_Message_Block* mb);
-    virtual int process(GadgetContainerMessage< ISMRMRD::AcquisitionHeader >* m1, GadgetContainerMessage< hoNDArray< ValueType > > * m2);
-
-    // check the status of incoming readout
-    // bIsKSpace: whether this data is for image
-    // bIsRef: whether this data is for calibration signal
-    // bIsNoise: whether this data is a noise scan
-    // bIsPhaseCorr: whether this data is for phase correction
-    // bIsReflect: whether this data is acquired reflectly (for EPI and similar scans)
-    // bIsOther: other scans
-    virtual bool checkStatus(uint64_t flag, int samples, bool& bIsKSpace, bool& bIsRef, bool& bIsNoise, bool& bIsPhaseCorr, bool& bIsReflect, bool& bIsOther);
-
-    // store the image data
-    virtual bool storeImageData(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2, bool isReflect);
-
-    // fill the dynamically buffered data
-    virtual bool fillBuffer(ReadOutBufferType& readOutBuffer, BufferType& buf, ReflectBufferType& reflectBuf);
-
-    // fill the per 2D image info
-    virtual bool fillImageInfo(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, GadgetMessageImageArray* messageImage, const ISMRMRD::EncodingCounters& idx);
-
-    // buffer for per 2D image information
-    GadgetMessageImageArray* messageImage_;
-
-    // buffer for image kspace data
-    // if the partial fourier is used, the kspace center is put at the center of buffer
-    // this means zeros will be padded accordingly
-    KSpaceBuffer* kspaceBuffer_;
-
-    // dynamic buffer for other kspace data
-    ReadOutBufferType refBuffer_;
-    ReadOutBufferType noiseBuffer_;
-    ReadOutBufferType phaseCorrBuffer_;
-    ReadOutBufferType otherBuffer_;
-
-    // dimension for image kspace
-    std::vector<unsigned int> dimensions_;
-
-    // filed of view [mm]
-    float field_of_view_[3];
-
-    int image_counter_;
-    int image_series_;
-
-    // whether the next gadget has been triggered
-    bool triggered_;
-
-    int meas_max_ro_;
-    ISMRMRD::EncodingCounters meas_max_idx_;
-    int meas_max_channel_;
-};
-
-}
diff --git a/gadgets/gtPlus/GtPlusAccumulatorIRT2DGadget.cpp b/gadgets/gtPlus/GtPlusAccumulatorIRT2DGadget.cpp
deleted file mode 100644
index 0cf2b4f..0000000
--- a/gadgets/gtPlus/GtPlusAccumulatorIRT2DGadget.cpp
+++ /dev/null
@@ -1,615 +0,0 @@
-
-#include "GtPlusAccumulatorIRT2DGadget.h"
-#include "GadgetIsmrmrdReadWrite.h"
-
-namespace Gadgetron
-{
-
-GtPlusAccumulatorIRT2DGadget::GtPlusAccumulatorIRT2DGadget() : prev_rep_(-1), cur_rep_(-1), num_scan_buffered_(0)
-{
-
-}
-
-GtPlusAccumulatorIRT2DGadget::~GtPlusAccumulatorIRT2DGadget()
-{
-
-}
-
-int GtPlusAccumulatorIRT2DGadget::process_config(ACE_Message_Block* mb)
-{
-    return BaseClass::process_config(mb);
-}
-
-bool GtPlusAccumulatorIRT2DGadget::
-copyBufferForREP(BufferType& buf, int rep, BufferType& bufREP)
-{
-    try
-    {
-        boost::shared_ptr< std::vector<unsigned int> > dims = buf.get_dimensions();
-
-        boost::shared_ptr< std::vector<unsigned int> > dimsREP = dims;
-        (*dimsREP)[7] = 1;
-
-        try
-        {
-            bufREP.create(dimsREP);
-        }
-        catch(...)
-        {
-            GADGET_DEBUG1("Failed create buffer for REP \n");
-            return false;
-        }
-
-        // copy the memory over
-        int RO = (*dims)[0];
-        int E1 = (*dims)[1];
-        int CHA = (*dims)[2];
-        int SLC = (*dims)[3];
-        int E2 = (*dims)[4];
-        int CON = (*dims)[5];
-        int PHS = (*dims)[6];
-        int REP = (*dims)[7];
-        int SET = (*dims)[8];
-        int SEG = (*dims)[9];
-
-        int e2, con, phs, slc, set, seg;
-
-        std::vector<unsigned int> pos(10);
-
-        for ( seg=0; seg<SEG; seg++ )
-        {
-            for ( set=0; set<SET; set++ )
-            {
-                for ( slc=0; slc<SLC; slc++ )
-                {
-                    for ( phs=0; phs<PHS; phs++ )
-                    {
-                        for ( con=0; con<CON; con++ )
-                        {
-                            for ( e2=0; e2<E2; e2++ )
-                            {
-                                pos[0] = 0;
-                                pos[1] = 0;
-                                pos[2] = 0;
-                                pos[3] = slc;
-                                pos[4] = e2;
-                                pos[5] = con;
-                                pos[6] = phs;
-                                pos[7] = rep;
-                                pos[8] = set;
-                                pos[9] = seg;
-                                int offsetBuffer = buf.calculate_offset(pos);
-
-                                // buffer slc
-                                pos[0] = 0;
-                                pos[1] = 0;
-                                pos[2] = 0;
-                                pos[3] = slc;
-                                pos[4] = e2;
-                                pos[5] = con;
-                                pos[6] = phs;
-                                pos[7] = 0;
-                                pos[8] = set;
-                                pos[9] = seg;
-                                int offsetBufferREP = bufREP.calculate_offset(pos);
-
-                                // copy the image content
-                                memcpy(bufREP.begin()+offsetBufferREP, buf.begin()+offsetBuffer, sizeof(std::complex<float>)*RO*E1*CHA);
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-    catch(...)
-    {
-        GADGET_DEBUG1("Errors in GtPlusAccumulatorIRT2DGadget::copyBufferForREP(...) ... \n");
-        return false;
-    }
-
-    return true;
-}
-
-bool GtPlusAccumulatorIRT2DGadget::
-copyReflectBufferForREP(ReflectBufferType& buf, int rep, ReflectBufferType& bufREP)
-{
-    try
-    {
-        boost::shared_ptr< std::vector<unsigned int> > dims = buf.get_dimensions();
-
-        boost::shared_ptr< std::vector<unsigned int> > dimsREP = dims;
-        (*dimsREP)[7] = 1;
-
-        try
-        {
-            bufREP.create(dimsREP);
-        }
-        catch(...)
-        {
-            GADGET_DEBUG1("Failed create buffer for REP \n");
-            return false;
-        }
-
-        // copy the memory over
-        int RO = (*dims)[0];
-        int E1 = (*dims)[1];
-        int CHA = (*dims)[2];
-        int SLC = (*dims)[3];
-        int E2 = (*dims)[4];
-        int CON = (*dims)[5];
-        int PHS = (*dims)[6];
-        int REP = (*dims)[7];
-        int SET = (*dims)[8];
-        int SEG = (*dims)[9];
-
-        int e2, con, phs, slc, set, seg;
-
-        std::vector<unsigned int> pos(10);
-
-        for ( seg=0; seg<SEG; seg++ )
-        {
-            for ( set=0; set<SET; set++ )
-            {
-                for ( slc=0; slc<SLC; slc++ )
-                {
-                    for ( phs=0; phs<PHS; phs++ )
-                    {
-                        for ( con=0; con<CON; con++ )
-                        {
-                            for ( e2=0; e2<E2; e2++ )
-                            {
-                                pos[0] = 0;
-                                pos[1] = 0;
-                                pos[2] = 0;
-                                pos[3] = slc;
-                                pos[4] = e2;
-                                pos[5] = con;
-                                pos[6] = phs;
-                                pos[7] = rep;
-                                pos[8] = set;
-                                pos[9] = seg;
-                                int offsetBuffer = buf.calculate_offset(pos);
-
-                                // buffer slc
-                                pos[0] = 0;
-                                pos[1] = 0;
-                                pos[2] = 0;
-                                pos[3] = slc;
-                                pos[4] = e2;
-                                pos[5] = con;
-                                pos[6] = phs;
-                                pos[7] = 0;
-                                pos[8] = set;
-                                pos[9] = seg;
-                                int offsetBufferREP = bufREP.calculate_offset(pos);
-
-                                // copy the image content
-                                memcpy(bufREP.begin()+offsetBufferREP, buf.begin()+offsetBuffer, sizeof(unsigned short)*RO*E1*CHA);
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-    catch(...)
-    {
-        GADGET_DEBUG1("Errors in GtPlusAccumulatorSLCGadget::copyReflectBufferForSLC(...) ... \n");
-        return false;
-    }
-
-    return true;
-}
-
-bool GtPlusAccumulatorIRT2DGadget::triggerREP(int rep)
-{
-    try
-    {
-        GadgetContainerMessage<GadgetMessageImageArray>* cm1 = 
-            new GadgetContainerMessage<GadgetMessageImageArray>();
-
-        GadgetContainerMessage< KSpaceBuffer >* cm2 = 
-            new GadgetContainerMessage< KSpaceBuffer >();
-
-        cm1->cont(cm2);
-
-        // copy the kspace data for this REP
-        if ( !copyBufferForREP(kspaceBuffer_->buffer_, 0, cm2->getObjectPtr()->buffer_) ) 
-        {
-            GADGET_DEBUG1("Unable to copyBufferForREP\n");
-            cm1->release();
-            return false;
-        }
-
-        if ( !copyReflectBufferForREP(kspaceBuffer_->reflect_, 0, cm2->getObjectPtr()->reflect_) ) 
-        {
-            GADGET_DEBUG1("Unable to copyReflectBufferForREP\n");
-            cm1->release();
-            return false;
-        }
-
-        // fill buffer with zeros, ready for next REP
-        kspaceBuffer_->buffer_.fill(0);
-        kspaceBuffer_->reflect_.fill(0);
-
-        // copy the message image array for this REP
-        GadgetMessageImageArray aMessageArray;
-        messageImage_->extractMessageImageArrayForREP(0, aMessageArray);
-        cm1->getObjectPtr()->copy(aMessageArray);
-
-        if (!refBuffer_.empty())
-        {
-            GADGET_MSG("GtPlusAccumulatorIRT2DGadget - ref signal found : " << refBuffer_.size());
-
-            BufferType refCurr;
-            ReflectBufferType refReflectCurr;
-            if ( !fillBuffer(refBuffer_, refCurr, refReflectCurr) )
-            {
-                GADGET_DEBUG1("fillBuffer(refBuffer_, refCurr, refReflectCurr) failed ... \n");
-                cm1->release();
-                return GADGET_FAIL;
-            }
-
-            if ( !copyBufferForREP(refCurr, rep, cm2->getObjectPtr()->ref_) ) 
-            {
-                GADGET_DEBUG1("Unable to copyBufferForREP(refCurr, rep, cm2->getObjectPtr()->ref_)\n");
-                cm1->release();
-                return false;
-            }
-
-            if ( !copyReflectBufferForREP(refReflectCurr, rep, cm2->getObjectPtr()->refReflect_) ) 
-            {
-                GADGET_DEBUG1("Unable to copyReflectBufferForREP(refReflectCurr, rep, cm2->getObjectPtr()->refReflect_)\n");
-                cm1->release();
-                return false;
-            }
-        }
-
-        if (!phaseCorrBuffer_.empty())
-        {
-            GADGET_MSG("GtPlusAccumulatorIRT2DGadget - phase correction signal found : " << phaseCorrBuffer_.size());
-
-            BufferType phsCorrCurr;
-            ReflectBufferType phsCorrReflectCurr;
-            if ( !fillBuffer(phaseCorrBuffer_, phsCorrCurr, phsCorrReflectCurr) )
-            {
-                GADGET_DEBUG1("fillBuffer(phaseCorrBuffer_, phsCorrCurr, phsCorrReflectCurr) failed ... \n");
-                cm1->release();
-                return GADGET_FAIL;
-            }
-
-            if ( !copyBufferForREP(phsCorrCurr, rep, cm2->getObjectPtr()->phaseCorr_) ) 
-            {
-                GADGET_DEBUG1("Unable to copyBufferForREP(phsCorrCurr, rep, cm2->getObjectPtr()->phaseCorr_)\n");
-                cm1->release();
-                return false;
-            }
-
-            if ( !copyReflectBufferForREP(phsCorrReflectCurr, rep, cm2->getObjectPtr()->phaseCorrReflect_) ) 
-            {
-                GADGET_DEBUG1("Unable to copyReflectBufferForREP(phsCorrReflectCurr, rep, cm2->getObjectPtr()->phaseCorrReflect_)\n");
-                cm1->release();
-                return false;
-            }
-        }
-
-        if (!noiseBuffer_.empty())
-        {
-            GADGET_MSG("GtPlusAccumulatorIRT2DGadget - noise signal found : " << noiseBuffer_.size());
-
-            BufferType noiseCurr;
-            ReflectBufferType tmpBuf;
-            if ( !fillBuffer(noiseBuffer_, noiseCurr, tmpBuf) )
-            {
-                GADGET_DEBUG1("fillBuffer(noiseBuffer_, noiseCurr, tmpBuf) failed ... \n");
-                cm1->release();
-                return false;
-            }
-
-            if ( !copyBufferForREP(noiseCurr, rep, cm2->getObjectPtr()->noise_) ) 
-            {
-                GADGET_DEBUG1("Unable to copyBufferForREP(noiseCurr, rep, cm2->getObjectPtr()->noise_)\n");
-                cm1->release();
-                return false;
-            }
-        }
-
-        if (!otherBuffer_.empty())
-        {
-            GADGET_MSG("GtPlusAccumulatorIRT2DGadget - other signal found : " << otherBuffer_.size());
-
-            BufferType otherCurr;
-            ReflectBufferType tmpBuf;
-            if ( !fillBuffer(otherBuffer_, otherCurr, tmpBuf) )
-            {
-                GADGET_DEBUG1("fillBuffer(otherBuffer_, otherCurr, tmpBuf) failed ... \n");
-                cm1->release();
-                return false;
-            }
-
-            if ( !copyBufferForREP(otherCurr, rep, cm2->getObjectPtr()->other_) ) 
-            {
-                GADGET_DEBUG1("Unable to copyBufferForSLC(otherCurr, rep, cm2->getObjectPtr()->other_)\n");
-                cm1->release();
-                return false;
-            }
-        }
-
-        // send to next gadget
-        if (this->next()->putq(cm1) < 0) 
-        {
-            return false;
-        }
-    }
-    catch(...)
-    {
-        GADGET_DEBUG1("Errors in GtPlusAccumulatorIRT2DGadget::triggerREP(rep) ... \n");
-        return false;
-    }
-
-    return true;
-}
-
-bool GtPlusAccumulatorIRT2DGadget::
-storeImageData(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2, bool isReflect)
-{
-    try
-    {
-        unsigned int ii;
-        int samples =  m1->getObjectPtr()->number_of_samples;
-        ISMRMRD::EncodingCounters idx = m1->getObjectPtr()->idx;
-
-        if ( kspaceBuffer_->buffer_.get_number_of_elements() <= 0 )
-        {
-            meas_max_channel_ = m1->getObjectPtr()->active_channels;
-
-            int E1 = 2*kspaceBuffer_->kSpaceCentreEncode1_;
-            int E2 = 2*kspaceBuffer_->kSpaceCentreEncode2_;
-
-            // find the loop counter boundary and allocate the buffer
-            GADGET_MSG("[RO E1 Cha Slice E2 Con Phase Rep Set Seg] = [" 
-                               << meas_max_ro_ 
-                               << " " << E1 
-                               << " " << meas_max_channel_ 
-                               << " " << meas_max_idx_.slice+1 
-                               << " " << E2 
-                               << " " << meas_max_idx_.contrast+1 
-                               << " " << meas_max_idx_.phase+1 
-                               << " " << 1 
-                               << " " << meas_max_idx_.set+1 
-                               << " " << meas_max_idx_.segment+1 << "]");
-
-            dimensions_.push_back(meas_max_ro_);
-            dimensions_.push_back(E1);
-            dimensions_.push_back(meas_max_channel_);
-            dimensions_.push_back(meas_max_idx_.slice+1);
-            dimensions_.push_back(E2);
-            dimensions_.push_back(meas_max_idx_.contrast+1);
-            dimensions_.push_back(meas_max_idx_.phase+1);
-            dimensions_.push_back(1);
-            dimensions_.push_back(meas_max_idx_.set+1);
-            dimensions_.push_back(meas_max_idx_.segment+1);
-
-            unsigned int N = dimensions_.size();
-            for ( ii=0; ii<N; ii++ )
-            {
-                GADGET_MSG("dimensions_[" << ii << "] = " << dimensions_[ii]);
-            }
-
-            // allocate data buffer
-            try
-            {
-                kspaceBuffer_->buffer_.create(&dimensions_);
-
-                std::vector<unsigned int> reflect_dimensions_(dimensions_);
-                reflect_dimensions_[0] = 1;
-                reflect_dimensions_[2] = 1;
-                kspaceBuffer_->reflect_.create(&reflect_dimensions_);
-            }
-            catch(...)
-            {
-                GADGET_DEBUG1("Failed create buffer\n");
-                return false;
-            }
-
-            // allocate message buffer
-            int matrix_size[10];
-            for ( ii=0; ii<10; ii++ )
-            {
-                matrix_size[ii] = dimensions_[ii];
-            }
-
-            if (!(messageImage_ = new GadgetMessageImageArray(matrix_size))) 
-            {
-                GADGET_DEBUG1("Failed create buffer\n");
-                return false;
-            }
-        }
-
-        std::complex<float>* b = kspaceBuffer_->buffer_.begin();
-        std::complex<float>* d = m2->getObjectPtr()->get_data_ptr();
-        if (samples != static_cast<int>(dimensions_[0])) 
-        {
-            GADGET_DEBUG1("Wrong number of samples received\n");
-            return false;
-        }
-
-        //Copy the data for all the channels
-        std::vector<unsigned int> pos(10);
-        for (int c = 0; c < m1->getObjectPtr()->active_channels; c++) 
-        {
-            pos[0] = 0;
-            pos[1] = idx.kspace_encode_step_1;
-            pos[2] = c;
-            pos[3] = idx.slice;
-            pos[4] = idx.kspace_encode_step_2;
-            pos[5] = idx.contrast;
-            pos[6] = idx.phase;
-            pos[7] = 0;
-            pos[8] = idx.set;
-            pos[9] = idx.segment;
-            int offsetBuffer = kspaceBuffer_->buffer_.calculate_offset(pos);
-
-            memcpy(b+offsetBuffer, d+c*samples, sizeof(std::complex<float>)*samples);
-
-            pos[2] = 0;
-            offsetBuffer = kspaceBuffer_->reflect_.calculate_offset(pos);
-            kspaceBuffer_->reflect_.at(offsetBuffer) = isReflect;
-        }
-
-        idx.repetition = 0;
-        if ( !fillImageInfo(m1, messageImage_, idx) )
-        {
-            GADGET_DEBUG1("Failed in fillImageInfo(m1, messageImage_, idx)\n");
-            return false;
-        }
-    }
-    catch(...)
-    {
-        GADGET_DEBUG1("Errors in GtPlusAccumulatorIRT2DGadget::storeImageData(...) ... \n");
-        return false;
-    }
-
-    return true;
-}
-
-int GtPlusAccumulatorIRT2DGadget::process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, 
-        GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
-{
-    // check whether a new REP starts
-    bool isLastScanInSlice = ISMRMRD::FlagBit(ISMRMRD::ACQ_LAST_IN_SLICE).isSet(m1->getObjectPtr()->flags);
-
-    bool isNewRep = false;
-    cur_rep_ = m1->getObjectPtr()->idx.repetition;
-
-    if ( prev_rep_==-1 )
-    {
-        prev_rep_ = cur_rep_;
-    }
-    else
-    {
-        if ( cur_rep_!=prev_rep_ )
-        {
-            isNewRep = true;
-        }
-    }
-
-    bool bIsKSpace, bIsRef, bIsNoise, bIsPhaseCorr, bIsReflect, bIsOther;
-    if ( !checkStatus(m1->getObjectPtr()->flags, m1->getObjectPtr()->number_of_samples, bIsKSpace, bIsRef, bIsNoise, bIsPhaseCorr, bIsReflect, bIsOther) )
-    {
-        GADGET_DEBUG1("Failed check readout status\n");
-        return GADGET_FAIL;
-    }
-
-    // store kspace read out
-    if ( bIsKSpace )
-    {
-        if ( !storeImageData(m1, m2, bIsReflect) )
-        {
-            GADGET_DEBUG1("Failed check readout status\n");
-            return GADGET_FAIL;
-        }
-
-        num_scan_buffered_++;
-    }
-
-    // store ref read out
-    if ( bIsRef )
-    {
-        ISMRMRD::AcquisitionHeader* pMDH = m1->getObjectPtr();
-        hoNDArray< ValueType >* pRefLine = m2->getObjectPtr();
-
-        ReadOutBuffer item;
-        item.acqHead_ = *pMDH;
-        item.data_ = *pRefLine;
-        item.isReflect_ = bIsReflect;
-
-        if ( isNewRep )
-        {
-            refBuffer_.clear();
-        }
-
-        refBuffer_.push_back(item);
-    }
-
-    // store phaseCorr read out
-    if ( bIsPhaseCorr )
-    {
-        ISMRMRD::AcquisitionHeader* pMDH = m1->getObjectPtr();
-        hoNDArray< ValueType >* pRefLine = m2->getObjectPtr();
-
-        ReadOutBuffer item;
-        item.acqHead_ = *pMDH;
-        item.data_ = *pRefLine;
-        item.isReflect_ = bIsReflect;
-
-        if ( isNewRep )
-        {
-            phaseCorrBuffer_.clear();
-        }
-
-        phaseCorrBuffer_.push_back(item);
-    }
-
-    // store noise read out
-    if ( bIsNoise )
-    {
-        ISMRMRD::AcquisitionHeader* pMDH = m1->getObjectPtr();
-        hoNDArray< ValueType >* pRefLine = m2->getObjectPtr();
-
-        ReadOutBuffer item;
-        item.acqHead_ = *pMDH;
-        item.data_ = *pRefLine;
-        item.isReflect_ = bIsReflect;
-
-        if ( isNewRep )
-        {
-            noiseBuffer_.clear();
-        }
-
-        noiseBuffer_.push_back(item);
-    }
-
-    // store other read out
-    if ( bIsOther )
-    {
-        ISMRMRD::AcquisitionHeader* pMDH = m1->getObjectPtr();
-        hoNDArray< ValueType >* pRefLine = m2->getObjectPtr();
-
-        ReadOutBuffer item;
-        item.acqHead_ = *pMDH;
-        item.data_ = *pRefLine;
-        item.isReflect_ = bIsReflect;
-
-        if ( isNewRep )
-        {
-            otherBuffer_.clear();
-        }
-
-        otherBuffer_.push_back(item);
-    }
-
-   // if a new rep comes, it indicates the previous one is complete and can be sent out
-    if ( isLastScanInSlice )
-    {
-        // GADGET_MSG("Repetition " << prev_rep_ << " is complete ... ");
-
-        if ( !triggerREP(prev_rep_) ) 
-        {
-            GADGET_DEBUG1("Unable to trigger this rep ... \n");
-            return GADGET_FAIL;
-        }
-
-        prev_rep_ = cur_rep_;
-
-        GADGET_ERROR_MSG("GtPlusAccumulatorIRT2DGadget - trigger next gadget for REP " << prev_rep_ << " - scan buffered - " << num_scan_buffered_ << " ... ");
-        num_scan_buffered_ = 0;
-    }
-
-    m1->release();
-    return GADGET_OK;
-}
-
-GADGET_FACTORY_DECLARE(GtPlusAccumulatorIRT2DGadget)
-}
diff --git a/gadgets/gtPlus/GtPlusAccumulatorIRT2DGadget.h b/gadgets/gtPlus/GtPlusAccumulatorIRT2DGadget.h
deleted file mode 100644
index 51e2270..0000000
--- a/gadgets/gtPlus/GtPlusAccumulatorIRT2DGadget.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#pragma once
-
-#include "GtPlusAccumulatorGadget.h"
-
-namespace Gadgetron
-{
-
-class EXPORTGTPLUS GtPlusAccumulatorIRT2DGadget : public GtPlusAccumulatorGadget
-{
-public:
-    GADGET_DECLARE(GtPlusAccumulatorIRT2DGadget);
-
-    typedef GtPlusAccumulatorGadget BaseClass;
-
-    typedef BaseClass::ValueType ValueType;
-    typedef BaseClass::ReadOutBufferType ReadOutBufferType;
-    typedef BaseClass::BufferType BufferType;
-    typedef BaseClass::ReflectBufferType ReflectBufferType;
-
-    GtPlusAccumulatorIRT2DGadget();
-    ~GtPlusAccumulatorIRT2DGadget();
-
-protected:
-
-    virtual int process_config(ACE_Message_Block* mb);
-
-    // here, every 2D kspace is stored and send out for every new repetition
-    virtual bool storeImageData(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2, bool isReflect);
-
-    virtual bool triggerREP(int rep);
-    virtual int process(Gadgetron::GadgetContainerMessage< ISMRMRD::AcquisitionHeader >* m1, Gadgetron::GadgetContainerMessage< Gadgetron::hoNDArray< std::complex<float> > > * m2);
-
-    virtual bool copyBufferForREP(BufferType& buf, int rep, BufferType& bufREP);
-    virtual bool copyReflectBufferForREP(ReflectBufferType& buf, int rep, ReflectBufferType& bufREP);
-
-    int prev_rep_;
-    int cur_rep_;
-
-    int num_scan_buffered_;
-};
-
-}
diff --git a/gadgets/gtPlus/GtPlusAccumulatorImageTriggerGadget.cpp b/gadgets/gtPlus/GtPlusAccumulatorImageTriggerGadget.cpp
new file mode 100644
index 0000000..0459513
--- /dev/null
+++ b/gadgets/gtPlus/GtPlusAccumulatorImageTriggerGadget.cpp
@@ -0,0 +1,746 @@
+
+#include "GtPlusAccumulatorImageTriggerGadget.h"
+#include "GtPlusReconGadgetUtil.h"
+
+using namespace Gadgetron::gtPlus;
+
+namespace Gadgetron
+{
+
+GtPlusAccumulatorImageTriggerGadget::GtPlusAccumulatorImageTriggerGadget() : image_counter_(0), triggered_in_close_(false), verboseMode_(false)
+{
+    cha_trigger_ = false; // every trigger flag starts off; process_config() overwrites them from the gadget parameters
+    slc_trigger_ = false;
+    e2_trigger_ = false;
+    con_trigger_ = false;
+    phs_trigger_ = false;
+    rep_trigger_ = false;
+    set_trigger_ = false;
+    ave_trigger_ = false;
+
+    num_of_dimensions_ = 8; // number of image buffer dimensions, ordered [CHA SLC E2 CON PHS REP SET AVE]
+
+    // this may be changed later if a multi-channel image workflow is used
+    meas_max_channel_ = 1;
+
+    pass_image_immediate_ = false; // also overwritten in process_config()
+}
+
+GtPlusAccumulatorImageTriggerGadget::~GtPlusAccumulatorImageTriggerGadget()
+{
+    // empty body: no explicit cleanup is performed here
+}
+
+// Reads the gadget parameters and the ISMRMRD xml header held in mb, then sizes and clears the image buffers. Returns GADGET_OK on success and GADGET_FAIL when the header does not hold exactly one encoding space (presumably also when findEncodingLimits fails - confirm GADGET_CHECK_RETURN); a header parse error is rethrown to the caller.
+int GtPlusAccumulatorImageTriggerGadget::process_config(ACE_Message_Block* mb)
+{
+    // gadget parameters
+    verboseMode_ = this->get_bool_value("verboseMode");
+
+    cha_trigger_ = this->get_bool_value("TriggerChannel");
+    slc_trigger_ = this->get_bool_value("TriggerSlice");
+    e2_trigger_  = this->get_bool_value("TriggerE2");
+    con_trigger_ = this->get_bool_value("TriggerContrast");
+    phs_trigger_ = this->get_bool_value("TriggerPhase");
+    rep_trigger_ = this->get_bool_value("TriggerRepetition");
+    set_trigger_ = this->get_bool_value("TriggerSet");
+    ave_trigger_ = this->get_bool_value("TriggerAverage");
+
+    pass_image_immediate_ = this->get_bool_value("PassImageImmediately");
+
+    // ---------------------------------------------------------------------------------------------------------
+    // parse the xml header
+    ISMRMRD::IsmrmrdHeader h;
+    try {
+      deserialize(mb->rd_ptr(),h);
+    } catch (...) {
+      GADGET_DEBUG1("Error parsing ISMRMRD Header");
+      throw;
+      return GADGET_FAIL; // NOTE(review): unreachable, the rethrow above already leaves the function
+    }
+
+    // only a header with exactly one encoding space is accepted
+    if (h.encoding.size() != 1)
+    {
+        GADGET_DEBUG2("Number of encoding spaces: %d\n", h.encoding.size());
+        GADGET_DEBUG1("This simple GtPlusAccumulatorImageTriggerGadget only supports one encoding space\n");
+        return GADGET_FAIL;
+    }
+
+    // ---------------------------------------------------------------------------------------------------------
+
+    // find out the encoding space 
+    findMatrixSizeEncoding(h, matrix_size_encoding_);
+    findFOVEncoding(h, field_of_view_encoding_);
+
+    findMatrixSizeRecon(h, matrix_size_recon_);
+    findFOVRecon(h, field_of_view_recon_);
+
+    GADGET_CONDITION_MSG(verboseMode_, "Encoding matrix size: " << matrix_size_encoding_[0] << " " << matrix_size_encoding_[1] << " " << matrix_size_encoding_[2]);
+    GADGET_CONDITION_MSG(verboseMode_, "Encoding field_of_view : " << field_of_view_encoding_[0] << " " << field_of_view_encoding_[1] << " " << field_of_view_encoding_[2]);
+    GADGET_CONDITION_MSG(verboseMode_, "Recon matrix size : " << matrix_size_recon_[0] << " " << matrix_size_recon_[1] << " " << matrix_size_recon_[2]);
+    GADGET_CONDITION_MSG(verboseMode_, "Recon field_of_view :  " << field_of_view_recon_[0] << " " << field_of_view_recon_[1] << " " << field_of_view_recon_[2]);
+
+    // ---------------------------------------------------------------------------------------------------------
+    // encoding limits
+    GADGET_CHECK_RETURN(findEncodingLimits(h, meas_max_idx_, verboseMode_), GADGET_FAIL);
+
+    //ISMRMRD::EncodingLimits e_limits = h.encoding[0].encodingLimits;
+
+    //if (e_limits.kspace_encoding_step_1) 
+    //{
+    //    meas_max_idx_.kspace_encode_step_1 = (uint16_t)(matrix_size_encoding_[1]-1); // e_limits.kspace_encoding_step_1().get().maximum();
+    //}
+    //else
+    //{
+    //    meas_max_idx_.kspace_encode_step_1 = 0;
+    //    std::cout << "Setting number of kspace_encode_step_1 to 0" << std::endl;
+    //    return GADGET_FAIL;
+    //}
+
+    //if (e_limits.set)
+    //{
+    //    if ( e_limits.set->maximum > 0 )
+    //        meas_max_idx_.set = e_limits.set->maximum - 1;
+    //    else
+    //        meas_max_idx_.set = 0;
+
+    //    if ( meas_max_idx_.set < 0 ) meas_max_idx_.set = 0;
+    //}
+    //else
+    //{
+    //    meas_max_idx_.set = 0;
+    //}
+
+    //if (e_limits.phase)
+    //{
+    //    if ( e_limits.phase->maximum > 0 )
+    //        meas_max_idx_.phase = e_limits.phase->maximum-1;
+    //    else
+    //        meas_max_idx_.phase = 0;
+
+    //    if ( meas_max_idx_.phase < 0 ) meas_max_idx_.phase = 0;
+    //}
+    //else
+    //{
+    //    meas_max_idx_.phase = 0;
+    //}
+
+    //if (e_limits.kspace_encoding_step_2)
+    //{
+    //    meas_max_idx_.kspace_encode_step_2 = (uint16_t)(matrix_size_encoding_[2] - 1); // e_limits.kspace_encoding_step_2().get().maximum();
+    //}
+    //else
+    //{
+    //    meas_max_idx_.kspace_encode_step_2 = 0;
+    //}
+    //meas_max_idx_.kspace_encode_step_2 = (uint16_t)(matrix_size_recon_[2]);
+
+    //if (e_limits.contrast)
+    //{
+    //    if ( e_limits.contrast->maximum > 0 )
+    //        meas_max_idx_.contrast = e_limits.contrast->maximum-1;
+    //    else
+    //        meas_max_idx_.contrast = 0;
+
+    //    if ( meas_max_idx_.contrast < 0 ) meas_max_idx_.contrast = 0;
+    //}
+    //else
+    //{
+    //    meas_max_idx_.contrast = 0;
+    //}
+
+    //if (e_limits.slice)
+    //{
+    //    meas_max_idx_.slice = e_limits.slice->maximum;
+    //}
+    //else
+    //{
+    //    meas_max_idx_.slice = 0;
+    //}
+
+    //if (e_limits.repetition)
+    //{
+    //    meas_max_idx_.repetition = e_limits.repetition->maximum;
+    //}
+    //else
+    //{
+    //    meas_max_idx_.repetition = 0;
+    //}
+
+    //if (e_limits.average)
+    //{
+    //    meas_max_idx_.average = e_limits.average->maximum-1;
+    //}
+    //else
+    //{
+    //    meas_max_idx_.average = 0;
+    //}
+
+    //if (e_limits.segment)
+    //{
+    //    // meas_max_idx_.segment = e_limits.segment().get().maximum()-1;
+    //    meas_max_idx_.segment = 0;
+    //}
+    //else
+    //{
+    //    meas_max_idx_.segment = 0;
+    //}
+
+    // allocate the image buffers
+    // [Cha Slice E2 Con Phase Rep Set Ave]
+    //   0    1    2   3   4    5   6   7
+
+    meas_max_idx_.kspace_encode_step_2 = (uint16_t)(matrix_size_recon_[2]); // E2 is overridden with the recon matrix size, used below as an extent (no +1)
+
+    dimensions_.resize(GT_DIM_NUM_IMAGE, 0);
+    dimensions_[0] = meas_max_channel_;
+    dimensions_[1] = meas_max_idx_.slice+1;
+    dimensions_[2] = meas_max_idx_.kspace_encode_step_2;
+    dimensions_[3] = meas_max_idx_.contrast+1;
+    dimensions_[4] = meas_max_idx_.phase+1;
+    dimensions_[5] = meas_max_idx_.repetition+1;
+    dimensions_[6] = meas_max_idx_.set+1;
+    dimensions_[7] = meas_max_idx_.average+1;
+
+    imageBuffer_.create(dimensions_);
+    imageSent_.create(dimensions_);
+
+    otherBuffer_.create(dimensions_);
+    otherSent_.create(dimensions_);
+
+    size_t nElem = imageBuffer_.get_number_of_elements(); // all four buffers were created with the same dimensions_
+    size_t ii;
+    for ( ii=0; ii<nElem; ii++ )
+    {
+        imageBuffer_(ii) = NULL;
+        otherBuffer_(ii) = NULL;
+        imageSent_(ii) = false;
+        otherSent_(ii) = false;
+    }
+
+    // set the dimensions under/not under trigger
+    this->setDimensionsUnderTrigger();
+
+    GADGET_CONDITION_MSG(verboseMode_, "dimension limits                [Cha Slice E2 Con Phase Rep Set Ave] = [" 
+                               << " " << dimensions_[0] 
+                               << " " << dimensions_[1] 
+                               << " " << dimensions_[2] 
+                               << " " << dimensions_[3]
+                               << " " << dimensions_[4]
+                               << " " << dimensions_[5]
+                               << " " << dimensions_[6] 
+                               << " " << dimensions_[7] << "]");
+
+    GADGET_CONDITION_MSG(verboseMode_, "dimension under trigger         [Cha Slice E2 Con Phase Rep Set Ave] = [" 
+                               << " " << dim_under_trigger_[0] 
+                               << " " << dim_under_trigger_[1] 
+                               << " " << dim_under_trigger_[2] 
+                               << " " << dim_under_trigger_[3]
+                               << " " << dim_under_trigger_[4]
+                               << " " << dim_under_trigger_[5]
+                               << " " << dim_under_trigger_[6] 
+                               << " " << dim_under_trigger_[7] << "]");
+
+    GADGET_CONDITION_MSG(verboseMode_, "dimension limits under trigger  [Cha Slice E2 Con Phase Rep Set Ave] = [" 
+                               << " " << dim_limit_under_trigger_[0] 
+                               << " " << dim_limit_under_trigger_[1] 
+                               << " " << dim_limit_under_trigger_[2] 
+                               << " " << dim_limit_under_trigger_[3]
+                               << " " << dim_limit_under_trigger_[4]
+                               << " " << dim_limit_under_trigger_[5]
+                               << " " << dim_limit_under_trigger_[6] 
+                               << " " << dim_limit_under_trigger_[7] << "]");
+
+    GADGET_CONDITION_MSG(verboseMode_, "dimension NOT under trigger     [Cha Slice E2 Con Phase Rep Set Ave] = [" 
+                               << " " << dim_not_under_trigger_[0] 
+                               << " " << dim_not_under_trigger_[1] 
+                               << " " << dim_not_under_trigger_[2] 
+                               << " " << dim_not_under_trigger_[3]
+                               << " " << dim_not_under_trigger_[4]
+                               << " " << dim_not_under_trigger_[5]
+                               << " " << dim_not_under_trigger_[6] 
+                               << " " << dim_not_under_trigger_[7] << "]");
+
+    GADGET_CONDITION_MSG(verboseMode_, "dimension limits NOT under trigger [Cha Slice E2 Con Phase Rep Set Ave] = [" 
+                               << " " << dim_limit_not_under_trigger_[0] 
+                               << " " << dim_limit_not_under_trigger_[1] 
+                               << " " << dim_limit_not_under_trigger_[2] 
+                               << " " << dim_limit_not_under_trigger_[3]
+                               << " " << dim_limit_not_under_trigger_[4]
+                               << " " << dim_limit_not_under_trigger_[5]
+                               << " " << dim_limit_not_under_trigger_[6] 
+                               << " " << dim_limit_not_under_trigger_[7] << "]");
+
+    return GADGET_OK;
+}
+
+void GtPlusAccumulatorImageTriggerGadget::setDimensionsUnderTrigger() // splits the 8 buffer dimensions into "under trigger" / "not under trigger" groups from the *_trigger_ flags and sizes imageSentBuffer_
+{
+    dim_under_trigger_.resize(num_of_dimensions_, false);
+    dim_not_under_trigger_.resize(num_of_dimensions_, false);
+
+    dim_limit_under_trigger_.resize(num_of_dimensions_, 1); // entries added by the resize start at 1; only the matching group below receives the real extent
+    dim_limit_not_under_trigger_.resize(num_of_dimensions_, 1);
+
+    if (cha_trigger_) // dimension 0: channel
+    {
+        dim_under_trigger_[0] = true;
+        dim_limit_under_trigger_[0] = dimensions_[0];
+    }
+    else
+    {
+        dim_not_under_trigger_[0] = true;
+        dim_limit_not_under_trigger_[0] = dimensions_[0];
+    }
+
+    if (slc_trigger_) // dimension 1: slice
+    {
+        dim_under_trigger_[1] = true;
+        dim_limit_under_trigger_[1] = dimensions_[1];
+    }
+    else
+    {
+        dim_not_under_trigger_[1] = true;
+        dim_limit_not_under_trigger_[1] = dimensions_[1];
+    }
+
+    if (e2_trigger_) // dimension 2: E2
+    {
+        dim_under_trigger_[2] = true;
+        dim_limit_under_trigger_[2] = dimensions_[2];
+    }
+    else
+    {
+        dim_not_under_trigger_[2] = true;
+        dim_limit_not_under_trigger_[2] = dimensions_[2];
+    }
+
+    if (con_trigger_) // dimension 3: contrast
+    {
+        dim_under_trigger_[3] = true;
+        dim_limit_under_trigger_[3] = dimensions_[3];
+    }
+    else
+    {
+        dim_not_under_trigger_[3] = true;
+        dim_limit_not_under_trigger_[3] = dimensions_[3];
+    }
+
+    if (phs_trigger_) // dimension 4: phase
+    {
+        dim_under_trigger_[4] = true;
+        dim_limit_under_trigger_[4] = dimensions_[4];
+    }
+    else
+    {
+        dim_not_under_trigger_[4] = true;
+        dim_limit_not_under_trigger_[4] = dimensions_[4];
+    }
+
+    if (rep_trigger_) // dimension 5: repetition
+    {
+        dim_under_trigger_[5] = true;
+        dim_limit_under_trigger_[5] = dimensions_[5];
+    }
+    else
+    {
+        dim_not_under_trigger_[5] = true;
+        dim_limit_not_under_trigger_[5] = dimensions_[5];
+    }
+
+    if (set_trigger_) // dimension 6: set
+    {
+        dim_under_trigger_[6] = true;
+        dim_limit_under_trigger_[6] = dimensions_[6];
+    }
+    else
+    {
+        dim_not_under_trigger_[6] = true;
+        dim_limit_not_under_trigger_[6] = dimensions_[6];
+    }
+
+    if (ave_trigger_) // dimension 7: average
+    {
+        dim_under_trigger_[7] = true;
+        dim_limit_under_trigger_[7] = dimensions_[7];
+    }
+    else
+    {
+        dim_not_under_trigger_[7] = true;
+        dim_limit_not_under_trigger_[7] = dimensions_[7];
+    }
+
+    imageSentBuffer_.create(dim_limit_under_trigger_); // one slot per index combination of the triggered dimensions
+    imageSentBuffer_.delete_data_on_destruct(false); // NOTE(review): presumably stops the array from freeing its storage on destruction - confirm against hoNDArray
+}
+
+int GtPlusAccumulatorImageTriggerGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1, GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2, GadgetContainerMessage<ISMRMRD::MetaContainer>* m3)
+{
+    // route the incoming image by its data role: regular/other images are stored and trigger() is run, gfactor images are wrapped and forwarded at once
+    std::string dataRole;
+    dataRole = std::string(m3->getObjectPtr()->as_str(GTPLUS_DATA_ROLE, 0)); // NOTE(review): m3 is dereferenced without a null check
+
+    GADGET_CONDITION_MSG(verboseMode_, "--> receive image : " << m1->getObjectPtr()->image_index << " -- " << dataRole);
+
+    if ( dataRole == GTPLUS_IMAGE_REGULAR )
+    {
+        GADGET_CHECK_RETURN(this->storeImage(*m1->getObjectPtr(), *m2->getObjectPtr(), *m3->getObjectPtr(), imageBuffer_), GADGET_FAIL);
+        GADGET_CHECK_RETURN(this->trigger(imageBuffer_, imageSent_, false), GADGET_FAIL);
+    }
+
+    if ( dataRole == GTPLUS_IMAGE_OTHER )
+    {
+        GADGET_CHECK_RETURN(this->storeImage(*m1->getObjectPtr(), *m2->getObjectPtr(), *m3->getObjectPtr(), otherBuffer_), GADGET_FAIL);
+        GADGET_CHECK_RETURN(this->trigger(otherBuffer_, otherSent_, false), GADGET_FAIL);
+    }
+
+    if ( dataRole == GTPLUS_IMAGE_GFACTOR )
+    {
+        // pass the image to the next gadget
+        Gadgetron::GadgetContainerMessage<ImageBufferType>* cm1 = new Gadgetron::GadgetContainerMessage<ImageBufferType>();
+
+        ImageBufferType& imgBuf = *(cm1->getObjectPtr());
+
+        std::vector<size_t> dim2D(num_of_dimensions_, 1); // extent 1 in every dimension: the buffer holds exactly one image
+        imgBuf.create(dim2D);
+        imgBuf(0) = new ImageType();
+        GADGET_CHECK_RETURN(imgBuf(0)!=NULL, GADGET_FAIL); // NOTE(review): if this macro returns early, cm1 is not released on that path - confirm
+
+        // set image content
+        imgBuf(0)->from_NDArray( *m2->getObjectPtr() );
+        // set image attrib
+        imgBuf(0)->attrib_ = *m3->getObjectPtr();
+
+        // pass the ISMRMRD header info
+        GADGET_CHECK_RETURN(gtPlus_util_.setMetaAttributesFromImageHeaderISMRMRD(*m1->getObjectPtr(), imgBuf(0)->attrib_), GADGET_FAIL); // NOTE(review): same early-return concern for cm1 as above - confirm
+
+        if (this->next()->putq(cm1) < 0) 
+        {
+            m1->release();
+            cm1->release();
+            return GADGET_FAIL;
+        }
+    }
+
+    m1->release(); // also reached when dataRole matched none of the roles above; such images are released without being forwarded
+    return GADGET_OK;
+}
+
+bool GtPlusAccumulatorImageTriggerGadget::trigger(ImageBufferType& buf, ImageSentFlagBufferType& sentFlagBuf, bool inClose)
+{
+    try
+    {
+        // scan the buffered images, if the trigger dimensions are complete, sent out this package
+
+        // not under trigger
+        size_t cha, slc, e2, con, phs, rep, set, ave;
+
+        // under trigger
+        size_t cha_t, slc_t, e2_t, con_t, phs_t, rep_t, set_t, ave_t;
+
+        std::vector<size_t> image_ind(num_of_dimensions_, 0);
+        std::vector<size_t> image_sent_ind(num_of_dimensions_, 0);
+
+        size_t numOfElem = imageSentBuffer_.get_number_of_elements();
+        size_t ii;
+        for ( ii=0; ii<numOfElem; ii++ ) { imageSentBuffer_(ii) = NULL; }
+
+        for ( ave=0; ave<dim_limit_not_under_trigger_[7]; ave++ )
+        {
+            if ( dim_not_under_trigger_[7] ) image_ind[7] = ave;
+            // -------------------
+            for ( set=0; set<dim_limit_not_under_trigger_[6]; set++ )
+            {
+                if ( dim_not_under_trigger_[6] ) image_ind[6] = set;
+                // -------------------
+                for ( rep=0; rep<dim_limit_not_under_trigger_[5]; rep++ )
+                {
+                    if ( dim_not_under_trigger_[5] ) image_ind[5] = rep;
+                    // -------------------
+                    for ( phs=0; phs<dim_limit_not_under_trigger_[4]; phs++ )
+                    {
+                        if ( dim_not_under_trigger_[4] ) image_ind[4] = phs;
+                        // -------------------
+                        for ( con=0; con<dim_limit_not_under_trigger_[3]; con++ )
+                        {
+                            if ( dim_not_under_trigger_[3] ) image_ind[3] = con;
+                            // -------------------
+                            for ( e2=0; e2<dim_limit_not_under_trigger_[2]; e2++ )
+                            {
+                                if ( dim_not_under_trigger_[2] ) image_ind[2] = e2;
+                                // -------------------
+                                for ( slc=0; slc<dim_limit_not_under_trigger_[1]; slc++ )
+                                {
+                                    if ( dim_not_under_trigger_[1] ) image_ind[1] = slc;
+                                    // -------------------
+                                    for ( cha=0; cha<dim_limit_not_under_trigger_[0]; cha++ )
+                                    {
+                                        if ( dim_not_under_trigger_[0] ) image_ind[0] = cha;
+                                        // -------------------
+
+                                        // loop over under triggered dimensions and check whether every images are there
+                                        bool needTrigger = true;
+                                        if ( inClose )
+                                        {
+                                            needTrigger = false;
+                                        }
+
+                                        {
+                                            for ( ii=0; ii<numOfElem; ii++ ) { imageSentBuffer_(ii) = NULL; }
+
+                                            // =================================================
+
+                                            for ( ave_t=0; ave_t<dim_limit_under_trigger_[7]; ave_t++ )
+                                            {
+                                                if ( dim_under_trigger_[7] ) image_ind[7] = ave_t;
+                                                image_sent_ind[7] = ave_t;
+                                                // -------------------
+                                                for ( set_t=0; set_t<dim_limit_under_trigger_[6]; set_t++ )
+                                                {
+                                                    if ( dim_under_trigger_[6] ) image_ind[6] = set_t;
+                                                    image_sent_ind[6] = set_t;
+                                                    // -------------------
+                                                    for ( rep_t=0; rep_t<dim_limit_under_trigger_[5]; rep_t++ )
+                                                    {
+                                                        if ( dim_under_trigger_[5] ) image_ind[5] = rep_t;
+                                                        image_sent_ind[5] = rep_t;
+                                                        // -------------------
+                                                        for ( phs_t=0; phs_t<dim_limit_under_trigger_[4]; phs_t++ )
+                                                        {
+                                                            if ( dim_under_trigger_[4] ) image_ind[4] = phs_t;
+                                                            image_sent_ind[4] = phs_t;
+                                                            // -------------------
+                                                            for ( con_t=0; con_t<dim_limit_under_trigger_[3]; con_t++ )
+                                                            {
+                                                                if ( dim_under_trigger_[3] ) image_ind[3] = con_t;
+                                                                image_sent_ind[3] = con_t;
+                                                                // -------------------
+                                                                for ( e2_t=0; e2_t<dim_limit_under_trigger_[2]; e2_t++ )
+                                                                {
+                                                                    if ( dim_under_trigger_[2] ) image_ind[2] = e2_t;
+                                                                    image_sent_ind[2] = e2_t;
+                                                                    // -------------------
+                                                                    for ( slc_t=0; slc_t<dim_limit_under_trigger_[1]; slc_t++ )
+                                                                    {
+                                                                        if ( dim_under_trigger_[1] ) image_ind[1] = slc_t;
+                                                                        image_sent_ind[1] = slc_t;
+                                                                        // -------------------
+                                                                        for ( cha_t=0; cha_t<dim_limit_under_trigger_[0]; cha_t++ )
+                                                                        {
+                                                                            if ( dim_under_trigger_[0] ) image_ind[0] = cha_t;
+                                                                            image_sent_ind[0] = cha_t;
+                                                                            // -------------------
+
+                                                                            ImageType* pImage = buf(image_ind);
+                                                                            bool sentFlag = sentFlagBuf(image_ind);
+
+                                                                            if ( inClose )
+                                                                            {
+                                                                                // if in close call, send out all unsent images
+                                                                                if ( pImage != NULL && !sentFlag )
+                                                                                {
+                                                                                    imageSentBuffer_(image_sent_ind) = pImage;
+                                                                                    buf(image_ind) = NULL;
+                                                                                    needTrigger = true;
+                                                                                }
+                                                                            }
+                                                                            else
+                                                                            {
+                                                                                if ( pImage != NULL && !sentFlag )
+                                                                                {
+                                                                                    imageSentBuffer_(image_sent_ind) = pImage;
+                                                                                    // buf(image_ind) = NULL;
+                                                                                }
+                                                                                else
+                                                                                {
+                                                                                    needTrigger = false; // if all images for current under-trigger dimensions are filled, trigger
+                                                                                    break;
+                                                                                }
+                                                                            }
+                                                                        }
+                                                                    }
+                                                                }
+                                                            }
+                                                        }
+                                                    }
+                                                }
+                                            }
+
+                                            if ( needTrigger )
+                                            {
+                                                // if an image has already been sent, it is not sent again
+                                                for ( ave_t=0; ave_t<dim_limit_under_trigger_[7]; ave_t++ )
+                                                {
+                                                    if ( dim_under_trigger_[7] ) image_ind[7] = ave_t;
+                                                    for ( set_t=0; set_t<dim_limit_under_trigger_[6]; set_t++ )
+                                                    {
+                                                        if ( dim_under_trigger_[6] ) image_ind[6] = set_t;
+                                                        for ( rep_t=0; rep_t<dim_limit_under_trigger_[5]; rep_t++ )
+                                                        {
+                                                            if ( dim_under_trigger_[5] ) image_ind[5] = rep_t;
+                                                            for ( phs_t=0; phs_t<dim_limit_under_trigger_[4]; phs_t++ )
+                                                            {
+                                                                if ( dim_under_trigger_[4] ) image_ind[4] = phs_t;
+                                                                for ( con_t=0; con_t<dim_limit_under_trigger_[3]; con_t++ )
+                                                                {
+                                                                    if ( dim_under_trigger_[3] ) image_ind[3] = con_t;
+                                                                    for ( e2_t=0; e2_t<dim_limit_under_trigger_[2]; e2_t++ )
+                                                                    {
+                                                                        if ( dim_under_trigger_[2] ) image_ind[2] = e2_t;
+                                                                        for ( slc_t=0; slc_t<dim_limit_under_trigger_[1]; slc_t++ )
+                                                                        {
+                                                                            if ( dim_under_trigger_[1] ) image_ind[1] = slc_t;
+                                                                            for ( cha_t=0; cha_t<dim_limit_under_trigger_[0]; cha_t++ )
+                                                                            {
+                                                                                if ( dim_under_trigger_[0] ) image_ind[0] = cha_t;
+
+                                                                                bool sentFlag = sentFlagBuf(image_ind);
+                                                                                if ( sentFlag )
+                                                                                {
+                                                                                    imageSentBuffer_(cha_t, slc_t, e2_t, con_t, phs_t, rep_t, set_t) = NULL;
+                                                                                }
+                                                                                else
+                                                                                {
+                                                                                    sentFlagBuf(image_ind) = true;
+                                                                                }
+
+                                                                                buf(image_ind) = NULL;
+                                                                            }
+                                                                        }
+                                                                    }
+                                                                }
+                                                            }
+                                                        }
+                                                    }
+                                                }
+
+                                                GADGET_MSG("--> Accumulator image trigger for [CHA SLC E2 CON PHS REP SET AVE] : [" 
+                                                                                                                            << image_ind[0] << " " 
+                                                                                                                            << image_ind[1] << " " 
+                                                                                                                            << image_ind[2] << " " 
+                                                                                                                            << image_ind[3] << " " 
+                                                                                                                            << image_ind[4] << " " 
+                                                                                                                            << image_ind[5] << " " 
+                                                                                                                            << image_ind[6] << " " 
+                                                                                                                            << image_ind[7] << "]" );
+
+                                                Gadgetron::GadgetContainerMessage<ImageBufferType>* cm1 = new Gadgetron::GadgetContainerMessage<ImageBufferType>();
+                                                ImageBufferType& imgBuf = *(cm1->getObjectPtr());
+                                                imgBuf = imageSentBuffer_;
+                                                imgBuf.delete_data_on_destruct(true);
+
+                                                if (this->next()->putq(cm1) < 0) 
+                                                {
+                                                    cm1->release();
+                                                    return false;
+                                                }
+                                            }
+                                            else
+                                            {
+                                                for ( ii=0; ii<numOfElem; ii++ )
+                                                {
+                                                    imageSentBuffer_(ii) = NULL;
+                                                }
+                                            }
+
+                                            // =================================================
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happens in GtPlusAccumulatorImageTriggerGadget::trigger(...) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+bool GtPlusAccumulatorImageTriggerGadget::storeImage(const ISMRMRD::ImageHeader& imgHeader, const hoNDArray<ValueType>& img, const ISMRMRD::MetaContainer& attrib, ImageBufferType& buf)
+{
+    try
+    {
+        long long cha = attrib.as_long(GTPLUS_CHA, 0);
+
+        size_t slc = imgHeader.slice;
+
+        long long e2 = attrib.as_long(GTPLUS_E2, 0);
+
+        size_t con = imgHeader.contrast;
+        size_t phs = imgHeader.phase;
+        size_t rep = imgHeader.repetition;
+        size_t set = imgHeader.set;
+        size_t ave = imgHeader.average;
+
+        // create image
+        ImageType* storedImage = new ImageType();
+        GADGET_CHECK_RETURN_FALSE(storedImage!=NULL);
+
+        storedImage->from_NDArray(img);
+        storedImage->attrib_ = attrib;
+        GADGET_CHECK_RETURN_FALSE(gtPlus_util_.setMetaAttributesFromImageHeaderISMRMRD(imgHeader, storedImage->attrib_));
+
+        storedImage->attrib_.set(GTPLUS_PASS_IMMEDIATE, (long)0);
+        buf(cha, slc, e2, con, phs, rep, set, ave) = storedImage;
+
+        if ( pass_image_immediate_ )
+        {
+            Gadgetron::GadgetContainerMessage<ImageBufferType>* cm1 = new Gadgetron::GadgetContainerMessage<ImageBufferType>();
+
+            ImageBufferType& imgBuf = *(cm1->getObjectPtr());
+
+            std::vector<size_t> dim2D(num_of_dimensions_, 1);
+            imgBuf.create(dim2D);
+
+            imgBuf(0) = new ImageType();
+            *imgBuf(0) = *storedImage;
+
+            // set the pass_image flag, so next gadget knows
+            imgBuf(0)->attrib_.set(GTPLUS_PASS_IMMEDIATE, (long)1);
+
+            if (this->next()->putq(cm1) < 0) 
+            {
+                cm1->release();
+                return false;
+            }
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happens in GtPlusAccumulatorImageTriggerGadget::storeImage(const ISMRMRD::ImageHeader& imgHeader, const hoNDArray<ValueType>& img, const ISMRMRD::MetaContainer& attrib, ImageBufferType& buf) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+int GtPlusAccumulatorImageTriggerGadget::close(unsigned long flags)
+{
+    GADGET_CONDITION_MSG(true, "GtPlusAccumulatorImageTriggerGadget - close(flags) : " << flags);
+
+    if ( BaseClass::close(flags) != GADGET_OK ) return GADGET_FAIL;
+
+    if ( flags!=0 && !triggered_in_close_ )
+    {
+        triggered_in_close_ = true;
+
+        GADGET_CONDITION_MSG(true, "GtPlusAccumulatorImageTriggerGadget - trigger in close(flags) ... ");
+
+        GADGET_CHECK_RETURN(this->trigger(imageBuffer_, imageSent_, true), GADGET_FAIL);
+        GADGET_CHECK_RETURN(this->trigger(otherBuffer_, otherSent_, true), GADGET_FAIL);
+    }
+
+    return GADGET_OK;
+}
+
+GADGET_FACTORY_DECLARE(GtPlusAccumulatorImageTriggerGadget)
+
+}
diff --git a/gadgets/gtPlus/GtPlusAccumulatorImageTriggerGadget.h b/gadgets/gtPlus/GtPlusAccumulatorImageTriggerGadget.h
new file mode 100644
index 0000000..640a879
--- /dev/null
+++ b/gadgets/gtPlus/GtPlusAccumulatorImageTriggerGadget.h
@@ -0,0 +1,150 @@
+/** \file   GtPlusAccumulatorImageTriggerGadget.h
+    \brief  The GtPlus image accumulation and triggering gadget, used after GtPlus reconstruction for image data
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include <complex>
+#include "GtPlusGadgetExport.h"
+#include "Gadget.h"
+#include "hoNDArray.h"
+#include "hoNDObjectArray.h"
+#include "ismrmrd/ismrmrd.h"
+#include "ismrmrd/meta.h"
+#include "GadgetIsmrmrdReadWrite.h"
+
+#include "hoNDArray_utils.h"
+#include "hoNDImage.h"
+
+#include "GtPlusGadgetImageArray.h"
+
+#include "gtPlusIOAnalyze.h"
+#include "gtPlusISMRMRDReconUtil.h"
+
+namespace Gadgetron
+{
+
+// the dimensional order of buffered images
+// [Cha Slice E2 Con Phase Rep Set Ave]
+//   0    1    2   3   4    5   6   7
+#define GT_DIM_NUM_IMAGE 8
+
+class EXPORTGTPLUSGADGET GtPlusAccumulatorImageTriggerGadget : public Gadget3< ISMRMRD::ImageHeader, hoNDArray< std::complex<float> >, ISMRMRD::MetaContainer >
+{
+public:
+    GADGET_DECLARE(GtPlusAccumulatorImageTriggerGadget);
+
+    typedef std::complex<float> ValueType;
+
+    typedef Gadget3< ISMRMRD::ImageHeader, hoNDArray< ValueType >, ISMRMRD::MetaContainer > BaseClass;
+
+    typedef hoNDImage<ValueType, 2> ImageType;
+
+    typedef hoNDObjectArray<ImageType> ImageBufferType;
+    typedef hoNDArray<bool> ImageSentFlagBufferType;
+
+    GtPlusAccumulatorImageTriggerGadget();
+    ~GtPlusAccumulatorImageTriggerGadget();
+
+    virtual int close(unsigned long flags);
+
+    /// parameters to control the triggering
+
+    /// for every dimension, user can define whether it is under the trigger
+    /// if the dimensional index of buffered images reaches its maximum for all dimensions under the trigger,
+    /// the image buffer will be sent to the next gadget
+    /// e.g., if the PHS dimension limit is 40 and the dimension PHS is under the trigger, all 40 images
+    /// will be sent to the next gadget as a data buffer
+    /// every buffered image will only be sent once
+    /// GTPLUS_IMAGE_GFACTOR gfactor images will be sent to the next gadget immediately
+
+    /// dimension limits
+    /// by default the dimension limits are read from the protocol, but
+    /// user can set them via the input parameters
+    ISMRMRD::EncodingCounters meas_max_idx_;
+
+    /// whether a dimension is under the trigger
+    /// if no dimension is under the trigger, images will be passed to next gadget right away
+    bool cha_trigger_;
+    bool slc_trigger_;
+    bool e2_trigger_;
+    bool con_trigger_;
+    bool phs_trigger_;
+    bool rep_trigger_;
+    bool set_trigger_;
+    bool ave_trigger_;
+
+    /// whether to immediately pass the image to the next gadget
+    bool pass_image_immediate_;
+
+protected:
+
+    virtual int process_config(ACE_Message_Block* mb);
+    virtual int process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1, GadgetContainerMessage< hoNDArray<ValueType> >* m2, GadgetContainerMessage<ISMRMRD::MetaContainer>* m3);
+
+    // perform the triggering
+    virtual bool trigger(ImageBufferType& buf, ImageSentFlagBufferType& sentFlagBuf, bool inClose);
+
+    // store the incoming image
+    // if pass_image_immediate_==true, a copy of the image is also passed immediately to the next gadget with GTPLUS_PASS_IMMEDIATE set to 1
+    virtual bool storeImage(const ISMRMRD::ImageHeader& imgHeader, const hoNDArray<ValueType>& img, const ISMRMRD::MetaContainer& attrib, ImageBufferType& buf);
+
+    // set dimensions under trigger
+    void setDimensionsUnderTrigger();
+
+    // buffer for regular images whose data role is GTPLUS_IMAGE_REGULAR
+    ImageBufferType imageBuffer_;
+    ImageSentFlagBufferType imageSent_;
+
+    // buffer for other images whose data role is not GTPLUS_IMAGE_REGULAR and not GTPLUS_IMAGE_GFACTOR
+    ImageBufferType otherBuffer_;
+    ImageSentFlagBufferType otherSent_;
+
+    // buffer sent to next gadget
+    ImageBufferType imageSentBuffer_;
+
+    // number of total dimensions
+    size_t num_of_dimensions_;
+
+    // dimensions under trigger
+    std::vector<bool> dim_under_trigger_;
+    std::vector<size_t> dim_limit_under_trigger_;
+
+    std::vector<bool> dim_not_under_trigger_;
+    std::vector<size_t> dim_limit_not_under_trigger_;
+
+    // whether the next gadget has been triggered in close(...)
+    bool triggered_in_close_;
+
+    // dimension for image kspace
+    std::vector<size_t> dimensions_;
+
+    // encoding matrix size (the real sampled size)
+    size_t matrix_size_encoding_[3];
+
+    // encoding space size (the logic kspace size)
+    size_t space_size_[3];
+
+    // encoding field of view [mm]
+    float field_of_view_encoding_[3];
+
+    // recon matrix size (the final image size)
+    size_t matrix_size_recon_[3];
+
+    // recon field of view [mm]
+    float field_of_view_recon_[3];
+
+    int image_counter_;
+
+    int meas_max_ro_;
+    int meas_max_channel_;
+
+    // util for gtplus
+    Gadgetron::gtPlus::gtPlusISMRMRDReconUtil< std::complex<float> > gtPlus_util_;
+
+    // in verbose mode, more info is printed out
+    bool verboseMode_;
+};
+
+}
diff --git a/gadgets/gtPlus/GtPlusAccumulatorPerfAIFGadget.cpp b/gadgets/gtPlus/GtPlusAccumulatorPerfAIFGadget.cpp
deleted file mode 100644
index cf1ab36..0000000
--- a/gadgets/gtPlus/GtPlusAccumulatorPerfAIFGadget.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-#include "GtPlusAccumulatorPerfAIFGadget.h"
-#include "GadgetIsmrmrdReadWrite.h"
-
-namespace Gadgetron
-{
-
-GtPlusAccumulatorPerfAIFGadget::GtPlusAccumulatorPerfAIFGadget() : cur_rep_(0)
-{
-
-}
-
-GtPlusAccumulatorPerfAIFGadget::~GtPlusAccumulatorPerfAIFGadget()
-{
-
-}
-
-int GtPlusAccumulatorPerfAIFGadget::process_config(ACE_Message_Block* mb)
-{
-    return BaseClass::process_config(mb);
-}
-
-int GtPlusAccumulatorPerfAIFGadget::process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, 
-        GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
-{
-    bool bIsKSpace, bIsRef, bIsNoise, bIsPhaseCorr, bIsReflect, bIsOther;
-    if ( !checkStatus(m1->getObjectPtr()->flags, m1->getObjectPtr()->number_of_samples, bIsKSpace, bIsRef, bIsNoise, bIsPhaseCorr, bIsReflect, bIsOther) )
-    {
-        GADGET_DEBUG1("Failed check readout status\n");
-        return GADGET_FAIL;
-    }
-
-    // Last scan for measurement of the first slice can indicate the number of repetition
-    bool is_last_scan_in_slice = ISMRMRD::FlagBit(ISMRMRD::ACQ_LAST_IN_SLICE).isSet(m1->getObjectPtr()->flags);
-    if ( is_last_scan_in_slice && m1->getObjectPtr()->idx.slice==0 && !bIsOther )
-    {
-        GADGET_MSG("Repetition " << cur_rep_ << " is complete ... ");
-        cur_rep_++;
-    }
-
-    BaseClass::process(m1, m2);
-
-    // if the other data is stored, need to correct the repetition
-    if ( bIsOther )
-    {
-        if ( !otherBuffer_.empty() )
-        {
-            otherBuffer_[otherBuffer_.size()-1].acqHead_.idx.repetition = cur_rep_;
-        }
-    }
-
-    return GADGET_OK;
-}
-
-GADGET_FACTORY_DECLARE(GtPlusAccumulatorPerfAIFGadget)
-}
diff --git a/gadgets/gtPlus/GtPlusAccumulatorPerfAIFGadget.h b/gadgets/gtPlus/GtPlusAccumulatorPerfAIFGadget.h
deleted file mode 100644
index 48bf802..0000000
--- a/gadgets/gtPlus/GtPlusAccumulatorPerfAIFGadget.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#pragma once
-
-#include "GtPlusAccumulatorGadget.h"
-
-namespace Gadgetron
-{
-
-class EXPORTGTPLUS GtPlusAccumulatorPerfAIFGadget : public GtPlusAccumulatorGadget
-{
-public:
-    GADGET_DECLARE(GtPlusAccumulatorPerfAIFGadget);
-
-    typedef GtPlusAccumulatorGadget BaseClass;
-
-    typedef BaseClass::ValueType ValueType;
-    typedef BaseClass::ReadOutBufferType ReadOutBufferType;
-    typedef BaseClass::BufferType BufferType;
-    typedef BaseClass::ReflectBufferType ReflectBufferType;
-
-    GtPlusAccumulatorPerfAIFGadget();
-    ~GtPlusAccumulatorPerfAIFGadget();
-
-protected:
-
-    virtual int process_config(ACE_Message_Block* mb);
-
-    virtual int process(Gadgetron::GadgetContainerMessage< ISMRMRD::AcquisitionHeader >* m1, Gadgetron::GadgetContainerMessage< Gadgetron::hoNDArray< std::complex<float> > > * m2);
-
-    int cur_rep_;
-};
-
-}
diff --git a/gadgets/gtPlus/GtPlusAccumulatorSLCGadget.cpp b/gadgets/gtPlus/GtPlusAccumulatorSLCGadget.cpp
deleted file mode 100644
index 1ce603c..0000000
--- a/gadgets/gtPlus/GtPlusAccumulatorSLCGadget.cpp
+++ /dev/null
@@ -1,403 +0,0 @@
-#include "GtPlusAccumulatorSLCGadget.h"
-#include "GadgetIsmrmrdReadWrite.h"
-
-namespace Gadgetron
-{
-
-GtPlusAccumulatorSLCGadget::GtPlusAccumulatorSLCGadget() : prev_slc_(-1), cur_slc_(-1)
-{
-
-}
-
-GtPlusAccumulatorSLCGadget::~GtPlusAccumulatorSLCGadget()
-{
-
-}
-
-int GtPlusAccumulatorSLCGadget::process_config(ACE_Message_Block* mb)
-{
-    return BaseClass::process_config(mb);
-}
-
-bool GtPlusAccumulatorSLCGadget::
-copyBufferForSLC(BufferType& buf, int slc, BufferType& bufSLC)
-{
-    try
-    {
-        boost::shared_ptr< std::vector<unsigned int> > dims = buf.get_dimensions();
-
-        boost::shared_ptr< std::vector<unsigned int> > dimsSLC = dims;
-        (*dimsSLC)[3] = 1;
-
-        try
-        {
-            bufSLC.create(dimsSLC);
-        }
-        catch(...)
-        {
-            GADGET_DEBUG1("Failed create buffer for SLC \n");
-            return false;
-        }
-
-        // copy the memory over
-        int RO = (*dims)[0];
-        int E1 = (*dims)[1];
-        int CHA = (*dims)[2];
-        int SLC = (*dims)[3];
-        int E2 = (*dims)[4];
-        int CON = (*dims)[5];
-        int PHS = (*dims)[6];
-        int REP = (*dims)[7];
-        int SET = (*dims)[8];
-        int SEG = (*dims)[9];
-
-        int e2, con, phs, rep, set, seg;
-
-        std::vector<unsigned int> pos(10);
-
-        for ( seg=0; seg<SEG; seg++ )
-        {
-            for ( set=0; set<SET; set++ )
-            {
-                for ( rep=0; rep<REP; rep++ )
-                {
-                    for ( phs=0; phs<PHS; phs++ )
-                    {
-                        for ( con=0; con<CON; con++ )
-                        {
-                            for ( e2=0; e2<E2; e2++ )
-                            {
-                                pos[0] = 0;
-                                pos[1] = 0;
-                                pos[2] = 0;
-                                pos[3] = slc;
-                                pos[4] = e2;
-                                pos[5] = con;
-                                pos[6] = phs;
-                                pos[7] = rep;
-                                pos[8] = set;
-                                pos[9] = seg;
-                                int offsetBuffer = buf.calculate_offset(pos);
-
-                                // buffer slc
-                                pos[0] = 0;
-                                pos[1] = 0;
-                                pos[2] = 0;
-                                pos[3] = 0;
-                                pos[4] = e2;
-                                pos[5] = con;
-                                pos[6] = phs;
-                                pos[7] = rep;
-                                pos[8] = set;
-                                pos[9] = seg;
-                                int offsetBufferSLC = bufSLC.calculate_offset(pos);
-
-                                // copy the image content
-                                memcpy(bufSLC.begin()+offsetBufferSLC, buf.begin()+offsetBuffer, sizeof(std::complex<float>)*RO*E1*CHA);
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-    catch(...)
-    {
-        GADGET_DEBUG1("Errors in GtPlusAccumulatorSLCGadget::copyBufferForSLC(...) ... \n");
-        return false;
-    }
-
-    return true;
-}
-
-bool GtPlusAccumulatorSLCGadget::
-copyReflectBufferForSLC(ReflectBufferType& buf, int slc, ReflectBufferType& bufSLC)
-{
-    try
-    {
-        boost::shared_ptr< std::vector<unsigned int> > dims = buf.get_dimensions();
-
-        boost::shared_ptr< std::vector<unsigned int> > dimsSLC = dims;
-        (*dimsSLC)[3] = 1;
-
-        try
-        {
-            bufSLC.create(dimsSLC);
-        }
-        catch(...)
-        {
-            GADGET_DEBUG1("Failed create buffer for SLC \n");
-            return false;
-        }
-
-        // copy the memory over
-        int RO = (*dims)[0];
-        int E1 = (*dims)[1];
-        int CHA = (*dims)[2];
-        int SLC = (*dims)[3];
-        int E2 = (*dims)[4];
-        int CON = (*dims)[5];
-        int PHS = (*dims)[6];
-        int REP = (*dims)[7];
-        int SET = (*dims)[8];
-        int SEG = (*dims)[9];
-
-        int e2, con, phs, rep, set, seg;
-
-        std::vector<unsigned int> pos(10);
-
-        for ( seg=0; seg<SEG; seg++ )
-        {
-            for ( set=0; set<SET; set++ )
-            {
-                for ( rep=0; rep<REP; rep++ )
-                {
-                    for ( phs=0; phs<PHS; phs++ )
-                    {
-                        for ( con=0; con<CON; con++ )
-                        {
-                            for ( e2=0; e2<E2; e2++ )
-                            {
-                                pos[0] = 0;
-                                pos[1] = 0;
-                                pos[2] = 0;
-                                pos[3] = slc;
-                                pos[4] = e2;
-                                pos[5] = con;
-                                pos[6] = phs;
-                                pos[7] = rep;
-                                pos[8] = set;
-                                pos[9] = seg;
-                                int offsetBuffer = buf.calculate_offset(pos);
-
-                                // buffer slc
-                                pos[0] = 0;
-                                pos[1] = 0;
-                                pos[2] = 0;
-                                pos[3] = 0;
-                                pos[4] = e2;
-                                pos[5] = con;
-                                pos[6] = phs;
-                                pos[7] = rep;
-                                pos[8] = set;
-                                pos[9] = seg;
-                                int offsetBufferSLC = bufSLC.calculate_offset(pos);
-
-                                // copy the image content
-                                memcpy(bufSLC.begin()+offsetBufferSLC, buf.begin()+offsetBuffer, sizeof(unsigned short)*RO*E1*CHA);
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-    catch(...)
-    {
-        GADGET_DEBUG1("Errors in GtPlusAccumulatorSLCGadget::copyReflectBufferForSLC(...) ... \n");
-        return false;
-    }
-
-    return true;
-}
-
-bool GtPlusAccumulatorSLCGadget::triggerSLC(int slc)
-{
-    try
-    {
-        GadgetContainerMessage<GadgetMessageImageArray>* cm1 = 
-            new GadgetContainerMessage<GadgetMessageImageArray>();
-
-        GadgetContainerMessage< KSpaceBuffer >* cm2 = 
-            new GadgetContainerMessage< KSpaceBuffer >();
-
-        cm1->cont(cm2);
-
-        // copy the kspace data for this SLC
-        if ( !copyBufferForSLC(kspaceBuffer_->buffer_, slc, cm2->getObjectPtr()->buffer_) ) 
-        {
-            GADGET_DEBUG1("Unable to copyBufferForSLC\n");
-            cm1->release();
-            return false;
-        }
-
-        if ( !copyReflectBufferForSLC(kspaceBuffer_->reflect_, slc, cm2->getObjectPtr()->reflect_) ) 
-        {
-            GADGET_DEBUG1("Unable to copyReflectBufferForSLC\n");
-            cm1->release();
-            return false;
-        }
-
-        // copy the message image array for this SLC
-        GadgetMessageImageArray aMessageArraySLC;
-        messageImage_->extractMessageImageArrayForSLC(slc, aMessageArraySLC);
-        cm1->getObjectPtr()->copy(aMessageArraySLC);
-
-        if (!refBuffer_.empty())
-        {
-            GADGET_MSG("GtPlusAccumulatorSLCGadget - ref signal found : " << refBuffer_.size());
-
-            BufferType refCurr;
-            ReflectBufferType refReflectCurr;
-            if ( !fillBuffer(refBuffer_, refCurr, refReflectCurr) )
-            {
-                GADGET_DEBUG1("fillBuffer(refBuffer_, refCurr, refReflectCurr) failed ... \n");
-                cm1->release();
-                return GADGET_FAIL;
-            }
-
-            if ( !copyBufferForSLC(refCurr, slc, cm2->getObjectPtr()->ref_) ) 
-            {
-                GADGET_DEBUG1("Unable to copyBufferForSLC(refCurr, slc, cm2->getObjectPtr()->ref_)\n");
-                cm1->release();
-                return false;
-            }
-
-            if ( !copyReflectBufferForSLC(refReflectCurr, slc, cm2->getObjectPtr()->refReflect_) ) 
-            {
-                GADGET_DEBUG1("Unable to copyBufferForSLC(refReflectCurr, slc, cm2->getObjectPtr()->refReflect_)\n");
-                cm1->release();
-                return false;
-            }
-        }
-
-        if (!phaseCorrBuffer_.empty())
-        {
-            GADGET_MSG("GtPlusAccumulatorSLCGadget - phase correction signal found : " << phaseCorrBuffer_.size());
-
-            BufferType phsCorrCurr;
-            ReflectBufferType phsCorrReflectCurr;
-            if ( !fillBuffer(phaseCorrBuffer_, phsCorrCurr, phsCorrReflectCurr) )
-            {
-                GADGET_DEBUG1("fillBuffer(phaseCorrBuffer_, phsCorrCurr, phsCorrReflectCurr) failed ... \n");
-                cm1->release();
-                return GADGET_FAIL;
-            }
-
-            if ( !copyBufferForSLC(phsCorrCurr, slc, cm2->getObjectPtr()->phaseCorr_) ) 
-            {
-                GADGET_DEBUG1("Unable to copyBufferForSLC(phsCorrCurr, slc, cm2->getObjectPtr()->phaseCorr_)\n");
-                cm1->release();
-                return false;
-            }
-
-            if ( !copyReflectBufferForSLC(phsCorrReflectCurr, slc, cm2->getObjectPtr()->phaseCorrReflect_) ) 
-            {
-                GADGET_DEBUG1("Unable to copyBufferForSLC(phsCorrReflectCurr, slc, cm2->getObjectPtr()->phaseCorrReflect_)\n");
-                cm1->release();
-                return false;
-            }
-        }
-
-        if (!noiseBuffer_.empty())
-        {
-            GADGET_MSG("GtPlusAccumulatorSLCGadget - noise signal found : " << noiseBuffer_.size());
-
-            BufferType noiseCurr;
-            ReflectBufferType tmpBuf;
-            if ( !fillBuffer(noiseBuffer_, noiseCurr, tmpBuf) )
-            {
-                GADGET_DEBUG1("fillBuffer(noiseBuffer_, noiseCurr, tmpBuf) failed ... \n");
-                cm1->release();
-                return false;
-            }
-
-            if ( !copyBufferForSLC(noiseCurr, slc, cm2->getObjectPtr()->noise_) ) 
-            {
-                GADGET_DEBUG1("Unable to copyBufferForSLC(noiseCurr, slc, cm2->getObjectPtr()->noise_)\n");
-                cm1->release();
-                return false;
-            }
-        }
-
-        if (!otherBuffer_.empty())
-        {
-            GADGET_MSG("GtPlusAccumulatorSLCGadget - other signal found : " << otherBuffer_.size());
-
-            BufferType otherCurr;
-            ReflectBufferType tmpBuf;
-            if ( !fillBuffer(otherBuffer_, otherCurr, tmpBuf) )
-            {
-                GADGET_DEBUG1("fillBuffer(otherBuffer_, otherCurr, tmpBuf) failed ... \n");
-                cm1->release();
-                return false;
-            }
-
-            if ( !copyBufferForSLC(otherCurr, slc, cm2->getObjectPtr()->other_) ) 
-            {
-                GADGET_DEBUG1("Unable to copyBufferForSLC(otherCurr, slc, cm2->getObjectPtr()->other_)\n");
-                cm1->release();
-                return false;
-            }
-        }
-
-        // send to next gadget
-        if (this->next()->putq(cm1) < 0) 
-        {
-            return false;
-        }
-    }
-    catch(...)
-    {
-        GADGET_DEBUG1("Errors in triggerSLC(slc) ... \n");
-        return false;
-    }
-
-    return true;
-}
-
-int GtPlusAccumulatorSLCGadget::process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, 
-        GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
-{
-    cur_slc_ = m1->getObjectPtr()->idx.slice;
-
-    BaseClass::process(m1, m2);
-
-    if ( prev_slc_==-1 )
-    {
-        prev_slc_ = cur_slc_;
-    }
-
-   // if a new slice comes, it indicates the previous one is complete and can be sent out
-    if ( cur_slc_!=prev_slc_ )
-    {
-        GADGET_MSG("Slice " << prev_slc_ << " is complete ... ");
-
-        // send out prev slice
-        GADGET_MSG("GtPlusAccumulatorSLCGadget - trigger next gadget for SLC " << prev_slc_ << " ... ");
-        if ( !triggerSLC(prev_slc_) ) 
-        {
-            GADGET_DEBUG1("Unable to trigger this slc ... \n");
-            return GADGET_FAIL;
-        }
-
-        prev_slc_ = cur_slc_;
-    }
-
-    return GADGET_OK;
-}
-
-int GtPlusAccumulatorSLCGadget::close(unsigned long flags)
-{
-    // the last slice is still not sent out yet
-    if ( !triggered_ )
-    {
-        GADGET_MSG("GtPlusAccumulatorSLCGadget - trigger next gadget for SLC " << cur_slc_ << " ... ");
-
-        if ( !triggerSLC(cur_slc_) ) 
-        {
-            GADGET_DEBUG1("Unable to trigger this slc ... \n");
-            return GADGET_FAIL;
-        }
-
-        triggered_ = true;
-    }
-
-    // the base class shall do nothing
-    triggered_ = true;
-    return BaseClass::close(flags);
-}
-
-GADGET_FACTORY_DECLARE(GtPlusAccumulatorSLCGadget)
-
-}
diff --git a/gadgets/gtPlus/GtPlusAccumulatorSLCGadget.h b/gadgets/gtPlus/GtPlusAccumulatorSLCGadget.h
deleted file mode 100644
index dd9178c..0000000
--- a/gadgets/gtPlus/GtPlusAccumulatorSLCGadget.h
+++ /dev/null
@@ -1,40 +0,0 @@
-#pragma once
-
-#include "GtPlusAccumulatorGadget.h"
-
-namespace Gadgetron
-{
-
-class EXPORTGTPLUS GtPlusAccumulatorSLCGadget : public GtPlusAccumulatorGadget
-{
-public:
-    GADGET_DECLARE(GtPlusAccumulatorSLCGadget);
-
-    typedef GtPlusAccumulatorGadget BaseClass;
-
-    typedef BaseClass::ValueType ValueType;
-    typedef BaseClass::ReadOutBufferType ReadOutBufferType;
-    typedef BaseClass::BufferType BufferType;
-    typedef BaseClass::ReflectBufferType ReflectBufferType;
-
-    GtPlusAccumulatorSLCGadget();
-    ~GtPlusAccumulatorSLCGadget();
-
-    virtual int close(unsigned long flags);
-
-protected:
-
-    virtual int process_config(ACE_Message_Block* mb);
-
-    virtual bool copyBufferForSLC(BufferType& buf, int slc, BufferType& bufSLC);
-    virtual bool copyReflectBufferForSLC(ReflectBufferType& buf, int slc, ReflectBufferType& bufSLC);
-
-    virtual bool triggerSLC(int slc);
-
-    virtual int process(Gadgetron::GadgetContainerMessage< ISMRMRD::AcquisitionHeader >* m1, Gadgetron::GadgetContainerMessage< Gadgetron::hoNDArray< std::complex<float> > > * m2);
-
-    int prev_slc_;
-    int cur_slc_;
-};
-
-}
diff --git a/gadgets/gtPlus/GtPlusAccumulatorWorkOrderTriggerGadget.cpp b/gadgets/gtPlus/GtPlusAccumulatorWorkOrderTriggerGadget.cpp
index 79aef32..c4c2558 100644
--- a/gadgets/gtPlus/GtPlusAccumulatorWorkOrderTriggerGadget.cpp
+++ b/gadgets/gtPlus/GtPlusAccumulatorWorkOrderTriggerGadget.cpp
@@ -1,19 +1,34 @@
 #include "GtPlusAccumulatorWorkOrderTriggerGadget.h"
+#include "GtPlusReconGadgetUtil.h"
 
 using namespace Gadgetron::gtPlus;
 
 namespace Gadgetron
 {
 
-GtPlusAccumulatorWorkOrderTriggerGadget::GtPlusAccumulatorWorkOrderTriggerGadget()
-: image_counter_(0), image_series_(100), first_kspace_scan_(true), triggered_in_close_(false), triggered_in_process_(false), triggered_in_process_last_acq_(false), 
-    prev_dim1_(-1), curr_dim1_(-1), prev_dim2_(-1), curr_dim2_(-1), count_dim1_(0), verboseMode_(false), other_kspace_matching_Dim_(DIM_NONE)
+GtPlusAccumulatorWorkOrderTriggerGadget::GtPlusAccumulatorWorkOrderTriggerGadget() : 
+                                            image_counter_(0), image_series_(100), first_kspace_scan_(true), 
+                                            triggered_in_close_(false), triggered_in_process_(false), triggered_in_process_last_acq_(false), 
+                                            triggered_in_process_by_numOfKSpace_triggerDim1_(false), 
+                                            prev_dim1_(-1), curr_dim1_(-1), 
+                                            prev_dim2_(-1), curr_dim2_(-1), 
+                                            count_dim1_(0), 
+                                            last_acq_arrived_(false), 
+                                            verboseMode_(false), 
+                                            other_kspace_matching_Dim_(DIM_NONE)
 {
     space_matrix_offset_E1_ = 0;
     space_matrix_offset_E2_ = 0;
 
     gtPlusISMRMRDReconUtil<ValueType>().clearAcquisitionHeaderISMRMRD(prev_acq_header_);
     memset(&meas_max_idx_ref_, 0, sizeof(ISMRMRD::EncodingCounters));
+
+    ind_time_stamp_.resize(GT_DIM_NUM, 0);
+
+    embedded_ref_lines_E1_ = 0;
+    embedded_ref_lines_E2_ = 0;
+
+    timeStampResolution_ = 0.0025f;
 }
 
 GtPlusAccumulatorWorkOrderTriggerGadget::~GtPlusAccumulatorWorkOrderTriggerGadget()
@@ -47,307 +62,249 @@ int GtPlusAccumulatorWorkOrderTriggerGadget::process_config(ACE_Message_Block* m
 
     verboseMode_ = this->get_bool_value("verboseMode");
 
+    timeStampResolution_ = (float)this->get_double_value("timeStampResolution");
+    if ( timeStampResolution_ < FLT_EPSILON ) timeStampResolution_ = 0.0025f;
+    GADGET_CONDITION_MSG(verboseMode_, "timeStampResolution_ is " << timeStampResolution_);
+
     // ---------------------------------------------------------------------------------------------------------
     // pass the xml file
-    boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg = parseIsmrmrdXMLHeader(std::string(mb->rd_ptr()));
+    ISMRMRD::IsmrmrdHeader h;
+    try {
+      deserialize(mb->rd_ptr(),h);
+    } catch (...) {
+      GADGET_DEBUG1("Error parsing ISMRMRD Header");
+      throw;
+      return GADGET_FAIL;
+    }
+
 
-    // seq object
-    ISMRMRD::ismrmrdHeader::encoding_sequence e_seq = cfg->encoding();
-    if (e_seq.size() != 1)
+    // This only supports two encoding spaces where the recon_space is the same size
+    // e.g. Parallel imaging reference scan collected with GRE and data with EPI
+    if (h.encoding.size() > 2)
     {
-        GADGET_DEBUG2("Number of encoding spaces: %d\n", e_seq.size());
-        GADGET_DEBUG1("This simple GtPlusAccumulatorWorkOrderTriggerGadget only supports one encoding space\n");
+        GADGET_DEBUG2("Number of encoding spaces: %d\n", h.encoding.size());
+        GADGET_DEBUG1("This GtPlusAccumulatorWorkOrderTriggerGadget only supports two encoding space\n");
         return GADGET_FAIL;
+    } 
+    else if (h.encoding.size() == 2)
+    {
+        if (! ((h.encoding[0].reconSpace.matrixSize.x == h.encoding[1].reconSpace.matrixSize.x) && 
+            (h.encoding[0].reconSpace.matrixSize.y == h.encoding[1].reconSpace.matrixSize.y) && 
+            (h.encoding[0].reconSpace.matrixSize.z == h.encoding[1].reconSpace.matrixSize.z) && 
+            (h.encoding[0].reconSpace.fieldOfView_mm.x == h.encoding[1].reconSpace.fieldOfView_mm.x) &&
+            (h.encoding[0].reconSpace.fieldOfView_mm.y == h.encoding[1].reconSpace.fieldOfView_mm.y) &&
+            (h.encoding[0].reconSpace.fieldOfView_mm.z == h.encoding[1].reconSpace.fieldOfView_mm.z)) )
+        {
+            GADGET_DEBUG2("Number of encoding spaces: %d\n", h.encoding.size());
+            GADGET_DEBUG1("This GtPlusAccumulatorWorkOrderTriggerGadget only supports two encoding spaces with identical recon spaces.\n");
+            return GADGET_FAIL;
+        }
     }
 
     // find out the PAT mode
-    ISMRMRD::ismrmrdHeader::parallelImaging_optional p_imaging_type = cfg->parallelImaging();
-    ISMRMRD::parallelImagingType p_imaging = *p_imaging_type;
+    if (!h.encoding[0].parallelImaging)
+    {
+      GADGET_DEBUG1("Parallel Imaging section not found in header");
+      return GADGET_FAIL;
+    }
+
+    ISMRMRD::ParallelImaging p_imaging = *h.encoding[0].parallelImaging;
+
+    workOrder_.acceFactorE1_ = (double)(p_imaging.accelerationFactor.kspace_encoding_step_1);
+    workOrder_.acceFactorE2_ = (double)(p_imaging.accelerationFactor.kspace_encoding_step_2);
 
-    workOrder_.acceFactorE1_ = (size_t)(p_imaging.accelerationFactor().kspace_encoding_step_1());
-    workOrder_.acceFactorE2_ = (size_t)(p_imaging.accelerationFactor().kspace_encoding_step_2());
     GADGET_CONDITION_MSG(verboseMode_, "acceFactorE1_ is " << workOrder_.acceFactorE1_);
     GADGET_CONDITION_MSG(verboseMode_, "acceFactorE2_ is " << workOrder_.acceFactorE2_);
 
-    ISMRMRD::calibrationModeType calib = *(p_imaging.calibrationMode());
-    if ( calib == ISMRMRD::calibrationModeType::interleaved )
+    workOrder_.InterleaveDim_ = Gadgetron::gtPlus::DIM_NONE;
+
+    if ( !p_imaging.calibrationMode.is_present() )
+    {
+        GADGET_DEBUG1("Parallel Imaging calibrationMode not found in header");
+        return GADGET_FAIL;
+    }
+
+    std::string calib = *p_imaging.calibrationMode;
+    if ( calib.compare("interleaved") == 0 )
     {
         workOrder_.CalibMode_ = Gadgetron::gtPlus::ISMRMRD_interleaved;
         GADGET_CONDITION_MSG(verboseMode_, "Calibration mode is interleaved");
 
-        if ( p_imaging.interleavingDimension().present() )
+        if ( p_imaging.interleavingDimension )
         {
-            if ( *(p_imaging.interleavingDimension()) == ISMRMRD::interleavingDimensionType::phase )
+            if ( p_imaging.interleavingDimension->compare("phase") == 0 )
             {
                 workOrder_.InterleaveDim_ = Gadgetron::gtPlus::DIM_Phase;
             }
-
-            if ( *(p_imaging.interleavingDimension()) == ISMRMRD::interleavingDimensionType::repetition )
+            else if ( p_imaging.interleavingDimension->compare("repetition") == 0 )
             {
                 workOrder_.InterleaveDim_ = Gadgetron::gtPlus::DIM_Repetition;
             }
-
-            if ( *(p_imaging.interleavingDimension()) == ISMRMRD::interleavingDimensionType::average )
+            else if ( p_imaging.interleavingDimension->compare("average") == 0 )
             {
                 workOrder_.InterleaveDim_ = Gadgetron::gtPlus::DIM_Average;
             }
-
-            if ( *(p_imaging.interleavingDimension()) == ISMRMRD::interleavingDimensionType::contrast )
+            else if ( p_imaging.interleavingDimension->compare("contrast") == 0 )
             {
                 workOrder_.InterleaveDim_ = Gadgetron::gtPlus::DIM_Contrast;
             }
-
-            if ( *(p_imaging.interleavingDimension()) == ISMRMRD::interleavingDimensionType::other )
+            else if ( p_imaging.interleavingDimension->compare("other") == 0 )
             {
                 workOrder_.InterleaveDim_ = Gadgetron::gtPlus::DIM_other1;
             }
-
+            else
+            {
+                GADGET_DEBUG1("Unknown interleaving dimension. Bailing out");
+                return GADGET_FAIL;
+            }
             GADGET_CONDITION_MSG(verboseMode_, "InterleaveDim is " << gtPlus_util_.getISMRMRDDimName(workOrder_.InterleaveDim_));
         }
     }
-
-    if ( calib == ISMRMRD::calibrationModeType::embedded )
+    else if ( calib.compare("embedded") == 0 )
     {
         workOrder_.CalibMode_ = Gadgetron::gtPlus::ISMRMRD_embedded;
         GADGET_CONDITION_MSG(verboseMode_, "Calibration mode is embedded");
     }
-
-    if ( calib == ISMRMRD::calibrationModeType::separate )
+    else if ( calib.compare("separate") == 0 )
     {
         workOrder_.CalibMode_ = Gadgetron::gtPlus::ISMRMRD_separate;
         GADGET_CONDITION_MSG(verboseMode_, "Calibration mode is separate");
     }
-
-    if ( calib == ISMRMRD::calibrationModeType::external )
+    else if ( calib.compare("external") == 0 )
     {
         workOrder_.CalibMode_ = Gadgetron::gtPlus::ISMRMRD_external;
     }
-
-    if ( calib == ISMRMRD::calibrationModeType::other && workOrder_.acceFactorE1_==1 && workOrder_.acceFactorE2_==1 )
+    else if ( (calib.compare("other") == 0) && workOrder_.acceFactorE1_==1 && workOrder_.acceFactorE2_==1 )
     {
-        // workOrder_.CalibMode_ = Gadgetron::gtPlus::ISMRMRD_noacceleration;
-        workOrder_.CalibMode_ = Gadgetron::gtPlus::ISMRMRD_interleaved;
-        workOrder_.acceFactorE1_=2;
-        workOrder_.InterleaveDim_ = Gadgetron::gtPlus::DIM_Phase;
+        workOrder_.CalibMode_ = Gadgetron::gtPlus::ISMRMRD_noacceleration;
+        workOrder_.acceFactorE1_=1;
     }
-
-    if ( calib == ISMRMRD::calibrationModeType::other && (workOrder_.acceFactorE1_>1 || workOrder_.acceFactorE2_>1) )
+    else if ( (calib.compare("other") == 0) &&  (workOrder_.acceFactorE1_>1 || workOrder_.acceFactorE2_>1) )
     {
-        //workOrder_.CalibMode_ = Gadgetron::gtPlus::ISMRMRD_other;
         workOrder_.CalibMode_ = Gadgetron::gtPlus::ISMRMRD_interleaved;
         workOrder_.acceFactorE1_=2;
         workOrder_.InterleaveDim_ = Gadgetron::gtPlus::DIM_Phase;
     }
-
+    else
+    {
+        GADGET_DEBUG1("Failed to process parallel imaging calibration mode");
+        return GADGET_FAIL;
+    }
+    
     // ---------------------------------------------------------------------------------------------------------
 
     // find out the encoding space 
-    ISMRMRD::encodingSpaceType e_space = (*e_seq.begin()).encodedSpace();
-    ISMRMRD::encodingSpaceType r_space = (*e_seq.begin()).reconSpace();
-    ISMRMRD::encodingLimitsType e_limits = (*e_seq.begin()).encodingLimits();
 
-    matrix_size_encoding_[0] = e_space.matrixSize().x();
-    matrix_size_encoding_[1] = e_space.matrixSize().y();
-    matrix_size_encoding_[2] = e_space.matrixSize().z();
-    GADGET_CONDITION_MSG(verboseMode_, "Encoding matrix size: " << matrix_size_encoding_[0] << " " << matrix_size_encoding_[1] << " " << matrix_size_encoding_[2]);
+    findMatrixSizeEncoding(h, matrix_size_encoding_);
+    findFOVEncoding(h, field_of_view_encoding_);
 
-    field_of_view_encoding_[0] = e_space.fieldOfView_mm().x();
-    field_of_view_encoding_[1] = e_space.fieldOfView_mm().y();
-    field_of_view_encoding_[2] = e_space.fieldOfView_mm().z();
-    GADGET_CONDITION_MSG(verboseMode_, "Encoding field_of_view : " << field_of_view_encoding_[0] << " " << field_of_view_encoding_[1] << " " << field_of_view_encoding_[2]);
+    findMatrixSizeRecon(h, matrix_size_recon_);
+    findFOVRecon(h, field_of_view_recon_);
 
-    // find the recon space
-    matrix_size_recon_[0] = r_space.matrixSize().x();
-    matrix_size_recon_[1] = r_space.matrixSize().y();
-    matrix_size_recon_[2] = r_space.matrixSize().z();
+    GADGET_CONDITION_MSG(verboseMode_, "Encoding matrix size: " << matrix_size_encoding_[0] << " " << matrix_size_encoding_[1] << " " << matrix_size_encoding_[2]);
+    GADGET_CONDITION_MSG(verboseMode_, "Encoding field_of_view : " << field_of_view_encoding_[0] << " " << field_of_view_encoding_[1] << " " << field_of_view_encoding_[2]);
     GADGET_CONDITION_MSG(verboseMode_, "Recon matrix size : " << matrix_size_recon_[0] << " " << matrix_size_recon_[1] << " " << matrix_size_recon_[2]);
-
-    field_of_view_recon_[0] = r_space.fieldOfView_mm().x();
-    field_of_view_recon_[1] = r_space.fieldOfView_mm().y();
-    field_of_view_recon_[2] = r_space.fieldOfView_mm().z();
     GADGET_CONDITION_MSG(verboseMode_, "Recon field_of_view :  " << field_of_view_recon_[0] << " " << field_of_view_recon_[1] << " " << field_of_view_recon_[2]);
 
     // ---------------------------------------------------------------------------------------------------------
     // handle partial fourier
-    //workOrder_.kSpaceCenterEncode1_ = e_limits.kspace_encoding_step_1().get().center();
-    //GADGET_CONDITION_MSG(verboseMode_, "kSpaceCenterEncode1_ is " << workOrder_.kSpaceCenterEncode1_);
 
-    //workOrder_.kSpaceCenterEncode2_ = e_limits.kspace_encoding_step_2().get().center();
-    //GADGET_CONDITION_MSG(verboseMode_, "kSpaceCenterEncode2_ is " << workOrder_.kSpaceCenterEncode2_);
+    ISMRMRD::EncodingSpace e_space = h.encoding[0].encodedSpace;
+    ISMRMRD::EncodingLimits e_limits = h.encoding[0].encodingLimits;
 
-    workOrder_.kSpaceMaxEncode1_ = e_limits.kspace_encoding_step_1().get().maximum();
-    GADGET_CONDITION_MSG(verboseMode_, "kSpaceMaxEncode1_ is " << workOrder_.kSpaceMaxEncode1_);
+    workOrder_.kSpaceMaxEncode1_ = matrix_size_encoding_[1]-1;
+    GADGET_CONDITION_MSG(verboseMode_, "matrix size kSpaceMaxEncode1_ is " << workOrder_.kSpaceMaxEncode1_);
 
-    workOrder_.kSpaceMaxEncode2_ = e_limits.kspace_encoding_step_2().get().maximum();
-    GADGET_CONDITION_MSG(verboseMode_, "kSpaceMaxEncode2_ is " << workOrder_.kSpaceMaxEncode2_);
+    workOrder_.kSpaceMaxEncode2_ = matrix_size_encoding_[2]-1;
+    GADGET_CONDITION_MSG(verboseMode_, "matrix size kSpaceMaxEncode2_ is " << workOrder_.kSpaceMaxEncode2_);
 
     space_size_[1] = workOrder_.kSpaceMaxEncode1_+1;
     space_size_[2] = workOrder_.kSpaceMaxEncode2_+1;
 
-    // if partial fourier or asymmetric echo is used, correct the kSpaceCenter
-    //if ( space_size_[1]-matrix_size_encoding_[1] > workOrder_.acceFactorE1_ )
-    //{
-    //    GADGET_CONDITION_MSG(verboseMode_, "Partial fourier along E1 ... ");
-    //    //if ( GT_ABS(matrix_size_encoding_[1]/workOrder_.acceFactorE1_ - std::floor(matrix_size_encoding_[1]/workOrder_.acceFactorE1_)) > FLT_EPSILON )
-    //    //{
-    //    //    GADGET_WARN_MSG("matrix_size_[1] is not multiplied by acceFactorE1_ ... ");
-    //    //    matrix_size_encoding_[1] = (std::floor(matrix_size_encoding_[1]/workOrder_.acceFactorE1_)+1)*workOrder_.acceFactorE1_;
-    //    //}
-
-    //    if ( 2*workOrder_.kSpaceCenterEncode1_ > (matrix_size_encoding_[1]+1) )
-    //    {
-    //        space_matrix_offset_E1_ = 0;
+    if ( (!e_limits.kspace_encoding_step_1) || (!e_limits.kspace_encoding_step_2))
+    {
+        GADGET_DEBUG1("kspace_encoding_step_1 and kspace_encoding_step_2 limits are required. Not found. Bailing out.");
+        return GADGET_FAIL;
+    }
 
-    //        workOrder_.start_E2_ = 0;
-    //        workOrder_.end_E2_ = matrix_size_encoding_[1];
-    //    }
-    //    else
-    //    {
-    //        space_matrix_offset_E1_ = space_size_[1] - matrix_size_encoding_[1];
+    max_sampled_E1_ = e_limits.kspace_encoding_step_1->maximum;
+    max_sampled_E2_ = e_limits.kspace_encoding_step_2->maximum;
 
-    //        workOrder_.start_E1_ = space_matrix_offset_E1_;
-    //        workOrder_.end_E1_ = workOrder_.kSpaceMaxEncode1_;
-    //    }
-    //}
-    //else
-    //{
-    //    space_matrix_offset_E1_ = 0;
-    //}
+    GADGET_CONDITION_MSG(verboseMode_, "max_sampled_E1_ is " << max_sampled_E1_);
+    GADGET_CONDITION_MSG(verboseMode_, "max_sampled_E2_ is " << max_sampled_E2_);
 
-    //if ( space_size_[2]-matrix_size_encoding_[2] > workOrder_.acceFactorE2_ )
-    //{
-    //    GADGET_CONDITION_MSG(verboseMode_, "Partial fourier along E2 ... ");
-    //    //if ( GT_ABS(matrix_size_encoding_[2]/workOrder_.acceFactorE2_ - std::floor(matrix_size_encoding_[2]/workOrder_.acceFactorE2_)) > FLT_EPSILON )
-    //    //{
-    //    //    GADGET_WARN_MSG("matrix_size_[2] is not multiplied by acceFactorE2_ ... ");
-    //    //    matrix_size_[2] = (std::floor(matrix_size_[2]/workOrder_.acceFactorE2_)+1)*workOrder_.acceFactorE2_;
-    //    //}
-
-    //    if ( 2*workOrder_.kSpaceCenterEncode2_ > (matrix_size_encoding_[2]+1) )
-    //    {
-    //        space_matrix_offset_E2_ = 0;
+    center_line_E1_ = e_limits.kspace_encoding_step_1->center;
+    center_line_E2_ = e_limits.kspace_encoding_step_2->center;
 
-    //        workOrder_.start_E2_ = 0;
-    //        workOrder_.end_E2_ = matrix_size_encoding_[2];
-    //    }
-    //    else
-    //    {
-    //        space_matrix_offset_E2_ = space_size_[2] - matrix_size_encoding_[2];
+    GADGET_CONDITION_MSG(verboseMode_, "center_line_E1_ is " << center_line_E1_);
+    GADGET_CONDITION_MSG(verboseMode_, "center_line_E2_ is " << center_line_E2_);
 
-    //        workOrder_.start_E2_ = space_matrix_offset_E2_;
-    //        workOrder_.end_E2_ = workOrder_.kSpaceMaxEncode2_;
-    //    }
-    //}
-    //else
-    //{
-    //    space_matrix_offset_E2_ = 0;
-    //}
-
-    // ---------------------------------------------------------------------------------------------------------
-    // encoding limits
+    workOrder_.kSpaceCenterEncode1_ = center_line_E1_;
+    GADGET_CONDITION_MSG(verboseMode_, "kSpaceCenterEncode1_ is " << workOrder_.kSpaceCenterEncode1_);
 
-    meas_max_ro_ = e_space.matrixSize().x()/2;
+    workOrder_.kSpaceCenterEncode2_ = center_line_E2_;
+    GADGET_CONDITION_MSG(verboseMode_, "kSpaceCenterEncode2_ is " << workOrder_.kSpaceCenterEncode2_);
 
-    if (e_limits.kspace_encoding_step_1().present()) 
-    {
-        meas_max_idx_.kspace_encode_step_1 = e_limits.kspace_encoding_step_1().get().maximum();
-    }
-    else
+    // ---------------------------------------------------------------------------------------------------------
+    // handle retro-gating
+    if (h.userParameters)
     {
-        meas_max_idx_.kspace_encode_step_1 = 0;
-        std::cout << "Setting number of kspace_encode_step_1 to 0" << std::endl;
-        return GADGET_FAIL;
+        for (std::vector<ISMRMRD::UserParameterLong>::const_iterator  i = h.userParameters->userParameterLong.begin (); i != h.userParameters->userParameterLong.end(); ++i)
+        {
+            if (std::strcmp(i->name.c_str(),"RetroGatedImages") == 0)
+            {
+                workOrder_.retro_gated_images_ = i->value;
+            }
+            else if ( std::strcmp(i->name.c_str(),"RetroGatedSegmentSize") == 0 )
+            {
+                workOrder_.retro_gated_segment_size_ = i->value;
+            }
+            else if ( std::strcmp(i->name.c_str(),"EmbeddedRefLinesE1") == 0 )
+            {
+                embedded_ref_lines_E1_ = i->value;
+            }
+            else if ( std::strcmp(i->name.c_str(),"EmbeddedRefLinesE2") == 0 )
+            {
+                embedded_ref_lines_E2_ = i->value;
+            }
+        }
     }
 
-    space_size_[0] = meas_max_ro_;
+    // ---------------------------------------------------------------------------------------------------------
+    // encoding limits
 
-    if (e_limits.set().present())
+    if ( GT_ABS(2*field_of_view_recon_[0]-field_of_view_encoding_[0]) < 1.0 )
     {
-        if ( e_limits.set().get().maximum() > 0 )
-            meas_max_idx_.set = e_limits.set().get().maximum() - 1;
-        else
-            meas_max_idx_.set = 0;
-
-        if ( meas_max_idx_.set < 0 ) meas_max_idx_.set = 0;
+        meas_max_ro_ = e_space.matrixSize.x/2;
     }
     else
     {
-        meas_max_idx_.set = 0;
+        meas_max_ro_ = e_space.matrixSize.x;
     }
+    space_size_[0] = meas_max_ro_;
 
-    if (e_limits.phase().present())
-    {
-        if ( e_limits.phase().get().maximum() > 0 )
-            meas_max_idx_.phase = e_limits.phase().get().maximum()-1;
-        else
-            meas_max_idx_.phase = 0;
+    meas_max_idx_.kspace_encode_step_1 = (uint16_t)matrix_size_encoding_[1]-1;
 
-        if ( meas_max_idx_.phase < 0 ) meas_max_idx_.phase = 0;
-    }
-    else
-    {
-        meas_max_idx_.phase = 0;
-    }
+    meas_max_idx_.set = (e_limits.set && (e_limits.set->maximum>0)) ? e_limits.set->maximum : 0;
+    meas_max_idx_.phase = (e_limits.phase && (e_limits.phase->maximum>0)) ? e_limits.phase->maximum : 0;
 
-    if (e_limits.kspace_encoding_step_2().present())
-    {
-        meas_max_idx_.kspace_encode_step_2 = e_limits.kspace_encoding_step_2().get().maximum();
-    }
-    else
+    // if it is retro-gating
+    if ( workOrder_.retro_gated_images_ > 0 )
     {
-        meas_max_idx_.kspace_encode_step_2 = 0;
+        meas_max_idx_.phase = (uint16_t)(workOrder_.retro_gated_images_ - 1);
     }
 
-    if (e_limits.contrast().present())
-    {
-        if ( e_limits.contrast().get().maximum() > 0 )
-            meas_max_idx_.contrast = e_limits.contrast().get().maximum()-1;
-        else
-            meas_max_idx_.contrast = 0;
+    meas_max_idx_.kspace_encode_step_2 = (uint16_t)matrix_size_encoding_[2]-1;
 
-        if ( meas_max_idx_.contrast < 0 ) meas_max_idx_.contrast = 0;
-    }
-    else
-    {
-        meas_max_idx_.contrast = 0;
-    }
+    meas_max_idx_.contrast = (e_limits.contrast && (e_limits.contrast->maximum > 0)) ? e_limits.contrast->maximum : 0;
 
-    if (e_limits.slice().present())
-    {
-        meas_max_idx_.slice = e_limits.slice().get().maximum();
-    }
-    else
-    {
-        meas_max_idx_.slice = 0;
-    }
+    meas_max_idx_.slice = (e_limits.slice && (e_limits.slice->maximum > 0)) ? e_limits.slice->maximum : 0;
 
-    if (e_limits.repetition().present())
-    {
-        meas_max_idx_.repetition = e_limits.repetition().get().maximum();
-    }
-    else
-    {
-        meas_max_idx_.repetition = 0;
-    }
+    meas_max_idx_.repetition = e_limits.repetition ? e_limits.repetition->maximum : 0;
 
-    if (e_limits.average().present())
-    {
-        meas_max_idx_.average = e_limits.average().get().maximum()-1;
-    }
-    else
-    {
-        meas_max_idx_.average = 0;
-    }
+    meas_max_idx_.average = e_limits.average ? e_limits.average->maximum : 0;
 
-    if (e_limits.segment().present())
-    {
-        // meas_max_idx_.segment = e_limits.segment().get().maximum()-1;
-        meas_max_idx_.segment = 0;
-    }
-    else
-    {
-        meas_max_idx_.segment = 0;
-    }
+    meas_max_idx_.segment = 0;
 
     return GADGET_OK;
 }
@@ -372,7 +329,10 @@ int GtPlusAccumulatorWorkOrderTriggerGadget::process(GadgetContainerMessage<ISMR
     }
 
     // combine the segmentes
-    m1->getObjectPtr()->idx.segment = 0;
+    //if ( workOrder_.retro_gated_images_ == 0 )
+    //{
+        m1->getObjectPtr()->idx.segment = 0;
+    //}
 
     if ( (bIsNavigator || bIsRTFeedback || bIsHPFeedback || bIsDummyScan) && !bIsKSpace && !bIsRef )
     {
@@ -393,55 +353,65 @@ int GtPlusAccumulatorWorkOrderTriggerGadget::process(GadgetContainerMessage<ISMR
             workOrder_.kSpaceMaxRO_ = m1->getObjectPtr()->number_of_samples;
         }
 
-        workOrder_.kSpaceCenterEncode1_ = m1->getObjectPtr()->idx.user[5];
-        GADGET_CONDITION_MSG(verboseMode_, "kSpaceCenterEncode1_ is " << workOrder_.kSpaceCenterEncode1_);
-
-        workOrder_.kSpaceCenterEncode2_ = m1->getObjectPtr()->idx.user[6];
-        GADGET_CONDITION_MSG(verboseMode_, "kSpaceCenterEncode2_ is " << workOrder_.kSpaceCenterEncode2_);
-
         // if partial fourier or asymmetric echo is used, correct the kSpaceCenter
-        if ( space_size_[1]-matrix_size_encoding_[1] > workOrder_.acceFactorE1_ )
+        if ( GT_ABS(space_size_[1]-max_sampled_E1_) > workOrder_.acceFactorE1_ )
         {
             GADGET_CONDITION_MSG(verboseMode_, "Partial fourier along E1 ... ");
 
-            if ( 2*workOrder_.kSpaceCenterEncode1_ > (matrix_size_encoding_[1]+1) )
+            // if ( (m1->getObjectPtr()->idx.user[5]>0) && (GT_ABS( (long long)m1->getObjectPtr()->idx.user[5] - (long long)space_size_[1]/2 )<2) )
+            if ( (m1->getObjectPtr()->idx.user[5]>0) )
+            {
+                workOrder_.kSpaceCenterEncode1_ = m1->getObjectPtr()->idx.user[5];
+            }
+
+            if ( 2*workOrder_.kSpaceCenterEncode1_ >= (max_sampled_E1_+1) )
             {
                 space_matrix_offset_E1_ = 0;
 
-                workOrder_.start_E2_ = 0;
-                workOrder_.end_E2_ = matrix_size_encoding_[1];
+                workOrder_.start_E1_ = 0;
+                workOrder_.end_E1_ = (int)max_sampled_E1_;
             }
             else
             {
-                space_matrix_offset_E1_ = space_size_[1] - matrix_size_encoding_[1];
+                space_matrix_offset_E1_ = space_size_[1] - max_sampled_E1_ -1;
 
-                workOrder_.start_E1_ = space_matrix_offset_E1_;
-                workOrder_.end_E1_ = workOrder_.kSpaceMaxEncode1_;
+                workOrder_.start_E1_ = (int)space_matrix_offset_E1_;
+                workOrder_.end_E1_ = (int)workOrder_.kSpaceMaxEncode1_;
             }
+
+            workOrder_.kSpaceMaxEncode1_ = 2*workOrder_.kSpaceCenterEncode1_-1;
         }
         else
         {
             space_matrix_offset_E1_ = 0;
         }
 
-        if ( space_size_[2]-matrix_size_encoding_[2] > workOrder_.acceFactorE2_ )
+        if ( GT_ABS(space_size_[2]-max_sampled_E2_) > workOrder_.acceFactorE2_ )
         {
             GADGET_CONDITION_MSG(verboseMode_, "Partial fourier along E2 ... ");
 
-            if ( 2*workOrder_.kSpaceCenterEncode2_ > (matrix_size_encoding_[2]+1) )
+            // if ( (m1->getObjectPtr()->idx.user[6]>0) && (GT_ABS( (long long)m1->getObjectPtr()->idx.user[6] - (long long)space_size_[2]/2 )<2) )
+            if ( (m1->getObjectPtr()->idx.user[6]>0) )
+            {
+                workOrder_.kSpaceCenterEncode2_ = m1->getObjectPtr()->idx.user[6];
+            }
+
+            if ( 2*workOrder_.kSpaceCenterEncode2_ >= (max_sampled_E2_+1) )
             {
                 space_matrix_offset_E2_ = 0;
 
                 workOrder_.start_E2_ = 0;
-                workOrder_.end_E2_ = matrix_size_encoding_[2];
+                workOrder_.end_E2_ = (int)max_sampled_E2_;
             }
             else
             {
-                space_matrix_offset_E2_ = space_size_[2] - matrix_size_encoding_[2];
+                space_matrix_offset_E2_ = space_size_[2] - max_sampled_E2_-1;
 
-                workOrder_.start_E2_ = space_matrix_offset_E2_;
-                workOrder_.end_E2_ = workOrder_.kSpaceMaxEncode2_;
+                workOrder_.start_E2_ = (int)space_matrix_offset_E2_;
+                workOrder_.end_E2_ = (int)workOrder_.kSpaceMaxEncode2_;
             }
+
+            workOrder_.kSpaceMaxEncode2_ = 2*workOrder_.kSpaceCenterEncode2_-1;
         }
         else
         {
@@ -471,7 +441,7 @@ int GtPlusAccumulatorWorkOrderTriggerGadget::process(GadgetContainerMessage<ISMR
     }
 
     // store ref read out
-    if ( bIsRef )
+    if ( bIsRef && (workOrder_.CalibMode_ != Gadgetron::gtPlus::ISMRMRD_interleaved) )
     {
         if ( !storeRefData(m1, m2, bIsReflect) )
         {
@@ -544,8 +514,9 @@ bool GtPlusAccumulatorWorkOrderTriggerGadget::needTriggerWorkOrderAllInClose()
     // already triggered for last acquisition
     if ( triggered_in_process_last_acq_ ) return false;
 
-    // if never triggered in process(...)
-    if ( !triggered_in_process_ && !triggered_in_process_last_acq_ ) return true;
+    // if never triggered in process(...) and the last acquisition did arrive
+    // (if the last acquisition did not arrive, the user may have cancelled the scan)
+    if ( !triggered_in_process_ && !triggered_in_process_last_acq_ && last_acq_arrived_ ) return true;
 
     if ( workOrder_.CalibMode_ == ISMRMRD_interleaved )
     {
@@ -654,22 +625,29 @@ triggerWorkOrder(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
             Gadgetron::gtPlus::ISMRMRDDIM& triggerDim2_,
             int numOfKSpace_triggerDim1_)
 {
-    //bool is_first_acq_in_slice = ISMRMRD::FlagBit(ISMRMRD::ACQ_FIRST_IN_SLICE).isSet(m1->getObjectPtr()->flags);
+    //bool is_first_acq_in_slice = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_FIRST_IN_SLICE).isSet(m1->getObjectPtr()->flags);
     //if ( !is_first_acq_in_slice ) return true;
 
-    bool is_last_acq = ((ISMRMRD::FlagBit(ISMRMRD::ACQ_LAST_IN_REPETITION).isSet(m1->getObjectPtr()->flags)) 
-                    || (ISMRMRD::FlagBit(ISMRMRD::ACQ_LAST_IN_SLICE).isSet(m1->getObjectPtr()->flags)) ) 
-                    && (m1->getObjectPtr()->idx.repetition==meas_max_idx_.repetition)
-                    && (m1->getObjectPtr()->idx.slice==meas_max_idx_.slice)
-                    && (m1->getObjectPtr()->idx.set==meas_max_idx_.set)
-                    && (m1->getObjectPtr()->idx.contrast==meas_max_idx_.contrast)
-                    && (m1->getObjectPtr()->idx.phase==meas_max_idx_.phase);
+    bool is_last_acq = ( ((ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_LAST_IN_REPETITION).isSet(m1->getObjectPtr()->flags)) || (ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_LAST_IN_SLICE).isSet(m1->getObjectPtr()->flags)) ) 
+                                && (m1->getObjectPtr()->idx.repetition==meas_max_idx_.repetition)
+                                && (m1->getObjectPtr()->idx.slice==meas_max_idx_.slice)
+                                && (m1->getObjectPtr()->idx.set==meas_max_idx_.set)
+                                && (m1->getObjectPtr()->idx.contrast==meas_max_idx_.contrast)
+                                && (m1->getObjectPtr()->idx.phase==meas_max_idx_.phase)
+                                && (m1->getObjectPtr()->idx.average==meas_max_idx_.average) );
+
+    // if retro gating, use the end of acq flag
+    if ( !is_last_acq && (workOrder_.retro_gated_images_ > 0) )
+    {
+        is_last_acq = (ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_LAST_IN_MEASUREMENT).isSet(m1->getObjectPtr()->flags));
+    }
+
+    if ( is_last_acq ) last_acq_arrived_ = true;
 
     curr_dim1_ = getDimValue(*(m1->getObjectPtr()), triggerDim1_);
     curr_dim2_ = getDimValue(*(m1->getObjectPtr()), triggerDim2_);
 
-    if ( is_last_acq 
-            && ( (triggerDim1_!=DIM_NONE) || (triggerDim2_!=DIM_NONE) ) )
+    if ( is_last_acq && ( (triggerDim1_!=DIM_NONE) || (triggerDim2_!=DIM_NONE) ) )
     {
         GADGET_CONDITION_MSG(true, "Last scan in measurement - " << gtPlusISMRMRDReconUtil<ValueType>().getISMRMRDDimName(triggerDim1_ ) << " = " << curr_dim1_ << " - " << gtPlusISMRMRDReconUtil<ValueType>().getISMRMRDDimName(triggerDim2_ ) << " = " << curr_dim2_);
 
@@ -747,7 +725,7 @@ triggerWorkOrder(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
     int numOfAcquiredKSpaceForTriggerDim1 = numOfKSpace_triggerDim1_;
     if ( workOrder_.CalibMode_ == ISMRMRD_interleaved )
     {
-        numOfAcquiredKSpaceForTriggerDim1 = numOfKSpace_triggerDim1_ * workOrder_.acceFactorE1_ * workOrder_.acceFactorE2_;
+        numOfAcquiredKSpaceForTriggerDim1 = (int)(numOfKSpace_triggerDim1_ * workOrder_.acceFactorE1_ * workOrder_.acceFactorE2_);
     }
 
     // trigger whenever the Dim2 is changed
@@ -756,7 +734,7 @@ triggerWorkOrder(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
         prev_dim1_ = curr_dim1_;
         prev_acq_header_ = *(m1->getObjectPtr());
 
-        int prev_dim2_local_ = prev_dim2_;
+        size_t prev_dim2_local_ = prev_dim2_;
         prev_dim2_ = curr_dim2_;
 
         if ( curr_dim2_!= prev_dim2_local_ )
@@ -772,7 +750,7 @@ triggerWorkOrder(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
     {
         prev_dim2_ = curr_dim2_;
 
-        int prev_dim1_local_ = prev_dim1_;
+        size_t prev_dim1_local_ = prev_dim1_;
         prev_dim1_ = curr_dim1_;
 
         if ( numOfKSpace_triggerDim1_ > 0 )
@@ -827,8 +805,8 @@ triggerWorkOrder(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
 
     if (  triggerDim1_!=DIM_NONE && triggerDim2_!=DIM_NONE  )
     {
-        int prev_dim1_local_ = prev_dim1_;
-        int prev_dim2_local_ = prev_dim2_;
+        size_t prev_dim1_local_ = prev_dim1_;
+        size_t prev_dim2_local_ = prev_dim2_;
 
         prev_dim1_ = curr_dim1_;
         prev_dim2_ = curr_dim2_;
@@ -837,16 +815,32 @@ triggerWorkOrder(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
         {
             if ( (curr_dim2_!=prev_dim2_local_) || resetTriggerStatus(m1) )
             {
-                count_dim1_ = 0;
-                GADGET_CONDITION_MSG(verboseMode_, "Trigger Dim1 : " << gtPlusISMRMRDReconUtil<ValueType>().getISMRMRDDimName(triggerDim1_ ) << " = " << prev_dim1_local_ 
-                    << "; Dim2 : " << gtPlusISMRMRDReconUtil<ValueType>().getISMRMRDDimName(triggerDim2_ ) << " = " << prev_dim2_local_);
+                if ( count_dim1_ > numOfAcquiredKSpaceForTriggerDim1 )
+                {
+                    count_dim1_ = 0;
+                    GADGET_CONDITION_MSG(verboseMode_, "Trigger Dim1 : " << gtPlusISMRMRDReconUtil<ValueType>().getISMRMRDDimName(triggerDim1_ ) << " = " << prev_dim1_local_ 
+                        << "; Dim2 : " << gtPlusISMRMRDReconUtil<ValueType>().getISMRMRDDimName(triggerDim2_ ) << " = " << prev_dim2_local_);
 
-                workFlow_BufferKernel_ = false;
-                workFlow_use_BufferedKernel_ = true;
+                    workFlow_BufferKernel_ = false;
+                    workFlow_use_BufferedKernel_ = true;
 
-                GADGET_CHECK_RETURN_FALSE(triggerByDimEqual(triggerDim1_, prev_dim1_local_, triggerDim2_, prev_dim2_local_, workFlow_BufferKernel_, workFlow_use_BufferedKernel_));
+                    GADGET_CHECK_RETURN_FALSE(triggerByDimEqual(triggerDim1_, prev_dim1_local_, triggerDim2_, prev_dim2_local_, workFlow_BufferKernel_, workFlow_use_BufferedKernel_));
+                }
+
+                if ( count_dim1_ <= numOfAcquiredKSpaceForTriggerDim1 && !triggered_in_process_by_numOfKSpace_triggerDim1_ ) // the trigger never happened
+                {
+                    count_dim1_ = 0;
+                    GADGET_CONDITION_MSG(verboseMode_, "Trigger Dim1 : " << gtPlusISMRMRDReconUtil<ValueType>().getISMRMRDDimName(triggerDim1_ ) << " = " << prev_dim1_local_ 
+                        << "; Dim2 : " << gtPlusISMRMRDReconUtil<ValueType>().getISMRMRDDimName(triggerDim2_ ) << " = " << prev_dim2_local_);
+
+                    workFlow_BufferKernel_ = false;
+                    workFlow_use_BufferedKernel_ = false;
+
+                    GADGET_CHECK_RETURN_FALSE(triggerByDim1LessEqualDim2Equal(triggerDim1_, prev_dim1_local_, triggerDim2_, prev_dim2_local_, workFlow_BufferKernel_, workFlow_use_BufferedKernel_));
+                }
 
                 triggered_in_process_ = true;
+                triggered_in_process_by_numOfKSpace_triggerDim1_ = false; // reset this flag to be false for next dim2
             }
 
             if (curr_dim1_!=prev_dim1_local_)
@@ -860,6 +854,7 @@ triggerWorkOrder(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
                     workFlow_use_BufferedKernel_ = false;
                     GADGET_CHECK_RETURN_FALSE(triggerByDim1LessEqualDim2Equal(triggerDim1_, prev_dim1_local_, triggerDim2_, prev_dim2_local_, workFlow_BufferKernel_, workFlow_use_BufferedKernel_));
                     triggered_in_process_ = true;
+                    triggered_in_process_by_numOfKSpace_triggerDim1_ = true;
                 }
                 else if ( count_dim1_ > numOfAcquiredKSpaceForTriggerDim1 )
                 {
@@ -870,6 +865,7 @@ triggerWorkOrder(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
                     workFlow_use_BufferedKernel_ = true;
                     GADGET_CHECK_RETURN_FALSE(triggerByDimEqual(triggerDim1_, prev_dim1_local_, triggerDim2_, prev_dim2_local_, workFlow_BufferKernel_, workFlow_use_BufferedKernel_));
                     triggered_in_process_ = true;
+                    triggered_in_process_by_numOfKSpace_triggerDim1_ = true;
                 }
             }
 
@@ -909,11 +905,11 @@ triggerWorkOrderLastCountInClose(Gadgetron::gtPlus::ISMRMRDDIM& triggerDim1_, Ga
     int numOfAcquiredKSpaceForTriggerDim1 = numOfKSpace_triggerDim1_;
     if ( workOrder_.CalibMode_ == ISMRMRD_interleaved )
     {
-        numOfAcquiredKSpaceForTriggerDim1 = numOfKSpace_triggerDim1_ * workOrder_.acceFactorE1_ * workOrder_.acceFactorE2_;
+        numOfAcquiredKSpaceForTriggerDim1 = (int)(numOfKSpace_triggerDim1_ * workOrder_.acceFactorE1_ * workOrder_.acceFactorE2_);
     }
 
-    int prev_dim1_local_ = prev_dim1_;
-    int prev_dim2_local_ = prev_dim2_;
+    size_t prev_dim1_local_ = prev_dim1_;
+    size_t prev_dim2_local_ = prev_dim2_;
 
     prev_dim1_ = curr_dim1_;
     prev_dim2_ = curr_dim2_;
@@ -985,15 +981,15 @@ bool GtPlusAccumulatorWorkOrderTriggerGadget::checkStatus(uint64_t flag, int sam
     bool& bIsKSpace, bool& bIsRef, bool& bIsNoise, bool& bIsPhaseCorr, bool& bIsReflect, bool& bIsOther,
     bool& bIsNavigator, bool& bIsRTFeedback, bool& bIsHPFeedback, bool& bIsDummyScan)
 {
-    bIsNoise = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_NOISE_MEASUREMENT).isSet(flag);
-    bool is_ref = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_PARALLEL_CALIBRATION).isSet(flag);
-    bool is_ref_kspace = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_PARALLEL_CALIBRATION_AND_IMAGING).isSet(flag);
-    bIsReflect = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_REVERSE).isSet(flag);
-    bIsPhaseCorr = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_PHASECORR_DATA).isSet(flag);
-    bIsNavigator = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_NAVIGATION_DATA).isSet(flag);
-    bIsRTFeedback = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_RTFEEDBACK_DATA).isSet(flag);
-    bIsHPFeedback = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_HPFEEDBACK_DATA).isSet(flag);
-    bIsDummyScan = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_DUMMYSCAN_DATA).isSet(flag);
+    bIsNoise = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_NOISE_MEASUREMENT).isSet(flag);
+    bool is_ref = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_PARALLEL_CALIBRATION).isSet(flag);
+    bool is_ref_kspace = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_PARALLEL_CALIBRATION_AND_IMAGING).isSet(flag);
+    bIsReflect = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_REVERSE).isSet(flag);
+    bIsPhaseCorr = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_PHASECORR_DATA).isSet(flag);
+    bIsNavigator = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_NAVIGATION_DATA).isSet(flag);
+    bIsRTFeedback = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_RTFEEDBACK_DATA).isSet(flag);
+    bIsHPFeedback = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_HPFEEDBACK_DATA).isSet(flag);
+    bIsDummyScan = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_DUMMYSCAN_DATA).isSet(flag);
 
     bIsKSpace = false;
     bIsRef = false;
@@ -1083,21 +1079,26 @@ bool GtPlusAccumulatorWorkOrderTriggerGadget::storeImageData(GadgetContainerMess
         size_t samples =  m1->getObjectPtr()->number_of_samples;
         ISMRMRD::EncodingCounters idx = m1->getObjectPtr()->idx;
 
-        idx.segment = 0; // combine the segments
+        /*if ( workOrder_.retro_gated_images_ == 0 )
+        {*/
+            idx.segment = 0; // combine the segments
+        //}
 
         if ( workOrder_.data_.get_number_of_elements() <= 0 )
         {
             meas_max_channel_ = m1->getObjectPtr()->active_channels;
 
-            int E1 = workOrder_.kSpaceMaxEncode1_+1;
-            int E2 = workOrder_.kSpaceMaxEncode2_+1;
+            size_t E1 = workOrder_.kSpaceMaxEncode1_+1;
+            size_t E2 = workOrder_.kSpaceMaxEncode2_+1;
             if ( E2 == 0 ) E2 = 1;
 
             if ( E1 < matrix_size_encoding_[1] ) E1 = matrix_size_encoding_[1];
             if ( E2 < matrix_size_encoding_[2] ) E2 = matrix_size_encoding_[2];
 
+            if ( samples > meas_max_ro_ ) meas_max_ro_ = samples;
+
             // find the loop counter boundary and allocate the buffer
-            GADGET_CONDITION_MSG(verboseMode_, "[RO E1 Cha Slice E2 Con Phase Rep Set Seg] = [" 
+            GADGET_CONDITION_MSG(verboseMode_, "[RO E1 Cha Slice E2 Con Phase Rep Set Seg Ave] = [" 
                                << meas_max_ro_ 
                                << " " << E1 
                                << " " << meas_max_channel_ 
@@ -1107,7 +1108,8 @@ bool GtPlusAccumulatorWorkOrderTriggerGadget::storeImageData(GadgetContainerMess
                                << " " << meas_max_idx_.phase+1 
                                << " " << meas_max_idx_.repetition+1 
                                << " " << meas_max_idx_.set+1 
-                               << " " << meas_max_idx_.segment+1 << "]");
+                               << " " << meas_max_idx_.segment+1 
+                               << " " << meas_max_idx_.average+1 << "]");
 
             dimensions_.clear();
             dimensions_.push_back(meas_max_ro_);
@@ -1120,6 +1122,7 @@ bool GtPlusAccumulatorWorkOrderTriggerGadget::storeImageData(GadgetContainerMess
             dimensions_.push_back(meas_max_idx_.repetition+1);
             dimensions_.push_back(meas_max_idx_.set+1);
             dimensions_.push_back(meas_max_idx_.segment+1);
+            dimensions_.push_back(meas_max_idx_.average+1);
 
             size_t N = dimensions_.size();
             for ( ii=0; ii<N; ii++ )
@@ -1138,6 +1141,15 @@ bool GtPlusAccumulatorWorkOrderTriggerGadget::storeImageData(GadgetContainerMess
                 reflect_dimensions_[2] = 1;
                 workOrder_.reflect_.create(&reflect_dimensions_);
                 Gadgetron::clear(workOrder_.reflect_);
+
+                std::vector<size_t> dim(dimensions_);
+                dim[0] = 1;
+                dim[2] = 1;
+                workOrder_.time_stamp_.create(dim);
+                Gadgetron::fill(workOrder_.time_stamp_, (real_value_type)(-1) );
+
+                workOrder_.physio_time_stamp_.create(dim);
+                Gadgetron::fill(workOrder_.physio_time_stamp_, (real_value_type)(-1) );
             }
             catch(...)
             {
@@ -1146,8 +1158,8 @@ bool GtPlusAccumulatorWorkOrderTriggerGadget::storeImageData(GadgetContainerMess
             }
 
             // allocate message buffer
-            int matrix_size[10];
-            for ( ii=0; ii<10; ii++ )
+            size_t matrix_size[GT_DIM_NUM];
+            for ( ii=0; ii<GT_DIM_NUM; ii++ )
             {
                 matrix_size[ii] = dimensions_[ii];
             }
@@ -1170,9 +1182,15 @@ bool GtPlusAccumulatorWorkOrderTriggerGadget::storeImageData(GadgetContainerMess
             idx.kspace_encode_step_2 += workOrder_.start_E2_;
         }
 
+        if ( idx.kspace_encode_step_1 >= dimensions_[1] )
+        {
+            return true;
+        }
+
+        size_t dataN = workOrder_.data_.get_number_of_elements();
         std::complex<float>* b = workOrder_.data_.begin();
         std::complex<float>* d = m2->getObjectPtr()->get_data_ptr();
-        if (samples != static_cast<int>(dimensions_[0])) 
+        if (samples != static_cast<size_t>(dimensions_[0])) 
         {
             GADGET_DEBUG1("Wrong number of samples received\n");
             return false;
@@ -1185,8 +1203,8 @@ bool GtPlusAccumulatorWorkOrderTriggerGadget::storeImageData(GadgetContainerMess
             reflectBuf.create(samples);
         }
 
-        std::vector<size_t> pos(10);
-        for (int c = 0; c < m1->getObjectPtr()->active_channels; c++) 
+        std::vector<size_t> pos(GT_DIM_NUM);
+        for (size_t c = 0; c < m1->getObjectPtr()->active_channels; c++) 
         {
             pos[0] = 0;
             pos[1] = idx.kspace_encode_step_1;
@@ -1198,11 +1216,22 @@ bool GtPlusAccumulatorWorkOrderTriggerGadget::storeImageData(GadgetContainerMess
             pos[7] = idx.repetition;
             pos[8] = idx.set;
             pos[9] = idx.segment;
-            long long offsetBuffer = workOrder_.data_.calculate_offset(pos);
+            pos[10] = idx.average;
+            size_t offsetBuffer = workOrder_.data_.calculate_offset(pos);
+
+            if ( offsetBuffer >= dataN )
+            {
+                break;
+            }
+
+            if ( offsetBuffer >= dataN )
+            {
+                break;
+            }
 
             if ( isReflect )
             {
-                for ( int s=0; s<samples; s++ )
+                for ( size_t s=0; s<samples; s++ )
                 {
                     reflectBuf(samples-1-s) = d[c*samples+s];
                 }
@@ -1243,14 +1272,17 @@ storeRefData(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, GadgetConta
         size_t samples =  m1->getObjectPtr()->number_of_samples;
         ISMRMRD::EncodingCounters idx = m1->getObjectPtr()->idx;
 
-        idx.segment = 0; // combine the segments
+        /*if ( workOrder_.retro_gated_images_ == 0 )
+        {*/
+            idx.segment = 0; // combine the segments
+        //}
 
         if ( workOrder_.ref_.get_number_of_elements() <= 0 )
         {
             meas_max_channel_ = m1->getObjectPtr()->active_channels;
 
-            int E1 = workOrder_.kSpaceMaxEncode1_+1;
-            int E2 = workOrder_.kSpaceMaxEncode2_+1;
+            size_t E1 = workOrder_.kSpaceMaxEncode1_+1;
+            size_t E2 = workOrder_.kSpaceMaxEncode2_+1;
             if ( E2 == 0 ) E2 = 1;
 
             if ( E1 < matrix_size_encoding_[1] ) E1 = matrix_size_encoding_[1];
@@ -1264,8 +1296,10 @@ storeRefData(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, GadgetConta
                 RO = samples;
             }
 
+            if ( RO < samples ) RO = samples;
+
             // find the loop counter boundary and allocate the buffer
-            GADGET_CONDITION_MSG(verboseMode_, "[RO E1 Cha Slice E2 Con Phase Rep Set Seg] = [" 
+            GADGET_CONDITION_MSG(verboseMode_, "[RO E1 Cha Slice E2 Con Phase Rep Set Seg Ave] = [" 
                                << RO 
                                << " " << E1 
                                << " " << meas_max_channel_ 
@@ -1275,7 +1309,8 @@ storeRefData(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, GadgetConta
                                << " " << meas_max_idx_.phase+1 
                                << " " << meas_max_idx_.repetition+1 
                                << " " << meas_max_idx_.set+1 
-                               << " " << meas_max_idx_.segment+1 << "]");
+                               << " " << meas_max_idx_.segment+1 
+                               << " " << meas_max_idx_.average+1 << "]");
 
             dimensions_.clear();
             dimensions_.push_back(RO);
@@ -1288,6 +1323,7 @@ storeRefData(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, GadgetConta
             dimensions_.push_back(meas_max_idx_.repetition+1);
             dimensions_.push_back(meas_max_idx_.set+1);
             dimensions_.push_back(meas_max_idx_.segment+1);
+            dimensions_.push_back(meas_max_idx_.average+1);
 
             size_t N = dimensions_.size();
             for ( ii=0; ii<N; ii++ )
@@ -1340,17 +1376,19 @@ storeRefData(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, GadgetConta
             if ( idx.repetition > meas_max_idx_ref_.repetition )                        meas_max_idx_ref_.repetition = idx.repetition;
             if ( idx.set > meas_max_idx_ref_.set )                                      meas_max_idx_ref_.set = idx.set;
             if ( idx.segment > meas_max_idx_ref_.segment )                              meas_max_idx_ref_.segment = idx.segment;
+            if ( idx.average > meas_max_idx_ref_.average )                              meas_max_idx_ref_.average = idx.average;
 
             size_t ii;
-            for ( ii=0; ii<ISMRMRD_USER_INTS; ii++ )
+            for ( ii=0; ii<ISMRMRD::ISMRMRD_USER_INTS; ii++ )
             {
                 if ( idx.user[ii] > meas_max_idx_ref_.user[ii] ) meas_max_idx_ref_.user[ii] = idx.user[ii];
             }
         }
 
+        size_t refN = workOrder_.ref_.get_number_of_elements();
         std::complex<float>* b = workOrder_.ref_.begin();
         std::complex<float>* d = m2->getObjectPtr()->get_data_ptr();
-        if (samples != static_cast<int>(dimensions_[0])) 
+        if (samples != static_cast<size_t>(workOrder_.ref_.get_size(0))) 
         {
             GADGET_DEBUG1("Wrong number of samples received\n");
             return false;
@@ -1363,8 +1401,8 @@ storeRefData(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, GadgetConta
             reflectBuf.create(samples);
         }
 
-        std::vector<size_t> pos(10);
-        for (int c = 0; c < m1->getObjectPtr()->active_channels; c++) 
+        std::vector<size_t> pos(GT_DIM_NUM);
+        for (uint16_t c = 0; c < m1->getObjectPtr()->active_channels; c++) 
         {
             pos[0] = 0;
             pos[1] = idx.kspace_encode_step_1;
@@ -1376,11 +1414,17 @@ storeRefData(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, GadgetConta
             pos[7] = idx.repetition;
             pos[8] = idx.set;
             pos[9] = idx.segment;
-            long long offsetBuffer = workOrder_.ref_.calculate_offset(pos);
+            pos[10] = idx.average;
+
+            size_t offsetBuffer = workOrder_.ref_.calculate_offset(pos);
+            if ( offsetBuffer >= refN )
+            {
+                break;
+            }
 
             if ( isReflect )
             {
-                for ( int s=0; s<samples; s++ )
+                for ( size_t s=0; s<samples; s++ )
                 {
                     reflectBuf(samples-1-s) = d[c*samples+s];
                 }
@@ -1396,6 +1440,25 @@ storeRefData(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, GadgetConta
             offsetBuffer = workOrder_.reflect_ref_.calculate_offset(pos);
             workOrder_.reflect_ref_.at(offsetBuffer) = isReflect;
         }
+
+        // if it is embedded mode, store the acquisition and physio time stamp
+        if ( workOrder_.CalibMode_ == ISMRMRD_embedded )
+        {
+            ind_time_stamp_[0] = 0;
+            ind_time_stamp_[1] = idx.kspace_encode_step_1;
+            ind_time_stamp_[2] = 0;
+            ind_time_stamp_[3] = idx.slice;
+            ind_time_stamp_[4] = idx.kspace_encode_step_2;
+            ind_time_stamp_[5] = idx.contrast;
+            ind_time_stamp_[6] = idx.phase;
+            ind_time_stamp_[7] = idx.repetition;
+            ind_time_stamp_[8] = idx.set;
+            ind_time_stamp_[9] = idx.segment;
+            ind_time_stamp_[10] = idx.average;
+
+            workOrder_.time_stamp_(ind_time_stamp_) = (real_value_type)(m1->getObjectPtr()->acquisition_time_stamp) * timeStampResolution_;
+            workOrder_.physio_time_stamp_(ind_time_stamp_) = (real_value_type)(m1->getObjectPtr()->physiology_time_stamp[0]) * timeStampResolution_;
+        }
     }
     catch(...)
     {
@@ -1423,8 +1486,9 @@ fillBuffer(ReadOutBufferType& readOutBuffer, BufferType& buf, ReflectBufferType&
         max_idx.repetition = 0;
         max_idx.set = 0;
         max_idx.segment = 0;
-        int max_channel = 0;
-        int max_col = 0;
+        max_idx.average = 0;
+        size_t max_channel = 0;
+        size_t max_col = 0;
 
         size_t a;
         for (a = 0; a < numOfReadOuts; a++) 
@@ -1458,11 +1522,14 @@ fillBuffer(ReadOutBufferType& readOutBuffer, BufferType& buf, ReflectBufferType&
             if ( idx.segment > max_idx.segment ) 
                 max_idx.segment = idx.segment;
 
+            if ( idx.average > max_idx.average ) 
+                max_idx.average = idx.average;
+
             if ( readOutBuffer[a].acqHead_.active_channels > max_channel ) 
                 max_channel = readOutBuffer[a].acqHead_.active_channels;
         }
 
-        GADGET_CONDITION_MSG(verboseMode_, "[RO E1 Cha Slice E2 Contrast Phase Rep Set Seg] = [" 
+        GADGET_CONDITION_MSG(verboseMode_, "[RO E1 Cha Slice E2 Contrast Phase Rep Set Seg Ave] = [" 
                                << max_col 
                                << " " << max_idx.kspace_encode_step_1+1 
                                << " " << max_channel 
@@ -1472,10 +1539,11 @@ fillBuffer(ReadOutBufferType& readOutBuffer, BufferType& buf, ReflectBufferType&
                                << " " << max_idx.phase+1 
                                << " " << max_idx.repetition+1 
                                << " " << max_idx.set+1 
-                               << " " << max_idx.segment+1 << "]");
+                               << " " << max_idx.segment+1 
+                               << " " << max_idx.average+1 << "]");
 
         // alloate buffer for data
-        std::vector<size_t> dims(10);
+        std::vector<size_t> dims(GT_DIM_NUM);
         dims[0] = max_col;
         dims[1] = max_idx.kspace_encode_step_1+1;
         dims[2] = max_channel;
@@ -1486,6 +1554,7 @@ fillBuffer(ReadOutBufferType& readOutBuffer, BufferType& buf, ReflectBufferType&
         dims[7] = max_idx.repetition+1;
         dims[8] = max_idx.set+1;
         dims[9] = max_idx.segment+1;
+        dims[10] = max_idx.average+1;
 
         try
         {
@@ -1507,8 +1576,8 @@ fillBuffer(ReadOutBufferType& readOutBuffer, BufferType& buf, ReflectBufferType&
         std::complex<float>* b = buf.begin();
 
         // copy the data
-        int c;
-        std::vector<size_t> pos(10);
+        uint16_t c;
+        std::vector<size_t> pos(GT_DIM_NUM);
 
         for ( a=0; a<numOfReadOuts; a++) 
         {
@@ -1527,6 +1596,7 @@ fillBuffer(ReadOutBufferType& readOutBuffer, BufferType& buf, ReflectBufferType&
                 pos[7] = idx.repetition;
                 pos[8] = idx.set;
                 pos[9] = idx.segment;
+                pos[10] = idx.average;
                 long long offsetBuffer = buf.calculate_offset(pos);
 
                 memcpy(b+offsetBuffer, d+c*readOutBuffer[a].acqHead_.number_of_samples, sizeof(std::complex<float>)*readOutBuffer[a].acqHead_.number_of_samples);
@@ -1546,15 +1616,44 @@ fillBuffer(ReadOutBufferType& readOutBuffer, BufferType& buf, ReflectBufferType&
     return true;
 }
 
+//XUE-TODO: Functions DO NOT return booleans in the Gadgetron
 bool GtPlusAccumulatorWorkOrderTriggerGadget::fillImageInfo(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, GtPlusGadgetImageArray* messageImage, const ISMRMRD::EncodingCounters& idx)
 {
+
     try
     {
         // fill the message info
-        int offset = messageImage->get_offset(idx.slice, idx.kspace_encode_step_2, idx.contrast, idx.phase, idx.repetition, idx.set, idx.segment);
+        size_t offset = messageImage->get_offset(idx.slice, idx.kspace_encode_step_2, idx.contrast, idx.phase, idx.repetition, idx.set, idx.segment, idx.average);
+
+        if( (offset >= messageImage->max_num_of_images_)
+            || (idx.slice>=messageImage->matrix_size[3])
+            || (idx.kspace_encode_step_2>=messageImage->matrix_size[4])
+            || (idx.contrast>=messageImage->matrix_size[5])
+            || (idx.phase>=messageImage->matrix_size[6])
+            || (idx.repetition>=messageImage->matrix_size[7])
+            || (idx.set>=messageImage->matrix_size[8])
+            || (idx.segment>=messageImage->matrix_size[9])
+            || (idx.average>=messageImage->matrix_size[10]) )
+        {
+            GADGET_WARN_MSG("Incoming image is over the boundary of buffer [SLC E2 CON PHS REP SET SEG AVE] = [ " 
+                                                                            << idx.slice << " " << idx.kspace_encode_step_2 << " " 
+                                                                            << idx.contrast << " " << idx.phase << " " 
+                                                                            << idx.repetition << " " << idx.set << " " 
+                                                                            << idx.segment << " " << idx.average << " ] ");
+            return true;
+        }
+
+        if( offset >= messageImage->max_num_of_images_ )
+        {
+            GADGET_WARN_MSG("Incoming image is over the boundary of buffer [SLC E2 CON PHS REP SET SEG AVE] = [ " 
+                                                                            << idx.slice << " " << idx.kspace_encode_step_2 << " " 
+                                                                            << idx.contrast << " " << idx.phase << " " << idx.repetition << " " 
+                                                                            << idx.set << " " << idx.segment << " " << idx.average << " ] ");
+            return true;
+        }
 
         // if it is the first acq in a slice, fill in all information
-        bool is_first_acq_in_slice = ISMRMRD::FlagBit(ISMRMRD::ACQ_FIRST_IN_SLICE).isSet(m1->getObjectPtr()->flags);
+        bool is_first_acq_in_slice = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_FIRST_IN_SLICE).isSet(m1->getObjectPtr()->flags);
 
         /*if ( is_first_acq_in_slice 
             || ( messageImage->imageArray_[offset].version==0 
@@ -1564,13 +1663,15 @@ bool GtPlusAccumulatorWorkOrderTriggerGadget::fillImageInfo(GadgetContainerMessa
                     && messageImage->imageArray_[offset].flags==0 
                     && messageImage->imageArray_[offset].measurement_uid==0 )
         {
-            GADGET_CONDITION_MSG(verboseMode_, "--> buffer image header - offset = " << offset << " - [SLC E2 CON PHS REP SET] = [" 
+            GADGET_CONDITION_MSG(verboseMode_, "--> buffer image header - offset = " << offset << " - [SLC E2 CON PHS REP SET SEG AVE] = [" 
                                                                       << idx.slice << " " 
                                                                       << idx.kspace_encode_step_2 << " " 
                                                                       << idx.contrast << " " 
                                                                       << idx.phase << " " 
                                                                       << idx.repetition << " " 
-                                                                      << idx.set << "]");
+                                                                      << idx.set << " " 
+                                                                      << idx.segment << " " 
+                                                                      << idx.average << "]");
 
             messageImage->imageArray_[offset].version = m1->getObjectPtr()->version;
             messageImage->imageArray_[offset].flags = m1->getObjectPtr()->flags;
@@ -1621,6 +1722,7 @@ bool GtPlusAccumulatorWorkOrderTriggerGadget::fillImageInfo(GadgetContainerMessa
             messageImage->imageArray_[offset].phase = m1->getObjectPtr()->idx.phase;
             messageImage->imageArray_[offset].repetition = m1->getObjectPtr()->idx.repetition;
             messageImage->imageArray_[offset].set = m1->getObjectPtr()->idx.set;
+            messageImage->imageArray_[offset].average = m1->getObjectPtr()->idx.average;
 
             messageImage->imageArray_[offset].acquisition_time_stamp = m1->getObjectPtr()->acquisition_time_stamp;
 
@@ -1628,12 +1730,12 @@ bool GtPlusAccumulatorWorkOrderTriggerGadget::fillImageInfo(GadgetContainerMessa
             messageImage->imageArray_[offset].physiology_time_stamp[1] = m1->getObjectPtr()->physiology_time_stamp[1];
             messageImage->imageArray_[offset].physiology_time_stamp[2] = m1->getObjectPtr()->physiology_time_stamp[2];
 
-            messageImage->imageArray_[offset].image_data_type = ISMRMRD::DATA_COMPLEX_FLOAT;
+            messageImage->imageArray_[offset].data_type = ISMRMRD::ISMRMRD_CXFLOAT;
 
-            messageImage->imageArray_[offset].image_type = ISMRMRD::TYPE_MAGNITUDE;
+            messageImage->imageArray_[offset].image_type = ISMRMRD::ISMRMRD_IMTYPE_MAGNITUDE;
 
-            messageImage->imageArray_[offset].image_index = ++image_counter_;
-            messageImage->imageArray_[offset].image_series_index = image_series_;
+            messageImage->imageArray_[offset].image_index = (uint16_t)(++image_counter_);
+            messageImage->imageArray_[offset].image_series_index = (uint16_t)image_series_;
 
             // need to store the free user parameters
             memcpy(messageImage->imageArray_[offset].user_int, m1->getObjectPtr()->user_int, sizeof(int32_t)*8);
@@ -1641,8 +1743,26 @@ bool GtPlusAccumulatorWorkOrderTriggerGadget::fillImageInfo(GadgetContainerMessa
         }
 
         // whether or not this acq is the first in a slice, we need to fill the TimeStamps and PMUTimeStamps
-        messageImage->imageArray_[offset].time_stamps[idx.kspace_encode_step_1] = m1->getObjectPtr()->acquisition_time_stamp;
-        messageImage->imageArray_[offset].pmu_time_stamps[idx.kspace_encode_step_1] = m1->getObjectPtr()->physiology_time_stamp[0];
+        if ( idx.kspace_encode_step_1 < messageImage->imageArray_[offset].time_stamps.size() )
+        {
+            messageImage->imageArray_[offset].time_stamps[idx.kspace_encode_step_1] = m1->getObjectPtr()->acquisition_time_stamp;
+            messageImage->imageArray_[offset].pmu_time_stamps[idx.kspace_encode_step_1] = m1->getObjectPtr()->physiology_time_stamp[0];
+
+            ind_time_stamp_[0] = 0;
+            ind_time_stamp_[1] = idx.kspace_encode_step_1;
+            ind_time_stamp_[2] = 0;
+            ind_time_stamp_[3] = idx.slice;
+            ind_time_stamp_[4] = idx.kspace_encode_step_2;
+            ind_time_stamp_[5] = idx.contrast;
+            ind_time_stamp_[6] = idx.phase;
+            ind_time_stamp_[7] = idx.repetition;
+            ind_time_stamp_[8] = idx.set;
+            ind_time_stamp_[9] = idx.segment;
+            ind_time_stamp_[10] = idx.average;
+
+            workOrder_.time_stamp_(ind_time_stamp_) = (real_value_type)(m1->getObjectPtr()->acquisition_time_stamp) * timeStampResolution_;
+            workOrder_.physio_time_stamp_(ind_time_stamp_) = (real_value_type)(m1->getObjectPtr()->physiology_time_stamp[0]) * timeStampResolution_;
+        }
     }
     catch(...)
     {
@@ -1656,7 +1776,7 @@ bool GtPlusAccumulatorWorkOrderTriggerGadget::fillImageInfo(GadgetContainerMessa
 size_t GtPlusAccumulatorWorkOrderTriggerGadget::
 computeEncodedSizeE1(size_t centerE1, size_t maxE1)
 {
-    int E1;
+    size_t E1;
     if ( (maxE1+1)%2 == 0 )
     {
         E1 = 2*centerE1;
@@ -1672,7 +1792,7 @@ computeEncodedSizeE1(size_t centerE1, size_t maxE1)
 size_t GtPlusAccumulatorWorkOrderTriggerGadget::
 computeEncodedSizeE2(size_t centerE2, size_t maxE2)
 {
-    int E2;
+    size_t E2;
     if ( (maxE2+1)%2 == 0 )
     {
         E2 = 2*centerE2;
@@ -1705,6 +1825,8 @@ triggerByDimEqual(Gadgetron::gtPlus::ISMRMRDDIM& triggerDim, size_t value, bool
         // copy the image content
         GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<ValueType>().extractSubArrayForDim(workOrder_.data_, cm2->getObjectPtr()->data_, triggerDim, value, lessEqual));
         GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<unsigned short>().extractSubArrayForDim(workOrder_.reflect_, cm2->getObjectPtr()->reflect_, triggerDim, value, lessEqual));
+        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<real_value_type>().extractSubArrayForDim(workOrder_.time_stamp_, cm2->getObjectPtr()->time_stamp_, triggerDim, value, lessEqual));
+        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<real_value_type>().extractSubArrayForDim(workOrder_.physio_time_stamp_, cm2->getObjectPtr()->physio_time_stamp_, triggerDim, value, lessEqual));
 
         // copy the ref
         if ( workOrder_.ref_.get_number_of_elements()>0 
@@ -1811,6 +1933,8 @@ triggerByDimLessEqual(Gadgetron::gtPlus::ISMRMRDDIM& triggerDim, size_t value, b
         // copy the image content
         GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<ValueType>().extractSubArrayForDim(workOrder_.data_, cm2->getObjectPtr()->data_, triggerDim, value, lessEqual));
         GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<unsigned short>().extractSubArrayForDim(workOrder_.reflect_, cm2->getObjectPtr()->reflect_, triggerDim, value, lessEqual));
+        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<real_value_type>().extractSubArrayForDim(workOrder_.time_stamp_, cm2->getObjectPtr()->time_stamp_, triggerDim, value, lessEqual));
+        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<real_value_type>().extractSubArrayForDim(workOrder_.physio_time_stamp_, cm2->getObjectPtr()->physio_time_stamp_, triggerDim, value, lessEqual));
 
         // copy the ref
         if ( workOrder_.ref_.get_number_of_elements()>0 
@@ -1917,6 +2041,8 @@ triggerByDimEqual(Gadgetron::gtPlus::ISMRMRDDIM& triggerDim1, size_t value1, Gad
         // copy the image content
         GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<ValueType>().extractSubArrayForDim(workOrder_.data_, cm2->getObjectPtr()->data_, triggerDim1, value1, triggerDim2, value2, lessEqual));
         GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<unsigned short>().extractSubArrayForDim(workOrder_.reflect_, cm2->getObjectPtr()->reflect_, triggerDim1, value1, triggerDim2, value2, lessEqual));
+        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<real_value_type>().extractSubArrayForDim(workOrder_.time_stamp_, cm2->getObjectPtr()->time_stamp_, triggerDim1, value1, triggerDim2, value2, lessEqual));
+        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<real_value_type>().extractSubArrayForDim(workOrder_.physio_time_stamp_, cm2->getObjectPtr()->physio_time_stamp_, triggerDim1, value1, triggerDim2, value2, lessEqual));
 
         // copy the ref
         if ( workOrder_.ref_.get_number_of_elements()>0 
@@ -2023,6 +2149,8 @@ triggerByDim1LessEqualDim2Equal(Gadgetron::gtPlus::ISMRMRDDIM& triggerDim1, size
         // copy the image content
         GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<ValueType>().extractSubArrayForDim1LessEqualDim2Equal(workOrder_.data_, cm2->getObjectPtr()->data_, triggerDim1, value1, triggerDim2, value2));
         GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<unsigned short>().extractSubArrayForDim1LessEqualDim2Equal(workOrder_.reflect_, cm2->getObjectPtr()->reflect_, triggerDim1, value1, triggerDim2, value2));
+        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<real_value_type>().extractSubArrayForDim1LessEqualDim2Equal(workOrder_.time_stamp_, cm2->getObjectPtr()->time_stamp_, triggerDim1, value1, triggerDim2, value2));
+        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<real_value_type>().extractSubArrayForDim1LessEqualDim2Equal(workOrder_.physio_time_stamp_, cm2->getObjectPtr()->physio_time_stamp_, triggerDim1, value1, triggerDim2, value2));
 
         // copy the ref
         if ( workOrder_.ref_.get_number_of_elements()>0 
@@ -2126,6 +2254,8 @@ bool GtPlusAccumulatorWorkOrderTriggerGadget::triggerWorkOrderAllInClose()
 
         // copy the image content
         cm2->getObjectPtr()->data_ = workOrder_.data_;
+        cm2->getObjectPtr()->time_stamp_ = workOrder_.time_stamp_;
+        cm2->getObjectPtr()->physio_time_stamp_ = workOrder_.physio_time_stamp_;
         cm2->getObjectPtr()->reflect_ = workOrder_.reflect_;
 
         // copy the ref
@@ -2156,7 +2286,7 @@ bool GtPlusAccumulatorWorkOrderTriggerGadget::triggerWorkOrderAllInClose()
             {
                 GADGET_DEBUG1("fillBuffer(phaseCorrBuffer_) failed ... \n");
                 cm1->release();
-                return GADGET_FAIL;
+                return false;
             }
 
             cm2->getObjectPtr()->phaseCorr_ = workOrder_.phaseCorr_;
@@ -2172,7 +2302,7 @@ bool GtPlusAccumulatorWorkOrderTriggerGadget::triggerWorkOrderAllInClose()
             {
                 GADGET_DEBUG1("fillBuffer(noiseBuffer_) failed ... \n");
                 cm1->release();
-                return GADGET_FAIL;
+                return false;
             }
 
             cm2->getObjectPtr()->noise_ = workOrder_.noise_;
@@ -2186,7 +2316,7 @@ bool GtPlusAccumulatorWorkOrderTriggerGadget::triggerWorkOrderAllInClose()
             {
                 GADGET_DEBUG1("fillBuffer(otherBuffer_) failed ... \n");
                 cm1->release();
-                return GADGET_FAIL;
+                return false;
             }
 
             cm2->getObjectPtr()->other_ = workOrder_.other_;
@@ -2196,7 +2326,7 @@ bool GtPlusAccumulatorWorkOrderTriggerGadget::triggerWorkOrderAllInClose()
         // send to next gadget
         if (this->next()->putq(cm1) < 0) 
         {
-            return GADGET_FAIL;
+            return false;
         }
     }
     catch(...)
@@ -2211,15 +2341,15 @@ bool GtPlusAccumulatorWorkOrderTriggerGadget::triggerWorkOrderAllInClose()
 size_t GtPlusAccumulatorWorkOrderTriggerGadget::
 getDimValue(const ISMRMRD::AcquisitionHeader& acqHeader, Gadgetron::gtPlus::ISMRMRDDIM& dim)
 {
-    if ( dim == DIM_Encoding1 ) return acqHeader.idx.kspace_encode_step_1;
-    if ( dim == DIM_Slice ) return acqHeader.idx.slice;
-    if ( dim == DIM_Encoding2 ) return acqHeader.idx.kspace_encode_step_2;
-    if ( dim == DIM_Contrast ) return acqHeader.idx.contrast;
-    if ( dim == DIM_Phase ) return acqHeader.idx.phase;
-    if ( dim == DIM_Repetition ) return acqHeader.idx.repetition;
-    if ( dim == DIM_Set ) return acqHeader.idx.set;
-    if ( dim == DIM_Segment ) return acqHeader.idx.segment;
-    if ( dim == DIM_Average ) return acqHeader.idx.average;
+    if ( dim == DIM_Encoding1 )             return acqHeader.idx.kspace_encode_step_1;
+    if ( dim == DIM_Slice )                 return acqHeader.idx.slice;
+    if ( dim == DIM_Encoding2 )             return acqHeader.idx.kspace_encode_step_2;
+    if ( dim == DIM_Contrast )              return acqHeader.idx.contrast;
+    if ( dim == DIM_Phase )                 return acqHeader.idx.phase;
+    if ( dim == DIM_Repetition )            return acqHeader.idx.repetition;
+    if ( dim == DIM_Set )                   return acqHeader.idx.set;
+    if ( dim == DIM_Segment )               return acqHeader.idx.segment;
+    if ( dim == DIM_Average )               return acqHeader.idx.average;
 
     return 0;
 }
@@ -2227,15 +2357,15 @@ getDimValue(const ISMRMRD::AcquisitionHeader& acqHeader, Gadgetron::gtPlus::ISMR
 void GtPlusAccumulatorWorkOrderTriggerGadget::
 setDimValue(ISMRMRD::AcquisitionHeader& acqHeader, Gadgetron::gtPlus::ISMRMRDDIM& dim, size_t value)
 {
-    if ( dim == DIM_Encoding1 ) acqHeader.idx.kspace_encode_step_1 = value;
-    if ( dim == DIM_Slice ) acqHeader.idx.slice = value;
-    if ( dim == DIM_Encoding2 ) acqHeader.idx.kspace_encode_step_2 = value;
-    if ( dim == DIM_Contrast ) acqHeader.idx.contrast = value;
-    if ( dim == DIM_Phase ) acqHeader.idx.phase = value;
-    if ( dim == DIM_Repetition ) acqHeader.idx.repetition = value;
-    if ( dim == DIM_Set ) acqHeader.idx.set = value;
-    if ( dim == DIM_Segment ) acqHeader.idx.segment = value;
-    if ( dim == DIM_Average ) acqHeader.idx.average = value;
+    if ( dim == DIM_Encoding1 ) acqHeader.idx.kspace_encode_step_1  = (uint16_t)value;
+    if ( dim == DIM_Slice ) acqHeader.idx.slice                     = (uint16_t)value;
+    if ( dim == DIM_Encoding2 ) acqHeader.idx.kspace_encode_step_2  = (uint16_t)value;
+    if ( dim == DIM_Contrast ) acqHeader.idx.contrast               = (uint16_t)value;
+    if ( dim == DIM_Phase ) acqHeader.idx.phase                     = (uint16_t)value;
+    if ( dim == DIM_Repetition ) acqHeader.idx.repetition           = (uint16_t)value;
+    if ( dim == DIM_Set ) acqHeader.idx.set                         = (uint16_t)value;
+    if ( dim == DIM_Segment ) acqHeader.idx.segment                 = (uint16_t)value;
+    if ( dim == DIM_Average ) acqHeader.idx.average                 = (uint16_t)value;
 
     return;
 }
@@ -2246,8 +2376,8 @@ int GtPlusAccumulatorWorkOrderTriggerGadget::close(unsigned long flags)
 
     if ( BaseClass::close(flags) != GADGET_OK ) return GADGET_FAIL;
 
-    // if ( flags!=0 && !triggered_in_close_ )
-    if ( !triggered_in_close_ )
+    if ( flags!=0 && !triggered_in_close_ )
+    // if ( !triggered_in_close_ )
     {
         triggered_in_close_ = true;
 
diff --git a/gadgets/gtPlus/GtPlusAccumulatorWorkOrderTriggerGadget.h b/gadgets/gtPlus/GtPlusAccumulatorWorkOrderTriggerGadget.h
index 31bf47f..f2529f5 100644
--- a/gadgets/gtPlus/GtPlusAccumulatorWorkOrderTriggerGadget.h
+++ b/gadgets/gtPlus/GtPlusAccumulatorWorkOrderTriggerGadget.h
@@ -9,8 +9,8 @@
 #include "GtPlusGadgetExport.h"
 #include "Gadget.h"
 #include "hoNDArray.h"
-#include "ismrmrd.h"
-#include "GadgetIsmrmrdReadWrite.h"
+#include "ismrmrd/ismrmrd.h"
+#include "ismrmrd/xml.h"
 
 #include "hoNDArray_utils.h"
 
@@ -23,8 +23,8 @@
 namespace Gadgetron
 {
 
-// [Ro E1 Cha Slice E2 Con Phase Rep Set Seg]
-//   0  1  2   3    4   5    6     7  8   9
+// [Ro E1 Cha Slice E2 Con Phase Rep Set Seg AVE]
+//   0  1  2   3    4   5    6     7  8   9  10
 
 struct ReadOutBuffer
 {
@@ -36,9 +36,10 @@ struct ReadOutBuffer
 class EXPORTGTPLUSGADGET GtPlusAccumulatorWorkOrderTriggerGadget : public Gadget2< ISMRMRD::AcquisitionHeader, hoNDArray< std::complex<float> > >
 {
 public:
-    GADGET_DECLARE(GtPlusAccumulatorGadget);
+    GADGET_DECLARE(GtPlusAccumulatorWorkOrderTriggerGadget);
 
-    typedef std::complex<float> ValueType;
+    typedef float real_value_type;
+    typedef std::complex<real_value_type> ValueType;
 
     typedef Gadget2< ISMRMRD::AcquisitionHeader, hoNDArray< ValueType > > BaseClass;
 
@@ -201,6 +202,14 @@ protected:
     // encoding matrix size (the real sampled size)
     size_t matrix_size_encoding_[3];
 
+    // maximal sampled line along E1 and E2
+    size_t max_sampled_E1_;
+    size_t max_sampled_E2_;
+
+    // index of center line along E1 and E2
+    size_t center_line_E1_;
+    size_t center_line_E2_;
+
     // encoding space size (the logic kspace size)
     size_t space_size_[3];
 
@@ -217,8 +226,12 @@ protected:
     // recon filed of view [mm]
     float field_of_view_recon_[3];
 
-    int image_counter_;
-    int image_series_;
+    // for the embedded mode
+    size_t embedded_ref_lines_E1_;
+    size_t embedded_ref_lines_E2_;
+
+    size_t image_counter_;
+    size_t image_series_;
 
     // mark the first kspace line
     bool first_kspace_scan_;
@@ -229,38 +242,56 @@ protected:
     // whether the next gadget has been triggered in process(...)
     bool triggered_in_process_;
 
+    // whether the next gadget has been triggered because the triggerDim1 changes meet the required number of kspace
+    // only used for triggerDim1!=DIM_NONE && triggerDim2!=DIM_NONE
+    bool triggered_in_process_by_numOfKSpace_triggerDim1_;
+
     // whether the next gadget has been triggered in process(...) for the last acquisition
     // if so, extra triggering in close(...) is not needed
     bool triggered_in_process_last_acq_;
 
-    int meas_max_ro_;
+    size_t meas_max_ro_;
     ISMRMRD::EncodingCounters meas_max_idx_;
-    int meas_max_channel_;
+    size_t meas_max_channel_;
 
     // maximal idx for reference data
     ISMRMRD::EncodingCounters meas_max_idx_ref_;
 
     // track the trigger dim1 and dim2
-    int prev_dim1_;
-    int curr_dim1_;
+    size_t prev_dim1_;
+    size_t curr_dim1_;
 
-    int prev_dim2_;
-    int curr_dim2_;
+    size_t prev_dim2_;
+    size_t curr_dim2_;
 
     // store the previous acquisition head
     ISMRMRD::AcquisitionHeader prev_acq_header_;
 
     // for trigger dim1, need to count its times
-    int count_dim1_;
+    size_t count_dim1_;
 
     // a general workorder to store the buffered data
     WorkOrderType workOrder_;
 
+    // indicator for the arrival of last acq
+    bool last_acq_arrived_;
+
+    // time stamp resolution (default, 0.0025s)
+    float timeStampResolution_;
+
+    // exporter
+    Gadgetron::gtPlus::gtPlusIOAnalyze gt_exporter_;
+
     // util for gtplus
-    Gadgetron::gtPlus::gtPlusISMRMRDReconUtil<GT_Complex8> gtPlus_util_;
+    Gadgetron::gtPlus::gtPlusISMRMRDReconUtil< std::complex<float> > gtPlus_util_;
 
     // in verbose mode, more info is printed out
     bool verboseMode_;
+
+private:
+
+    // index for the time stamp
+    std::vector<size_t> ind_time_stamp_;
 };
 
 }
diff --git a/gadgets/gtPlus/GtPlusGadgetImageArray.cpp b/gadgets/gtPlus/GtPlusGadgetImageArray.cpp
index ba884d9..ea63ca3 100644
--- a/gadgets/gtPlus/GtPlusGadgetImageArray.cpp
+++ b/gadgets/gtPlus/GtPlusGadgetImageArray.cpp
@@ -13,11 +13,11 @@ GtPlusGadgetImageExt::GtPlusGadgetImageExt() : ISMRMRD::ImageHeader()
     matrix_size[0] = 0; matrix_size[1] = 0; matrix_size[2] = 0;
     field_of_view[0] = 0; field_of_view[1] = 0; field_of_view[2] = 0;
     channels = 0;
-    memset(position, 0, sizeof(float)*ISMRMRD_POSITION_LENGTH);
-    memset(read_dir, 0, sizeof(float)*ISMRMRD_POSITION_LENGTH);
-    memset(phase_dir, 0, sizeof(float)*ISMRMRD_POSITION_LENGTH);
-    memset(slice_dir, 0, sizeof(float)*ISMRMRD_POSITION_LENGTH);
-    memset(patient_table_position, 0, sizeof(float)*ISMRMRD_POSITION_LENGTH);
+    memset(position, 0, sizeof(float)*ISMRMRD::ISMRMRD_POSITION_LENGTH);
+    memset(read_dir, 0, sizeof(float)*ISMRMRD::ISMRMRD_POSITION_LENGTH);
+    memset(phase_dir, 0, sizeof(float)*ISMRMRD::ISMRMRD_POSITION_LENGTH);
+    memset(slice_dir, 0, sizeof(float)*ISMRMRD::ISMRMRD_POSITION_LENGTH);
+    memset(patient_table_position, 0, sizeof(float)*ISMRMRD::ISMRMRD_POSITION_LENGTH);
 
     average = 0;
     slice = 0;
@@ -26,15 +26,15 @@ GtPlusGadgetImageExt::GtPlusGadgetImageExt() : ISMRMRD::ImageHeader()
     repetition = 0;
     set = 0;
     acquisition_time_stamp = 0;
-    memset(physiology_time_stamp, 0, sizeof(uint32_t)*ISMRMRD_PHYS_STAMPS);
+    memset(physiology_time_stamp, 0, sizeof(uint32_t)*ISMRMRD::ISMRMRD_PHYS_STAMPS);
 
-    image_data_type = 0;
+    data_type = 0;
     image_type = 0;
     image_index = 0;
     image_series_index = 0;
 
-    memset(user_int, 0, sizeof(int32_t)*ISMRMRD_USER_INTS);
-    memset(user_float, 0, sizeof(float)*ISMRMRD_USER_FLOATS);
+    memset(user_int, 0, sizeof(int32_t)*ISMRMRD::ISMRMRD_USER_INTS);
+    memset(user_float, 0, sizeof(float)*ISMRMRD::ISMRMRD_USER_FLOATS);
 
     time_stamps.clear();
     pmu_time_stamps.clear();
@@ -44,11 +44,11 @@ GtPlusGadgetImageExt::~GtPlusGadgetImageExt()
 {
 }
 
-void GtPlusGadgetImageExt::set_matrix_size(size_t index, ACE_UINT16 size)
+void GtPlusGadgetImageExt::set_matrix_size(size_t index, size_t size)
 {
     if (index < 3) 
     {
-        matrix_size[index] = size;
+        matrix_size[index] = (uint16_t)size;
     }
 
     if ( index == 1 )
@@ -76,11 +76,11 @@ void GtPlusGadgetImageExt::copy(GtPlusGadgetImageExt& aMessageImage)
 
     channels = aMessageImage.channels;
 
-    memcpy(position, aMessageImage.position, sizeof(float)*ISMRMRD_POSITION_LENGTH);
-    memcpy(read_dir, aMessageImage.read_dir, sizeof(float)*ISMRMRD_DIRECTION_LENGTH);
-    memcpy(phase_dir, aMessageImage.phase_dir, sizeof(float)*ISMRMRD_DIRECTION_LENGTH);
-    memcpy(slice_dir, aMessageImage.slice_dir, sizeof(float)*ISMRMRD_DIRECTION_LENGTH);
-    memcpy(patient_table_position, aMessageImage.patient_table_position, sizeof(float)*ISMRMRD_POSITION_LENGTH);
+    memcpy(position, aMessageImage.position, sizeof(float)*ISMRMRD::ISMRMRD_POSITION_LENGTH);
+    memcpy(read_dir, aMessageImage.read_dir, sizeof(float)*ISMRMRD::ISMRMRD_DIRECTION_LENGTH);
+    memcpy(phase_dir, aMessageImage.phase_dir, sizeof(float)*ISMRMRD::ISMRMRD_DIRECTION_LENGTH);
+    memcpy(slice_dir, aMessageImage.slice_dir, sizeof(float)*ISMRMRD::ISMRMRD_DIRECTION_LENGTH);
+    memcpy(patient_table_position, aMessageImage.patient_table_position, sizeof(float)*ISMRMRD::ISMRMRD_POSITION_LENGTH);
 
     average = aMessageImage.average;
     slice = aMessageImage.slice;
@@ -91,15 +91,15 @@ void GtPlusGadgetImageExt::copy(GtPlusGadgetImageExt& aMessageImage)
 
     acquisition_time_stamp = aMessageImage.acquisition_time_stamp;
 
-    memcpy(physiology_time_stamp, aMessageImage.physiology_time_stamp, sizeof(uint32_t)*ISMRMRD_PHYS_STAMPS);
+    memcpy(physiology_time_stamp, aMessageImage.physiology_time_stamp, sizeof(uint32_t)*ISMRMRD::ISMRMRD_PHYS_STAMPS);
 
-    image_data_type = aMessageImage.image_data_type;
+    data_type = aMessageImage.data_type;
     image_type = aMessageImage.image_type;
     image_index = aMessageImage.image_index;
     image_series_index = aMessageImage.image_series_index;
 
-    memcpy(user_int, aMessageImage.user_int, sizeof(int32_t)*ISMRMRD_USER_INTS);
-    memcpy(user_float, aMessageImage.user_float, sizeof(float)*ISMRMRD_USER_FLOATS);
+    memcpy(user_int, aMessageImage.user_int, sizeof(int32_t)*ISMRMRD::ISMRMRD_USER_INTS);
+    memcpy(user_float, aMessageImage.user_float, sizeof(float)*ISMRMRD::ISMRMRD_USER_FLOATS);
 
     time_stamps = aMessageImage.time_stamps;
     pmu_time_stamps = aMessageImage.pmu_time_stamps;
@@ -108,15 +108,15 @@ void GtPlusGadgetImageExt::copy(GtPlusGadgetImageExt& aMessageImage)
 void GtPlusGadgetImageExt::recomputeHeader(const GtPlusGadgetImageExt& aMessageImage, double weight)
 {
     size_t ii;
-    for ( ii=0; ii<ISMRMRD_POSITION_LENGTH; ii++ )
+    for ( ii=0; ii<ISMRMRD::ISMRMRD_POSITION_LENGTH; ii++ )
     {
-        position[ii] = (position[ii]*weight) + (1.0-weight)*aMessageImage.position[ii];
-        patient_table_position[ii] = (patient_table_position[ii]*weight) + (1.0-weight)*aMessageImage.patient_table_position[ii];
+        position[ii] = (float)((position[ii]*weight) + (1.0-weight)*aMessageImage.position[ii]);
+        patient_table_position[ii] = (float)((patient_table_position[ii]*weight) + (1.0-weight)*aMessageImage.patient_table_position[ii]);
     }
 
     acquisition_time_stamp = (uint32_t)((acquisition_time_stamp*weight) + (1.0-weight)*aMessageImage.acquisition_time_stamp + 0.5);
 
-    for ( ii=0; ii<ISMRMRD_PHYS_STAMPS; ii++ )
+    for ( ii=0; ii<ISMRMRD::ISMRMRD_PHYS_STAMPS; ii++ )
     {
         physiology_time_stamp[ii] = (uint32_t)((physiology_time_stamp[ii]*weight) + (1.0-weight)*aMessageImage.physiology_time_stamp[ii] + 0.5);
     }
@@ -137,36 +137,36 @@ void GtPlusGadgetImageExt::dump()
 
     size_t ii;
 
-    cout << "position[ISMRMRD_POSITION_LENGTH]      : ";
-    for ( ii=0; ii<ISMRMRD_POSITION_LENGTH; ii++ )
+    cout << "position[ISMRMRD::ISMRMRD_POSITION_LENGTH]      : ";
+    for ( ii=0; ii<ISMRMRD::ISMRMRD_POSITION_LENGTH; ii++ )
     {
         cout << position[ii] << " ";
     }
     cout << endl;
 
-    cout << "read_dir[ISMRMRD_POSITION_LENGTH]      : ";
-    for ( ii=0; ii<ISMRMRD_POSITION_LENGTH; ii++ )
+    cout << "read_dir[ISMRMRD::ISMRMRD_POSITION_LENGTH]      : ";
+    for ( ii=0; ii<ISMRMRD::ISMRMRD_POSITION_LENGTH; ii++ )
     {
         cout << read_dir[ii] << " ";
     }
     cout << endl;
 
-    cout << "phase_dir[ISMRMRD_POSITION_LENGTH]      : ";
-    for ( ii=0; ii<ISMRMRD_POSITION_LENGTH; ii++ )
+    cout << "phase_dir[ISMRMRD::ISMRMRD_POSITION_LENGTH]      : ";
+    for ( ii=0; ii<ISMRMRD::ISMRMRD_POSITION_LENGTH; ii++ )
     {
         cout << phase_dir[ii] << " ";
     }
     cout << endl;
 
-    cout << "slice_dir[ISMRMRD_POSITION_LENGTH]      : ";
-    for ( ii=0; ii<ISMRMRD_POSITION_LENGTH; ii++ )
+    cout << "slice_dir[ISMRMRD::ISMRMRD_POSITION_LENGTH]      : ";
+    for ( ii=0; ii<ISMRMRD::ISMRMRD_POSITION_LENGTH; ii++ )
     {
         cout << slice_dir[ii] << " ";
     }
     cout << endl;
 
-    cout << "patient_table_position[ISMRMRD_POSITION_LENGTH]      : ";
-    for ( ii=0; ii<ISMRMRD_POSITION_LENGTH; ii++ )
+    cout << "patient_table_position[ISMRMRD::ISMRMRD_POSITION_LENGTH]      : ";
+    for ( ii=0; ii<ISMRMRD::ISMRMRD_POSITION_LENGTH; ii++ )
     {
         cout << patient_table_position[ii] << " ";
     }
@@ -180,27 +180,27 @@ void GtPlusGadgetImageExt::dump()
     cout << "set                : " << set << endl;
     cout << "acquisition_time_stamp : " << acquisition_time_stamp << endl;
 
-    cout << "physiology_time_stamp[ISMRMRD_PHYS_STAMPS] : ";
-    for ( ii=0; ii<ISMRMRD_PHYS_STAMPS; ii++ )
+    cout << "physiology_time_stamp[ISMRMRD::ISMRMRD_PHYS_STAMPS] : ";
+    for ( ii=0; ii<ISMRMRD::ISMRMRD_PHYS_STAMPS; ii++ )
     {
         cout << physiology_time_stamp[ii] << " ";
     }
     cout << endl;
 
-    cout << "image_data_type    : " << image_data_type << endl;
+    cout << "data_type          : " << data_type << endl;
     cout << "image_type         : " << image_type << endl;
     cout << "image_index        : " << image_index << endl;
     cout << "image_series_index : " << image_series_index << endl;
 
-    cout << "user_int[ISMRMRD_USER_INTS]        : ";
-    for ( ii=0; ii<ISMRMRD_USER_INTS; ii++ )
+    cout << "user_int[ISMRMRD::ISMRMRD_USER_INTS]        : ";
+    for ( ii=0; ii<ISMRMRD::ISMRMRD_USER_INTS; ii++ )
     {
         cout << user_int[ii] << " ";
     }
     cout << endl;
 
-    cout << "user_float[ISMRMRD_USER_FLOATS]    : ";
-    for ( ii=0; ii<ISMRMRD_USER_FLOATS; ii++ )
+    cout << "user_float[ISMRMRD::ISMRMRD_USER_FLOATS]    : ";
+    for ( ii=0; ii<ISMRMRD::ISMRMRD_USER_FLOATS; ii++ )
     {
         cout << user_float[ii] << " ";
     }
@@ -208,18 +208,20 @@ void GtPlusGadgetImageExt::dump()
     cout << "----------------------------------------------------------" << endl;
 }
 
-// [Ro E1 Cha Slice E2 Con Phase Rep Set Seg]
-//   0  1  2   3     4  5    6     7   8   9
+// [Ro E1 Cha Slice E2 Con Phase Rep Set Seg Ave]
+//   0  1  2   3     4  5    6     7   8   9  10
 // store a scan with 10 dimensions
 
 GtPlusGadgetImageArray::GtPlusGadgetImageArray() 
 :   imageArray_(0)
 {
     size_t ii;
-    for ( ii=0; ii<10; ii++ )
+    for ( ii=0; ii<GT_DIM_NUM; ii++ )
     {
         matrix_size[ii] = 0;
     }
+
+    max_num_of_images_ = 0;
 }
 
 GtPlusGadgetImageArray::GtPlusGadgetImageArray(const GtPlusGadgetImageArray& imArray) : imageArray_(0) 
@@ -227,22 +229,24 @@ GtPlusGadgetImageArray::GtPlusGadgetImageArray(const GtPlusGadgetImageArray& imA
     this->copy(imArray);
 }
 
-GtPlusGadgetImageArray::GtPlusGadgetImageArray(int aSize[10])
+GtPlusGadgetImageArray::GtPlusGadgetImageArray(size_t aSize[GT_DIM_NUM])
 {
     try
     {
         size_t ii;
-        for ( ii=0; ii<10; ii++ )
+        for ( ii=0; ii<GT_DIM_NUM; ii++ )
         {
             matrix_size[ii] = aSize[ii];
         }
 
         size_t len = 1;
-        for ( ii=3; ii<10; ii++ )
+        for ( ii=3; ii<GT_DIM_NUM; ii++ )
         {
             len *= matrix_size[ii];
         }
 
+        max_num_of_images_ = len;
+
         if ( len > 0 )
         {
             imageArray_ = new GtPlusGadgetImageExt[len];
@@ -262,18 +266,18 @@ GtPlusGadgetImageArray::~GtPlusGadgetImageArray()
     }
 }
 
-void GtPlusGadgetImageArray::resize(int aSize[10])
+void GtPlusGadgetImageArray::resize(size_t aSize[GT_DIM_NUM])
 {
     try
     {
         size_t ii;
-        for ( ii=0; ii<10; ii++ )
+        for ( ii=0; ii<GT_DIM_NUM; ii++ )
         {
             matrix_size[ii] = aSize[ii];
         }
 
         size_t len = 1;
-        for ( ii=3; ii<10; ii++ )
+        for ( ii=3; ii<GT_DIM_NUM; ii++ )
         {
             len *= matrix_size[ii];
         }
@@ -284,6 +288,8 @@ void GtPlusGadgetImageArray::resize(int aSize[10])
             imageArray_ = NULL;
         }
 
+        max_num_of_images_ = len;
+
         if ( len > 0 )
         {
             imageArray_ = new GtPlusGadgetImageExt[len];
@@ -300,19 +306,22 @@ bool GtPlusGadgetImageArray::copy(const GtPlusGadgetImageArray& imageArray)
     try
     {
         if (imageArray_) delete [] imageArray_;
+        max_num_of_images_ = 0;
 
         size_t ii;
-        for ( ii=0; ii<10; ii++ )
+        for ( ii=0; ii<GT_DIM_NUM; ii++ )
         {
             matrix_size[ii] = imageArray.matrix_size[ii];
         }
 
         size_t len = 1;
-        for ( ii=3; ii<10; ii++ )
+        for ( ii=3; ii<GT_DIM_NUM; ii++ )
         {
             len *= matrix_size[ii];
         }
 
+        max_num_of_images_ = len;
+
         if ( len > 0 )
         {
             imageArray_ = new GtPlusGadgetImageExt[len];
@@ -332,20 +341,21 @@ bool GtPlusGadgetImageArray::copy(const GtPlusGadgetImageArray& imageArray)
     return true;
 }
 
-int GtPlusGadgetImageArray::get_offset(int slc, int e2, int con, int phs, int rep, int set, int seg)
+size_t GtPlusGadgetImageArray::get_offset(size_t slc, size_t e2, size_t con, size_t phs, size_t rep, size_t set, size_t seg, size_t ave)
 {
-    int offset = seg*matrix_size[8]*matrix_size[7]*matrix_size[6]*matrix_size[5]*matrix_size[4]*matrix_size[3]
+    size_t offset = ave  *matrix_size[9]*matrix_size[8]*matrix_size[7]*matrix_size[6]*matrix_size[5]*matrix_size[4]*matrix_size[3]
+                    + seg*matrix_size[8]*matrix_size[7]*matrix_size[6]*matrix_size[5]*matrix_size[4]*matrix_size[3]
                     + set*matrix_size[7]*matrix_size[6]*matrix_size[5]*matrix_size[4]*matrix_size[3]
                     + rep*matrix_size[6]*matrix_size[5]*matrix_size[4]*matrix_size[3]
                     + phs*matrix_size[5]*matrix_size[4]*matrix_size[3]
                     + con*matrix_size[4]*matrix_size[3]
-                    + e2*matrix_size[3]
+                    + e2 *matrix_size[3]
                     + slc;
     return offset;
 }
 
 // Slice E2 Con Phase Rep Set Seg
-void GtPlusGadgetImageArray::findDimIndex(Gadgetron::gtPlus::ISMRMRDDIM& dim, int& ind)
+void GtPlusGadgetImageArray::findDimIndex(Gadgetron::gtPlus::ISMRMRDDIM& dim, size_t& ind)
 {
     switch (dim)
     {
@@ -377,6 +387,10 @@ void GtPlusGadgetImageArray::findDimIndex(Gadgetron::gtPlus::ISMRMRDDIM& dim, in
             ind = 9;
         break;
 
+        case Gadgetron::gtPlus::DIM_Average:
+            ind = 10;
+        break;
+
         default:
             ind = 0;
     }
@@ -389,15 +403,15 @@ extractGadgetImageArrayEqual(Gadgetron::gtPlus::ISMRMRDDIM& dim, size_t value, G
 {
     try
     {
-        int dimInd;
+        size_t dimInd;
         findDimIndex(dim, dimInd);
 
         GADGET_DEBUG_CHECK_RETURN_FALSE( value >= matrix_size[dimInd] );
 
-        size_t startInd[7];
-        size_t endInd[7];
+        size_t startInd[GT_DIM_NUM-3];
+        size_t endInd[GT_DIM_NUM-3];
 
-        for ( int d=Gadgetron::gtPlus::DIM_Slice; d<=Gadgetron::gtPlus::DIM_Segment; d++ )
+        for ( size_t d=Gadgetron::gtPlus::DIM_Slice; d<=Gadgetron::gtPlus::DIM_Average; d++ )
         {
             if ( d == dim )
             {
@@ -427,19 +441,19 @@ extractGadgetImageArrayEqual(Gadgetron::gtPlus::ISMRMRDDIM& dim1, size_t value1,
 {
     try
     {
-        int dimInd1;
+        size_t dimInd1;
         findDimIndex(dim1, dimInd1);
         GADGET_DEBUG_CHECK_RETURN_FALSE( value1 >= matrix_size[dimInd1] );
 
 
-        int dimInd2;
+        size_t dimInd2;
         findDimIndex(dim2, dimInd2);
         GADGET_DEBUG_CHECK_RETURN_FALSE( value2 >= matrix_size[dimInd2] );
 
-        size_t startInd[7];
-        size_t endInd[7];
+        size_t startInd[GT_DIM_NUM-3];
+        size_t endInd[GT_DIM_NUM-3];
 
-        for ( int d=Gadgetron::gtPlus::DIM_Slice; d<=Gadgetron::gtPlus::DIM_Segment; d++ )
+        for ( size_t d=Gadgetron::gtPlus::DIM_Slice; d<=Gadgetron::gtPlus::DIM_Average; d++ )
         {
             if ( d == dim1 )
             {
@@ -474,14 +488,14 @@ extractGadgetImageArrayLessEqual(Gadgetron::gtPlus::ISMRMRDDIM& dim, size_t valu
 {
     try
     {
-        int dimInd;
+        size_t dimInd;
         findDimIndex(dim, dimInd);
         GADGET_DEBUG_CHECK_RETURN_FALSE( value >= matrix_size[dimInd] );
 
-        size_t startInd[7];
-        size_t endInd[7];
+        size_t startInd[GT_DIM_NUM-3];
+        size_t endInd[GT_DIM_NUM-3];
 
-        for ( int d=Gadgetron::gtPlus::DIM_Slice; d<=Gadgetron::gtPlus::DIM_Segment; d++ )
+        for ( size_t d=Gadgetron::gtPlus::DIM_Slice; d<=Gadgetron::gtPlus::DIM_Average; d++ )
         {
             if ( d == dim )
             {
@@ -512,19 +526,19 @@ extractGadgetImageArray_Dim1LessEqual_Dim2Equal(Gadgetron::gtPlus::ISMRMRDDIM& d
 {
     try
     {
-        int dimInd1;
+        size_t dimInd1;
         findDimIndex(dim1, dimInd1);
 
-        int dimInd2;
+        size_t dimInd2;
         findDimIndex(dim2, dimInd2);
 
         GADGET_DEBUG_CHECK_RETURN_FALSE( value1 >= matrix_size[dimInd1] );
         GADGET_DEBUG_CHECK_RETURN_FALSE( value2 >= matrix_size[dimInd2] );
 
-        size_t startInd[7];
-        size_t endInd[7];
+        size_t startInd[GT_DIM_NUM];
+        size_t endInd[GT_DIM_NUM];
 
-        for ( int d=Gadgetron::gtPlus::DIM_Slice; d<=Gadgetron::gtPlus::DIM_Segment; d++ )
+        for ( size_t d=Gadgetron::gtPlus::DIM_Slice; d<=Gadgetron::gtPlus::DIM_Average; d++ )
         {
             if ( d == dim1 )
             {
@@ -559,39 +573,42 @@ getSubImageArray(size_t* startInd, size_t* endInd, GtPlusGadgetImageArray& image
 {
     try
     {
-        int aSize[10];
+        size_t aSize[GT_DIM_NUM];
         aSize[0] = matrix_size[0];
         aSize[1] = matrix_size[1];
         aSize[2] = matrix_size[2];
 
         size_t ii;
-        for ( ii=3; ii<10; ii++ )
+        for ( ii=3; ii<GT_DIM_NUM; ii++ )
         {
             aSize[ii] = endInd[ii-3]-startInd[ii-3];
         }
 
         imageArray.resize(aSize);
 
-        size_t slc, e2, con, phs, rep, set, seg;
+        size_t slc, e2, con, phs, rep, set, seg, ave;
 
-        for ( seg=startInd[6]; seg<endInd[6]; seg++ )
+        for ( ave=startInd[7]; ave<endInd[7]; ave++ )
         {
-            for ( set=startInd[5]; set<endInd[5]; set++ )
+            for ( seg=startInd[6]; seg<endInd[6]; seg++ )
             {
-                for ( rep=startInd[4]; rep<endInd[4]; rep++ )
+                for ( set=startInd[5]; set<endInd[5]; set++ )
                 {
-                    for ( phs=startInd[3]; phs<endInd[3]; phs++ )
+                    for ( rep=startInd[4]; rep<endInd[4]; rep++ )
                     {
-                        for ( con=startInd[2]; con<endInd[2]; con++ )
+                        for ( phs=startInd[3]; phs<endInd[3]; phs++ )
                         {
-                            for ( e2=startInd[1]; e2<endInd[1]; e2++ )
+                            for ( con=startInd[2]; con<endInd[2]; con++ )
                             {
-                                for ( slc=startInd[0]; slc<endInd[0]; slc++ )
+                                for ( e2=startInd[1]; e2<endInd[1]; e2++ )
                                 {
-                                    int offset = this->get_offset(slc, e2, con, phs, rep, set, seg);
-                                    int offsetDst= imageArray.get_offset(slc-startInd[0], e2-startInd[1], con-startInd[2], phs-startInd[3], rep-startInd[4], set-startInd[5], seg-startInd[6]);
+                                    for ( slc=startInd[0]; slc<endInd[0]; slc++ )
+                                    {
+                                        size_t offset = this->get_offset(slc, e2, con, phs, rep, set, seg, ave);
+                                        size_t offsetDst= imageArray.get_offset(slc-startInd[0], e2-startInd[1], con-startInd[2], phs-startInd[3], rep-startInd[4], set-startInd[5], seg-startInd[6], ave-startInd[7]);
 
-                                    imageArray.imageArray_[offsetDst] = imageArray_[offset];
+                                        imageArray.imageArray_[offsetDst] = imageArray_[offset];
+                                    }
                                 }
                             }
                         }
@@ -615,7 +632,7 @@ void GtPlusGadgetImageArray::dump()
     std::cout << "GtPlusGadgetImageArray" << std::endl;
     std::cout << "==========================================================" << std::endl;
     std::cout << "matrix_size           : ";
-    for ( ii=0; ii<10; ii++ )
+    for ( ii=0; ii<GT_DIM_NUM; ii++ )
     {
         std::cout << matrix_size[ii] << " ";
     }
@@ -623,32 +640,37 @@ void GtPlusGadgetImageArray::dump()
     std::cout << "----------------------------------------------------------" << std::endl;
     if ( imageArray_ )
     {
-        int slc, e2, con, phs, rep, set, seg;
-        for ( seg=0; seg<matrix_size[9]; seg++ )
+        int slc, e2, con, phs, rep, set, seg, ave;
+
+        for ( ave=0; ave<matrix_size[10]; ave++ )
         {
-            for ( set=0; set<matrix_size[8]; set++ )
+            for ( seg=0; seg<matrix_size[9]; seg++ )
             {
-                for ( rep=0; rep<matrix_size[7]; rep++ )
+                for ( set=0; set<matrix_size[8]; set++ )
                 {
-                    for ( phs=0; phs<matrix_size[6]; phs++ )
+                    for ( rep=0; rep<matrix_size[7]; rep++ )
                     {
-                        for ( con=0; con<matrix_size[5]; con++ )
+                        for ( phs=0; phs<matrix_size[6]; phs++ )
                         {
-                            for ( e2=0; e2<matrix_size[4]; e2++ )
+                            for ( con=0; con<matrix_size[5]; con++ )
                             {
-                                for ( slc=0; slc<matrix_size[3]; slc++ )
+                                for ( e2=0; e2<matrix_size[4]; e2++ )
                                 {
-                                    int offset = get_offset(slc, e2, con, phs, rep, set, seg);
-                                    std::cout << "[Slice E2 Contrast Phase Rep Set Seg] = [" 
-                                                << " " << slc 
-                                                << " " << e2 
-                                                << " " << con 
-                                                << " " << phs 
-                                                << " " << rep 
-                                                << " " << set 
-                                                << " " << seg << "]" << std::endl;
-
-                                    imageArray_[offset].dump();
+                                    for ( slc=0; slc<matrix_size[3]; slc++ )
+                                    {
+                                        size_t offset = get_offset(slc, e2, con, phs, rep, set, seg, ave);
+                                        std::cout << "[Slice E2 Contrast Phase Rep Set Seg Ave] = [" 
+                                                    << " " << slc 
+                                                    << " " << e2 
+                                                    << " " << con 
+                                                    << " " << phs 
+                                                    << " " << rep 
+                                                    << " " << set 
+                                                    << " " << seg 
+                                                    << " " << ave << "]" << std::endl;
+
+                                        imageArray_[offset].dump();
+                                    }
                                 }
                             }
                         }
@@ -656,7 +678,6 @@ void GtPlusGadgetImageArray::dump()
                 }
             }
         }
-
     }
     std::cout << "==========================================================" << std::endl;
 }
diff --git a/gadgets/gtPlus/GtPlusGadgetImageArray.h b/gadgets/gtPlus/GtPlusGadgetImageArray.h
index 9513e28..8d855a6 100644
--- a/gadgets/gtPlus/GtPlusGadgetImageArray.h
+++ b/gadgets/gtPlus/GtPlusGadgetImageArray.h
@@ -9,7 +9,7 @@
 #include "GtPlusGadgetExport.h"
 #include "Gadget.h"
 #include "hoNDArray.h"
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
 #include "GadgetIsmrmrdReadWrite.h"
 
 #include "gtPlusIOAnalyze.h"
@@ -32,7 +32,7 @@ struct  EXPORTGTPLUSGADGET GtPlusGadgetImageExt : public ISMRMRD::ImageHeader
     ~GtPlusGadgetImageExt();
 
     void copy(GtPlusGadgetImageExt& aMessageImage);
-    void set_matrix_size(size_t index, ACE_UINT16 size);
+    void set_matrix_size(size_t index, size_t size);
 
     // interpolation is performed
     // this = weight * this + (1-weight)*aMessageImage
@@ -40,27 +40,31 @@ struct  EXPORTGTPLUSGADGET GtPlusGadgetImageExt : public ISMRMRD::ImageHeader
     void dump();
 }; 
 
-// [Ro E1 Cha Slice E2 Con Phase Rep Set Seg]
-//  0  1  2   3     4  5   6     7   8   9
-// store a scan with 10 dimensions
+// [Ro E1 Cha Slice E2 Con Phase Rep Set Seg Ave]
+//  0  1  2   3     4  5   6     7   8   9   10
+// store a scan with 11 dimensions
+#define GT_DIM_NUM 11
+
 struct  EXPORTGTPLUSGADGET GtPlusGadgetImageArray
 {
     // size of the image array
-    ACE_UINT16 matrix_size[10];
+    size_t matrix_size[GT_DIM_NUM];
+
+    size_t max_num_of_images_;
 
     // message information for every 2D image [RO E1 Cha Slice E2 Contrast Phase Rep Set Seg]
     GtPlusGadgetImageExt* imageArray_;
 
     GtPlusGadgetImageArray();
     GtPlusGadgetImageArray(const GtPlusGadgetImageArray& imArray);
-    GtPlusGadgetImageArray(int aSize[10]);
+    GtPlusGadgetImageArray(size_t aSize[GT_DIM_NUM]);
     ~GtPlusGadgetImageArray();
 
-    void findDimIndex(Gadgetron::gtPlus::ISMRMRDDIM& dim, int& ind);
+    void findDimIndex(Gadgetron::gtPlus::ISMRMRDDIM& dim, size_t& ind);
     bool getSubImageArray(size_t* startInd, size_t* endInd, GtPlusGadgetImageArray& imageArray);
-    void resize(int aSize[10]);
+    void resize(size_t aSize[GT_DIM_NUM]);
     bool copy(const GtPlusGadgetImageArray& imageArray);
-    int get_offset(int slc, int e2, int con, int phs, int rep, int set, int seg);
+    size_t get_offset(size_t slc, size_t e2, size_t con, size_t phs, size_t rep, size_t set, size_t seg, size_t ave);
     bool extractGadgetImageArrayEqual(Gadgetron::gtPlus::ISMRMRDDIM& dim, size_t value, GtPlusGadgetImageArray& imageArray);
     bool extractGadgetImageArrayEqual(Gadgetron::gtPlus::ISMRMRDDIM& dim1, size_t value1, Gadgetron::gtPlus::ISMRMRDDIM& dim2, size_t value2, GtPlusGadgetImageArray& imageArray);
     bool extractGadgetImageArrayLessEqual(Gadgetron::gtPlus::ISMRMRDDIM& dim, size_t value, GtPlusGadgetImageArray& imageArray);
diff --git a/gadgets/gtPlus/GtPlusGadgetOpenMP.cpp b/gadgets/gtPlus/GtPlusGadgetOpenMP.cpp
index 5c0a2f1..6040454 100644
--- a/gadgets/gtPlus/GtPlusGadgetOpenMP.cpp
+++ b/gadgets/gtPlus/GtPlusGadgetOpenMP.cpp
@@ -32,7 +32,7 @@ bool prepOpenMP()
             omp_set_num_threads(numOpenMPProcs);
         }
 
-        omp_set_nested(1);
+        // omp_set_nested(1);
         int allowOpenMPNested = omp_get_nested();
         GADGET_MSG("GtPlusRecon, allowOpenMPNested : " << allowOpenMPNested);
 
@@ -70,34 +70,4 @@ bool prepOpenMP()
 
 #endif // USE_OMP
 
-#ifdef USE_MKL
-
-bool prepMKL()
-{
-    try
-    {
-        GADGET_MSG("--> MKL info <--");
-        GADGET_MSG("--------------------------------------------------------");
-        MKL_INT oldmode = vmlSetMode( VML_EP );
-        GADGET_MSG("GtPlus, set MKL vml precision to EP ... ");
-        GADGET_MSG("--------------------------------------------------------");
-    }
-    catch(...)
-    {
-        GADGET_ERROR_MSG("Errors in GtPlus prepMKL() ... ");
-        return false;
-    }
-
-    return true;
-}
-
-#else
-
-bool prepMKL()
-{
-    return true;
-}
-
-#endif // USE_MKL
-
 }
diff --git a/gadgets/gtPlus/GtPlusGadgetOpenMP.h b/gadgets/gtPlus/GtPlusGadgetOpenMP.h
index b63d796..81c4338 100644
--- a/gadgets/gtPlus/GtPlusGadgetOpenMP.h
+++ b/gadgets/gtPlus/GtPlusGadgetOpenMP.h
@@ -1,5 +1,5 @@
 /** \file   GtPlusGadgetOpenMP.h
-    \brief  Pack up the OpenMP and MKL support in the GtPlus
+    \brief  Pack up the OpenMP support in the GtPlus
     \author Hui Xue
 */
 
@@ -9,7 +9,7 @@
 #include "GtPlusGadgetExport.h"
 #include "Gadget.h"
 #include "hoNDArray.h"
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
 #include "GadgetIsmrmrdReadWrite.h"
 #include "GadgetronTimer.h"
 #include "gtPlusISMRMRDReconUtil.h"
@@ -22,6 +22,5 @@ namespace Gadgetron
 {
 
 bool EXPORTGTPLUSGADGET prepOpenMP();
-bool EXPORTGTPLUSGADGET prepMKL();
 
 }
diff --git a/gadgets/gtPlus/GtPlusImageReconGadget.cpp b/gadgets/gtPlus/GtPlusImageReconGadget.cpp
new file mode 100644
index 0000000..50ed0d0
--- /dev/null
+++ b/gadgets/gtPlus/GtPlusImageReconGadget.cpp
@@ -0,0 +1,711 @@
+
+#include "GtPlusImageReconGadget.h"
+#include "GtPlusGadgetOpenMP.h"
+#include <iomanip>
+
+using namespace Gadgetron::gtPlus;
+
+namespace Gadgetron
+{
+
+    GtPlusImageReconGadget::GtPlusImageReconGadget()
+    {
+        // Defaults; readParameters() later overwrites the verbosity, timing
+        // and debug folder settings.
+        verboseMode_ = false;
+        performTiming_ = true;
+        debugFolder_ = "DebugOutput";
+
+        // Counter handed to sendOutImages() as the image series number.
+        image_series_num_ = 100;
+
+        gt_timer3_.set_timing_in_destruction(false);
+        gt_timer2_.set_timing_in_destruction(false);
+        gt_timer1_.set_timing_in_destruction(false);
+        Gadgetron::prepOpenMP();
+    }
+
+    GtPlusImageReconGadget::~GtPlusImageReconGadget()
+    {
+        // Intentionally empty: this destructor performs no explicit cleanup.
+    }
+
+    bool GtPlusImageReconGadget::readParameters()
+    {
+        // Reads verboseMode, debugFolder and performTiming from the gadget
+        // properties. Returns false (after logging) if anything throws.
+        bool succeeded = true;
+        try
+        {
+            // note: this banner is gated by the verbosity value held before the read below
+            GADGET_CONDITION_MSG(verboseMode_, "------> GtPlusImageReconGadget parameters <------");
+            verboseMode_ = this->get_bool_value("verboseMode");
+            GADGET_CONDITION_MSG(verboseMode_, "verboseMode_ is " << verboseMode_);
+            GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
+
+            debugFolder_ = *(this->get_string_value("debugFolder"));
+            GADGET_CONDITION_MSG(verboseMode_, "debugFolder_ is " << debugFolder_);
+
+            if ( debugFolder_.empty() )
+            {
+                GADGET_MSG("GtPlusImageRecon, debugFolder is not set ...");
+            }
+            else
+            {
+                // presumably fills debugFolder_fullPath_ from the folder name -- confirm in getDebugFolderPath
+                Gadgetron::getDebugFolderPath(debugFolder_, debugFolder_fullPath_, verboseMode_);
+            }
+
+            performTiming_ = this->get_bool_value("performTiming");
+            GADGET_CONDITION_MSG(verboseMode_, "performTiming_ is " << performTiming_);
+            GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in GtPlusImageReconGadget::readParameters() ... ");
+            succeeded = false;
+        }
+        return succeeded;
+    }
+
+    int GtPlusImageReconGadget::process_config(ACE_Message_Block* mb)
+    {
+        // Reads the gadget parameters, parses the ISMRMRD XML header carried by mb and
+        // passes it to findEncodingLimits() together with meas_max_idx_. Returns GADGET_OK
+        // or GADGET_FAIL; a header parse failure is logged and the exception is rethrown.
+        GADGET_CHECK_RETURN(this->readParameters(), GADGET_FAIL);
+
+        ISMRMRD::IsmrmrdHeader h;
+        try {
+          deserialize(mb->rd_ptr(),h);
+        } catch (...) {
+          GADGET_DEBUG1("Error parsing ISMRMRD Header");
+          throw; // the "return GADGET_FAIL" that used to follow was unreachable
+        }
+
+        if (h.encoding.size() != 1)
+        {
+            // %d expects an int; passing a size_t through varargs is undefined behaviour
+            GADGET_DEBUG2("Number of encoding spaces: %d\n", static_cast<int>(h.encoding.size()));
+            GADGET_DEBUG1("This simple GtPlusImageReconGadget only supports one encoding space\n");
+            return GADGET_FAIL;
+        }
+
+        GADGET_CHECK_RETURN(findEncodingLimits(h, meas_max_idx_, verboseMode_), GADGET_FAIL);
+        return GADGET_OK;
+    }
+
+    int GtPlusImageReconGadget::process(GadgetContainerMessage<ImageBufferType>* m1)
+    {
+        // Handles one incoming image buffer. A single-image buffer whose first data role
+        // is gfactor / SNR map / std map / wrap-around map is sent out directly; anything
+        // else goes through processImageBuffer(). The images and m1 are released before
+        // GADGET_OK is returned; GADGET_FAIL is returned when a checked step fails.
+        GADGET_CONDITION_MSG(verboseMode_, "GtPlusImageReconGadget::process(...) starts ... ");
+        std::vector<std::string> processStr;
+        std::vector<std::string> dataRole;
+
+        ImageBufferType& ori = *m1->getObjectPtr();
+
+        if ( ori.get_number_of_elements() == 1 )
+        {
+            size_t num = (*ori(0)).attrib_.length(GTPLUS_DATA_ROLE);
+            GADGET_CHECK_RETURN(num>0, GADGET_FAIL);
+
+            dataRole.resize(num);
+            for ( size_t ii=0; ii<num; ii++ )
+            {
+                dataRole[ii] = std::string( (*ori(0)).attrib_.as_str(GTPLUS_DATA_ROLE, ii) );
+            }
+
+            if ( (dataRole[0] == GTPLUS_IMAGE_GFACTOR)
+                || (dataRole[0] == GTPLUS_IMAGE_SNR_MAP)
+                || (dataRole[0] == GTPLUS_IMAGE_STD_MAP)
+                || (dataRole[0] == GTPLUS_IMAGE_WRAPAROUNDMAP) )
+            {
+                GADGET_CHECK_RETURN(this->sendOutImages(ori, image_series_num_++, processStr, dataRole), GADGET_FAIL);
+                GADGET_CHECK_RETURN(this->releaseImageBuffer(ori), GADGET_FAIL);
+                m1->release(); // was leaked: only the general path below released m1
+                return GADGET_OK;
+            }
+        }
+
+        this->processImageBuffer(ori);
+        this->releaseImageBuffer(ori);
+        m1->release();
+        return GADGET_OK;
+    }
+
+    int GtPlusImageReconGadget::processImageBuffer(ImageBufferType& ori)
+    {
+        // Default processing: log the buffer dimensions when verbose, then send all
+        // images out under the next series number. Always returns GADGET_OK.
+        boost::shared_ptr< std::vector<size_t> > bufDims = ori.get_dimensions();
+        GADGET_CONDITION_MSG(verboseMode_, "[Cha Slice E2 Con Phase Rep Set Ave] = ["
+            << (*bufDims)[0] << " " << (*bufDims)[1] << " " << (*bufDims)[2] << " " << (*bufDims)[3] << " "
+            << (*bufDims)[4] << " " << (*bufDims)[5] << " " << (*bufDims)[6] << " " << (*bufDims)[7] << "]");
+
+        const std::vector<std::string> noProcessing;
+        const std::vector<std::string> noDataRole;
+        this->sendOutImages(ori, image_series_num_++, noProcessing, noDataRole);
+        return GADGET_OK;
+    }
+
+    bool GtPlusImageReconGadget::fillWithNULL(ImageBufferType& buf)
+    {
+        // Sets every slot of buf to NULL without deleting anything; false if an exception is caught.
+        bool succeeded = true;
+        try
+        {
+            const size_t numOfSlots = buf.get_number_of_elements();
+            for ( size_t n=0; n<numOfSlots; ++n )
+            {
+                buf(n) = NULL;
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in GtPlusImageReconGadget::fillWithNULL(ImageBufferType& buf) ... ");
+            succeeded = false;
+        }
+        return succeeded;
+    }
+
+    bool GtPlusImageReconGadget::releaseImageBuffer(ImageBufferType& buf)
+    {
+        // Deletes every non-NULL image held by buf and resets its slot to NULL.
+        // Returns false if an exception is caught along the way.
+        bool succeeded = true;
+        try
+        {
+            const size_t numOfSlots = buf.get_number_of_elements();
+            for ( size_t n=0; n<numOfSlots; ++n )
+            {
+                ImageType* pCurr = buf(n);
+                if ( pCurr == NULL ) continue; // empty slot, nothing to free
+
+                delete pCurr;
+                buf(n) = NULL;
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in GtPlusImageReconGadget::releaseImageBuffer(ImageBufferType& buf) ... ");
+            succeeded = false;
+        }
+        return succeeded;
+    }
+
+    hoNDImage<std::complex<float>, 3>* GtPlusImageReconGadget::getImage3DFromImage2D(ImageBufferType& ori, size_t cha, size_t slc, size_t con, size_t phs, size_t rep, size_t set, size_t ave)
+    {
+        // Stacks the 2D images along dimension 2 (E2) of ori into a newly allocated 3D image that the
+        // caller owns. RO, E1 and the attributes come from the e2==0 image. Throws if a 2D image is NULL.
+        ImageType* pImage2D = ori(cha, slc, 0, con, phs, rep, set, ave);
+        GADGET_CHECK_THROW(pImage2D!=NULL);
+        const size_t RO = pImage2D->get_size(0);
+        const size_t E1 = pImage2D->get_size(1);
+        const size_t E2 = ori.get_size(2);
+
+        Image3DType* pImage3D = new Image3DType(RO, E1, E2); // plain new throws on failure, so no NULL check
+        try
+        {
+            pImage3D->attrib_ = pImage2D->attrib_;
+            for ( size_t e2=0; e2<E2; e2++ )
+            {
+                pImage2D = ori(cha, slc, e2, con, phs, rep, set, ave);
+                GADGET_CHECK_THROW(pImage2D!=NULL);
+                memcpy(pImage3D->begin()+e2*RO*E1, pImage2D->begin(), sizeof(ValueType)*RO*E1 );
+            }
+        }
+        catch(...) { delete pImage3D; throw; } // a NULL slice used to leak the 3D image
+
+        return pImage3D;
+    }
+
+    bool GtPlusImageReconGadget::getImage2DFromImage3D(Image3DType& image3D, ImageBufferType& image2DBuf)
+    {
+        // Splits image3D along its third dimension into newly allocated 2D images and
+        // stores them in image2DBuf, which is re-created as a 1D buffer of that length.
+        const size_t RO = image3D.get_size(0);
+        const size_t E1 = image3D.get_size(1);
+        const size_t numOfSlices = image3D.get_size(2);
+
+        std::vector<size_t> bufDim(1, numOfSlices);
+        image2DBuf.create(bufDim);
+
+        for ( size_t s=0; s<numOfSlices; ++s )
+        {
+            ImageType* pImage2D = new ImageType(RO, E1);
+            GADGET_CHECK_RETURN_FALSE(pImage2D!=NULL);
+
+            // copy the RO*E1 pixels of slice s
+            memcpy(pImage2D->begin(), image3D.begin()+s*RO*E1, sizeof(ValueType)*RO*E1);
+            image2DBuf(s) = pImage2D;
+        }
+
+        return true;
+    }
+
+    size_t GtPlusImageReconGadget::computeSeriesImageNumber (ISMRMRD::ImageHeader& imheader, size_t nCHA, size_t cha, size_t nE2, size_t e2)
+    {
+        // Flattens [cha e2 slice contrast phase set repetition average] into a 1-based
+        // image number, with cha varying fastest and average slowest. The extents of the
+        // header-indexed dimensions come from meas_max_idx_; nE2==0 is treated as 1.
+        const size_t e2Extent = ( nE2 == 0 ) ? 1 : nE2;
+        const size_t slcExtent = meas_max_idx_.slice+1;
+        const size_t conExtent = meas_max_idx_.contrast+1;
+        const size_t phsExtent = meas_max_idx_.phase+1;
+        const size_t setExtent = meas_max_idx_.set+1;
+        const size_t repExtent = meas_max_idx_.repetition+1;
+        // Horner evaluation of the mixed-radix index, slowest dimension first
+        size_t linear = imheader.average;
+        linear = linear*repExtent + imheader.repetition;
+        linear = linear*setExtent + imheader.set;
+        linear = linear*phsExtent + imheader.phase;
+        linear = linear*conExtent + imheader.contrast;
+        linear = linear*slcExtent + imheader.slice;
+        linear = linear*e2Extent + e2;
+        return linear*nCHA + cha + 1;
+    }
+
+    bool GtPlusImageReconGadget::sendOutImages(ImageBufferType& images, int seriesNum, const std::vector<std::string>& processStr, const std::vector<std::string>& dataRole, const std::vector<float>& windowCenter, const std::vector<float>& windowWidth)
+    {
+        try
+        {
+            size_t CHA = images.get_size(0);
+            size_t SLC = images.get_size(1);
+            size_t E2  = images.get_size(2);
+            size_t CON = images.get_size(3);
+            size_t PHS = images.get_size(4);
+            size_t REP = images.get_size(5);
+            size_t SET = images.get_size(6);
+            size_t AVE = images.get_size(7);
+
+            GADGET_CONDITION_MSG(verboseMode_, "--> GtPlusImageReconGadget, sending out images, array boundary [CHA SLC E2 CON PHS REP SET AVE] = [" 
+                << CHA << " " << SLC << " " 
+                << E2 << " " << CON << " " 
+                << PHS << " " << REP << " " 
+                << SET << " " << AVE << "] " );
+
+            size_t ave(0), set(0), rep(0), phs(0), con(0), e2(0), slc(0), cha(0);
+            std::vector<size_t> dim2D(2);
+
+            for ( ave=0; ave<AVE; ave++ )
+            {
+                for ( set=0; set<SET; set++ )
+                {
+                    for ( rep=0; rep<REP; rep++ )
+                    {
+                        for ( phs=0; phs<PHS; phs++ )
+                        {
+                            for ( con=0; con<CON; con++ )
+                            {
+                                for ( e2=0; e2<E2; e2++ )
+                                {
+                                    for ( slc=0; slc<SLC; slc++ )
+                                    {
+                                        for ( cha=0; cha<CHA; cha++ )
+                                        {
+                                            ImageType* pImage = images(cha, slc, e2, con, phs, rep, set, ave);
+                                            if ( pImage != NULL )
+                                            {
+                                                Gadgetron::GadgetContainerMessage<ISMRMRD::ImageHeader>* cm1 = new Gadgetron::GadgetContainerMessage<ISMRMRD::ImageHeader>();
+                                                Gadgetron::GadgetContainerMessage<ImgArrayType>* cm2 = new Gadgetron::GadgetContainerMessage<ImgArrayType>();
+                                                Gadgetron::GadgetContainerMessage<ISMRMRD::MetaContainer>* cm3 = new Gadgetron::GadgetContainerMessage<ISMRMRD::MetaContainer>();
+
+                                                try
+                                                {
+                                                    cm1->cont(cm2);
+                                                    cm2->cont(cm3);
+
+                                                    // set the ISMRMRD image header
+                                                    GADGET_CHECK_THROW( gtPlus_util_.setImageHeaderISMRMRDFromMetaAttributes(pImage->attrib_, *cm1->getObjectPtr()) );
+
+                                                    //long long imageNum(0);
+                                                    //if ( pImage->attrib_.attributeInteger_.get(GTPLUS_IMAGENUMBER, 0, imageNum) )
+                                                    //{
+                                                    //    cm1->getObjectPtr()->image_index = (uint16_t)imageNum;
+                                                    //}
+
+                                                    long long imageNum = this->computeSeriesImageNumber (*cm1->getObjectPtr(), CHA, cha, E2, e2);
+                                                    cm1->getObjectPtr()->image_index = (uint16_t)imageNum;
+                                                    pImage->attrib_.set(GTPLUS_IMAGENUMBER, (long)imageNum);
+
+                                                    cm1->getObjectPtr()->image_series_index = seriesNum;
+
+                                                    // set the image data
+                                                    size_t RO = pImage->get_size(0);
+                                                    size_t E1 = pImage->get_size(1);
+
+                                                    dim2D[0] = RO;
+                                                    dim2D[1] = E1;
+
+                                                    cm2->getObjectPtr()->create(dim2D);
+                                                    memcpy(cm2->getObjectPtr()->get_data_ptr(), pImage->get_data_ptr(), pImage->get_number_of_bytes());
+
+                                                    // set the attributes
+                                                    *cm3->getObjectPtr() = pImage->attrib_;
+
+                                                    if ( !dataRole.empty() && (dataRole[0]!=GTPLUS_IMAGE_REGULAR) )
+                                                    {
+                                                        std::string str;
+
+                                                        // data role
+                                                        bool isRealImage = false;
+                                                        bool isParametricMap = false;
+                                                        bool isParametricT1Map = false;
+                                                        bool isParametricT1SDMap = false;
+                                                        bool isParametricT2Map = false;
+                                                        bool isParametricT2SDMap = false;
+                                                        bool isParametricT2StarMap = false;
+                                                        bool isParametricT2StarMaskMap = false;
+                                                        bool isParametricT2StarSDMap = false;
+                                                        bool isParametricT2StarAMap = false;
+                                                        bool isParametricT2StarTruncMap = false;
+
+                                                        if ( !dataRole.empty() )
+                                                        {
+                                                            size_t n;
+                                                            for ( n=0; n<dataRole.size(); n++ )
+                                                            {
+                                                                if ( dataRole[n] == GTPLUS_IMAGE_PSIR )
+                                                                {
+                                                                    isRealImage = true;
+                                                                }
+
+                                                                if ( (dataRole[n]==GTPLUS_IMAGE_T1MAP) 
+                                                                    || (dataRole[n]==GTPLUS_IMAGE_T1SDMAP)
+                                                                    || (dataRole[n]==GTPLUS_IMAGE_T2MAP)
+                                                                    || (dataRole[n]==GTPLUS_IMAGE_T2SDMAP)
+                                                                    || (dataRole[n]==GTPLUS_IMAGE_T2STARMAP)
+                                                                    || (dataRole[n]==GTPLUS_IMAGE_T2STARMASKMAP)
+                                                                    || (dataRole[n]==GTPLUS_IMAGE_T2STARSDMAP)
+                                                                    || (dataRole[n]==GTPLUS_IMAGE_T2STARAMAP)
+                                                                    || (dataRole[n]==GTPLUS_IMAGE_T2STARTRUNCMAP)
+                                                                    || (dataRole[n]==GTPLUS_IMAGE_FREQMAP)
+                                                                    || (dataRole[n]==GTPLUS_IMAGE_B1MAP)
+                                                                    || (dataRole[n]==GTPLUS_IMAGE_FLIPANGLEMAP) )
+                                                                {
+                                                                    isParametricMap = true;
+                                                                }
+
+                                                                if ( dataRole[n]==GTPLUS_IMAGE_T1MAP )
+                                                                {
+                                                                    isParametricT1Map = true;
+                                                                }
+
+                                                                if ( dataRole[n]==GTPLUS_IMAGE_T1SDMAP )
+                                                                {
+                                                                    isParametricT1SDMap = true;
+                                                                }
+
+                                                                if ( dataRole[n]==GTPLUS_IMAGE_T2MAP )
+                                                                {
+                                                                    isParametricT2Map = true;
+                                                                }
+
+                                                                if ( dataRole[n]==GTPLUS_IMAGE_T2SDMAP )
+                                                                {
+                                                                    isParametricT2SDMap = true;
+                                                                }
+
+                                                                if ( dataRole[n]==GTPLUS_IMAGE_T2STARMAP )
+                                                                {
+                                                                    isParametricT2StarMap = true;
+                                                                }
+
+                                                                if ( dataRole[n]==GTPLUS_IMAGE_T2STARSDMAP )
+                                                                {
+                                                                    isParametricT2StarSDMap = true;
+                                                                }
+
+                                                                if ( dataRole[n]==GTPLUS_IMAGE_T2STARAMAP )
+                                                                {
+                                                                    isParametricT2StarAMap = true;
+                                                                }
+
+                                                                if ( dataRole[n]==GTPLUS_IMAGE_T2STARTRUNCMAP )
+                                                                {
+                                                                    isParametricT2StarTruncMap = true;
+                                                                }
+
+                                                                if ( dataRole[n]==GTPLUS_IMAGE_T2STARMASKMAP )
+                                                                {
+                                                                    isParametricT2StarMaskMap = true;
+                                                                }
+                                                            }
+
+                                                            std::vector<std::string> dataRoleAll;
+                                                            Gadgetron::getISMRMRMetaValues(*cm3->getObjectPtr(), GTPLUS_DATA_ROLE, dataRoleAll);
+
+                                                            if ( !debugFolder_fullPath_.empty() )
+                                                            {
+                                                                std::ostringstream ostr;
+                                                                for ( n=0; n<dataRoleAll.size(); n++ )
+                                                                {
+                                                                    ostr << dataRoleAll[n] << "_";
+                                                                }
+                                                                ostr << cm1->getObjectPtr()->image_index;
+
+                                                                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, *cm2->getObjectPtr(), ostr.str());
+                                                            }
+
+                                                            // double check the image type
+                                                            if ( isRealImage )
+                                                            {
+                                                                cm1->getObjectPtr()->image_type = ISMRMRD::ISMRMRD_IMTYPE_REAL;
+                                                            }
+
+                                                            // image comment
+                                                            if ( isParametricMap )
+                                                            {
+                                                                // reset the image comment for maps
+
+                                                                std::vector<std::string> commentStr(dataRole.size()+1);
+
+                                                                commentStr[0] = "GT";
+                                                                for ( n=0; n<dataRole.size(); n++ )
+                                                                {
+                                                                    commentStr[n+1] = dataRole[n];
+                                                                }
+
+                                                                Gadgetron::setISMRMRMetaValues(*cm3->getObjectPtr(), GTPLUS_IMAGECOMMENT, commentStr);
+
+                                                                // get the scaling ratio
+                                                                float scalingRatio = 1;
+                                                                try
+                                                                {
+                                                                    scalingRatio = (float)(cm3->getObjectPtr()->as_double(GTPLUS_IMAGE_SCALE_RATIO, 0));
+
+                                                                    std::ostringstream ostr;
+                                                                    ostr << "x" << scalingRatio;
+                                                                    std::string scalingStr = ostr.str();
+                                                                    cm3->getObjectPtr()->append(GTPLUS_IMAGECOMMENT, scalingStr.c_str());
+
+                                                                    if ( isParametricT1Map || isParametricT1SDMap || isParametricT2Map || isParametricT2SDMap || isParametricT2StarMap || isParametricT2StarSDMap )
+                                                                    {
+                                                                        std::ostringstream ostr;
+                                                                        ostr << std::setprecision(3) << 1.0f/scalingRatio << "ms";
+                                                                        std::string unitStr = ostr.str();
+
+                                                                        cm3->getObjectPtr()->append(GTPLUS_IMAGECOMMENT, unitStr.c_str());
+                                                                    }
+                                                                }
+                                                                catch(...)
+                                                                {
+                                                                    GADGET_WARN_MSG("Image attrib does not have the scale ratio ...");
+                                                                    scalingRatio = 1;
+                                                                }
+
+                                                                if ( isParametricT1Map || isParametricT2Map || isParametricT2StarMap )
+                                                                {
+                                                                    cm3->getObjectPtr()->set(GTPLUS_IMAGE_WINDOWCENTER, (long)((this->get_double_value("window_center"))*scalingRatio) );
+                                                                    cm3->getObjectPtr()->set(GTPLUS_IMAGE_WINDOWWIDTH, (long)((this->get_double_value("window_width"))*scalingRatio) );
+                                                                }
+
+                                                                if ( isParametricT1SDMap || isParametricT2SDMap || isParametricT2StarSDMap || isParametricT2StarAMap )
+                                                                {
+                                                                    cm3->getObjectPtr()->set(GTPLUS_IMAGE_WINDOWCENTER, (long)((this->get_double_value("sd_window_center"))*scalingRatio) );
+                                                                    cm3->getObjectPtr()->set(GTPLUS_IMAGE_WINDOWWIDTH, (long)((this->get_double_value("sd_window_width"))*scalingRatio) );
+                                                                }
+
+                                                                if ( isParametricT2StarTruncMap )
+                                                                {
+                                                                    cm3->getObjectPtr()->set(GTPLUS_IMAGE_WINDOWCENTER, (long)(4) );
+                                                                    cm3->getObjectPtr()->set(GTPLUS_IMAGE_WINDOWWIDTH, (long)(8) );
+                                                                }
+
+                                                                /* if ( isParametricT2Map )
+                                                                {
+                                                                cm3->getObjectPtr()->attributeInteger_.set(GTPLUS_IMAGE_WINDOWCENTER, 0, (long long)(60*scalingRatio) );
+                                                                cm3->getObjectPtr()->attributeInteger_.set(GTPLUS_IMAGE_WINDOWWIDTH, 0, (long long)(120*scalingRatio) );
+                                                                }
+
+                                                                if ( isParametricT2StarMap )
+                                                                {
+                                                                cm3->getObjectPtr()->attributeInteger_.set(GTPLUS_IMAGE_WINDOWCENTER, 0, (long long)(25*scalingRatio) );
+                                                                cm3->getObjectPtr()->attributeInteger_.set(GTPLUS_IMAGE_WINDOWWIDTH, 0, (long long)(50*scalingRatio) );
+                                                                } */
+                                                            }
+                                                            else
+                                                            {
+                                                                for ( n=0; n<dataRole.size(); n++ )
+                                                                {
+                                                                    cm3->getObjectPtr()->append(GTPLUS_IMAGECOMMENT, dataRole[n].c_str());
+                                                                }
+                                                            }
+
+                                                            // seq description
+                                                            Gadgetron::appendISMRMRMetaValues(*cm3->getObjectPtr(), GTPLUS_SEQUENCEDESCRIPTION, dataRoleAll);
+                                                        }
+
+                                                        GADGET_CONDITION_MSG(verboseMode_, "--> GtPlusImageReconGadget, sending out 2D image [CHA SLC E2 CON PHS REP SET AVE] = [" 
+                                                            << cha << " " 
+                                                            << cm1->getObjectPtr()->slice << " " 
+                                                            << e2 << " " 
+                                                            << cm1->getObjectPtr()->contrast << " " 
+                                                            << cm1->getObjectPtr()->phase << " " 
+                                                            << cm1->getObjectPtr()->repetition << " " 
+                                                            << cm1->getObjectPtr()->set << " " 
+                                                            << cm1->getObjectPtr()->average << "] \t" 
+                                                            << " -- Image number -- " << cm1->getObjectPtr()->image_index);
+
+                                                        // image processing history
+                                                        if ( !processStr.empty() )
+                                                        {
+                                                            size_t n;
+                                                            for ( n=0; n<processStr.size(); n++ )
+                                                            {
+                                                                cm3->getObjectPtr()->append(GTPLUS_IMAGEPROCESSINGHISTORY, processStr[n].c_str());
+                                                            }
+                                                        }
+
+                                                        if ( windowCenter.size()==SLC && windowWidth.size()==SLC )
+                                                        {
+                                                            cm3->getObjectPtr()->set(GTPLUS_IMAGE_WINDOWCENTER, (long)windowCenter[slc]);
+                                                            cm3->getObjectPtr()->set(GTPLUS_IMAGE_WINDOWWIDTH, (long)windowWidth[slc]);
+                                                        }
+                                                    }
+
+                                                    if ( this->next()->putq(cm1) < 0 ) 
+                                                    {
+                                                        cm1->release();
+                                                        return false;
+                                                    }
+                                                }
+                                                catch(...)
+                                                {
+                                                    cm1->release();
+                                                    throw;
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in GtPlusImageReconGadget::sendOutImages(images, seriesNum, processStr, dataRole) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    int GtPlusImageReconGadget::close(unsigned long flags)
+    {
+        GADGET_CONDITION_MSG(true, "GtPlusImageReconGadget - close(flags) : " << flags);
+
+        if ( BaseClass::close(flags) != GADGET_OK ) return GADGET_FAIL;
+
+        if ( flags != 0 )
+        {
+            std::string procTime;
+            gtPlus_util_.getCurrentMoment(procTime);
+
+            GADGET_MSG("* ============================================================================== *");
+            GADGET_MSG("---> Image recon phase, Current processing time : " << procTime << " <---");
+            GADGET_MSG("* ============================================================================== *");
+        }
+
+        return GADGET_OK;
+    }
+
+    bool GtPlusImageReconGadget::exportImageContainer2D(ImageContainer2DType& input, const std::string& prefix)
+    {
+        if ( !this->debugFolder_.empty() )
+        {
+            size_t R = input.rows();
+
+            size_t r;
+
+            hoNDArray<ValueType> outArray;
+
+            for ( r=0; r<R; r++ )
+            {
+                input.to_NDArray(r, outArray);
+
+                std::ostringstream ostr;
+                ostr << prefix << "_" << r;
+
+                GADGET_EXPORT_ARRAY_COMPLEX(this->debugFolder_fullPath_, gt_exporter_, outArray, ostr.str());
+            }
+        }
+
+        return true;
+    }
+
+    bool GtPlusImageReconGadget::exportImageContainer2D(ImageContainer2DMagType& input, const std::string& prefix)
+    {
+        if ( !this->debugFolder_.empty() )
+        {
+            size_t R = input.rows();
+
+            size_t r;
+
+            hoNDArray<T> outArray;
+
+            for ( r=0; r<R; r++ )
+            {
+                input.to_NDArray(r, outArray);
+
+                std::ostringstream ostr;
+                ostr << prefix << "_" << r;
+
+                GADGET_EXPORT_ARRAY(this->debugFolder_fullPath_, gt_exporter_, outArray, ostr.str());
+            }
+        }
+
+        return true;
+    }
+
+    bool GtPlusImageReconGadget::exportImageContainer3D(ImageContainer3DType& input, const std::string& prefix)
+    {
+        if ( !this->debugFolder_.empty() )
+        {
+            size_t R = input.rows();
+
+            size_t r, c;
+            for ( r=0; r<R; r++ )
+            {
+                for ( c=0; c<input.cols(r); c++ )
+                {
+                    std::ostringstream ostr;
+                    ostr << prefix << "_" << r << "_" << c;
+
+                    GADGET_EXPORT_IMAGE_COMPLEX(this->debugFolder_fullPath_, gt_exporter_, input(r, c), ostr.str());
+                }
+            }
+        }
+
+        return true;
+    }
+
+    bool GtPlusImageReconGadget::exportImageContainer3D(ImageContainer3DMagType& input, const std::string& prefix)
+    {
+        if ( !this->debugFolder_.empty() )
+        {
+            size_t R = input.rows();
+
+            size_t r, c;
+            for ( r=0; r<R; r++ )
+            {
+                for ( c=0; c<input.cols(r); c++ )
+                {
+                    std::ostringstream ostr;
+                    ostr << prefix << "_" << r << "_" << c;
+
+                    GADGET_EXPORT_IMAGE(this->debugFolder_fullPath_, gt_exporter_, input(r, c), ostr.str());
+                }
+            }
+        }
+
+        return true;
+    }
+
+}
diff --git a/gadgets/gtPlus/GtPlusImageReconGadget.h b/gadgets/gtPlus/GtPlusImageReconGadget.h
new file mode 100644
index 0000000..8a7ea21
--- /dev/null
+++ b/gadgets/gtPlus/GtPlusImageReconGadget.h
@@ -0,0 +1,135 @@
+/** \file   GtPlusImageReconGadget.h
+    \brief  The GtPlus image reconstruction gadget, used after GtPlus kspace reconstruction
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include <complex>
+#include "GtPlusGadgetExport.h"
+#include "Gadget.h"
+#include "hoNDArray.h"
+#include "hoNDObjectArray.h"
+#include "ismrmrd/ismrmrd.h"
+#include "GadgetIsmrmrdReadWrite.h"
+
+#include "hoNDImageContainer2D.h"
+#include "hoNDArray_utils.h"
+#include "hoNDImage.h"
+
+#include "GadgetronCommon.h"
+#include "GtPlusGadgetImageArray.h"
+
+#include "gtPlusIOAnalyze.h"
+#include "gtPlusISMRMRDReconUtil.h"
+
+#include "GadgetStreamController.h"
+#include "GtPlusReconGadgetUtil.h"
+
+#ifdef USE_OMP
+    #include "omp.h"
+#endif // USE_OMP
+
+namespace Gadgetron
+{
+
+// the dimensional order of buffered images
+// [Cha Slice E2 Con Phase Rep Set Ave]
+//   0    1    2   3   4    5   6   7
+
+class EXPORTGTPLUSGADGET GtPlusImageReconGadget : public Gadget1< hoNDObjectArray< hoNDImage<std::complex<float>, 2> > >
+{
+public:
+    GADGET_DECLARE(GtPlusImageReconGadget);
+
+    typedef float T;
+    typedef std::complex<T> ValueType;
+
+    typedef hoNDImage<ValueType, 2> ImageType;
+    typedef hoNDImage<ValueType, 3> Image3DType;
+
+    typedef hoNDImage<T, 2> ImageMagType;
+    typedef hoNDImage<T, 3> Image3DMagType;
+
+    // typedef hoNDArray<ImageType*> ImageBufferType;
+    typedef hoNDObjectArray<ImageType> ImageBufferType;
+    typedef hoNDArray<ValueType> ImgArrayType;
+
+    typedef hoNDImageContainer2D<ImageType> ImageContainer2DType;
+    typedef hoNDImageContainer2D<Image3DType> ImageContainer3DType;
+
+    typedef hoNDImageContainer2D<ImageMagType> ImageContainer2DMagType;
+    typedef hoNDImageContainer2D<Image3DMagType> ImageContainer3DMagType;
+
+    typedef Gadget1< ImageBufferType > BaseClass;
+
+    GtPlusImageReconGadget();
+    ~GtPlusImageReconGadget();
+
+    virtual int close(unsigned long flags);
+
+    /// image series number
+    int image_series_num_;
+
+    // debug folder
+    std::string debugFolder_;
+    std::string debugFolder_fullPath_;
+
+    // whether to perform timing
+    bool performTiming_;
+
+protected:
+
+    // encoding space size
+    ISMRMRD::EncodingCounters meas_max_idx_;
+
+    // read in parameters
+    bool readParameters();
+
+    virtual int process_config(ACE_Message_Block* mb);
+    virtual int process(GadgetContainerMessage<ImageBufferType>* m1);
+
+    virtual int processImageBuffer(ImageBufferType& ori);
+
+    /// fill image buffer with NULL pointers
+    bool fillWithNULL(ImageBufferType& buf);
+
+    /// release the image buffer
+    bool releaseImageBuffer(ImageBufferType& buf);
+
+    /// get a 3D image from the 2D image buffer
+    Image3DType* getImage3DFromImage2D(ImageBufferType& ori, size_t cha, size_t slc, size_t con, size_t phs, size_t rep, size_t set, size_t ave);
+
+    /// get the 2D image in buffer from a 3D image
+    bool getImage2DFromImage3D(Image3DType& image3D, ImageBufferType& image2DBuf);
+
+    /// compute the image number
+    size_t computeSeriesImageNumber (ISMRMRD::ImageHeader& imheader, size_t nCHA, size_t cha, size_t nE2, size_t e2);
+
+    /// send out the images as a Gadget3 message
+    /// windowCenter and windowWidth hold one value per SLC; they are applied only when both have SLC entries
+    virtual bool sendOutImages(ImageBufferType& images, int seriesNum, const std::vector<std::string>& processStr, const std::vector<std::string>& dataRole, const std::vector<float>& windowCenter=std::vector<float>(), const std::vector<float>& windowWidth=std::vector<float>());
+
+    /// utility function to export image container
+    bool exportImageContainer2D(ImageContainer2DType& input, const std::string& prefix);
+    bool exportImageContainer2D(ImageContainer2DMagType& input, const std::string& prefix);
+
+    bool exportImageContainer3D(ImageContainer3DType& input, const std::string& prefix);
+    bool exportImageContainer3D(ImageContainer3DMagType& input, const std::string& prefix);
+
+    // util for gtplus
+    Gadgetron::gtPlus::gtPlusISMRMRDReconUtil< std::complex<float> > gtPlus_util_;
+
+    // clock for timing
+    Gadgetron::GadgetronTimer gt_timer1_;
+    Gadgetron::GadgetronTimer gt_timer2_;
+    Gadgetron::GadgetronTimer gt_timer3_;
+
+    // exporter
+    Gadgetron::gtPlus::gtPlusIOAnalyze gt_exporter_;
+
+    // in verbose mode, more info is printed out
+    bool verboseMode_;
+};
+
+}
diff --git a/gadgets/gtPlus/GtPlusRecon2DTCloudPackage.h b/gadgets/gtPlus/GtPlusRecon2DTCloudPackage.h
index 012edf0..e61fcb8 100644
--- a/gadgets/gtPlus/GtPlusRecon2DTCloudPackage.h
+++ b/gadgets/gtPlus/GtPlusRecon2DTCloudPackage.h
@@ -4,20 +4,18 @@
             Ref to: 
 
             Hui Xue, Souheil Inati, Thomas Sangild Sorensen, Peter Kellman, Michael S. Hansen. 
-            Distributed MRI Reconstruction using Gadgetron based Cloud Computing. Submitted to
-            Magenetic Resonance in Medicine on Dec 2013.
+            Distributed MRI Reconstruction using Gadgetron based Cloud Computing. 
+            Magnetic Resonance in Medicine, doi: 10.1002/mrm.25213.
 
     \author Hui Xue
 */
 
 #pragma once
 
-#include "SerializableObject.h"
-
 namespace Gadgetron
 {
 
-struct EXPORTGTPLUSGADGET GtPlusRecon2DTPara
+struct GtPlusRecon2DTPara
 {
     size_t reconSizeRO_;
     size_t reconSizeE1_;
@@ -66,16 +64,27 @@ struct EXPORTGTPLUSGADGET GtPlusRecon2DTPara
 };
 
 template <typename T> 
-struct GtPlusRecon2DTCloudPackage : public SerializableObject
+struct GtPlusRecon2DTCloudPackage
 {
+    typedef typename realType<T>::Type real_value_type;
+
     GtPlusRecon2DTPara para;
 
     hoNDArray<T> kspace;
+    hoNDArray<real_value_type> timeStamp;
+    hoNDArray<real_value_type> physioTimeStamp;
+
     hoNDArray<T> ref;
 
     hoNDArray<T> complexIm;
     hoNDArray<T> res;
 
+    // extra recon images
+    hoNDArray<T> complexImSecond;
+    // optional time stamps for the second recon images, in units of seconds
+    hoNDArray<real_value_type> resTimeStampSecond;
+    hoNDArray<real_value_type> resPhysioTimeStampSecond;
+
     GtPlusRecon2DTCloudPackage();
     GtPlusRecon2DTCloudPackage(const GtPlusRecon2DTCloudPackage& pack);
 
@@ -83,7 +92,7 @@ struct GtPlusRecon2DTCloudPackage : public SerializableObject
 
     GtPlusRecon2DTCloudPackage<T>& operator=(const GtPlusRecon2DTCloudPackage<T>& pack);
 
-    virtual bool serialize(char*& buf, size_t& len) const;
+    virtual bool serialize(char*& buf, size_t& len) const ;
     virtual bool deserialize(char* buf, size_t& len);
 };
 
@@ -91,8 +100,13 @@ template <typename T>
 GtPlusRecon2DTCloudPackage<T>::GtPlusRecon2DTCloudPackage()
 {
     kspace.clear();
+    timeStamp.clear();
+    physioTimeStamp.clear();
     ref.clear();
     complexIm.clear();
+    complexImSecond.clear();
+    resTimeStampSecond.clear();
+    resPhysioTimeStampSecond.clear();
     res.clear();
 }
 
@@ -107,8 +121,13 @@ GtPlusRecon2DTCloudPackage<T>::GtPlusRecon2DTCloudPackage(const GtPlusRecon2DTCl
 {
     para = pack.para;
     kspace = pack.kspace;
+    timeStamp = pack.timeStamp;
+    physioTimeStamp = pack.physioTimeStamp;
     ref = pack.ref;
     complexIm = pack.complexIm;
+    complexImSecond = pack.complexImSecond;
+    resTimeStampSecond = pack.resTimeStampSecond;
+    resPhysioTimeStampSecond = pack.resPhysioTimeStampSecond;
     res = pack.res;
 }
 
@@ -119,31 +138,41 @@ GtPlusRecon2DTCloudPackage<T>& GtPlusRecon2DTCloudPackage<T>::operator=(const Gt
 
     para = pack.para;
     kspace = pack.kspace;
+    timeStamp = pack.timeStamp;
+    physioTimeStamp = pack.physioTimeStamp;
     ref = pack.ref;
     complexIm = pack.complexIm;
+    complexImSecond = pack.complexImSecond;
+    resTimeStampSecond = pack.resTimeStampSecond;
+    resPhysioTimeStampSecond = pack.resPhysioTimeStampSecond;
     res = pack.res;
 
     return *this;
 }
 
 template <typename T> 
-bool GtPlusRecon2DTCloudPackage<T>::serialize(char*& buf, size_t& len) const
+bool GtPlusRecon2DTCloudPackage<T>::serialize(char*& buf, size_t& len) const 
 {
-    char *bufKSpace(NULL), *bufRef(NULL), *bufComplexIm(NULL), *bufRes(NULL);
+    char *bufKSpace(NULL), *bufTimeStamp(NULL), *bufPhysioTimeStamp(NULL), *bufRef(NULL), *bufComplexIm(NULL), *bufRes(NULL), *bufComplexImSecond(NULL), *bufResTimeStampSecond(NULL), *bufResPhysioTimeStampSecond(NULL);
     try
     {
         if ( buf != NULL ) delete[] buf;
 
         // find the total len
-        size_t lenKSpace, lenRef, lenComplexIm, lenRes;
+        size_t lenKSpace, lenTimeStamp, lenPhysioTimeStamp, lenRef, lenComplexIm, lenRes, lenComplexImSecond, lenResTimeStampSecond, lenResPhyisoTimeStampSecond;
 
         GADGET_CHECK_THROW(kspace.serialize(bufKSpace, lenKSpace));
-        GADGET_CHECK_THROW(kspace.serialize(bufRef, lenRef));
+        GADGET_CHECK_THROW(timeStamp.serialize(bufTimeStamp, lenTimeStamp));
+        GADGET_CHECK_THROW(physioTimeStamp.serialize(bufPhysioTimeStamp, lenPhysioTimeStamp));
+        GADGET_CHECK_THROW(ref.serialize(bufRef, lenRef));
         GADGET_CHECK_THROW(complexIm.serialize(bufComplexIm, lenComplexIm));
         GADGET_CHECK_THROW(res.serialize(bufRes, lenRes));
+        GADGET_CHECK_THROW(complexImSecond.serialize(bufComplexImSecond, lenComplexImSecond));
+        GADGET_CHECK_THROW(resTimeStampSecond.serialize(bufResTimeStampSecond, lenResTimeStampSecond));
+        GADGET_CHECK_THROW(resPhysioTimeStampSecond.serialize(bufResPhysioTimeStampSecond, lenResPhyisoTimeStampSecond));
 
         // total length
-        len = sizeof(GtPlusRecon2DTPara) + lenKSpace + lenRef + lenComplexIm + lenRes;
+        len = sizeof(GtPlusRecon2DTPara) + lenTimeStamp + lenPhysioTimeStamp + lenKSpace + lenRef + lenComplexIm + lenRes + lenComplexImSecond + lenResTimeStampSecond + lenResPhyisoTimeStampSecond;
 
         buf = new char[len];
         GADGET_CHECK_RETURN_FALSE( buf != NULL );
@@ -159,6 +188,16 @@ bool GtPlusRecon2DTCloudPackage<T>::serialize(char*& buf, size_t& len) const
         offset += currLen;
         delete [] bufKSpace;
 
+        currLen = lenTimeStamp;
+        memcpy(buf+offset, bufTimeStamp, currLen);
+        offset += currLen;
+        delete [] bufTimeStamp;
+
+        currLen = lenPhysioTimeStamp;
+        memcpy(buf+offset, bufPhysioTimeStamp, currLen);
+        offset += currLen;
+        delete [] bufPhysioTimeStamp;
+
         currLen = lenRef;
         memcpy(buf+offset, bufRef, currLen);
         offset += currLen;
@@ -173,15 +212,35 @@ bool GtPlusRecon2DTCloudPackage<T>::serialize(char*& buf, size_t& len) const
         memcpy(buf+offset, bufRes, currLen);
         offset += currLen;
         delete [] bufRes;
+
+        currLen = lenComplexImSecond;
+        memcpy(buf+offset, bufComplexImSecond, currLen);
+        offset += currLen;
+        delete [] bufComplexImSecond;
+
+        currLen = lenResTimeStampSecond;
+        memcpy(buf+offset, bufResTimeStampSecond, currLen);
+        offset += currLen;
+        delete [] bufResTimeStampSecond;
+
+        currLen = lenResPhyisoTimeStampSecond;
+        memcpy(buf+offset, bufResPhysioTimeStampSecond, currLen);
+        offset += currLen;
+        delete [] bufResPhysioTimeStampSecond;
     }
     catch (...)
     {
         GADGET_ERROR_MSG("Errors happened in GtPlusRecon2DTCloudPackage<T>::serialize(...) ... ");
 
         if ( bufKSpace != NULL ) delete [] bufKSpace;
+        if ( bufTimeStamp != NULL ) delete [] bufTimeStamp;
+        if ( bufPhysioTimeStamp != NULL ) delete [] bufPhysioTimeStamp;
         if ( bufRef != NULL ) delete [] bufRef;
         if ( bufComplexIm != NULL ) delete [] bufComplexIm;
         if ( bufRes != NULL ) delete [] bufRes;
+        if ( bufComplexImSecond != NULL ) delete [] bufComplexImSecond;
+        if ( bufResTimeStampSecond != NULL ) delete [] bufResTimeStampSecond;
+        if ( bufResPhysioTimeStampSecond != NULL ) delete [] bufResPhysioTimeStampSecond;
 
         return false;
     }
@@ -201,6 +260,12 @@ bool GtPlusRecon2DTCloudPackage<T>::deserialize(char* buf, size_t& len)
         GADGET_CHECK_RETURN_FALSE(kspace.deserialize(buf+offset, currLen));
         offset += currLen;
 
+        GADGET_CHECK_RETURN_FALSE(timeStamp.deserialize(buf+offset, currLen));
+        offset += currLen;
+
+        GADGET_CHECK_RETURN_FALSE(physioTimeStamp.deserialize(buf+offset, currLen));
+        offset += currLen;
+
         GADGET_CHECK_RETURN_FALSE(ref.deserialize(buf+offset, currLen));
         offset += currLen;
 
@@ -210,6 +275,15 @@ bool GtPlusRecon2DTCloudPackage<T>::deserialize(char* buf, size_t& len)
         GADGET_CHECK_RETURN_FALSE(res.deserialize(buf+offset, currLen));
         offset += currLen;
 
+        GADGET_CHECK_RETURN_FALSE(complexImSecond.deserialize(buf+offset, currLen));
+        offset += currLen;
+
+        GADGET_CHECK_RETURN_FALSE(resTimeStampSecond.deserialize(buf+offset, currLen));
+        offset += currLen;
+
+        GADGET_CHECK_RETURN_FALSE(resPhysioTimeStampSecond.deserialize(buf+offset, currLen));
+        offset += currLen;
+
         // total length
         len = offset;
     }
diff --git a/gadgets/gtPlus/GtPlusRecon2DTGadget.cpp b/gadgets/gtPlus/GtPlusRecon2DTGadget.cpp
index 0beaac9..8c28fd5 100644
--- a/gadgets/gtPlus/GtPlusRecon2DTGadget.cpp
+++ b/gadgets/gtPlus/GtPlusRecon2DTGadget.cpp
@@ -113,7 +113,7 @@ bool GtPlusRecon2DTGadget::readParameters()
         GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
 
         // get the parameters from base class
-        BaseClass::readParameters();
+        // BaseClass::readParameters();
 
         para_.recon_kspace_needed_ = recon_kspace_needed_;
         para_.workOrderPara_ = workOrderPara_;
@@ -194,7 +194,7 @@ int GtPlusRecon2DTGadget::process_config(ACE_Message_Block* mb)
         bool parseSuccess = this->parseGTCloudNodeFile(cloud_node_file_, gt_cloud_);
         if ( parseSuccess )
         {
-            CloudSize_ = gt_cloud_.size();
+            CloudSize_ = (unsigned int)gt_cloud_.size();
             if ( CloudSize_ == 0 ) CloudComputing_ = false;
         }
         else
@@ -212,20 +212,25 @@ int GtPlusRecon2DTGadget::process(Gadgetron::GadgetContainerMessage< GtPlusGadge
 
     processed_called_times_++;
 
+
     GtPlusGadgetImageArray* images = m1->getObjectPtr();
 
     WorkOrderType* workOrder = m2->getObjectPtr();
 
     boost::shared_ptr< std::vector<size_t> > dims = workOrder->data_.get_dimensions();
 
-    GADGET_CONDITION_MSG(verboseMode_, "[Ro E1 Cha Slice E2 Con Phase Rep Set Seg] = [" 
-        << (*dims)[0] << " " << (*dims)[1] << " " << (*dims)[2] << " " << (*dims)[3] << " " << (*dims)[4] 
-        << " " << (*dims)[5] << " " << (*dims)[6] << " " << (*dims)[7] << " " << (*dims)[8] << " " << (*dims)[9] << "]");
+    size_t SEG = (*dims)[9];
+
+    GADGET_CONDITION_MSG(verboseMode_, "[Ro E1 Cha Slice E2 Con Phase Rep Set Seg Ave] = [" 
+                                                << (*dims)[0] << " " << (*dims)[1] << " " << (*dims)[2] << " " 
+                                                << (*dims)[3] << " " << (*dims)[4] << " " << (*dims)[5] << " " 
+                                                << (*dims)[6] << " " << (*dims)[7] << " " << (*dims)[8] << " " 
+                                                << (*dims)[9] << " " << (*dims)[10] << "]");
 
     dimensions_ = *dims;
 
     // fill in more parameters
-    para_.reconSizeRO_ = (*dims)[0];
+    para_.reconSizeRO_ = GT_MAX(matrix_size_recon_[0], (*dims)[0]);
     para_.reconSizeE1_ = reconE1_;
     para_.reconSizeE2_ = reconE2_;
     para_.encodingFOV_RO_ = field_of_view_encoding_[0];
@@ -258,10 +263,15 @@ int GtPlusRecon2DTGadget::process(Gadgetron::GadgetContainerMessage< GtPlusGadge
     para_.workOrderPara_.start_E2_ = workOrder->start_E2_;
     para_.workOrderPara_.end_E2_ = workOrder->end_E2_;
 
+    para_.workOrderPara_.retro_gated_images_ = workOrder->retro_gated_images_;
+    para_.workOrderPara_.retro_gated_segment_size_ = workOrder->retro_gated_segment_size_;
+
     para_.workOrderPara_.workFlow_BufferKernel_ = workOrder->workFlow_BufferKernel_;
     para_.workOrderPara_.workFlow_use_BufferedKernel_ = workOrder->workFlow_use_BufferedKernel_;
     para_.workOrderPara_.num_channels_res_ = workOrder->num_channels_res_;
 
+    bool perform_retro_gating = (para_.workOrderPara_.retro_gated_images_>0);
+
     // ---------------------------------------------------------
     // set the work flow
     // ---------------------------------------------------------
@@ -293,15 +303,19 @@ int GtPlusRecon2DTGadget::process(Gadgetron::GadgetContainerMessage< GtPlusGadge
     // ---------------------------------------------------------
     // set the worker
     // ---------------------------------------------------------
+    worker_grappa_.verbose_ = verboseMode_;
     worker_grappa_.performTiming_ = performTiming_;
     if ( !debugFolder_fullPath_.empty() ) worker_grappa_.debugFolder_ = debugFolder_fullPath_;
 
+    worker_noacceleration_.verbose_ = verboseMode_;
     worker_noacceleration_.performTiming_ = performTiming_;
     if ( !debugFolder_fullPath_.empty() ) worker_noacceleration_.debugFolder_ = debugFolder_fullPath_;
 
+    worker_spirit_.verbose_ = verboseMode_;
     worker_spirit_.performTiming_ = performTiming_;
     if ( !debugFolder_fullPath_.empty() ) worker_spirit_.debugFolder_ = debugFolder_fullPath_;
 
+    worker_spirit_L1_ncg_.verbose_ = verboseMode_;
     worker_spirit_L1_ncg_.performTiming_ = performTiming_;
     if ( !debugFolder_fullPath_.empty() ) worker_spirit_L1_ncg_.debugFolder_ = debugFolder_fullPath_;
 
@@ -333,7 +347,8 @@ int GtPlusRecon2DTGadget::process(Gadgetron::GadgetContainerMessage< GtPlusGadge
         //GADGET_CHECK_RETURN(gtPlus_util_complex_.zpadResize2D(workflow_.res_, workflow_.reconSizeRO_, workflow_.reconSizeE1_, resResized), GADGET_FAIL);
         //GADGET_CHECK_RETURN(this->sendOutRecon(images, resResized, image_series_+1, workOrder->dataDimStartingIndexes_, "Other"), GADGET_FAIL);
 
-       GADGET_CHECK_RETURN(this->sendOutRecon(images, workflow_.res_, image_series_+1, workOrder->dataDimStartingIndexes_, "Other"), GADGET_FAIL);
+        GADGET_CHECK_RETURN(this->scalingImages(workflow_.res_), GADGET_FAIL);
+        GADGET_CHECK_RETURN(this->sendOutRecon(images, workflow_.res_, image_series_+1, workOrder->dataDimStartingIndexes_, "Other", GTPLUS_IMAGE_OTHER), GADGET_FAIL);
 
         workflow_.res_.clear();
         workflow_.data_ = NULL;
@@ -343,21 +358,26 @@ int GtPlusRecon2DTGadget::process(Gadgetron::GadgetContainerMessage< GtPlusGadge
         workOrder_recon_other_.reset();
     }
 
+    // ------------------------------------------------------------------
     // perform the recon
+    // ------------------------------------------------------------------
     GADGET_START_TIMING_CONDITION(gt_timer1_, "Recon 2DT workorder ... ", performTiming_);
 
     GADGET_CHECK_RETURN(this->generateKSpaceFilter(*workOrder), GADGET_FAIL);
 
+    /// set the work order
     workOrder->duplicate(workOrder_recon_);
     setWorkOrder2DTParameters(&workOrder_recon_);
 
     workflow_.workOrder_ = &workOrder_recon_;
+
     if ( verboseMode_ )
     {
         workflow_.workOrder_->print(std::cout);
     }
 
-    workflow_.setDataArray(workOrder->data_);
+    /// set the data
+    workflow_.setDataArray(workOrder->data_, workOrder->time_stamp_, workOrder->physio_time_stamp_);
 
     if ( workOrder->ref_.get_number_of_elements() > 0 )
     {
@@ -390,6 +410,16 @@ int GtPlusRecon2DTGadget::process(Gadgetron::GadgetContainerMessage< GtPlusGadge
         workflow_.worker_ = &worker_noacceleration_;
     }
 
+    if ( workflow_.worker_ != &worker_grappa_ )
+    {
+        GADGET_WARN_MSG("The gfactor computation is currently only avaialbe for grappa reconstruction ... ");
+        workflow_.workOrder_->gfactor_needed_ = false;
+
+        GADGET_WARN_MSG("The wrap-around map computation is currently only avaialbe for grappa reconstruction ... ");
+        workflow_.workOrder_->wrap_around_map_needed_ = false;
+    }
+
+    /// perform the recon
     GADGET_CHECK_RETURN(workflow_.preProcessing(), GADGET_FAIL);
     GADGET_CHECK_RETURN(workflow_.recon(), GADGET_FAIL);
     GADGET_CHECK_RETURN(workflow_.postProcessing(), GADGET_FAIL);
@@ -401,18 +431,123 @@ int GtPlusRecon2DTGadget::process(Gadgetron::GadgetContainerMessage< GtPlusGadge
         std::ostringstream ostr;
         ostr << "Recon2DT_" << processed_called_times_;
 
-        hoNDArray<GT_Complex8> res = workflow_.res_;
+        hoNDArray< std::complex<float> > res = workflow_.res_;
         res.squeeze();
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder2_fullPath_, gt_exporter_, res, ostr.str());
+
+        if ( workflow_.workOrder_->gfactor_needed_ )
+        {
+            std::ostringstream ostr;
+            ostr << "Recon2DT_GFactor_" << processed_called_times_;
+
+            hoNDArray< std::complex<float> > gfactor = workflow_.gfactor_;
+            gfactor.squeeze();
+            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder2_fullPath_, gt_exporter_, gfactor, ostr.str());
+        }
+
+        if ( workflow_.workOrder_->wrap_around_map_needed_ )
+        {
+            std::ostringstream ostr;
+            ostr << "Recon2DT_WrapAroundMap_" << processed_called_times_;
+
+            hoNDArray< std::complex<float> > wrap_around_map = workflow_.wrap_around_map_;
+            wrap_around_map.squeeze();
+            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder2_fullPath_, gt_exporter_, wrap_around_map, ostr.str());
+        }
+
+        if ( workflow_.res_second_.get_number_of_elements() > 0 )
+        {
+            hoNDArray< std::complex<float> > res = workflow_.res_second_;
+            res.squeeze();
+
+            std::ostringstream ostr;
+            ostr << "Recon2DT_second_" << processed_called_times_;
+
+            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder2_fullPath_, gt_exporter_, res, ostr.str());
+        }
+    }
+
+    // compute SNR image and stdmap
+    hoNDArray<ValueType> snrImage, stdMap;
+    bool snrImageComputed = false;
+    bool stdMapComputed = false;
+
+    if ( workflow_.workOrder_->gfactor_needed_ || workOrder->acceFactorE1_==1 )
+    {
+        if ( scalingFactor_snr_image_>0 || scalingFactor_std_map_>0)
+        {
+            bool withAcceleration = (workOrder->acceFactorE1_>1);
+
+            if ( !this->computeSNRImage(workflow_.res_, workflow_.gfactor_, 
+                    start_frame_for_std_map_, withAcceleration, snrImage, stdMap) )
+            {
+                snrImage.clear();
+                stdMap.clear();
+            }
+            else
+            {
+                snrImageComputed = true;
+                stdMapComputed = true;
+            }
+
+            if ( workOrder->acceFactorE1_==1 ) snrImageComputed = false;
+        }
     }
 
     // send out the results
-    GADGET_CHECK_RETURN(this->sendOutRecon(images, workflow_.res_, image_series_, workOrder->dataDimStartingIndexes_, "Image"), GADGET_FAIL);
+    GADGET_CHECK_RETURN(this->scalingImages(workflow_.res_), GADGET_FAIL);
+
+    if ( send_out_recon_ )
+    {
+        if ( perform_retro_gating )
+        {
+            GADGET_CHECK_RETURN(this->sendOutRecon(images, workflow_.res_, workflow_.res_time_stamp_, workflow_.res_physio_time_stamp_, image_series_, workOrder->dataDimStartingIndexes_, "ImageRetro", GTPLUS_IMAGE_RETRO), GADGET_FAIL);
+        }
+        else
+        {
+            GADGET_CHECK_RETURN(this->sendOutRecon(images, workflow_.res_, workflow_.res_time_stamp_, workflow_.res_physio_time_stamp_, image_series_, workOrder->dataDimStartingIndexes_, "Image", GTPLUS_IMAGE_REGULAR), GADGET_FAIL);
+        }
+
+        if ( workflow_.workOrder_->gfactor_needed_ )
+        {
+            Gadgetron::scal((float)scalingFactor_gfactor_, workflow_.gfactor_);
+            GADGET_CHECK_RETURN(this->sendOutRecon(images, workflow_.gfactor_, workflow_.res_time_stamp_, workflow_.res_physio_time_stamp_, image_series_+1, workOrder->dataDimStartingIndexes_, "gfactor", GTPLUS_IMAGE_GFACTOR), GADGET_FAIL);
+        }
+
+        if ( workflow_.workOrder_->wrap_around_map_needed_ )
+        {
+            Gadgetron::scal((float)scalingFactor_wrap_around_map_, workflow_.wrap_around_map_);
+            GADGET_CHECK_RETURN(this->sendOutRecon(images, workflow_.wrap_around_map_, workflow_.res_time_stamp_, workflow_.res_physio_time_stamp_, image_series_+2, workOrder->dataDimStartingIndexes_, "wrap_around_map", GTPLUS_IMAGE_WRAPAROUNDMAP), GADGET_FAIL);
+        }
+
+        if ( scalingFactor_snr_image_>0 && snrImage.get_number_of_elements()>0 && snrImageComputed )
+        {
+            Gadgetron::scal((float)scalingFactor_snr_image_, snrImage);
+            GADGET_CHECK_RETURN(this->sendOutRecon(images, snrImage, workflow_.res_time_stamp_, workflow_.res_physio_time_stamp_, image_series_+3, workOrder->dataDimStartingIndexes_, "snr_map", GTPLUS_IMAGE_SNR_MAP), GADGET_FAIL);
+        }
+
+        if ( scalingFactor_std_map_>0 && stdMap.get_number_of_elements()>0 && stdMapComputed )
+        {
+            Gadgetron::scal((float)scalingFactor_std_map_, stdMap);
+            GADGET_CHECK_RETURN(this->sendOutRecon(images, stdMap, workflow_.res_time_stamp_, workflow_.res_physio_time_stamp_, image_series_+4, workOrder->dataDimStartingIndexes_, "std_map", GTPLUS_IMAGE_STD_MAP), GADGET_FAIL);
+        }
+    }
+
+    if ( send_out_recon_second_ )
+    {
+        if ( workflow_.res_second_.get_number_of_elements() > 0 )
+        {
+            Gadgetron::scal((float)scalingFactor_, workflow_.res_second_);
+            GADGET_CHECK_RETURN(this->sendOutRecon(images, workflow_.res_second_, workflow_.res_time_stamp_second_, workflow_.res_physio_time_stamp_second_, image_series_+5, workOrder->dataDimStartingIndexes_, "ImageSecond", GTPLUS_IMAGE_REGULAR), GADGET_FAIL);
+        }
+    }
 
     GADGET_CONDITION_MSG(verboseMode_, "GtPlusRecon2DTGadget::process(...) ends ... ");
 
     // reset the status
     workflow_.data_ = NULL;
+    workflow_.time_stamp_ = NULL;
+    workflow_.physio_time_stamp_ = NULL;
     workflow_.ref_ = NULL;
     workflow_.noise_ = NULL;
     workflow_.workOrder_ = NULL;
diff --git a/gadgets/gtPlus/GtPlusRecon2DTGadgetCloud.cpp b/gadgets/gtPlus/GtPlusRecon2DTGadgetCloud.cpp
index bf1f7eb..e8a915f 100644
--- a/gadgets/gtPlus/GtPlusRecon2DTGadgetCloud.cpp
+++ b/gadgets/gtPlus/GtPlusRecon2DTGadgetCloud.cpp
@@ -34,7 +34,7 @@ int GtPlusRecon2DTGadgetCloud::process_config(ACE_Message_Block* mb)
         bool parseSuccess = this->parseGTCloudNodeFile(cloud_node_file_, gt_cloud_);
         if ( parseSuccess )
         {
-            CloudSize_ = gt_cloud_.size();
+            CloudSize_ = (unsigned int)gt_cloud_.size();
             if ( CloudSize_ == 0 ) CloudComputing_ = false;
         }
 
@@ -99,14 +99,14 @@ int GtPlusRecon2DTGadgetCloud::process(Gadgetron::GadgetContainerMessage< GtPlus
 
         boost::shared_ptr< std::vector<size_t> > dims = workOrder->data_.get_dimensions();
 
-        GADGET_CONDITION_MSG(verboseMode_, "[Ro E1 Cha Slice E2 Con Phase Rep Set Seg] = [" 
+        GADGET_CONDITION_MSG(verboseMode_, "[Ro E1 Cha Slice E2 Con Phase Rep Set Seg Ave] = [" 
             << (*dims)[0] << " " << (*dims)[1] << " " << (*dims)[2] << " " << (*dims)[3] << " " << (*dims)[4] 
-            << " " << (*dims)[5] << " " << (*dims)[6] << " " << (*dims)[7] << " " << (*dims)[8] << " " << (*dims)[9] << "]");
+            << " " << (*dims)[5] << " " << (*dims)[6] << " " << (*dims)[7] << " " << (*dims)[8] << " " << (*dims)[9] << " " << (*dims)[10] << "]");
 
         dimensions_ = *dims;
 
         // fill in more parameters
-        para_.reconSizeRO_ = (*dims)[0];
+        para_.reconSizeRO_ = GT_MAX(matrix_size_recon_[0], (*dims)[0]);
         para_.reconSizeE1_ = reconE1_;
         para_.reconSizeE2_ = reconE2_;
         para_.encodingFOV_RO_ = field_of_view_encoding_[0];
@@ -169,14 +169,27 @@ int GtPlusRecon2DTGadgetCloud::process(Gadgetron::GadgetContainerMessage< GtPlus
         completedJobListCloud[0] = &packages_received_[num_of_jobs_];
 
         // set the data and ref arrays
-        jobListCloud[0]->kspace = workOrder->data_;
+
+        // convert the data to the compressed format
+        if ( workOrder->acceFactorE1_>1 && workOrder->CalibMode_==Gadgetron::gtPlus::ISMRMRD_interleaved )
+        {
+            Gadgetron::extractSampledLinesUpTo11DArray(workOrder->data_, jobListCloud[0]->kspace, workOrder->time_stamp_, workOrder->acceFactorE1_, workOrder->acceFactorE2_);
+        }
+        else
+        {
+            jobListCloud[0]->kspace = workOrder->data_;
+        }
+
+        jobListCloud[0]->timeStamp = workOrder->time_stamp_;
+        jobListCloud[0]->physioTimeStamp = workOrder->physio_time_stamp_;
         if ( workOrder->ref_.get_number_of_elements() > 0 )
         {
             jobListCloud[0]->ref = workOrder->ref_;
         }
         else if ( CalibMode_==Gadgetron::gtPlus::ISMRMRD_interleaved )
         {
-            jobListCloud[0]->ref = workOrder->data_;
+            // jobListCloud[0]->ref = workOrder->data_;
+            jobListCloud[0]->ref.clear();
         }
 
         num_of_jobs_++;
@@ -214,9 +227,9 @@ bool GtPlusRecon2DTGadgetCloud::processJob(CloudPackageType& jobSent, CloudPacka
 
         boost::shared_ptr< std::vector<size_t> > dims = job->kspace.get_dimensions();
 
-        GADGET_CONDITION_MSG(verboseMode_, "job array size : [Ro E1 Cha Slice E2 Con Phase Rep Set Seg] = [" 
+        GADGET_CONDITION_MSG(verboseMode_, "job array size : [Ro E1 Cha Slice E2 Con Phase Rep Set Seg Ave] = [" 
             << (*dims)[0] << " " << (*dims)[1] << " " << (*dims)[2] << " " << (*dims)[3] << " " << (*dims)[4] 
-            << " " << (*dims)[5] << " " << (*dims)[6] << " " << (*dims)[7] << " " << (*dims)[8] << " " << (*dims)[9] << "]");
+            << " " << (*dims)[5] << " " << (*dims)[6] << " " << (*dims)[7] << " " << (*dims)[8] << " " << (*dims)[9] << " " << (*dims)[10] << "]");
 
         GtPlusRecon2DTPara& para = job->para;
 
@@ -250,39 +263,40 @@ bool GtPlusRecon2DTGadgetCloud::processJob(CloudPackageType& jobSent, CloudPacka
         workOrder.CloudSize_ = CloudSize_;
         workOrder.gt_cloud_ = gt_cloud_;
 
-        workOrder.data_ = job->kspace;
+        if ( workOrder.acceFactorE1_ <= 1 )
+        {
+            workOrder.data_ = job->kspace;
+        }
+        else
+        {
+            Gadgetron::fillSampledLinesUpTo11DArray(job->kspace, workOrder.data_, job->timeStamp);
+        }
+
+        workOrder.time_stamp_ = job->timeStamp;
+        workOrder.physio_time_stamp_ = job->physioTimeStamp;
         workOrder.ref_ = job->ref;
 
         // ---------------------------------------------------------
         // set the worker
         // ---------------------------------------------------------
+        worker_grappa_.verbose_ = verboseMode_;
         worker_grappa_.performTiming_ = performTiming_;
         if ( !debugFolder_fullPath_.empty() ) worker_grappa_.debugFolder_ = debugFolder_fullPath_;
 
+        worker_noacceleration_.verbose_ = verboseMode_;
         worker_noacceleration_.performTiming_ = performTiming_;
         if ( !debugFolder_fullPath_.empty() ) worker_noacceleration_.debugFolder_ = debugFolder_fullPath_;
 
+        worker_spirit_.verbose_ = verboseMode_;
         worker_spirit_.performTiming_ = performTiming_;
         if ( !debugFolder_fullPath_.empty() ) worker_spirit_.debugFolder_ = debugFolder_fullPath_;
 
+        worker_spirit_L1_ncg_.verbose_ = verboseMode_;
         worker_spirit_L1_ncg_.performTiming_ = performTiming_;
         if ( !debugFolder_fullPath_.empty() ) worker_spirit_L1_ncg_.debugFolder_ = debugFolder_fullPath_;
 
         if ( !debugFolder_fullPath_.empty() ) workflow_.debugFolder_ = debugFolder_fullPath_;
 
-        // set the worker
-        worker_grappa_.performTiming_ = performTiming_;
-        if ( !debugFolder_fullPath_.empty() ) worker_grappa_.debugFolder_ = debugFolder_fullPath_;
-
-        worker_noacceleration_.performTiming_ = performTiming_;
-        if ( !debugFolder_fullPath_.empty() ) worker_noacceleration_.debugFolder_ = debugFolder_fullPath_;
-
-        worker_spirit_.performTiming_ = performTiming_;
-        if ( !debugFolder_fullPath_.empty() ) worker_spirit_.debugFolder_ = debugFolder_fullPath_;
-
-        worker_spirit_L1_ncg_.performTiming_ = performTiming_;
-        if ( !debugFolder_fullPath_.empty() ) worker_spirit_L1_ncg_.debugFolder_ = debugFolder_fullPath_;
-
         if ( verboseMode_ )
         {
             workOrder.print(std::cout);
@@ -291,7 +305,7 @@ bool GtPlusRecon2DTGadgetCloud::processJob(CloudPackageType& jobSent, CloudPacka
         // perform the recon
         GADGET_START_TIMING_CONDITION(gt_timer1_, "Recon 2DT workorder on master node ... ", performTiming_);
 
-        GADGET_CHECK_RETURN(this->generateKSpaceFilter(workOrder), GADGET_FAIL);
+        GADGET_CHECK_RETURN_FALSE(this->generateKSpaceFilter(workOrder));
 
         workOrder.duplicate(workOrder_recon_);
         this->setWorkOrder2DTParameters(&workOrder_recon_);
@@ -302,7 +316,7 @@ bool GtPlusRecon2DTGadgetCloud::processJob(CloudPackageType& jobSent, CloudPacka
             workflow_.workOrder_->print(std::cout);
         }
 
-        workflow_.setDataArray(workOrder.data_);
+        workflow_.setDataArray(workOrder.data_, workOrder.time_stamp_, workOrder.physio_time_stamp_);
 
         if ( workOrder.ref_.get_number_of_elements() > 0 )
         {
@@ -353,18 +367,55 @@ bool GtPlusRecon2DTGadgetCloud::processJob(CloudPackageType& jobSent, CloudPacka
             std::ostringstream ostr;
             ostr << "Recon2DT";
 
-            hoNDArray<GT_Complex8> res = workflow_.res_;
+            hoNDArray< std::complex<float> > res = workflow_.res_;
             res.squeeze();
             GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, res, ostr.str());
+
+            if ( workflow_.res_second_.get_number_of_elements() > 0 )
+            {
+                hoNDArray< std::complex<float> > res = workflow_.res_second_;
+                res.squeeze();
+
+                std::ostringstream ostr;
+                ostr << "Recon2DT_Second_" << processed_called_times_;
+
+                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, res, ostr.str());
+
+                if ( workflow_.res_time_stamp_second_.get_number_of_elements() > 0 )
+                {
+                    std::ostringstream ostr;
+                    ostr << "Recon2DT_Second_TimeStamp_" << processed_called_times_;
+
+                    hoNDArray<float> res = workflow_.res_time_stamp_second_;
+                    res.squeeze();
+                    GADGET_EXPORT_ARRAY(debugFolder_fullPath_, gt_exporter_, res, ostr.str());
+                }
+
+                if ( workflow_.res_physio_time_stamp_second_.get_number_of_elements() > 0 )
+                {
+                    std::ostringstream ostr;
+                    ostr << "Recon2DT_Second_PhysioTimeStamp_" << processed_called_times_;
+
+                    hoNDArray<float> res = workflow_.res_physio_time_stamp_second_;
+                    res.squeeze();
+                    GADGET_EXPORT_ARRAY(debugFolder_fullPath_, gt_exporter_, res, ostr.str());
+                }
+            }
         }
 
         if ( succeed )
         {
             jobReceived.complexIm = workflow_.res_;
+            jobReceived.complexImSecond = workflow_.res_second_;
+            jobReceived.resTimeStampSecond = workflow_.res_time_stamp_second_;
+            jobReceived.resPhysioTimeStampSecond = workflow_.res_physio_time_stamp_second_;
         }
         else
         {
             jobReceived.complexIm.clear();
+            jobReceived.complexImSecond.clear();
+            jobReceived.resTimeStampSecond.clear();
+            jobReceived.resPhysioTimeStampSecond.clear();
             jobReceived.res.clear();
         }
 
@@ -372,6 +423,8 @@ bool GtPlusRecon2DTGadgetCloud::processJob(CloudPackageType& jobSent, CloudPacka
 
         // reset the status
         workflow_.data_ = NULL;
+        workflow_.time_stamp_ = NULL;
+        workflow_.physio_time_stamp_ = NULL;
         workflow_.ref_ = NULL;
         workflow_.noise_ = NULL;
         workflow_.workOrder_ = NULL;
@@ -408,12 +461,38 @@ int GtPlusRecon2DTGadgetCloud::close(unsigned long flags)
 
             // if some jobs are not completed successfully, reprocess them; otherwise, send out images
             std::vector<DimensionRecordType> dataDimStartingIndexes;
-            unsigned int N = image_headers_.size();
+            unsigned int N = (unsigned int)image_headers_.size();
             unsigned int ii;
             for ( ii=0; ii<N; ii++ )
             {
                 bool jobIsOk = true;
-                if ( (packages_received_[ii].complexIm.get_number_of_elements() == 0) && (packages_received_[ii].res.get_number_of_elements() == 0) )
+
+                bool recomputeJob = (packages_received_[ii].complexIm.get_number_of_elements() == 0);
+
+                // special check if the second set of recon results is needed
+                if ( recon_res_second_required_ )
+                {
+                    GADGET_MSG("Check received recon results (second set) ... ");
+
+                    if (packages_received_[ii].complexImSecond.get_number_of_elements() == 0)
+                    {
+                        recomputeJob = true;
+                    }
+                    else
+                    {
+                        // check the images are not empty
+                        real_value_type v(0);
+                        Gadgetron::norm2(packages_received_[ii].complexImSecond, v);
+
+                        if ( GT_ABS(v) < FLT_EPSILON )
+                        {
+                            recomputeJob = true;
+                            GADGET_WARN_MSG("Received recon results (second set) contain no content ... ");
+                        }
+                    }
+                }
+
+                if ( recomputeJob )
                 {
                     // if the cloud goes wrong, do not try again
                     CloudComputing_ = false;
@@ -424,7 +503,39 @@ int GtPlusRecon2DTGadgetCloud::close(unsigned long flags)
                 {
                     if ( !packages_passed_to_next_gadget_[ii].second )
                     {
-                        GADGET_CHECK_RETURN(this->sendOutRecon(&image_headers_[ii], packages_received_[ii].complexIm, image_series_, dataDimStartingIndexes, "Image"), GADGET_FAIL);
+                        GADGET_CHECK_RETURN(this->scalingImages(packages_received_[ii].complexIm), GADGET_FAIL);
+
+                        if ( this->send_out_recon_ )
+                        {
+                            GADGET_CHECK_RETURN(this->sendOutRecon(&image_headers_[ii], packages_received_[ii].complexIm, image_series_, dataDimStartingIndexes, "Image", GTPLUS_IMAGE_REGULAR), GADGET_FAIL);
+                        }
+
+                        if ( this->send_out_recon_second_ )
+                        {
+                            if ( packages_received_[ii].complexImSecond.get_number_of_elements() > 0 )
+                            {
+                                Gadgetron::scal((float)scalingFactor_, packages_received_[ii].complexImSecond);
+
+                                if ( this->para_.workOrderPara_.retro_gated_images_>0 )
+                                {
+                                    GADGET_CHECK_RETURN(this->sendOutRecon(&image_headers_[ii], 
+                                                                            packages_received_[ii].complexImSecond, 
+                                                                            packages_received_[ii].resTimeStampSecond, 
+                                                                            packages_received_[ii].resPhysioTimeStampSecond, 
+                                                                            image_series_+1, dataDimStartingIndexes, 
+                                                                            "ImageRetro", GTPLUS_IMAGE_RETRO), GADGET_FAIL);
+                                }
+                                else
+                                {
+                                    GADGET_CHECK_RETURN(this->sendOutRecon(&image_headers_[ii], 
+                                                                            packages_received_[ii].complexImSecond, 
+                                                                            packages_received_[ii].resTimeStampSecond, 
+                                                                            packages_received_[ii].resPhysioTimeStampSecond, 
+                                                                            image_series_+1, dataDimStartingIndexes, 
+                                                                            "Image", GTPLUS_IMAGE_REGULAR), GADGET_FAIL);
+                                }
+                            }
+                        }
                     }
                 }
 
@@ -433,9 +544,40 @@ int GtPlusRecon2DTGadgetCloud::close(unsigned long flags)
                     std::ostringstream ostr;
                     ostr << "GadgetCloud_Recon2DT_" << ii;
 
-                    hoNDArray<GT_Complex8> res = packages_received_[ii].complexIm;
+                    hoNDArray< std::complex<float> > res = packages_received_[ii].complexIm;
                     res.squeeze();
                     GADGET_EXPORT_ARRAY_COMPLEX(debugFolder2_fullPath_, gt_exporter_, res, ostr.str());
+
+                    if (packages_received_[ii].complexImSecond.get_number_of_elements() > 0 )
+                    {
+                        hoNDArray< std::complex<float> > res = packages_received_[ii].complexImSecond;
+                        res.squeeze();
+
+                        std::ostringstream ostr;
+                        ostr << "GadgetCloud_Recon2DT_Second_" << ii;
+
+                        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder2_fullPath_, gt_exporter_, res, ostr.str());
+
+                        if ( packages_received_[ii].resTimeStampSecond.get_number_of_elements() > 0 )
+                        {
+                            std::ostringstream ostr;
+                            ostr << "GadgetCloud_Recon2DT_Second_TimeStamp_" << ii;
+
+                            hoNDArray<float> res = packages_received_[ii].resTimeStampSecond;
+                            res.squeeze();
+                            GADGET_EXPORT_ARRAY(debugFolder2_fullPath_, gt_exporter_, res, ostr.str());
+                        }
+
+                        if ( packages_received_[ii].resPhysioTimeStampSecond.get_number_of_elements() > 0 )
+                        {
+                            std::ostringstream ostr;
+                            ostr << "GadgetCloud_Recon2DT_Second_PhysioTimeStamp_" << ii;
+
+                            hoNDArray<float> res = packages_received_[ii].resPhysioTimeStampSecond;
+                            res.squeeze();
+                            GADGET_EXPORT_ARRAY(debugFolder2_fullPath_, gt_exporter_, res, ostr.str());
+                        }
+                    }
                 }
             }
         }
@@ -479,17 +621,95 @@ bool GtPlusRecon2DTGadgetCloudSender::processJob(int jobID, GtPlusRecon2DTCloudP
             if ( !gadget_->packages_passed_to_next_gadget_[jobID].second )
             {
                 gadget_->packages_passed_to_next_gadget_[jobID].second = true;
-                GADGET_CHECK_RETURN(gadget_->sendOutRecon(&gadget_->image_headers_[jobID], 
-                    gadget_->packages_received_[jobID].complexIm, gadget_->image_series_, dataDimStartingIndexes, "Image"), false);
+
+                GADGET_CHECK_RETURN(gadget_->scalingImages(gadget_->packages_received_[jobID].complexIm), false);
+
+                if ( gadget_->send_out_recon_ )
+                {
+                    GADGET_CHECK_RETURN(gadget_->sendOutRecon(&gadget_->image_headers_[jobID], 
+                        gadget_->packages_received_[jobID].complexIm, gadget_->image_series_, dataDimStartingIndexes, "Image", GTPLUS_IMAGE_REGULAR), false);
+                }
+
+                if ( gadget_->send_out_recon_second_ )
+                {
+                    if ( gadget_->packages_received_[jobID].complexImSecond.get_number_of_elements() > 0 )
+                    {
+                        GADGET_MSG("Check received recon results (second set) in cloud sender ... ");
+
+                        // check the images are not empty
+                        float v(0);
+                        Gadgetron::norm2(gadget_->packages_received_[jobID].complexImSecond, v);
+
+                        bool reconResSecondValid = true;
+                        if ( GT_ABS(v) < FLT_EPSILON )
+                        {
+                            reconResSecondValid = false;
+                            GADGET_WARN_MSG("Received recon results (second set) contain no content ... ");
+                        }
+
+                        if ( reconResSecondValid )
+                        {
+                            Gadgetron::scal((float)gadget_->scalingFactor_, gadget_->packages_received_[jobID].complexImSecond);
+                            if ( gadget_->para_.workOrderPara_.retro_gated_images_ > 0 )
+                            {
+                                GADGET_CHECK_RETURN(gadget_->sendOutRecon(&gadget_->image_headers_[jobID], 
+                                                                        gadget_->packages_received_[jobID].complexImSecond, 
+                                                                        gadget_->packages_received_[jobID].resTimeStampSecond,
+                                                                        gadget_->packages_received_[jobID].resPhysioTimeStampSecond,
+                                                                        gadget_->image_series_+1, dataDimStartingIndexes, 
+                                                                        "ImageRetro", GTPLUS_IMAGE_RETRO), false);
+                            }
+                            else
+                            {
+                                GADGET_CHECK_RETURN(gadget_->sendOutRecon(&gadget_->image_headers_[jobID], 
+                                                                        gadget_->packages_received_[jobID].complexImSecond, 
+                                                                        gadget_->packages_received_[jobID].resTimeStampSecond,
+                                                                        gadget_->packages_received_[jobID].resPhysioTimeStampSecond,
+                                                                        gadget_->image_series_+1, dataDimStartingIndexes, 
+                                                                        "Image", GTPLUS_IMAGE_REGULAR), false);
+                            }
+                        }
+                    }
+                }
 
                 if ( !gadget_->debugFolder2_fullPath_.empty() )
                 {
                     std::ostringstream ostr;
                     ostr << "Recon2DT_" << jobID;
 
-                    hoNDArray<GT_Complex8> res = gadget_->packages_received_[jobID].complexIm;
+                    hoNDArray< std::complex<float> > res = gadget_->packages_received_[jobID].complexIm;
                     res.squeeze();
                     GADGET_EXPORT_ARRAY_COMPLEX(gadget_->debugFolder2_fullPath_, gadget_->gt_exporter_, res, ostr.str());
+
+                    if ( gadget_->packages_received_[jobID].complexImSecond.get_number_of_elements() > 0 )
+                    {
+                        std::ostringstream ostr;
+                        ostr << "Recon2DT_Second_" << jobID;
+
+                        hoNDArray< std::complex<float> > res = gadget_->packages_received_[jobID].complexImSecond;
+                        res.squeeze();
+                        GADGET_EXPORT_ARRAY_COMPLEX(gadget_->debugFolder2_fullPath_, gadget_->gt_exporter_, res, ostr.str());
+
+                        if ( gadget_->packages_received_[jobID].resTimeStampSecond.get_number_of_elements() > 0 )
+                        {
+                            std::ostringstream ostr;
+                            ostr << "Recon2DT_Second_TimeStamp_" << jobID;
+
+                            hoNDArray<float> res = gadget_->packages_received_[jobID].resTimeStampSecond;
+                            res.squeeze();
+                            GADGET_EXPORT_ARRAY(gadget_->debugFolder2_fullPath_, gadget_->gt_exporter_, res, ostr.str());
+                        }
+
+                        if ( gadget_->packages_received_[jobID].resPhysioTimeStampSecond.get_number_of_elements() > 0 )
+                        {
+                            std::ostringstream ostr;
+                            ostr << "Recon2DT_Second_PhysioTimeStamp_" << jobID;
+
+                            hoNDArray<float> res = gadget_->packages_received_[jobID].resPhysioTimeStampSecond;
+                            res.squeeze();
+                            GADGET_EXPORT_ARRAY(gadget_->debugFolder2_fullPath_, gadget_->gt_exporter_, res, ostr.str());
+                        }
+                    }
                 }
             }
         }
diff --git a/gadgets/gtPlus/GtPlusRecon2DTGadgetCloud.h b/gadgets/gtPlus/GtPlusRecon2DTGadgetCloud.h
index cbeb0f7..30ff6a7 100644
--- a/gadgets/gtPlus/GtPlusRecon2DTGadgetCloud.h
+++ b/gadgets/gtPlus/GtPlusRecon2DTGadgetCloud.h
@@ -7,8 +7,8 @@
             Ref to: 
 
             Hui Xue, Souheil Inati, Thomas Sangild Sorensen, Peter Kellman, Michael S. Hansen. 
-            Distributed MRI Reconstruction using Gadgetron based Cloud Computing. Submitted to
-            Magenetic Resonance in Medicine on Dec 2013.
+            Distributed MRI Reconstruction using Gadgetron based Cloud Computing. 
+            Magnetic Resonance in Medicine, doi: 10.1002/mrm.25213.
 
     \author Hui Xue
 */
diff --git a/gadgets/gtPlus/GtPlusRecon3DTGadget.cpp b/gadgets/gtPlus/GtPlusRecon3DTGadget.cpp
index f0a0014..0687b72 100644
--- a/gadgets/gtPlus/GtPlusRecon3DTGadget.cpp
+++ b/gadgets/gtPlus/GtPlusRecon3DTGadget.cpp
@@ -94,7 +94,7 @@ bool GtPlusRecon3DTGadget::readParameters()
         GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
 
         // get the parameters from base class
-        BaseClass::readParameters();
+        // BaseClass::readParameters();
 
         para_.recon_kspace_needed_ = recon_kspace_needed_;
         para_.workOrderPara_ = workOrderPara_;
@@ -158,27 +158,27 @@ int GtPlusRecon3DTGadget::process_config(ACE_Message_Block* mb)
     {
         if ( num_acq_channels_ > 2*para_.workOrderPara_.coil_compression_num_modesKept_ )
         {
-            numOfBytes = (double)matrix_size_encoding_[0]*kSpaceMaxAcqE1No_*kSpaceMaxAcqE2No_*num_acq_channels_*para_.workOrderPara_.coil_compression_num_modesKept_*sizeof(ValueType);
+            numOfBytes = (size_t)( (double)matrix_size_encoding_[0]*kSpaceMaxAcqE1No_*kSpaceMaxAcqE2No_*num_acq_channels_*para_.workOrderPara_.coil_compression_num_modesKept_*sizeof(ValueType));
         }
         else
         {
-            numOfBytes = (double)matrix_size_encoding_[0]*kSpaceMaxAcqE1No_*kSpaceMaxAcqE2No_*num_acq_channels_*para_.workOrderPara_.coil_compression_num_modesKept_*sizeof(ValueType);
+            numOfBytes = (size_t)( (double)matrix_size_encoding_[0]*kSpaceMaxAcqE1No_*kSpaceMaxAcqE2No_*num_acq_channels_*para_.workOrderPara_.coil_compression_num_modesKept_*sizeof(ValueType) );
         }
 
         if ( para_.workOrderPara_.recon_algorithm_ == Gadgetron::gtPlus::ISMRMRD_GRAPPA && para_.workOrderPara_.job_num_of_N_>0 )
         {
-            numOfBytes = (double)para_.workOrderPara_.job_num_of_N_*kSpaceMaxAcqE1No_*kSpaceMaxAcqE2No_*num_acq_channels_*para_.workOrderPara_.coil_compression_num_modesKept_*sizeof(ValueType)*1.5;
+            numOfBytes = (size_t)( (double)para_.workOrderPara_.job_num_of_N_*kSpaceMaxAcqE1No_*kSpaceMaxAcqE2No_*num_acq_channels_*para_.workOrderPara_.coil_compression_num_modesKept_*sizeof(ValueType)*1.5 );
         }
     }
     else
     {
         if ( para_.workOrderPara_.recon_algorithm_ == Gadgetron::gtPlus::ISMRMRD_SPIRIT || para_.workOrderPara_.recon_algorithm_ == Gadgetron::gtPlus::ISMRMRD_L1SPIRIT )
         {
-            numOfBytes = (double)matrix_size_encoding_[0]*kSpaceMaxAcqE1No_*kSpaceMaxAcqE2No_*num_acq_channels_*num_acq_channels_*sizeof(ValueType)*0.8;
+            numOfBytes = (size_t)((double)matrix_size_encoding_[0]*kSpaceMaxAcqE1No_*kSpaceMaxAcqE2No_*num_acq_channels_*num_acq_channels_*sizeof(ValueType)*0.8);
         }
         else
         {
-            numOfBytes = (double)matrix_size_encoding_[0]*kSpaceMaxAcqE1No_*kSpaceMaxAcqE2No_*num_acq_channels_*num_acq_channels_*sizeof(ValueType)*0.6;
+            numOfBytes = (size_t)((double)matrix_size_encoding_[0]*kSpaceMaxAcqE1No_*kSpaceMaxAcqE2No_*num_acq_channels_*num_acq_channels_*sizeof(ValueType)*0.6);
         }
     }
 
@@ -189,7 +189,7 @@ int GtPlusRecon3DTGadget::process_config(ACE_Message_Block* mb)
 
     if ( numOfBytes > 1024*1024*1024*128.0 )
     {
-        numOfBytes = 1024*1024*1024*4.0;
+        numOfBytes = (size_t)(1024*1024*1024*4.0);
     }
 
     GADGET_CONDITION_MSG(verboseMode_, "GtPlusRecon3DTGadget::Pre allocate : " << numOfBytes/1024.0/1024.0 << " Megabytes ... ");
@@ -208,7 +208,7 @@ int GtPlusRecon3DTGadget::process_config(ACE_Message_Block* mb)
         bool parseSuccess = this->parseGTCloudNodeFile(cloud_node_file_, gt_cloud_);
         if ( parseSuccess )
         {
-            CloudSize_ = gt_cloud_.size();
+            CloudSize_ = (unsigned int)gt_cloud_.size();
             if ( CloudSize_ == 0 ) CloudComputing_ = false;
         }
         else
@@ -232,9 +232,9 @@ int GtPlusRecon3DTGadget::process(Gadgetron::GadgetContainerMessage< GtPlusGadge
 
     boost::shared_ptr< std::vector<size_t> > dims = workOrder->data_.get_dimensions();
 
-    GADGET_CONDITION_MSG(verboseMode_, "[Ro E1 Cha Slice E2 Con Phase Rep Set Seg] = [" 
+    GADGET_CONDITION_MSG(verboseMode_, "[Ro E1 Cha Slice E2 Con Phase Rep Set Seg Ave] = [" 
         << (*dims)[0] << " " << (*dims)[1] << " " << (*dims)[2] << " " << (*dims)[3] << " " << (*dims)[4] 
-        << " " << (*dims)[5] << " " << (*dims)[6] << " " << (*dims)[7] << " " << (*dims)[8] << " " << (*dims)[9] << "]");
+        << " " << (*dims)[5] << " " << (*dims)[6] << " " << (*dims)[7] << " " << (*dims)[8] << " " << (*dims)[9] << " " << (*dims)[10] << "]");
 
     dimensions_ = *dims;
 
@@ -306,15 +306,19 @@ int GtPlusRecon3DTGadget::process(Gadgetron::GadgetContainerMessage< GtPlusGadge
     // ---------------------------------------------------------
     // set the worker
     // ---------------------------------------------------------
+    worker_grappa_.verbose_ = verboseMode_;
     worker_grappa_.performTiming_ = performTiming_;
     if ( !debugFolder_fullPath_.empty() ) worker_grappa_.debugFolder_ = debugFolder_fullPath_;
 
+    worker_noacceleration_.verbose_ = verboseMode_;
     worker_noacceleration_.performTiming_ = performTiming_;
     if ( !debugFolder_fullPath_.empty() ) worker_noacceleration_.debugFolder_ = debugFolder_fullPath_;
 
+    worker_spirit_.verbose_ = verboseMode_;
     worker_spirit_.performTiming_ = performTiming_;
     if ( !debugFolder_fullPath_.empty() ) worker_spirit_.debugFolder_ = debugFolder_fullPath_;
 
+    worker_spirit_L1_ncg_.verbose_ = verboseMode_;
     worker_spirit_L1_ncg_.performTiming_ = performTiming_;
     if ( !debugFolder_fullPath_.empty() ) worker_spirit_L1_ncg_.debugFolder_ = debugFolder_fullPath_;
 
@@ -342,7 +346,8 @@ int GtPlusRecon3DTGadget::process(Gadgetron::GadgetContainerMessage< GtPlusGadge
         workflow_.setDataArray(workOrder->other_);
         GADGET_CHECK_RETURN(workflow_.recon(), GADGET_FAIL);
 
-       GADGET_CHECK_RETURN(this->sendOutRecon(images, workflow_.res_, image_series_+1, workOrder->dataDimStartingIndexes_, "Other"), GADGET_FAIL);
+        GADGET_CHECK_RETURN(this->scalingImages(workflow_.res_), GADGET_FAIL);
+        GADGET_CHECK_RETURN(this->sendOutRecon(images, workflow_.res_, image_series_+1, workOrder->dataDimStartingIndexes_, "Other", GTPLUS_IMAGE_OTHER), GADGET_FAIL);
 
         workflow_.res_.clear();
         workflow_.data_ = NULL;
@@ -366,7 +371,7 @@ int GtPlusRecon3DTGadget::process(Gadgetron::GadgetContainerMessage< GtPlusGadge
         workflow_.workOrder_->print(std::cout);
     }
 
-    workflow_.setDataArray(workOrder->data_);
+    workflow_.setDataArray(workOrder->data_, workOrder->time_stamp_, workOrder->physio_time_stamp_);
 
     if ( workOrder->ref_.get_number_of_elements() > 0 )
     {
@@ -399,6 +404,12 @@ int GtPlusRecon3DTGadget::process(Gadgetron::GadgetContainerMessage< GtPlusGadge
         workflow_.worker_ = &worker_noacceleration_;
     }
 
+    if ( workflow_.worker_ != &worker_grappa_ )
+    {
+        GADGET_WARN_MSG("The gfactor computation is currently only avaialbe for grappa reconstruction ... ");
+        workflow_.workOrder_->gfactor_needed_ = false;
+    }
+
     GADGET_CHECK_RETURN(workflow_.preProcessing(), GADGET_FAIL);
     GADGET_CHECK_RETURN(workflow_.recon(), GADGET_FAIL);
     GADGET_CHECK_RETURN(workflow_.postProcessing(), GADGET_FAIL);
@@ -410,13 +421,69 @@ int GtPlusRecon3DTGadget::process(Gadgetron::GadgetContainerMessage< GtPlusGadge
         std::ostringstream ostr;
         ostr << "Recon3DT";
 
-        hoNDArray<GT_Complex8> res = workflow_.res_;
+        hoNDArray< std::complex<float> > res = workflow_.res_;
         res.squeeze();
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder2_fullPath_, gt_exporter_, res, ostr.str());
+
+        if ( workflow_.workOrder_->gfactor_needed_ )
+        {
+            std::ostringstream ostr;
+            ostr << "Recon3DT_GFactor";
+
+            hoNDArray< std::complex<float> > gfactor = workflow_.gfactor_;
+            gfactor.squeeze();
+            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder2_fullPath_, gt_exporter_, gfactor, ostr.str());
+        }
+    }
+
+    // compute SNR image and stdmap
+    hoNDArray<ValueType> snrImage, stdMap;
+    bool snrImageComputed = false;
+    bool stdMapComputed = false;
+
+    if ( workflow_.workOrder_->gfactor_needed_ || workOrder->acceFactorE1_*workOrder->acceFactorE2_==1 )
+    {
+        if ( scalingFactor_snr_image_>0 || scalingFactor_std_map_>0)
+        {
+            bool withAcceleration = (workOrder->acceFactorE1_*workOrder->acceFactorE2_>1);
+
+            if ( !this->computeSNRImage(workflow_.res_, workflow_.gfactor_, 
+                    start_frame_for_std_map_, withAcceleration, snrImage, stdMap) )
+            {
+                snrImage.clear();
+                stdMap.clear();
+            }
+            else
+            {
+                snrImageComputed = true;
+                stdMapComputed = true;
+            }
+
+            if ( workOrder->acceFactorE1_*workOrder->acceFactorE2_==1 ) snrImageComputed = false;
+        }
     }
 
     // send out the results
-    GADGET_CHECK_RETURN(this->sendOutRecon(images, workflow_.res_, image_series_, workOrder->dataDimStartingIndexes_, "Image"), GADGET_FAIL);
+    GADGET_CHECK_RETURN(this->scalingImages(workflow_.res_), GADGET_FAIL);
+    GADGET_CHECK_RETURN(this->sendOutRecon(images, workflow_.res_, image_series_, workOrder->dataDimStartingIndexes_, "Image", GTPLUS_IMAGE_REGULAR), GADGET_FAIL);
+
+    if ( workflow_.workOrder_->gfactor_needed_ )
+    {
+        Gadgetron::scal((float)scalingFactor_gfactor_, workflow_.gfactor_);
+        GADGET_CHECK_RETURN(this->sendOutRecon(images, workflow_.gfactor_, image_series_+1, workOrder->dataDimStartingIndexes_, "gfactor", GTPLUS_IMAGE_GFACTOR), GADGET_FAIL);
+    }
+
+    if ( scalingFactor_snr_image_>0 && snrImage.get_number_of_elements()>0 && snrImageComputed )
+    {
+        Gadgetron::scal((float)scalingFactor_snr_image_, snrImage);
+        GADGET_CHECK_RETURN(this->sendOutRecon(images, snrImage, image_series_+2, workOrder->dataDimStartingIndexes_, "snr_map", GTPLUS_IMAGE_SNR_MAP), GADGET_FAIL);
+    }
+
+    if ( scalingFactor_std_map_>0 && stdMap.get_number_of_elements()>0 && stdMapComputed )
+    {
+        Gadgetron::scal((float)scalingFactor_std_map_, stdMap);
+        GADGET_CHECK_RETURN(this->sendOutRecon(images, stdMap, image_series_+3, workOrder->dataDimStartingIndexes_, "std_map", GTPLUS_IMAGE_STD_MAP), GADGET_FAIL);
+    }
 
     GADGET_CONDITION_MSG(verboseMode_, "GtPlusRecon3DTGadget::process(...) ends ... ");
 
diff --git a/gadgets/gtPlus/GtPlusReconGadget.cpp b/gadgets/gtPlus/GtPlusReconGadget.cpp
index e0b1753..3fe6a9e 100644
--- a/gadgets/gtPlus/GtPlusReconGadget.cpp
+++ b/gadgets/gtPlus/GtPlusReconGadget.cpp
@@ -1,1136 +1,1186 @@
 
 #include "GtPlusReconGadget.h"
 #include "GtPlusGadgetOpenMP.h"
+#include "gadgetron_paths.h"
+#include <iomanip>
+#include "CloudBus.h"
 
 using namespace Gadgetron::gtPlus;
 
 namespace Gadgetron
 {
 
-GtPlusReconGadget::GtPlusReconGadget() : mem_manager_(new Gadgetron::gtPlus::gtPlusMemoryManager(4, 640*1024*1024))
-{
-    image_series_ = 100;
+    GtPlusReconGadget::GtPlusReconGadget() : mem_manager_(new Gadgetron::gtPlus::gtPlusMemoryManager(4, 640*1024*1024))
+    {
+        image_series_ = 100;
 
-    min_intensity_value_ = 64;
-    max_intensity_value_ = 4095;
+        min_intensity_value_ = 64;
+        max_intensity_value_ = 4095;
 
-    max_intensity_value_US_ = 2048;
+        max_intensity_value_US_ = 2048;
 
-    scalingFactor_ = -1;
-    use_constant_scalingFactor_ = false;
+        scalingFactor_ = -1;
+        scalingFactor_gfactor_ = 100;
+        scalingFactor_wrap_around_map_ = 1000;
+        scalingFactor_snr_image_ = 10;
+        scalingFactor_std_map_ = 1000;
 
-    timeStampResolution_ = 0.0025f;
+        start_frame_for_std_map_ = 5;
 
-    aSpacing_[0] = 2.0;
-    aSpacing_[1] = 2.0;
-    aSpacing_[2] = 6.0;
-    aSpacing_[3] = 1.0;
-    aSpacing_[4] = 1.0;
-    aSpacing_[5] = 1.0;
+        use_constant_scalingFactor_ = false;
 
-    reconE1_ = 1;
-    reconE2_ = 1;
+        timeStampResolution_ = 0.0025f;
 
-    processed_called_times_ = 0;
+        aSpacing_[0] = 2.0;
+        aSpacing_[1] = 2.0;
+        aSpacing_[2] = 6.0;
+        aSpacing_[3] = 1.0;
+        aSpacing_[4] = 1.0;
+        aSpacing_[5] = 1.0;
 
-    kSpaceMaxAcqE2No_ = 0;
+        reconE1_ = 1;
+        reconE2_ = 1;
 
-    filterRO_type_ = ISMRMRD_FILTER_GAUSSIAN;
-    filterRO_sigma_ = 1.5;
-    filterRO_width_ = 0.15;
+        processed_called_times_ = 0;
 
-    filterE1_type_ = ISMRMRD_FILTER_GAUSSIAN;
-    filterE1_sigma_ = 1.5;
-    filterE1_width_ = 0.15;
+        thread_number_ratio_ = 0;
 
-    filterE2_type_ = ISMRMRD_FILTER_GAUSSIAN;
-    filterE2_sigma_ = 1.5;
-    filterE2_width_ = 0.15;
+        kSpaceMaxAcqE2No_ = 0;
 
-    filterRO_ref_type_ = ISMRMRD_FILTER_HANNING;
-    filterRO_ref_sigma_ = 1.5;
-    filterRO_ref_width_ = 0.15;
+        filterRO_type_ = ISMRMRD_FILTER_GAUSSIAN;
+        filterRO_sigma_ = 1.5;
+        filterRO_width_ = 0.15;
 
-    filterE1_ref_type_ = ISMRMRD_FILTER_HANNING;
-    filterE1_ref_sigma_ = 1.5;
-    filterE1_ref_width_ = 0.15;
+        filterE1_type_ = ISMRMRD_FILTER_GAUSSIAN;
+        filterE1_sigma_ = 1.5;
+        filterE1_width_ = 0.15;
 
-    filterE2_ref_type_ = ISMRMRD_FILTER_HANNING;
-    filterE2_ref_sigma_ = 1.5;
-    filterE2_ref_width_ = 0.15;
+        filterE2_type_ = ISMRMRD_FILTER_GAUSSIAN;
+        filterE2_sigma_ = 1.5;
+        filterE2_width_ = 0.15;
 
-    filterRO_pf_type_ = ISMRMRD_FILTER_HANNING;
-    filterRO_pf_sigma_ = 1.5;
-    filterRO_pf_width_ = 0.15;
-    filterRO_pf_densityComp_ = false;
+        filterRO_ref_type_ = ISMRMRD_FILTER_HANNING;
+        filterRO_ref_sigma_ = 1.5;
+        filterRO_ref_width_ = 0.15;
 
-    filterE1_pf_type_ = ISMRMRD_FILTER_HANNING;
-    filterE1_pf_sigma_ = 1.5;
-    filterE1_pf_width_ = 0.15;
-    filterE1_pf_densityComp_ = false;
+        filterE1_ref_type_ = ISMRMRD_FILTER_HANNING;
+        filterE1_ref_sigma_ = 1.5;
+        filterE1_ref_width_ = 0.15;
 
-    filterE2_pf_type_ = ISMRMRD_FILTER_HANNING;
-    filterE2_pf_sigma_ = 1.5;
-    filterE2_pf_width_ = 0.15;
-    filterE2_pf_densityComp_ = false;
+        filterE2_ref_type_ = ISMRMRD_FILTER_HANNING;
+        filterE2_ref_sigma_ = 1.5;
+        filterE2_ref_width_ = 0.15;
 
-    debugFolder_ = "DebugOutput";
-    debugFolder2_ = debugFolder_;
+        filterRO_pf_type_ = ISMRMRD_FILTER_HANNING;
+        filterRO_pf_sigma_ = 1.5;
+        filterRO_pf_width_ = 0.15;
+        filterRO_pf_densityComp_ = false;
 
-    performTiming_ = true;
+        filterE1_pf_type_ = ISMRMRD_FILTER_HANNING;
+        filterE1_pf_sigma_ = 1.5;
+        filterE1_pf_width_ = 0.15;
+        filterE1_pf_densityComp_ = false;
 
-    verboseMode_ = false;
+        filterE2_pf_type_ = ISMRMRD_FILTER_HANNING;
+        filterE2_pf_sigma_ = 1.5;
+        filterE2_pf_width_ = 0.15;
+        filterE2_pf_densityComp_ = false;
 
-    CloudComputing_ = false;
-    CloudSize_ = 0;
+        recon_res_second_required_ = false;
 
-    gt_timer1_.set_timing_in_destruction(false);
-    gt_timer2_.set_timing_in_destruction(false);
-    gt_timer3_.set_timing_in_destruction(false);
+        send_out_recon_ = true;
+        send_out_recon_second_ = true;
 
-    Gadgetron::prepOpenMP();
-    Gadgetron::prepMKL();
-}
+        debugFolder_ = "DebugOutput";
+        debugFolder2_ = debugFolder_;
 
-GtPlusReconGadget::~GtPlusReconGadget()
-{
+        performTiming_ = true;
 
-}
+        verboseMode_ = false;
 
-bool GtPlusReconGadget::readParameters()
-{
-    try
+        CloudComputing_ = false;
+        CloudSize_ = 0;
+
+        gt_timer1_.set_timing_in_destruction(false);
+        gt_timer2_.set_timing_in_destruction(false);
+        gt_timer3_.set_timing_in_destruction(false);
+
+        Gadgetron::prepOpenMP();
+    }
+
+    GtPlusReconGadget::~GtPlusReconGadget()
     {
-        GADGET_CONDITION_MSG(verboseMode_, "------> GtPlusReconGadget parameters <------");
-
-        min_intensity_value_ = this->get_int_value("min_intensity_value");
-        GADGET_CONDITION_MSG(verboseMode_, "min_intensity_value_ is " << min_intensity_value_);
-
-        max_intensity_value_ = this->get_int_value("max_intensity_value");
-        GADGET_CONDITION_MSG(verboseMode_, "max_intensity_value_ is " << max_intensity_value_);
-
-        scalingFactor_ = this->get_double_value("scalingFactor");
-        GADGET_CONDITION_MSG(verboseMode_, "scalingFactor_ is " << scalingFactor_);
-
-        use_constant_scalingFactor_ = this->get_bool_value("use_constant_scalingFactor");
-        GADGET_CONDITION_MSG(verboseMode_, "use_constant_scalingFactor_ is " << use_constant_scalingFactor_);
-
-        boost::shared_ptr<std::string> str = this->get_string_value("debugFolder");
-        debugFolder_ = *str;
-        GADGET_CONDITION_MSG(verboseMode_, "debugFolder_ is " << debugFolder_);
-
-        boost::shared_ptr<std::string> str2 = this->get_string_value("debugFolder2");
-        debugFolder2_ = *str2;
-        GADGET_CONDITION_MSG(verboseMode_, "debugFolder2_ is " << debugFolder2_);
-
-        timeStampResolution_ = (float)this->get_double_value("timeStampResolution");
-        GADGET_CONDITION_MSG(verboseMode_, "timeStampResolution_ is " << timeStampResolution_);
-
-        performTiming_ = this->get_bool_value("performTiming");
-        GADGET_CONDITION_MSG(verboseMode_, "performTiming_ is " << performTiming_);
-
-        // kspace filter parameters
-        str = this->get_string_value("filterRO");
-        filterRO_type_ = gtPlus_util_.getISMRMRDKSpaceFilterFromName(*str);
-        filterRO_sigma_ = this->get_double_value("filterRO_sigma");
-        filterRO_width_ = this->get_double_value("filterRO_width");
-        GADGET_CONDITION_MSG(verboseMode_, "filterRO_type_ is " << *str);
-        GADGET_CONDITION_MSG(verboseMode_, "filterRO_sigma_ is " << filterRO_sigma_);
-        GADGET_CONDITION_MSG(verboseMode_, "filterRO_width_ is " << filterRO_width_);
-
-        str = this->get_string_value("filterE1");
-        filterE1_type_ = gtPlus_util_.getISMRMRDKSpaceFilterFromName(*str);
-        filterE1_sigma_ = this->get_double_value("filterE1_sigma");
-        filterE1_width_ = this->get_double_value("filterE1_width");
-        GADGET_CONDITION_MSG(verboseMode_, "filterE1_type_ is " << *str);
-        GADGET_CONDITION_MSG(verboseMode_, "filterE1_sigma_ is " << filterE1_sigma_);
-        GADGET_CONDITION_MSG(verboseMode_, "filterE1_width_ is " << filterE1_width_);
-
-        str = this->get_string_value("filterE2");
-        filterE2_type_ = gtPlus_util_.getISMRMRDKSpaceFilterFromName(*str);
-        filterE2_sigma_ = this->get_double_value("filterE2_sigma");
-        filterE2_width_ = this->get_double_value("filterE2_width");
-        GADGET_CONDITION_MSG(verboseMode_, "filterE2_type_ is " << *str);
-        GADGET_CONDITION_MSG(verboseMode_, "filterE2_sigma_ is " << filterE2_sigma_);
-        GADGET_CONDITION_MSG(verboseMode_, "filterE2_width_ is " << filterE2_width_);
-
-        str = this->get_string_value("filterRefRO");
-        filterRO_ref_type_ = gtPlus_util_.getISMRMRDKSpaceFilterFromName(*str);
-        filterRO_ref_sigma_ = this->get_double_value("filterRefRO_sigma");
-        filterRO_ref_width_ = this->get_double_value("filterRefRO_width");
-        GADGET_CONDITION_MSG(verboseMode_, "filterRO_ref_type_ is " << *str);
-        GADGET_CONDITION_MSG(verboseMode_, "filterRO_ref_sigma_ is " << filterRO_ref_sigma_);
-        GADGET_CONDITION_MSG(verboseMode_, "filterRO_ref_width_ is " << filterRO_ref_width_);
-
-        str = this->get_string_value("filterRefE1");
-        filterE1_ref_type_ = gtPlus_util_.getISMRMRDKSpaceFilterFromName(*str);
-        filterE1_ref_sigma_ = this->get_double_value("filterRefE1_sigma");
-        filterE1_ref_width_ = this->get_double_value("filterRefE1_width");
-        GADGET_CONDITION_MSG(verboseMode_, "filterE1_ref_type_ is " << *str);
-        GADGET_CONDITION_MSG(verboseMode_, "filterE1_ref_sigma_ is " << filterE1_ref_sigma_);
-        GADGET_CONDITION_MSG(verboseMode_, "filterE1_ref_width_ is " << filterE1_ref_width_);
-
-        str = this->get_string_value("filterRefE2");
-        filterE2_ref_type_ = gtPlus_util_.getISMRMRDKSpaceFilterFromName(*str);
-        filterE2_ref_sigma_ = this->get_double_value("filterRefE2_sigma");
-        filterE2_ref_width_ = this->get_double_value("filterRefE2_width");
-        GADGET_CONDITION_MSG(verboseMode_, "filterE2_ref_type_ is " << *str);
-        GADGET_CONDITION_MSG(verboseMode_, "filterE2_ref_sigma_ is " << filterE2_ref_sigma_);
-        GADGET_CONDITION_MSG(verboseMode_, "filterE2_ref_width_ is " << filterE2_ref_width_);
-
-        str = this->get_string_value("filterPartialFourierRO");
-        filterRO_pf_type_ = gtPlus_util_.getISMRMRDKSpaceFilterFromName(*str);
-        filterRO_pf_sigma_ = this->get_double_value("filterPartialFourierRO_sigma");
-        filterRO_pf_width_ = this->get_double_value("filterPartialFourierRO_width");
-        filterRO_pf_densityComp_ = this->get_bool_value("filterPartialFourierRO_densityComp");
-        GADGET_CONDITION_MSG(verboseMode_, "filterRO_pf_type_ is " << *str);
-        GADGET_CONDITION_MSG(verboseMode_, "filterRO_pf_sigma_ is " << filterRO_pf_sigma_);
-        GADGET_CONDITION_MSG(verboseMode_, "filterRO_pf_width_ is " << filterRO_pf_width_);
-        GADGET_CONDITION_MSG(verboseMode_, "filterRO_pf_densityComp_ is " << filterRO_pf_densityComp_);
-
-        str = this->get_string_value("filterPartialFourierE1");
-        filterE1_pf_type_ = gtPlus_util_.getISMRMRDKSpaceFilterFromName(*str);
-        filterE1_pf_sigma_ = this->get_double_value("filterPartialFourierE1_sigma");
-        filterE1_pf_width_ = this->get_double_value("filterPartialFourierE1_width");
-        filterE1_pf_densityComp_ = this->get_bool_value("filterPartialFourierE1_densityComp");
-        GADGET_CONDITION_MSG(verboseMode_, "filterE1_pf_type_ is " << *str);
-        GADGET_CONDITION_MSG(verboseMode_, "filterE1_pf_sigma_ is " << filterE1_pf_sigma_);
-        GADGET_CONDITION_MSG(verboseMode_, "filterE1_pf_width_ is " << filterE1_pf_width_);
-        GADGET_CONDITION_MSG(verboseMode_, "filterE1_pf_densityComp_ is " << filterE1_pf_densityComp_);
-
-        str = this->get_string_value("filterPartialFourierE2");
-        filterE2_pf_type_ = gtPlus_util_.getISMRMRDKSpaceFilterFromName(*str);
-        filterE2_pf_sigma_ = this->get_double_value("filterPartialFourierE2_sigma");
-        filterE2_pf_width_ = this->get_double_value("filterPartialFourierE2_width");
-        filterE2_pf_densityComp_ = this->get_bool_value("filterPartialFourierE2_densityComp");
-        GADGET_CONDITION_MSG(verboseMode_, "filterE2_pf_type_ is " << *str);
-        GADGET_CONDITION_MSG(verboseMode_, "filterE2_pf_sigma_ is " << filterE2_pf_sigma_);
-        GADGET_CONDITION_MSG(verboseMode_, "filterE2_pf_width_ is " << filterE2_pf_width_);
-        GADGET_CONDITION_MSG(verboseMode_, "filterE2_pf_densityComp_ is " << filterE2_pf_densityComp_);
-
-        GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
-
-        CloudComputing_ = this->get_bool_value("CloudComputing");
-        CloudSize_ = (unsigned int)(this->get_int_value("CloudSize"));
-
-        GADGET_CONDITION_MSG(verboseMode_, "CloudComputing_ is " << CloudComputing_);
-        GADGET_CONDITION_MSG(verboseMode_, "CloudSize_ is " << CloudSize_);
-
-        str = this->get_string_value("cloudNodeFile");
-        cloud_node_file_ = *str;
-        GADGET_CONDITION_MSG(verboseMode_, "cloud_node_file_ is " << cloud_node_file_);
-
-        // read in the cloud information for every node
-        gt_cloud_.resize(CloudSize_);
-
-        for ( unsigned int ii=0; ii<CloudSize_; ii++ )
-        {
-            std::ostringstream ostreamstr1;
-            ostreamstr1 << "CloudNode" << ii << "_IP" << std::ends;
-            boost::shared_ptr<std::string> IP = this->get_string_value(ostreamstr1.str().c_str());
-            gt_cloud_[ii].get<0>() = *IP;
-
-            std::ostringstream ostreamstr2;
-            ostreamstr2 << "CloudNode" << ii << "_Port" << std::ends;
-            boost::shared_ptr<std::string> Port = this->get_string_value(ostreamstr2.str().c_str());
-            gt_cloud_[ii].get<1>() = *Port;
-
-            std::ostringstream ostreamstr3;
-            ostreamstr3 << "CloudNode" << ii << "_XMLConfiguration" << std::ends;
-            boost::shared_ptr<std::string> xmlName = this->get_string_value(ostreamstr3.str().c_str());
-            gt_cloud_[ii].get<2>() = *xmlName;
-
-            std::ostringstream ostreamstr4;
-            ostreamstr4 << "CloudNode" << ii << "_ComputingPowerIndex" << std::ends;
-            unsigned int computingPowerIndex = this->get_int_value(ostreamstr4.str().c_str());
-            gt_cloud_[ii].get<3>() = computingPowerIndex;
-
-            GADGET_CONDITION_MSG(verboseMode_, "Cloud Node " << ii << " : " << gt_cloud_[ii]);
-        }
 
-        GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
+    }
 
-        GADGET_CONDITION_MSG(verboseMode_, "==================================================================");
+    bool GtPlusReconGadget::readParameters()
+    {
+        try
+        {
+            GADGET_CONDITION_MSG(verboseMode_, "------> GtPlusReconGadget parameters <------");
 
-        GADGET_CONDITION_MSG(verboseMode_, "------> GtPlus recon parameters <------");
+            min_intensity_value_ = this->get_int_value("min_intensity_value");
+            GADGET_CONDITION_MSG(verboseMode_, "min_intensity_value_ is " << min_intensity_value_);
 
-        workOrderPara_.upstream_coil_compression_ = this->get_bool_value("upstream_coil_compression");
-        GADGET_CONDITION_MSG(verboseMode_, "upstream_coil_compression_ is " << workOrderPara_.upstream_coil_compression_);
+            max_intensity_value_ = this->get_int_value("max_intensity_value");
+            GADGET_CONDITION_MSG(verboseMode_, "max_intensity_value_ is " << max_intensity_value_);
 
-        workOrderPara_.upstream_coil_compression_thres_ = this->get_double_value("upstream_coil_compression_thres");
-        GADGET_CONDITION_MSG(verboseMode_, "upstream_coil_compression_thres_ is " << workOrderPara_.upstream_coil_compression_thres_);
+            scalingFactor_ = this->get_double_value("scalingFactor");
+            GADGET_CONDITION_MSG(verboseMode_, "scalingFactor_ is " << scalingFactor_);
 
-        workOrderPara_.upstream_coil_compression_num_modesKept_ = this->get_int_value("upstream_coil_compression_num_modesKept");
-        GADGET_CONDITION_MSG(verboseMode_, "upstream_coil_compression_num_modesKept_ is " << workOrderPara_.upstream_coil_compression_num_modesKept_);
-
-        workOrderPara_.downstream_coil_compression_ = this->get_bool_value("downstream_coil_compression");
-        GADGET_CONDITION_MSG(verboseMode_, "downstream_coil_compression_ is " << workOrderPara_.downstream_coil_compression_);
+            scalingFactor_gfactor_ = this->get_double_value("scalingFactor_gfactor");
+            if ( scalingFactor_gfactor_ == 0 ) scalingFactor_gfactor_ = 100;
+            GADGET_CONDITION_MSG(verboseMode_, "scalingFactor_gfactor_ is " << scalingFactor_gfactor_);
 
-        workOrderPara_.coil_compression_thres_ = this->get_double_value("coil_compression_thres");
-        GADGET_CONDITION_MSG(verboseMode_, "coil_compression_thres_ is " << workOrderPara_.coil_compression_thres_);
-
-        workOrderPara_.coil_compression_num_modesKept_ = this->get_int_value("coil_compression_num_modesKept");
-        GADGET_CONDITION_MSG(verboseMode_, "coil_compression_num_modesKept_ is " << workOrderPara_.coil_compression_num_modesKept_);
-
-        GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
-
-        str = this->get_string_value("coil_map_algorithm");
-        workOrderPara_.coil_map_algorithm_ = gtPlus_util_.getISMRMRDCoilMapAlgoFromName(*str);
-        GADGET_CONDITION_MSG(verboseMode_, "coil_map_algorithm_ is " << *str);
-
-        workOrderPara_.csm_kSize_ = (size_t)(this->get_int_value("csm_kSize"));
-        GADGET_CONDITION_MSG(verboseMode_, "csm_kSize_ is " << workOrderPara_.csm_kSize_);
-
-        workOrderPara_.csm_powermethod_num_ = (size_t)(this->get_int_value("csm_powermethod_num"));
-        GADGET_CONDITION_MSG(verboseMode_, "csm_powermethod_num_ is " << workOrderPara_.csm_powermethod_num_);
-
-        workOrderPara_.csm_true_3D_ = this->get_bool_value("csm_true_3D");
-        GADGET_CONDITION_MSG(verboseMode_, "csm_true_3D_ is " << workOrderPara_.csm_true_3D_);
-
-        workOrderPara_.csm_iter_num_ = (size_t)(this->get_int_value("csm_iter_num"));
-        GADGET_CONDITION_MSG(verboseMode_, "csm_iter_num_ is " << workOrderPara_.csm_iter_num_);
-
-        workOrderPara_.csm_iter_thres_ = this->get_double_value("csm_iter_thres");
-        GADGET_CONDITION_MSG(verboseMode_, "csm_iter_thres_ is " << workOrderPara_.csm_iter_thres_);
-
-        workOrderPara_.csm_use_gpu_ = this->get_bool_value("csm_use_gpu");
-        GADGET_CONDITION_MSG(verboseMode_, "csm_use_gpu_ is " << workOrderPara_.csm_use_gpu_);
-
-        GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
-
-        str = this->get_string_value("recon_algorithm");
-        workOrderPara_.recon_algorithm_ = gtPlus_util_.getISMRMRDReconAlgoFromName(*str);
-        GADGET_CONDITION_MSG(verboseMode_, "recon_algorithm_ is " << *str);
-
-        workOrderPara_.recon_auto_parameters_ = this->get_bool_value("recon_auto_parameters");
-        GADGET_CONDITION_MSG(verboseMode_, "recon_auto_parameters_ is " << workOrderPara_.recon_auto_parameters_);
-
-        GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
-
-        workOrderPara_.grappa_kSize_RO_ = (size_t)(this->get_int_value("grappa_kSize_RO"));
-        workOrderPara_.grappa_kSize_E1_ = (size_t)(this->get_int_value("grappa_kSize_E1"));
-        workOrderPara_.grappa_kSize_E2_ = (size_t)(this->get_int_value("grappa_kSize_E2"));
-        workOrderPara_.grappa_reg_lamda_ = this->get_double_value("grappa_reg_lamda");
-        workOrderPara_.grappa_calib_over_determine_ratio_ = this->get_double_value("grappa_calib_over_determine_ratio");
-        workOrderPara_.grappa_use_gpu_ = this->get_bool_value("grappa_use_gpu");
-
-        GADGET_CONDITION_MSG(verboseMode_, "grappa_kSize_RO_ is " << workOrderPara_.grappa_kSize_RO_);
-        GADGET_CONDITION_MSG(verboseMode_, "grappa_kSize_E1_ is " << workOrderPara_.grappa_kSize_E1_);
-        GADGET_CONDITION_MSG(verboseMode_, "grappa_kSize_E2_ is " << workOrderPara_.grappa_kSize_E2_);
-        GADGET_CONDITION_MSG(verboseMode_, "grappa_reg_lamda_ is " << workOrderPara_.grappa_reg_lamda_);
-        GADGET_CONDITION_MSG(verboseMode_, "grappa_calib_over_determine_ratio_ is " << workOrderPara_.grappa_calib_over_determine_ratio_);
-        GADGET_CONDITION_MSG(verboseMode_, "grappa_use_gpu_ is " << workOrderPara_.grappa_use_gpu_);
-
-        GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
-
-        workOrderPara_.spirit_kSize_RO_ = (size_t)(this->get_int_value("spirit_kSize_RO"));
-        workOrderPara_.spirit_kSize_E1_ = (size_t)(this->get_int_value("spirit_kSize_E1"));
-        workOrderPara_.spirit_kSize_E2_ = (size_t)(this->get_int_value("spirit_kSize_E2"));
-        workOrderPara_.spirit_reg_lamda_ = this->get_double_value("spirit_reg_lamda");
-        workOrderPara_.spirit_use_gpu_ = this->get_bool_value("spirit_use_gpu");
-        workOrderPara_.spirit_calib_over_determine_ratio_ = this->get_double_value("spirit_calib_over_determine_ratio");
-        workOrderPara_.spirit_solve_symmetric_ = this->get_bool_value("spirit_solve_symmetric");
-        workOrderPara_.spirit_iter_max_ = (size_t)(this->get_int_value("spirit_iter_max"));
-        workOrderPara_.spirit_iter_thres_ = this->get_double_value("spirit_iter_thres");
-        workOrderPara_.spirit_print_iter_ = this->get_bool_value("spirit_print_iter");
-
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_kSize_RO_ is " << workOrderPara_.spirit_kSize_RO_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_kSize_E1_ is " << workOrderPara_.spirit_kSize_E1_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_kSize_E2_ is " << workOrderPara_.spirit_kSize_E2_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_reg_lamda_ is " << workOrderPara_.spirit_reg_lamda_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_use_gpu_ is " << workOrderPara_.spirit_use_gpu_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_calib_over_determine_ratio_ is " << workOrderPara_.spirit_calib_over_determine_ratio_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_solve_symmetric_ is " << workOrderPara_.spirit_solve_symmetric_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_iter_max_ is " << workOrderPara_.spirit_iter_max_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_iter_thres_ is " << workOrderPara_.spirit_iter_thres_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_print_iter_ is " << workOrderPara_.spirit_print_iter_);
-
-        GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
-
-        workOrderPara_.spirit_perform_linear_ = this->get_bool_value("spirit_perform_linear");
-        workOrderPara_.spirit_perform_nonlinear_ = this->get_bool_value("spirit_perform_nonlinear");
-        workOrderPara_.spirit_parallel_imaging_lamda_ = this->get_double_value("spirit_parallel_imaging_lamda");
-        workOrderPara_.spirit_image_reg_lamda_ = this->get_double_value("spirit_image_reg_lamda");
-        workOrderPara_.spirit_data_fidelity_lamda_ = this->get_double_value("spirit_data_fidelity_lamda");
-        workOrderPara_.spirit_ncg_iter_max_ = (size_t)(this->get_int_value("spirit_ncg_iter_max"));
-        workOrderPara_.spirit_ncg_iter_thres_ = this->get_double_value("spirit_ncg_iter_thres");
-        workOrderPara_.spirit_ncg_print_iter_ = this->get_bool_value("spirit_ncg_print_iter");
-        // spirit_ncg_scale_factor_ is computed from the data
-        workOrderPara_.spirit_use_coil_sen_map_ = this->get_bool_value("spirit_use_coil_sen_map");
-        workOrderPara_.spirit_use_moco_enhancement_ = this->get_bool_value("spirit_use_moco_enhancement");
-        workOrderPara_.spirit_recon_moco_images_ = this->get_bool_value("spirit_recon_moco_images");
-        workOrderPara_.spirit_RO_enhancement_ratio_ = this->get_double_value("spirit_RO_enhancement_ratio");
-        workOrderPara_.spirit_E1_enhancement_ratio_ = this->get_double_value("spirit_E1_enhancement_ratio");
-        workOrderPara_.spirit_E2_enhancement_ratio_ = this->get_double_value("spirit_E2_enhancement_ratio");
-        workOrderPara_.spirit_temporal_enhancement_ratio_ = this->get_double_value("spirit_temporal_enhancement_ratio");
-        workOrderPara_.spirit_2D_scale_per_chunk_ = this->get_bool_value("spirit_2D_scale_per_chunk");
-        workOrderPara_.spirit_3D_scale_per_chunk_ = this->get_bool_value("spirit_3D_scale_per_chunk");
-
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_perform_linear_ is " << workOrderPara_.spirit_perform_linear_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_perform_nonlinear_ is " << workOrderPara_.spirit_perform_nonlinear_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_parallel_imaging_lamda_ is " << workOrderPara_.spirit_parallel_imaging_lamda_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_image_reg_lamda_ is " << workOrderPara_.spirit_image_reg_lamda_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_data_fidelity_lamda_ is " << workOrderPara_.spirit_data_fidelity_lamda_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_ncg_iter_max_ is " << workOrderPara_.spirit_ncg_iter_max_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_ncg_iter_thres_ is " << workOrderPara_.spirit_ncg_iter_thres_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_ncg_print_iter_ is " << workOrderPara_.spirit_ncg_print_iter_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_use_coil_sen_map_ is " << workOrderPara_.spirit_use_coil_sen_map_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_use_moco_enhancement_ is " << workOrderPara_.spirit_use_moco_enhancement_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_recon_moco_images_ is " << workOrderPara_.spirit_recon_moco_images_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_RO_enhancement_ratio_ is " << workOrderPara_.spirit_RO_enhancement_ratio_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_E1_enhancement_ratio_ is " << workOrderPara_.spirit_E1_enhancement_ratio_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_E2_enhancement_ratio_ is " << workOrderPara_.spirit_E2_enhancement_ratio_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_temporal_enhancement_ratio_ is " << workOrderPara_.spirit_temporal_enhancement_ratio_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_2D_scale_per_chunk_ is " << workOrderPara_.spirit_2D_scale_per_chunk_);
-        GADGET_CONDITION_MSG(verboseMode_, "spirit_3D_scale_per_chunk_ is " << workOrderPara_.spirit_3D_scale_per_chunk_);
-
-        GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
-
-        workOrderPara_.job_split_by_S_ = this->get_bool_value("job_split_by_S");
-        workOrderPara_.job_num_of_N_ = (size_t)(this->get_int_value("job_num_of_N"));
-        workOrderPara_.job_max_Megabytes_ = (size_t)(this->get_int_value("job_max_Megabytes"));
-        workOrderPara_.job_overlap_ = (size_t)(this->get_int_value("job_overlap"));
-        workOrderPara_.job_perform_on_control_node_ = this->get_bool_value("job_perform_on_control_node");
-
-        GADGET_CONDITION_MSG(verboseMode_, "job_split_by_S_ is " << workOrderPara_.job_split_by_S_);
-        GADGET_CONDITION_MSG(verboseMode_, "job_num_of_N_ is " << workOrderPara_.job_num_of_N_);
-        GADGET_CONDITION_MSG(verboseMode_, "job_max_Megabytes_ is " << workOrderPara_.job_max_Megabytes_);
-        GADGET_CONDITION_MSG(verboseMode_, "job_overlap_ is " << workOrderPara_.job_overlap_);
-        GADGET_CONDITION_MSG(verboseMode_, "job_perform_on_control_node_ is " << workOrderPara_.job_perform_on_control_node_);
-
-        GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
-
-        str = this->get_string_value("partialFourier_algo");
-        workOrderPara_.partialFourier_algo_ = gtPlus_util_.getISMRMRDPartialFourierReconAlgoFromName(*str);
-        GADGET_CONDITION_MSG(verboseMode_, "partialFourier_algo_ is " << *str);
-
-        workOrderPara_.partialFourier_homodyne_iters_ = (size_t)(this->get_int_value("partialFourier_homodyne_iters"));
-        workOrderPara_.partialFourier_homodyne_thres_ = this->get_double_value("partialFourier_homodyne_thres");
-        workOrderPara_.partialFourier_homodyne_densityComp_ = this->get_bool_value("partialFourier_homodyne_densityComp");
-
-        GADGET_CONDITION_MSG(verboseMode_, "partialFourier_homodyne_iters_ is " << workOrderPara_.partialFourier_homodyne_iters_);
-        GADGET_CONDITION_MSG(verboseMode_, "partialFourier_homodyne_thres_ is " << workOrderPara_.partialFourier_homodyne_thres_);
-        GADGET_CONDITION_MSG(verboseMode_, "partialFourier_homodyne_densityComp_ is " << workOrderPara_.partialFourier_homodyne_densityComp_);
-
-        workOrderPara_.partialFourier_POCS_iters_ = (size_t)(this->get_int_value("partialFourier_POCS_iters"));
-        workOrderPara_.partialFourier_POCS_thres_ = this->get_double_value("partialFourier_POCS_thres");
-        workOrderPara_.partialFourier_POCS_transitBand_ = (size_t)(this->get_int_value("partialFourier_POCS_transitBand"));
-        workOrderPara_.partialFourier_POCS_transitBand_E2_ = (size_t)(this->get_int_value("partialFourier_POCS_transitBand_E2"));
-
-        GADGET_CONDITION_MSG(verboseMode_, "partialFourier_POCS_iters_ is " << workOrderPara_.partialFourier_POCS_iters_);
-        GADGET_CONDITION_MSG(verboseMode_, "partialFourier_POCS_thres_ is " << workOrderPara_.partialFourier_POCS_thres_);
-        GADGET_CONDITION_MSG(verboseMode_, "partialFourier_POCS_transitBand_ is " << workOrderPara_.partialFourier_POCS_transitBand_);
-        GADGET_CONDITION_MSG(verboseMode_, "partialFourier_POCS_transitBand_ is " << workOrderPara_.partialFourier_POCS_transitBand_E2_);
-
-        workOrderPara_.partialFourier_FengHuang_kSize_RO_ = (size_t)(this->get_int_value("partialFourier_FengHuang_kSize_RO"));
-        workOrderPara_.partialFourier_FengHuang_kSize_E1_ = (size_t)(this->get_int_value("partialFourier_FengHuang_kSize_E1"));
-        workOrderPara_.partialFourier_FengHuang_kSize_E2_ = (size_t)(this->get_int_value("partialFourier_FengHuang_kSize_E2"));
-        workOrderPara_.partialFourier_FengHuang_thresReg_ = this->get_double_value("partialFourier_FengHuang_thresReg");
-        workOrderPara_.partialFourier_FengHuang_sameKernel_allN_ = this->get_bool_value("partialFourier_FengHuang_sameKernel_allN");
-        workOrderPara_.partialFourier_FengHuang_transitBand_ = (size_t)(this->get_int_value("partialFourier_FengHuang_transitBand"));
-        workOrderPara_.partialFourier_FengHuang_transitBand_E2_ = (size_t)(this->get_int_value("partialFourier_FengHuang_transitBand_E2"));
-
-        GADGET_CONDITION_MSG(verboseMode_, "partialFourier_FengHuang_kSize_RO_ is " << workOrderPara_.partialFourier_FengHuang_kSize_RO_);
-        GADGET_CONDITION_MSG(verboseMode_, "partialFourier_FengHuang_kSize_E1_ is " << workOrderPara_.partialFourier_FengHuang_kSize_E1_);
-        GADGET_CONDITION_MSG(verboseMode_, "partialFourier_FengHuang_kSize_E2_ is " << workOrderPara_.partialFourier_FengHuang_kSize_E2_);
-        GADGET_CONDITION_MSG(verboseMode_, "partialFourier_FengHuang_thresReg_ is " << workOrderPara_.partialFourier_FengHuang_thresReg_);
-        GADGET_CONDITION_MSG(verboseMode_, "partialFourier_FengHuang_sameKernel_allN_ is " << workOrderPara_.partialFourier_FengHuang_sameKernel_allN_);
-        GADGET_CONDITION_MSG(verboseMode_, "partialFourier_FengHuang_transitBand_ is " << workOrderPara_.partialFourier_FengHuang_transitBand_);
-        GADGET_CONDITION_MSG(verboseMode_, "partialFourier_FengHuang_transitBand_E2_ is " << workOrderPara_.partialFourier_FengHuang_transitBand_E2_);
-
-        GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
-
-        recon_kspace_needed_ = this->get_bool_value("recon_kspace_needed");
-        GADGET_CONDITION_MSG(verboseMode_, "recon_kspace_needed_ is " << recon_kspace_needed_);
-
-        GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
-    }
-    catch(...)
-    {
-        GADGET_ERROR_MSG("Errors in GtPlusReconGadget::readParameters() ... ");
-        return false;
-    }
+            scalingFactor_wrap_around_map_ = this->get_double_value("scalingFactor_wrap_around_map");
+            if ( scalingFactor_wrap_around_map_ == 0 ) scalingFactor_wrap_around_map_ = 1000;
+            GADGET_CONDITION_MSG(verboseMode_, "scalingFactor_wrap_around_map_ is " << scalingFactor_wrap_around_map_);
 
-    return true;
-}
+            scalingFactor_snr_image_ = this->get_double_value("scalingFactor_snr_image");
+            if ( scalingFactor_snr_image_ == 0 ) scalingFactor_snr_image_ = 10;
+            GADGET_CONDITION_MSG(verboseMode_, "scalingFactor_snr_image_ is " << scalingFactor_snr_image_);
 
-bool GtPlusReconGadget::parseGTCloudNodeFile(const std::string& filename, CloudType& gtCloud)
-{
-    std::string nodeFileName = ACE_OS::getenv("GADGETRON_HOME");
-    nodeFileName.append("/config/gtCloud/");
-    nodeFileName.append(filename);
-    GADGET_CONDITION_MSG(verboseMode_, "Cloud node file name is " << nodeFileName);
+            scalingFactor_std_map_ = this->get_double_value("scalingFactor_std_map");
+            if ( scalingFactor_std_map_ == 0 ) scalingFactor_std_map_ = 1000;
+            GADGET_CONDITION_MSG(verboseMode_, "scalingFactor_std_map_ is " << scalingFactor_std_map_);
 
-    std::ifstream fs(nodeFileName.c_str(), std::ios::in);
-    if (!fs.is_open()) 
-    {
-        GADGET_WARN_MSG("Cannot open GT CloudNodeFile; use the local setting instead ... ");
-        return false;
-    }
+            start_frame_for_std_map_ = this->get_int_value("start_frame_for_std_map");
+            if ( start_frame_for_std_map_ == 0 ) start_frame_for_std_map_ = 5;
+            GADGET_CONDITION_MSG(verboseMode_, "start_frame_for_std_map_ is " << start_frame_for_std_map_);
 
-    // control node hostname
-    std::string controlNode;
-    fs >> controlNode;
+            use_constant_scalingFactor_ = this->get_bool_value("use_constant_scalingFactor");
+            GADGET_CONDITION_MSG(verboseMode_, "use_constant_scalingFactor_ is " << use_constant_scalingFactor_);
 
-    std::string portControlNode;
-    fs >> portControlNode;
+            boost::shared_ptr<std::string> str = this->get_string_value("debugFolder");
+            debugFolder_ = *str;
+            GADGET_CONDITION_MSG(verboseMode_, "debugFolder_ is " << debugFolder_);
 
-    // number of GadgetLevel nodes
-    unsigned int num;
-    fs >> num;
+            boost::shared_ptr<std::string> str2 = this->get_string_value("debugFolder2");
+            debugFolder2_ = *str2;
+            GADGET_CONDITION_MSG(verboseMode_, "debugFolder2_ is " << debugFolder2_);
 
-    gtCloud.resize(num);
+            timeStampResolution_ = (float)this->get_double_value("timeStampResolution");
+            if ( timeStampResolution_ < FLT_EPSILON ) timeStampResolution_ = 0.0025f;
+            GADGET_CONDITION_MSG(verboseMode_, "timeStampResolution_ is " << timeStampResolution_);
 
-    unsigned int n;
-    for ( n=0; n<num; n++ )
-    {
-        std::string gadgetNode;
-        fs >> gadgetNode;
+            str = this->get_string_value("send_out_recon");
+            if ( !str->empty() )
+            {
+                send_out_recon_ = this->get_bool_value("send_out_recon");
+            }
+            else
+            {
+                send_out_recon_ = true;
+            }
+            GADGET_CONDITION_MSG(verboseMode_, "send_out_recon_ is " << send_out_recon_);
 
-        std::string portGadgetNode;
-        fs >> portGadgetNode;
+            str = this->get_string_value("send_out_recon_second");
+            if ( !str->empty() )
+            {
+                send_out_recon_second_ = this->get_bool_value("send_out_recon_second");
+            }
+            else
+            {
+                send_out_recon_second_ = true;
+            }
+            GADGET_CONDITION_MSG(verboseMode_, "send_out_recon_second_ is " << send_out_recon_second_);
+
+            performTiming_ = this->get_bool_value("performTiming");
+            GADGET_CONDITION_MSG(verboseMode_, "performTiming_ is " << performTiming_);
+
+            performTiming_ = this->get_bool_value("performTiming");
+            GADGET_CONDITION_MSG(verboseMode_, "performTiming_ is " << performTiming_);
+
+            // kspace filter parameters
+            str = this->get_string_value("filterRO");
+            filterRO_type_ = gtPlus_util_.getISMRMRDKSpaceFilterFromName(*str);
+            filterRO_sigma_ = this->get_double_value("filterRO_sigma");
+            filterRO_width_ = this->get_double_value("filterRO_width");
+            GADGET_CONDITION_MSG(verboseMode_, "filterRO_type_ is " << *str);
+            GADGET_CONDITION_MSG(verboseMode_, "filterRO_sigma_ is " << filterRO_sigma_);
+            GADGET_CONDITION_MSG(verboseMode_, "filterRO_width_ is " << filterRO_width_);
+
+            str = this->get_string_value("filterE1");
+            filterE1_type_ = gtPlus_util_.getISMRMRDKSpaceFilterFromName(*str);
+            filterE1_sigma_ = this->get_double_value("filterE1_sigma");
+            filterE1_width_ = this->get_double_value("filterE1_width");
+            GADGET_CONDITION_MSG(verboseMode_, "filterE1_type_ is " << *str);
+            GADGET_CONDITION_MSG(verboseMode_, "filterE1_sigma_ is " << filterE1_sigma_);
+            GADGET_CONDITION_MSG(verboseMode_, "filterE1_width_ is " << filterE1_width_);
+
+            str = this->get_string_value("filterE2");
+            filterE2_type_ = gtPlus_util_.getISMRMRDKSpaceFilterFromName(*str);
+            filterE2_sigma_ = this->get_double_value("filterE2_sigma");
+            filterE2_width_ = this->get_double_value("filterE2_width");
+            GADGET_CONDITION_MSG(verboseMode_, "filterE2_type_ is " << *str);
+            GADGET_CONDITION_MSG(verboseMode_, "filterE2_sigma_ is " << filterE2_sigma_);
+            GADGET_CONDITION_MSG(verboseMode_, "filterE2_width_ is " << filterE2_width_);
+
+            str = this->get_string_value("filterRefRO");
+            filterRO_ref_type_ = gtPlus_util_.getISMRMRDKSpaceFilterFromName(*str);
+            filterRO_ref_sigma_ = this->get_double_value("filterRefRO_sigma");
+            filterRO_ref_width_ = this->get_double_value("filterRefRO_width");
+            GADGET_CONDITION_MSG(verboseMode_, "filterRO_ref_type_ is " << *str);
+            GADGET_CONDITION_MSG(verboseMode_, "filterRO_ref_sigma_ is " << filterRO_ref_sigma_);
+            GADGET_CONDITION_MSG(verboseMode_, "filterRO_ref_width_ is " << filterRO_ref_width_);
+
+            str = this->get_string_value("filterRefE1");
+            filterE1_ref_type_ = gtPlus_util_.getISMRMRDKSpaceFilterFromName(*str);
+            filterE1_ref_sigma_ = this->get_double_value("filterRefE1_sigma");
+            filterE1_ref_width_ = this->get_double_value("filterRefE1_width");
+            GADGET_CONDITION_MSG(verboseMode_, "filterE1_ref_type_ is " << *str);
+            GADGET_CONDITION_MSG(verboseMode_, "filterE1_ref_sigma_ is " << filterE1_ref_sigma_);
+            GADGET_CONDITION_MSG(verboseMode_, "filterE1_ref_width_ is " << filterE1_ref_width_);
+
+            str = this->get_string_value("filterRefE2");
+            filterE2_ref_type_ = gtPlus_util_.getISMRMRDKSpaceFilterFromName(*str);
+            filterE2_ref_sigma_ = this->get_double_value("filterRefE2_sigma");
+            filterE2_ref_width_ = this->get_double_value("filterRefE2_width");
+            GADGET_CONDITION_MSG(verboseMode_, "filterE2_ref_type_ is " << *str);
+            GADGET_CONDITION_MSG(verboseMode_, "filterE2_ref_sigma_ is " << filterE2_ref_sigma_);
+            GADGET_CONDITION_MSG(verboseMode_, "filterE2_ref_width_ is " << filterE2_ref_width_);
+
+            str = this->get_string_value("filterPartialFourierRO");
+            filterRO_pf_type_ = gtPlus_util_.getISMRMRDKSpaceFilterFromName(*str);
+            filterRO_pf_sigma_ = this->get_double_value("filterPartialFourierRO_sigma");
+            filterRO_pf_width_ = this->get_double_value("filterPartialFourierRO_width");
+            filterRO_pf_densityComp_ = this->get_bool_value("filterPartialFourierRO_densityComp");
+            GADGET_CONDITION_MSG(verboseMode_, "filterRO_pf_type_ is " << *str);
+            GADGET_CONDITION_MSG(verboseMode_, "filterRO_pf_sigma_ is " << filterRO_pf_sigma_);
+            GADGET_CONDITION_MSG(verboseMode_, "filterRO_pf_width_ is " << filterRO_pf_width_);
+            GADGET_CONDITION_MSG(verboseMode_, "filterRO_pf_densityComp_ is " << filterRO_pf_densityComp_);
+
+            str = this->get_string_value("filterPartialFourierE1");
+            filterE1_pf_type_ = gtPlus_util_.getISMRMRDKSpaceFilterFromName(*str);
+            filterE1_pf_sigma_ = this->get_double_value("filterPartialFourierE1_sigma");
+            filterE1_pf_width_ = this->get_double_value("filterPartialFourierE1_width");
+            filterE1_pf_densityComp_ = this->get_bool_value("filterPartialFourierE1_densityComp");
+            GADGET_CONDITION_MSG(verboseMode_, "filterE1_pf_type_ is " << *str);
+            GADGET_CONDITION_MSG(verboseMode_, "filterE1_pf_sigma_ is " << filterE1_pf_sigma_);
+            GADGET_CONDITION_MSG(verboseMode_, "filterE1_pf_width_ is " << filterE1_pf_width_);
+            GADGET_CONDITION_MSG(verboseMode_, "filterE1_pf_densityComp_ is " << filterE1_pf_densityComp_);
+
+            str = this->get_string_value("filterPartialFourierE2");
+            filterE2_pf_type_ = gtPlus_util_.getISMRMRDKSpaceFilterFromName(*str);
+            filterE2_pf_sigma_ = this->get_double_value("filterPartialFourierE2_sigma");
+            filterE2_pf_width_ = this->get_double_value("filterPartialFourierE2_width");
+            filterE2_pf_densityComp_ = this->get_bool_value("filterPartialFourierE2_densityComp");
+            GADGET_CONDITION_MSG(verboseMode_, "filterE2_pf_type_ is " << *str);
+            GADGET_CONDITION_MSG(verboseMode_, "filterE2_pf_sigma_ is " << filterE2_pf_sigma_);
+            GADGET_CONDITION_MSG(verboseMode_, "filterE2_pf_width_ is " << filterE2_pf_width_);
+            GADGET_CONDITION_MSG(verboseMode_, "filterE2_pf_densityComp_ is " << filterE2_pf_densityComp_);
+
+            GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
+
+            CloudComputing_ = this->get_bool_value("CloudComputing");
+            CloudSize_ = (unsigned int)(this->get_int_value("CloudSize"));
+
+            GADGET_CONDITION_MSG(verboseMode_, "CloudComputing_ is " << CloudComputing_);
+            GADGET_CONDITION_MSG(verboseMode_, "CloudSize_ is " << CloudSize_);
+
+            str = this->get_string_value("cloudNodeFile");
+            cloud_node_file_ = *str;
+            GADGET_CONDITION_MSG(verboseMode_, "cloud_node_file_ is " << cloud_node_file_);
+
+            // read in the cloud information for every node
+            gt_cloud_.resize(CloudSize_);
+
+            for ( unsigned int ii=0; ii<CloudSize_; ii++ )
+            {
+                std::ostringstream ostreamstr1;
+                ostreamstr1 << "CloudNode" << ii << "_IP" << std::ends;
+                boost::shared_ptr<std::string> IP = this->get_string_value(ostreamstr1.str().c_str());
+                gt_cloud_[ii].get<0>() = *IP;
+
+                std::ostringstream ostreamstr2;
+                ostreamstr2 << "CloudNode" << ii << "_Port" << std::ends;
+                boost::shared_ptr<std::string> Port = this->get_string_value(ostreamstr2.str().c_str());
+                gt_cloud_[ii].get<1>() = *Port;
+
+                std::ostringstream ostreamstr3;
+                ostreamstr3 << "CloudNode" << ii << "_XMLConfiguration" << std::ends;
+                boost::shared_ptr<std::string> xmlName = this->get_string_value(ostreamstr3.str().c_str());
+                gt_cloud_[ii].get<2>() = *xmlName;
+
+                std::ostringstream ostreamstr4;
+                ostreamstr4 << "CloudNode" << ii << "_ComputingPowerIndex" << std::ends;
+                unsigned int computingPowerIndex = this->get_int_value(ostreamstr4.str().c_str());
+                gt_cloud_[ii].get<3>() = computingPowerIndex;
+
+                GADGET_CONDITION_MSG(verboseMode_, "Cloud Node " << ii << " : " << gt_cloud_[ii]);
+            }
 
-        std::string xmlGadgetNode;
-        fs >> xmlGadgetNode;
+            GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
 
-        unsigned int computingPowerIndex;
-        fs >> computingPowerIndex;
+            thread_number_ratio_ = (float)this->get_double_value("thread_number_ratio");
+            if ( thread_number_ratio_>1 || thread_number_ratio_<0 ) thread_number_ratio_ = 0;
+            GADGET_CONDITION_MSG(verboseMode_, "thread_number_ratio_ is " << thread_number_ratio_);
 
-        gtCloud[n].get<0>() = gadgetNode;
-        gtCloud[n].get<1>() = portGadgetNode;
-        gtCloud[n].get<2>() = xmlGadgetNode;
-        gtCloud[n].get<3>() = computingPowerIndex;
+            GADGET_CONDITION_MSG(verboseMode_, "==================================================================");
 
-        GADGET_CONDITION_MSG(verboseMode_, "Gadget Node " << n << " : " << gt_cloud_[n]);
-    }
+            GADGET_CONDITION_MSG(verboseMode_, "------> GtPlus recon parameters <------");
 
-    fs.close();
+            workOrderPara_.upstream_coil_compression_ = this->get_bool_value("upstream_coil_compression");
+            GADGET_CONDITION_MSG(verboseMode_, "upstream_coil_compression_ is " << workOrderPara_.upstream_coil_compression_);
 
-    return true;
-}
+            workOrderPara_.upstream_coil_compression_thres_ = this->get_double_value("upstream_coil_compression_thres");
+            GADGET_CONDITION_MSG(verboseMode_, "upstream_coil_compression_thres_ is " << workOrderPara_.upstream_coil_compression_thres_);
 
-int GtPlusReconGadget::process_config(ACE_Message_Block* mb)
-{
-    // [Ro E1 Cha Slice E2 Con Phase Rep Set Seg]
-    //   0  1  2   3    4   5    6     7  8   9
+            workOrderPara_.upstream_coil_compression_num_modesKept_ = this->get_int_value("upstream_coil_compression_num_modesKept");
+            GADGET_CONDITION_MSG(verboseMode_, "upstream_coil_compression_num_modesKept_ is " << workOrderPara_.upstream_coil_compression_num_modesKept_);
 
-    verboseMode_ = this->get_bool_value("verboseMode");
+            workOrderPara_.downstream_coil_compression_ = this->get_bool_value("downstream_coil_compression");
+            GADGET_CONDITION_MSG(verboseMode_, "downstream_coil_compression_ is " << workOrderPara_.downstream_coil_compression_);
 
-    // read parameters from xml
-    image_series_ = this->get_int_value("image_series");
+            workOrderPara_.coil_compression_thres_ = this->get_double_value("coil_compression_thres");
 
-    // read in parameters from the xml
-    GADGET_CHECK_RETURN(this->readParameters(), GADGET_FAIL);
+            if ( workOrderPara_.upstream_coil_compression_ && (workOrderPara_.coil_compression_thres_ > workOrderPara_.upstream_coil_compression_thres_) )
+                workOrderPara_.coil_compression_thres_ = workOrderPara_.upstream_coil_compression_thres_;
 
-    boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg = Gadgetron::parseIsmrmrdXMLHeader(std::string(mb->rd_ptr()));
+            GADGET_CONDITION_MSG(verboseMode_, "coil_compression_thres_ is " << workOrderPara_.coil_compression_thres_);
 
-    ISMRMRD::ismrmrdHeader::acquisitionSystemInformation_optional e_acq = cfg->acquisitionSystemInformation();
-    num_acq_channels_ = e_acq->receiverChannels().get();
-    GADGET_CONDITION_MSG(verboseMode_, "Number of acquisition channels : " << num_acq_channels_);
+            workOrderPara_.coil_compression_num_modesKept_ = this->get_int_value("coil_compression_num_modesKept");
 
-    ISMRMRD::ismrmrdHeader::encoding_sequence e_seq = cfg->encoding();
-    if (e_seq.size() != 1)
-    {
-        GADGET_DEBUG2("Number of encoding spaces: %d\n", e_seq.size());
-        GADGET_DEBUG1("This simple GtPlusReconGadget only supports one encoding space\n");
-        return GADGET_FAIL;
-    }
+            if ( workOrderPara_.upstream_coil_compression_ && (workOrderPara_.coil_compression_num_modesKept_ > workOrderPara_.upstream_coil_compression_num_modesKept_) )
+                workOrderPara_.coil_compression_num_modesKept_ = workOrderPara_.upstream_coil_compression_num_modesKept_;
 
-    // find out the encoding space 
-    ISMRMRD::encodingSpaceType e_space = (*e_seq.begin()).encodedSpace();
-    ISMRMRD::encodingSpaceType r_space = (*e_seq.begin()).reconSpace();
-    ISMRMRD::encodingLimitsType e_limits = (*e_seq.begin()).encodingLimits();
+            GADGET_CONDITION_MSG(verboseMode_, "coil_compression_num_modesKept_ is " << workOrderPara_.coil_compression_num_modesKept_);
 
-    matrix_size_encoding_[0] = e_space.matrixSize().x();
-    matrix_size_encoding_[1] = e_space.matrixSize().y();
-    matrix_size_encoding_[2] = e_space.matrixSize().z();
-    GADGET_CONDITION_MSG(verboseMode_, "Encoding matrix size: " << matrix_size_encoding_[0] << " " << matrix_size_encoding_[1] << " " << matrix_size_encoding_[2]);
+            GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
 
-    field_of_view_encoding_[0] = e_space.fieldOfView_mm().x();
-    field_of_view_encoding_[1] = e_space.fieldOfView_mm().y();
-    field_of_view_encoding_[2] = e_space.fieldOfView_mm().z();
-    GADGET_CONDITION_MSG(verboseMode_, "Encoding field_of_view : " << field_of_view_encoding_[0] << " " << field_of_view_encoding_[1] << " " << field_of_view_encoding_[2]);
+            str = this->get_string_value("coil_map_algorithm");
+            workOrderPara_.coil_map_algorithm_ = gtPlus_util_.getISMRMRDCoilMapAlgoFromName(*str);
+            GADGET_CONDITION_MSG(verboseMode_, "coil_map_algorithm_ is " << *str);
 
-    // find the recon space
-    matrix_size_recon_[0] = r_space.matrixSize().x();
-    matrix_size_recon_[1] = r_space.matrixSize().y();
-    matrix_size_recon_[2] = r_space.matrixSize().z();
-    GADGET_CONDITION_MSG(verboseMode_, "Recon matrix size : " << matrix_size_recon_[0] << " " << matrix_size_recon_[1] << " " << matrix_size_recon_[2]);
+            workOrderPara_.csm_kSize_ = (size_t)(this->get_int_value("csm_kSize"));
+            GADGET_CONDITION_MSG(verboseMode_, "csm_kSize_ is " << workOrderPara_.csm_kSize_);
 
-    field_of_view_recon_[0] = r_space.fieldOfView_mm().x();
-    field_of_view_recon_[1] = r_space.fieldOfView_mm().y();
-    field_of_view_recon_[2] = r_space.fieldOfView_mm().z();
-    GADGET_CONDITION_MSG(verboseMode_, "Recon field_of_view :  " << field_of_view_recon_[0] << " " << field_of_view_recon_[1] << " " << field_of_view_recon_[2]);
+            workOrderPara_.csm_powermethod_num_ = (size_t)(this->get_int_value("csm_powermethod_num"));
+            GADGET_CONDITION_MSG(verboseMode_, "csm_powermethod_num_ is " << workOrderPara_.csm_powermethod_num_);
 
-    reconE1_ = matrix_size_recon_[1];
-    GADGET_CONDITION_MSG(verboseMode_, "reconE1_ is " << reconE1_);
+            workOrderPara_.csm_true_3D_ = this->get_bool_value("csm_true_3D");
+            GADGET_CONDITION_MSG(verboseMode_, "csm_true_3D_ is " << workOrderPara_.csm_true_3D_);
 
-    reconE2_ = matrix_size_recon_[2];
-    GADGET_CONDITION_MSG(verboseMode_, "reconE2_ is " << reconE2_);
+            workOrderPara_.csm_iter_num_ = (size_t)(this->get_int_value("csm_iter_num"));
+            GADGET_CONDITION_MSG(verboseMode_, "csm_iter_num_ is " << workOrderPara_.csm_iter_num_);
 
-    kSpaceMaxAcqE1No_ = e_limits.kspace_encoding_step_1().get().maximum();
-    GADGET_CONDITION_MSG(verboseMode_, "kSpaceMaxAcqE1No_ is " << kSpaceMaxAcqE1No_);
+            workOrderPara_.csm_iter_thres_ = this->get_double_value("csm_iter_thres");
+            GADGET_CONDITION_MSG(verboseMode_, "csm_iter_thres_ is " << workOrderPara_.csm_iter_thres_);
 
-    kSpaceMaxAcqE2No_ = e_limits.kspace_encoding_step_2().get().maximum();
-    GADGET_CONDITION_MSG(verboseMode_, "kSpaceMaxAcqE2No_ is " << kSpaceMaxAcqE2No_);
+            workOrderPara_.csm_use_gpu_ = this->get_bool_value("csm_use_gpu");
+            GADGET_CONDITION_MSG(verboseMode_, "csm_use_gpu_ is " << workOrderPara_.csm_use_gpu_);
 
-    aSpacing_[0] = field_of_view_recon_[0]/matrix_size_recon_[0];
-    aSpacing_[1] = field_of_view_recon_[1]/reconE1_;
-    aSpacing_[2] = field_of_view_recon_[2]/reconE2_;
+            GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
 
-    gt_exporter_.setPixelSize(aSpacing_[0], aSpacing_[1], aSpacing_[2], aSpacing_[3], aSpacing_[4], aSpacing_[5], aSpacing_[6]);
+            str = this->get_string_value("recon_algorithm");
+            workOrderPara_.recon_algorithm_ = gtPlus_util_.getISMRMRDReconAlgoFromName(*str);
+            GADGET_CONDITION_MSG(verboseMode_, "recon_algorithm_ is " << *str);
 
-    // find the maximal encoding size
-    if (e_limits.kspace_encoding_step_1().present()) 
-    {
-        meas_max_idx_.kspace_encode_step_1 = e_limits.kspace_encoding_step_1().get().maximum();
-    }
-    else
-    {
-        meas_max_idx_.kspace_encode_step_1 = 0;
-        std::cout << "Setting number of kspace_encode_step_1 to 0" << std::endl;
-        return GADGET_FAIL;
-    }
+            workOrderPara_.recon_auto_parameters_ = this->get_bool_value("recon_auto_parameters");
+            GADGET_CONDITION_MSG(verboseMode_, "recon_auto_parameters_ is " << workOrderPara_.recon_auto_parameters_);
 
-    if (e_limits.set().present())
-    {
-        meas_max_idx_.set = e_limits.set().get().maximum() - 1;
-        if ( meas_max_idx_.set < 0 ) meas_max_idx_.set = 0;
-    }
-    else
-    {
-        meas_max_idx_.set = 0;
-    }
+            workOrderPara_.gfactor_needed_ = this->get_bool_value("gfactor_needed");
+            GADGET_CONDITION_MSG(verboseMode_, "gfactor_needed_ is " << workOrderPara_.gfactor_needed_);
 
-    if (e_limits.phase().present())
-    {
-        meas_max_idx_.phase = e_limits.phase().get().maximum()-1;
-        if ( meas_max_idx_.phase < 0 ) meas_max_idx_.phase = 0;
-    }
-    else
-    {
-        meas_max_idx_.phase = 0;
-    }
+            workOrderPara_.wrap_around_map_needed_ = this->get_bool_value("wrap_around_map_needed");
+            GADGET_CONDITION_MSG(verboseMode_, "wrap_around_map_needed_ is " << workOrderPara_.wrap_around_map_needed_);
 
-    if (e_limits.kspace_encoding_step_2().present())
-    {
-        meas_max_idx_.kspace_encode_step_2 = e_limits.kspace_encoding_step_2().get().maximum();
-    }
-    else
-    {
-        meas_max_idx_.kspace_encode_step_2 = 0;
-    }
+            GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
 
-    if (e_limits.contrast().present())
-    {
-        meas_max_idx_.contrast = e_limits.contrast().get().maximum()-1;
-        if ( meas_max_idx_.contrast < 0 ) meas_max_idx_.contrast = 0;
-    }
-    else
-    {
-        meas_max_idx_.contrast = 0;
-    }
+            workOrderPara_.grappa_kSize_RO_ = (size_t)(this->get_int_value("grappa_kSize_RO"));
+            workOrderPara_.grappa_kSize_E1_ = (size_t)(this->get_int_value("grappa_kSize_E1"));
+            workOrderPara_.grappa_kSize_E2_ = (size_t)(this->get_int_value("grappa_kSize_E2"));
+            workOrderPara_.grappa_reg_lamda_ = this->get_double_value("grappa_reg_lamda");
+            workOrderPara_.grappa_calib_over_determine_ratio_ = this->get_double_value("grappa_calib_over_determine_ratio");
+            workOrderPara_.grappa_use_gpu_ = this->get_bool_value("grappa_use_gpu");
 
-    if (e_limits.slice().present())
-    {
-        meas_max_idx_.slice = e_limits.slice().get().maximum();
-    }
-    else
-    {
-        meas_max_idx_.slice = 0;
-    }
+            GADGET_CONDITION_MSG(verboseMode_, "grappa_kSize_RO_ is " << workOrderPara_.grappa_kSize_RO_);
+            GADGET_CONDITION_MSG(verboseMode_, "grappa_kSize_E1_ is " << workOrderPara_.grappa_kSize_E1_);
+            GADGET_CONDITION_MSG(verboseMode_, "grappa_kSize_E2_ is " << workOrderPara_.grappa_kSize_E2_);
+            GADGET_CONDITION_MSG(verboseMode_, "grappa_reg_lamda_ is " << workOrderPara_.grappa_reg_lamda_);
+            GADGET_CONDITION_MSG(verboseMode_, "grappa_calib_over_determine_ratio_ is " << workOrderPara_.grappa_calib_over_determine_ratio_);
+            GADGET_CONDITION_MSG(verboseMode_, "grappa_use_gpu_ is " << workOrderPara_.grappa_use_gpu_);
 
-    if (e_limits.repetition().present())
-    {
-        meas_max_idx_.repetition = e_limits.repetition().get().maximum();
-    }
-    else
-    {
-        meas_max_idx_.repetition = 0;
-    }
+            GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
 
-    if (e_limits.average().present())
-    {
-        meas_max_idx_.average = e_limits.average().get().maximum()-1;
-    }
-    else
-    {
-        meas_max_idx_.average = 0;
-    }
+            workOrderPara_.spirit_kSize_RO_ = (size_t)(this->get_int_value("spirit_kSize_RO"));
+            if ( workOrderPara_.spirit_kSize_RO_ == 0 ) workOrderPara_.spirit_kSize_RO_ = 7;
 
-    if (e_limits.segment().present())
-    {
-        // meas_max_idx_.segment = e_limits.segment().get().maximum()-1;
-        meas_max_idx_.segment = 0;
-    }
-    else
-    {
-        meas_max_idx_.segment = 0;
-    }
+            workOrderPara_.spirit_kSize_E1_ = (size_t)(this->get_int_value("spirit_kSize_E1"));
+            if ( workOrderPara_.spirit_kSize_E1_ == 0 ) workOrderPara_.spirit_kSize_E1_ = 7;
 
-    // find out the PAT mode
-    ISMRMRD::ismrmrdHeader::parallelImaging_optional p_imaging_type = cfg->parallelImaging();
-    ISMRMRD::parallelImagingType p_imaging = *p_imaging_type;
+            workOrderPara_.spirit_kSize_E2_ = (size_t)(this->get_int_value("spirit_kSize_E2"));
+            if ( workOrderPara_.spirit_kSize_E2_ == 0 ) workOrderPara_.spirit_kSize_E2_ = 5;
 
-    acceFactorE1_ = (long)(p_imaging.accelerationFactor().kspace_encoding_step_1());
-    acceFactorE2_ = (long)(p_imaging.accelerationFactor().kspace_encoding_step_2());
-    GADGET_CONDITION_MSG(verboseMode_, "acceFactorE1 is " << acceFactorE1_);
-    GADGET_CONDITION_MSG(verboseMode_, "acceFactorE2 is " << acceFactorE2_);
+            workOrderPara_.spirit_reg_lamda_ = this->get_double_value("spirit_reg_lamda");
+            if ( workOrderPara_.spirit_reg_lamda_ < FLT_EPSILON ) workOrderPara_.spirit_reg_lamda_ = 0.005;
 
-    ISMRMRD::calibrationModeType::value calib = *(p_imaging.calibrationMode());
+            workOrderPara_.spirit_use_gpu_ = this->get_bool_value("spirit_use_gpu");
+            workOrderPara_.spirit_calib_over_determine_ratio_ = this->get_double_value("spirit_calib_over_determine_ratio");
+            workOrderPara_.spirit_solve_symmetric_ = this->get_bool_value("spirit_solve_symmetric");
 
-    bool separate_ = (calib == ISMRMRD::calibrationModeType::separate);
-    bool embedded_ = (calib == ISMRMRD::calibrationModeType::embedded);
-    bool interleaved_ = (calib == ISMRMRD::calibrationModeType::interleaved);
-    bool other_ = (calib == ISMRMRD::calibrationModeType::other);
+            workOrderPara_.spirit_iter_max_ = (size_t)(this->get_int_value("spirit_iter_max"));
+            if ( workOrderPara_.spirit_iter_max_ == 0 ) workOrderPara_.spirit_iter_max_ = 100;
 
-    if ( separate_ ) { GADGET_CONDITION_MSG(verboseMode_, "Colibration mode is separate"); }
-    if ( embedded_ ) { GADGET_CONDITION_MSG(verboseMode_, "Colibration mode is embedded"); }
-    if ( interleaved_ ) { GADGET_CONDITION_MSG(verboseMode_, "Colibration mode is interleaved"); }
-    if ( other_ ) { GADGET_CONDITION_MSG(verboseMode_, "Colibration mode is other"); }
+            workOrderPara_.spirit_iter_thres_ = this->get_double_value("spirit_iter_thres");
+            if ( workOrderPara_.spirit_iter_thres_ < FLT_EPSILON ) workOrderPara_.spirit_iter_thres_ = 0.0015;
 
-    if ( other_ && acceFactorE1_==1 && acceFactorE2_==1 )
-    {
-        GADGET_CONDITION_MSG(verboseMode_, "Colibration mode is changed to ISMRMRD_interleaved");
-        CalibMode_ = Gadgetron::gtPlus::ISMRMRD_interleaved;
-        acceFactorE1_ = 2;
+            workOrderPara_.spirit_print_iter_ = this->get_bool_value("spirit_print_iter");
+
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_kSize_RO_ is " << workOrderPara_.spirit_kSize_RO_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_kSize_E1_ is " << workOrderPara_.spirit_kSize_E1_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_kSize_E2_ is " << workOrderPara_.spirit_kSize_E2_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_reg_lamda_ is " << workOrderPara_.spirit_reg_lamda_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_use_gpu_ is " << workOrderPara_.spirit_use_gpu_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_calib_over_determine_ratio_ is " << workOrderPara_.spirit_calib_over_determine_ratio_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_solve_symmetric_ is " << workOrderPara_.spirit_solve_symmetric_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_iter_max_ is " << workOrderPara_.spirit_iter_max_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_iter_thres_ is " << workOrderPara_.spirit_iter_thres_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_print_iter_ is " << workOrderPara_.spirit_print_iter_);
+
+            GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
+
+            workOrderPara_.spirit_perform_linear_ = this->get_bool_value("spirit_perform_linear");
+            workOrderPara_.spirit_perform_nonlinear_ = this->get_bool_value("spirit_perform_nonlinear");
+            workOrderPara_.spirit_parallel_imaging_lamda_ = this->get_double_value("spirit_parallel_imaging_lamda");
+            workOrderPara_.spirit_image_reg_lamda_ = this->get_double_value("spirit_image_reg_lamda");
+            workOrderPara_.spirit_data_fidelity_lamda_ = this->get_double_value("spirit_data_fidelity_lamda");
+            workOrderPara_.spirit_ncg_iter_max_ = (size_t)(this->get_int_value("spirit_ncg_iter_max"));
+            workOrderPara_.spirit_ncg_iter_thres_ = this->get_double_value("spirit_ncg_iter_thres");
+            workOrderPara_.spirit_ncg_print_iter_ = this->get_bool_value("spirit_ncg_print_iter");
+            // spirit_ncg_scale_factor_ is computed from the data
+
+            workOrderPara_.spirit_use_coil_sen_map_ = this->get_bool_value("spirit_use_coil_sen_map");
+            workOrderPara_.spirit_use_moco_enhancement_ = this->get_bool_value("spirit_use_moco_enhancement");
+            workOrderPara_.spirit_recon_moco_images_ = this->get_bool_value("spirit_recon_moco_images");
+            workOrderPara_.spirit_RO_enhancement_ratio_ = this->get_double_value("spirit_RO_enhancement_ratio");
+            workOrderPara_.spirit_E1_enhancement_ratio_ = this->get_double_value("spirit_E1_enhancement_ratio");
+            workOrderPara_.spirit_E2_enhancement_ratio_ = this->get_double_value("spirit_E2_enhancement_ratio");
+            workOrderPara_.spirit_temporal_enhancement_ratio_ = this->get_double_value("spirit_temporal_enhancement_ratio");
+            workOrderPara_.spirit_2D_scale_per_chunk_ = this->get_bool_value("spirit_2D_scale_per_chunk");
+            workOrderPara_.spirit_3D_scale_per_chunk_ = this->get_bool_value("spirit_3D_scale_per_chunk");
+
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_perform_linear_ is " << workOrderPara_.spirit_perform_linear_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_perform_nonlinear_ is " << workOrderPara_.spirit_perform_nonlinear_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_parallel_imaging_lamda_ is " << workOrderPara_.spirit_parallel_imaging_lamda_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_image_reg_lamda_ is " << workOrderPara_.spirit_image_reg_lamda_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_data_fidelity_lamda_ is " << workOrderPara_.spirit_data_fidelity_lamda_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_ncg_iter_max_ is " << workOrderPara_.spirit_ncg_iter_max_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_ncg_iter_thres_ is " << workOrderPara_.spirit_ncg_iter_thres_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_ncg_print_iter_ is " << workOrderPara_.spirit_ncg_print_iter_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_use_coil_sen_map_ is " << workOrderPara_.spirit_use_coil_sen_map_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_use_moco_enhancement_ is " << workOrderPara_.spirit_use_moco_enhancement_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_recon_moco_images_ is " << workOrderPara_.spirit_recon_moco_images_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_RO_enhancement_ratio_ is " << workOrderPara_.spirit_RO_enhancement_ratio_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_E1_enhancement_ratio_ is " << workOrderPara_.spirit_E1_enhancement_ratio_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_E2_enhancement_ratio_ is " << workOrderPara_.spirit_E2_enhancement_ratio_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_temporal_enhancement_ratio_ is " << workOrderPara_.spirit_temporal_enhancement_ratio_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_2D_scale_per_chunk_ is " << workOrderPara_.spirit_2D_scale_per_chunk_);
+            GADGET_CONDITION_MSG(verboseMode_, "spirit_3D_scale_per_chunk_ is " << workOrderPara_.spirit_3D_scale_per_chunk_);
+
+            GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
+
+            str = this->get_string_value("retro_gated_interp_method");
+            workOrderPara_.retro_gated_interp_method_ = gtPlus_util_.getISMRMRDRetroGatingInterpFromName(*str);
+            GADGET_CONDITION_MSG(verboseMode_, "retro_gated_interp_method_ is " << *str);
+
+            GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
+
+            workOrderPara_.job_split_by_S_ = this->get_bool_value("job_split_by_S");
+            workOrderPara_.job_num_of_N_ = (size_t)(this->get_int_value("job_num_of_N"));
+            workOrderPara_.job_max_Megabytes_ = (size_t)(this->get_int_value("job_max_Megabytes"));
+            workOrderPara_.job_overlap_ = (size_t)(this->get_int_value("job_overlap"));
+            workOrderPara_.job_perform_on_control_node_ = this->get_bool_value("job_perform_on_control_node");
+
+            GADGET_CONDITION_MSG(verboseMode_, "job_split_by_S_ is " << workOrderPara_.job_split_by_S_);
+            GADGET_CONDITION_MSG(verboseMode_, "job_num_of_N_ is " << workOrderPara_.job_num_of_N_);
+            GADGET_CONDITION_MSG(verboseMode_, "job_max_Megabytes_ is " << workOrderPara_.job_max_Megabytes_);
+            GADGET_CONDITION_MSG(verboseMode_, "job_overlap_ is " << workOrderPara_.job_overlap_);
+            GADGET_CONDITION_MSG(verboseMode_, "job_perform_on_control_node_ is " << workOrderPara_.job_perform_on_control_node_);
+
+            GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
+
+            str = this->get_string_value("partialFourier_algo");
+            workOrderPara_.partialFourier_algo_ = gtPlus_util_.getISMRMRDPartialFourierReconAlgoFromName(*str);
+            GADGET_CONDITION_MSG(verboseMode_, "partialFourier_algo_ is " << *str);
+
+            workOrderPara_.partialFourier_homodyne_iters_ = (size_t)(this->get_int_value("partialFourier_homodyne_iters"));
+            workOrderPara_.partialFourier_homodyne_thres_ = this->get_double_value("partialFourier_homodyne_thres");
+            workOrderPara_.partialFourier_homodyne_densityComp_ = this->get_bool_value("partialFourier_homodyne_densityComp");
+
+            GADGET_CONDITION_MSG(verboseMode_, "partialFourier_homodyne_iters_ is " << workOrderPara_.partialFourier_homodyne_iters_);
+            GADGET_CONDITION_MSG(verboseMode_, "partialFourier_homodyne_thres_ is " << workOrderPara_.partialFourier_homodyne_thres_);
+            GADGET_CONDITION_MSG(verboseMode_, "partialFourier_homodyne_densityComp_ is " << workOrderPara_.partialFourier_homodyne_densityComp_);
+
+            workOrderPara_.partialFourier_POCS_iters_ = (size_t)(this->get_int_value("partialFourier_POCS_iters"));
+            workOrderPara_.partialFourier_POCS_thres_ = this->get_double_value("partialFourier_POCS_thres");
+            workOrderPara_.partialFourier_POCS_transitBand_ = (size_t)(this->get_int_value("partialFourier_POCS_transitBand"));
+            workOrderPara_.partialFourier_POCS_transitBand_E2_ = (size_t)(this->get_int_value("partialFourier_POCS_transitBand_E2"));
+
+            GADGET_CONDITION_MSG(verboseMode_, "partialFourier_POCS_iters_ is " << workOrderPara_.partialFourier_POCS_iters_);
+            GADGET_CONDITION_MSG(verboseMode_, "partialFourier_POCS_thres_ is " << workOrderPara_.partialFourier_POCS_thres_);
+            GADGET_CONDITION_MSG(verboseMode_, "partialFourier_POCS_transitBand_ is " << workOrderPara_.partialFourier_POCS_transitBand_);
+            GADGET_CONDITION_MSG(verboseMode_, "partialFourier_POCS_transitBand_ is " << workOrderPara_.partialFourier_POCS_transitBand_E2_);
+
+            workOrderPara_.partialFourier_FengHuang_kSize_RO_ = (size_t)(this->get_int_value("partialFourier_FengHuang_kSize_RO"));
+            workOrderPara_.partialFourier_FengHuang_kSize_E1_ = (size_t)(this->get_int_value("partialFourier_FengHuang_kSize_E1"));
+            workOrderPara_.partialFourier_FengHuang_kSize_E2_ = (size_t)(this->get_int_value("partialFourier_FengHuang_kSize_E2"));
+            workOrderPara_.partialFourier_FengHuang_thresReg_ = this->get_double_value("partialFourier_FengHuang_thresReg");
+            workOrderPara_.partialFourier_FengHuang_sameKernel_allN_ = this->get_bool_value("partialFourier_FengHuang_sameKernel_allN");
+            workOrderPara_.partialFourier_FengHuang_transitBand_ = (size_t)(this->get_int_value("partialFourier_FengHuang_transitBand"));
+            workOrderPara_.partialFourier_FengHuang_transitBand_E2_ = (size_t)(this->get_int_value("partialFourier_FengHuang_transitBand_E2"));
+
+            GADGET_CONDITION_MSG(verboseMode_, "partialFourier_FengHuang_kSize_RO_ is " << workOrderPara_.partialFourier_FengHuang_kSize_RO_);
+            GADGET_CONDITION_MSG(verboseMode_, "partialFourier_FengHuang_kSize_E1_ is " << workOrderPara_.partialFourier_FengHuang_kSize_E1_);
+            GADGET_CONDITION_MSG(verboseMode_, "partialFourier_FengHuang_kSize_E2_ is " << workOrderPara_.partialFourier_FengHuang_kSize_E2_);
+            GADGET_CONDITION_MSG(verboseMode_, "partialFourier_FengHuang_thresReg_ is " << workOrderPara_.partialFourier_FengHuang_thresReg_);
+            GADGET_CONDITION_MSG(verboseMode_, "partialFourier_FengHuang_sameKernel_allN_ is " << workOrderPara_.partialFourier_FengHuang_sameKernel_allN_);
+            GADGET_CONDITION_MSG(verboseMode_, "partialFourier_FengHuang_transitBand_ is " << workOrderPara_.partialFourier_FengHuang_transitBand_);
+            GADGET_CONDITION_MSG(verboseMode_, "partialFourier_FengHuang_transitBand_E2_ is " << workOrderPara_.partialFourier_FengHuang_transitBand_E2_);
+
+            GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
+
+            recon_kspace_needed_ = this->get_bool_value("recon_kspace_needed");
+            GADGET_CONDITION_MSG(verboseMode_, "recon_kspace_needed_ is " << recon_kspace_needed_);
+
+            GADGET_CONDITION_MSG(verboseMode_, "-----------------------------------------------");
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in GtPlusReconGadget::readParameters() ... ");
+            return false;
+        }
+
+        return true;
     }
 
-    if ( interleaved_ )
+    /**
+     * Build the list of cloud nodes: (address, port, xml configuration, computing power index).
+     *
+     * When the "using_cloudbus" property is set and the "CloudNodeXMLConfiguration" property
+     * is non-empty, the nodes reported by CloudBus::get_node_info() are used and the node
+     * file is never opened. Otherwise the nodes are read from
+     * get_gadgetron_home() + "/config/gtCloud/" + filename, a whitespace-separated text file:
+     *     controlNode controlPort numNodes
+     *     followed, numNodes times, by: node port xmlConfiguration computingPowerIndex
+     *
+     * @param filename  name of the node file inside the config/gtCloud folder
+     * @param gtCloud   receives one entry per node; it may be left resized or partially
+     *                  filled when false is returned because the file is malformed
+     * @return true if the node list was filled; false if the node file cannot be opened
+     *         or parsed
+     */
+    bool GtPlusReconGadget::parseGTCloudNodeFile(const std::string& filename, CloudType& gtCloud)
     {
-        CalibMode_ = Gadgetron::gtPlus::ISMRMRD_interleaved;
 
-        if ( p_imaging.interleavingDimension().present() )
-        {
-            if ( *(p_imaging.interleavingDimension()) == ISMRMRD::interleavingDimensionType::phase )
-            {
-                InterleaveDim_ = Gadgetron::gtPlus::DIM_Phase;
-            }
+        bool using_cloudbus = this->get_bool_value("using_cloudbus");
+        // read the property once; the same value is stored for every CloudBus node below
+        const std::string cloud_node_xml_configuration = *this->get_string_value("CloudNodeXMLConfiguration");
+        bool has_cloud_node_xml_configuration = !cloud_node_xml_configuration.empty();
 
-            if ( *(p_imaging.interleavingDimension()) == ISMRMRD::interleavingDimensionType::repetition )
-            {
-                InterleaveDim_ = Gadgetron::gtPlus::DIM_Repetition;
-            }
+        if (using_cloudbus && has_cloud_node_xml_configuration) {
+            std::vector<GadgetronNodeInfo> nodes;
+            CloudBus::instance()->get_node_info(nodes);
+            gtCloud.resize(nodes.size());
 
-            if ( *(p_imaging.interleavingDimension()) == ISMRMRD::interleavingDimensionType::average )
+            unsigned int n;
+            for ( n=0; n<nodes.size(); n++ )
             {
-                InterleaveDim_ = Gadgetron::gtPlus::DIM_Average;
+                // the port is stored as text, like the ports read from the node file
+                std::stringstream ss;
+                gtCloud[n].get<0>() = nodes[n].address;
+                ss << nodes[n].port;
+                gtCloud[n].get<1>() = ss.str();
+                gtCloud[n].get<2>() = cloud_node_xml_configuration;
+                gtCloud[n].get<3>() = nodes[n].compute_capability;
+
+                // log the entry just written through the gtCloud parameter
+                GADGET_CONDITION_MSG(verboseMode_, "Gadget Node " << n << " : " << gtCloud[n]);
             }
 
-            if ( *(p_imaging.interleavingDimension()) == ISMRMRD::interleavingDimensionType::contrast )
-            {
-                InterleaveDim_ = Gadgetron::gtPlus::DIM_Contrast;
-            }
+            return true; //We will leave the function here
 
-            if ( *(p_imaging.interleavingDimension()) == ISMRMRD::interleavingDimensionType::other )
-            {
-                InterleaveDim_ = Gadgetron::gtPlus::DIM_other1;
-            }
+        }
+
+        std::string nodeFileName = get_gadgetron_home();
+        nodeFileName.append("/config/gtCloud/");
+        nodeFileName.append(filename);
+        GADGET_CONDITION_MSG(verboseMode_, "Cloud node file name is " << nodeFileName);
 
-            GADGET_CONDITION_MSG(verboseMode_, "InterleaveDim is " << gtPlus_util_.getISMRMRDDimName(InterleaveDim_));
+        std::ifstream fs(nodeFileName.c_str(), std::ios::in);
+        if (!fs.is_open()) 
+        {
+            GADGET_WARN_MSG("Cannot open GT CloudNodeFile; use the local setting instead ... ");
+            return false;
         }
-    }
 
-    if ( embedded_ )
-    {
-        CalibMode_ = Gadgetron::gtPlus::ISMRMRD_embedded;
-    }
+        // control node hostname
+        std::string controlNode;
+        fs >> controlNode;
 
-    if ( separate_ )
-    {
-        CalibMode_ = Gadgetron::gtPlus::ISMRMRD_separate;
-    }
+        std::string portControlNode;
+        fs >> portControlNode;
 
-    if ( calib == ISMRMRD::calibrationModeType::external )
-    {
-        CalibMode_ = Gadgetron::gtPlus::ISMRMRD_external;
-    }
+        // number of GadgetLevel nodes
+        // num starts at 0 and the stream state is checked: once an earlier extraction has
+        // failed, operator>> no longer stores anything into num
+        unsigned int num = 0;
+        fs >> num;
+        if (!fs)
+        {
+            GADGET_WARN_MSG("Cannot parse GT CloudNodeFile; use the local setting instead ... ");
+            return false;
+        }
 
-    if ( calib == ISMRMRD::calibrationModeType::other )
-    {
-        CalibMode_ = Gadgetron::gtPlus::ISMRMRD_other;
-    }
+        gtCloud.resize(num);
 
-    // generate the destination folder
-    if ( !debugFolder_.empty() )
-    {
-        GADGET_CHECK_RETURN_FALSE(generateDebugFolderPath(debugFolder_, debugFolder_fullPath_));
-    }
-    else
-    {
-        GADGET_MSG("GtPlusRecon, debugFolder is not set ...");
-    }
+        unsigned int n;
+        for ( n=0; n<num; n++ )
+        {
+            std::string gadgetNode;
+            fs >> gadgetNode;
 
-    if ( !debugFolder2_.empty() )
-    {
-        GADGET_CHECK_RETURN_FALSE(generateDebugFolderPath(debugFolder2_, debugFolder2_fullPath_));
+            std::string portGadgetNode;
+            fs >> portGadgetNode;
+
+            std::string xmlGadgetNode;
+            fs >> xmlGadgetNode;
+
+            unsigned int computingPowerIndex = 0;
+            fs >> computingPowerIndex;
+
+            // a truncated or malformed record must not be stored as a node
+            if (!fs)
+            {
+                GADGET_WARN_MSG("Incomplete node record in GT CloudNodeFile; use the local setting instead ... ");
+                return false;
+            }
+
+            gtCloud[n].get<0>() = gadgetNode;
+            gtCloud[n].get<1>() = portGadgetNode;
+            gtCloud[n].get<2>() = xmlGadgetNode;
+            gtCloud[n].get<3>() = computingPowerIndex;
+
+            // log the entry just written through the gtCloud parameter
+            GADGET_CONDITION_MSG(verboseMode_, "Gadget Node " << n << " : " << gtCloud[n]);
+        }
+
+        fs.close();
+
+        return true;
     }
-    else
+
+    int GtPlusReconGadget::process_config(ACE_Message_Block* mb)
     {
-        GADGET_MSG("GtPlusRecon, debugFolder2 is not set ...");
-    }
+        // [Ro E1 Cha Slice E2 Con Phase Rep Set Seg Ave]
+        //   0  1  2   3    4   5    6     7  8   9   10
 
-    return GADGET_OK;
-}
+        verboseMode_ = this->get_bool_value("verboseMode");
 
-bool GtPlusReconGadget::
-generateDebugFolderPath(const std::string& debugFolder, std::string& debugFolderPath)
-{
-    debugFolderPath = ACE_OS::getenv("GADGETRON_HOME");
-    debugFolderPath.append("/");
-    debugFolderPath.append(debugFolder);
-    debugFolderPath.append("/");
-    GADGET_CONDITION_MSG(verboseMode_, "Debug folder is " << debugFolderPath);
-    return true;
-}
+        // read parameters from xml
+        image_series_ = this->get_int_value("image_series");
 
-void GtPlusReconGadget::
-getCurrentMoment(std::string& procTime)
-{
-    char timestamp[100];
-    time_t mytime;
-    struct tm *mytm;
-    mytime=time(NULL);
-    mytm=localtime(&mytime);
-    strftime(timestamp, sizeof(timestamp),"_%a_%d_%b_%Y_%H_%M_%S",mytm);
-    procTime = timestamp;
-}
+        // read in parameters from the xml
+        GADGET_CHECK_RETURN(this->readParameters(), GADGET_FAIL);
 
-int GtPlusReconGadget::process(Gadgetron::GadgetContainerMessage< GtPlusGadgetImageArray >* m1, Gadgetron::GadgetContainerMessage< WorkOrderType > * m2)
-{
-    GADGET_CONDITION_MSG(verboseMode_, "GtPlusReconGadget::process(...) starts ... ");
+        // check whether the second set of recon results is required
+        recon_res_second_required_ = false;
 
-    processed_called_times_++;
+        ISMRMRD::IsmrmrdHeader h;
+        try {
+            deserialize(mb->rd_ptr(),h);
+        } catch (...) {
+            GADGET_DEBUG1("Error parsing ISMRMRD Header");
+        }
 
-    GtPlusGadgetImageArray* images = m1->getObjectPtr();
+        if (!h.acquisitionSystemInformation) {
+            GADGET_DEBUG1("acquisitionSystemInformation not found in header. Bailing out");
+            return GADGET_FAIL;
+        }
+        num_acq_channels_ = h.acquisitionSystemInformation->receiverChannels;
 
-    boost::shared_ptr< std::vector<size_t> > dims = m2->getObjectPtr()->data_.get_dimensions();
+        GADGET_CONDITION_MSG(verboseMode_, "Number of acquisition channels : " << num_acq_channels_);
 
-    GADGET_CONDITION_MSG(verboseMode_, "[Ro E1 Cha Slice E2 Con Phase Rep Set Seg] = [" 
-        << (*dims)[0] << " " << (*dims)[1] << " " << (*dims)[2] << " " << (*dims)[3] << " " << (*dims)[4] 
-        << " " << (*dims)[5] << " " << (*dims)[6] << " " << (*dims)[7] << " " << (*dims)[8] << " " << (*dims)[9] << "]");
+        if (h.encoding.size() < 1 || h.encoding.size() > 2) {
+            GADGET_DEBUG2("Number of encoding spaces: %d\n", h.encoding.size());
+            GADGET_DEBUG1("This GtPlusReconGadget only supports one or two encoding spaces\n");
+            return GADGET_FAIL;
+        }
 
-    dimensions_ = *dims;
+        // find out the encoding space 
+        ISMRMRD::EncodingSpace e_space = h.encoding[0].encodedSpace;
+        ISMRMRD::EncodingSpace r_space = h.encoding[0].reconSpace;
+        ISMRMRD::EncodingLimits e_limits = h.encoding[0].encodingLimits;
+
+        matrix_size_encoding_[0] = e_space.matrixSize.x;
+        matrix_size_encoding_[1] = e_space.matrixSize.y;
+        matrix_size_encoding_[2] = e_space.matrixSize.z;
+        GADGET_CONDITION_MSG(verboseMode_, "Encoding matrix size: " << matrix_size_encoding_[0] << " " << matrix_size_encoding_[1] << " " << matrix_size_encoding_[2]);
+
+        field_of_view_encoding_[0] = e_space.fieldOfView_mm.x;
+        field_of_view_encoding_[1] = e_space.fieldOfView_mm.y;
+        field_of_view_encoding_[2] = e_space.fieldOfView_mm.z;
+        GADGET_CONDITION_MSG(verboseMode_, "Encoding field_of_view : " << field_of_view_encoding_[0] << " " << field_of_view_encoding_[1] << " " << field_of_view_encoding_[2]);
+
+        // find the recon space
+        matrix_size_recon_[0] = r_space.matrixSize.x;
+        matrix_size_recon_[1] = r_space.matrixSize.y;
+        matrix_size_recon_[2] = r_space.matrixSize.z;
+        GADGET_CONDITION_MSG(verboseMode_, "Recon matrix size : " << matrix_size_recon_[0] << " " << matrix_size_recon_[1] << " " << matrix_size_recon_[2]);
+
+        field_of_view_recon_[0] = r_space.fieldOfView_mm.x;
+        field_of_view_recon_[1] = r_space.fieldOfView_mm.y;
+        field_of_view_recon_[2] = r_space.fieldOfView_mm.z;
+        GADGET_CONDITION_MSG(verboseMode_, "Recon field_of_view :  " << field_of_view_recon_[0] << " " << field_of_view_recon_[1] << " " << field_of_view_recon_[2]);
+
+        // this gadget supports two encoding spaces only if the
+        // second encoding space has the same field of view and resolution as the first
+        // e.g. for FLASH PAT reference scans.
+        if (h.encoding.size() == 2)
+        {
+            if (! ((h.encoding[0].reconSpace.matrixSize.x == h.encoding[1].reconSpace.matrixSize.x) && 
+                (h.encoding[0].reconSpace.matrixSize.y == h.encoding[1].reconSpace.matrixSize.y) && 
+                (h.encoding[0].reconSpace.matrixSize.z == h.encoding[1].reconSpace.matrixSize.z) && 
+                (h.encoding[0].reconSpace.fieldOfView_mm.x == h.encoding[1].reconSpace.fieldOfView_mm.x) &&
+                (h.encoding[0].reconSpace.fieldOfView_mm.y == h.encoding[1].reconSpace.fieldOfView_mm.y) &&
+                (h.encoding[0].reconSpace.fieldOfView_mm.z == h.encoding[1].reconSpace.fieldOfView_mm.z)) )
+            {
+                GADGET_DEBUG2("Number of encoding spaces: %d\n", h.encoding.size());
+                GADGET_DEBUG1("This GtPlusAccumulatorWorkOrderTriggerGadget only supports two encoding spaces with identical recon spaces.\n");
+                return GADGET_FAIL;
+            }
+        }
 
-    GADGET_CONDITION_MSG(verboseMode_, "GtPlusReconGadget::process(...) ends ... ");
+        reconE1_ = matrix_size_recon_[1];
+        GADGET_CONDITION_MSG(verboseMode_, "reconE1_ is " << reconE1_);
 
-    m1->release();
-    return GADGET_OK;
-}
+        reconE2_ = matrix_size_recon_[2];
+        GADGET_CONDITION_MSG(verboseMode_, "reconE2_ is " << reconE2_);
 
-int GtPlusReconGadget::computeSeriesImageNumber (ISMRMRD::ImageHeader& imheader, size_t nCHA, size_t cha, size_t nE2, size_t e2)
-{
-    int nSET = meas_max_idx_.set+1;
-    int nREP = meas_max_idx_.repetition+1;
-    int nPHS = meas_max_idx_.phase+1;
-    int nSLC = meas_max_idx_.slice+1;
-    int nCON = meas_max_idx_.contrast+1;
-    if ( nE2 == 0 ) nE2 = 1;
-
-    int imageNum = imheader.repetition*nSET*nPHS*nCON*nSLC*nE2*nCHA 
-                    + imheader.set*nPHS*nCON*nSLC*nE2*nCHA 
-                    + imheader.phase*nCON*nSLC*nE2*nCHA 
-                    + imheader.contrast*nSLC*nE2*nCHA
-                    + imheader.slice*nE2*nCHA 
-                    + e2*nCHA 
-                    + cha 
-                    + 1;
-
-    return imageNum;
-}
+        kSpaceMaxAcqE1No_ = matrix_size_encoding_[1]-1;
+        GADGET_CONDITION_MSG(verboseMode_, "kSpaceMaxAcqE1No_ is " << kSpaceMaxAcqE1No_);
 
-bool GtPlusReconGadget::
-addPrePostZeros(int centreNo, int sampleNo, int& PrePostZeros)
-{
-    // 1 : pre zeros
-    // 2 : post zeros
-    // 0 : no zeros
-    PrePostZeros = 0;
+        kSpaceMaxAcqE2No_ = matrix_size_encoding_[2]-1;
+        GADGET_CONDITION_MSG(verboseMode_, "kSpaceMaxAcqE2No_ is " << kSpaceMaxAcqE2No_);
 
-    if ( sampleNo <= 1 )
-        return true;
+        aSpacing_[0] = field_of_view_recon_[0]/matrix_size_recon_[0];
+        aSpacing_[1] = field_of_view_recon_[1]/reconE1_;
+        aSpacing_[2] = field_of_view_recon_[2]/reconE2_;
 
-    if ( 2*centreNo == sampleNo )
-    {
-        PrePostZeros = 0;
-    }
+        gt_exporter_.setPixelSize(aSpacing_[0], aSpacing_[1], aSpacing_[2], aSpacing_[3], aSpacing_[4], aSpacing_[5]);
 
-    if ( 2*centreNo < sampleNo )
-    {
-        PrePostZeros = 1;
-    }
+        //XUE-TODO: This is actually wrong. This assumes that you always zeropad, which is probably bad practice
+        meas_max_idx_.kspace_encode_step_1 = (uint16_t)matrix_size_encoding_[1]-1;
+        meas_max_idx_.set = (e_limits.set && (e_limits.set->maximum>0)) ? e_limits.set->maximum : 0;
+        meas_max_idx_.phase = (e_limits.phase && (e_limits.phase->maximum>0)) ? e_limits.phase->maximum : 0;
 
-    if ( 2*centreNo > sampleNo )
-    {
-        PrePostZeros = 2;
-    }
+        meas_max_idx_.kspace_encode_step_2 = (uint16_t)matrix_size_encoding_[2]-1; 
 
-    return true;
-}
+        meas_max_idx_.contrast = (e_limits.contrast && (e_limits.contrast->maximum > 0)) ? e_limits.contrast->maximum : 0;
+        meas_max_idx_.slice = (e_limits.slice && (e_limits.slice->maximum > 0)) ? e_limits.slice->maximum : 0;
+        meas_max_idx_.repetition = e_limits.repetition ? e_limits.repetition->maximum : 0;
+        meas_max_idx_.average = e_limits.average ? e_limits.average->maximum : 0;
 
-bool GtPlusReconGadget::
-scalingMagnitude(hoNDArray<float>& mag)
-{
-    if ( scalingFactor_ < 0 && !use_constant_scalingFactor_ )
-    {
-        // perform the scaling to [0 max_inten_value_]
-        size_t ind;
-        float maxInten;
+        // combine all incoming segments
+        meas_max_idx_.segment = 0;
+
+        // find out the PAT mode
+        if (!h.encoding[0].parallelImaging) {
+            GADGET_DEBUG1("Parallel Imaging section not found in header");
+            return GADGET_FAIL;
+        }
+
+        ISMRMRD::ParallelImaging p_imaging = *h.encoding[0].parallelImaging;
 
-        size_t RO = mag.get_size(0);
-        size_t E1 = mag.get_size(1);
-        size_t num = mag.get_number_of_elements()/(RO*E1);
+        acceFactorE1_ = (long)(p_imaging.accelerationFactor.kspace_encoding_step_1);
+        acceFactorE2_ = (long)(p_imaging.accelerationFactor.kspace_encoding_step_2);
+        GADGET_CONDITION_MSG(verboseMode_, "acceFactorE1 is " << acceFactorE1_);
+        GADGET_CONDITION_MSG(verboseMode_, "acceFactorE2 is " << acceFactorE2_);
 
-        if ( num <= 24 )
+        std::string calib = *p_imaging.calibrationMode;
+
+        bool separate = (calib.compare("separate") == 0);
+        bool embedded = (calib.compare("embedded") == 0);
+        bool external = (calib.compare("external") == 0);
+        bool interleaved = (calib.compare("interleaved") == 0);
+        bool other = (calib.compare("other") == 0);
+
+        if ( separate )
         {
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::maxAbsolute(mag, maxInten, ind));
+            GADGET_CONDITION_MSG(verboseMode_, "Colibration mode is separate");
         }
-        else
+        else if ( embedded )
         {
-            hoNDArray<float> magPartial(RO, E1, 24, mag.get_data_ptr()+(num/2 - 12)*RO*E1);
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::maxAbsolute(magPartial, maxInten, ind));
+            GADGET_CONDITION_MSG(verboseMode_, "Colibration mode is embedded");
         }
-        if ( maxInten < FLT_EPSILON ) maxInten = 1.0f;
-
-        if ( (maxInten<min_intensity_value_) || (maxInten>max_intensity_value_) )
+        else if ( interleaved )
         {
-            GADGET_CONDITION_MSG(verboseMode_, "Using the dynamic intensity scaling factor - may not have noise prewhitening performed ... ");
-            scalingFactor_ = (float)(max_intensity_value_US_)/maxInten;
+            GADGET_CONDITION_MSG(verboseMode_, "Colibration mode is interleaved");
         }
-        else
+        else if ( external )
+        {
+            GADGET_CONDITION_MSG(verboseMode_, "Colibration mode is external");
+        }
+        else if ( other )
         {
-            GADGET_CONDITION_MSG(verboseMode_, "Using the fixed intensity scaling factor - must have noise prewhitening performed ... ");
-            scalingFactor_ = SNR_NOISEFLOOR_SCALEFACTOR;
+            GADGET_CONDITION_MSG(verboseMode_, "Colibration mode is other");
+        }
 
-            while ( (maxInten*scalingFactor_ > max_intensity_value_) && (scalingFactor_>=2) )
-            {
-                scalingFactor_ /= 2;
-            }
+        //if ( other_ && acceFactorE1_==1 && acceFactorE2_==1 )
+        //{
+        //    GADGET_CONDITION_MSG(verboseMode_, "Colibration mode is changed to ISMRMRD_interleaved");
+        //    CalibMode_ = Gadgetron::gtPlus::ISMRMRD_interleaved;
+        //    acceFactorE1_ = 2;
+        //}
+
+        CalibMode_ = Gadgetron::gtPlus::ISMRMRD_noacceleration;
+
+        if ( interleaved )
+        {
+            CalibMode_ = Gadgetron::gtPlus::ISMRMRD_interleaved;
 
-            if (maxInten*scalingFactor_ > max_intensity_value_)
+            if ( p_imaging.interleavingDimension )
             {
-                GADGET_CONDITION_MSG(verboseMode_, "The fixed intensity scaling factor leads to dynamic range overflow - switch to dyanmic intensity scaling ... ");
-                scalingFactor_ = (float)(max_intensity_value_)/maxInten;
+                if ( p_imaging.interleavingDimension->compare("phase") == 0 ) {
+                    InterleaveDim_ = Gadgetron::gtPlus::DIM_Phase;
+                } else if ( p_imaging.interleavingDimension->compare("repetition") == 0 ) {
+                    InterleaveDim_ = Gadgetron::gtPlus::DIM_Repetition;
+                } else if ( p_imaging.interleavingDimension->compare("average") == 0 ) {
+                    InterleaveDim_ = Gadgetron::gtPlus::DIM_Average;
+                } else if ( p_imaging.interleavingDimension->compare("contrast") == 0 ) {
+                    InterleaveDim_ = Gadgetron::gtPlus::DIM_Contrast;
+                } else if ( p_imaging.interleavingDimension->compare("other") == 0 ) {
+                    InterleaveDim_ = Gadgetron::gtPlus::DIM_other1;
+                } else {
+                    GADGET_DEBUG1("Unknown interleaving dimension. Bailing out");
+                    return GADGET_FAIL;
+                }
             }
+        }
+        else if ( embedded )
+        {
+            CalibMode_ = Gadgetron::gtPlus::ISMRMRD_embedded;
+        }
+        else if ( separate )
+        {
+            CalibMode_ = Gadgetron::gtPlus::ISMRMRD_separate;
+        }
+        else if ( external )
+        {
+            CalibMode_ = Gadgetron::gtPlus::ISMRMRD_external;
+        }
+        else if ( other )
+        {
+            CalibMode_ = Gadgetron::gtPlus::ISMRMRD_other;
+        }
 
-            use_constant_scalingFactor_ = true;
+        // ---------------------------------------------------------------------------------------------------------
+        // generate the destination folder
+        if ( !debugFolder_.empty() )
+        {
+            Gadgetron::getDebugFolderPath(debugFolder_, debugFolder_fullPath_, verboseMode_);
+        }
+        else
+        {
+            GADGET_MSG("GtPlusRecon, debugFolder is not set ...");
+        }
+
+        if ( !debugFolder2_.empty() )
+        {
+            Gadgetron::getDebugFolderPath(debugFolder2_, debugFolder2_fullPath_, verboseMode_);
+        }
+        else
+        {
+            GADGET_MSG("GtPlusRecon, debugFolder2 is not set ...");
         }
 
-        GADGET_CONDITION_MSG(verboseMode_, "scalingFactor_ : " << scalingFactor_);
-        GADGET_CHECK_RETURN_FALSE(scal((float)scalingFactor_, mag));
+        // ---------------------------------------------------------------------------------------------------------
+        // set the maximal number of threads used
+        if ( thread_number_ratio_>0 && thread_number_ratio_<1 )
+        {
+        }
+
+        return GADGET_OK;
     }
-    else
+
+    int GtPlusReconGadget::process(Gadgetron::GadgetContainerMessage< GtPlusGadgetImageArray >* m1, Gadgetron::GadgetContainerMessage< WorkOrderType > * m2)
     {
-        GADGET_CONDITION_MSG(verboseMode_, "Using the fixed intensity scaling factor - scaling factor has been preset to be : " << scalingFactor_ << " ... ");
-        GADGET_CHECK_RETURN_FALSE(scal((float)scalingFactor_, mag));
+        GADGET_CONDITION_MSG(verboseMode_, "GtPlusReconGadget::process(...) starts ... ");
+
+        processed_called_times_++;
+
+        GtPlusGadgetImageArray* images = m1->getObjectPtr();
+
+        boost::shared_ptr< std::vector<size_t> > dims = m2->getObjectPtr()->data_.get_dimensions();
+
+        GADGET_CONDITION_MSG(verboseMode_, "[Ro E1 Cha Slice E2 Con Phase Rep Set Seg Ave] = [" 
+            << (*dims)[0] << " " << (*dims)[1] << " " << (*dims)[2] << " " 
+            << (*dims)[3] << " " << (*dims)[4] << " " << (*dims)[5] << " " 
+            << (*dims)[6] << " " << (*dims)[7] << " " << (*dims)[8] << " " 
+            << (*dims)[9] << " " << (*dims)[10] << "]");
+
+        dimensions_ = *dims;
+
+        GADGET_CONDITION_MSG(verboseMode_, "GtPlusReconGadget::process(...) ends ... ");
+
+        m1->release();
+        return GADGET_OK;
     }
 
-    return true;
-}
+    size_t GtPlusReconGadget::computeSeriesImageNumber (ISMRMRD::ImageHeader& imheader, size_t nCHA, size_t cha, size_t nE2, size_t e2)
+    {
+        size_t nSET = meas_max_idx_.set+1;
+        size_t nREP = meas_max_idx_.repetition+1;
+        size_t nPHS = meas_max_idx_.phase+1;
+        size_t nSLC = meas_max_idx_.slice+1;
+        size_t nCON = meas_max_idx_.contrast+1;
+        if ( nE2 == 0 ) nE2 = 1;
+
+        size_t imageNum = imheader.average*nREP*nSET*nPHS*nCON*nSLC*nE2*nCHA 
+            + imheader.repetition*nSET*nPHS*nCON*nSLC*nE2*nCHA 
+            + imheader.set*nPHS*nCON*nSLC*nE2*nCHA 
+            + imheader.phase*nCON*nSLC*nE2*nCHA 
+            + imheader.contrast*nSLC*nE2*nCHA
+            + imheader.slice*nE2*nCHA 
+            + e2*nCHA 
+            + cha 
+            + 1;
+
+        return imageNum;
+    }
 
-bool GtPlusReconGadget::
-generateKSpaceFilter(WorkOrderType& workOrder)
-{
-    try
+    bool GtPlusReconGadget::
+        addPrePostZeros(int centreNo, int sampleNo, int& PrePostZeros)
     {
-        size_t RO = workOrder.data_.get_size(0);
-        size_t E1 = workOrder.data_.get_size(1);
-        size_t E2 = workOrder.data_.get_size(4);
+        // 1 : pre zeros
+        // 2 : post zeros
+        // 0 : no zeros
+        PrePostZeros = 0;
 
-        size_t RO_ref = workOrder.ref_.get_size(0);
-        size_t E1_ref = workOrder.ref_.get_size(1);
-        size_t E2_ref = workOrder.ref_.get_size(4);
+        if ( sampleNo <= 1 )
+            return true;
 
-        if ( workOrder.CalibMode_ == Gadgetron::gtPlus::ISMRMRD_interleaved )
+        if ( 2*centreNo == sampleNo )
         {
-            RO_ref = RO;
-            E1_ref = E1;
-            E2_ref = E2;
+            PrePostZeros = 0;
         }
 
-        // image data filter
-        if ( RO>1 && filterRO_type_ != ISMRMRD_FILTER_NONE )
+        if ( 2*centreNo < sampleNo )
         {
-            workOrder.filterRO_.create(RO);
-            GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilter(RO, workOrder.filterRO_, filterRO_type_, filterRO_sigma_, std::ceil(filterRO_width_*RO)));
-            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterRO_, "filterRO");
+            PrePostZeros = 1;
         }
 
-        if ( E1>1 && filterE1_type_ != ISMRMRD_FILTER_NONE )
+        if ( 2*centreNo > sampleNo )
         {
-            workOrder.filterE1_.create(E1);
-            GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilter(E1, workOrder.filterE1_, filterE1_type_, filterE1_sigma_, std::ceil(filterE1_width_*E1)));
-            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE1_, "filterE1");
+            PrePostZeros = 2;
         }
 
-        if ( E2>1 && filterE2_type_ != ISMRMRD_FILTER_NONE )
+        return true;
+    }
+
+    bool GtPlusReconGadget::
+        scalingImages(hoNDArray<ValueType>& res)
+    {
+        if ( scalingFactor_ < 0 && !use_constant_scalingFactor_ )
         {
-            workOrder.filterE2_.create(E2);
-            GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilter(E2, workOrder.filterE2_, filterE2_type_, filterE2_sigma_, std::ceil(filterE2_width_*E2)));
-            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE2_, "filterE2");
+            hoNDArray<float> mag(res.get_dimensions());
+            Gadgetron::abs(res, mag);
+            GADGET_CHECK_RETURN_FALSE(this->scalingMagnitude(mag));
         }
 
-        // ref data filter
-        if ( workOrder.ref_.get_number_of_elements() > 0 )
+        scal((float)scalingFactor_, res);
+
+        return true;
+    }
+
+    bool GtPlusReconGadget::
+        scalingMagnitude(hoNDArray<float>& mag)
+    {
+        if ( scalingFactor_ < 0 && !use_constant_scalingFactor_ )
         {
-            size_t startRO(0), endRO(0), startE1(0), endE1(0), startE2(0), endE2(0);
-            if ( E2_ref == 1 )
+            // perform the scaling to [0 max_inten_value_]
+            size_t ind;
+            float maxInten;
+
+            size_t RO = mag.get_size(0);
+            size_t E1 = mag.get_size(1);
+            size_t num = mag.get_number_of_elements()/(RO*E1);
+
+            if ( num <= 24 )
             {
-                GADGET_CHECK_RETURN_FALSE(gtPlus_util_complex_.detectSampledRegion2D(workOrder.ref_, startRO, endRO, startE1, endE1));
+                Gadgetron::maxAbsolute(mag, maxInten, ind);
             }
             else
             {
-                GADGET_CHECK_RETURN_FALSE(gtPlus_util_complex_.detectSampledRegion3D(workOrder.ref_, startRO, endRO, startE1, endE1, startE2, endE2));
+                hoNDArray<float> magPartial(RO, E1, 24, mag.get_data_ptr()+(num/2 - 12)*RO*E1);
+                Gadgetron::maxAbsolute(magPartial, maxInten, ind);
             }
+            if ( maxInten < FLT_EPSILON ) maxInten = 1.0f;
 
-            if ( (workOrder.CalibMode_ == ISMRMRD_interleaved) || (workOrder.CalibMode_ == ISMRMRD_embedded) )
+            if ( (maxInten<min_intensity_value_) || (maxInten>max_intensity_value_) )
             {
-                // use the image data sample range
-                startRO = workOrder.start_RO_; if ( startRO < 0 ) startRO=0;
-                endRO = workOrder.end_RO_; if ( endRO < 0 ) endRO = RO_ref-1;
+                GADGET_CONDITION_MSG(verboseMode_, "Using the dynamic intensity scaling factor - may not have noise prewhitening performed ... ");
+                scalingFactor_ = (float)(max_intensity_value_US_)/maxInten;
             }
+            else
+            {
+                GADGET_CONDITION_MSG(verboseMode_, "Using the fixed intensity scaling factor - must have noise prewhitening performed ... ");
+                scalingFactor_ = SNR_NOISEFLOOR_SCALEFACTOR;
 
-            if ( RO_ref > 1 && filterRO_ref_type_ != ISMRMRD_FILTER_NONE )
+                while ( (maxInten*scalingFactor_ > max_intensity_value_) && (scalingFactor_>=2) )
+                {
+                    scalingFactor_ /= 2;
+                }
+
+                if (maxInten*scalingFactor_ > max_intensity_value_)
+                {
+                    GADGET_CONDITION_MSG(verboseMode_, "The fixed intensity scaling factor leads to dynamic range overflow - switch to dyanmic intensity scaling ... ");
+                    scalingFactor_ = (float)(max_intensity_value_)/maxInten;
+                }
+
+                use_constant_scalingFactor_ = true;
+            }
+
+            GADGET_CONDITION_MSG(verboseMode_, "scalingFactor_ : " << scalingFactor_);
+            scal((float)scalingFactor_, mag);
+        }
+        else
+        {
+            GADGET_CONDITION_MSG(verboseMode_, "Using the fixed intensity scaling factor - scaling factor has been preset to be : " << scalingFactor_ << " ... ");
+            scal((float)scalingFactor_, mag);
+        }
+
+        return true;
+    }
+
+    bool GtPlusReconGadget::
+        generateKSpaceFilter(WorkOrderType& workOrder)
+    {
+        try
+        {
+            size_t RO = workOrder.data_.get_size(0);
+            size_t E1 = workOrder.data_.get_size(1);
+            size_t E2 = workOrder.data_.get_size(4);
+
+            size_t RO_ref = workOrder.ref_.get_size(0);
+            size_t E1_ref = workOrder.ref_.get_size(1);
+            size_t E2_ref = workOrder.ref_.get_size(4);
+
+            if ( workOrder.CalibMode_ == Gadgetron::gtPlus::ISMRMRD_interleaved )
+            {
+                RO_ref = RO;
+                E1_ref = E1;
+                E2_ref = E2;
+            }
+
+            // image data filter
+            if ( RO>1 && filterRO_type_ != ISMRMRD_FILTER_NONE )
+            {
+                workOrder.filterRO_.create(RO);
+                GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilter(RO, workOrder.start_RO_, workOrder.end_RO_, workOrder.filterRO_, filterRO_type_, filterRO_sigma_, (size_t)std::ceil(filterRO_width_*RO)));
+                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterRO_, "filterRO");
+            }
+
+            if ( E1>1 && filterE1_type_ != ISMRMRD_FILTER_NONE )
+            {
+                workOrder.filterE1_.create(E1);
+                GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilter(E1, workOrder.start_E1_, workOrder.end_E1_, workOrder.filterE1_, filterE1_type_, filterE1_sigma_, (size_t)std::ceil(filterE1_width_*E1)));
+                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE1_, "filterE1");
+            }
+
+            if ( E2>1 && filterE2_type_ != ISMRMRD_FILTER_NONE )
             {
-                workOrder.filterRO_ref_.create(RO_ref);
-                GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(RO_ref, startRO, endRO, workOrder.filterRO_ref_, filterRO_ref_type_, filterRO_ref_sigma_, std::ceil(filterRO_ref_width_*RO_ref)));
-                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterRO_ref_, "filterRO_ref");
+                workOrder.filterE2_.create(E2);
+                GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilter(E2, workOrder.start_E2_, workOrder.end_E2_, workOrder.filterE2_, filterE2_type_, filterE2_sigma_, (size_t)std::ceil(filterE2_width_*E2)));
+                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE2_, "filterE2");
             }
 
-            if ( (workOrder.CalibMode_ == ISMRMRD_separate) || (workOrder.CalibMode_ == ISMRMRD_external) )
+            // ref data filter
+            if ( workOrder.ref_.get_number_of_elements() > 0 )
             {
-                if ( E1_ref > 1 && filterE1_ref_type_ != ISMRMRD_FILTER_NONE )
+                size_t startRO(0), endRO(0), startE1(0), endE1(0), startE2(0), endE2(0);
+                if ( E2_ref == 1 )
                 {
-                    size_t len = endE1-startE1+1;
-                    workOrder.filterE1_ref_.create(len);
-                    GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilter(len, workOrder.filterE1_ref_, filterE1_ref_type_, filterE1_ref_sigma_, std::ceil(filterE1_ref_width_*len)));
-                    GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE1_ref_, "filterE1_ref");
+                    GADGET_CHECK_RETURN_FALSE(gtPlus_util_complex_.detectSampledRegion2D(workOrder.ref_, startRO, endRO, startE1, endE1));
+                }
+                else
+                {
+                    GADGET_CHECK_RETURN_FALSE(gtPlus_util_complex_.detectSampledRegion3D(workOrder.ref_, startRO, endRO, startE1, endE1, startE2, endE2));
                 }
 
-                if ( E2_ref > 1 && filterE2_ref_type_ != ISMRMRD_FILTER_NONE )
+                if ( (workOrder.CalibMode_ == ISMRMRD_interleaved) || (workOrder.CalibMode_ == ISMRMRD_embedded) )
                 {
-                    size_t len = endE2-startE2+1;
-                    workOrder.filterE2_ref_.create(len);
-                    GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilter(len, workOrder.filterE2_ref_, filterE2_ref_type_, filterE2_ref_sigma_, std::ceil(filterE2_ref_width_*len)));
-                    GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE2_ref_, "filterE2_ref");
+                    // use the image data sample range
+                    startRO = workOrder.start_RO_; if ( startRO < 0 ) startRO=0;
+                    endRO = workOrder.end_RO_; if ( endRO < 0 ) endRO = RO_ref-1;
                 }
-            }
-            else
-            {
-                // this makes sure for interleaved and embedded, the kspace filter is applied at correct lines
-                if ( E1_ref > 1 && filterE1_ref_type_ != ISMRMRD_FILTER_NONE )
+
+                if ( RO_ref > 1 && filterRO_ref_type_ != ISMRMRD_FILTER_NONE )
                 {
-                    size_t len = E1_ref;
-                    workOrder.filterE1_ref_.create(len);
-                    GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(len, startE1, endE1, workOrder.filterE1_ref_, filterE1_ref_type_, filterE1_ref_sigma_, std::ceil(filterE1_ref_width_*len)));
-                    GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE1_ref_, "filterE1_ref");
+                    workOrder.filterRO_ref_.create(RO_ref);
+                    GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(RO_ref, startRO, endRO, workOrder.filterRO_ref_, filterRO_ref_type_, filterRO_ref_sigma_, (size_t)std::ceil(filterRO_ref_width_*RO_ref)));
+                    GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterRO_ref_, "filterRO_ref");
                 }
 
-                if ( E2_ref > 1 && filterE2_ref_type_ != ISMRMRD_FILTER_NONE )
+                if ( (workOrder.CalibMode_ == ISMRMRD_separate) || (workOrder.CalibMode_ == ISMRMRD_external) )
                 {
-                    size_t len = E2_ref;
-                    workOrder.filterE2_ref_.create(len);
-                    GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(len, startE2, endE2, workOrder.filterE2_ref_, filterE2_ref_type_, filterE2_ref_sigma_, std::ceil(filterE2_ref_width_*len)));
-                    GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE2_ref_, "filterE2_ref");
+                    if ( E1_ref > 1 && filterE1_ref_type_ != ISMRMRD_FILTER_NONE )
+                    {
+                        size_t len = endE1-startE1+1;
+                        workOrder.filterE1_ref_.create(len);
+                        GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilter(len, 0, len-1, workOrder.filterE1_ref_, filterE1_ref_type_, filterE1_ref_sigma_, (size_t)std::ceil(filterE1_ref_width_*len)));
+                        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE1_ref_, "filterE1_ref");
+                    }
+
+                    if ( E2_ref > 1 && filterE2_ref_type_ != ISMRMRD_FILTER_NONE )
+                    {
+                        size_t len = endE2-startE2+1;
+                        workOrder.filterE2_ref_.create(len);
+                        GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilter(len, 0, len-1, workOrder.filterE2_ref_, filterE2_ref_type_, filterE2_ref_sigma_, (size_t)std::ceil(filterE2_ref_width_*len)));
+                        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE2_ref_, "filterE2_ref");
+                    }
+                }
+                else
+                {
+                    // this makes sure for interleaved and embedded, the kspace filter is applied at correct lines
+                    if ( E1_ref > 1 && filterE1_ref_type_ != ISMRMRD_FILTER_NONE )
+                    {
+                        size_t len = E1_ref;
+                        workOrder.filterE1_ref_.create(len);
+                        GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(len, startE1, endE1, workOrder.filterE1_ref_, filterE1_ref_type_, filterE1_ref_sigma_, (size_t)std::ceil(filterE1_ref_width_*len)));
+                        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE1_ref_, "filterE1_ref");
+                    }
+
+                    if ( E2_ref > 1 && filterE2_ref_type_ != ISMRMRD_FILTER_NONE )
+                    {
+                        size_t len = E2_ref;
+                        workOrder.filterE2_ref_.create(len);
+                        GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(len, startE2, endE2, workOrder.filterE2_ref_, filterE2_ref_type_, filterE2_ref_sigma_, (size_t)std::ceil(filterE2_ref_width_*len)));
+                        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE2_ref_, "filterE2_ref");
+                    }
                 }
             }
-        }
 
-        // partial fourier handling filter
-        if ( RO>1 && workOrder.start_RO_>=0 && workOrder.end_RO_>0 )
-        {
-            workOrder.filterRO_partialfourier_.create(RO);
-            GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateAsymmetricFilter(RO, workOrder.start_RO_, workOrder.end_RO_, workOrder.filterRO_partialfourier_, filterRO_pf_type_, std::ceil(filterRO_pf_width_*RO), filterRO_pf_densityComp_));
-            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterRO_partialfourier_, "filterRO_partialfourier");
-        }
+            // partial fourier handling filter
+            if ( RO>1 && workOrder.start_RO_>=0 && workOrder.end_RO_>0 )
+            {
+                workOrder.filterRO_partialfourier_.create(RO);
+                GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateAsymmetricFilter(RO, workOrder.start_RO_, workOrder.end_RO_, workOrder.filterRO_partialfourier_, filterRO_pf_type_, (size_t)std::ceil(filterRO_pf_width_*RO), filterRO_pf_densityComp_));
+                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterRO_partialfourier_, "filterRO_partialfourier");
+            }
 
-        if ( E1>1 && workOrder.start_E1_>=0 && workOrder.end_E1_>0 )
-        {
-            workOrder.filterE1_partialfourier_.create(E1);
-            GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateAsymmetricFilter(E1, workOrder.start_E1_, workOrder.end_E1_, workOrder.filterE1_partialfourier_, filterE1_pf_type_, std::ceil(filterE1_pf_width_*E1), filterE1_pf_densityComp_));
-            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE1_partialfourier_, "filterE1_partialfourier");
-        }
+            if ( E1>1 && workOrder.start_E1_>=0 && workOrder.end_E1_>0 )
+            {
+                workOrder.filterE1_partialfourier_.create(E1);
+                GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateAsymmetricFilter(E1, workOrder.start_E1_, workOrder.end_E1_, workOrder.filterE1_partialfourier_, filterE1_pf_type_, (size_t)std::ceil(filterE1_pf_width_*E1), filterE1_pf_densityComp_));
+                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE1_partialfourier_, "filterE1_partialfourier");
+            }
 
-        if ( E2>1 && workOrder.start_E2_>=0 && workOrder.end_E2_>0 )
+            if ( E2>1 && workOrder.start_E2_>=0 && workOrder.end_E2_>0 )
+            {
+                workOrder.filterE2_partialfourier_.create(E2);
+                GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateAsymmetricFilter(E2, workOrder.start_E2_, workOrder.end_E2_, workOrder.filterE2_partialfourier_, filterE2_pf_type_, (size_t)std::ceil(filterE2_pf_width_*E2), filterE2_pf_densityComp_));
+                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE2_partialfourier_, "filterE2_partialfourier");
+            }
+        }
+        catch(...)
         {
-            workOrder.filterE2_partialfourier_.create(E2);
-            GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateAsymmetricFilter(E2, workOrder.start_E2_, workOrder.end_E2_, workOrder.filterE2_partialfourier_, filterE2_pf_type_, std::ceil(filterE2_pf_width_*E2), filterE2_pf_densityComp_));
-            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE2_partialfourier_, "filterE2_partialfourier");
+            GADGET_ERROR_MSG("Errors in GtPlusReconGadget::generateKSpaceFilter(...) ... ");
+            return false;
         }
-    }
-    catch(...)
-    {
-        GADGET_ERROR_MSG("Errors in GtPlusReconGadget::generateKSpaceFilter(...) ... ");
-        return false;
+
+        return true;
     }
 
-    return true;
-}
+    bool GtPlusReconGadget::
+        recomputeImageGeometry(GtPlusGadgetImageArray* images, GtPlusGadgetImageExt& imageHeader, size_t slc, size_t e2, size_t con, size_t phs, size_t rep, size_t set, size_t seg, size_t ave, size_t maxE2)
+    {
+        size_t E2 = images->matrix_size[4];
 
-bool GtPlusReconGadget::
-recomputeImageGeometry(GtPlusGadgetImageArray* images, GtPlusGadgetImageExt& imageHeader, int slc, int e2, int con, int phs, int rep, int set, int seg, int maxE2)
-{
-    size_t E2 = images->matrix_size[4];
-
-    // if FOV are the same, return the stored image header, take care of E2 resizing
-    //if ( GT_ABS(field_of_view_recon_[2]-field_of_view_encoding_[2])<0.1 )
-    //{
-    //    if ( maxE2 == E2 ) // no E2 resizing
-    //    {
-    //        int offset = images->get_offset(slc, e2, con, phs, rep, set, 0);
-    //        imageHeader = images->imageArray_[offset];
-    //    }
-    //    else
-    //    {
-    //        double e2_sampled = e2*E2/(double)maxE2;
-
-    //        size_t e2_lower = std::floor(e2_sampled);
-    //        if ( e2_lower >= E2 ) e2_lower = E2-1;
-
-    //        size_t e2_higher = std::ceil(e2_sampled);
-    //        if ( e2_higher >= E2 ) e2_higher = E2-1;
-
-    //        GtPlusGadgetImageExt imageHeader_lower, imageHeader_higher;
-
-    //        if ( e2_lower == e2_higher )
-    //        {
-    //            int offset = images->get_offset(slc, e2_lower, con, phs, rep, set, 0);
-    //            imageHeader.copy(images->imageArray_[offset]);
-    //        }
-    //        else
-    //        {
-    //            int offset_lower = images->get_offset(slc, e2_lower, con, phs, rep, set, 0);
-    //            imageHeader_lower.copy(images->imageArray_[offset_lower]);
-
-    //            int offset_higher = images->get_offset(slc, e2_higher, con, phs, rep, set, 0);
-    //            imageHeader_higher.copy(images->imageArray_[offset_higher]);
-
-    //            imageHeader = imageHeader_lower;
-    //            imageHeader.recomputeHeader(imageHeader_higher, e2_higher-e2_sampled);
-    //        }
-    //    }
-    //}
-    //else
-    //{
         // need to recompute image geometry
         // no need to consider RO and E1, because image position vector points to the image center
 
         if ( e2 >= E2 ) e2 = E2/2;
 
-        int offsetCurr = images->get_offset(slc, e2, con, phs, rep, set, 0);
+        size_t offsetCurr = images->get_offset(slc, e2, con, phs, rep, set, 0, ave);
         imageHeader = images->imageArray_[offsetCurr];
 
         // find the center partition
         if ( E2 > 1 )
         {
-            int midE2 = E2/2;
-            int offset = images->get_offset(slc, midE2, con, phs, rep, set, 0);
+            size_t midE2 = E2/2;
+            size_t offset = images->get_offset(slc, midE2, con, phs, rep, set, 0, ave);
 
             while ( GT_ABS(imageHeader.slice_dir[0])<1e-6 && GT_ABS(imageHeader.slice_dir[1])<1e-6 && GT_ABS(imageHeader.slice_dir[2])<1e-6 )
             {
                 imageHeader = images->imageArray_[offset];
                 midE2++;
-                offset = images->get_offset(slc, midE2, con, phs, rep, set, 0);
+                offset = images->get_offset(slc, midE2, con, phs, rep, set, 0, ave);
             }
 
             // position vector for the center partition
@@ -1149,9 +1199,9 @@ recomputeImageGeometry(GtPlusGadgetImageArray* images, GtPlusGadgetImageExt& ima
 
             // comput slice postion vector for this partition
             float posVecCurr[3];
-            posVecCurr[0] = posVec[0] + aSpacing_[2]*sliceVec[0]*(e2-midE2+0.5);
-            posVecCurr[1] = posVec[1] + aSpacing_[2]*sliceVec[1]*(e2-midE2+0.5);
-            posVecCurr[2] = posVec[2] + aSpacing_[2]*sliceVec[2]*(e2-midE2+0.5);
+            posVecCurr[0] = (float)(posVec[0] + aSpacing_[2]*sliceVec[0]*(e2-midE2+0.5f));
+            posVecCurr[1] = (float)(posVec[1] + aSpacing_[2]*sliceVec[1]*(e2-midE2+0.5f));
+            posVecCurr[2] = (float)(posVec[2] + aSpacing_[2]*sliceVec[2]*(e2-midE2+0.5f));
 
             imageHeader.position[0] = posVecCurr[0];
             imageHeader.position[1] = posVecCurr[1];
@@ -1159,9 +1209,9 @@ recomputeImageGeometry(GtPlusGadgetImageArray* images, GtPlusGadgetImageExt& ima
 
             GADGET_CONDITION_MSG(verboseMode_, "--> image position : [" << imageHeader.position[0] << " , " << imageHeader.position[1] << " , " << imageHeader.position[2] << "]");
 
-            imageHeader.field_of_view[2] = aSpacing_[2];
+            imageHeader.field_of_view[2] = (float)(aSpacing_[2]);
 
-            imageHeader.user_int[0] = e2;
+            imageHeader.user_int[0] = (int32_t)e2;
         }
 
         if ( imageHeader.measurement_uid == 0 )
@@ -1169,191 +1219,359 @@ recomputeImageGeometry(GtPlusGadgetImageArray* images, GtPlusGadgetImageExt& ima
             GADGET_WARN_MSG("imageHeader.measurement_uid == 0");
         }
 
-        /*double e2_sampled = 0;
-        double coord_in_encoding_space = field_of_view_recon_[2]*e2/maxE2 + (field_of_view_encoding_[2]/2-field_of_view_recon_[2]/2);
-        e2_sampled = E2 * coord_in_encoding_space/field_of_view_encoding_[2];
+        return true;
+    }
 
-        if ( e2_sampled < 0 )
+    bool GtPlusReconGadget::
+        sendOutRecon(GtPlusGadgetImageArray* images, const hoNDArray<ValueType>& res, int seriesNum, const std::vector<DimensionRecordType>& dimStartingIndexes, const std::string& prefix, const std::string& dataRole)
+    {
+        try
         {
-            int offset = images->get_offset(slc, 0, con, phs, rep, set, 0);
-            imageHeader.copy(images->imageArray_[offset]);
+            hoNDArray<real_value_type> timeStamp, physioTimeStamp;
+            GADGET_CHECK_RETURN_FALSE( this->sendOutRecon(images, res, timeStamp, physioTimeStamp, seriesNum, dimStartingIndexes, prefix, dataRole) );
         }
-        else if ( e2_sampled > E2-1 )
+        catch(...)
         {
-            int offset = images->get_offset(slc, E2-1, con, phs, rep, set, 0);
-            imageHeader.copy(images->imageArray_[offset]);
+            GADGET_ERROR_MSG("Errors in GtPlusReconGadget::sendOutRecon(complex float) ... ");
+            return false;
         }
-        else
-        {
-            size_t e2_lower = std::floor(e2_sampled);
-            if ( e2_lower >= E2 ) e2_lower = E2-1;
-
-            size_t e2_higher = std::ceil(e2_sampled);
-            if ( e2_higher >= E2 ) e2_higher = E2-1;
 
-            GtPlusGadgetImageExt imageHeader_lower, imageHeader_higher;
+        return true;
+    }
 
-            if ( e2_lower == e2_higher )
+    bool GtPlusReconGadget::
+        sendOutRecon(GtPlusGadgetImageArray* images, const hoNDArray<ValueType>& res, const hoNDArray<real_value_type>& timeStamp, const hoNDArray<real_value_type>& physioTimeStamp, 
+        int seriesNum, const std::vector<DimensionRecordType>& dimStartingIndexes, const std::string& prefix, const std::string& dataRole)
+    {
+        try
+        {
+            boost::shared_ptr< std::vector<size_t> > dims = res.get_dimensions();
+            size_t RO =  (*dims)[0];
+            size_t E1 =  (*dims)[1];
+            size_t CHA = (*dims)[2];
+            size_t SLC = (*dims)[3];
+            size_t E2 =  (*dims)[4];
+            size_t CON = (*dims)[5];
+            size_t PHS = (*dims)[6];
+            size_t REP = (*dims)[7];
+            size_t SET = (*dims)[8];
+            size_t AVE = (*dims)[9];
+
+            GADGET_CONDITION_MSG(true, "sending out images, acquisition boundary [RO E1 CHA SLC E2 CON PHS REP SET AVE] = [" 
+                << RO << " " << E1 << " " << CHA << " " 
+                << SLC << " " << E2 << " " << CON << " " 
+                << PHS << " " << REP << " " << SET << " " 
+                << AVE << "] " );
+
+            bool hasTimeStamp = false;
+            if ( timeStamp.get_number_of_elements()>0 
+                && timeStamp.get_size(9)==AVE 
+                && timeStamp.get_size(8)==SET 
+                && timeStamp.get_size(7)==REP 
+                && timeStamp.get_size(6)==PHS 
+                && timeStamp.get_size(5)==CON 
+                && timeStamp.get_size(4)==E2 
+                && timeStamp.get_size(3)==SLC )
             {
-                int offset = images->get_offset(slc, e2_lower, con, phs, rep, set, 0);
-                imageHeader.copy(images->imageArray_[offset]);
+                hasTimeStamp = true;
             }
-            else
-            {
-                int offset_lower = images->get_offset(slc, e2_lower, con, phs, rep, set, 0);
-                imageHeader_lower.copy(images->imageArray_[offset_lower]);
 
-                int offset_higher = images->get_offset(slc, e2_higher, con, phs, rep, set, 0);
-                imageHeader_higher.copy(images->imageArray_[offset_higher]);
-
-                imageHeader.copy(imageHeader_lower);
-                imageHeader.recomputeHeader(imageHeader_higher, e2_higher-e2_sampled);
+            bool hasPhysioTimeStamp = false;
+            if ( physioTimeStamp.get_number_of_elements()>0 
+                && physioTimeStamp.get_size(9)==AVE 
+                && physioTimeStamp.get_size(8)==SET 
+                && physioTimeStamp.get_size(7)==REP 
+                && physioTimeStamp.get_size(6)==PHS 
+                && physioTimeStamp.get_size(5)==CON 
+                && physioTimeStamp.get_size(4)==E2 
+                && physioTimeStamp.get_size(3)==SLC )
+            {
+                hasPhysioTimeStamp = true;
             }
-        }*/
-    //}
-
-    return true;
-}
 
-bool GtPlusReconGadget::
-sendOutReconMag(GtPlusGadgetImageArray* images, const hoNDArray<float>& res, int seriesNum, const std::vector<DimensionRecordType>& dimStartingIndexes, const std::string& prefix)
-{
-    try
-    {
-        boost::shared_ptr< std::vector<size_t> > dims = res.get_dimensions();
-        size_t RO = (*dims)[0];
-        size_t E1 = (*dims)[1];
-        size_t CHA = (*dims)[2];
-        size_t SLC = (*dims)[3];
-        size_t E2 = (*dims)[4];
-        size_t CON = (*dims)[5];
-        size_t PHS = (*dims)[6];
-        size_t REP = (*dims)[7];
-        size_t SET = (*dims)[8];
-
-        GADGET_CONDITION_MSG(true, "sending out images, acquisition boundary [RO E1 CHA SLC E2 CON PHS REP SET] = [" 
-                                                                      << RO << " " << E1 << " " << CHA << " " 
-                                                                      << SLC << " " << E2 << " " << CON << " " 
-                                                                      << PHS << " " << REP << " " << SET << "] " );
-
-        size_t set(0), rep(0), phs(0), con(0), e2(0), slc(0), cha(0), seg(0);
-        // size_t set_sInd(0), rep_sInd(0), phs_sInd(0), con_sInd(0), e2_sInd(0), slc_sInd(0);
-
-        for ( set=0; set<SET; set++ )
-        {
-            // GADGET_CHECK_RETURN_FALSE(gtPlus_util_.findDimIndex(dimStartingIndexes, DIM_Set, set_sInd));
+            // info string for image, gfactor, snr map and std map
+            std::ostringstream ostr_image;
+            ostr_image << "x" << std::setprecision(4) << this->scalingFactor_;
+            std::string imageInfo = ostr_image.str();
 
-            for ( rep=0; rep<REP; rep++ )
-            {
-                // GADGET_CHECK_RETURN_FALSE(gtPlus_util_.findDimIndex(dimStartingIndexes, DIM_Repetition, rep_sInd));
+            std::ostringstream ostr_gfactor;
+            ostr_gfactor << "x" << this->scalingFactor_gfactor_;
+            std::string gfactorInfo = ostr_gfactor.str();
 
-                for ( phs=0; phs<PHS; phs++ )
-                {
-                    // GADGET_CHECK_RETURN_FALSE(gtPlus_util_.findDimIndex(dimStartingIndexes, DIM_Phase, phs_sInd));
+            std::ostringstream ostr_wrap_around_map;
+            ostr_wrap_around_map << "x" << this->scalingFactor_wrap_around_map_;
+            std::string wrapAroundMapInfo = ostr_wrap_around_map.str();
 
-                    for ( con=0; con<CON; con++ )
-                    {
-                        // GADGET_CHECK_RETURN_FALSE(gtPlus_util_.findDimIndex(dimStartingIndexes, DIM_Contrast, con_sInd));
+            std::ostringstream ostr_snr;
+            ostr_snr << "x" << this->scalingFactor_snr_image_;
+            std::string snrMapInfo = ostr_snr.str();
 
-                        for ( e2=0; e2<E2; e2++ )
-                        {
-                            // GADGET_CHECK_RETURN_FALSE(gtPlus_util_.findDimIndex(dimStartingIndexes, DIM_Encoding2, e2_sInd));
+            std::ostringstream ostr_std;
+            ostr_std << "x" << this->scalingFactor_std_map_;
+            std::string stdMapInfo = ostr_std.str();
 
-                            for ( slc=0; slc<SLC; slc++ )
-                            {
-                                // GADGET_CHECK_RETURN_FALSE(gtPlus_util_.findDimIndex(dimStartingIndexes, DIM_Slice, slc_sInd));
+            // ------------------------------------------------------------- //
 
-                                GtPlusGadgetImageExt imageHeaderSent;
-                                GADGET_CHECK_RETURN_FALSE(recomputeImageGeometry(images, imageHeaderSent, slc, e2, con, phs, rep, set, 0, E2));
+            std::vector<size_t> ind(10, 0);
 
-                                //int offset = images->get_offset(slc, e2, con, phs, rep, set, 0);
-                                //imageHeaderSent = images->imageArray_[offset];
+            std::vector<size_t> dim2D(2);
+            dim2D[0] = RO;
+            dim2D[1] = E1;
 
-                                if ( imageHeaderSent.measurement_uid == 0 )
-                                {
-                                    continue;
-                                }
-
-                                for ( cha=0; cha<CHA; cha++ )
+            size_t set(0), rep(0), phs(0), con(0), e2(0), slc(0), cha(0), seg(0), ave(0);
+            for ( ave=0; ave<AVE; ave++ )
+            {
+                for ( e2=0; e2<E2; e2++ )
+                {
+                    for ( slc=0; slc<SLC; slc++ )
+                    {
+                        for ( rep=0; rep<REP; rep++ )
+                        {
+                            for ( phs=0; phs<PHS; phs++ )
+                            {
+                                for ( set=0; set<SET; set++ )
                                 {
-                                    Gadgetron::GadgetContainerMessage<ISMRMRD::ImageHeader>* cm1 = new Gadgetron::GadgetContainerMessage<ISMRMRD::ImageHeader>();
-                                    *cm1->getObjectPtr() = imageHeaderSent;
-
-                                    cm1->getObjectPtr()->flags = 0;
-                                    cm1->getObjectPtr()->image_data_type = ISMRMRD::DATA_FLOAT;
-                                    cm1->getObjectPtr()->image_type = ISMRMRD::TYPE_MAGNITUDE;
-
-                                    // image number and image series
-                                    cm1->getObjectPtr()->image_index = computeSeriesImageNumber (*cm1->getObjectPtr(), CHA, cha, E2, e2);
-                                    cm1->getObjectPtr()->image_series_index = seriesNum;
-                                    // GADGET_CONDITION_MSG(verboseMode_, "image number " << cm1->getObjectPtr()->image_index << "    image series " << cm1->getObjectPtr()->image_series_index << " ... ");
-
-                                    // set the time stamp
-                                    // the time stamp of the first readout line in this 2D kspace is used
-
-                                    Gadgetron::GadgetContainerMessage< Gadgetron::hoNDArray<float> >* cm2 = new Gadgetron::GadgetContainerMessage< Gadgetron::hoNDArray<float> >();
-                                    cm1->cont(cm2);
-
-                                    std::vector<size_t> img_dims(2);
-                                    img_dims[0] = RO;
-                                    img_dims[1] = E1;
-
-                                    //Fixing array dimensions (MSH)
-                                    cm1->getObjectPtr()->matrix_size[0] = RO;
-                                    cm1->getObjectPtr()->matrix_size[1] = E1;
-                                    cm1->getObjectPtr()->matrix_size[2] = 1;
-                                    cm1->getObjectPtr()->channels = 1;
-
-                                    try
+                                    for ( con=0; con<CON; con++ )
                                     {
-                                        cm2->getObjectPtr()->create(&img_dims);
-                                        Gadgetron::clear(cm2->getObjectPtr());
-                                    }
-                                    catch(...)
-                                    {
-                                        GADGET_DEBUG1("Unable to allocate new image\n");
-                                        cm1->release();
-                                        return false;
-                                    }
-
-                                    std::vector<size_t> ind(9, 0);
-                                    ind[2] = cha;
-                                    ind[3] = slc;
-                                    ind[4] = e2;
-                                    ind[5] = con;
-                                    ind[6] = phs;
-                                    ind[7] = rep;
-                                    ind[8] = set;
-
-                                    memcpy(cm2->getObjectPtr()->begin(), res.begin()+res.calculate_offset(ind), sizeof(float)*RO*E1);
-
-                                    if ( !debugFolder2_fullPath_.empty() )
-                                    {
-                                        std::ostringstream ostr;
-                                        ostr << prefix << "_" << cm1->getObjectPtr()->image_index;
-                                        GADGET_EXPORT_ARRAY(debugFolder2_fullPath_, gt_exporter_, *cm2->getObjectPtr(), ostr.str());
-
-                                        //hoNDArray<unsigned short> imageUS2D;
-                                        //imageUS2D.copyFrom(*cm2->getObjectPtr());
-                                        //std::ostringstream ostr2;
-                                        //ostr2 << prefix << "_US_" << cm1->getObjectPtr()->image_index;
-                                        //GADGET_EXPORT_ARRAY(debugFolder2_fullPath_, gt_exporter_, imageUS2D, ostr2.str());
-                                    }
-
-                                    GADGET_CONDITION_MSG(true, "sending out 2D image [CHA SLC E2 CON PHS REP SET] = [" 
-                                                                      << cha << " " 
-                                                                      << cm1->getObjectPtr()->slice << " " 
-                                                                      << e2 << " " 
-                                                                      << cm1->getObjectPtr()->contrast << " " 
-                                                                      << cm1->getObjectPtr()->phase << " " 
-                                                                      << cm1->getObjectPtr()->repetition << " " 
-                                                                      << cm1->getObjectPtr()->set << "] \t" 
-                                                                      << " -- Image number -- " << cm1->getObjectPtr()->image_index);
-
-                                    // send out the images
-                                    if (this->next()->putq(cm1) < 0) 
-                                    {
-                                        return false;
+                                        GtPlusGadgetImageExt imageHeaderSent;
+
+                                        GADGET_CHECK_RETURN_FALSE(recomputeImageGeometry(images, imageHeaderSent, slc, e2, con, phs, rep, set, 0, ave, E2));
+
+                                        if ( imageHeaderSent.measurement_uid == 0 )
+                                        {
+                                            continue;
+                                        }
+
+                                        ind[0] = 0;
+                                        ind[1] = 0;
+                                        ind[2] = 0;
+                                        ind[3] = slc;
+                                        ind[4] = e2;
+                                        ind[5] = con;
+                                        ind[6] = phs;
+                                        ind[7] = rep;
+                                        ind[8] = set;
+                                        ind[9] = ave;
+
+                                        if ( hasTimeStamp )
+                                        {
+                                            if ( timeStamp(ind) > 0 )
+                                            {
+                                                imageHeaderSent.acquisition_time_stamp = (uint32_t)( (double)(timeStamp(ind)/timeStampResolution_) + 0.5 );
+                                                GADGET_CONDITION_MSG(verboseMode_, "Set acquisition time stamp : " << imageHeaderSent.acquisition_time_stamp);
+                                            }
+                                        }
+
+                                        if ( hasPhysioTimeStamp )
+                                        {
+                                            if ( physioTimeStamp(ind) > 0 )
+                                            {
+                                                imageHeaderSent.physiology_time_stamp[0] = (uint32_t)( (double)(physioTimeStamp(ind)/timeStampResolution_) + 0.5 );
+                                                GADGET_CONDITION_MSG(verboseMode_, "Set physio time stamp : " << imageHeaderSent.physiology_time_stamp[0]);
+                                            }
+                                        }
+
+                                        for ( cha=0; cha<CHA; cha++ )
+                                        {
+                                            ind[0] = 0;
+                                            ind[1] = 0;
+                                            ind[2] = cha;
+                                            ind[3] = slc;
+                                            ind[4] = e2;
+                                            ind[5] = con;
+                                            ind[6] = phs;
+                                            ind[7] = rep;
+                                            ind[8] = set;
+                                            ind[9] = ave;
+
+                                            hoNDArray<ValueType> currIm(dim2D, const_cast<ValueType*>(res.begin()+res.calculate_offset(ind)) );
+
+                                            Gadgetron::GadgetContainerMessage<ISMRMRD::ImageHeader>* cm1 = new Gadgetron::GadgetContainerMessage<ISMRMRD::ImageHeader>();
+
+                                            Gadgetron::GadgetContainerMessage<ISMRMRD::MetaContainer>* cm3 = new Gadgetron::GadgetContainerMessage<ISMRMRD::MetaContainer>();
+
+                                            *(cm1->getObjectPtr()) = imageHeaderSent;
+
+                                            cm1->getObjectPtr()->flags = 0;
+                                            cm1->getObjectPtr()->data_type = ISMRMRD::ISMRMRD_CXFLOAT;
+
+                                            // image number and image series
+                                            cm1->getObjectPtr()->image_index = (uint16_t)computeSeriesImageNumber ( *(cm1->getObjectPtr()), CHA, cha, E2, e2);
+                                            cm1->getObjectPtr()->image_series_index = seriesNum;
+                                            // GADGET_CONDITION_MSG(verboseMode_, "image number " << cm1->getObjectPtr()->image_index << "    image series " << cm1->getObjectPtr()->image_series_index << " ... ");
+
+                                            // ----------------------------------------------------------
+                                            // set the image attributes
+                                            cm3->getObjectPtr()->set(GTPLUS_IMAGENUMBER, (long)cm1->getObjectPtr()->image_index);
+
+                                            cm3->getObjectPtr()->set(GTPLUS_CHA,        (long)cha);
+                                            cm3->getObjectPtr()->set(GTPLUS_SLC,        (long)cm1->getObjectPtr()->slice);
+                                            cm3->getObjectPtr()->set(GTPLUS_E2,         (long)e2);
+                                            cm3->getObjectPtr()->set(GTPLUS_CONTRAST,   (long)cm1->getObjectPtr()->contrast);
+                                            cm3->getObjectPtr()->set(GTPLUS_PHASE,      (long)cm1->getObjectPtr()->phase);
+                                            cm3->getObjectPtr()->set(GTPLUS_REP,        (long)cm1->getObjectPtr()->repetition);
+                                            cm3->getObjectPtr()->set(GTPLUS_SET,        (long)cm1->getObjectPtr()->set);
+                                            cm3->getObjectPtr()->set(GTPLUS_AVERAGE,    (long)cm1->getObjectPtr()->average);
+
+                                            cm3->getObjectPtr()->set(GTPLUS_IMAGEPROCESSINGHISTORY, "GT");
+
+                                            if ( dataRole == GTPLUS_IMAGE_REGULAR )
+                                            {
+                                                cm1->getObjectPtr()->image_type = ISMRMRD::ISMRMRD_IMTYPE_MAGNITUDE;
+
+                                                cm3->getObjectPtr()->set(GTPLUS_IMAGECOMMENT, "GT");
+                                                cm3->getObjectPtr()->append(GTPLUS_IMAGECOMMENT, imageInfo.c_str());
+
+                                                cm3->getObjectPtr()->append(GTPLUS_SEQUENCEDESCRIPTION, "_GT");
+                                                cm3->getObjectPtr()->set(GTPLUS_DATA_ROLE, GTPLUS_IMAGE_REGULAR);
+                                                cm3->getObjectPtr()->set(GTPLUS_IMAGE_SCALE_RATIO, (double)(this->scalingFactor_));
+                                            }
+                                            else if ( dataRole == GTPLUS_IMAGE_RETRO )
+                                            {
+                                                cm1->getObjectPtr()->image_type = ISMRMRD::ISMRMRD_IMTYPE_MAGNITUDE;
+
+                                                cm3->getObjectPtr()->set(GTPLUS_IMAGECOMMENT, "GT");
+                                                cm3->getObjectPtr()->append(GTPLUS_IMAGECOMMENT, "RETRO");
+                                                cm3->getObjectPtr()->append(GTPLUS_IMAGECOMMENT, imageInfo.c_str());
+
+                                                cm3->getObjectPtr()->set(GTPLUS_IMAGEPROCESSINGHISTORY, "RETRO");
+
+                                                cm3->getObjectPtr()->set(GTPLUS_SEQUENCEDESCRIPTION, "_GT_RETRO");
+                                                cm3->getObjectPtr()->set(GTPLUS_DATA_ROLE, GTPLUS_IMAGE_RETRO);
+                                                cm3->getObjectPtr()->set(GTPLUS_IMAGE_SCALE_RATIO, (double)(this->scalingFactor_));
+                                            }
+                                            else if ( dataRole == GTPLUS_IMAGE_PHASE )
+                                            {
+                                                cm1->getObjectPtr()->image_type = ISMRMRD::ISMRMRD_IMTYPE_PHASE;
+
+                                                cm3->getObjectPtr()->set(GTPLUS_IMAGECOMMENT, "PHS_GT");
+                                                cm3->getObjectPtr()->set(GTPLUS_SEQUENCEDESCRIPTION, "PHS_GT");
+                                                cm3->getObjectPtr()->set(GTPLUS_DATA_ROLE, GTPLUS_IMAGE_PHASE);
+                                                cm3->getObjectPtr()->set(GTPLUS_IMAGE_SCALE_RATIO, (double)(this->scalingFactor_));
+                                            }
+                                            else if ( dataRole == GTPLUS_IMAGE_GFACTOR )
+                                            {
+                                                cm1->getObjectPtr()->image_type = ISMRMRD::ISMRMRD_IMTYPE_MAGNITUDE;
+
+                                                std::string comment = gfactorInfo;
+                                                comment.append("_");
+                                                comment.append("gfactor_GT");
+
+                                                cm3->getObjectPtr()->set(GTPLUS_IMAGECOMMENT, comment.c_str());
+                                                cm3->getObjectPtr()->set(GTPLUS_SEQUENCEDESCRIPTION, "_gfactor_GT");
+                                                cm3->getObjectPtr()->set(GTPLUS_DATA_ROLE, GTPLUS_IMAGE_GFACTOR);
+                                                cm3->getObjectPtr()->set(GTPLUS_IMAGE_SCALE_RATIO, (double)(this->scalingFactor_gfactor_));
+                                            }
+                                            else if ( dataRole == GTPLUS_IMAGE_WRAPAROUNDMAP )
+                                            {
+                                                cm1->getObjectPtr()->image_type = ISMRMRD::ISMRMRD_IMTYPE_MAGNITUDE;
+
+                                                std::string comment = wrapAroundMapInfo;
+                                                comment.append("_");
+                                                comment.append("WrapAround_Map_GT");
+
+                                                cm3->getObjectPtr()->set(GTPLUS_IMAGECOMMENT, comment.c_str());
+                                                cm3->getObjectPtr()->set(GTPLUS_SEQUENCEDESCRIPTION, "_WrapAround_Map_GT");
+                                                cm3->getObjectPtr()->set(GTPLUS_DATA_ROLE, GTPLUS_IMAGE_WRAPAROUNDMAP);
+                                                cm3->getObjectPtr()->set(GTPLUS_IMAGE_SCALE_RATIO, (float)(this->scalingFactor_wrap_around_map_));
+                                            }
+                                            else if ( dataRole == GTPLUS_IMAGE_SNR_MAP )
+                                            {
+                                                cm1->getObjectPtr()->image_type = ISMRMRD::ISMRMRD_IMTYPE_MAGNITUDE;
+
+                                                std::string comment = snrMapInfo;
+                                                comment.append("_");
+                                                comment.append("SNR_Map_GT");
+
+                                                cm3->getObjectPtr()->set(GTPLUS_IMAGECOMMENT, comment.c_str());
+                                                cm3->getObjectPtr()->set(GTPLUS_SEQUENCEDESCRIPTION, "_SNR_Map_GT");
+                                                cm3->getObjectPtr()->set(GTPLUS_DATA_ROLE, GTPLUS_IMAGE_SNR_MAP);
+                                                cm3->getObjectPtr()->set(GTPLUS_IMAGE_SCALE_RATIO, (double)(this->scalingFactor_snr_image_));
+                                            }
+                                            else if ( dataRole == GTPLUS_IMAGE_STD_MAP )
+                                            {
+                                                cm1->getObjectPtr()->image_type = ISMRMRD::ISMRMRD_IMTYPE_MAGNITUDE;
+
+                                                std::string comment = stdMapInfo;
+                                                comment.append("_");
+                                                comment.append("Std_Map_GT");
+
+                                                cm3->getObjectPtr()->set(GTPLUS_IMAGECOMMENT, comment.c_str());
+                                                cm3->getObjectPtr()->set(GTPLUS_SEQUENCEDESCRIPTION, "_Std_Map_GT");
+                                                cm3->getObjectPtr()->set(GTPLUS_DATA_ROLE, GTPLUS_IMAGE_STD_MAP);
+                                                cm3->getObjectPtr()->set(GTPLUS_IMAGE_SCALE_RATIO, (double)(this->scalingFactor_std_map_));
+
+                                                cm3->getObjectPtr()->set(GTPLUS_IMAGE_WINDOWCENTER, (long)(this->scalingFactor_std_map_));
+                                                cm3->getObjectPtr()->set(GTPLUS_IMAGE_WINDOWWIDTH, (long)(2*this->scalingFactor_std_map_));
+                                            }
+                                            else if ( dataRole == GTPLUS_IMAGE_OTHER )
+                                            {
+                                                cm1->getObjectPtr()->image_type = ISMRMRD::ISMRMRD_IMTYPE_MAGNITUDE;
+
+                                                cm3->getObjectPtr()->set(GTPLUS_IMAGECOMMENT, "GT");
+                                                cm3->getObjectPtr()->set(GTPLUS_SEQUENCEDESCRIPTION, "_GT");
+                                                cm3->getObjectPtr()->set(GTPLUS_DATA_ROLE, GTPLUS_IMAGE_OTHER);
+                                                cm3->getObjectPtr()->set(GTPLUS_IMAGE_SCALE_RATIO, (double)(this->scalingFactor_));
+                                            }
+
+                                            // ----------------------------------------------------------
+
+                                            // set the time stamp
+                                            // the time stamp of the first readout line in this 2D kspace is used
+
+                                            Gadgetron::GadgetContainerMessage< Gadgetron::hoNDArray<ValueType> >* cm2 = new Gadgetron::GadgetContainerMessage< Gadgetron::hoNDArray<ValueType> >();
+                                            cm1->cont(cm2);
+                                            cm2->cont(cm3);
+
+                                            std::vector<size_t> img_dims(2);
+                                            img_dims[0] = RO;
+                                            img_dims[1] = E1;
+
+                                            //Fixing array dimensions (MSH)
+                                            cm1->getObjectPtr()->matrix_size[0] = (uint16_t)RO;
+                                            cm1->getObjectPtr()->matrix_size[1] = (uint16_t)E1;
+                                            cm1->getObjectPtr()->matrix_size[2] = 1;
+                                            cm1->getObjectPtr()->channels = 1;
+
+                                            try
+                                            {
+                                                cm2->getObjectPtr()->create(&img_dims);
+                                                Gadgetron::clear(cm2->getObjectPtr());
+                                            }
+                                            catch(...)
+                                            {
+                                                GADGET_DEBUG1("Unable to allocate new image\n");
+                                                cm1->release();
+                                                return false;
+                                            }
+
+                                            memcpy(cm2->getObjectPtr()->begin(), currIm.begin(), sizeof(ValueType)*RO*E1);
+
+                                            if ( !debugFolder2_fullPath_.empty() )
+                                            {
+                                                std::ostringstream ostr;
+                                                ostr << prefix << "_" << cm1->getObjectPtr()->image_index;
+                                                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder2_fullPath_, gt_exporter_, *cm2->getObjectPtr(), ostr.str());
+                                            }
+
+                                            GADGET_CONDITION_MSG(verboseMode_, "sending out " << dataRole << " image [CHA SLC E2 CON PHS REP SET AVE] = [" 
+                                                << cha << " " 
+                                                << cm1->getObjectPtr()->slice << " " 
+                                                << e2 << " " 
+                                                << cm1->getObjectPtr()->contrast << " " 
+                                                << cm1->getObjectPtr()->phase << " " 
+                                                << cm1->getObjectPtr()->repetition << " " 
+                                                << cm1->getObjectPtr()->set << " " 
+                                                << cm1->getObjectPtr()->average << " " << "] " 
+                                                << " -- Image number -- " << cm1->getObjectPtr()->image_index);
+
+                                            // send out the images
+                                            if (this->next()->putq(cm1) < 0) 
+                                            {
+                                                GADGET_ERROR_MSG("Put image to Q failed ... ");
+                                                return false;
+                                            }
+                                        }
                                     }
                                 }
                             }
@@ -1362,117 +1580,331 @@ sendOutReconMag(GtPlusGadgetImageArray* images, const hoNDArray<float>& res, int
                 }
             }
         }
-    }
-    catch(...)
-    {
-        GADGET_ERROR_MSG("Errors in GtPlusReconGadget::sendOutReconMag(float) ... ");
-        return false;
-    }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in GtPlusReconGadget::sendOutRecon(complex float, time stamp) ... ");
+            return false;
+        }
 
-    return true;
-}
+        return true;
+    }
 
-bool GtPlusReconGadget::
-sendOutRecon(GtPlusGadgetImageArray* images, const hoNDArray<ValueType>& res, int seriesNum, const std::vector<DimensionRecordType>& dimStartingIndexes, const std::string& prefix)
-{
-    try
+    bool GtPlusReconGadget::sendOutRecon2D(GtPlusGadgetImageArray* images, const hoNDArray<ValueType>& res, int seriesNum, int imageNum) // complex overload: takes the magnitude of res, scales it and forwards to the float overload; the catch handler returns false
     {
-        hoNDArray<float> mag(res.get_dimensions());
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::absolute(res, mag));
-        GADGET_CHECK_RETURN_FALSE(scalingMagnitude(mag));
-        GADGET_CHECK_RETURN_FALSE(this->sendOutReconMag(images, mag, seriesNum, dimStartingIndexes, prefix));
+        try
+        {
+            // extract the magnitude
+            hoNDArray<float> mag(res.get_dimensions()); // magnitude buffer created with the dimensions of res
+            Gadgetron::abs(res, mag); // NOTE(review): result is not checked here, unlike the two wrapped calls below
+            GADGET_CHECK_RETURN_FALSE(scalingMagnitude(mag)); // scale the magnitude image; presumably returns false from this function on failure
+            GADGET_CHECK_RETURN_FALSE(sendOutRecon2D(images, mag, seriesNum, imageNum)); // float overload builds and queues the message
+        }
+        catch(...) // any exception is logged and reported as failure
+        {
+            GADGET_ERROR_MSG("Exceptions happened in GtPlusReconGadget::sendOutRecon2D(...) ... ");
+            return false;
+        }
+
+        return true;
     }
-    catch(...)
+
+    bool GtPlusReconGadget::sendOutRecon2D(GtPlusGadgetImageArray* images, const hoNDArray<float>& res, int seriesNum, int imageNum) // float overload: packs one 2D frame of res as header + pixels + meta attributes and queues it on next(); returns false on allocation, queue or exception failure
     {
-        GADGET_ERROR_MSG("Errors in GtPlusReconGadget::sendOutRecon(ValueType) ... ");
-        return false;
-    }
+        try
+        {
+            Gadgetron::GadgetContainerMessage<ISMRMRD::ImageHeader>* cm1 = new Gadgetron::GadgetContainerMessage<ISMRMRD::ImageHeader>(); // cm1: image header message (head of the chain)
+            Gadgetron::GadgetContainerMessage<ISMRMRD::MetaContainer>* cm3 = new Gadgetron::GadgetContainerMessage<ISMRMRD::MetaContainer>(); // cm3: meta attribute message
+
+            *(cm1->getObjectPtr()) = images->imageArray_[0]; // start from a copy of the first stored image header
+
+            cm1->getObjectPtr()->flags = 0;
+            cm1->getObjectPtr()->data_type = ISMRMRD::ISMRMRD_FLOAT;
+            cm1->getObjectPtr()->image_type = ISMRMRD::ISMRMRD_IMTYPE_MAGNITUDE;
+
+            // image number and image series
+            cm1->getObjectPtr()->image_index = imageNum;
+            cm1->getObjectPtr()->image_series_index = seriesNum;
+
+            Gadgetron::GadgetContainerMessage< Gadgetron::hoNDArray<float> >* cm2 = new Gadgetron::GadgetContainerMessage< Gadgetron::hoNDArray<float> >(); // cm2: pixel data message
+            cm1->cont(cm2); // chain order: header -> pixel data -> meta attributes
+            cm2->cont(cm3);
+
+            std::vector<size_t> img_dims(2); // only the first two dimensions of res are sent
+            img_dims[0] = res.get_size(0);
+            img_dims[1] = res.get_size(1);
+
+            // set the image attributes
+            cm3->getObjectPtr()->set(GTPLUS_IMAGECOMMENT, "GT");
+            cm3->getObjectPtr()->set(GTPLUS_SEQUENCEDESCRIPTION, "_GT");
+            cm3->getObjectPtr()->set(GTPLUS_IMAGEPROCESSINGHISTORY, "GT");
+            cm3->getObjectPtr()->set(GTPLUS_DATA_ROLE, GTPLUS_IMAGE_REGULAR);
+
+            cm3->getObjectPtr()->set(GTPLUS_CHA,        (long)0); // channel attribute is fixed to 0 here
+            cm3->getObjectPtr()->set(GTPLUS_SLC,        (long)cm1->getObjectPtr()->slice); // remaining indices are mirrored from the copied header
+            cm3->getObjectPtr()->set(GTPLUS_E2,         (long)0); // E2 attribute is fixed to 0 here
+            cm3->getObjectPtr()->set(GTPLUS_CONTRAST,   (long)cm1->getObjectPtr()->contrast);
+            cm3->getObjectPtr()->set(GTPLUS_PHASE,      (long)cm1->getObjectPtr()->phase);
+            cm3->getObjectPtr()->set(GTPLUS_REP,        (long)cm1->getObjectPtr()->repetition);
+            cm3->getObjectPtr()->set(GTPLUS_SET,        (long)cm1->getObjectPtr()->set);
+            cm3->getObjectPtr()->set(GTPLUS_AVERAGE,    (long)cm1->getObjectPtr()->average);
+
+            cm3->getObjectPtr()->set(GTPLUS_IMAGE_SCALE_RATIO, (double)(this->scalingFactor_)); // record the scaling factor member alongside the image
+
+            //Fixing array dimensions (MSH)
+            cm1->getObjectPtr()->matrix_size[0] = (uint16_t)res.get_size(0); // NOTE(review): cast truncates sizes above 65535
+            cm1->getObjectPtr()->matrix_size[1] = (uint16_t)res.get_size(1);
+            cm1->getObjectPtr()->matrix_size[2] = 1;
+            cm1->getObjectPtr()->channels = 1;
+
+            try
+            {
+                cm2->getObjectPtr()->create(&img_dims); // allocate the 2D pixel buffer
+            }
+            catch(...)
+            {
+                GADGET_DEBUG1("Unable to allocate new image\n");
+                cm1->release(); // presumably also frees the chained cm2/cm3 - confirm against the message block semantics
+                return false;
+            }
 
-    return true;
-}
+            memcpy(cm2->getObjectPtr()->begin(), res.begin(), sizeof(float)*res.get_size(0)*res.get_size(1)); // copies exactly one get_size(0) x get_size(1) frame from the start of res
 
-bool GtPlusReconGadget::sendOutRecon2D(GtPlusGadgetImageArray* images, const hoNDArray<ValueType>& res, int seriesNum, int imageNum)
-{
-    try
-    {
-        // extract the magnitude
-        hoNDArray<float> mag(res.get_dimensions());
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::absolute(res, mag));
-        GADGET_CHECK_RETURN_FALSE(scalingMagnitude(mag));
-        GADGET_CHECK_RETURN_FALSE(sendOutRecon2D(images, mag, seriesNum, imageNum));
+            if ( !debugFolder2_fullPath_.empty() ) // debug export only when a debug folder path is set
+            {
+                std::ostringstream ostr;
+                ostr << "SentImage2D" << "_" << cm1->getObjectPtr()->image_index; // export name carries the image index
+                GADGET_EXPORT_ARRAY(debugFolder2_fullPath_, gt_exporter_, *cm2->getObjectPtr(), ostr.str());
+            }
+
+            // send out the images
+            if (this->next()->putq(cm1) < 0) // NOTE(review): on failure nothing is logged and cm1 is not released here - confirm whether that is intended
+            {
+                return false;
+            }
+        }
+        catch(...) // any other exception is logged and reported as failure
+        {
+            GADGET_ERROR_MSG("Errors in GtPlusReconGadget::sendOutRecon2D(float) ... ");
+            return false;
+        }
+
+        return true;
     }
-    catch(...)
+
+    bool GtPlusReconGadget::computeSNRImage(const hoNDArray<ValueType>& res, const hoNDArray<ValueType>& gfactor, unsigned int startInd, bool withAcceleration, hoNDArray<ValueType>& snrImage, hoNDArray<ValueType>& stdMap) // fills snrImage (res, combined with gfactor when withAcceleration) and a std map over the REP or PHS frames from startInd on; the catch handler returns false
     {
-        GADGET_ERROR_MSG("Exceptions happened in GtPlusReconGadget::sendOutRecon2D(...) ... ");
-        return false;
-    }
+        try
+        {
+            boost::shared_ptr< std::vector<size_t> > dims = res.get_dimensions(); // NOTE(review): indices 0..9 are read below without checking that res has 10 dimensions
+            size_t RO = (*dims)[0];
+            size_t E1 = (*dims)[1];
+            size_t CHA = (*dims)[2];
+            size_t SLC = (*dims)[3];
+            size_t E2 = (*dims)[4];
+            size_t CON = (*dims)[5];
+            size_t PHS = (*dims)[6];
+            size_t REP = (*dims)[7];
+            size_t SET = (*dims)[8];
+            size_t AVE = (*dims)[9];
+
+            snrImage = gfactor; // starting value; overwritten by res in the non-accelerated branch below
+
+            if ( withAcceleration )
+            {
+                Gadgetron::addEpsilon(snrImage); // presumably guards the division against zeros in gfactor - confirm
+                Gadgetron::divide(res, snrImage, snrImage); // presumably snrImage = res ./ snrImage, element-wise - confirm argument order
+            }
+            else
+            {
+                snrImage = res; // without acceleration the SNR image is just a copy of res
+            }
 
-    return true;
-}
+            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder2_fullPath_, gt_exporter_, snrImage, "snrImage");
 
-bool GtPlusReconGadget::sendOutRecon2D(GtPlusGadgetImageArray* images, const hoNDArray<float>& res, int seriesNum, int imageNum)
-{
-    try
-    {
-        Gadgetron::GadgetContainerMessage<ISMRMRD::ImageHeader>* cm1 = new Gadgetron::GadgetContainerMessage<ISMRMRD::ImageHeader>();
-        *cm1->getObjectPtr() = images->imageArray_[0];
+            std::vector<size_t> dimStdMap(*dims); // std map dimensions start as a copy of those of res
 
-        cm1->getObjectPtr()->flags = 0;
-        cm1->getObjectPtr()->image_data_type = ISMRMRD::DATA_FLOAT;
-        cm1->getObjectPtr()->image_type = ISMRMRD::TYPE_MAGNITUDE;
+            std::vector<size_t> ind(10, 0); // ind[0] and ind[1] stay 0, so offsets point at the start of a RO x E1 frame
+            size_t set(0), rep(0), phs(0), con(0), e2(0), slc(0), cha(0), seg(0), ave(0); // NOTE(review): seg is never used in this function
 
-        // image number and image series
-        cm1->getObjectPtr()->image_index = imageNum;
-        cm1->getObjectPtr()->image_series_index = seriesNum;
+            if ( REP > startInd+2 ) // std over repetitions, only when REP exceeds startInd+2
+            {
+                dimStdMap[7] = 1; // collapse the REP dimension in the output
+                stdMap.create(dimStdMap);
+                Gadgetron::clear(stdMap);
 
-        Gadgetron::GadgetContainerMessage< Gadgetron::hoNDArray<float> >* cm2 = new Gadgetron::GadgetContainerMessage< Gadgetron::hoNDArray<float> >();
-        cm1->cont(cm2);
+                size_t numOfIm = REP - startInd; // number of frames entering the std computation
 
-        std::vector<size_t> img_dims(2);
-        img_dims[0] = res.get_size(0);
-        img_dims[1] = res.get_size(1);
+                hoNDArray<ValueType> repBuf(RO, E1, numOfIm); // complex frames gathered along REP
+                hoNDArray<real_value_type> repBufMag(RO, E1, numOfIm); // their magnitudes
+                hoNDArray<real_value_type> stdMap2D(RO, E1); // std result for one frame position
+
+                for ( ave=0; ave<AVE; ave++ )
+                {
+                    for ( set=0; set<SET; set++ )
+                    {
+                        for ( phs=0; phs<PHS; phs++ )
+                        {
+                            for ( con=0; con<CON; con++ )
+                            {
+                                for ( e2=0; e2<E2; e2++ )
+                                {
+                                    for ( slc=0; slc<SLC; slc++ )
+                                    {
+                                        for ( cha=0; cha<CHA; cha++ )
+                                        {
+                                            Gadgetron::clear(repBuf);
+
+                                            for ( rep=startInd; rep<REP; rep++ ) // gather frames startInd..REP-1 for this index combination
+                                            {
+                                                ind[2] = cha;
+                                                ind[3] = slc;
+                                                ind[4] = e2;
+                                                ind[5] = con;
+                                                ind[6] = phs;
+                                                ind[7] = rep;
+                                                ind[8] = set;
+                                                ind[9] = ave;
+
+                                                size_t offset = snrImage.calculate_offset(ind);
+
+                                                memcpy(repBuf.begin()+(rep-startInd)*RO*E1, // destination: frame slot (rep-startInd)
+                                                    snrImage.begin()+offset, sizeof(ValueType)*RO*E1); // one RO x E1 frame
+                                            }
+
+                                            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder2_fullPath_, gt_exporter_, repBuf, "repBuf"); // same export name is used on every iteration
+
+                                            Gadgetron::abs(repBuf, repBufMag);
+                                            GADGET_EXPORT_ARRAY(debugFolder2_fullPath_, gt_exporter_, repBufMag, "repBufMag");
+
+                                            // compute std
+                                            GADGET_CHECK_RETURN_FALSE(Gadgetron::stdOver3rdDimension(repBufMag, stdMap2D, true)); // meaning of the trailing 'true' is not visible here - confirm
+                                            GADGET_EXPORT_ARRAY(debugFolder2_fullPath_, gt_exporter_, stdMap2D, "stdMap2D");
+
+                                            // copy it to the std map
+                                            ind[2] = cha;
+                                            ind[3] = slc;
+                                            ind[4] = e2;
+                                            ind[5] = con;
+                                            ind[6] = phs;
+                                            ind[7] = 0; // REP was collapsed to size 1 in stdMap
+                                            ind[8] = set;
+                                            ind[9] = ave;
+
+                                            size_t offset = stdMap.calculate_offset(ind);
+                                            hoNDArray<ValueType> stdMapCurr(RO, E1, stdMap.begin()+offset, false); // wraps memory inside stdMap; 'false' presumably means no ownership - confirm
+                                            stdMapCurr.copyFrom(stdMap2D); // stdMap2D is real_value_type, the view is ValueType; conversion is left to copyFrom - confirm
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            else if ( PHS > startInd+2 ) // otherwise std over phases; if neither condition holds stdMap is not modified here
+            {
+                dimStdMap[6] = 1; // collapse the PHS dimension in the output
+                stdMap.create(dimStdMap);
+                Gadgetron::clear(stdMap);
 
-        //Fixing array dimensions (MSH)
-        cm1->getObjectPtr()->matrix_size[0] = res.get_size(0);
-        cm1->getObjectPtr()->matrix_size[1] = res.get_size(1);
-        cm1->getObjectPtr()->matrix_size[2] = 1;
-        cm1->getObjectPtr()->channels = 1;
+                size_t numOfIm = PHS - startInd; // number of frames entering the std computation
 
-        try
-        {
-            cm2->getObjectPtr()->create(&img_dims);
+                hoNDArray<ValueType> phsBuf(RO, E1, numOfIm); // complex frames gathered along PHS
+                hoNDArray<real_value_type> phsBufMag(RO, E1, numOfIm); // their magnitudes
+                hoNDArray<real_value_type> stdMap2D(RO, E1); // std result for one frame position
+
+                for ( ave=0; ave<AVE; ave++ )
+                {
+                    for ( set=0; set<SET; set++ )
+                    {
+                        for ( rep=0; rep<REP; rep++ )
+                        {
+                            for ( con=0; con<CON; con++ )
+                            {
+                                for ( e2=0; e2<E2; e2++ )
+                                {
+                                    for ( slc=0; slc<SLC; slc++ )
+                                    {
+                                        for ( cha=0; cha<CHA; cha++ )
+                                        {
+                                            Gadgetron::clear(phsBuf);
+
+                                            for ( phs=startInd; phs<PHS; phs++ ) // gather frames startInd..PHS-1 for this index combination
+                                            {
+                                                ind[2] = cha;
+                                                ind[3] = slc;
+                                                ind[4] = e2;
+                                                ind[5] = con;
+                                                ind[6] = phs;
+                                                ind[7] = rep;
+                                                ind[8] = set;
+                                                ind[9] = ave;
+
+                                                size_t offset = snrImage.calculate_offset(ind);
+
+                                                memcpy(phsBuf.begin()+(phs-startInd)*RO*E1, // destination: frame slot (phs-startInd)
+                                                    snrImage.begin()+offset, sizeof(ValueType)*RO*E1); // one RO x E1 frame
+                                            }
+
+                                            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder2_fullPath_, gt_exporter_, phsBuf, "phsBuf"); // same export name is used on every iteration
+
+                                            Gadgetron::abs(phsBuf, phsBufMag);
+                                            GADGET_EXPORT_ARRAY(debugFolder2_fullPath_, gt_exporter_, phsBufMag, "phsBufMag");
+
+                                            // compute std
+                                            GADGET_CHECK_RETURN_FALSE(Gadgetron::stdOver3rdDimension(phsBufMag, stdMap2D, true)); // meaning of the trailing 'true' is not visible here - confirm
+                                            GADGET_EXPORT_ARRAY(debugFolder2_fullPath_, gt_exporter_, stdMap2D, "stdMap2D");
+
+                                            // copy it to the std map
+                                            ind[2] = cha;
+                                            ind[3] = slc;
+                                            ind[4] = e2;
+                                            ind[5] = con;
+                                            ind[6] = 0; // PHS was collapsed to size 1 in stdMap
+                                            ind[7] = rep;
+                                            ind[8] = set;
+                                            ind[9] = ave;
+
+                                            size_t offset = stdMap.calculate_offset(ind);
+                                            hoNDArray<ValueType> stdMapCurr(RO, E1, stdMap.begin()+offset, false); // wraps memory inside stdMap; 'false' presumably means no ownership - confirm
+                                            stdMapCurr.copyFrom(stdMap2D); // stdMap2D is real_value_type, the view is ValueType; conversion is left to copyFrom - confirm
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
         }
         catch(...)
         {
-            GADGET_DEBUG1("Unable to allocate new image\n");
-            cm1->release();
+            GADGET_ERROR_MSG("Errors in GtPlusReconGadget::computeSNRImage(res, gfactor, snrImage, stdmap) ... ");
             return false;
         }
 
-        memcpy(cm2->getObjectPtr()->begin(), res.begin(), sizeof(float)*res.get_size(0)*res.get_size(1));
+        return true;
+    }
 
-        if ( !debugFolder2_fullPath_.empty() )
-        {
-            std::ostringstream ostr;
-            ostr << "SentImage2D" << "_" << cm1->getObjectPtr()->image_index;
-            GADGET_EXPORT_ARRAY(debugFolder2_fullPath_, gt_exporter_, *cm2->getObjectPtr(), ostr.str());
-        }
+    int GtPlusReconGadget::close(unsigned long flags) // closes the base class first, then for non-zero flags logs the current time; returns GADGET_OK or GADGET_FAIL
+    {
+        GADGET_CONDITION_MSG(true, "GtPlusReconGadget - close(flags) : " << flags); // condition is the literal true, so presumably always printed
+
+        if ( BaseClass::close(flags) != GADGET_OK ) return GADGET_FAIL; // a failing base-class close is propagated before anything else runs
 
-        // send out the images
-        if (this->next()->putq(cm1) < 0) 
+        if ( flags != 0 ) // the time banner is only printed for non-zero flags
         {
-            return false;
+            std::string procTime;
+            gtPlus_util_.getCurrentMoment(procTime); // fills procTime with the current moment as text
+
+            GADGET_MSG("* ============================================================================== *");
+            GADGET_MSG("---> MR recon phase, Currnt processing time : " << procTime << " <---"); // NOTE(review): "Currnt" is a typo in the emitted log text
+            GADGET_MSG("* ============================================================================== *");
         }
-    }
-    catch(...)
-    {
-        GADGET_ERROR_MSG("Errors in GtPlusReconGadget::sendOutRecon2D(float) ... ");
-        return false;
-    }
 
-    return true;
-}
+        return GADGET_OK;
+    }
 
-GADGET_FACTORY_DECLARE(GtPlusReconGadget)
+    GADGET_FACTORY_DECLARE(GtPlusReconGadget)
 
 }
diff --git a/gadgets/gtPlus/GtPlusReconGadget.h b/gadgets/gtPlus/GtPlusReconGadget.h
index 7463f93..f90b211 100644
--- a/gadgets/gtPlus/GtPlusReconGadget.h
+++ b/gadgets/gtPlus/GtPlusReconGadget.h
@@ -9,20 +9,24 @@
 #include "GtPlusGadgetExport.h"
 #include "Gadget.h"
 #include "hoNDArray.h"
-#include "ismrmrd.h"
-#include "GadgetIsmrmrdReadWrite.h"
+#include "ismrmrd/ismrmrd.h"
+#include "ismrmrd/xml.h"
+#include "ismrmrd/meta.h"
 #include "GadgetronTimer.h"
 
 #include "hoNDArray_utils.h"
 
 #include "GtPlusGadgetImageArray.h"
 
+#include "GadgetronCommon.h"
 #include "gtPlusIOAnalyze.h"
 #include "gtPlusISMRMRDReconUtil.h"
 #include "gtPlusISMRMRDReconWorkOrder.h"
 
 #include "GadgetStreamController.h"
 
+#include "GtPlusReconGadgetUtil.h"
+
 #ifdef USE_OMP
     #include "omp.h"
 #endif // USE_OMP
@@ -37,7 +41,8 @@ class EXPORTGTPLUSGADGET GtPlusReconGadget : public Gadgetron::Gadget2< GtPlusGa
 public:
     GADGET_DECLARE(GtPlusReconGadget);
 
-    typedef std::complex<float> ValueType;
+    typedef float real_value_type;
+    typedef std::complex<real_value_type> ValueType;
 
     typedef Gadgetron::gtPlus::gtPlusReconWorkOrder<ValueType> WorkOrderType;
 
@@ -64,6 +69,21 @@ public:
     // scaling factor for recon results
     double scalingFactor_;
 
+    // scaling factor for gfactor images
+    double scalingFactor_gfactor_;
+
+    // scaling factor for wrap around map
+    double scalingFactor_wrap_around_map_;
+
+    // scaling factor for snr images
+    double scalingFactor_snr_image_;
+
+    // scaling factor for std map
+    double scalingFactor_std_map_;
+
+    // start frame to compute std map, to avoid transitional signal
+    unsigned int start_frame_for_std_map_;
+
     // whether to use the fixed intensity scaling factor
     bool use_constant_scalingFactor_;
 
@@ -92,6 +112,13 @@ public:
     // whether to recon kspace
     bool recon_kspace_needed_;
 
+    // whether the second set of recon results is required
+    bool recon_res_second_required_;
+
+    // whether to send out recon results
+    bool send_out_recon_;
+    bool send_out_recon_second_;
+
     // parameters for gt-plus recon
     Gadgetron::gtPlus::gtPlusReconWorkOrderPara workOrderPara_;
 
@@ -99,15 +126,8 @@ public:
     // utility functions
     // --------------------------------------------------
 
-    // generate the debug folder path
-    // debugFolderPath = ${GADGETRON_HOME}/debugFolder
-    virtual bool generateDebugFolderPath(const std::string& debugFolder, std::string& debugFolderPath);
-
-    // get the current moment
-    void getCurrentMoment(std::string& procTime);
-
     // compute image number using ICE way
-    int computeSeriesImageNumber (ISMRMRD::ImageHeader& imheader, size_t nCHA=1, size_t cha=0, size_t nE2=1, size_t e2=0);
+    size_t computeSeriesImageNumber (ISMRMRD::ImageHeader& imheader, size_t nCHA=1, size_t cha=0, size_t nE2=1, size_t e2=0);
 
     // to handle partial fourier, add pre or post zeros
     // PrePostZeros: 0 no zeros; 1 pre zeros; 2 post zeros
@@ -116,15 +136,24 @@ public:
     // find the dimension index
     bool findStartingDimIndex(const std::vector<DimensionRecordType>& dimStartingIndexes, Gadgetron::gtPlus::ISMRMRDDIM& dim, size_t ind);
 
+    // compute SNR image and std map
+    bool computeSNRImage(const hoNDArray<ValueType>& res, const hoNDArray<ValueType>& gfactor, unsigned int startInd, bool withAcceleration, hoNDArray<ValueType>& snrImage, hoNDArray<ValueType>& stdMap);
+
+    // scale the recon images
+    bool scalingImages(hoNDArray<ValueType>& res);
+
     // scale the magnitude images
     bool scalingMagnitude(hoNDArray<float>& mag);
 
     // recompute the image geometry parameters if the recon FOV is different from encoding FOV
-    bool recomputeImageGeometry(GtPlusGadgetImageArray* images, GtPlusGadgetImageExt& imageHeader, int slc, int e2, int con, int phs, int rep, int set, int seg, int maxE2);
+    bool recomputeImageGeometry(GtPlusGadgetImageArray* images, GtPlusGadgetImageExt& imageHeader, size_t slc, size_t e2, size_t con, size_t phs, size_t rep, size_t set, size_t seg, size_t ave, size_t maxE2);
+
+    // get the acquisition and PMU time stamps
+    bool getTimeStamp(GtPlusGadgetImageArray* images, WorkOrderType& workOrder, hoNDArray<real_value_type>& timeStamp,  hoNDArray<real_value_type>& pmuTimeStamp);
 
     // send out the recon results
-    virtual bool sendOutReconMag(GtPlusGadgetImageArray* images, const hoNDArray<float>& res, int seriesNum, const std::vector<DimensionRecordType>& dimStartingIndexes, const std::string& prefix);
-    virtual bool sendOutRecon(GtPlusGadgetImageArray* images, const hoNDArray<ValueType>& res, int seriesNum, const std::vector<DimensionRecordType>& dimStartingIndexes, const std::string& prefix);
+    virtual bool sendOutRecon(GtPlusGadgetImageArray* images, const hoNDArray<ValueType>& res, int seriesNum, const std::vector<DimensionRecordType>& dimStartingIndexes, const std::string& prefix, const std::string& dataRole);
+    virtual bool sendOutRecon(GtPlusGadgetImageArray* images, const hoNDArray<ValueType>& res, const hoNDArray<real_value_type>& timeStamp, const hoNDArray<real_value_type>& physioTimeStamp, int seriesNum, const std::vector<DimensionRecordType>& dimStartingIndexes, const std::string& prefix, const std::string& dataRole);
 
     // special sending function for the interactive cases
     virtual bool sendOutRecon2D(GtPlusGadgetImageArray* images, const hoNDArray<float>& res, int seriesNum, int imageNum);
@@ -132,6 +161,7 @@ public:
 
     // compute the kspace filter
     bool generateKSpaceFilter(WorkOrderType& workOrder);
+    //void GADGET_CONDITION_MSG(bool verboseMode_, const char* arg2);
 
 protected:
 
@@ -149,6 +179,9 @@ protected:
     // parse the cloud file if any
     virtual bool parseGTCloudNodeFile(const std::string& filename, CloudType& gtCloud);
 
+    // close call
+    int close(unsigned long flags);
+
 public:
 
     // --------------------------------------------------
@@ -250,6 +283,11 @@ public:
     // encoding space size
     ISMRMRD::EncodingCounters meas_max_idx_;
 
+    // define the maximal number of threads used
+    // number_of_used_threads = thread_number_ratio_ * max_available_threads_number
+    // 0 means all threads are used
+    float thread_number_ratio_;
+
     Gadgetron::gtPlus::gtPlusISMRMRDReconUtil<ValueType> gtPlus_util_;
     Gadgetron::gtPlus::gtPlusISMRMRDReconUtilComplex<ValueType> gtPlus_util_complex_;
 
diff --git a/gadgets/gtPlus/GtPlusReconGadgetUtil.cpp b/gadgets/gtPlus/GtPlusReconGadgetUtil.cpp
new file mode 100644
index 0000000..1b92bfa
--- /dev/null
+++ b/gadgets/gtPlus/GtPlusReconGadgetUtil.cpp
@@ -0,0 +1,710 @@
+
+#include "GtPlusReconGadgetUtil.h"
+
+#include <boost/filesystem.hpp>
+using namespace boost::filesystem;
+
+using namespace Gadgetron::gtPlus;
+
+namespace Gadgetron
+{
+
+    bool findCalibMode(ISMRMRD::IsmrmrdHeader& h, Gadgetron::gtPlus::ISMRMRDCALIBMODE& CalibMode, ISMRMRDDIM& InterleaveDim, double& acceFactorE1, double& acceFactorE2, bool verbose)
+    {
+        // Reads the parallel imaging section of the first encoding and reports the acceleration
+        // factors, the calibration mode and, for interleaved data, the interleaving dimension.
+        try
+        {
+            if ( !h.encoding[0].parallelImaging )
+            {
+                GADGET_ERROR_MSG("Parallel Imaging section not found in header");
+                return false;
+            }
+
+            ISMRMRD::ParallelImaging parallel = *h.encoding[0].parallelImaging;
+            acceFactorE1 = static_cast<double>(parallel.accelerationFactor.kspace_encoding_step_1);
+            acceFactorE2 = static_cast<double>(parallel.accelerationFactor.kspace_encoding_step_2);
+            GADGET_CONDITION_MSG(verbose, "acceFactorE1 is " << acceFactorE1);
+            GADGET_CONDITION_MSG(verbose, "acceFactorE2 is " << acceFactorE2);
+
+            if ( !parallel.calibrationMode.is_present() )
+            {
+                GADGET_ERROR_MSG("Parallel calibration mode not found in header");
+                return false;
+            }
+
+            const std::string calibName = *parallel.calibrationMode;
+            if ( calibName == "interleaved" )
+            {
+                CalibMode = Gadgetron::gtPlus::ISMRMRD_interleaved;
+                GADGET_CONDITION_MSG(verbose, "Calibration mode is interleaved");
+                if ( parallel.interleavingDimension )
+                {
+                    const std::string& dimName = *parallel.interleavingDimension;
+                    if ( dimName == "phase" )
+                    {
+                        InterleaveDim = Gadgetron::gtPlus::DIM_Phase;
+                    }
+                    else if ( dimName == "repetition" )
+                    {
+                        InterleaveDim = Gadgetron::gtPlus::DIM_Repetition;
+                    }
+                    else if ( dimName == "average" )
+                    {
+                        InterleaveDim = Gadgetron::gtPlus::DIM_Average;
+                    }
+                    else if ( dimName == "contrast" )
+                    {
+                        InterleaveDim = Gadgetron::gtPlus::DIM_Contrast;
+                    }
+                    else if ( dimName == "other" )
+                    {
+                        InterleaveDim = Gadgetron::gtPlus::DIM_other1;
+                    }
+                    else
+                    {
+                        GADGET_ERROR_MSG("Unknown interleaving dimension. Bailing out");
+                        return false;
+                    }
+                }
+            }
+            else if ( calibName == "embedded" )
+            {
+                CalibMode = Gadgetron::gtPlus::ISMRMRD_embedded;
+                GADGET_CONDITION_MSG(verbose, "Calibration mode is embedded");
+            }
+            else if ( calibName == "separate" )
+            {
+                CalibMode = Gadgetron::gtPlus::ISMRMRD_separate;
+                GADGET_CONDITION_MSG(verbose, "Calibration mode is separate");
+            }
+            else if ( calibName == "external" )
+            {
+                CalibMode = Gadgetron::gtPlus::ISMRMRD_external;
+            }
+            else if ( calibName == "other" && acceFactorE1 == 1 && acceFactorE2 == 1 )
+            {
+                CalibMode = Gadgetron::gtPlus::ISMRMRD_noacceleration;
+                acceFactorE1=1;
+            }
+            else if ( calibName == "other" && (acceFactorE1 > 1 || acceFactorE2 > 1) )
+            {
+                CalibMode = Gadgetron::gtPlus::ISMRMRD_interleaved;
+                acceFactorE1=2;
+                InterleaveDim = Gadgetron::gtPlus::DIM_Phase;
+            }
+            else
+            {
+                GADGET_ERROR_MSG("Failed to process parallel imaging calibration mode");
+                return false;
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in findCalibMode(...) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    bool findEncodingLimits(ISMRMRD::IsmrmrdHeader& h, ISMRMRD::EncodingCounters& meas_max_idx, bool verbose)
+    {
+        // Fills meas_max_idx with the maximal index of each dimension of the first encoding:
+        // E1/E2 from the encoded matrix size, the others from the optional encoding limits
+        // (0 when a limit is absent). 'verbose' is not used.
+        try
+        {
+            // only read access is needed, so bind references instead of copying the header sections
+            ISMRMRD::EncodingSpace& e_space = h.encoding[0].encodedSpace;
+            ISMRMRD::EncodingLimits& e_limits = h.encoding[0].encodingLimits;
+
+            // a matrix size of 0 must not wrap around to 65535 when turned into a maximal index
+            meas_max_idx.kspace_encode_step_1 = (e_space.matrixSize.y > 0) ? (uint16_t)(e_space.matrixSize.y-1) : 0;
+            meas_max_idx.kspace_encode_step_2 = (e_space.matrixSize.z > 0) ? (uint16_t)(e_space.matrixSize.z-1) : 0;
+
+            meas_max_idx.set = (e_limits.set && (e_limits.set->maximum>0)) ? e_limits.set->maximum : 0;
+            meas_max_idx.phase = (e_limits.phase && (e_limits.phase->maximum>0)) ? e_limits.phase->maximum : 0;
+            meas_max_idx.contrast = (e_limits.contrast && (e_limits.contrast->maximum > 0)) ? e_limits.contrast->maximum : 0;
+            meas_max_idx.slice = (e_limits.slice && (e_limits.slice->maximum > 0)) ? e_limits.slice->maximum : 0;
+
+            meas_max_idx.repetition = e_limits.repetition ? e_limits.repetition->maximum : 0;
+            meas_max_idx.average = e_limits.average ? e_limits.average->maximum : 0;
+
+            // always combine the SEG
+            meas_max_idx.segment = 0;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in findEncodingLimits(...) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    void findMatrixSizeEncoding(ISMRMRD::IsmrmrdHeader& h, size_t matrix_size_encoding[3])
+    { // copies the encoded-space matrix size of the first encoding (h.encoding[0]) in x, y, z order
+        matrix_size_encoding[0] = h.encoding[0].encodedSpace.matrixSize.x;
+        matrix_size_encoding[1] = h.encoding[0].encodedSpace.matrixSize.y;
+        matrix_size_encoding[2] = h.encoding[0].encodedSpace.matrixSize.z;
+    }
+
+    void findFOVEncoding(ISMRMRD::IsmrmrdHeader& h, float field_of_view_encoding[3])
+    { // copies the encoded-space field of view (fieldOfView_mm) of the first encoding in x, y, z order
+        field_of_view_encoding[0] = h.encoding[0].encodedSpace.fieldOfView_mm.x;
+        field_of_view_encoding[1] = h.encoding[0].encodedSpace.fieldOfView_mm.y;
+        field_of_view_encoding[2] = h.encoding[0].encodedSpace.fieldOfView_mm.z;
+    }
+
+    void findMatrixSizeRecon(ISMRMRD::IsmrmrdHeader& h, size_t matrix_size_recon[3])
+    { // copies the recon-space matrix size of the first encoding (h.encoding[0]) in x, y, z order
+        matrix_size_recon[0] = h.encoding[0].reconSpace.matrixSize.x;
+        matrix_size_recon[1] = h.encoding[0].reconSpace.matrixSize.y;
+        matrix_size_recon[2] = h.encoding[0].reconSpace.matrixSize.z;
+    }
+
+    void findFOVRecon(ISMRMRD::IsmrmrdHeader& h, float field_of_view_recon[3])
+    { // copies the recon-space field of view (fieldOfView_mm) of the first encoding in x, y, z order
+        field_of_view_recon[0] = h.encoding[0].reconSpace.fieldOfView_mm.x;
+        field_of_view_recon[1] = h.encoding[0].reconSpace.fieldOfView_mm.y;
+        field_of_view_recon[2] = h.encoding[0].reconSpace.fieldOfView_mm.z;
+    }
+
+    bool checkReadoutStatus(uint64_t flag, int samples, Gadgetron::gtPlus::ISMRMRDCALIBMODE& CalibMode, int roLen, 
+        bool& bIsKSpace, bool& bIsRef, bool& bIsNoise, 
+        bool& bIsPhaseCorr, bool& bIsReflect, bool& bIsOther, 
+        bool& bIsNavigator, bool& bIsRTFeedback, bool& bIsHPFeedback, 
+        bool& bIsDummyScan)
+    { // decodes the acquisition flag word of one readout; later rules below overwrite earlier ones
+        try
+        {
+            bIsNoise = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_NOISE_MEASUREMENT).isSet(flag);
+            bool is_ref = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_PARALLEL_CALIBRATION).isSet(flag);
+            bool is_ref_kspace = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_PARALLEL_CALIBRATION_AND_IMAGING).isSet(flag);
+            bIsReflect = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_REVERSE).isSet(flag);
+            bIsPhaseCorr = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_PHASECORR_DATA).isSet(flag);
+            bIsNavigator = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_NAVIGATION_DATA).isSet(flag);
+            bIsRTFeedback = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_RTFEEDBACK_DATA).isSet(flag);
+            bIsHPFeedback = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_HPFEEDBACK_DATA).isSet(flag);
+            bIsDummyScan = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_DUMMYSCAN_DATA).isSet(flag);
+            // the three classification outputs start cleared; the calibration-mode rules below set them
+            bIsKSpace = false;
+            bIsRef = false;
+            bIsOther = false;
+            // noise and dummy scans are reported with kspace/ref/other all left at false
+            if ( bIsNoise || bIsDummyScan )
+            {
+                return true;
+            }
+            // without acceleration every remaining readout is flagged as kspace data
+            if ( CalibMode==ISMRMRD_noacceleration )
+            {
+                bIsKSpace = true;
+                bIsRef = false;
+            }
+
+            // in interleaved mode, only store the image data
+            if ( CalibMode==ISMRMRD_interleaved )
+            {
+                bIsKSpace = true;
+                bIsRef = false;
+            }
+
+            // in embedded, kspace stores only the undersampled lines
+            // ref stores all lines used for references
+            if ( CalibMode==ISMRMRD_embedded )
+            {
+                if ( is_ref && !is_ref_kspace )
+                {
+                    bIsKSpace = false;
+                    bIsRef = true;
+                }
+
+                if ( !is_ref && is_ref_kspace )
+                {
+                    bIsKSpace = true;
+                    bIsRef = true;
+                }
+
+                if ( is_ref && is_ref_kspace )
+                {
+                    bIsKSpace = true;
+                    bIsRef = true;
+                }
+
+                if ( !is_ref && !is_ref_kspace )
+                {
+                    bIsKSpace = true;
+                    bIsRef = false;
+                }
+            }
+
+            // in separate mode
+            if ( CalibMode==ISMRMRD_separate 
+                || CalibMode==ISMRMRD_external )
+            {
+                if ( is_ref )
+                {
+                    bIsKSpace = false;
+                    bIsRef = true;
+                }
+
+                if ( !is_ref )
+                {
+                    bIsKSpace = true;
+                    bIsRef = false;
+                }
+            }
+            // this last rule runs for every calibration mode and overrides the results above
+            // store other data, e.g. AIF
+            // only for tpat
+            if ( !is_ref && !is_ref_kspace && (samples!=roLen) )
+            {
+                bIsOther = true;
+                bIsKSpace = false;
+                bIsRef = false;
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in checkReadoutStatus(...) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    bool estimateMaxSEGForRetroGating(Gadgetron::gtPlus::ISMRMRDCALIBMODE CalibMode, 
+        double acceFactorE1, double acceFactorE2, 
+        size_t retro_gated_segment_size, 
+        uint16_t E1, uint16_t embedded_ref_lines_E1, 
+        uint16_t E2, uint16_t embedded_ref_lines_E2, 
+        uint16_t& segment, bool verbose)
+    {
+        // Estimates the segment count from the number of acquired lines (plus the extra reference
+        // lines in embedded mode) divided by the lines per segment, rounded up; counts above 1
+        // are decremented by one. E2 only matters when acceFactorE2 > 1. 'verbose' is not used.
+        try
+        {
+            // both values are divisors below; 0 would give inf, whose conversion to uint16_t is undefined
+            if ( retro_gated_segment_size == 0 || acceFactorE1 <= 0 )
+            {
+                GADGET_ERROR_MSG("estimateMaxSEGForRetroGating, invalid segment size or acceleration factor");
+                return false;
+            }
+
+            const bool embedded = (CalibMode == ISMRMRD_embedded);
+            double numSeg = 0;
+            if ( acceFactorE2 <= 1 )
+            {
+                numSeg = (double)E1/acceFactorE1/retro_gated_segment_size;
+                if ( embedded ) numSeg += (acceFactorE1-1)*(double)embedded_ref_lines_E1/acceFactorE1/retro_gated_segment_size;
+            }
+            else
+            {
+                // the reference line product is formed in double, so it cannot overflow int
+                numSeg = (double)E1*E2/(acceFactorE1*acceFactorE2*retro_gated_segment_size);
+                if ( embedded ) numSeg += (acceFactorE1*acceFactorE2-1)*((double)embedded_ref_lines_E1*embedded_ref_lines_E2)/(acceFactorE1*acceFactorE2*retro_gated_segment_size);
+            }
+
+            segment = (uint16_t)std::ceil(numSeg);
+            if ( segment > 1 ) segment--;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in estimateMaxSEGForRetroGating(...) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    void getDebugFolderPath(const std::string& debugFolder, std::string& debugFolderPath, bool verbose)
+    {
+        // Root folder is $GADGETRON_DEBUG_FOLDER, or a platform default when it is unset or empty.
+        // getenv returns a null pointer for an unset variable, which must never reach std::string.
+        const char* envFolder = getenv("GADGETRON_DEBUG_FOLDER");
+        debugFolderPath = envFolder ? envFolder : "";
+        if ( debugFolderPath.empty() )
+        {
+#ifdef _WIN32
+            debugFolderPath = "c:/temp/gadgetron";
+#else
+            debugFolderPath = "/tmp/gadgetron";
+#endif // _WIN32
+        }
+
+        debugFolderPath.append("/").append(debugFolder).append("/");
+        // best effort: the void return type leaves no way to report a failed folder creation
+        createFolderWithAllPermissions(debugFolderPath);
+        GADGET_CONDITION_MSG(verbose, "Debug folder is " << debugFolderPath);
+    }
+
+    bool createFolderWithAllPermissions(const std::string& workingdirectory)
+    {
+        // Creates the folder, including missing parents, and opens it to all users; true on success.
+        boost::filesystem::path workingPath(workingdirectory);
+        boost::system::error_code ec; // error_code overloads report failures without throwing
+        if ( !boost::filesystem::exists(workingPath, ec) )
+        {
+            boost::filesystem::create_directories(workingPath, ec);
+            if ( !boost::filesystem::is_directory(workingPath, ec) ) // decisive check: is the folder there now
+            {
+                ACE_ERROR_RETURN((LM_ERROR, ACE_TEXT("Error creating the working directory.\n")), false);
+            }
+            // set the permission for the folder
+#ifdef _WIN32
+            try
+            {
+                boost::filesystem::permissions(workingPath, all_all);
+            }
+            catch(...)
+            {
+                ACE_ERROR_RETURN((LM_ERROR, ACE_TEXT("Error changing the permission of the working directory.\n")), false);
+            }
+#else
+            // in case an older version of boost is used in non-win system, the system call is used
+            int res = chmod(workingPath.string().c_str(), S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IWGRP|S_IXGRP|S_IROTH|S_IWOTH|S_IXOTH);
+            if ( res != 0 )
+            {
+                ACE_ERROR_RETURN((LM_ERROR, ACE_TEXT("Error changing the permission of the working directory.\n")), false);
+            }
+#endif // _WIN32
+        }
+        return true;
+    }
+
+    bool getISMRMRMetaValues(const ISMRMRD::MetaContainer& attrib, const std::string& name, std::vector<long>& v)
+    {
+        // copies every value stored under 'name' into v as long; a missing field clears v and succeeds
+        const char* const fieldName = name.c_str();
+        try
+        {
+            const size_t count = attrib.length(fieldName);
+            if ( count == 0 )
+            {
+                v.clear();
+                GADGET_WARN_MSG("getISMRMRMetaValues, can not find field : " << name);
+                return true;
+            }
+
+            v.resize(count);
+            for ( size_t idx=0; idx!=count; ++idx )
+            {
+                v[idx] = attrib.as_long(fieldName, idx);
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in getISMRMRMetaValues(const ISMRMRD::MetaContainer& attrib, const std::string& name, std::vector<long>& v) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    bool getISMRMRMetaValues(const ISMRMRD::MetaContainer& attrib, const std::string& name, std::vector<double>& v)
+    {
+        // copies every value stored under 'name' into v as double; a missing field clears v and succeeds
+        const char* const fieldName = name.c_str();
+        try
+        {
+            const size_t count = attrib.length(fieldName);
+            if ( count == 0 )
+            {
+                v.clear();
+                GADGET_WARN_MSG("getISMRMRMetaValues, can not find field : " << name);
+                return true;
+            }
+
+            v.resize(count);
+            for ( size_t idx=0; idx!=count; ++idx )
+            {
+                v[idx] = attrib.as_double(fieldName, idx);
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in getISMRMRMetaValues(const ISMRMRD::MetaContainer& attrib, const std::string& name, std::vector<double>& v) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    bool getISMRMRMetaValues(const ISMRMRD::MetaContainer& attrib, const std::string& name, std::vector<std::string>& v)
+    {
+        // copies every value stored under 'name' into v as string; a missing field clears v and succeeds
+        const char* const fieldName = name.c_str();
+        try
+        {
+            const size_t count = attrib.length(fieldName);
+            if ( count == 0 )
+            {
+                v.clear();
+                GADGET_WARN_MSG("getISMRMRMetaValues, can not find field : " << name);
+                return true;
+            }
+
+            v.resize(count);
+            for ( size_t idx=0; idx!=count; ++idx )
+            {
+                v[idx] = std::string( attrib.as_str(fieldName, idx) );
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in getISMRMRMetaValues(const ISMRMRD::MetaContainer& attrib, const std::string& name, std::vector<std::string>& v) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template <typename T>
+    bool setISMRMRMetaValues(ISMRMRD::MetaContainer& attrib, const std::string& name, const std::vector<T>& v)
+    {
+        // Replaces the content of field 'name' with the values of v, in order.
+        // An empty v only triggers a warning; attrib is not touched in that case.
+        try
+        {
+            if ( v.empty() )
+            {
+                GADGET_WARN_MSG("setISMRMRMetaValues, input vector is empty ... " << name);
+                return true;
+            }
+            // the first value overwrites the field, every following value is appended to it
+            typename std::vector<T>::const_iterator it = v.begin();
+            attrib.set(name.c_str(), *it);
+            for ( ++it; it!=v.end(); ++it )
+            {
+                attrib.append(name.c_str(), *it);
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in setISMRMRMetaValues(ISMRMRD::MetaContainer& attrib, const std::string& name, const std::vector<T>& v) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template EXPORTGTPLUSGADGET bool setISMRMRMetaValues(ISMRMRD::MetaContainer& attrib, const std::string& name, const std::vector<long>& v);
+    template EXPORTGTPLUSGADGET bool setISMRMRMetaValues(ISMRMRD::MetaContainer& attrib, const std::string& name, const std::vector<double>& v);
+
+    bool setISMRMRMetaValues(ISMRMRD::MetaContainer& attrib, const std::string& name, const std::vector<std::string>& v)
+    {
+        // Replaces the content of field 'name' with the strings of v, in order.
+        // An empty v only triggers a warning; attrib is not touched in that case.
+        try
+        {
+            if ( v.empty() )
+            {
+                GADGET_WARN_MSG("setISMRMRMetaValues, input vector is empty ... " << name);
+                return true;
+            }
+
+            // the first string overwrites the field, every following string is appended to it
+            attrib.set(name.c_str(), v.front().c_str());
+            for ( size_t idx=1; idx<v.size(); ++idx )
+            {
+                attrib.append(name.c_str(), v[idx].c_str());
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in setISMRMRMetaValues(ISMRMRD::MetaContainer& attrib, const std::string& name, const std::vector<std::string>& v) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template <typename T>
+    bool appendISMRMRMetaValues(ISMRMRD::MetaContainer& attrib, const std::string& name, const std::vector<T>& v)
+    {
+        // Appends the values of v, in order, to field 'name'.
+        // An empty v only triggers a warning; attrib is not touched in that case.
+        // Any exception raised while appending is reported and turned into a false return.
+        try
+        {
+            if ( v.empty() )
+            {
+                GADGET_WARN_MSG("appendISMRMRMetaValues, input vector is empty ... " << name);
+                return true;
+            }
+
+            // one pass over v; the first element needs no special treatment when appending
+            for ( typename std::vector<T>::const_iterator it=v.begin(); it!=v.end(); ++it )
+            {
+                attrib.append(name.c_str(), *it);
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in appendISMRMRMetaValues(ISMRMRD::MetaContainer& attrib, const std::string& name, const std::vector<T>& v) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template EXPORTGTPLUSGADGET bool appendISMRMRMetaValues(ISMRMRD::MetaContainer& attrib, const std::string& name, const std::vector<long>& v);
+    template EXPORTGTPLUSGADGET bool appendISMRMRMetaValues(ISMRMRD::MetaContainer& attrib, const std::string& name, const std::vector<double>& v);
+
+    bool appendISMRMRMetaValues(ISMRMRD::MetaContainer& attrib, const std::string& name, const std::vector<std::string>& v)
+    {
+        // Appends the strings of v, in order, to field 'name'.
+        // An empty v only triggers a warning; attrib is not touched in that case.
+        try
+        {
+            if ( v.empty() )
+            {
+                GADGET_WARN_MSG("appendISMRMRMetaValues, input vector is empty ... " << name);
+                return true;
+            }
+
+            // one pass over v; the first element needs no special treatment when appending
+            // each string is handed over as a C string
+            for ( size_t idx=0; idx<v.size(); ++idx )
+            {
+                attrib.append(name.c_str(), v[idx].c_str());
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in appendISMRMRMetaValues(ISMRMRD::MetaContainer& attrib, const std::string& name, const std::vector<std::string>& v) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    bool PatientCoordinateSystemToDeviceCoordinateSystem(double& x, double& y, double& z, const std::string& position)
+    {
+        // this is following dicom tag (0020, 0037)
+        // The point is converted in place; an unknown position string leaves it untouched
+        // and returns false. px/py keep the incoming x/y for the cases that exchange them.
+        const double px = x;
+        const double py = y;
+
+        if ( position == "HFS" ) // Head-first supine (HFS)
+        {
+            // y and z change sign
+            y = -py;
+            z = -z;
+        }
+        else if ( position == "HFP" ) // Head-first prone (HFP)
+        {
+            // x and z change sign
+            x = -px;
+            z = -z;
+        }
+        else if ( position == "HFDR" ) // Head-first decubitus-right 
+        {
+            // x and y are exchanged, z changes sign
+            x = py;
+            y = px;
+            z = -z;
+        }
+        else if ( position == "HFDL" ) // Head-first decubitus-left (HFDL)
+        {
+            // x and y are exchanged and all three components change sign
+            x = -py;
+            y = -px;
+            z = -z;
+        }
+        else if ( position == "FFDR" ) // Feet-first decubitus-right (FFDR)
+        {
+            // x and y are exchanged, then the new x changes sign
+            x = -py;
+            y = px;
+        }
+        else if ( position == "FFDL" ) // Feet-first decubitus-left (FFDL)
+        {
+            // x and y are exchanged, then the new y changes sign
+            x = py;
+            y = -px;
+        }
+        else if ( position == "FFP" ) // Feet-first prone (FFP)
+        {
+            // nothing to change for this position
+        }
+        else if ( position == "FFS" ) // Feet-first supine (FFS)
+        {
+            x = -px;
+            y = -py;
+        }
+        else 
+        {
+            GADGET_ERROR_MSG("Unknown position string :" << position);
+            return false;
+        }
+
+        return true;
+    }
+
+    bool DeviceCoordinateSystemToPatientCoordinateSystem(double& x, double& y, double& z, const std::string& position)
+    {
+        // The point is converted in place; an unknown position string leaves it untouched
+        // and returns false. dx/dy keep the incoming x/y for the cases that exchange them.
+        const double dx = x;
+        const double dy = y;
+
+        if ( position == "HFS" ) // Head-first supine (HFS)
+        {
+            // y and z change sign
+            y = -dy;
+            z = -z;
+        }
+        else if ( position == "HFP" ) // Head-first prone (HFP)
+        {
+            x = -dx;
+            z = -z;
+        }
+        else if ( position == "HFDR" ) // Head-first decubitus-right 
+        {
+            // x and y are exchanged, z changes sign
+            x = dy;
+            y = dx;
+            z = -z;
+        }
+        else if ( position == "HFDL" ) // Head-first decubitus-left (HFDL)
+        {
+            // x and y are exchanged and all three components change sign
+            x = -dy;
+            y = -dx;
+            z = -z;
+        }
+        else if ( position == "FFDR" ) // Feet-first decubitus-right (FFDR)
+        {
+            // x and y are exchanged, then the new y changes sign
+            x = dy;
+            y = -dx;
+        }
+        else if ( position == "FFDL" ) // Feet-first decubitus-left (FFDL)
+        {
+            // x and y are exchanged, then the new x changes sign
+            x = -dy;
+            y = dx;
+        }
+        else if ( position == "FFP" ) // Feet-first prone (FFP)
+        {
+            // nothing to change for this position
+        }
+        else if ( position == "FFS" ) // Feet-first supine (FFS)
+        {
+            x = -dx;
+            y = -dy;
+        }
+        else 
+        {
+            GADGET_ERROR_MSG("Unknown position string :" << position);
+            return false;
+        }
+
+        return true;
+    }
+}
diff --git a/gadgets/gtPlus/GtPlusReconGadgetUtil.h b/gadgets/gtPlus/GtPlusReconGadgetUtil.h
new file mode 100644
index 0000000..199c5e4
--- /dev/null
+++ b/gadgets/gtPlus/GtPlusReconGadgetUtil.h
@@ -0,0 +1,76 @@
+/** \file   GtPlusReconGadgetUtil.h
+    \brief  Store some utilities functions for reconstruction
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include <complex>
+#include "GtPlusGadgetExport.h"
+#include "hoNDArray.h"
+#include "GtPlusDefinition.h"
+#include "GadgetIsmrmrdReadWrite.h"
+
+#include "ismrmrd/ismrmrd.h"
+#include "ismrmrd/xml.h"
+#include "ismrmrd/meta.h"
+
+namespace Gadgetron
+{
+
+// [Ro E1 Cha Slice E2 Con Phase Rep Set Seg AVE]
+//   0  1  2   3    4   5    6     7  8   9  10
+
+    // find the calibration mode from protocol
+    bool EXPORTGTPLUSGADGET findCalibMode(ISMRMRD::IsmrmrdHeader& h, Gadgetron::gtPlus::ISMRMRDCALIBMODE& CalibMode, Gadgetron::gtPlus::ISMRMRDDIM& InterleaveDim, double& acceFactorE1, double& acceFactorE2, bool verbose=false);
+
+    // find the encoding limits from protocol
+    bool EXPORTGTPLUSGADGET findEncodingLimits(ISMRMRD::IsmrmrdHeader& h, ISMRMRD::EncodingCounters& meas_max_idx, bool verbose=false);
+
+    // find encoding matrix size and FOV
+    void EXPORTGTPLUSGADGET findMatrixSizeEncoding(ISMRMRD::IsmrmrdHeader& h, size_t matrix_size_encoding[3]);
+    void EXPORTGTPLUSGADGET findFOVEncoding(ISMRMRD::IsmrmrdHeader& h, float field_of_view_encoding[3]);
+
+    // find recon matrix size and FOV
+    void EXPORTGTPLUSGADGET findMatrixSizeRecon(ISMRMRD::IsmrmrdHeader& h, size_t matrix_size_recon[3]);
+    void EXPORTGTPLUSGADGET findFOVRecon(ISMRMRD::IsmrmrdHeader& h, float field_of_view_recon[3]);
+
+    // find the status of a readout line
+    bool EXPORTGTPLUSGADGET checkReadoutStatus(uint64_t flag, int samples, Gadgetron::gtPlus::ISMRMRDCALIBMODE& CalibMode, int roLen, 
+                        bool& bIsKSpace, bool& bIsRef, bool& bIsNoise, 
+                        bool& bIsPhaseCorr, bool& bIsReflect, bool& bIsOther, 
+                        bool& bIsNavigator, bool& bIsRTFeedback, bool& bIsHPFeedback, 
+                        bool& bIsDummyScan);
+
+    // estimate the max SEG for a segmented acquisition (number of total segments is segment+1)
+    // retro_gated_segment_size : number of readout lines acquired in one segment
+    // E1, embedded_ref_lines_E1: number of lines measured along E1 and number of reference lines for embedded mode
+    bool EXPORTGTPLUSGADGET estimateMaxSEGForRetroGating(Gadgetron::gtPlus::ISMRMRDCALIBMODE CalibMode, 
+                                                      double acceFactorE1, double acceFactorE2, 
+                                                      size_t retro_gated_segment_size, 
+                                                      uint16_t E1, uint16_t embedded_ref_lines_E1, 
+                                                      uint16_t E2, uint16_t embedded_ref_lines_E2, 
+                                                      uint16_t& segment, bool verbose=false);
+
+    // debugFolderPath = <root>/debugFolder/ with <root> read from GADGETRON_DEBUG_FOLDER; the folder is created if missing
+    // get debug folder full path
+    void EXPORTGTPLUSGADGET getDebugFolderPath(const std::string& debugFolder, std::string& debugFolderPath, bool verbose=false);
+
+    // create a folder with all permissions for all users
+    bool EXPORTGTPLUSGADGET createFolderWithAllPermissions(const std::string& workingdirectory);
+
+    // get a vector of values from ismrmrd meta; a missing field clears v and still returns true
+    bool EXPORTGTPLUSGADGET getISMRMRMetaValues(const ISMRMRD::MetaContainer& attrib, const std::string& name, std::vector<long>& v);
+    bool EXPORTGTPLUSGADGET getISMRMRMetaValues(const ISMRMRD::MetaContainer& attrib, const std::string& name, std::vector<double>& v);
+    bool EXPORTGTPLUSGADGET getISMRMRMetaValues(const ISMRMRD::MetaContainer& attrib, const std::string& name, std::vector<std::string>& v);
+    // set a field of ismrmrd meta from a vector of values (first value is set, the rest appended); an empty vector is ignored
+    template <typename T> EXPORTGTPLUSGADGET bool setISMRMRMetaValues(ISMRMRD::MetaContainer& attrib, const std::string& name, const std::vector<T>& v);
+    bool EXPORTGTPLUSGADGET setISMRMRMetaValues(ISMRMRD::MetaContainer& attrib, const std::string& name, const std::vector<std::string>& v);
+    // append a vector of values to a field of ismrmrd meta; an empty vector is ignored
+    template <typename T> EXPORTGTPLUSGADGET bool appendISMRMRMetaValues(ISMRMRD::MetaContainer& attrib, const std::string& name, const std::vector<T>& v);
+    bool EXPORTGTPLUSGADGET appendISMRMRMetaValues(ISMRMRD::MetaContainer& attrib, const std::string& name, const std::vector<std::string>& v);
+
+    // perform the patient to device coordinate transformation and its inverse, in place; false for an unknown position string
+    bool EXPORTGTPLUSGADGET PatientCoordinateSystemToDeviceCoordinateSystem(double& x, double& y, double& z, const std::string& position);
+    bool EXPORTGTPLUSGADGET DeviceCoordinateSystemToPatientCoordinateSystem(double& x, double& y, double& z, const std::string& position);
+}
diff --git a/gadgets/gtPlus/GtPlusReconJob2DTGadget.cpp b/gadgets/gtPlus/GtPlusReconJob2DTGadget.cpp
index 298ac69..54f5508 100644
--- a/gadgets/gtPlus/GtPlusReconJob2DTGadget.cpp
+++ b/gadgets/gtPlus/GtPlusReconJob2DTGadget.cpp
@@ -22,7 +22,6 @@ GtPlusReconJob2DTGadget::GtPlusReconJob2DTGadget() : mem_manager_(new Gadgetron:
     process_config_called_ = false;
 
     Gadgetron::prepOpenMP();
-    Gadgetron::prepMKL();
 }
 
 GtPlusReconJob2DTGadget::~GtPlusReconJob2DTGadget()
@@ -67,7 +66,7 @@ int GtPlusReconJob2DTGadget::process_config(ACE_Message_Block* mb)
     // generate the destination folder
     if ( !debugFolder_.empty() )
     {
-        GADGET_CHECK_RETURN_FALSE(generateDebugFolderPath(debugFolder_, debugFolder_fullPath_));
+        Gadgetron::getDebugFolderPath(debugFolder_, debugFolder_fullPath_, verboseMode_);
     }
     else
     {
@@ -75,7 +74,7 @@ int GtPlusReconJob2DTGadget::process_config(ACE_Message_Block* mb)
     }
 
     GADGET_START_TIMING_CONDITION(gt_timer1_, "Pre-allocate memory ... ", performTiming_);
-    mem_manager_->increase(4.0*1024*1024*1024);
+    mem_manager_->increase( (size_t)(4.0*1024*1024*1024) );
     GADGET_STOP_TIMING_CONDITION(gt_timer1_, performTiming_);
 
     worker_grappa_.gtPlus_mem_manager_ = mem_manager_;
@@ -136,7 +135,7 @@ int GtPlusReconJob2DTGadget::process(Gadgetron::GadgetContainerMessage< int >* m
         std::ostringstream ostr;
         ostr << "ReconJob2DT_ID" << *jobID;
 
-        hoNDArray<GT_Complex8> res = job->res;
+        hoNDArray< std::complex<float> > res = job->res;
         res.squeeze();
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, res, ostr.str());
     }
@@ -206,29 +205,6 @@ sendOutJob(int jobID, GtPlusReconJobTypeCPFL* job)
     return true;
 }
 
-bool GtPlusReconJob2DTGadget::
-    generateDebugFolderPath(const std::string& debugFolder, std::string& debugFolderPath)
-{
-    debugFolderPath = ACE_OS::getenv("GADGETRON_HOME");
-    debugFolderPath.append("/");
-    debugFolderPath.append(debugFolder);
-    debugFolderPath.append("/");
-    GADGET_CONDITION_MSG(verboseMode_, "Debug folder is " << debugFolderPath);
-    return true;
-}
-
-void GtPlusReconJob2DTGadget::
-    getCurrentMoment(std::string& procTime)
-{
-    char timestamp[100];
-    time_t mytime;
-    struct tm *mytm;
-    mytime=time(NULL);
-    mytm=localtime(&mytime);
-    strftime(timestamp, sizeof(timestamp),"_%a_%d_%b_%Y_%H_%M_%S",mytm);
-    procTime = timestamp;
-}
-
 GADGET_FACTORY_DECLARE(GtPlusReconJob2DTGadget)
 
 }
diff --git a/gadgets/gtPlus/GtPlusReconJob2DTGadget.h b/gadgets/gtPlus/GtPlusReconJob2DTGadget.h
index 654e16e..905e033 100644
--- a/gadgets/gtPlus/GtPlusReconJob2DTGadget.h
+++ b/gadgets/gtPlus/GtPlusReconJob2DTGadget.h
@@ -7,8 +7,8 @@
             Ref to: 
 
             Hui Xue, Souheil Inati, Thomas Sangild Sorensen, Peter Kellman, Michael S. Hansen. 
-            Distributed MRI Reconstruction using Gadgetron based Cloud Computing. Submitted to
-            Magenetic Resonance in Medicine on Dec 2013.
+            Distributed MRI Reconstruction using Gadgetron based Cloud Computing. 
+            Magnetic Resonance in Medicine, doi: 10.1002/mrm.25213.
 
     \author Hui Xue
 */
@@ -22,6 +22,7 @@
 #include "GadgetCloudJobMessageReadWrite.h"
 #include "GadgetronTimer.h"
 
+#include "GadgetronCommon.h"
 #include "gtPlusIOAnalyze.h"
 #include "gtPlusISMRMRDReconUtil.h"
 #include "gtPlusISMRMRDReconWorkOrder.h"
@@ -31,6 +32,7 @@
 #include "gtPlusISMRMRDReconWorker2DTSPIRIT.h"
 #include "gtPlusISMRMRDReconWorker2DTL1SPIRITNCG.h"
 #include "gtPlusMemoryManager.h"
+#include "GtPlusReconGadgetUtil.h"
 
 #ifdef USE_OMP
     #include "omp.h"
@@ -63,17 +65,6 @@ public:
 protected:
 
     // --------------------------------------------------
-    // utility functions
-    // --------------------------------------------------
-
-    // generate the debug folder path
-    // debugFolderPath = ${GADGETRON_HOME}/debugFolder
-    virtual bool generateDebugFolderPath(const std::string& debugFolder, std::string& debugFolderPath);
-
-    // get the current moment
-    void getCurrentMoment(std::string& procTime);
-
-    // --------------------------------------------------
     // functional functions
     // --------------------------------------------------
 
diff --git a/gadgets/gtPlus/GtPlusReconJob2DTGadgetCloud.cpp b/gadgets/gtPlus/GtPlusReconJob2DTGadgetCloud.cpp
index 10b6220..a544f09 100644
--- a/gadgets/gtPlus/GtPlusReconJob2DTGadgetCloud.cpp
+++ b/gadgets/gtPlus/GtPlusReconJob2DTGadgetCloud.cpp
@@ -1,6 +1,7 @@
 
 #include "GtPlusReconJob2DTGadgetCloud.h"
 #include "GtPlusGadgetOpenMP.h"
+#include "gadgetron_paths.h"
 
 using namespace Gadgetron::gtPlus;
 
@@ -61,7 +62,6 @@ GtPlusReconJob2DTGadgetCloud::GtPlusReconJob2DTGadgetCloud() : mem_manager_(new
     process_config_called_ = false;
 
     Gadgetron::prepOpenMP();
-    Gadgetron::prepMKL();
 }
 
 GtPlusReconJob2DTGadgetCloud::~GtPlusReconJob2DTGadgetCloud()
@@ -243,7 +243,7 @@ int GtPlusReconJob2DTGadgetCloud::process_config(ACE_Message_Block* mb)
     // generate the destination folder
     if ( !debugFolder_.empty() )
     {
-        GADGET_CHECK_RETURN_FALSE(generateDebugFolderPath(debugFolder_, debugFolder_fullPath_));
+        getDebugFolderPath(debugFolder_, debugFolder_fullPath_, verboseMode_);
     }
     else
     {
@@ -252,7 +252,7 @@ int GtPlusReconJob2DTGadgetCloud::process_config(ACE_Message_Block* mb)
 
     if ( !debugFolder2_.empty() )
     {
-        GADGET_CHECK_RETURN_FALSE(generateDebugFolderPath(debugFolder2_, debugFolder2_fullPath_));
+        getDebugFolderPath(debugFolder2_, debugFolder2_fullPath_, verboseMode_);
     }
     else
     {
@@ -260,7 +260,7 @@ int GtPlusReconJob2DTGadgetCloud::process_config(ACE_Message_Block* mb)
     }
 
     GADGET_START_TIMING_CONDITION(gt_timer1_, "Pre-allocate memory ... ", performTiming_);
-    mem_manager_->increase(2.0*1024*1024*1024);
+    mem_manager_->increase( (size_t)(2.0*1024*1024*1024) );
     GADGET_STOP_TIMING_CONDITION(gt_timer1_, performTiming_);
 
     worker_grappa_.gtPlus_mem_manager_ = mem_manager_;
@@ -314,7 +314,7 @@ bool GtPlusReconJob2DTGadgetCloud::setWorkOrder2DTParameters(GtPlusRecon2DTPara&
 
 bool GtPlusReconJob2DTGadgetCloud::parseGTCloudNodeFile(const std::string& filename, CloudType& gtCloud)
 {
-    std::string nodeFileName = ACE_OS::getenv("GADGETRON_HOME");
+    std::string nodeFileName = get_gadgetron_home();
     nodeFileName.append("/config/gtCloud/");
     nodeFileName.append(filename);
     GADGET_CONDITION_MSG(verboseMode_, "Cloud node file name is " << nodeFileName);
@@ -381,7 +381,7 @@ int GtPlusReconJob2DTGadgetCloud::process(Gadgetron::GadgetContainerMessage< int
             if ( parseSuccess )
             {
                 CloudComputing_ = true;
-                CloudSize_ = gt_cloud_.size();
+                CloudSize_ = (int)gt_cloud_.size();
 
                 if ( CloudSize_ == 0 )
                 {
@@ -441,7 +441,17 @@ int GtPlusReconJob2DTGadgetCloud::process(Gadgetron::GadgetContainerMessage< int
     workOrder.CloudSize_ = CloudSize_;
     workOrder.gt_cloud_ = gt_cloud_;
 
-    workOrder.data_ = job->kspace;
+    if ( workOrder.acceFactorE1_>1 && workOrder.CalibMode_==Gadgetron::gtPlus::ISMRMRD_interleaved )
+    {
+        Gadgetron::fillSampledLinesUpTo11DArray(job->kspace, workOrder.data_, job->timeStamp);
+    }
+    else
+    {
+        workOrder.data_ = job->kspace;
+    }
+
+    workOrder.time_stamp_ = job->timeStamp;
+    workOrder.physio_time_stamp_ = job->physioTimeStamp;
     workOrder.ref_ = job->ref;
 
     // ---------------------------------------------------------
@@ -461,24 +471,6 @@ int GtPlusReconJob2DTGadgetCloud::process(Gadgetron::GadgetContainerMessage< int
 
     if ( !debugFolder_fullPath_.empty() ) workflow_.debugFolder_ = debugFolder_fullPath_;
 
-    // set the worker
-    worker_grappa_.performTiming_ = performTiming_;
-    if ( !debugFolder_fullPath_.empty() ) worker_grappa_.debugFolder_ = debugFolder_fullPath_;
-
-    worker_noacceleration_.performTiming_ = performTiming_;
-    if ( !debugFolder_fullPath_.empty() ) worker_noacceleration_.debugFolder_ = debugFolder_fullPath_;
-
-    worker_spirit_.performTiming_ = performTiming_;
-    if ( !debugFolder_fullPath_.empty() ) worker_spirit_.debugFolder_ = debugFolder_fullPath_;
-
-    worker_spirit_L1_ncg_.performTiming_ = performTiming_;
-    if ( !debugFolder_fullPath_.empty() ) worker_spirit_L1_ncg_.debugFolder_ = debugFolder_fullPath_;
-
-    if ( verboseMode_ )
-    {
-        workOrder.print(std::cout);
-    }
-
     // perform the recon
     GADGET_START_TIMING_CONDITION(gt_timer1_, "Recon 2DT workorder on cloud node ... ", performTiming_);
 
@@ -488,12 +480,13 @@ int GtPlusReconJob2DTGadgetCloud::process(Gadgetron::GadgetContainerMessage< int
     setWorkOrder2DTParameters(para, &workOrder_recon_);
 
     workflow_.workOrder_ = &workOrder_recon_;
+
     if ( verboseMode_ )
     {
         workflow_.workOrder_->print(std::cout);
     }
 
-    workflow_.setDataArray(workOrder.data_);
+    workflow_.setDataArray(workOrder.data_, workOrder.time_stamp_, workOrder.physio_time_stamp_);
 
     if ( workOrder.ref_.get_number_of_elements() > 0 )
     {
@@ -537,6 +530,11 @@ int GtPlusReconJob2DTGadgetCloud::process(Gadgetron::GadgetContainerMessage< int
         }
     }
 
+    if ( !succeed )
+    {
+        GADGET_ERROR_MSG("GtPlusReconJob2DTGadgetCloud::process(...) failed... ");
+    }
+
     GADGET_STOP_TIMING_CONDITION(gt_timer1_, performTiming_);
 
     if ( !debugFolder2_fullPath_.empty() )
@@ -544,22 +542,43 @@ int GtPlusReconJob2DTGadgetCloud::process(Gadgetron::GadgetContainerMessage< int
         std::ostringstream ostr;
         ostr << "Node_Recon2DT_" << *jobID;
 
-        hoNDArray<GT_Complex8> res = workflow_.res_;
+        hoNDArray< std::complex<float> > res = workflow_.res_;
         res.squeeze();
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder2_fullPath_, gt_exporter_, res, ostr.str());
+
+        if ( workflow_.res_second_.get_number_of_elements() > 0 )
+        {
+            hoNDArray< std::complex<float> > res = workflow_.res_second_;
+            res.squeeze();
+
+            std::ostringstream ostr;
+            ostr << "Node_Recon2DT_second_" << *jobID;
+
+            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder2_fullPath_, gt_exporter_, res, ostr.str());
+        }
     }
 
     // clean the kspace and ker and coil map
     job->kspace.clear();
+    job->timeStamp.clear();
+    job->physioTimeStamp.clear();
+    job->ref.clear();
 
     if ( succeed )
     {
         job->complexIm = workflow_.res_;
+        job->complexImSecond = workflow_.res_second_;
+        job->resTimeStampSecond = workflow_.res_time_stamp_second_;
+        job->resPhysioTimeStampSecond = workflow_.res_physio_time_stamp_second_;
     }
     else
     {
         job->complexIm.clear();
         job->res.clear();
+
+        job->complexImSecond.clear();
+        job->resTimeStampSecond.clear();
+        job->resPhysioTimeStampSecond.clear();
     }
 
     // send out the results
@@ -569,6 +588,8 @@ int GtPlusReconJob2DTGadgetCloud::process(Gadgetron::GadgetContainerMessage< int
 
     // reset the status
     workflow_.data_ = NULL;
+    workflow_.time_stamp_ = NULL;
+    workflow_.physio_time_stamp_ = NULL;
     workflow_.ref_ = NULL;
     workflow_.noise_ = NULL;
     workflow_.workOrder_ = NULL;
@@ -576,6 +597,16 @@ int GtPlusReconJob2DTGadgetCloud::process(Gadgetron::GadgetContainerMessage< int
 
     m1->release();
 
+    if ( this->verboseMode_ )
+    {
+        std::string procTime;
+        gtPlus_util_.getCurrentMoment(procTime);
+
+        GADGET_MSG("* ============================================================================== *");
+        GADGET_MSG("---> MR recon 2DT gadget cloud, Currnt processing time : " << procTime << " <---");
+        GADGET_MSG("* ============================================================================== *");
+    }
+
     return GADGET_OK;
 }
 
@@ -624,29 +655,6 @@ sendOutJob(int jobID, GtPlusRecon2DTCloudPackageCPFL* job)
 }
 
 bool GtPlusReconJob2DTGadgetCloud::
-    generateDebugFolderPath(const std::string& debugFolder, std::string& debugFolderPath)
-{
-    debugFolderPath = ACE_OS::getenv("GADGETRON_HOME");
-    debugFolderPath.append("/");
-    debugFolderPath.append(debugFolder);
-    debugFolderPath.append("/");
-    GADGET_CONDITION_MSG(verboseMode_, "Debug folder is " << debugFolderPath);
-    return true;
-}
-
-void GtPlusReconJob2DTGadgetCloud::
-    getCurrentMoment(std::string& procTime)
-{
-    char timestamp[100];
-    time_t mytime;
-    struct tm *mytm;
-    mytime=time(NULL);
-    mytm=localtime(&mytime);
-    strftime(timestamp, sizeof(timestamp),"_%a_%d_%b_%Y_%H_%M_%S",mytm);
-    procTime = timestamp;
-}
-
-bool GtPlusReconJob2DTGadgetCloud::
 generateKSpaceFilter(WorkOrderType& workOrder)
 {
     try
@@ -670,21 +678,21 @@ generateKSpaceFilter(WorkOrderType& workOrder)
         if ( RO>1 && filterRO_type_ != ISMRMRD_FILTER_NONE )
         {
             workOrder.filterRO_.create(RO);
-            GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilter(RO, workOrder.filterRO_, filterRO_type_, filterRO_sigma_, std::ceil(filterRO_width_*RO)));
+            GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilter(RO, workOrder.start_RO_, workOrder.end_RO_, workOrder.filterRO_, filterRO_type_, filterRO_sigma_, (size_t)std::ceil(filterRO_width_*RO)));
             GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterRO_, "filterRO");
         }
 
         if ( E1>1 && filterE1_type_ != ISMRMRD_FILTER_NONE )
         {
             workOrder.filterE1_.create(E1);
-            GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilter(E1, workOrder.filterE1_, filterE1_type_, filterE1_sigma_, std::ceil(filterE1_width_*E1)));
+            GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilter(E1, workOrder.start_E1_, workOrder.end_E1_, workOrder.filterE1_, filterE1_type_, filterE1_sigma_, (size_t)std::ceil(filterE1_width_*E1)));
             GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE1_, "filterE1");
         }
 
         if ( E2>1 && filterE2_type_ != ISMRMRD_FILTER_NONE )
         {
             workOrder.filterE2_.create(E2);
-            GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilter(E2, workOrder.filterE2_, filterE2_type_, filterE2_sigma_, std::ceil(filterE2_width_*E2)));
+            GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilter(E2, workOrder.start_E2_, workOrder.end_E2_, workOrder.filterE2_, filterE2_type_, filterE2_sigma_, (size_t)std::ceil(filterE2_width_*E2)));
             GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE2_, "filterE2");
         }
 
@@ -711,7 +719,7 @@ generateKSpaceFilter(WorkOrderType& workOrder)
             if ( RO_ref > 1 && filterRO_ref_type_ != ISMRMRD_FILTER_NONE )
             {
                 workOrder.filterRO_ref_.create(RO_ref);
-                GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(RO_ref, startRO, endRO, workOrder.filterRO_ref_, filterRO_ref_type_, filterRO_ref_sigma_, std::ceil(filterRO_ref_width_*RO_ref)));
+                GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(RO_ref, startRO, endRO, workOrder.filterRO_ref_, filterRO_ref_type_, filterRO_ref_sigma_, (size_t)std::ceil(filterRO_ref_width_*RO_ref)));
                 GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterRO_ref_, "filterRO_ref");
             }
 
@@ -721,7 +729,7 @@ generateKSpaceFilter(WorkOrderType& workOrder)
                 {
                     size_t len = endE1-startE1+1;
                     workOrder.filterE1_ref_.create(len);
-                    GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilter(len, workOrder.filterE1_ref_, filterE1_ref_type_, filterE1_ref_sigma_, std::ceil(filterE1_ref_width_*len)));
+                    GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilter(len, 0, len-1, workOrder.filterE1_ref_, filterE1_ref_type_, filterE1_ref_sigma_, (size_t)std::ceil(filterE1_ref_width_*len)));
                     GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE1_ref_, "filterE1_ref");
                 }
 
@@ -729,7 +737,7 @@ generateKSpaceFilter(WorkOrderType& workOrder)
                 {
                     size_t len = endE2-startE2+1;
                     workOrder.filterE2_ref_.create(len);
-                    GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilter(len, workOrder.filterE2_ref_, filterE2_ref_type_, filterE2_ref_sigma_, std::ceil(filterE2_ref_width_*len)));
+                    GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilter(len, 0, len-1, workOrder.filterE2_ref_, filterE2_ref_type_, filterE2_ref_sigma_, (size_t)std::ceil(filterE2_ref_width_*len)));
                     GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE2_ref_, "filterE2_ref");
                 }
             }
@@ -740,7 +748,7 @@ generateKSpaceFilter(WorkOrderType& workOrder)
                 {
                     size_t len = E1_ref;
                     workOrder.filterE1_ref_.create(len);
-                    GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(len, startE1, endE1, workOrder.filterE1_ref_, filterE1_ref_type_, filterE1_ref_sigma_, std::ceil(filterE1_ref_width_*len)));
+                    GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(len, startE1, endE1, workOrder.filterE1_ref_, filterE1_ref_type_, filterE1_ref_sigma_, (size_t)std::ceil(filterE1_ref_width_*len)));
                     GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE1_ref_, "filterE1_ref");
                 }
 
@@ -748,7 +756,7 @@ generateKSpaceFilter(WorkOrderType& workOrder)
                 {
                     size_t len = E2_ref;
                     workOrder.filterE2_ref_.create(len);
-                    GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(len, startE2, endE2, workOrder.filterE2_ref_, filterE2_ref_type_, filterE2_ref_sigma_, std::ceil(filterE2_ref_width_*len)));
+                    GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(len, startE2, endE2, workOrder.filterE2_ref_, filterE2_ref_type_, filterE2_ref_sigma_, (size_t)std::ceil(filterE2_ref_width_*len)));
                     GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE2_ref_, "filterE2_ref");
                 }
             }
@@ -758,21 +766,21 @@ generateKSpaceFilter(WorkOrderType& workOrder)
         if ( RO>1 && workOrder.start_RO_>=0 && workOrder.end_RO_>0 )
         {
             workOrder.filterRO_partialfourier_.create(RO);
-            GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateAsymmetricFilter(RO, workOrder.start_RO_, workOrder.end_RO_, workOrder.filterRO_partialfourier_, filterRO_pf_type_, std::ceil(filterRO_pf_width_*RO), filterRO_pf_densityComp_));
+            GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateAsymmetricFilter(RO, workOrder.start_RO_, workOrder.end_RO_, workOrder.filterRO_partialfourier_, filterRO_pf_type_, (size_t)std::ceil(filterRO_pf_width_*RO), filterRO_pf_densityComp_));
             GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterRO_partialfourier_, "filterRO_partialfourier");
         }
 
         if ( E1>1 && workOrder.start_E1_>=0 && workOrder.end_E1_>0 )
         {
             workOrder.filterE1_partialfourier_.create(E1);
-            GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateAsymmetricFilter(E1, workOrder.start_E1_, workOrder.end_E1_, workOrder.filterE1_partialfourier_, filterE1_pf_type_, std::ceil(filterE1_pf_width_*E1), filterE1_pf_densityComp_));
+            GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateAsymmetricFilter(E1, workOrder.start_E1_, workOrder.end_E1_, workOrder.filterE1_partialfourier_, filterE1_pf_type_, (size_t)std::ceil(filterE1_pf_width_*E1), filterE1_pf_densityComp_));
             GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE1_partialfourier_, "filterE1_partialfourier");
         }
 
         if ( E2>1 && workOrder.start_E2_>=0 && workOrder.end_E2_>0 )
         {
             workOrder.filterE2_partialfourier_.create(E2);
-            GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateAsymmetricFilter(E2, workOrder.start_E2_, workOrder.end_E2_, workOrder.filterE2_partialfourier_, filterE2_pf_type_, std::ceil(filterE2_pf_width_*E2), filterE2_pf_densityComp_));
+            GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateAsymmetricFilter(E2, workOrder.start_E2_, workOrder.end_E2_, workOrder.filterE2_partialfourier_, filterE2_pf_type_, (size_t)std::ceil(filterE2_pf_width_*E2), filterE2_pf_densityComp_));
             GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_fullPath_, gt_exporter_, workOrder.filterE2_partialfourier_, "filterE2_partialfourier");
         }
     }
diff --git a/gadgets/gtPlus/GtPlusReconJob2DTGadgetCloud.h b/gadgets/gtPlus/GtPlusReconJob2DTGadgetCloud.h
index 59fe9e7..a725865 100644
--- a/gadgets/gtPlus/GtPlusReconJob2DTGadgetCloud.h
+++ b/gadgets/gtPlus/GtPlusReconJob2DTGadgetCloud.h
@@ -4,8 +4,8 @@
             Ref to: 
 
             Hui Xue, Souheil Inati, Thomas Sangild Sorensen, Peter Kellman, Michael S. Hansen. 
-            Distributed MRI Reconstruction using Gadgetron based Cloud Computing. Submitted to
-            Magenetic Resonance in Medicine on Dec 2013.
+            Distributed MRI Reconstruction using Gadgetron based Cloud Computing. 
+            Magnetic Resonance in Medicine, doi: 10.1002/mrm.25213.
 
     \author Hui Xue
 */
@@ -21,6 +21,7 @@
 
 #include "hoNDArray_utils.h"
 
+#include "GadgetronCommon.h"
 #include "gtPlusIOAnalyze.h"
 #include "gtPlusISMRMRDReconUtil.h"
 #include "gtPlusISMRMRDReconWorkOrder.h"
@@ -32,6 +33,7 @@
 #include "gtPlusMemoryManager.h"
 
 #include "GtPlusRecon2DTCloudPackage.h"
+#include "GtPlusReconGadgetUtil.h"
 
 #ifdef USE_OMP
     #include "omp.h"
@@ -127,17 +129,6 @@ public:
 protected:
 
     // --------------------------------------------------
-    // utility functions
-    // --------------------------------------------------
-
-    // generate the debug folder path
-    // debugFolderPath = ${GADGETRON_HOME}/debugFolder
-    virtual bool generateDebugFolderPath(const std::string& debugFolder, std::string& debugFolderPath);
-
-    // get the current moment
-    void getCurrentMoment(std::string& procTime);
-
-    // --------------------------------------------------
     // functional functions
     // --------------------------------------------------
 
diff --git a/gadgets/gtPlus/GtPlusReconJob3DTGadget.cpp b/gadgets/gtPlus/GtPlusReconJob3DTGadget.cpp
index 3cf5a98..b8a26b6 100644
--- a/gadgets/gtPlus/GtPlusReconJob3DTGadget.cpp
+++ b/gadgets/gtPlus/GtPlusReconJob3DTGadget.cpp
@@ -22,7 +22,6 @@ GtPlusReconJob3DTGadget::GtPlusReconJob3DTGadget() : mem_manager_(new Gadgetron:
     process_config_called_ = false;
 
     Gadgetron::prepOpenMP();
-    Gadgetron::prepMKL();
 }
 
 GtPlusReconJob3DTGadget::~GtPlusReconJob3DTGadget()
@@ -71,7 +70,7 @@ int GtPlusReconJob3DTGadget::process_config(ACE_Message_Block* mb)
     // generate the destination folder
     if ( !debugFolder_.empty() )
     {
-        GADGET_CHECK_RETURN_FALSE(generateDebugFolderPath(debugFolder_, debugFolder_fullPath_));
+        getDebugFolderPath(debugFolder_, debugFolder_fullPath_, verboseMode_);
     }
     else
     {
@@ -80,7 +79,7 @@ int GtPlusReconJob3DTGadget::process_config(ACE_Message_Block* mb)
 
     if ( !debugFolder2_.empty() )
     {
-        GADGET_CHECK_RETURN_FALSE(generateDebugFolderPath(debugFolder2_, debugFolder2_fullPath_));
+        getDebugFolderPath(debugFolder2_, debugFolder2_fullPath_, verboseMode_);
     }
     else
     {
@@ -88,7 +87,7 @@ int GtPlusReconJob3DTGadget::process_config(ACE_Message_Block* mb)
     }
 
     GADGET_START_TIMING_CONDITION(gt_timer1_, "Pre-allocate memory ... ", performTiming_);
-    mem_manager_->increase(6.0*1024*1024*1024);
+    mem_manager_->increase( (size_t)(6.0*1024*1024*1024) );
     GADGET_STOP_TIMING_CONDITION(gt_timer1_, performTiming_);
 
     worker_grappa_.gtPlus_mem_manager_ = mem_manager_;
@@ -149,7 +148,7 @@ int GtPlusReconJob3DTGadget::process(Gadgetron::GadgetContainerMessage< int >* m
         std::ostringstream ostr;
         ostr << "ReconJob2DT_ID" << *jobID;
 
-        hoNDArray<GT_Complex8> res = job->res;
+        hoNDArray< std::complex<float> > res = job->res;
         res.squeeze();
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder2_fullPath_, gt_exporter_, res, ostr.str());
 
@@ -234,29 +233,6 @@ sendOutJob(int jobID, GtPlusReconJobTypeCPFL* job)
     return true;
 }
 
-bool GtPlusReconJob3DTGadget::
-    generateDebugFolderPath(const std::string& debugFolder, std::string& debugFolderPath)
-{
-    debugFolderPath = ACE_OS::getenv("GADGETRON_HOME");
-    debugFolderPath.append("/");
-    debugFolderPath.append(debugFolder);
-    debugFolderPath.append("/");
-    GADGET_CONDITION_MSG(verboseMode_, "Debug folder is " << debugFolderPath);
-    return true;
-}
-
-void GtPlusReconJob3DTGadget::
-    getCurrentMoment(std::string& procTime)
-{
-    char timestamp[100];
-    time_t mytime;
-    struct tm *mytm;
-    mytime=time(NULL);
-    mytm=localtime(&mytime);
-    strftime(timestamp, sizeof(timestamp),"_%a_%d_%b_%Y_%H_%M_%S",mytm);
-    procTime = timestamp;
-}
-
 GADGET_FACTORY_DECLARE(GtPlusReconJob3DTGadget)
 
 }
diff --git a/gadgets/gtPlus/GtPlusReconJob3DTGadget.h b/gadgets/gtPlus/GtPlusReconJob3DTGadget.h
index 947f9f4..d2ddb4a 100644
--- a/gadgets/gtPlus/GtPlusReconJob3DTGadget.h
+++ b/gadgets/gtPlus/GtPlusReconJob3DTGadget.h
@@ -4,8 +4,8 @@
             Ref to: 
 
             Hui Xue, Souheil Inati, Thomas Sangild Sorensen, Peter Kellman, Michael S. Hansen. 
-            Distributed MRI Reconstruction using Gadgetron based Cloud Computing. Submitted to
-            Magenetic Resonance in Medicine on Dec 2013.
+            Distributed MRI Reconstruction using Gadgetron based Cloud Computing. 
+            Magnetic Resonance in Medicine, doi: 10.1002/mrm.25213.
 
     \author Hui Xue
 */
@@ -19,6 +19,7 @@
 #include "GadgetCloudJobMessageReadWrite.h"
 #include "GadgetronTimer.h"
 
+#include "GadgetronCommon.h"
 #include "gtPlusIOAnalyze.h"
 #include "gtPlusISMRMRDReconUtil.h"
 #include "gtPlusISMRMRDReconWorkOrder.h"
@@ -28,6 +29,7 @@
 #include "gtPlusISMRMRDReconWorker3DTSPIRIT.h"
 #include "gtPlusISMRMRDReconWorker3DTL1SPIRITNCG.h"
 #include "gtPlusMemoryManager.h"
+#include "GtPlusReconGadgetUtil.h"
 
 #ifdef USE_OMP
     #include "omp.h"
@@ -63,17 +65,6 @@ public:
 protected:
 
     // --------------------------------------------------
-    // utility functions
-    // --------------------------------------------------
-
-    // generate the debug folder path
-    // debugFolderPath = ${GADGETRON_HOME}/debugFolder
-    virtual bool generateDebugFolderPath(const std::string& debugFolder, std::string& debugFolderPath);
-
-    // get the current moment
-    void getCurrentMoment(std::string& procTime);
-
-    // --------------------------------------------------
     // functional functions
     // --------------------------------------------------
 
diff --git a/gadgets/gtPlus/config/GT_2DT_Cartesian.xml b/gadgets/gtPlus/config/GT_2DT_Cartesian.xml
new file mode 100644
index 0000000..23abb4e
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_Cartesian.xml
@@ -0,0 +1,850 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for general 2D or 2D+T Cartesian reconstruction
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+
+        <property>
+            <name>performTiming</name>
+            <value>false</value>
+        </property>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>4</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!--Recon computation for 2DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon2DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_4th</name>
+            <value>DIM_NONE</value>
+        </property>
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>interleaved_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap_useHighestSignal</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allS</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>8</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_GRAPPA</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>recon_auto_parameters</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>gfactor_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>wrap_around_map_needed</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_SPIRIT -->
+        <property>
+            <name>spirit_kSize_RO</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E1</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_reg_lamda</name>
+            <value>0.005</value>
+        </property>
+        <property>
+            <name>spirit_use_gpu</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_solve_symmetric</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_iter_max</name>
+            <value>90</value>
+        </property>
+        <property>
+            <name>spirit_iter_thres</name>
+            <value>0.0015</value>
+        </property>
+        <property>
+            <name>spirit_print_iter</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_L1SPIRIT -->
+        <property>
+            <name>spirit_perform_linear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_perform_nonlinear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_parallel_imaging_lamda</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_image_reg_lamda</name>
+            <value>0.0025</value>
+        </property>
+        <property>
+            <name>spirit_data_fidelity_lamda</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_max</name>
+            <value>10</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_thres</name>
+            <value>0.0001</value>
+        </property>
+        <property>
+            <name>spirit_ncg_print_iter</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_coil_sen_map</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_moco_enhancement</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_recon_moco_images</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_RO_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E1_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E2_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_temporal_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_2D_scale_per_chunk</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>-1.0</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for the partial fourier handling algorithm; options: ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_FENGHUANG</value>
+        </property>
+
+        <!-- parameters for partial fourier homodyne algorithm -->
+        <property>
+            <name>partialFourier_homodyne_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>cloudNodeFile</name>
+            <value>myCloud_2DT.txt</value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+        <!-- parameters for recon job split -->
+        <property>
+            <name>job_split_by_S</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>job_num_of_N</name>
+            <value>32</value>
+        </property>
+        <property>
+            <name>job_max_Megabytes</name>
+            <value>10240</value>
+        </property>
+        <property>
+            <name>job_overlap</name>
+            <value>2</value>
+        </property>
+        <property>
+            <name>job_perform_on_control_node</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for the cloud computation 
+             The cloud should be defined as follows: CloudNodeX_IP/Port/XMLConfiguration etc.
+        -->
+        <property>
+            <name>CloudComputing</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>CloudSize</name>
+            <value>1</value>
+        </property>
+
+        <!-- node 0 -->
+        <property>
+            <name>CloudNode0_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_Port</name>
+            <value>9003</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_XMLConfiguration</name>
+            <value>GtProg_2DT_Cartesian_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_Cartesian_CloudNode.xml b/gadgets/gtPlus/config/GT_2DT_Cartesian_CloudNode.xml
new file mode 100644
index 0000000..d555c4d
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_Cartesian_CloudNode.xml
@@ -0,0 +1,77 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for 2DT cartesian reconstruction on GtPlus Cloud
+        This configuration file configures one gadget to perform the reconstruction for
+        2DT job packages
+
+        Depending on the incoming algorithm parameters, both linear and non-linear reconstruction
+        can be performed
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1013</slot>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusCloudJobMessageReaderCPFL</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1013</slot>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusCloudJobMessageWriterCPFL</classname>
+    </writer>
+
+    <!--
+    Recon computation for 2DT/3DT cases, process one job
+    a gtPlusReconJob2DT job consists of kspace, kernel and parameters
+    kspace: [RO E1 CHA E2/PHS]
+    -->
+
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusReconJob2DTGadget</classname>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>true</value>
+        </property>
+
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_Cartesian_Dicom.xml b/gadgets/gtPlus/config/GT_2DT_Cartesian_Dicom.xml
new file mode 100644
index 0000000..e3cec76
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_Cartesian_Dicom.xml
@@ -0,0 +1,857 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for general 2D or 2D+T cartesian reconstruction
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1012</slot>
+        <dll>gadgetron_dicom</dll>
+        <classname>DicomImageWriter</classname>
+    </writer>
+
+    <writer>
+        <slot>1018</slot>
+        <dll>gadgetron_dicom</dll>
+        <classname>DicomImageAttribWriter</classname>
+    </writer>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>4</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!--Recon computation for 2DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon2DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_4th</name>
+            <value>DIM_NONE</value>
+        </property>
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>interleaved_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap_useHighestSignal</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allS</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>8</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_GRAPPA</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>recon_auto_parameters</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>gfactor_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>wrap_around_map_needed</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_SPIRIT -->
+        <property>
+            <name>spirit_kSize_RO</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E1</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_reg_lamda</name>
+            <value>0.005</value>
+        </property>
+        <property>
+            <name>spirit_use_gpu</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_solve_symmetric</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_iter_max</name>
+            <value>90</value>
+        </property>
+        <property>
+            <name>spirit_iter_thres</name>
+            <value>0.0015</value>
+        </property>
+        <property>
+            <name>spirit_print_iter</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_L1SPIRIT -->
+        <property>
+            <name>spirit_perform_linear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_perform_nonlinear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_parallel_imaging_lamda</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_image_reg_lamda</name>
+            <value>0.0025</value>
+        </property>
+        <property>
+            <name>spirit_data_fidelity_lamda</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_max</name>
+            <value>10</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_thres</name>
+            <value>0.0001</value>
+        </property>
+        <property>
+            <name>spirit_ncg_print_iter</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_coil_sen_map</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_moco_enhancement</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_recon_moco_images</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_RO_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E1_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E2_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_temporal_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_2D_scale_per_chunk</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>-1.0</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_FENGHUANG</value>
+        </property>
+
+        <!-- parameters for partial fourier homodyne algorithm -->
+        <property>
+            <name>partialFourier_homodyne_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>cloudNodeFile</name>
+            <value>myCloud_2DT.txt</value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+        <!-- parameters for recon job split -->
+        <property>
+            <name>job_split_by_S</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>job_num_of_N</name>
+            <value>32</value>
+        </property>
+        <property>
+            <name>job_max_Megabytes</name>
+            <value>10240</value>
+        </property>
+        <property>
+            <name>job_overlap</name>
+            <value>2</value>
+        </property>
+        <property>
+            <name>job_perform_on_control_node</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for the cloud computation 
+             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
+        -->
+        <property>
+            <name>CloudComputing</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>CloudSize</name>
+            <value>1</value>
+        </property>
+
+        <!-- node 0 -->
+        <property>
+            <name>CloudNode0_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_Port</name>
+            <value>9003</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_XMLConfiguration</name>
+            <value>GtProg_2DT_Cartesian_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>DicomFinishAttribGadget</name>
+        <dll>gadgetron_dicom</dll>
+        <classname>DicomFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_Cartesian_DualLayer_Gateway_L1SPIRIT.xml b/gadgets/gtPlus/config/GT_2DT_Cartesian_DualLayer_Gateway_L1SPIRIT.xml
new file mode 100644
index 0000000..a653131
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_Cartesian_DualLayer_Gateway_L1SPIRIT.xml
@@ -0,0 +1,851 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for general 2D Cartesian reconstruction using L1 SPIRIT
+        The GtPlus cloud computing by default is turned on in this configuration file
+        The dual-layer cloud topology is used here, therefore every incoming SLICE is sent
+        to one first layer GtPlus cloud node. This first layer node can further split the job and
+        process the SLICE with one or more second layer nodes.
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+
+        Ref to: 
+
+        Hui Xue, Souheil Inati, Thomas Sangild Sorensen, Peter Kellman, Michael S. Hansen. 
+        Distributed MRI Reconstruction using Gadgetron based Cloud Computing. 
+        Magnetic Resonance in Medicine, doi: 10.1002/mrm.25213.
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!-- Recon computation for 2DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon2DTGadgetCloud</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_4th</name>
+            <value>DIM_Phase</value>
+        </property>
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>interleaved_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap_useHighestSignal</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allS</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>0.002</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>-1</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_L1SPIRIT</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>recon_auto_parameters</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_SPIRIT -->
+        <property>
+            <name>spirit_kSize_RO</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E1</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_reg_lamda</name>
+            <value>0.005</value>
+        </property>
+        <property>
+            <name>spirit_use_gpu</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_solve_symmetric</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_iter_max</name>
+            <value>90</value>
+        </property>
+        <property>
+            <name>spirit_iter_thres</name>
+            <value>0.0015</value>
+        </property>
+        <property>
+            <name>spirit_print_iter</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_L1SPIRIT -->
+        <property>
+            <name>spirit_perform_linear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_perform_nonlinear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_parallel_imaging_lamda</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_image_reg_lamda</name>
+            <value>0.005</value>
+        </property>
+        <property>
+            <name>spirit_data_fidelity_lamda</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_max</name>
+            <value>10</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_thres</name>
+            <value>0.0001</value>
+        </property>
+        <property>
+            <name>spirit_ncg_print_iter</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_coil_sen_map</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_moco_enhancement</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_recon_moco_images</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_RO_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E1_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E2_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_temporal_enhancement_ratio</name>
+            <value>20.0</value>
+        </property>
+        <property>
+            <name>spirit_2D_scale_per_chunk</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>-1.0</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_ZEROFILLING_FILTER</value>
+        </property>
+
+        <!-- parameters for partial fourier homodyne algorithm -->
+        <property>
+            <name>partialFourier_homodyne_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>cloudNodeFile</name>
+            <value>myCloud_2DT_DualLayer.txt</value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+        <!-- parameters for recon job split -->
+        <property>
+            <name>job_split_by_S</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>job_num_of_N</name>
+            <value>48</value>
+        </property>
+        <property>
+            <name>job_max_Megabytes</name>
+            <value>2048</value>
+        </property>
+        <property>
+            <name>job_overlap</name>
+            <value>1</value>
+        </property>
+        <property>
+            <name>job_perform_on_control_node</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for the cloud computation 
+             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
+        -->
+
+        <property>
+            <name>CloudNodeXMLConfiguration</name>
+            <value>GT_2DT_Cartesian_FirstLayer_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudComputing</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>CloudSize</name>
+            <value>1</value>
+        </property>
+
+        <!-- node 0 -->
+        <property>
+            <name>CloudNode0_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_Port</name>
+            <value>9003</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_XMLConfiguration</name>
+            <value>GtProg_2DT_Cartesian_FirstLayer_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_Cartesian_DualLayer_Gateway_SPIRIT.xml b/gadgets/gtPlus/config/GT_2DT_Cartesian_DualLayer_Gateway_SPIRIT.xml
new file mode 100644
index 0000000..30c6503
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_Cartesian_DualLayer_Gateway_SPIRIT.xml
@@ -0,0 +1,845 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for general 2D cartesian reconstruction using linear SPIRIT
+        The GtPlus cloud computing by default is turned on in this configuration file
+        The dual-layer cloud topology is used here, therefore every incoming SLICE is sent
+        to one first layer GtPlus cloud node. This first layer node can further split the job and
+        process the SLICE with one or more second layer nodes.
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+
+        Ref to: 
+
+        Hui Xue, Souheil Inati, Thomas Sangild Sorensen, Peter Kellman, Michael S. Hansen. 
+        Distributed MRI Reconstruction using Gadgetron based Cloud Computing. 
+        Magnetic Resonance in Medicine, doi: 10.1002/mrm.25213.
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!-- Recon computation for 2DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon2DTGadgetCloud</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_4th</name>
+            <value>DIM_Phase</value>
+        </property>
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>interleaved_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap_useHighestSignal</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allS</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>-1</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_GRAPPA</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>recon_auto_parameters</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_SPIRIT -->
+        <property>
+            <name>spirit_kSize_RO</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E1</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_reg_lamda</name>
+            <value>0.005</value>
+        </property>
+        <property>
+            <name>spirit_use_gpu</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_solve_symmetric</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_iter_max</name>
+            <value>90</value>
+        </property>
+        <property>
+            <name>spirit_iter_thres</name>
+            <value>0.0015</value>
+        </property>
+        <property>
+            <name>spirit_print_iter</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_L1SPIRIT -->
+        <property>
+            <name>spirit_perform_linear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_perform_nonlinear</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_parallel_imaging_lamda</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_image_reg_lamda</name>
+            <value>0.0025</value>
+        </property>
+        <property>
+            <name>spirit_data_fidelity_lamda</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_max</name>
+            <value>10</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_thres</name>
+            <value>0.0001</value>
+        </property>
+        <property>
+            <name>spirit_ncg_print_iter</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_coil_sen_map</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_moco_enhancement</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_recon_moco_images</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_RO_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E1_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E2_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_temporal_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_2D_scale_per_chunk</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>-1.0</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_FENGHUANG</value>
+        </property>
+
+        <!-- parameters for partial fourier homodyne algorithm -->
+        <property>
+            <name>partialFourier_homodyne_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>cloudNodeFile</name>
+            <value>myCloud_2DT_DualLayer.txt</value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+        <!-- parameters for recon job split -->
+        <property>
+            <name>job_split_by_S</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>job_num_of_N</name>
+            <value>48</value>
+        </property>
+        <property>
+            <name>job_max_Megabytes</name>
+            <value>2048</value>
+        </property>
+        <property>
+            <name>job_overlap</name>
+            <value>2</value>
+        </property>
+        <property>
+            <name>job_perform_on_control_node</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for the cloud computation 
+             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
+        -->
+        <property>
+            <name>CloudComputing</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>CloudSize</name>
+            <value>1</value>
+        </property>
+
+        <!-- node 0 -->
+        <property>
+            <name>CloudNode0_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_Port</name>
+            <value>9003</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_XMLConfiguration</name>
+            <value>GtProg_2DT_Cartesian_FirstLayer_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_Cartesian_FirstLayer_CloudNode.xml b/gadgets/gtPlus/config/GT_2DT_Cartesian_FirstLayer_CloudNode.xml
new file mode 100644
index 0000000..e35e3ea
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_Cartesian_FirstLayer_CloudNode.xml
@@ -0,0 +1,279 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for general 2D cartesian reconstruction using linear or non-linear SPIRIT
+        The dual-layer cloud topology is used here, therefore every incoming SLICE is sent
+        to one first layer GtPlus cloud node. This first layer node can further split the job and
+        process the SLICE with one or more second layer nodes.
+
+        This configuration file is for the first layer GtPlus cloud node.
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+
+        Ref to: 
+
+        Hui Xue, Souheil Inati, Thomas Sangild Sorensen, Peter Kellman, Michael S. Hansen. 
+        Distributed MRI Reconstruction using Gadgetron based Cloud Computing. 
+        Magnetic Resonance in Medicine, doi: 10.1002/mrm.25213.
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1014</slot>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlus2DTGadgetCloudJobMessageReaderCPFL</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1014</slot>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlus2DTGadgetCloudJobMessageWriterCPFL</classname>
+    </writer>
+
+    <!--
+    Recon computation for 2DT cases
+    -->
+
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusReconJob2DTGadgetCloud</classname>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for recon job split -->
+        <property>
+            <name>job_split_by_S</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>job_num_of_N</name>
+            <value>48</value>
+        </property>
+        <property>
+            <name>job_max_Megabytes</name>
+            <value>2048</value>
+        </property>
+        <property>
+            <name>job_overlap</name>
+            <value>1</value>
+        </property>
+        <property>
+            <name>job_perform_on_control_node</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for the cloud computation 
+             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
+        -->
+        <property>
+            <name>CloudComputing</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>CloudSize</name>
+            <value>1</value>
+        </property>
+
+        <!-- node 0 -->
+        <property>
+            <name>CloudNode0_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_Port</name>
+            <value>9004</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_XMLConfiguration</name>
+            <value>GtProg_2DT_Cartesian_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>cloudNodeFile</name>
+            <value>myCloud_2DT_DualLayer_FirstLayer.txt</value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>true</value>
+        </property>
+
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_Cartesian_GFactor.xml b/gadgets/gtPlus/config/GT_2DT_Cartesian_GFactor.xml
new file mode 100644
index 0000000..c346d42
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_Cartesian_GFactor.xml
@@ -0,0 +1,872 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for general 2D or 2D+T cartesian reconstruction
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!--
+    <writer>
+        <slot>1012</slot>
+        <dll>gadgetron_dicom</dll>
+        <classname>DicomImageWriter</classname>
+    </writer>
+
+    <writer>
+        <slot>1018</slot>
+        <dll>gadgetron_dicom</dll>
+        <classname>DicomImageAttribWriter</classname>
+    </writer>
+    -->
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+
+        <!-- File prefix for stored noise prewhitener matrix -->
+        <property>
+            <name>noise_dependency_prefix</name>
+            <value>GadgetronNoisePreWhitener</value>
+        </property>
+
+        <!-- Preset noise dwell time; for noise dependency measurements -->
+        <property>
+            <name>noise_dwell_time_us_preset</name>
+            <value>5.0</value>
+        </property>
+
+        <!-- Whether to perform timing -->
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>4</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!--Recon computation for 2DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon2DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_4th</name>
+            <value>DIM_NONE</value>
+        </property>
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>interleaved_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap_useHighestSignal</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>false</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allS</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression_thres</name>
+            <value>-1</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression_num_modesKept</name>
+            <value>-1</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>-1</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>-1</value>
+        </property>
+
+        <!-- parameters for coil map estimation, ISMRMRD_SOUHEIL, ISMRMRD_SOUHEIL_ITER -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_GRAPPA</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>recon_auto_parameters</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>gfactor_needed</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>wrap_around_map_needed</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_SPIRIT -->
+        <property>
+            <name>spirit_kSize_RO</name>
+            <value>9</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E1</name>
+            <value>9</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_reg_lamda</name>
+            <value>0.005</value>
+        </property>
+        <property>
+            <name>spirit_use_gpu</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_solve_symmetric</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_iter_max</name>
+            <value>90</value>
+        </property>
+        <property>
+            <name>spirit_iter_thres</name>
+            <value>0.0015</value>
+        </property>
+        <property>
+            <name>spirit_print_iter</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>10</value>
+        </property>
+
+        <property>
+            <name>scalingFactor_gfactor</name>
+            <value>100</value>
+        </property>
+
+        <property>
+            <name>scalingFactor_snr_image</name>
+            <value>10</value>
+        </property>
+
+        <property>
+            <name>scalingFactor_std_map</name>
+            <value>1000</value>
+        </property>
+
+        <property>
+            <name>start_frame_for_std_map</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <!-- 
+             ISMRMRD_FILTER_GAUSSIAN,
+             ISMRMRD_FILTER_HANNING,
+             ISMRMRD_FILTER_TUKEY,
+             ISMRMRD_FILTER_TAPERED_HANNING,
+             ISMRMRD_FILTER_NONE 
+        -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_NONE</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_NONE</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_NONE</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <!-- 
+             ISMRMRD_FILTER_GAUSSIAN,
+             ISMRMRD_FILTER_HANNING,
+             ISMRMRD_FILTER_TUKEY,
+             ISMRMRD_FILTER_TAPERED_HANNING,
+             ISMRMRD_FILTER_NONE 
+        -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <!-- only ISMRMRD_FILTER_TAPERED_HANNING is available for the moment -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_ZEROFILLING_FILTER</value>
+        </property>
+
+        <!-- parameters for partial fourier homodyne algorithm -->
+        <property>
+            <name>partialFourier_homodyne_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>cloudNodeFile</name>
+            <value>myCloud_2DT.txt</value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+        <!-- parameters for recon job split -->
+        <property>
+            <name>job_split_by_S</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>job_num_of_N</name>
+            <value>32</value>
+        </property>
+        <property>
+            <name>job_max_Megabytes</name>
+            <value>10240</value>
+        </property>
+        <property>
+            <name>job_overlap</name>
+            <value>2</value>
+        </property>
+        <property>
+            <name>job_perform_on_control_node</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for the cloud computation 
+             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
+        -->
+        <property>
+            <name>CloudComputing</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>CloudSize</name>
+            <value>1</value>
+        </property>
+
+        <!-- node 0 -->
+        <property>
+            <name>CloudNode0_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_Port</name>
+            <value>9003</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_XMLConfiguration</name>
+            <value>GtProg_2DT_Cartesian_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+
+        <property>
+            <name>max_intensity</name>
+            <value>32767</value>
+        </property>
+
+        <property>
+            <name>intensity_offset</name>
+            <value>16384</value>
+        </property>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+        <name>DicomFinishAttribGadget</name>
+        <dll>gadgetron_dicom</dll>
+        <classname>DicomFinishAttribGadgetUSHORT</classname>
+    </gadget>
+    -->
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_Cartesian_ImageTrigger_Dicom.xml b/gadgets/gtPlus/config/GT_2DT_Cartesian_ImageTrigger_Dicom.xml
new file mode 100644
index 0000000..97ef06a
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_Cartesian_ImageTrigger_Dicom.xml
@@ -0,0 +1,879 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for general 2D or 2D+T cartesian reconstruction
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1012</slot>
+        <dll>gadgetron_dicom</dll>
+        <classname>DicomImageWriter</classname>
+    </writer>
+
+    <writer>
+        <slot>1018</slot>
+        <dll>gadgetron_dicom</dll>
+        <classname>DicomImageAttribWriter</classname>
+    </writer>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>4</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!--Recon computation for 2DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon2DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_4th</name>
+            <value>DIM_NONE</value>
+        </property>
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>interleaved_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap_useHighestSignal</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allS</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>8</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_GRAPPA</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>recon_auto_parameters</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>gfactor_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>wrap_around_map_needed</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_SPIRIT -->
+        <property>
+            <name>spirit_kSize_RO</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E1</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_reg_lamda</name>
+            <value>0.005</value>
+        </property>
+        <property>
+            <name>spirit_use_gpu</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_solve_symmetric</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_iter_max</name>
+            <value>90</value>
+        </property>
+        <property>
+            <name>spirit_iter_thres</name>
+            <value>0.0015</value>
+        </property>
+        <property>
+            <name>spirit_print_iter</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_L1SPIRIT -->
+        <property>
+            <name>spirit_perform_linear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_perform_nonlinear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_parallel_imaging_lamda</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_image_reg_lamda</name>
+            <value>0.0025</value>
+        </property>
+        <property>
+            <name>spirit_data_fidelity_lamda</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_max</name>
+            <value>10</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_thres</name>
+            <value>0.0001</value>
+        </property>
+        <property>
+            <name>spirit_ncg_print_iter</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_coil_sen_map</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_moco_enhancement</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_recon_moco_images</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_RO_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E1_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E2_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_temporal_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_2D_scale_per_chunk</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>-1.0</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_FENGHUANG</value>
+        </property>
+
+        <!-- parameters for partial fourier homodyne algorithm -->
+        <property>
+            <name>partialFourier_homodyne_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>cloudNodeFile</name>
+            <value>myCloud_2DT.txt</value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+        <!-- parameters for recon job split -->
+        <property>
+            <name>job_split_by_S</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>job_num_of_N</name>
+            <value>32</value>
+        </property>
+        <property>
+            <name>job_max_Megabytes</name>
+            <value>10240</value>
+        </property>
+        <property>
+            <name>job_overlap</name>
+            <value>2</value>
+        </property>
+        <property>
+            <name>job_perform_on_control_node</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for the cloud computation 
+             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
+        -->
+        <property>
+            <name>CloudComputing</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>CloudSize</name>
+            <value>1</value>
+        </property>
+
+        <!-- node 0 -->
+        <property>
+            <name>CloudNode0_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_Port</name>
+            <value>9003</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_XMLConfiguration</name>
+            <value>GtProg_2DT_Cartesian_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+    </gadget>
+
+    <!-- Image recon processing -->
+    <gadget>
+        <name>ImageAcc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorImageTriggerGadget</classname>
+
+        <!-- triggered dimensions -->
+        <property> <name>TriggerChannel</name>      <value>false</value> </property>
+        <property> <name>TriggerSlice</name>        <value>false</value> </property>
+        <property> <name>TriggerE2</name>           <value>false</value> </property>
+        <property> <name>TriggerContrast</name>     <value>false</value> </property>
+        <property> <name>TriggerPhase</name>        <value>true</value> </property>
+        <property> <name>TriggerRepetition</name>   <value>false</value> </property>
+        <property> <name>TriggerSet</name>          <value>false</value> </property>
+
+        <!-- work flow -->
+        <property> <name>PassImageImmediately</name> <value>false</value> </property>
+
+        <!-- debug and info mode -->
+        <property> <name>verboseMode</name> <value>true</value> </property>
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>DicomFinishAttribGadget</name>
+        <dll>gadgetron_dicom</dll>
+        <classname>DicomFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_Cartesian_L1SPIRIT.xml b/gadgets/gtPlus/config/GT_2DT_Cartesian_L1SPIRIT.xml
new file mode 100644
index 0000000..7dd1e3d
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_Cartesian_L1SPIRIT.xml
@@ -0,0 +1,836 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for general 2D cartesian reconstruction using L1 SPIRIT
+        The GtPlus cloud computing can be turned on in this configuration file
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>8</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!-- Recon computation for 2DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon2DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_4th</name>
+            <value>DIM_NONE</value>
+        </property>
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>interleaved_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap_useHighestSignal</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allS</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>-1</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_L1SPIRIT</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>recon_auto_parameters</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_SPIRIT -->
+        <property>
+            <name>spirit_kSize_RO</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E1</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_reg_lamda</name>
+            <value>0.005</value>
+        </property>
+        <property>
+            <name>spirit_use_gpu</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_solve_symmetric</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_iter_max</name>
+            <value>90</value>
+        </property>
+        <property>
+            <name>spirit_iter_thres</name>
+            <value>0.0015</value>
+        </property>
+        <property>
+            <name>spirit_print_iter</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_L1SPIRIT -->
+        <property>
+            <name>spirit_perform_linear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_perform_nonlinear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_parallel_imaging_lamda</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_image_reg_lamda</name>
+            <value>0.0025</value>
+        </property>
+        <property>
+            <name>spirit_data_fidelity_lamda</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_max</name>
+            <value>10</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_thres</name>
+            <value>0.0001</value>
+        </property>
+        <property>
+            <name>spirit_ncg_print_iter</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_coil_sen_map</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_moco_enhancement</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_recon_moco_images</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_RO_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E1_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E2_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_temporal_enhancement_ratio</name>
+            <value>20.0</value>
+        </property>
+        <property>
+            <name>spirit_2D_scale_per_chunk</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>-1.0</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>0.5</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>0.5</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>0.5</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_ZEROFILLING_FILTER</value>
+        </property>
+
+        <!-- parameters for partial fourier homodyne algorithm -->
+        <property>
+            <name>partialFourier_homodyne_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>cloudNodeFile</name>
+            <value>myCloud_2DT.txt</value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+        <!-- parameters for recon job split -->
+        <property>
+            <name>job_split_by_S</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>job_num_of_N</name>
+            <value>64</value>
+        </property>
+        <property>
+            <name>job_max_Megabytes</name>
+            <value>8192</value>
+        </property>
+        <property>
+            <name>job_overlap</name>
+            <value>2</value>
+        </property>
+        <property>
+            <name>job_perform_on_control_node</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for the cloud computation 
+             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
+        -->
+        <property>
+            <name>CloudComputing</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>CloudSize</name>
+            <value>1</value>
+        </property>
+
+        <!-- node 0 -->
+        <property>
+            <name>CloudNode0_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_Port</name>
+            <value>9003</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_XMLConfiguration</name>
+            <value>GtProg_2DT_Cartesian_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_Cartesian_PseudoReplica_SNRUnitRecon.xml b/gadgets/gtPlus/config/GT_2DT_Cartesian_PseudoReplica_SNRUnitRecon.xml
new file mode 100644
index 0000000..309589b
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_Cartesian_PseudoReplica_SNRUnitRecon.xml
@@ -0,0 +1,810 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT_2DT_Cartesian_PseudoReplica_SNRUnitRecon.xml
+
+        GT Plus configuration file for general 2D or 2D+T Cartesian reconstruction with pseudo-replica noise add-on
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+
+        This workflow is the second step of pseudo-replica SNR unit reconstruction.
+
+        The input data to the workflow is SNR unit scaled kspace data.
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1012</slot>
+        <dll>gadgetron_dicom</dll>
+        <classname>DicomImageWriter</classname>
+    </writer>
+
+    <writer>
+        <slot>1018</slot>
+        <dll>gadgetron_dicom</dll>
+        <classname>DicomImageAttribWriter</classname>
+    </writer>
+
+    <!-- add pseudo white noise -->
+    <gadget>
+        <name>WhiteNoiseAdd</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>WhiteNoiseInjectorGadget</classname>
+
+        <!-- mean and standard deviation for the added white noise -->
+        <property>
+            <name>noise_mean</name>
+            <value>0.0</value>
+        </property>
+
+        <property>
+            <name>noise_std</name>
+            <value>1.0</value>
+        </property>
+
+        <!-- whether to add noise on separate or external reference acquisitions -->
+        <property>
+            <name>add_noise_ref</name>
+            <value>false</value>
+        </property>
+    </gadget>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>4</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!--Recon computation for 2DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon2DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_4th</name>
+            <value>DIM_NONE</value>
+        </property>
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>interleaved_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap_useHighestSignal</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>false</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allS</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression_thres</name>
+            <value>-1</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression_num_modesKept</name>
+            <value>-1</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>-1</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>-1</value>
+        </property>
+
+        <!-- parameters for coil map estimation, ISMRMRD_SOUHEIL, ISMRMRD_SOUHEIL_ITER -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_GRAPPA</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>recon_auto_parameters</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>gfactor_needed</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>wrap_around_map_needed</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>100</value>
+        </property>
+
+        <property>
+            <name>scalingFactor_gfactor</name>
+            <value>1000</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <!-- 
+             ISMRMRD_FILTER_GAUSSIAN,
+             ISMRMRD_FILTER_HANNING,
+             ISMRMRD_FILTER_TUKEY,
+             ISMRMRD_FILTER_TAPERED_HANNING,
+             ISMRMRD_FILTER_NONE 
+        -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_NONE</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_NONE</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_NONE</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <!-- 
+             ISMRMRD_FILTER_GAUSSIAN,
+             ISMRMRD_FILTER_HANNING,
+             ISMRMRD_FILTER_TUKEY,
+             ISMRMRD_FILTER_TAPERED_HANNING,
+             ISMRMRD_FILTER_NONE 
+        -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <!-- only ISMRMRD_FILTER_TAPERED_HANNING is available for the moment -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_ZEROFILLING_FILTER</value>
+        </property>
+
+        <!-- parameters for partial fourier homodyne algorithm -->
+        <property>
+            <name>partialFourier_homodyne_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>cloudNodeFile</name>
+            <value>myCloud_2DT.txt</value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+        <!-- parameters for recon job split -->
+        <property>
+            <name>job_split_by_S</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>job_num_of_N</name>
+            <value>32</value>
+        </property>
+        <property>
+            <name>job_max_Megabytes</name>
+            <value>10240</value>
+        </property>
+        <property>
+            <name>job_overlap</name>
+            <value>2</value>
+        </property>
+        <property>
+            <name>job_perform_on_control_node</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for the cloud computation 
+             The cloud should be defined as follows: CloudNodeX_IP/Port/XMLConfiguration etc.
+        -->
+        <property>
+            <name>CloudComputing</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>CloudSize</name>
+            <value>1</value>
+        </property>
+
+        <!-- node 0 -->
+        <property>
+            <name>CloudNode0_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_Port</name>
+            <value>9003</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_XMLConfiguration</name>
+            <value>GtProg_2DT_Cartesian_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+    </gadget>
+
+    <!-- after recon processing -->
+    <!--
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+
+        <property>
+            <name>max_intensity</name>
+            <value>32767</value>
+        </property>
+
+        <property>
+            <name>intensity_offset</name>
+            <value>16384</value>
+        </property>
+    </gadget>
+    -->
+
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+
+    <gadget>
+        <name>DicomFinishAttribGadget</name>
+        <dll>gadgetron_dicom</dll>
+        <classname>DicomFinishAttribGadgetUSHORT</classname>
+    </gadget>
+    -->
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_Cartesian_SPIRIT.xml b/gadgets/gtPlus/config/GT_2DT_Cartesian_SPIRIT.xml
new file mode 100644
index 0000000..8b552b2
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_Cartesian_SPIRIT.xml
@@ -0,0 +1,836 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for general 2D cartesian reconstruction using linear SPIRIT
+        The GtPlus cloud computing can be turned on in this configuration file
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>8</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!-- Recon computation for 2DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon2DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_4th</name>
+            <value>DIM_NONE</value>
+        </property>
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>interleaved_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap_useHighestSignal</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allS</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>-1</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_SPIRIT</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>recon_auto_parameters</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_SPIRIT -->
+        <property>
+            <name>spirit_kSize_RO</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E1</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_reg_lamda</name>
+            <value>0.005</value>
+        </property>
+        <property>
+            <name>spirit_use_gpu</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_solve_symmetric</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_iter_max</name>
+            <value>90</value>
+        </property>
+        <property>
+            <name>spirit_iter_thres</name>
+            <value>0.0015</value>
+        </property>
+        <property>
+            <name>spirit_print_iter</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_L1SPIRIT -->
+        <property>
+            <name>spirit_perform_linear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_perform_nonlinear</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_parallel_imaging_lamda</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_image_reg_lamda</name>
+            <value>0.0025</value>
+        </property>
+        <property>
+            <name>spirit_data_fidelity_lamda</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_max</name>
+            <value>10</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_thres</name>
+            <value>0.0001</value>
+        </property>
+        <property>
+            <name>spirit_ncg_print_iter</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_coil_sen_map</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_moco_enhancement</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_recon_moco_images</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_RO_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E1_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E2_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_temporal_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_2D_scale_per_chunk</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>-1.0</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_FENGHUANG</value>
+        </property>
+
+        <!-- parameters for partial fourier homodyne algorithm -->
+        <property>
+            <name>partialFourier_homodyne_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>cloudNodeFile</name>
+            <value>myCloud_2DT.txt</value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+        <!-- parameters for recon job split -->
+        <property>
+            <name>job_split_by_S</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>job_num_of_N</name>
+            <value>64</value>
+        </property>
+        <property>
+            <name>job_max_Megabytes</name>
+            <value>8192</value>
+        </property>
+        <property>
+            <name>job_overlap</name>
+            <value>1</value>
+        </property>
+        <property>
+            <name>job_perform_on_control_node</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for the cloud computation 
+             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
+        -->
+        <property>
+            <name>CloudComputing</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>CloudSize</name>
+            <value>1</value>
+        </property>
+
+        <!-- node 0 -->
+        <property>
+            <name>CloudNode0_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_Port</name>
+            <value>9003</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_XMLConfiguration</name>
+            <value>GtProg_2DT_Cartesian_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_Cartesian_SingleLayer_CloudNode.xml b/gadgets/gtPlus/config/GT_2DT_Cartesian_SingleLayer_CloudNode.xml
new file mode 100644
index 0000000..e35e3ea
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_Cartesian_SingleLayer_CloudNode.xml
@@ -0,0 +1,279 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for general 2D cartesian reconstruction using linear or non-linear SPIRIT
+        The dual-layer cloud topology is used here, therefore every incoming SLICE is sent
+        to one first layer GtPlus cloud node. This first layer node can further split the job and
+        process the SLICE with one or more second layer nodes.
+
+        This configuration file is for the first layer GtPlus cloud node.
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+
+        Ref to: 
+
+        Hui Xue, Souheil Inati, Thomas Sangild Sorensen, Peter Kellman, Michael S. Hansen. 
+        Distributed MRI Reconstruction using Gadgetron based Cloud Computing. 
+        Magnetic Resonance in Medicine, doi: 10.1002/mrm.25213.
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1014</slot>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlus2DTGadgetCloudJobMessageReaderCPFL</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1014</slot>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlus2DTGadgetCloudJobMessageWriterCPFL</classname>
+    </writer>
+
+    <!--
+    Recon computation for 2DT cases
+    -->
+
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusReconJob2DTGadgetCloud</classname>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for recon job split -->
+        <property>
+            <name>job_split_by_S</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>job_num_of_N</name>
+            <value>48</value>
+        </property>
+        <property>
+            <name>job_max_Megabytes</name>
+            <value>2048</value>
+        </property>
+        <property>
+            <name>job_overlap</name>
+            <value>1</value>
+        </property>
+        <property>
+            <name>job_perform_on_control_node</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for the cloud computation 
+             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
+        -->
+        <property>
+            <name>CloudComputing</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>CloudSize</name>
+            <value>1</value>
+        </property>
+
+        <!-- node 0 -->
+        <property>
+            <name>CloudNode0_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_Port</name>
+            <value>9004</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_XMLConfiguration</name>
+            <value>GtProg_2DT_Cartesian_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>cloudNodeFile</name>
+            <value>myCloud_2DT_DualLayer_FirstLayer.txt</value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>true</value>
+        </property>
+
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_FatWater.xml b/gadgets/gtPlus/config/GT_2DT_FatWater.xml
new file mode 100644
index 0000000..062d0e8
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_FatWater.xml
@@ -0,0 +1,696 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for 2DT cartesian reconstruction on fat water multi-contrast application
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>4</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!-- Recon computation for 2DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon2DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_4th</name>
+            <value>DIM_Repetition</value>
+        </property>
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_Contrast</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_Set</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichS_combinationcoeff</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>interleaved_whichS_combinationcoeff</name>
+            <value>1</value>
+        </property>
+        <property>
+            <name>interleaved_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap_useHighestSignal</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_whichS_combinationcoeff</name>
+            <value>1</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_whichS_combinationcoeff</name>
+            <value>1</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allS</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>8</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_GRAPPA</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>gfactor_needed</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_SPIRIT -->
+        <property>
+            <name>spirit_kSize_RO</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E1</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_reg_lamda</name>
+            <value>0.005</value>
+        </property>
+        <property>
+            <name>spirit_use_gpu</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_iter_max</name>
+            <value>70</value>
+        </property>
+        <property>
+            <name>spirit_iter_thres</name>
+            <value>1e-5</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>-1.0</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_FENGHUANG</value>
+        </property>
+
+        <!-- parameters for partial fourier homodyne algorithm -->
+        <property>
+            <name>partialFourier_homodyne_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_HASTE.xml b/gadgets/gtPlus/config/GT_2DT_HASTE.xml
new file mode 100644
index 0000000..59425fc
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_HASTE.xml
@@ -0,0 +1,815 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for general 2D or 2D+T Cartesian reconstruction
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!--
+    <writer>
+        <slot>1012</slot>
+        <dll>gadgetron_dicom</dll>
+        <classname>DicomImageWriter</classname>
+    </writer>
+
+    <writer>
+        <slot>1018</slot>
+        <dll>gadgetron_dicom</dll>
+        <classname>DicomImageAttribWriter</classname>
+    </writer>
+    -->
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+
+        <!-- File prefix for stored noise prewhitener matrix -->
+        <property>
+            <name>noise_dependency_prefix</name>
+            <value>GadgetronNoisePreWhitener</value>
+        </property>
+
+        <!-- Preset noise dwell time; for noise dependency measurements -->
+        <property>
+            <name>noise_dwell_time_us_preset</name>
+            <value>5.0</value>
+        </property>
+
+        <!-- Whether to perform timing -->
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>4</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!--Recon computation for 2DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon2DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_4th</name>
+            <value>DIM_Repetition</value>
+        </property>
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>interleaved_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap_useHighestSignal</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>separate_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allS</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression_thres</name>
+            <value>-1</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression_num_modesKept</name>
+            <value>-1</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>-1</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>8</value>
+        </property>
+
+        <!-- parameters for coil map estimation, ISMRMRD_SOUHEIL, ISMRMRD_SOUHEIL_ITER -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_GRAPPA</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>recon_auto_parameters</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>gfactor_needed</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>10</value>
+        </property>
+
+        <property>
+            <name>scalingFactor_gfactor</name>
+            <value>100</value>
+        </property>
+
+        <property>
+            <name>scalingFactor_snr_image</name>
+            <value>10</value>
+        </property>
+
+        <property>
+            <name>scalingFactor_std_map</name>
+            <value>1000</value>
+        </property>
+
+        <property>
+            <name>start_frame_for_std_map</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <!-- 
+             ISMRMRD_FILTER_GAUSSIAN,
+             ISMRMRD_FILTER_HANNING,
+             ISMRMRD_FILTER_TUKEY,
+             ISMRMRD_FILTER_TAPERED_HANNING,
+             ISMRMRD_FILTER_NONE 
+        -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <!-- 
+             ISMRMRD_FILTER_GAUSSIAN,
+             ISMRMRD_FILTER_HANNING,
+             ISMRMRD_FILTER_TUKEY,
+             ISMRMRD_FILTER_TAPERED_HANNING,
+             ISMRMRD_FILTER_NONE 
+        -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <!-- only ISMRMRD_FILTER_TAPERED_HANNING is available for the moment -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_POCS</value>
+        </property>
+
+        <!-- parameters for partial fourier homodyne algorithm -->
+        <property>
+            <name>partialFourier_homodyne_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>6</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>6</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>cloudNodeFile</name>
+            <value>myCloud_2DT.txt</value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+        <!-- parameters for recon job split -->
+        <property>
+            <name>job_split_by_S</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>job_num_of_N</name>
+            <value>32</value>
+        </property>
+        <property>
+            <name>job_max_Megabytes</name>
+            <value>10240</value>
+        </property>
+        <property>
+            <name>job_overlap</name>
+            <value>2</value>
+        </property>
+        <property>
+            <name>job_perform_on_control_node</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for the cloud computation 
+             The cloud should be defined as follows: CloudNodeX_IP/Port/XMLConfiguration etc.
+        -->
+        <property>
+            <name>CloudComputing</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>CloudSize</name>
+            <value>1</value>
+        </property>
+
+        <!-- node 0 -->
+        <property>
+            <name>CloudNode0_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_Port</name>
+            <value>9003</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_XMLConfiguration</name>
+            <value>GtProg_2DT_Cartesian_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+        <name>DicomFinishAttribGadget</name>
+        <dll>gadgetron_dicom</dll>
+        <classname>DicomFinishAttribGadgetUSHORT</classname>
+    </gadget>
+    -->
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_HASTE_MOCO_AVE.xml b/gadgets/gtPlus/config/GT_2DT_HASTE_MOCO_AVE.xml
new file mode 100644
index 0000000..911ce58
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_HASTE_MOCO_AVE.xml
@@ -0,0 +1,1085 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for HASTE imaging with MOCO+AVE
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!--
+    <writer>
+        <slot>1012</slot>
+        <dll>gadgetron_dicom</dll>
+        <classname>DicomImageWriter</classname>
+    </writer>
+
+    <writer>
+        <slot>1018</slot>
+        <dll>gadgetron_dicom</dll>
+        <classname>DicomImageAttribWriter</classname>
+    </writer>
+    -->
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+
+        <!-- File prefix for stored noise prewhitener matrix -->
+        <property>
+            <name>noise_dependency_prefix</name>
+            <value>GadgetronNoisePreWhitener</value>
+        </property>
+
+        <!-- Preset noise dwell time; for noise dependency measurements -->
+        <property>
+            <name>noise_dwell_time_us_preset</name>
+            <value>5.0</value>
+        </property>
+
+        <!-- Whether to perform timing -->
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>4</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!--Recon computation for 2DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon2DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_4th</name>
+            <value>DIM_Repetition</value>
+        </property>
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>interleaved_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap_useHighestSignal</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>separate_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allS</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression_thres</name>
+            <value>-1</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression_num_modesKept</name>
+            <value>-1</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>-1</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>8</value>
+        </property>
+
+        <!-- parameters for coil map estimation, ISMRMRD_SOUHEIL, ISMRMRD_SOUHEIL_ITER -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_GRAPPA</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>recon_auto_parameters</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>gfactor_needed</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>10</value>
+        </property>
+
+        <property>
+            <name>scalingFactor_gfactor</name>
+            <value>100</value>
+        </property>
+
+        <property>
+            <name>scalingFactor_snr_image</name>
+            <value>10</value>
+        </property>
+
+        <property>
+            <name>scalingFactor_std_map</name>
+            <value>1000</value>
+        </property>
+
+        <property>
+            <name>start_frame_for_std_map</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <!-- 
+             ISMRMRD_FILTER_GAUSSIAN,
+             ISMRMRD_FILTER_HANNING,
+             ISMRMRD_FILTER_TUKEY,
+             ISMRMRD_FILTER_TAPERED_HANNING,
+             ISMRMRD_FILTER_NONE 
+        -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <!-- 
+             ISMRMRD_FILTER_GAUSSIAN,
+             ISMRMRD_FILTER_HANNING,
+             ISMRMRD_FILTER_TUKEY,
+             ISMRMRD_FILTER_TAPERED_HANNING,
+             ISMRMRD_FILTER_NONE 
+        -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <!-- only ISMRMRD_FILTER_TAPERED_HANNING is available for the moment -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_POCS</value>
+        </property>
+
+        <!-- parameters for partial fourier homodyne algorithm -->
+        <property>
+            <name>partialFourier_homodyne_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>6</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>6</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>cloudNodeFile</name>
+            <value>myCloud_2DT.txt</value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+        <!-- parameters for recon job split -->
+        <property>
+            <name>job_split_by_S</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>job_num_of_N</name>
+            <value>32</value>
+        </property>
+        <property>
+            <name>job_max_Megabytes</name>
+            <value>10240</value>
+        </property>
+        <property>
+            <name>job_overlap</name>
+            <value>2</value>
+        </property>
+        <property>
+            <name>job_perform_on_control_node</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for the cloud computation 
+             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
+        -->
+        <property>
+            <name>CloudComputing</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>CloudSize</name>
+            <value>1</value>
+        </property>
+
+        <!-- node 0 -->
+        <property>
+            <name>CloudNode0_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_Port</name>
+            <value>9003</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_XMLConfiguration</name>
+            <value>GtProg_2DT_Cartesian_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+    </gadget>
+
+    <!-- Image recon accumulator -->
+    <gadget>
+        <name>ImageAcc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorImageTriggerGadget</classname>
+
+        <!-- triggered dimensions -->
+        <property>
+            <name>TriggerChannel</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>TriggerSlice</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>TriggerE2</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>TriggerContrast</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>TriggerPhase</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>TriggerRepetition</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>TriggerSet</name>
+            <value>false</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>PassImageImmediately</name>
+            <value>false</value>
+        </property>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- MOCO AVE, PSIR recon -->
+    <gadget>
+        <name>MoCoAve</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusImageMoCoAveGadget</classname>
+
+        <!-- ===================================================================================== -->
+        <!-- dimension to perform MOCO -->
+        <property>
+            <name>moco_dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+        <!-- dimension to perform cross-row MOCO, e.g. MOCO between PD and IR images -->
+        <property>
+            <name>moco_cross_row_dim</name>
+            <value>DIM_Set</value>
+        </property>
+
+        <!-- whether to perform averaging -->
+        <property>
+            <name>moco_ave</name>
+            <value>true</value>
+        </property>
+
+        <!-- whether to perform cross-row MOCO -->
+        <property>
+            <name>moco_cross_row</name>
+            <value>false</value>
+        </property>
+
+        <!-- whether all rows have the same reference -->
+        <property>
+            <name>cross_row_same_reference</name>
+            <value>false</value>
+        </property>
+
+        <!-- whether to perform 3D MOCO -->
+        <property>
+            <name>moco_ave_3D</name>
+            <value>false</value>
+        </property>
+
+        <!-- If cross-row MOCO is performed, which row is selected as the reference -->
+        <property>
+            <name>ref_moco_cross_row</name>
+            <value>0</value>
+        </property>
+
+        <!-- Strategy to pick reference for rows, "SSD" or "Deformation" -->
+        <property>
+            <name>row_ref_pick_strategy</name>
+            <value>SSD</value>
+        </property>
+
+        <!-- ===================================================================================== -->
+        <!-- Parameters for image sending -->
+        <!-- Whether to send original images -->
+        <property>
+            <name>send_ori</name>
+            <value>true</value>
+        </property>
+
+        <!-- Whether to send MOCO images -->
+        <property>
+            <name>send_moco</name>
+            <value>true</value>
+        </property>
+
+        <!-- Whether to send averaged images -->
+        <property>
+            <name>send_moco_ave</name>
+            <value>true</value>
+        </property>
+
+        <!-- Whether to keep original image number -->
+        <property>
+            <name>moco_ave_keep_origial_image_number</name>
+            <value>false</value>
+        </property>
+
+        <!-- ===================================================================================== -->
+        <!-- Parameters for MOCO -->
+        <!-- MOCO strategy, 'FixedReference', or 'Progressive' -->
+        <property>
+            <name>strategy</name>
+            <value>FixedReference</value>
+        </property>
+        <!-- Image dissimilarity measures, 'SSD' or 'LocalCCR' or 'MutualInformation' -->
+        <property>
+            <name>dissimilarity</name>
+            <value>LocalCCR</value>
+        </property>
+        <!-- Number of levels for the multi-resolution pyramid -->
+        <property>
+            <name>level</name>
+            <value>4</value>
+        </property>
+        <!-- Number of iterations for every pyramid level, level 0 is for the highest resolution -->
+        <property>
+            <name>iter_0</name>
+            <value>16</value>
+        </property>
+        <property>
+            <name>iter_1</name>
+            <value>32</value>
+        </property>
+        <property>
+            <name>iter_2</name>
+            <value>64</value>
+        </property>
+        <property>
+            <name>iter_3</name>
+            <value>64</value>
+        </property>
+        <!-- Strength of registration regularization, in the unit of pixel -->
+        <property>
+            <name>regularization_hilbert_strength</name>
+            <value>12.0</value>
+        </property>
+        <!-- Whether to perform bidirectional MOCO -->
+        <property>
+            <name>bidirectional_moco</name>
+            <value>false</value>
+        </property>
+
+        <!-- ========================================= -->
+        <!-- Image dissimilarity measures for the cross-row MOCO, 'SSD' or 'LocalCCR' or 'MutualInformation' -->
+        <property>
+            <name>dissimilarity_cross_row</name>
+            <value>LocalCCR</value>
+        </property>
+        <!-- Number of levels for the multi-resolution pyramid for the cross-row MOCO -->
+        <property>
+            <name>level_cross_row</name>
+            <value>3</value>
+        </property>
+        <!-- Number of iterations for every pyramid level for the cross-row MOCO, level 0 is for the highest resolution -->
+        <property>
+            <name>iter_cross_row_0</name>
+            <value>8</value>
+        </property>
+        <property>
+            <name>iter_cross_row_1</name>
+            <value>32</value>
+        </property>
+        <property>
+            <name>iter_cross_row_2</name>
+            <value>64</value>
+        </property>
+        <!-- Strength of registration regularization for the cross-row MOCO, in the unit of pixel -->
+        <property>
+            <name>regularization_hilbert_strength_cross_row</name>
+            <value>32.0</value>
+        </property>
+        <!-- Whether to perform bidirectional MOCO for the cross-row MOCO -->
+        <property>
+            <name>bidirectional_moco_cross_row</name>
+            <value>true</value>
+        </property>
+
+        <!-- ========================================= -->
+
+        <!-- Threshold for image dissimilarity minimization -->
+        <property>
+            <name>dissimilarity_thres</name>
+            <value>1e-5</value>
+        </property>
+        <!-- Number of sub-division search in minimization -->
+        <property>
+            <name>div_num</name>
+            <value>2</value>
+        </property>
+        <!-- For the bidirectional MOCO, the number of bidirectional iteration -->
+        <property>
+            <name>inverse_deform_enforce_iter</name>
+            <value>10</value>
+        </property>
+        <!-- For the bidirectional MOCO, the weight between forward and inverse MOCO -->
+        <property>
+            <name>inverse_deform_enforce_weight</name>
+            <value>0.5</value>
+        </property>
+
+        <!-- ===================================================================================== -->
+
+        <!-- Fraction of images kept for averaging -->
+        <property>
+            <name>percentage_kept_for_averaging</name>
+            <value>0.5</value>
+        </property>
+
+        <!-- Whether to perform soft averaging -->
+        <property>
+            <name>soft_averaging</name>
+            <value>true</value>
+        </property>
+
+        <!-- ===================================================================================== -->
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseModeMOCO</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+        <name>DicomFinishAttribGadget</name>
+        <dll>gadgetron_dicom</dll>
+        <classname>DicomFinishAttribGadgetUSHORT</classname>
+    </gadget>
+    -->
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_LGE.xml b/gadgets/gtPlus/config/GT_2DT_LGE.xml
new file mode 100644
index 0000000..2d386cf
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_LGE.xml
@@ -0,0 +1,701 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for 2DT Cartesian reconstruction on cardiac LGE imaging
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>4</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!-- Recon computation for 2DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon2DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_4th</name>
+            <value>DIM_Repetition</value>
+        </property>
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_Set</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichS_combinationcoeff</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichS_combinationcoeff</name>
+            <value>1</value>
+        </property>
+        <property>
+            <name>interleaved_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap_useHighestSignal</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_whichS_combinationcoeff</name>
+            <value>1</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>separate_whichS_combinationcoeff</name>
+            <value>1</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allS</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>8</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_GRAPPA</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>gfactor_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>wrap_around_map_needed</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_SPIRIT -->
+        <property>
+            <name>spirit_kSize_RO</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E1</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_reg_lamda</name>
+            <value>0.005</value>
+        </property>
+        <property>
+            <name>spirit_use_gpu</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_iter_max</name>
+            <value>70</value>
+        </property>
+        <property>
+            <name>spirit_iter_thres</name>
+            <value>1e-5</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>-1.0</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_FENGHUANG</value>
+        </property>
+
+        <!-- parameters for partial fourier homodyne algorithm -->
+        <property>
+            <name>partialFourier_homodyne_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_MOLLI.xml b/gadgets/gtPlus/config/GT_2DT_MOLLI.xml
new file mode 100644
index 0000000..eca820a
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_MOLLI.xml
@@ -0,0 +1,696 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for 2DT Cartesian reconstruction on cardiac MOLLI T1 mapping
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_Set</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>5</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>4</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!-- Recon computation for 2DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon2DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_4th</name>
+            <value>DIM_Set</value>
+        </property>
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_ref_numOfModes</name>
+            <value>3</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>interleaved_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_ref_numOfModes</name>
+            <value>3</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap_useHighestSignal</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_ref_numOfModes</name>
+            <value>3</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>separate_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allS</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>8</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_GRAPPA</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>gfactor_needed</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_SPIRIT -->
+        <property>
+            <name>spirit_kSize_RO</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E1</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_reg_lamda</name>
+            <value>0.005</value>
+        </property>
+        <property>
+            <name>spirit_use_gpu</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_iter_max</name>
+            <value>70</value>
+        </property>
+        <property>
+            <name>spirit_iter_thres</name>
+            <value>1e-5</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>-1.0</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_FENGHUANG</value>
+        </property>
+
+        <!-- parameters for partial fourier homodyne algorithm -->
+        <property>
+            <name>partialFourier_homodyne_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_MOLLI_Offline.xml b/gadgets/gtPlus/config/GT_2DT_MOLLI_Offline.xml
new file mode 100644
index 0000000..c1a77a3
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_MOLLI_Offline.xml
@@ -0,0 +1,699 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for 2DT cartesian reconstruction on cardiac MOLLI T1 mapping
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_Set</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>5</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>4</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data-->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!-- Recon computation for 2DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon2DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_4th</name>
+            <value>DIM_Set</value>
+        </property>
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>interleaved_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap_useHighestSignal</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>separate_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allS</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>8</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_GRAPPA</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>gfactor_needed</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_SPIRIT -->
+        <property>
+            <name>spirit_kSize_RO</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E1</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_reg_lamda</name>
+            <value>0.005</value>
+        </property>
+        <property>
+            <name>spirit_use_gpu</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_iter_max</name>
+            <value>70</value>
+        </property>
+        <property>
+            <name>spirit_iter_thres</name>
+            <value>1e-5</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>-1.0</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_POCS</value>
+        </property>
+
+        <!-- parameters for partial fourier homodyne algorithm -->
+        <property>
+            <name>partialFourier_homodyne_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+    </gadget>
+
+    <!-- after recon processing 
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+    -->
+
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+    -->
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_Perfusion.xml b/gadgets/gtPlus/config/GT_2DT_Perfusion.xml
new file mode 100644
index 0000000..3e3b875
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_Perfusion.xml
@@ -0,0 +1,702 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for 2DT cartesian reconstruction on cardiac perfusion mapping
+        The support for AIF acquisition is implemented.
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>4</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data-->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!-- Recon computation for 2DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon2DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_4th</name>
+            <value>DIM_Repetition</value>
+        </property>
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichS_combinationcoeff</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichS_combinationcoeff</name>
+            <value>1</value>
+        </property>
+        <property>
+            <name>interleaved_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap_useHighestSignal</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_whichS_combinationcoeff</name>
+            <value>1</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>separate_whichS_combinationcoeff</name>
+            <value>1</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allS</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>8</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_GRAPPA</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>gfactor_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>wrap_around_map_needed</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_SPIRIT -->
+        <property>
+            <name>spirit_kSize_RO</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E1</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_reg_lamda</name>
+            <value>0.005</value>
+        </property>
+        <property>
+            <name>spirit_use_gpu</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_iter_max</name>
+            <value>70</value>
+        </property>
+        <property>
+            <name>spirit_iter_thres</name>
+            <value>1e-5</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>-1.0</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_FENGHUANG</value>
+        </property>
+
+        <!-- parameters for partial fourier homodyne algorithm -->
+        <property>
+            <name>partialFourier_homodyne_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_PseudoReplica_SNRUnitRecon_DataExport.xml b/gadgets/gtPlus/config/GT_2DT_PseudoReplica_SNRUnitRecon_DataExport.xml
new file mode 100644
index 0000000..1b02fd9
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_PseudoReplica_SNRUnitRecon_DataExport.xml
@@ -0,0 +1,74 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GtProg_PseudoReplica_SNRUnitRecon_DataExport.xml
+
+        Hui Xue
+        hui.xue at nih.gov
+
+        To export the SNR unit scaled ISMRMRD data
+        This is the first step for pseudo-replica SNR measurements
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- Noise prewhitening 
+         The SNR unit scaling is performed
+    -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+
+        <!-- File prefix for stored noise prewhitener matrix -->
+        <property>
+            <name>noise_dependency_prefix</name>
+            <value>GadgetronNoisePreWhitener</value>
+        </property>
+
+        <!-- Preset noise dwell time; for noise dependency measurements -->
+        <property>
+            <name>noise_dwell_time_us_preset</name>
+            <value>5.0</value>
+        </property>
+    </gadget>
+
+    <!-- Dump the noise scaled data to an ISMRMRD dataset 
+    -->
+    <gadget>
+        <name>IsmrmrdDump</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>IsmrmrdDumpGadget</classname>
+
+        <!-- File prefix for the data set -->
+        <property>
+            <name>file_prefix</name>
+            <value>ISMRMRD_PseudoReplica</value>
+        </property>
+
+        <!-- Whether to append time stamp to the file name -->
+        <property>
+            <name>append_timestamp</name>
+            <value>false</value>
+        </property>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_RTCine_L1SPIRIT_PhysioInterp.xml b/gadgets/gtPlus/config/GT_2DT_RTCine_L1SPIRIT_PhysioInterp.xml
new file mode 100644
index 0000000..0edf29a
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_RTCine_L1SPIRIT_PhysioInterp.xml
@@ -0,0 +1,866 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for general 2D cartesian reconstruction using L1 SPIRIT
+        The GtPlus cloud computing can be turned on in this configuration file
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>8</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!-- Recon computation for 2DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon2DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_4th</name>
+            <value>DIM_NONE</value>
+        </property>
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>interleaved_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap_useHighestSignal</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allS</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>0.002</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>-1</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm, ISMRMRD_GRAPPA, ISMRMRD_L1SPIRIT -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_L1SPIRIT</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>recon_auto_parameters</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_SPIRIT -->
+        <property>
+            <name>spirit_kSize_RO</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E1</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_reg_lamda</name>
+            <value>0.005</value>
+        </property>
+        <property>
+            <name>spirit_use_gpu</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_solve_symmetric</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_iter_max</name>
+            <value>90</value>
+        </property>
+        <property>
+            <name>spirit_iter_thres</name>
+            <value>0.0015</value>
+        </property>
+        <property>
+            <name>spirit_print_iter</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_L1SPIRIT -->
+        <property>
+            <name>spirit_perform_linear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_perform_nonlinear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_parallel_imaging_lamda</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_image_reg_lamda</name>
+            <value>0.005</value>
+        </property>
+        <property>
+            <name>spirit_data_fidelity_lamda</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_max</name>
+            <value>10</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_thres</name>
+            <value>0.0001</value>
+        </property>
+        <property>
+            <name>spirit_ncg_print_iter</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_coil_sen_map</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_moco_enhancement</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_recon_moco_images</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_RO_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E1_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E2_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_temporal_enhancement_ratio</name>
+            <value>20.0</value>
+        </property>
+        <property>
+            <name>spirit_2D_scale_per_chunk</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>-1.0</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_ZEROFILLING_FILTER</value>
+        </property>
+
+        <!-- parameters for partial fourier homodyne algorithm -->
+        <property>
+            <name>partialFourier_homodyne_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>cloudNodeFile</name>
+            <value>myCloud_2DT.txt</value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+        <!-- parameters for recon job split -->
+        <property>
+            <name>job_split_by_S</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>job_num_of_N</name>
+            <value>64</value>
+        </property>
+        <property>
+            <name>job_max_Megabytes</name>
+            <value>8192</value>
+        </property>
+        <property>
+            <name>job_overlap</name>
+            <value>2</value>
+        </property>
+        <property>
+            <name>job_perform_on_control_node</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for the cloud computation 
+             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
+        -->
+        <property>
+            <name>CloudComputing</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>CloudSize</name>
+            <value>1</value>
+        </property>
+
+        <!-- node 0 -->
+        <property>
+            <name>CloudNode0_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_Port</name>
+            <value>9003</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_XMLConfiguration</name>
+            <value>GtProg_2DT_Cartesian_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+    </gadget>
+
+    <!-- Physio interpolation -->
+    <gadget>
+        <name>PhysioInterpolation</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>PhysioInterpolationGadget</classname>
+
+        <property>
+            <name>phases</name>
+            <value>30</value>
+        </property>
+
+        <!-- 0=separate series for each complete RR -->
+        <!-- 1=First complete RR interval only -->
+        <property>
+            <name>mode</name>
+            <value>1</value>
+        </property>
+
+        <property>
+            <name>first_beat_on_trigger</name>
+            <value>true</value>
+        </property>
+
+        <!-- "Spline" or "BSpline" -->
+        <property>
+            <name>interp_method</name>
+            <value>BSpline</value>
+        </property>
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_RTCine_L1SPIRIT_PhysioInterp_DualLayer_Gateway.xml b/gadgets/gtPlus/config/GT_2DT_RTCine_L1SPIRIT_PhysioInterp_DualLayer_Gateway.xml
new file mode 100644
index 0000000..eaaf6c5
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_RTCine_L1SPIRIT_PhysioInterp_DualLayer_Gateway.xml
@@ -0,0 +1,875 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for general 2D cartesian reconstruction using L1 SPIRIT
+        The GtPlus cloud computing by default is turned on in this configuration file
+        The dual-layer cloud topology is used here, therefore every incoming SLICE is sent
+        to one first layer GtPlus cloud node. This first layer node can further split the job and
+        process the SLICE with one or more second layer nodes.
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+
+        Ref to: 
+
+        Hui Xue, Souheil Inati, Thomas Sangild Sorensen, Peter Kellman, Michael S. Hansen. 
+        Distributed MRI Reconstruction using Gadgetron based Cloud Computing. 
+        Magnetic Resonance in Medicine, doi: 10.1002/mrm.25213.
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!-- Recon computation for 2DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon2DTGadgetCloud</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_4th</name>
+            <value>DIM_Phase</value>
+        </property>
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>interleaved_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap_useHighestSignal</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allS</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>0.002</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>-1</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_L1SPIRIT</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>recon_auto_parameters</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_SPIRIT -->
+        <property>
+            <name>spirit_kSize_RO</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E1</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_reg_lamda</name>
+            <value>0.005</value>
+        </property>
+        <property>
+            <name>spirit_use_gpu</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_solve_symmetric</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_iter_max</name>
+            <value>90</value>
+        </property>
+        <property>
+            <name>spirit_iter_thres</name>
+            <value>0.0015</value>
+        </property>
+        <property>
+            <name>spirit_print_iter</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_L1SPIRIT -->
+        <property>
+            <name>spirit_perform_linear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_perform_nonlinear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_parallel_imaging_lamda</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_image_reg_lamda</name>
+            <value>0.005</value>
+        </property>
+        <property>
+            <name>spirit_data_fidelity_lamda</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_max</name>
+            <value>10</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_thres</name>
+            <value>0.0001</value>
+        </property>
+        <property>
+            <name>spirit_ncg_print_iter</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_coil_sen_map</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_moco_enhancement</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_recon_moco_images</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_RO_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E1_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E2_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_temporal_enhancement_ratio</name>
+            <value>20.0</value>
+        </property>
+        <property>
+            <name>spirit_2D_scale_per_chunk</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>-1.0</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_ZEROFILLING_FILTER</value>
+        </property>
+
+        <!-- parameters for partial fourier homodyne algorithm -->
+        <property>
+            <name>partialFourier_homodyne_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>cloudNodeFile</name>
+            <value>NHLBI_Cloud_2DT_DualLayer.txt</value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+        <!-- parameters for recon job split -->
+        <property>
+            <name>job_split_by_S</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>job_num_of_N</name>
+            <value>48</value>
+        </property>
+        <property>
+            <name>job_max_Megabytes</name>
+            <value>2048</value>
+        </property>
+        <property>
+            <name>job_overlap</name>
+            <value>1</value>
+        </property>
+        <property>
+            <name>job_perform_on_control_node</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for the cloud computation 
+             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
+        -->
+        <property>
+            <name>CloudComputing</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>CloudSize</name>
+            <value>1</value>
+        </property>
+
+        <!-- node 0 -->
+        <property>
+            <name>CloudNode0_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_Port</name>
+            <value>9003</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_XMLConfiguration</name>
+            <value>GtProg_2DT_Cartesian_FirstLayer_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+    </gadget>
+
+    <!-- Physio interpolation -->
+    <gadget>
+        <name>PhysioInterpolation</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>PhysioInterpolationGadget</classname>
+
+        <property>
+            <name>phases</name>
+            <value>30</value>
+        </property>
+
+        <!-- 0=separate series for each complete RR -->
+        <!-- 1=First complete RR interval only -->
+        <property>
+            <name>mode</name>
+            <value>1</value>
+        </property>
+
+        <property>
+            <name>first_beat_on_trigger</name>
+            <value>true</value>
+        </property>
+
+        <!-- "Spline" or "BSpline" -->
+        <property>
+            <name>interp_method</name>
+            <value>BSpline</value>
+        </property>
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_RealTimeCine.xml b/gadgets/gtPlus/config/GT_2DT_RealTimeCine.xml
new file mode 100644
index 0000000..bf292f1
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_RealTimeCine.xml
@@ -0,0 +1,783 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for 2DT cartesian reconstruction on real-time cine imaging
+        The GtPlus supports the on-the-fly reconstruction, therefore the reconstruction starts 
+        whenever sufficient data is received. The reconstructed images are sent out once the 
+        computation is finished.
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_Phase</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_Phase</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>8</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_Phase</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_Phase</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!-- Recon computation for 2DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon2DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_4th</name>
+            <value>DIM_Phase</value>
+        </property>
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>interleaved_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap_useHighestSignal</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>separate_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allS</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>8</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_GRAPPA</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>gfactor_needed</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_SPIRIT -->
+        <property>
+            <name>spirit_kSize_RO</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E1</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_reg_lamda</name>
+            <value>0.005</value>
+        </property>
+        <property>
+            <name>spirit_use_gpu</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_iter_max</name>
+            <value>100</value>
+        </property>
+        <property>
+            <name>spirit_iter_thres</name>
+            <value>1e-5</value>
+        </property>
+        <property>
+            <name>spirit_print_iter</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_L1SPIRIT -->
+        <property>
+            <name>spirit_perform_linear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_perform_nonlinear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_parallel_imaging_lamda</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_image_reg_lamda</name>
+            <value>0.001</value>
+        </property>
+        <property>
+            <name>spirit_data_fidelity_lamda</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_max</name>
+            <value>10</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_thres</name>
+            <value>0.001</value>
+        </property>
+        <property>
+            <name>spirit_ncg_print_iter</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_coil_sen_map</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_moco_enhancement</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_recon_moco_images</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_RO_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E1_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E2_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_temporal_enhancement_ratio</name>
+            <value>5.0</value>
+        </property>
+
+        <!-- parameters for recon job split -->
+        <property>
+            <name>job_split_by_S</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>job_num_of_N</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>job_max_Megabytes</name>
+            <value>20480</value>
+        </property>
+        <property>
+            <name>job_overlap</name>
+            <value>2</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>-1.0</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_POCS</value>
+        </property>
+
+        <!-- parameters for partial fourier homodyne algorithm -->
+        <property>
+            <name>partialFourier_homodyne_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_RealTimeFlow.xml b/gadgets/gtPlus/config/GT_2DT_RealTimeFlow.xml
new file mode 100644
index 0000000..c7efea6
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_RealTimeFlow.xml
@@ -0,0 +1,765 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for 2DT cartesian reconstruction on real-time flow imaging
+        The GtPlus supports the on-the-fly reconstruction, therefore the reconstruction starts 
+        whenever sufficient data is received. The reconstructed images are sent out once the 
+        computation is finished.
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_None</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_None</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>4</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_None</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_None</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!--
+    Recon computation for 2DT cases
+
+    kspace_: [RO E1 CHA N S], for 2D recon, N can be 1
+    ref_: [RO E1 CHA M S], M can be equal to N, 1, or another value
+    fullkspace_: [RO E1 CHA N S]
+    complexIm_: [RO E1 N S], after coil combination
+    coilMap_: [RO E1 CHA 1 or N S]
+    gfactor_: [RO E1 CHA 1 or N S]
+
+    the 4th and 5th dimensions (N and S) need to be specified. For example,
+    for real-time cine, N = DIM_Phase and S=DIM_Slice
+
+    default behavior
+    a) the coil compression coefficients are computed once across all S
+    b) the kernel or coil sensitivity are estimated for every S
+
+    embedded mode
+    a) perform recon and estimate kernel/coil sensitivity for every 2D kspace [RO E1 CHA]
+    b) coil combination uses different coil maps for every S
+    c) if the kspace recon is performed, the coil combination map is reestimated on the fullkspace for every 2D image
+    d) the ref lines are filled back to fullkspace_
+
+    separate mode
+    a) perform recon and estimate kernel/coil sensitivity for every 2D kspace [RO E1 CHA] if M==N
+    b) if M==1, the kernel is only estimated once for every S
+    c) coil combination uses different coil maps for every S
+    d) if the kspace recon is performed, the coil combination map is reestimated on the fullkspace for every 2D image
+
+    interleaved mode
+    a) the average-all ref is used
+    b) kernel/coil sensitivity is estimated once for every S
+    -->
+
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon2DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_4th</name>
+            <value>DIM_Phase</value>
+        </property>
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_Set</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>interleaved_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>interleaved_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap_useHighestSignal</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allS</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allS</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>-1</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_GRAPPA</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>gfactor_needed</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_SPIRIT -->
+        <property>
+            <name>spirit_kSize_RO</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E1</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_reg_lamda</name>
+            <value>0.005</value>
+        </property>
+        <property>
+            <name>spirit_use_gpu</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_iter_max</name>
+            <value>70</value>
+        </property>
+        <property>
+            <name>spirit_iter_thres</name>
+            <value>1e-5</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>-1.0</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_FENGHUANG</value>
+        </property>
+
+        <!-- parameters for partial fourier homodyne algorithm -->
+        <property>
+            <name>partialFourier_homodyne_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+    </gadget>
+
+    <!-- flow processing -->
+    <gadget>
+        <name>PhaseSubtraction</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FlowPhaseSubtractionGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>MaxwellCorrection</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>MaxwellCorrectionGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>Extract</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ExtractGadget</classname>
+        <property>
+            <name>extract_mask</name>
+            <value>9</value>
+        </property>
+        <property>
+            <name>scaling_factor_angle</name>
+            <value>1.0</value>
+        </property>
+    </gadget>
+
+    <!-- after recon processing -->
+    <!--
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+
+        <property>
+            <name>intensity_offset</name>
+            <value>2048</value>
+        </property>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_2DT_T2W.xml b/gadgets/gtPlus/config/GT_2DT_T2W.xml
new file mode 100644
index 0000000..49c55e5
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_2DT_T2W.xml
@@ -0,0 +1,701 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for 2DT Cartesian reconstruction on T2 weighted cardiac imaging
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>true</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>5</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>4</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!-- Recon computation for 2DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon2DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_4th</name>
+            <value>DIM_Repetition</value>
+        </property>
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_Set</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>interleaved_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap_useHighestSignal</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>embedded_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_ref_numOfModes</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allS</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>separate_whichS_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allS</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>8</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_GRAPPA</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>gfactor_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>wrap_around_map_needed</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_SPIRIT -->
+        <property>
+            <name>spirit_kSize_RO</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E1</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_reg_lamda</name>
+            <value>0.005</value>
+        </property>
+        <property>
+            <name>spirit_use_gpu</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_iter_max</name>
+            <value>70</value>
+        </property>
+        <property>
+            <name>spirit_iter_thres</name>
+            <value>1e-5</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>-1.0</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_FENGHUANG</value>
+        </property>
+
+        <!-- parameters for partial fourier homodyne algorithm -->
+        <property>
+            <name>partialFourier_homodyne_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_homodyne_densityComp</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_3DT_Cartesian.xml b/gadgets/gtPlus/config/GT_3DT_Cartesian.xml
new file mode 100644
index 0000000..4ef9fe7
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_3DT_Cartesian.xml
@@ -0,0 +1,849 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for general 3D or 3D+T cartesian reconstruction
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_Encoding2</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>4</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!-- Recon computation for 3DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon3DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichN_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichN_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allN</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_whichN_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allN</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_whichN_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allN</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression_thres</name>
+            <value>0.01</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression_num_modesKept</name>
+            <value>-1</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>8</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL_ITER</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_GRAPPA</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>recon_auto_parameters</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>gfactor_needed</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>45</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_SPIRIT -->
+        <property>
+            <name>spirit_kSize_RO</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E1</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_reg_lamda</name>
+            <value>0.005</value>
+        </property>
+        <property>
+            <name>spirit_use_gpu</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_calib_over_determine_ratio</name>
+            <value>15</value>
+        </property>
+        <property>
+            <name>spirit_solve_symmetric</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_iter_max</name>
+            <value>70</value>
+        </property>
+        <property>
+            <name>spirit_iter_thres</name>
+            <value>0.0015</value>
+        </property>
+        <property>
+            <name>spirit_print_iter</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_L1SPIRIT -->
+        <property>
+            <name>spirit_perform_linear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_perform_nonlinear</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_parallel_imaging_lamda</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_image_reg_lamda</name>
+            <value>0.001</value>
+        </property>
+        <property>
+            <name>spirit_data_fidelity_lamda</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_max</name>
+            <value>10</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_thres</name>
+            <value>0.001</value>
+        </property>
+        <property>
+            <name>spirit_ncg_print_iter</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_coil_sen_map</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_moco_enhancement</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_recon_moco_images</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_temporal_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_2D_scale_per_chunk</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_RO_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E1_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E2_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_3D_scale_per_chunk</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>-1.0</value>
+        </property>
+
+        <property>
+            <name>scalingFactor_gfactor</name>
+            <value>1000</value>
+        </property>
+
+        <property>
+            <name>scalingFactor_snr_image</name>
+            <value>-1</value>
+        </property>
+
+        <property>
+            <name>scalingFactor_std_map</name>
+            <value>-1</value>
+        </property>
+
+        <property>
+            <name>start_frame_for_std_map</name>
+            <value>5</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_POCS</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand_E2</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand_E2</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>cloudNodeFile</name>
+            <value>myCloud_3DT.txt</value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+        <!-- parameters for recon job split -->
+        <property>
+            <name>job_split_by_S</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>job_num_of_N</name>
+            <value>64</value>
+        </property>
+        <property>
+            <name>job_max_Megabytes</name>
+            <value>13000</value>
+        </property>
+        <property>
+            <name>job_overlap</name>
+            <value>2</value>
+        </property>
+        <property>
+            <name>job_perform_on_control_node</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for the cloud computation 
+             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
+        -->
+        <property>
+            <name>CloudComputing</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>CloudSize</name>
+            <value>1</value>
+        </property>
+
+        <!-- node 0 -->
+        <property>
+            <name>CloudNode0_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_Port</name>
+            <value>9003</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_XMLConfiguration</name>
+            <value>GtProg_3DT_Cartesian_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_3DT_Cartesian_CloudNode.xml b/gadgets/gtPlus/config/GT_3DT_Cartesian_CloudNode.xml
new file mode 100644
index 0000000..a59fd8b
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_3DT_Cartesian_CloudNode.xml
@@ -0,0 +1,82 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for 3D cartesian reconstruction on GtPlus Cloud
+        This configuration file configures one gadget to perform the reconstruction for
+        3DT job packages
+
+        Depending on the incoming algorithm parameters, both linear and non-linear reconstruction
+        can be performed
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1013</slot>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusCloudJobMessageReaderCPFL</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1013</slot>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusCloudJobMessageWriterCPFL</classname>
+    </writer>
+
+    <!--
+    Recon computation for 2DT/3DT cases, process one job
+    a gtPlusReconJob2DT job consists of kspace, kernel and parameters
+    kspace: [RO E1 CHA E2/PHS]
+    -->
+
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusReconJob3DTGadget</classname>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_3DT_Cartesian_GFactor.xml b/gadgets/gtPlus/config/GT_3DT_Cartesian_GFactor.xml
new file mode 100644
index 0000000..ef8a558
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_3DT_Cartesian_GFactor.xml
@@ -0,0 +1,735 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for general 3D or 3D+T cartesian reconstruction
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+
+        <!-- File prefix for stored noise prewhitener matrix -->
+        <property>
+            <name>noise_dependency_prefix</name>
+            <value>GadgetronNoisePreWhitener</value>
+        </property>
+
+        <!-- Preset noise dwell time; for noise dependency measurements -->
+        <property>
+            <name>noise_dwell_time_us_preset</name>
+            <value>5.0</value>
+        </property>
+
+        <!-- Whether to perform timing -->
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_Encoding2</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>4</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!-- Recon computation for 3DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon3DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichN_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichN_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allN</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_whichN_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allN</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_whichN_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allN</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression_thres</name>
+            <value>0.01</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression_num_modesKept</name>
+            <value>-1</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>8</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL_ITER</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_GRAPPA</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>recon_auto_parameters</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>gfactor_needed</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>45</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>10</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_NONE</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_NONE</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_NONE</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_POCS</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand_E2</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand_E2</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>cloudNodeFile</name>
+            <value>myCloud_3DT.txt</value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+        <!-- parameters for recon job split -->
+        <property>
+            <name>job_split_by_S</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>job_num_of_N</name>
+            <value>32</value>
+        </property>
+        <property>
+            <name>job_max_Megabytes</name>
+            <value>13000</value>
+        </property>
+        <property>
+            <name>job_overlap</name>
+            <value>2</value>
+        </property>
+        <property>
+            <name>job_perform_on_control_node</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for the cloud computation 
+             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
+        -->
+        <property>
+            <name>CloudComputing</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>CloudSize</name>
+            <value>1</value>
+        </property>
+
+        <!-- node 0 -->
+        <property>
+            <name>CloudNode0_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_Port</name>
+            <value>9003</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_XMLConfiguration</name>
+            <value>GtProg_3DT_Cartesian_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_3DT_Cartesian_L1SPIRIT.xml b/gadgets/gtPlus/config/GT_3DT_Cartesian_L1SPIRIT.xml
new file mode 100644
index 0000000..594b29e
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_3DT_Cartesian_L1SPIRIT.xml
@@ -0,0 +1,853 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for general 3D or 3D+T cartesian reconstruction using L1 SPIRIT
+        The GtPlus cloud computing can be turned on in this configuration file
+        The single-layer cloud topology is used here.
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+
+        Ref to: 
+
+        Hui Xue, Souheil Inati, Thomas Sangild Sorensen, Peter Kellman, Michael S. Hansen. 
+        Distributed MRI Reconstruction using Gadgetron based Cloud Computing. 
+        Magnetic Resonance in Medicine, doi: 10.1002/mrm.25213.
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_Encoding2</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>8</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!-- Recon computation for 3DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon3DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichN_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichN_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allN</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_whichN_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allN</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_whichN_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allN</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression_thres</name>
+            <value>0.01</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression_num_modesKept</name>
+            <value>-1</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>-1</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_L1SPIRIT</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>recon_auto_parameters</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>45</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_SPIRIT -->
+        <property>
+            <name>spirit_kSize_RO</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E1</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_reg_lamda</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>spirit_use_gpu</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_calib_over_determine_ratio</name>
+            <value>15</value>
+        </property>
+        <property>
+            <name>spirit_solve_symmetric</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_iter_max</name>
+            <value>100</value>
+        </property>
+        <property>
+            <name>spirit_iter_thres</name>
+            <value>0.0015</value>
+        </property>
+        <property>
+            <name>spirit_print_iter</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_L1SPIRIT -->
+        <property>
+            <name>spirit_perform_linear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_perform_nonlinear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_parallel_imaging_lamda</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_image_reg_lamda</name>
+            <value>0.002</value>
+        </property>
+        <property>
+            <name>spirit_data_fidelity_lamda</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_max</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_thres</name>
+            <value>0.001</value>
+        </property>
+        <property>
+            <name>spirit_ncg_print_iter</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_coil_sen_map</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_moco_enhancement</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_recon_moco_images</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_temporal_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_2D_scale_per_chunk</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_RO_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E1_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E2_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_3D_scale_per_chunk</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>-1.0</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>0.5</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>0.5</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>0.5</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_POCS</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand_E2</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand_E2</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>cloudNodeFile</name>
+            <value>myCloud_3DT.txt</value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+        <!-- parameters for recon job split -->
+        <property>
+            <name>job_split_by_S</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>job_num_of_N</name>
+            <value>32</value>
+        </property>
+        <property>
+            <name>job_max_Megabytes</name>
+            <value>2499</value>
+        </property>
+        <property>
+            <name>job_overlap</name>
+            <value>1</value>
+        </property>
+        <property>
+            <name>job_perform_on_control_node</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for the cloud computation 
+             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
+        -->
+        <property>
+            <name>CloudComputing</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>CloudSize</name>
+            <value>2</value>
+        </property>
+
+        <!-- node 0 -->
+        <property>
+            <name>CloudNode0_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_Port</name>
+            <value>9003</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_XMLConfiguration</name>
+            <value>GtProg_3DT_Cartesian_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+        <!-- node 1 -->
+        <property>
+            <name>CloudNode1_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode1_Port</name>
+            <value>9004</value>
+        </property>
+
+        <property>
+            <name>CloudNode1_XMLConfiguration</name>
+            <value>GtProg_3DT_Cartesian_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode1_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_3DT_Cartesian_SPIRIT.xml b/gadgets/gtPlus/config/GT_3DT_Cartesian_SPIRIT.xml
new file mode 100644
index 0000000..535cbe8
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_3DT_Cartesian_SPIRIT.xml
@@ -0,0 +1,844 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for general 3D or 3D+T Cartesian reconstruction using linear SPIRIT
+        The GtPlus cloud computing can be turned on in this configuration file
+        The single-layer cloud topology is used here.
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+
+        Ref to: 
+
+        Hui Xue, Souheil Inati, Thomas Sangild Sorensen, Peter Kellman, Michael S. Hansen. 
+        Distributed MRI Reconstruction using Gadgetron based Cloud Computing. 
+        Magnetic Resonance in Medicine, doi: 10.1002/mrm.25213.
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_Encoding2</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>8</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!-- Recon computation for 3DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon3DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichN_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichN_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allN</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_whichN_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allN</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_whichN_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allN</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression_thres</name>
+            <value>0.005</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression_num_modesKept</name>
+            <value>-1</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>-1</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_SPIRIT</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>recon_auto_parameters</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>45</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_SPIRIT -->
+        <property>
+            <name>spirit_kSize_RO</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E1</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_oSize_RO</name>
+            <value>1</value>
+        </property>
+        <property>
+            <name>spirit_oSize_E1</name>
+            <value>1</value>
+        </property>
+        <property>
+            <name>spirit_oSize_E2</name>
+            <value>1</value>
+        </property>
+        <property>
+            <name>spirit_reg_lamda</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>spirit_use_gpu</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_calib_over_determine_ratio</name>
+            <value>15</value>
+        </property>
+        <property>
+            <name>spirit_solve_symmetric</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_iter_max</name>
+            <value>100</value>
+        </property>
+        <property>
+            <name>spirit_iter_thres</name>
+            <value>0.0015</value>
+        </property>
+        <property>
+            <name>spirit_print_iter</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_L1SPIRIT -->
+        <property>
+            <name>spirit_perform_linear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_perform_nonlinear</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_parallel_imaging_lamda</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_image_reg_lamda</name>
+            <value>0.001</value>
+        </property>
+        <property>
+            <name>spirit_data_fidelity_lamda</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_max</name>
+            <value>10</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_thres</name>
+            <value>0.001</value>
+        </property>
+        <property>
+            <name>spirit_ncg_print_iter</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_coil_sen_map</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_moco_enhancement</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_recon_moco_images</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_temporal_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_2D_scale_per_chunk</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_RO_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E1_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E2_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_3D_scale_per_chunk</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>-1.0</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>0.5</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>0.5</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>0.5</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_POCS</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand_E2</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand_E2</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>cloudNodeFile</name>
+            <value>myCloud_3DT.txt</value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+        <!-- parameters for recon job split -->
+        <property>
+            <name>job_split_by_S</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>job_num_of_N</name>
+            <value>32</value>
+        </property>
+        <property>
+            <name>job_max_Megabytes</name>
+            <value>2499</value>
+        </property>
+        <property>
+            <name>job_overlap</name>
+            <value>2</value>
+        </property>
+        <property>
+            <name>job_perform_on_control_node</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for the cloud computation 
+             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
+        -->
+        <property>
+            <name>CloudComputing</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>CloudSize</name>
+            <value>1</value>
+        </property>
+
+        <!-- node 0 -->
+        <property>
+            <name>CloudNode0_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_Port</name>
+            <value>9003</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_XMLConfiguration</name>
+            <value>GadgetronProgram_gtPlus_3DT_Cartesian_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/GT_3DT_Cartesian_SingleLayer_L1SPIRIT.xml b/gadgets/gtPlus/config/GT_3DT_Cartesian_SingleLayer_L1SPIRIT.xml
new file mode 100644
index 0000000..e64b358
--- /dev/null
+++ b/gadgets/gtPlus/config/GT_3DT_Cartesian_SingleLayer_L1SPIRIT.xml
@@ -0,0 +1,853 @@
+<?xml version="1.0" encoding="utf-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <!--
+       _____              _____     _____   ______   _______   _____     ____    _   _ 
+      / ____|     /\     |  __ \   / ____| |  ____| |__   __| |  __ \   / __ \  | \ | |
+     | |  __     /  \    | |  | | | |  __  | |__       | |    | |__) | | |  | | |  \| |
+     | | |_ |   / /\ \   | |  | | | | |_ | |  __|      | |    |  _  /  | |  | | | . ` |
+     | |__| |  / ____ \  | |__| | | |__| | | |____     | |    | | \ \  | |__| | | |\  |
+      \_____| /_/    \_\ |_____/   \_____| |______|    |_|    |_|  \_\  \____/  |_| \_|
+                                                                                       
+    -->
+
+    <!-- 
+        GT Plus configuration file for general 3D or 3D+T cartesian reconstruction using L1 SPIRIT
+        The GtPlus cloud computing can be turned on in this configuration file
+        The single-layer cloud topology is used here.
+
+        Author: Hui Xue
+        Magnetic Resonance Technology Program
+        National Heart, Lung and Blood Institute
+        National Institutes of Health
+        10 Center Drive, Bethesda
+        MD 20814
+        USA
+        Email: hui.xue at nih.gov
+
+        Ref to: 
+
+        Hui Xue, Souheil Inati, Thomas Sangild Sorensen, Peter Kellman, Michael S. Hansen. 
+        Distributed MRI Reconstruction using Gadgetron based Cloud Computing. 
+        Magnetic Resonance in Medicine, doi: 10.1002/mrm.25213.
+    -->
+
+    <!-- reader -->
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- writer -->
+    <writer>
+        <slot>1004</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1005</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1006</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!-- RO asymmetric echo handling -->
+    <gadget>
+        <name>AsymmetricEcho</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>AsymmetricEchoAdjustROGadget</classname>
+    </gadget>
+
+    <!-- RO oversampling removal -->
+    <gadget>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+
+        <property>
+            <name>constant_noise_variance</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+    <!-- Noise prewhitening -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <!-- Data accumulation and trigger gadget -->
+    <gadget>
+        <name>Acc</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
+
+        <!-- debug and info mode -->
+        <property>
+            <name>verboseMode</name>
+            <value>false</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>noacceleration_triggerDim1</name>
+            <value>DIM_Encoding2</value>
+        </property>
+
+        <property>
+            <name>noacceleration_triggerDim2</name>
+            <value>DIM_Slice</value>
+        </property>
+
+        <property>
+            <name>noacceleration_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>interleaved_numOfKSpace_triggerDim1</name>
+            <value>8</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>embedded_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_triggerDim1</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_triggerDim2</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <property>
+            <name>separate_numOfKSpace_triggerDim1</name>
+            <value>1</value>
+        </property>
+
+        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
+        <property>
+            <name>other_kspace_matching_Dim</name>
+            <value>DIM_Repetition</value>
+        </property>
+
+    </gadget>
+
+    <!-- Recon computation for 3DT cases -->
+    <gadget>
+        <name>Recon</name>
+        <dll>gadgetronPlus</dll>
+        <classname>GtPlusRecon3DTGadget</classname>
+
+        <!-- kspace data -->
+        <property>
+            <name>dim_5th</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- work flow -->
+        <property>
+            <name>workOrder_ShareDim</name>
+            <value>DIM_NONE</value>
+        </property>
+
+        <!-- No acceleration mode -->
+        <property>
+            <name>no_acceleration_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>no_acceleration_same_combinationcoeff_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>no_acceleration_whichN_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Interleaved mode -->
+        <property>
+            <name>interleaved_same_combinationcoeff_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>interleaved_whichN_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- Embedded mode -->
+        <property>
+            <name>embedded_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_same_combinationcoeff_allN</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>embedded_whichN_combinationcoeff</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>embedded_ref_fillback</name>
+            <value>true</value>
+        </property>
+
+        <!-- Separate mode -->
+        <property>
+            <name>separate_averageall_ref</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_fullres_coilmap</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_same_combinationcoeff_allN</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>separate_whichN_combinationcoeff</name>
+            <value>0</value>
+        </property>
+
+        <!-- coil compression -->
+        <property>
+            <name>same_coil_compression_coeff_allN</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression_thres</name>
+            <value>0.01</value>
+        </property>
+
+        <property>
+            <name>upstream_coil_compression_num_modesKept</name>
+            <value>-1</value>
+        </property>
+
+        <property>
+            <name>downstream_coil_compression</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>coil_compression_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>coil_compression_num_modesKept</name>
+            <value>-1</value>
+        </property>
+
+        <!-- parameters for coil map estimation 
+            enum ISMRMRDCOILMAPALGO
+            {
+                ISMRMRD_SOUHEIL,
+                ISMRMRD_SOUHEIL_ITER
+            };
+        -->
+        <property>
+            <name>coil_map_algorithm</name>
+            <value>ISMRMRD_SOUHEIL</value>
+        </property>
+        <property>
+            <name>csm_kSize</name>
+            <value>7</value>
+        </property>
+
+        <property>
+            <name>csm_powermethod_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_true_3D</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>csm_iter_num</name>
+            <value>3</value>
+        </property>
+
+        <property>
+            <name>csm_iter_thres</name>
+            <value>0.001</value>
+        </property>
+
+        <property>
+            <name>csm_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- algorithm -->
+        <property>
+            <name>recon_algorithm</name>
+            <value>ISMRMRD_L1SPIRIT</value>
+        </property>
+
+        <property>
+            <name>recon_kspace_needed</name>
+            <value>false</value>
+        </property>
+
+        <property>
+            <name>recon_auto_parameters</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_GRAPPA -->
+        <property>
+            <name>grappa_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E1</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_kSize_E2</name>
+            <value>4</value>
+        </property>
+        <property>
+            <name>grappa_reg_lamda</name>
+            <value>0.0005</value>
+        </property>
+        <property>
+            <name>grappa_calib_over_determine_ratio</name>
+            <value>45</value>
+        </property>
+        <property>
+            <name>grappa_use_gpu</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_SPIRIT -->
+        <property>
+            <name>spirit_kSize_RO</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E1</name>
+            <value>7</value>
+        </property>
+        <property>
+            <name>spirit_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_reg_lamda</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>spirit_use_gpu</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_calib_over_determine_ratio</name>
+            <value>15</value>
+        </property>
+        <property>
+            <name>spirit_solve_symmetric</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_iter_max</name>
+            <value>100</value>
+        </property>
+        <property>
+            <name>spirit_iter_thres</name>
+            <value>0.0015</value>
+        </property>
+        <property>
+            <name>spirit_print_iter</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for ISMRMRD_L1SPIRIT -->
+        <property>
+            <name>spirit_perform_linear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_perform_nonlinear</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_parallel_imaging_lamda</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_image_reg_lamda</name>
+            <value>0.002</value>
+        </property>
+        <property>
+            <name>spirit_data_fidelity_lamda</name>
+            <value>0</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_max</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>spirit_ncg_iter_thres</name>
+            <value>0.001</value>
+        </property>
+        <property>
+            <name>spirit_ncg_print_iter</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_coil_sen_map</name>
+            <value>true</value>
+        </property>
+        <property>
+            <name>spirit_use_moco_enhancement</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_recon_moco_images</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_temporal_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_2D_scale_per_chunk</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>spirit_RO_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E1_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_E2_enhancement_ratio</name>
+            <value>1.0</value>
+        </property>
+        <property>
+            <name>spirit_3D_scale_per_chunk</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for scaling and image sending -->
+        <property>
+            <name>min_intensity_value</name>
+            <value>64</value>
+        </property>
+
+        <property>
+            <name>max_intensity_value</name>
+            <value>4095</value>
+        </property>
+
+        <property>
+            <name>scalingFactor</name>
+            <value>-1.0</value>
+        </property>
+
+        <property>
+            <name>use_constant_scalingFactor</name>
+            <value>false</value>
+        </property>
+
+        <!-- parameters for kspace filter, image data -->
+        <property>
+            <name>filterRO</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterRO_sigma</name>
+            <value>0.5</value>
+        </property>
+        <property>
+            <name>filterRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE1</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE1_sigma</name>
+            <value>0.5</value>
+        </property>
+        <property>
+            <name>filterE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterE2</name>
+            <value>ISMRMRD_FILTER_GAUSSIAN</value>
+        </property>
+        <property>
+            <name>filterE2_sigma</name>
+            <value>0.5</value>
+        </property>
+        <property>
+            <name>filterE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, ref data -->
+        <property>
+            <name>filterRefRO</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefRO_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE1</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE1_width</name>
+            <value>0.15</value>
+        </property>
+
+        <property>
+            <name>filterRefE2</name>
+            <value>ISMRMRD_FILTER_HANNING</value>
+        </property>
+        <property>
+            <name>filterRefE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterRefE2_width</name>
+            <value>0.15</value>
+        </property>
+
+        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
+        <property>
+            <name>filterPartialFourierRO</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierRO_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE1</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE1_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>filterPartialFourierE2</name>
+            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_sigma</name>
+            <value>1.5</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_width</name>
+            <value>0.15</value>
+        </property>
+        <property>
+            <name>filterPartialFourierE2_densityComp</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
+        <property>
+            <name>partialFourier_algo</name>
+            <value>ISMRMRD_PF_POCS</value>
+        </property>
+
+        <!-- parameters for partial fourier POCS algorithm -->
+        <property>
+            <name>partialFourier_POCS_iters</name>
+            <value>6</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_thres</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand</name>
+            <value>24</value>
+        </property>
+        <property>
+            <name>partialFourier_POCS_transitBand_E2</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for partial fourier FengHuang algorithm -->
+        <property>
+            <name>partialFourier_FengHuang_kSize_RO</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E1</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_kSize_E2</name>
+            <value>5</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_thresReg</name>
+            <value>0.01</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_sameKernel_allN</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand</name>
+            <value>24</value>
+        </property>
+        <property>
+            <name>partialFourier_FengHuang_transitBand_E2</name>
+            <value>24</value>
+        </property>
+
+        <!-- parameters for debug and timing -->
+        <property>
+            <name>debugFolder</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>debugFolder2</name>
+            <value></value>
+        </property>
+
+        <property>
+            <name>cloudNodeFile</name>
+            <value>NHLBI_Cloud_3DT.txt</value>
+        </property>
+
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>verboseMode</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for system acquisition -->
+        <property>
+            <name>timeStampResolution</name>
+            <value>0.0025</value>
+        </property>
+
+        <!-- parameters for recon job split -->
+        <property>
+            <name>job_split_by_S</name>
+            <value>false</value>
+        </property>
+        <property>
+            <name>job_num_of_N</name>
+            <value>32</value>
+        </property>
+        <property>
+            <name>job_max_Megabytes</name>
+            <value>2499</value>
+        </property>
+        <property>
+            <name>job_overlap</name>
+            <value>1</value>
+        </property>
+        <property>
+            <name>job_perform_on_control_node</name>
+            <value>true</value>
+        </property>
+
+        <!-- parameters for the cloud computation 
+             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
+        -->
+        <property>
+            <name>CloudComputing</name>
+            <value>true</value>
+        </property>
+
+        <property>
+            <name>CloudSize</name>
+            <value>2</value>
+        </property>
+
+        <!-- node 0 -->
+        <property>
+            <name>CloudNode0_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_Port</name>
+            <value>9003</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_XMLConfiguration</name>
+            <value>GtProg_3DT_Cartesian_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode0_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+        <!-- node 1 -->
+        <property>
+            <name>CloudNode1_IP</name>
+            <value>localhost</value>
+        </property>
+
+        <property>
+            <name>CloudNode1_Port</name>
+            <value>9004</value>
+        </property>
+
+        <property>
+            <name>CloudNode1_XMLConfiguration</name>
+            <value>GtProg_3DT_Cartesian_CloudNode.xml</value>
+        </property>
+
+        <property>
+            <name>CloudNode1_ComputingPowerIndex</name>
+            <value>1</value>
+        </property>
+
+    </gadget>
+
+    <!-- after recon processing -->
+    <gadget>
+        <name>ComplexToFloatAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ComplexToFloatAttribGadget</classname>
+    </gadget>
+
+    <gadget>
+        <name>FloatToShortAttrib</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>FloatToUShortAttribGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishAttribFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+        <name>ImageFinishAttribUSHORT</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/gtPlus/config/gtCloud/myCloud_2DT.txt b/gadgets/gtPlus/config/gtCloud/myCloud_2DT.txt
new file mode 100644
index 0000000..191eb7a
--- /dev/null
+++ b/gadgets/gtPlus/config/gtCloud/myCloud_2DT.txt
@@ -0,0 +1,8 @@
+localhost
+9002
+1
+localhost
+9003
+GT_2DT_Cartesian_CloudNode.xml
+1
+0
\ No newline at end of file
diff --git a/gadgets/gtPlus/config/gtCloud/myCloud_2DT_DualLayer.txt b/gadgets/gtPlus/config/gtCloud/myCloud_2DT_DualLayer.txt
new file mode 100644
index 0000000..5edc0a6
--- /dev/null
+++ b/gadgets/gtPlus/config/gtCloud/myCloud_2DT_DualLayer.txt
@@ -0,0 +1,8 @@
+localhost
+9002
+1
+localhost
+9003
+GT_2DT_Cartesian_FirstLayer_CloudNode.xml
+1
+0
diff --git a/gadgets/gtPlus/config/gtCloud/myCloud_2DT_DualLayer_FirstLayer.txt b/gadgets/gtPlus/config/gtCloud/myCloud_2DT_DualLayer_FirstLayer.txt
new file mode 100644
index 0000000..2121e88
--- /dev/null
+++ b/gadgets/gtPlus/config/gtCloud/myCloud_2DT_DualLayer_FirstLayer.txt
@@ -0,0 +1,8 @@
+localhost
+9003
+1
+localhost
+9004
+GT_2DT_Cartesian_CloudNode.xml
+1
+0
diff --git a/gadgets/gtPlus/config/gtCloud/myCloud_3DT.txt b/gadgets/gtPlus/config/gtCloud/myCloud_3DT.txt
new file mode 100644
index 0000000..4a6a3be
--- /dev/null
+++ b/gadgets/gtPlus/config/gtCloud/myCloud_3DT.txt
@@ -0,0 +1,12 @@
+localhost
+9002
+2
+localhost
+9003
+GT_3DT_Cartesian_CloudNode.xml
+1
+localhost
+9004
+GT_3DT_Cartesian_CloudNode.xml
+1
+0
\ No newline at end of file
diff --git a/gadgets/interventional_mri/CMakeLists.txt b/gadgets/interventional_mri/CMakeLists.txt
new file mode 100644
index 0000000..991a7f8
--- /dev/null
+++ b/gadgets/interventional_mri/CMakeLists.txt
@@ -0,0 +1,54 @@
+IF (WIN32)
+    ADD_DEFINITIONS(-D__BUILD_GADGETRON_INTERVENTIONAL_MRI__)
+ENDIF (WIN32)
+
+find_package(Ismrmrd REQUIRED)
+
+include_directories(
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
+    ${CMAKE_SOURCE_DIR}/toolboxes/core
+    ${CMAKE_SOURCE_DIR}/toolboxes/mri_core
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/algorithm
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/hostutils
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/mri/pmri/gpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/fft/cpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/fft/gpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/gadgettools
+    ${CMAKE_SOURCE_DIR}/toolboxes/gtplus
+    ${CMAKE_SOURCE_DIR}/gadgets/mri_core
+    ${HDF5_INCLUDE_DIR}
+    ${HDF5_INCLUDE_DIR}/cpp
+    ${ARMADILLO_INCLUDE_DIRS}
+    ${MKL_INCLUDE_DIR}
+    ${ISMRMRD_INCLUDE_DIR}
+)
+
+
+add_library(gadgetron_interventional_mri SHARED 
+    gadgetron_interventional_mri_export.h 
+    DeviceChannelSplitterGadget.h
+    DeviceChannelSplitterGadget.cpp
+)
+
+set_target_properties(gadgetron_interventional_mri PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})                                                                                                                                                                                                      
+
+target_link_libraries(gadgetron_interventional_mri
+    gadgetron_gadgetbase
+    gadgetron_toolbox_cpucore
+    gadgetron_toolbox_gadgettools
+    ${ISMRMRD_LIBRARIES} 
+    ${ACE_LIBRARIES}
+)
+
+install(FILES 
+    gadgetron_interventional_mri_export.h
+    DeviceChannelSplitterGadget.h
+    DESTINATION include COMPONENT main)
+
+install(FILES grappa_device.xml DESTINATION ${GADGETRON_INSTALL_CONFIG_PATH} COMPONENT main)
+
+install(TARGETS gadgetron_interventional_mri DESTINATION lib COMPONENT main)
diff --git a/gadgets/interventional_mri/DeviceChannelSplitterGadget.cpp b/gadgets/interventional_mri/DeviceChannelSplitterGadget.cpp
new file mode 100644
index 0000000..b53176b
--- /dev/null
+++ b/gadgets/interventional_mri/DeviceChannelSplitterGadget.cpp
@@ -0,0 +1,95 @@
+#include "DeviceChannelSplitterGadget.h"
+#include "Gadgetron.h"
+#include "ismrmrd/meta.h"
+
+ //This is needed for things such as data role, which should NOT be defined in gtPlus
+#include "GtPlusDefinition.h"
+
+namespace Gadgetron{
+
+/** Splits a multi-channel image into single-channel images and forwards each to the next gadget.
+ *  Channel 0 is tagged GTPLUS_IMAGE_IRT_IMAGE, every other channel GTPLUS_IMAGE_IRT_DEVICE.
+ *  m1 is the image header; m2 is the pixel array, whose last dimension is read as the channel count.
+ *  Returns GADGET_OK, GADGET_FAIL if header and array channel counts differ, or -1 if putq fails. */
+template <typename T>
+int DeviceChannelSplitterGadget<T>
+::process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1,
+	  GadgetContainerMessage< hoNDArray< T > >* m2)
+{
+  //Some consistency checking
+  unsigned int header_channels = m1->getObjectPtr()->channels;
+  unsigned int array_channels = m2->getObjectPtr()->get_size(m2->getObjectPtr()->get_number_of_dimensions()-1);
+  unsigned int dim_x = m2->getObjectPtr()->get_size(0);
+  unsigned int dim_y = m2->getObjectPtr()->get_size(1);
+  unsigned int dim_z = m2->getObjectPtr()->get_size(2);
+  //Widen before multiplying so the per-channel element count is computed in size_t
+  size_t image_elements = static_cast<size_t>(dim_x)*dim_y*dim_z;
+
+  if (header_channels != array_channels) {
+    GADGET_DEBUG2("Inconsistent number of header channels (%d) and array channels (%d)\n", header_channels, array_channels);
+    m1->release();
+    return GADGET_FAIL;
+  }
+
+  for (unsigned int i = 0; i < array_channels; i++) {
+    //Header for this channel: a copy of the incoming header reduced to one channel
+    GadgetContainerMessage<ISMRMRD::ImageHeader>* im1 = new GadgetContainerMessage<ISMRMRD::ImageHeader>();
+    *(im1->getObjectPtr()) = *(m1->getObjectPtr());
+    im1->getObjectPtr()->channels = 1;
+    /*
+    GADGET_DEBUG2("Image with matrix (cha=%d): %d, %d, %d and fov %f, %f, %f\n", 
+		  i,
+		  im1->getObjectPtr()->matrix_size[0], 
+		  im1->getObjectPtr()->matrix_size[1], 
+		  im1->getObjectPtr()->matrix_size[2],
+		  im1->getObjectPtr()->field_of_view[0],
+		  im1->getObjectPtr()->field_of_view[1],
+		  im1->getObjectPtr()->field_of_view[2]);
+
+    */
+
+    //Pixel data for channel i: one block of image_elements values copied out of the input array
+    GadgetContainerMessage< hoNDArray< T > >* im2 = new GadgetContainerMessage< hoNDArray< T > >();
+    im2->getObjectPtr()->create(dim_x,dim_y,dim_z,1);
+    memcpy(im2->getObjectPtr()->get_data_ptr(), m2->getObjectPtr()->get_data_ptr() + i*image_elements, sizeof(T)*image_elements);
+    im1->cont(im2);
+
+    //Meta attributes: data role plus device-channel bookkeeping
+    Gadgetron::GadgetContainerMessage<ISMRMRD::MetaContainer>* im3 = new Gadgetron::GadgetContainerMessage<ISMRMRD::MetaContainer>();
+    if (i == 0) {
+      im3->getObjectPtr()->set(GTPLUS_DATA_ROLE, GTPLUS_IMAGE_IRT_IMAGE);
+    } else {
+      im3->getObjectPtr()->set(GTPLUS_DATA_ROLE, GTPLUS_IMAGE_IRT_DEVICE);
+      im3->getObjectPtr()->set(GTPLUS_IMAGE_CUR_DEVICE_CHA, (long)i);
+    }
+    im3->getObjectPtr()->append(GTPLUS_DATA_ROLE, GTPLUS_IMAGE_INTENSITY_UNCHANGED);
+
+    if (array_channels > 1) {
+      im3->getObjectPtr()->set(GTPLUS_IMAGE_NUM_DEVICE_CHA, (long)(array_channels-1));
+    } else {
+      im3->getObjectPtr()->set(GTPLUS_IMAGE_NUM_DEVICE_CHA, (long)(-1));
+    }
+    im2->cont(im3);
+
+    if (this->next()->putq(im1) == -1) {
+      //putq failed, so the new chain is still ours: releasing im1 also releases its continuations
+      im1->release();
+      m1->release();
+      ACE_ERROR_RETURN( (LM_ERROR,
+			 ACE_TEXT("%p\n"),
+			 ACE_TEXT("DeviceChannelSplitterGadget::process, passing data on to next gadget")),
+			-1);
+    }
+  }
+
+  //We are done with the original data
+  m1->release();
+  return GADGET_OK;
+}
+
+//Declare factories for the various template instances
+GADGET_FACTORY_DECLARE(DeviceChannelSplitterGadgetFLOAT);
+GADGET_FACTORY_DECLARE(DeviceChannelSplitterGadgetUSHORT);
+GADGET_FACTORY_DECLARE(DeviceChannelSplitterGadgetCPLX);
+
+}
diff --git a/gadgets/interventional_mri/DeviceChannelSplitterGadget.h b/gadgets/interventional_mri/DeviceChannelSplitterGadget.h
new file mode 100644
index 0000000..6a1a2c9
--- /dev/null
+++ b/gadgets/interventional_mri/DeviceChannelSplitterGadget.h
@@ -0,0 +1,45 @@
+#ifndef DEVICECHANNELSPLITTERGADGET_H
+#define DEVICECHANNELSPLITTERGADGET_H
+
+#include "Gadget.h"
+#include "hoNDArray.h"
+#include "GadgetMRIHeaders.h"
+#include "GadgetStreamController.h"
+#include "gadgetron_interventional_mri_export.h"
+
+#include <ismrmrd/ismrmrd.h>
+#include <complex>
+
+namespace Gadgetron{
+
+  template <typename T> class EXPORTGADGETSINTERVENTIONAL_MRI DeviceChannelSplitterGadget : 
+  public Gadget2<ISMRMRD::ImageHeader,hoNDArray< T > > // handles an ImageHeader paired with an hoNDArray<T>
+  {
+  protected:
+    virtual int process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1, 
+			GadgetContainerMessage< hoNDArray< T > >* m2);
+  };
+  // Concrete gadget with T = ACE_UINT16
+  class EXPORTGADGETSINTERVENTIONAL_MRI DeviceChannelSplitterGadgetUSHORT :
+  public DeviceChannelSplitterGadget<ACE_UINT16>
+  {
+  public:
+    GADGET_DECLARE(DeviceChannelSplitterGadgetUSHORT);
+  };
+  // Concrete gadget with T = float
+  class EXPORTGADGETSINTERVENTIONAL_MRI DeviceChannelSplitterGadgetFLOAT :
+  public DeviceChannelSplitterGadget<float>
+  {
+  public:
+    GADGET_DECLARE(DeviceChannelSplitterGadgetFLOAT);
+  };
+  // Concrete gadget with T = std::complex<float>
+  class EXPORTGADGETSINTERVENTIONAL_MRI DeviceChannelSplitterGadgetCPLX :
+  public DeviceChannelSplitterGadget< std::complex<float> >
+  {
+  public:
+    GADGET_DECLARE(DeviceChannelSplitterGadgetCPLX);
+  };
+}
+
+#endif //DEVICECHANNELSPLITTERGADGET_H
diff --git a/gadgets/interventional_mri/gadgetron_interventional_mri_export.h b/gadgets/interventional_mri/gadgetron_interventional_mri_export.h
new file mode 100644
index 0000000..43f3f30
--- /dev/null
+++ b/gadgets/interventional_mri/gadgetron_interventional_mri_export.h
@@ -0,0 +1,14 @@
+#ifndef GADGETRON_INTERVENTIONAL_MRI_EXPORT_H_
+#define GADGETRON_INTERVENTIONAL_MRI_EXPORT_H_
+
+#if defined (WIN32)
+#if defined (__BUILD_GADGETRON_INTERVENTIONAL_MRI__)
+#define EXPORTGADGETSINTERVENTIONAL_MRI __declspec(dllexport)
+#else
+#define EXPORTGADGETSINTERVENTIONAL_MRI __declspec(dllimport)
+#endif
+#else
+#define EXPORTGADGETSINTERVENTIONAL_MRI
+#endif
+
+#endif /* GADGETRON_INTERVENTIONAL_MRI_EXPORT_H_ */
diff --git a/gadgets/interventional_mri/grappa_device.xml b/gadgets/interventional_mri/grappa_device.xml
new file mode 100644
index 0000000..8d4959e
--- /dev/null
+++ b/gadgets/interventional_mri/grappa_device.xml
@@ -0,0 +1,147 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+         
+    <reader>
+      <slot>1008</slot>
+      <dll>gadgetron_mricore</dll>
+      <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+ 
+     <writer>
+      <slot>1004</slot>
+      <dll>gadgetron_mricore</dll>
+      <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+      <slot>1005</slot>
+      <dll>gadgetron_mricore</dll>
+      <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+      <slot>1006</slot>
+      <dll>gadgetron_mricore</dll>
+      <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <writer>
+        <slot>1015</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterCPLX</classname>
+    </writer>
+    <writer>
+        <slot>1016</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterFLOAT</classname>
+    </writer>
+    <writer>
+        <slot>1017</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>MRIImageAttribWriterUSHORT</classname>
+    </writer>
+
+    <!-- We need to modify the coil adjust so that we only scale channels according to noise variance -->
+    <!--
+    <gadget>
+      <name>NoiseAdjust</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>NoiseAdjustGadget</classname>
+      <property><name>scale_only</name><value>present_uncombined_channels at PCA</value></property>
+    </gadget>    
+    -->
+
+    <gadget>
+      <name>PCA</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>PCACoilGadget</classname>
+      <property><name>uncombined_channels_by_name</name><value>Loop_7:L7</value></property>
+
+      <!-- present_uncombined_channels will get updated by the gadget based on the attached coils -->
+      <property><name>present_uncombined_channels</name><value>0</value></property>
+    </gadget>
+
+    <gadget>
+      <name>CoilReduction</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>CoilReductionGadget</classname>
+      <property><name>coils_out</name><value>16</value></property>
+    </gadget>
+
+    <gadget>
+      <name>RemoveROOversampling</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>RemoveROOversamplingGadget</classname>
+    </gadget>
+
+    <gadget>
+      <name>Grappa</name>
+      <dll>gadgetron_grappa</dll>
+      <classname>GrappaGadget</classname>
+      <!-- After PCA gadget, the device channel will be the first channel -->
+      <!--
+      <property><name>uncombined_channels</name><value>0</value></property>
+      -->
+      <property><name>device_channels</name><value>present_uncombined_channels at PCA</value></property>
+    </gadget>
+
+    <gadget>
+      <name>GrappaUnmixing</name>
+      <dll>gadgetron_grappa</dll>
+      <classname>GrappaUnmixingGadget</classname>
+    </gadget>
+
+     <gadget>
+      <name>Extract</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ExtractGadget</classname>
+    </gadget>
+
+    <!--
+    <gadget>
+      <name>ImageWrite</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageWriterGadgetFLOAT</classname>
+    </gadget>
+    -->
+
+    <gadget>
+      <name>AutoScale</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>AutoScaleGadget</classname>
+    </gadget>
+    
+    <gadget>
+      <name>FloatToShort</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>FloatToUShortGadget</classname>
+    </gadget>
+    
+    <!--
+    <gadget>
+      <name>ImageFinishCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishGadgetCPLX</classname>
+    </gadget>
+    -->
+
+    <!--
+    <gadget>
+      <name>ImageFinishFLOAT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishGadgetFLOAT</classname>
+    </gadget>
+ -->
+
+    <gadget>
+      <name>DeviceChannelSplitter</name>
+      <dll>gadgetron_interventional_mri</dll>
+      <classname>DeviceChannelSplitterGadgetUSHORT</classname>
+    </gadget>
+
+     <gadget>
+      <name>ImageFinishAttribUSHORT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishAttribGadgetUSHORT</classname>
+    </gadget>
+</gadgetronStreamConfiguration>
diff --git a/gadgets/matlab/BaseGadget.m b/gadgets/matlab/BaseGadget.m
index 37e836a..5f6a5c4 100644
--- a/gadgets/matlab/BaseGadget.m
+++ b/gadgets/matlab/BaseGadget.m
@@ -16,7 +16,7 @@ classdef BaseGadget < handle
         % Init function
         function init(g, xmlstr)
             % Convert the xml config string to an IsmrmrdHeader object
-            g.xml = org.ismrm.ismrmrd.XMLString.StringToIsmrmrdHeader(xmlstr);
+            g.xml = ismrmrd.xml.deserialize(xmlstr);
             g.emptyQ();
         end
 
@@ -35,7 +35,7 @@ classdef BaseGadget < handle
 
         % Config function
         function config(g)
-            fprintf('%s\n',char(org.ismrm.ismrmrd.xmlhdr.XMLString.IsmrmrdHeaderToString(g.xml)));
+            fprintf('%s\n',char(serialize(g.xml)));
         end
         
         % Process function
diff --git a/gadgets/matlab/CMakeLists.txt b/gadgets/matlab/CMakeLists.txt
index d4c7358..6f808a6 100644
--- a/gadgets/matlab/CMakeLists.txt
+++ b/gadgets/matlab/CMakeLists.txt
@@ -12,13 +12,16 @@ else(UNIX)
     SET(MATLAB_SUFFIX ".dll")
 endif(UNIX)
 
-add_library(gadgetron_matlab SHARED MatlabGadget.cpp)
+add_library(gadgetron_matlab SHARED gadgetron_matlab_export.h MatlabGadget.h MatlabGadget.cpp)
+set_target_properties(gadgetron_matlab PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+
 target_link_libraries(
     gadgetron_matlab
-    cpucore
+    gadgetron_gadgetbase
+    gadgetron_toolbox_cpucore
+    gadgetron_toolbox_cpucore_math
     ${MATLAB_LIBRARIES}
     ${ISMRMRD_LIBRARIES}
-    ${ISMRMRD_XSD_LIBRARIES}
     optimized ${ACE_LIBRARIES}
     debug ${ACE_DEBUG_LIBRARY}
 )
@@ -37,12 +40,12 @@ if (UNIX)
         COMMENT "Generating Matlab Command Server class" VERBATIM
     )
     add_custom_target(matlab_command_server ALL DEPENDS ${JAVA_MATLAB_SERVER_CLASS})
-    install(FILES ${JAVA_MATLAB_SERVER_CLASS} DESTINATION matlab)
+    install(FILES ${JAVA_MATLAB_SERVER_CLASS} DESTINATION ${GADGETRON_INSTALL_MATLAB_PATH} COMPONENT main)
 else(UNIX)
         MESSAGE( "Don't know how to build the Matlab Command Server class on Windows" )
 endif(UNIX)
 
-install(TARGETS gadgetron_matlab DESTINATION lib)
-install(FILES MatlabGadget.h gadgetron_matlab_export.h DESTINATION include)
-install(FILES BaseGadget.m scale.m accumulate_and_recon.m mask_image.m DESTINATION matlab)
-install(FILES matlab.xml DESTINATION config)
+install(TARGETS gadgetron_matlab DESTINATION lib COMPONENT main)
+install(FILES MatlabGadget.h gadgetron_matlab_export.h DESTINATION include COMPONENT main)
+install(FILES BaseGadget.m scale.m accumulate_and_recon.m mask_image.m DESTINATION ${GADGETRON_INSTALL_MATLAB_PATH} COMPONENT main)
+install(FILES matlab.xml DESTINATION ${GADGETRON_INSTALL_CONFIG_PATH} COMPONENT main)
diff --git a/gadgets/matlab/MatlabGadget.cpp b/gadgets/matlab/MatlabGadget.cpp
index 1f23329..9959961 100644
--- a/gadgets/matlab/MatlabGadget.cpp
+++ b/gadgets/matlab/MatlabGadget.cpp
@@ -55,18 +55,22 @@ int AcquisitionMatlabGadget::process(GadgetContainerMessage<ISMRMRD::Acquisition
     // The queue is a structure array and we read it back
     // TODO put this in a readme file somewhere useful
     engPutVariable(engine_, "hdr_bytes", acq_hdr_bytes);
+
     engPutVariable(engine_, "data", acq_data);
-    cmd = "Q = matgadget.run_process(1, hdr_bytes, data); matgadget.emptyQ();";
+    cmd = "Q = matgadget.run_process(1, hdr_bytes, data); matgadget.emptyQ(); whos()";
     send_matlab_command(cmd);
 
+    GADGET_DEBUG1("Test1\n");
     // Get the size of the gadget's queue
+
     mxArray *Q = engGetVariable(engine_, "Q");
     if (Q == NULL) {
         GADGET_DEBUG1("Failed to get the Queue from matgadget\n");
         return GADGET_FAIL;
     }
+    GADGET_DEBUG1("Test2\n");
     size_t qlen = mxGetNumberOfElements(Q);
-    //GADGET_DEBUG2("Queue size: %ld", qlen);
+    GADGET_DEBUG2("Queue size: %ld", qlen);
 
     // Loop over the elements of the Q, reading one entry at a time
     // to get a structure with type, headerbytes, and data
@@ -164,6 +168,12 @@ int AcquisitionMatlabGadget::process(GadgetContainerMessage<ISMRMRD::Acquisition
     mxDestroyArray(acq_hdr_bytes);
     mxDestroyArray(acq_data);
 
+    //Match engGetVariable with mxDestroy___s
+    mxDestroyArray(Q);
+
+    // We are finished with the incoming messages m1 and m2
+    m1->release();
+
     return GADGET_OK;
 }
 
@@ -279,6 +289,12 @@ int ImageMatlabGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1,
     mxDestroyArray(img_hdr_bytes);
     mxDestroyArray(img_data);
 
+    // Match engGetVariable with mxDestroy___s
+    mxDestroyArray(Q);
+
+    // We are finished with the incoming messages m1 and m2
+    m1->release();
+
     return GADGET_OK;
 }
 
diff --git a/gadgets/matlab/MatlabGadget.h b/gadgets/matlab/MatlabGadget.h
index fb461d7..8ca97b5 100644
--- a/gadgets/matlab/MatlabGadget.h
+++ b/gadgets/matlab/MatlabGadget.h
@@ -4,7 +4,7 @@
 #include "Gadget.h"
 #include "Gadgetron.h"
 #include "hoNDArray.h"
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
 #include "engine.h"     // Matlab Engine header
 
 #include "ace/Synch.h"  // For the MatlabCommandServer
@@ -54,8 +54,6 @@ public:
             // ISMRMRD matlab library
             engEvalString(engine_, "addpath(fullfile(getenv('ISMRMRD_HOME'), 'matlab'));");
 
-            // Call the ISMRMRD utility function for setting the Java path for the XML header
-            engEvalString(engine_, "ismrmrd.util.includejar;");
 
 	    GADGET_DEBUG2("%s", matlab_buffer_);
         }
@@ -90,8 +88,9 @@ protected:
 
         GADGET_DEBUG2("MATLAB Class Name : %s\n", classname_.get()->c_str());
 
-        char matlab_buffer_[2049] = "\0";
-        engOutputBuffer(engine_, matlab_buffer_, 2048);
+        //char matlab_buffer_[2049] = "\0";
+        char matlab_buffer_[20481] = "\0";
+        engOutputBuffer(engine_, matlab_buffer_, 20480);
 
    	// Instantiate the Java Command server
         // TODO: we HAVE to pause in Matlab to allow the java command server thread to start
@@ -177,7 +176,6 @@ class EXPORTGADGETSMATLAB AcquisitionMatlabGadget :
 {
     public:
         GADGET_DECLARE(AcquisitionMatlabGadget);
-
         int process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
                 GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2);
 
@@ -188,7 +186,6 @@ class EXPORTGADGETSMATLAB ImageMatlabGadget :
 {
     public:
         GADGET_DECLARE(ImageMatlabGadget);
-
         int process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1,
                 GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2);
 
diff --git a/gadgets/matlab/accumulate_and_recon.m b/gadgets/matlab/accumulate_and_recon.m
index 50a6e7c..0085fb4 100644
--- a/gadgets/matlab/accumulate_and_recon.m
+++ b/gadgets/matlab/accumulate_and_recon.m
@@ -12,36 +12,39 @@ classdef accumulate_and_recon < handle & BaseGadget
     methods
 
         function g = config(g)
-            fprintf('The resonance frequency is %d\n', g.xml.getExperimentalConditions().getH1ResonanceFrequencyHz());
-            nx = g.xml.getEncoding().get(0).getEncodedSpace().getMatrixSize().getX();
-            ny = g.xml.getEncoding().get(0).getEncodedSpace().getMatrixSize().getY();
+            % Size the accumulation buffer from g.xml; nz, nc and ns fall back to 1 if the lookup fails
+            fprintf('The resonance frequency is %d\n', g.xml.experimentalConditions.H1resonanceFrequency_Hz);
+            nx = g.xml.encoding.encodedSpace.matrixSize.x;
+            ny = g.xml.encoding.encodedSpace.matrixSize.y;
             % for 2D sequences the number of getZ breaks
             try
-              nz = g.xml.getEncoding().get(0).getEncodedSpace().getMatrixSize().getZ();
+              nz = g.xml.encoding.encodedSpace.matrixSize.z;
             catch
+            
 	      nz =1;
             end
             % the number of receiver channels is optional
             try
                 % this is the only cast from java.lang.Integer that works in Matlab
-                nc = double(g.xml.getAcquisitionSystemInformation().getReceiverChannels());
+                nc = g.xml.acquisitionSystemInformation.receiverChannels;
             catch
 	        nc = 1;
             end
             % the number of slices is optional
             try
-                ns = g.xml.getEncoding().get(0).getEncodingLimits().getSlice().getMaximum() + 1;
+                ns = g.xml.encoding.encodingLimits.slice.maximum + 1;
             catch
 	        ns = 1;
             end
 
-            g.center_line = g.xml.getEncoding().get(0).getEncodingLimits().getKspaceEncodingStep1().getCenter();
+            g.center_line = g.xml.encoding.encodingLimits.kspace_encoding_step_1.center;
             g.accumulation = zeros(nx, ny, nz, ns, nc);
             g.image_num = 0;   % todo this needs to be static or global...
             g.series_num = 0;  % todo this needs to be static or global...
         end
 
         function g = process(g, head, data)
+            disp('Processing')
             % stuff the line
             line_offset = floor(size(g.accumulation,2)/2) - g.center_line;
             kyind = head.idx.kspace_encode_step_1 + line_offset + 1;
@@ -53,6 +56,7 @@ classdef accumulate_and_recon < handle & BaseGadget
 
             % At the end of the acquisition, reconstruct the slice
             if (head.flagIsSet(head.FLAGS.ACQ_LAST_IN_SLICE))
+                disp('Found last in slice')
                 img_head = ismrmrd.ImageHeader;
                 img_head.channels = head.active_channels;
                 img_head.slice = head.idx.slice;
diff --git a/gadgets/matlab/scale.m b/gadgets/matlab/scale.m
index db279f7..7d0e08d 100644
--- a/gadgets/matlab/scale.m
+++ b/gadgets/matlab/scale.m
@@ -13,7 +13,6 @@ classdef scale < BaseGadget
         function process(g, head, data)
     	    fprintf('Processing line = %d\n', head.idx.kspace_encode_step_1);
             reshdr = head;
-            reshdr.version = 99;
             resdata = g.factor * data;
             g.putQ(reshdr, resdata);
         end
diff --git a/gadgets/moco/CMakeLists.txt b/gadgets/moco/CMakeLists.txt
index 16b5033..ca99384 100644
--- a/gadgets/moco/CMakeLists.txt
+++ b/gadgets/moco/CMakeLists.txt
@@ -12,7 +12,7 @@ if(ARMADILLO_FOUND)
     
     set(CPU_REG 1)
     list(APPEND CPU_GADGETS cpuRegistrationAveragingGadget.cpp)
-    list(APPEND CPU_LIBS cpucore_math cpureg)
+    list(APPEND CPU_LIBS gadgetron_toolbox_cpucore_math gadgetron_toolbox_cpureg)
     
   elseif(ARMADILLO_VERSION_STRING VERSION_GREATER "3.819" )
     MESSAGE("Armadillo of at least version 3.820 not found, not compiling cpu-based registration gadgets")
@@ -24,7 +24,7 @@ endif (ARMADILLO_FOUND)
 if(CUDA_FOUND)
   set(GPU_REG 1)
   list(APPEND GPU_GADGETS gpuRegistrationAveragingGadget.cpp gpuRegistrationScatteringGadget.cpp)
-  list(APPEND GPU_LIBS gpucore gpureg ${CUDA_LIBRARIES})
+  list(APPEND GPU_LIBS gadgetron_toolbox_gpucore gadgetron_toolbox_gpureg ${CUDA_LIBRARIES})
 elseif (CUDA_FOUND)
   MESSAGE("Cuda not found, not compiling gpu-based registration gadgets")
 endif (CUDA_FOUND)
@@ -34,18 +34,18 @@ include_directories(
   ${CMAKE_SOURCE_DIR}/gadgets/mri_core
   ${CMAKE_SOURCE_DIR}/gadgets/moco
   ${CMAKE_SOURCE_DIR}/toolboxes/core
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
   ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
   ${CMAKE_SOURCE_DIR}/toolboxes/operators
   ${CMAKE_SOURCE_DIR}/toolboxes/solvers
   ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow/
   ${HDF5_INCLUDE_DIR}
   ${HDF5_INCLUDE_DIR}/cpp
-  ${ISMRMRD_XSD_INCLUDE_DIR}
 )
 
 if(CPU_REG)
   include_directories(   
-    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/arma_math
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
     ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow/cpu
     ${ARMADILLO_INCLUDE_DIRS}
     )
@@ -59,18 +59,36 @@ if(GPU_REG)
     )
 endif(GPU_REG)
 
-add_library(gadgetron_moco SHARED
-  ${CPU_GADGETS}
-  ${GPU_GADGETS}
-  ${ISMRMRD_XSD_SOURCE}
-  )
+if(CUDA_FOUND)
+    add_library(gadgetron_moco SHARED
+        cpuRegistrationAveragingGadget.h
+        gadgetron_moco_export.h
+        gpuRegistrationAveragingGadget.h
+        gpuRegistrationScatteringGadget.h
+        RegistrationAveragingGadget.h
+        RegistrationScatteringGadget.h
+        ${CPU_GADGETS}
+        ${GPU_GADGETS}
+      )
+
+    set_target_properties(gadgetron_moco PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})                                                                                                                                                                                                         
+
+    target_link_libraries(gadgetron_moco
+      gadgetron_gadgetbase
+      gadgetron_toolbox_cpucore gadgetron_mricore ${CPU_LIBS} ${GPU_LIBS}
+      ${Boost_LIBRARIES} ${ISMRMRD_LIBRARIES}
+      optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY} 
+      )
 
-target_link_libraries(gadgetron_moco
-  cpucore gadgetron_mricore ${CPU_LIBS} ${GPU_LIBS}
-  ${Boost_LIBRARIES} ${ISMRMRD_LIBRARIES} ${XERCESC_LIBRARIES}
-  optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY} 
-  )
+    install (TARGETS gadgetron_moco DESTINATION lib COMPONENT main)
+endif(CUDA_FOUND)
 
-install (TARGETS gadgetron_moco DESTINATION lib)
+install(FILES   cpuRegistrationAveragingGadget.h
+                gadgetron_moco_export.h
+                gpuRegistrationAveragingGadget.h
+                gpuRegistrationScatteringGadget.h
+                RegistrationAveragingGadget.h
+                RegistrationScatteringGadget.h
+                DESTINATION include COMPONENT main)
 
 add_subdirectory(config)
diff --git a/gadgets/moco/RegistrationAveragingGadget.h b/gadgets/moco/RegistrationAveragingGadget.h
index c5d9b23..13b6217 100644
--- a/gadgets/moco/RegistrationAveragingGadget.h
+++ b/gadgets/moco/RegistrationAveragingGadget.h
@@ -15,7 +15,7 @@
 #include "cuNDArray_reductions.h"
 #endif // USE_CUDA
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <complex>
 #include <boost/shared_ptr.hpp>
 #include <boost/shared_array.hpp>
diff --git a/gadgets/moco/RegistrationScatteringGadget.h b/gadgets/moco/RegistrationScatteringGadget.h
index 6dfc2a5..bd96adf 100644
--- a/gadgets/moco/RegistrationScatteringGadget.h
+++ b/gadgets/moco/RegistrationScatteringGadget.h
@@ -11,7 +11,7 @@
 #include "gadgetron_moco_export.h"
 #include "hoNDArray_fileio.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <complex>
 #include <boost/shared_ptr.hpp>
 #include <boost/shared_array.hpp>
diff --git a/gadgets/moco/config/CMakeLists.txt b/gadgets/moco/config/CMakeLists.txt
index b0191d7..7c18f7c 100644
--- a/gadgets/moco/config/CMakeLists.txt
+++ b/gadgets/moco/config/CMakeLists.txt
@@ -2,12 +2,12 @@ if(CUDA_FOUND)
   if(CPU_REG)
     install (FILES 
       cpureg_cartesian_averaging.xml
-      DESTINATION config)
+      DESTINATION ${GADGETRON_INSTALL_CONFIG_PATH} COMPONENT main)
   endif(CPU_REG)
   
   if(GPU_REG)
     install (FILES 
       gpureg_cartesian_averaging.xml
-      DESTINATION config)
+      DESTINATION ${GADGETRON_INSTALL_CONFIG_PATH} COMPONENT main)
   endif(GPU_REG)
 endif(CUDA_FOUND)
diff --git a/gadgets/moco/config/cpureg_cartesian_averaging.xml b/gadgets/moco/config/cpureg_cartesian_averaging.xml
index 08f1990..7113a2d 100644
--- a/gadgets/moco/config/cpureg_cartesian_averaging.xml
+++ b/gadgets/moco/config/cpureg_cartesian_averaging.xml
@@ -55,7 +55,7 @@
 
   <gadget>
     <name>gpuGenericSensePrepGadget</name>
-    <dll>gadgetron_gpusense</dll>
+    <dll>gadgetron_gpuparallelmri</dll>
     <classname>gpuGenericSensePrepGadget</classname>
     <property><name>deviceno</name><value>0</value></property>
     <property><name>rotations_per_reconstruction</name><value>8</value></property>
@@ -65,7 +65,7 @@
   
   <gadget>
     <name>gpuCgSenseGadget</name>
-    <dll>gadgetron_gpusense</dll>
+    <dll>gadgetron_gpuparallelmri</dll>
     <classname>gpuCgSenseGadget</classname>
     <property><name>pass_on_undesired_data</name>  <value>true</value></property>
     <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/moco/config/gpureg_cartesian_averaging.xml b/gadgets/moco/config/gpureg_cartesian_averaging.xml
index ad1c727..da6054a 100644
--- a/gadgets/moco/config/gpureg_cartesian_averaging.xml
+++ b/gadgets/moco/config/gpureg_cartesian_averaging.xml
@@ -55,7 +55,7 @@
 
   <gadget>
     <name>gpuGenericSensePrepGadget</name>
-    <dll>gadgetron_gpusense</dll>
+    <dll>gadgetron_gpuparallelmri</dll>
     <classname>gpuGenericSensePrepGadget</classname>
     <property><name>deviceno</name><value>0</value></property>
     <property><name>rotations_per_reconstruction</name><value>8</value></property>
@@ -65,7 +65,7 @@
   
   <gadget>
     <name>gpuCgSenseGadget</name>
-    <dll>gadgetron_gpusense</dll>
+    <dll>gadgetron_gpuparallelmri</dll>
     <classname>gpuCgSenseGadget</classname>
     <property><name>pass_on_undesired_data</name>  <value>true</value></property>
     <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/moco/cpuRegistrationAveragingGadget.h b/gadgets/moco/cpuRegistrationAveragingGadget.h
index c01d40b..162acac 100644
--- a/gadgets/moco/cpuRegistrationAveragingGadget.h
+++ b/gadgets/moco/cpuRegistrationAveragingGadget.h
@@ -1,9 +1,8 @@
 #ifndef cpuRegistrationAveragingGadget_H
 #define cpuRegistrationAveragingGadget_H
 
-#include "hoNDArray_operators.h"
+#include "hoNDArray_math.h"
 #include "hoNDArray_utils.h"
-#include "hoRegistration_utils.h"
 #include "hoCKOpticalFlowSolver.h"
 #include "RegistrationAveragingGadget.h"
 
@@ -12,10 +11,8 @@ namespace Gadgetron{
   class EXPORTGADGETS_MOCO cpuRegistrationAveragingGadget2D :
     public RegistrationAveragingGadget< hoNDArray<float>, 2 >
   {    
-
   public:
     GADGET_DECLARE(cpuRegistrationAveragingGadget2D);
-    
     cpuRegistrationAveragingGadget2D() : RegistrationAveragingGadget< hoNDArray<float>, 2 >() {}
     virtual ~cpuRegistrationAveragingGadget2D() {}
 
diff --git a/gadgets/moco/gpuRegistrationAveragingGadget.h b/gadgets/moco/gpuRegistrationAveragingGadget.h
index 495e53e..6f50b35 100644
--- a/gadgets/moco/gpuRegistrationAveragingGadget.h
+++ b/gadgets/moco/gpuRegistrationAveragingGadget.h
@@ -11,10 +11,9 @@ namespace Gadgetron{
   class EXPORTGADGETS_MOCO gpuRegistrationAveragingGadget2D :
     public RegistrationAveragingGadget< cuNDArray<float>, 2 >
   {    
-
   public:
     GADGET_DECLARE(gpuRegistrationAveragingGadget2D);
-    
+
     gpuRegistrationAveragingGadget2D() : RegistrationAveragingGadget< cuNDArray<float>, 2 >() {}
     virtual ~gpuRegistrationAveragingGadget2D() {}
 
diff --git a/gadgets/moco/gpuRegistrationScatteringGadget.h b/gadgets/moco/gpuRegistrationScatteringGadget.h
index 5e7ea66..4c0f388 100644
--- a/gadgets/moco/gpuRegistrationScatteringGadget.h
+++ b/gadgets/moco/gpuRegistrationScatteringGadget.h
@@ -11,10 +11,8 @@ namespace Gadgetron{
   class EXPORTGADGETS_MOCO gpuRegistrationScatteringGadget2D :
     public RegistrationScatteringGadget< cuNDArray<float>, 2 >
   {    
-
   public:
     GADGET_DECLARE(gpuRegistrationScatteringGadget2D);
-    
     gpuRegistrationScatteringGadget2D() : RegistrationScatteringGadget< cuNDArray<float>, 2 >() {}
     virtual ~gpuRegistrationScatteringGadget2D() {}
 
diff --git a/gadgets/mri_core/AccumulatorGadget.cpp b/gadgets/mri_core/AccumulatorGadget.cpp
index ebd9166..b14eae4 100644
--- a/gadgets/mri_core/AccumulatorGadget.cpp
+++ b/gadgets/mri_core/AccumulatorGadget.cpp
@@ -1,5 +1,5 @@
 #include "AccumulatorGadget.h"
-#include "GadgetIsmrmrdReadWrite.h"
+#include "ismrmrd/xml.h"
 
 namespace Gadgetron{
 AccumulatorGadget::AccumulatorGadget()
@@ -21,31 +21,31 @@ AccumulatorGadget::~AccumulatorGadget()
  */
 int AccumulatorGadget::process_config(ACE_Message_Block* mb)
 {
+  ISMRMRD::IsmrmrdHeader h;
+  ISMRMRD::deserialize(mb->rd_ptr(),h);
 
-	boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg = parseIsmrmrdXMLHeader(std::string(mb->rd_ptr()));
-
-	ISMRMRD::ismrmrdHeader::encoding_sequence e_seq = cfg->encoding();
-	if (e_seq.size() != 1) {
-		GADGET_DEBUG2("Number of encoding spaces: %d\n", e_seq.size());
-		GADGET_DEBUG1("This simple AccumulatorGadget only supports one encoding space\n");
-		return GADGET_FAIL;
-	}
-
-	ISMRMRD::encodingSpaceType e_space = (*e_seq.begin()).encodedSpace();
-	ISMRMRD::encodingSpaceType r_space = (*e_seq.begin()).reconSpace();
-	ISMRMRD::encodingLimitsType e_limits = (*e_seq.begin()).encodingLimits();
-
-	GADGET_DEBUG2("Matrix size: %d, %d, %d\n", e_space.matrixSize().x(), e_space.matrixSize().y(), e_space.matrixSize().z());
-	dimensions_.push_back(e_space.matrixSize().x());
-	dimensions_.push_back(e_space.matrixSize().y());
-	dimensions_.push_back(e_space.matrixSize().z());
+  if (h.encoding.size() != 1) {
+    GADGET_DEBUG2("Number of encoding spaces: %d\n", h.encoding.size());
+    GADGET_DEBUG1("This simple AccumulatorGadget only supports one encoding space\n");
+    return GADGET_FAIL;
+  }
 
-    field_of_view_.push_back(e_space.fieldOfView_mm().x());
-    field_of_view_.push_back(e_space.fieldOfView_mm().y());
-    field_of_view_.push_back(e_space.fieldOfView_mm().z());
-    GADGET_DEBUG2("FOV: %f, %f, %f\n", e_space.fieldOfView_mm().x(), e_space.fieldOfView_mm().y(), e_space.fieldOfView_mm().z());
 
-	slices_ = e_limits.slice().present() ? e_limits.slice().get().maximum()+1 : 1;
+  ISMRMRD::EncodingSpace e_space = h.encoding[0].encodedSpace;
+  ISMRMRD::EncodingSpace r_space = h.encoding[0].reconSpace;
+  ISMRMRD::EncodingLimits e_limits = h.encoding[0].encodingLimits;
+  
+  GADGET_DEBUG2("Matrix size: %d, %d, %d\n", r_space.matrixSize.x, e_space.matrixSize.y, e_space.matrixSize.z);
+  dimensions_.push_back(r_space.matrixSize.x);
+  dimensions_.push_back(e_space.matrixSize.y);
+  dimensions_.push_back(e_space.matrixSize.z);
+  
+  field_of_view_.push_back(r_space.fieldOfView_mm.x);
+  field_of_view_.push_back(e_space.fieldOfView_mm.y);
+  field_of_view_.push_back(e_space.fieldOfView_mm.z);
+  GADGET_DEBUG2("FOV: %f, %f, %f\n", r_space.fieldOfView_mm.x, e_space.fieldOfView_mm.y, e_space.fieldOfView_mm.z);
+  
+  slices_ = e_limits.slice? e_limits.slice->maximum+1 : 1;
 
   return GADGET_OK;
 }
@@ -105,13 +105,16 @@ process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
     	sizeof(std::complex<float>)*samples);
   }
   
-  bool is_last_scan_in_slice = ISMRMRD::FlagBit(ISMRMRD::ACQ_LAST_IN_SLICE).isSet(m1->getObjectPtr()->flags);
+  bool is_last_scan_in_slice = m1->getObjectPtr()->isFlagSet(ISMRMRD::ISMRMRD_ACQ_LAST_IN_SLICE);
   
   if (is_last_scan_in_slice) {
     GadgetContainerMessage<ISMRMRD::ImageHeader>* cm1 = 
       new GadgetContainerMessage<ISMRMRD::ImageHeader>();
+
+    // On some platforms, it is necessary to initialize the image header
+    memset(cm1->getObjectPtr(),0,sizeof(ISMRMRD::ImageHeader));
     
-    cm1->getObjectPtr()->flags = 0;
+    cm1->getObjectPtr()->clearAllFlags();
 
     GadgetContainerMessage< hoNDArray< std::complex<float> > >* cm2 = 
       new GadgetContainerMessage<hoNDArray< std::complex<float> > >();
@@ -139,13 +142,13 @@ process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
     memcpy(cm2->getObjectPtr()->get_data_ptr(),b+offset,
 	   sizeof(std::complex<float>)*data_length);
     
-    cm1->getObjectPtr()->matrix_size[0]     = img_dims[0];
-    cm1->getObjectPtr()->matrix_size[1]     = img_dims[1];
-    cm1->getObjectPtr()->matrix_size[2]     = img_dims[2];
+    cm1->getObjectPtr()->matrix_size[0]     = (uint16_t)img_dims[0];
+    cm1->getObjectPtr()->matrix_size[1]     = (uint16_t)img_dims[1];
+    cm1->getObjectPtr()->matrix_size[2]     = (uint16_t)img_dims[2];
     cm1->getObjectPtr()->field_of_view[0]   = field_of_view_[0];
     cm1->getObjectPtr()->field_of_view[1]   = field_of_view_[1];
     cm1->getObjectPtr()->field_of_view[2]   = field_of_view_[2];
-    cm1->getObjectPtr()->channels           = img_dims[3];
+    cm1->getObjectPtr()->channels           = (uint16_t)img_dims[3];
     cm1->getObjectPtr()->slice   = m1->getObjectPtr()->idx.slice;
 
     memcpy(cm1->getObjectPtr()->position,
@@ -167,9 +170,9 @@ process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
     memcpy(cm1->getObjectPtr()->patient_table_position,
     		m1->getObjectPtr()->patient_table_position, sizeof(float)*3);
 
-    cm1->getObjectPtr()->image_data_type = ISMRMRD::DATA_COMPLEX_FLOAT;
-    cm1->getObjectPtr()->image_index = ++image_counter_;
-    cm1->getObjectPtr()->image_series_index = image_series_;
+    cm1->getObjectPtr()->data_type = ISMRMRD::ISMRMRD_CXFLOAT;
+    cm1->getObjectPtr()->image_index = (uint16_t)(++image_counter_);
+    cm1->getObjectPtr()->image_series_index = (uint16_t)image_series_;
 
     if (this->next()->putq(cm1) < 0) {
     	return GADGET_FAIL;
diff --git a/gadgets/mri_core/AccumulatorGadget.h b/gadgets/mri_core/AccumulatorGadget.h
index d0fa9dc..5eddf8c 100644
--- a/gadgets/mri_core/AccumulatorGadget.h
+++ b/gadgets/mri_core/AccumulatorGadget.h
@@ -5,7 +5,7 @@
 #include "hoNDArray.h"
 #include "gadgetron_mricore_export.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <complex>
 
 namespace Gadgetron{
diff --git a/gadgets/mri_core/AcquisitionAccumulateTriggerGadget.cpp b/gadgets/mri_core/AcquisitionAccumulateTriggerGadget.cpp
new file mode 100644
index 0000000..b74b0a5
--- /dev/null
+++ b/gadgets/mri_core/AcquisitionAccumulateTriggerGadget.cpp
@@ -0,0 +1,403 @@
+#include "GadgetIsmrmrdReadWrite.h"
+#include "AcquisitionAccumulateTriggerGadget.h"
+#include "Gadgetron.h"
+#include "mri_core_data.h"
+
+namespace Gadgetron{
+
+  AcquisitionAccumulateTriggerGadget::~AcquisitionAccumulateTriggerGadget()
+  {
+    // Buckets are expected to be gone by now; release any that are still held in the map.
+    map_type_::iterator bucket_it = buckets_.begin();
+    while (bucket_it != buckets_.end()) {
+      if (bucket_it->second != 0) bucket_it->second->release();
+      ++bucket_it;
+    }
+  }
+
+  // Maps the "trigger_dimension" and "sorting_dimension" properties to IsmrmrdCONDITION values (mb is not read).
+  int AcquisitionAccumulateTriggerGadget::process_config(ACE_Message_Block* mb)
+  {
+    // An empty or unrecognised name falls back to NONE for either property.
+    std::string trigger_dimension = *this->get_string_value("trigger_dimension");
+    std::string sorting_dimension = *this->get_string_value("sorting_dimension");
+    
+    if (trigger_dimension.size() == 0) {
+      trigger_ = NONE;
+    } else if (trigger_dimension.compare("kspace_encode_step_1") == 0) {
+      trigger_ = KSPACE_ENCODE_STEP_1;
+    } else if (trigger_dimension.compare("kspace_encode_step_2") == 0) {
+      trigger_ = KSPACE_ENCODE_STEP_2;
+    } else if (trigger_dimension.compare("average") == 0) {
+      trigger_ = AVERAGE;
+    } else if (trigger_dimension.compare("slice") == 0) {
+      trigger_ = SLICE;
+    } else if (trigger_dimension.compare("contrast") == 0) {
+      trigger_ = CONTRAST;
+    } else if (trigger_dimension.compare("phase") == 0) {
+      trigger_ = PHASE;
+    } else if (trigger_dimension.compare("repetition") == 0) {
+      trigger_ = REPETITION;
+    } else if (trigger_dimension.compare("set") == 0) {
+      trigger_ = SET;
+    } else if (trigger_dimension.compare("segment") == 0) {
+      trigger_ = SEGMENT;
+    } else if (trigger_dimension.compare("user_0") == 0) {
+      trigger_ = USER_0;
+    } else if (trigger_dimension.compare("user_1") == 0) {
+      trigger_ = USER_1;
+    } else if (trigger_dimension.compare("user_2") == 0) {
+      trigger_ = USER_2;
+    } else if (trigger_dimension.compare("user_3") == 0) {
+      trigger_ = USER_3;
+    } else if (trigger_dimension.compare("user_4") == 0) {
+      trigger_ = USER_4;
+    } else if (trigger_dimension.compare("user_5") == 0) {
+      trigger_ = USER_5;
+    } else if (trigger_dimension.compare("user_6") == 0) {
+      trigger_ = USER_6;
+    } else if (trigger_dimension.compare("user_7") == 0) {
+      trigger_ = USER_7;
+    } else {
+      GADGET_DEBUG2("WARNING: Unknown trigger dimension (%s), trigger condition set to NONE (end of scan)\n", trigger_dimension.c_str());
+      trigger_ = NONE;
+    }
+  
+    GADGET_DEBUG2("TRIGGER DIMENSION IS: %s (%d)\n", trigger_dimension.c_str(), trigger_);
+    // Same name-to-condition mapping for the sorting property.
+    if (sorting_dimension.size() == 0) {
+      sort_ = NONE;
+    } else if (sorting_dimension.compare("kspace_encode_step_1") == 0) {
+      sort_ = KSPACE_ENCODE_STEP_1;
+    } else if (sorting_dimension.compare("kspace_encode_step_2") == 0) {
+      sort_ = KSPACE_ENCODE_STEP_2;
+    } else if (sorting_dimension.compare("average") == 0) {
+      sort_ = AVERAGE;
+    } else if (sorting_dimension.compare("slice") == 0) {
+      sort_ = SLICE;
+    } else if (sorting_dimension.compare("contrast") == 0) {
+      sort_ = CONTRAST;
+    } else if (sorting_dimension.compare("phase") == 0) {
+      sort_ = PHASE;
+    } else if (sorting_dimension.compare("repetition") == 0) {
+      sort_ = REPETITION;
+    } else if (sorting_dimension.compare("set") == 0) {
+      sort_ = SET;
+    } else if (sorting_dimension.compare("segment") == 0) {
+      sort_ = SEGMENT;
+    } else if (sorting_dimension.compare("user_0") == 0) {
+      sort_ = USER_0;
+    } else if (sorting_dimension.compare("user_1") == 0) {
+      sort_ = USER_1;
+    } else if (sorting_dimension.compare("user_2") == 0) {
+      sort_ = USER_2;
+    } else if (sorting_dimension.compare("user_3") == 0) {
+      sort_ = USER_3;
+    } else if (sorting_dimension.compare("user_4") == 0) {
+      sort_ = USER_4;
+    } else if (sorting_dimension.compare("user_5") == 0) {
+      sort_ = USER_5;
+    } else if (sorting_dimension.compare("user_6") == 0) {
+      sort_ = USER_6;
+    } else if (sorting_dimension.compare("user_7") == 0) {
+      sort_ = USER_7;
+    } else {
+      GADGET_DEBUG2("WARNING: Unknown sort dimension (%s), sorting set to NONE\n", sorting_dimension.c_str());
+      sort_ = NONE;
+    }
+  
+    GADGET_DEBUG2("SORTING DIMENSION IS: %s (%d)\n", sorting_dimension.c_str(), sort_);
+
+    trigger_events_ = 0;
+
+    return GADGET_OK;
+  }
+
+  int AcquisitionAccumulateTriggerGadget
+  ::process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
+	    GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
+  {
+    // Sorts each incoming acquisition into a bucket keyed by the sort_ index and calls trigger() when the trigger_ index changes.
+    //Ignore noise scans
+    if (m1->getObjectPtr()->isFlagSet(ISMRMRD::ISMRMRD_ACQ_IS_NOISE_MEASUREMENT)) {
+        m1->release();
+        return GADGET_OK;
+    }
+                
+    //It is enough to put the first one, since they are linked
+    unsigned short sorting_index = 0;
+    switch (sort_) {
+    case KSPACE_ENCODE_STEP_1:
+      sorting_index = m1->getObjectPtr()->idx.kspace_encode_step_1;
+      break;
+    case KSPACE_ENCODE_STEP_2:
+      sorting_index = m1->getObjectPtr()->idx.kspace_encode_step_2;
+      break;
+    case AVERAGE:
+      sorting_index = m1->getObjectPtr()->idx.average;
+      break;
+    case SLICE:
+      sorting_index = m1->getObjectPtr()->idx.slice;
+      break;
+    case CONTRAST:
+      sorting_index = m1->getObjectPtr()->idx.contrast;
+      break;
+    case PHASE:
+      sorting_index = m1->getObjectPtr()->idx.phase;
+      break;
+    case REPETITION:
+      sorting_index = m1->getObjectPtr()->idx.repetition;
+      break;
+    case SET:
+      sorting_index = m1->getObjectPtr()->idx.set;
+      break;
+    case SEGMENT:
+      sorting_index = m1->getObjectPtr()->idx.segment;	
+      break;
+    case USER_0:
+      sorting_index = m1->getObjectPtr()->idx.user[0];
+      break;
+    case USER_1:
+      sorting_index = m1->getObjectPtr()->idx.user[1];	
+      break;
+    case USER_2:
+      sorting_index = m1->getObjectPtr()->idx.user[2];
+      break;
+    case USER_3:
+      sorting_index = m1->getObjectPtr()->idx.user[3];
+      break;
+    case USER_4:
+      sorting_index = m1->getObjectPtr()->idx.user[4];	
+      break;
+    case USER_5:
+      sorting_index = m1->getObjectPtr()->idx.user[5];
+      break;
+    case USER_6:
+      sorting_index = m1->getObjectPtr()->idx.user[6];	
+      break;
+    case USER_7:
+      sorting_index = m1->getObjectPtr()->idx.user[7];
+      break;
+    case NONE:
+      sorting_index = 0;
+      break;	
+    default:
+      GADGET_DEBUG2("Unknown sorting condition %d\n", sort_);
+      m1->release();
+      return GADGET_FAIL;
+    }
+    // NOTE(review): the default case above releases m1 before failing, the trigger default below does not - confirm which one the caller expects.
+    //Create the data structure that will go in the bucket
+    IsmrmrdAcquisitionData d(m1,m2,AsContainerMessage< hoNDArray<float> >(m2->cont()));
+
+    //Now let's figure out if a trigger condition has occurred.
+    if (prev_.head_) { //Make sure this is not the first acquisition we are receiving
+      switch (trigger_) {
+      case KSPACE_ENCODE_STEP_1:
+ 	if (prev_.head_->getObjectPtr()->idx.kspace_encode_step_1 !=
+	    d.head_->getObjectPtr()->idx.kspace_encode_step_1) {
+	  trigger();
+	}
+	break;
+      case KSPACE_ENCODE_STEP_2:
+ 	if (prev_.head_->getObjectPtr()->idx.kspace_encode_step_2 !=
+	    d.head_->getObjectPtr()->idx.kspace_encode_step_2) {
+	  trigger();
+	}
+	break;
+      case AVERAGE:
+ 	if (prev_.head_->getObjectPtr()->idx.average !=
+	    d.head_->getObjectPtr()->idx.average) {
+	  trigger();
+	}
+	break;
+      case SLICE:
+	if (prev_.head_->getObjectPtr()->idx.slice !=
+	    d.head_->getObjectPtr()->idx.slice) {
+	  trigger();
+	}
+	break;
+      case CONTRAST:
+	if (prev_.head_->getObjectPtr()->idx.contrast !=
+	    d.head_->getObjectPtr()->idx.contrast) {
+	  trigger();
+	}
+	break;
+      case PHASE:
+	if (prev_.head_->getObjectPtr()->idx.phase !=
+	    d.head_->getObjectPtr()->idx.phase) {
+	  trigger();
+	}
+	break;
+      case REPETITION:
+	if (prev_.head_->getObjectPtr()->idx.repetition !=
+	    d.head_->getObjectPtr()->idx.repetition) {
+	  trigger();
+	}
+	break;
+      case SET:
+	if (prev_.head_->getObjectPtr()->idx.set !=
+	    d.head_->getObjectPtr()->idx.set) {
+	  trigger();
+	}      
+	break;
+      case SEGMENT:
+	if (prev_.head_->getObjectPtr()->idx.segment !=
+	    d.head_->getObjectPtr()->idx.segment) {
+	  trigger();
+	}
+	break;
+      case USER_0:
+	if (prev_.head_->getObjectPtr()->idx.user[0] !=
+	    d.head_->getObjectPtr()->idx.user[0]) {
+	  trigger();
+	}
+	break;
+      case USER_1:
+	if (prev_.head_->getObjectPtr()->idx.user[1] !=
+	    d.head_->getObjectPtr()->idx.user[1]) {
+	  trigger();
+	}
+	break;
+      case USER_2:
+	if (prev_.head_->getObjectPtr()->idx.user[2] !=
+	    d.head_->getObjectPtr()->idx.user[2]) {
+	  trigger();
+	}
+	break;
+      case USER_3:
+	if (prev_.head_->getObjectPtr()->idx.user[3] !=
+	    d.head_->getObjectPtr()->idx.user[3]) {
+	  trigger();
+	}
+	break;
+      case USER_4:
+	if (prev_.head_->getObjectPtr()->idx.user[4] !=
+	    d.head_->getObjectPtr()->idx.user[4]) {
+	  trigger();
+	}
+	break;
+      case USER_5:
+	if (prev_.head_->getObjectPtr()->idx.user[5] !=
+	    d.head_->getObjectPtr()->idx.user[5]) {
+	  trigger();
+	}
+	break;
+      case USER_6:
+	if (prev_.head_->getObjectPtr()->idx.user[6] !=
+	    d.head_->getObjectPtr()->idx.user[6]) {
+	  trigger();
+	}
+	break;
+      case USER_7:
+	if (prev_.head_->getObjectPtr()->idx.user[7] !=
+	    d.head_->getObjectPtr()->idx.user[7]) {
+	  trigger();
+	}
+	break;
+      case NONE:
+	break;	
+      default:
+	GADGET_DEBUG2("Unknown trigger condition %d\n", trigger_);
+	return GADGET_FAIL;	
+      }
+    }
+    // NOTE(review): the return value of trigger() is ignored in every case above.
+    //Now we can update the previous data item that we store for 
+    //purposes of determining if trigger condition has occurred. 
+    prev_ = d;
+    
+    //Find the bucket the data should go in
+    map_type_::iterator it = buckets_.find(sorting_index);
+    if (it == buckets_.end()) {
+      //Bucket does not exist, create it
+      buckets_[sorting_index] = new GadgetContainerMessage<IsmrmrdAcquisitionBucket>;
+    }
+    IsmrmrdAcquisitionBucket* bucket = buckets_[sorting_index]->getObjectPtr();
+    // The per-bucket statistics vectors below are indexed by encoding space and grown on demand.
+    uint16_t espace = m1->getObjectPtr()->encoding_space_ref;
+
+    if (!ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_PARALLEL_CALIBRATION).isSet(m1->getObjectPtr()->flags))
+      {
+	bucket->data_.push_back(d);
+        if (bucket->datastats_.size() < (espace+1)) {
+            bucket->datastats_.resize(espace+1);
+        }
+        bucket->datastats_[espace].kspace_encode_step_1.insert(m1->getObjectPtr()->idx.kspace_encode_step_1);
+        bucket->datastats_[espace].kspace_encode_step_2.insert(m1->getObjectPtr()->idx.kspace_encode_step_2);
+        bucket->datastats_[espace].slice.insert(m1->getObjectPtr()->idx.slice);
+        bucket->datastats_[espace].phase.insert(m1->getObjectPtr()->idx.phase);
+        bucket->datastats_[espace].contrast.insert(m1->getObjectPtr()->idx.contrast);
+        bucket->datastats_[espace].set.insert(m1->getObjectPtr()->idx.set);
+        bucket->datastats_[espace].segment.insert(m1->getObjectPtr()->idx.segment);
+        bucket->datastats_[espace].average.insert(m1->getObjectPtr()->idx.average);
+        bucket->datastats_[espace].repetition.insert(m1->getObjectPtr()->idx.repetition);
+      }
+    // Calibration-only lines are stored in ref_ alone; calibration-and-imaging lines end up in both data_ and ref_.
+    if ( ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_PARALLEL_CALIBRATION).isSet(m1->getObjectPtr()->flags) ||
+	 ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_PARALLEL_CALIBRATION_AND_IMAGING).isSet(m1->getObjectPtr()->flags) )
+      {
+	bucket->ref_.push_back(d);
+        if (bucket->refstats_.size() < (espace+1)) {
+            bucket->refstats_.resize(espace+1);
+        }
+        bucket->refstats_[espace].kspace_encode_step_1.insert(m1->getObjectPtr()->idx.kspace_encode_step_1);
+        bucket->refstats_[espace].kspace_encode_step_2.insert(m1->getObjectPtr()->idx.kspace_encode_step_2);
+        bucket->refstats_[espace].slice.insert(m1->getObjectPtr()->idx.slice);
+        bucket->refstats_[espace].phase.insert(m1->getObjectPtr()->idx.phase);
+        bucket->refstats_[espace].contrast.insert(m1->getObjectPtr()->idx.contrast);
+        bucket->refstats_[espace].set.insert(m1->getObjectPtr()->idx.set);
+        bucket->refstats_[espace].segment.insert(m1->getObjectPtr()->idx.segment);
+        bucket->refstats_[espace].average.insert(m1->getObjectPtr()->idx.average);
+        bucket->refstats_[espace].repetition.insert(m1->getObjectPtr()->idx.repetition);
+      }
+
+    //We can release the data now. It is reference counted and the counters have been incremented through the operations above. 
+    m1->release();
+
+    //TODO: 
+    // At this point it would make sense to check the data flags for trigger conditions. 
+    
+    return GADGET_OK;
+  }
+
+  // Sends every accumulated bucket to the next gadget, then empties the map and resets prev_.
+  int AcquisitionAccumulateTriggerGadget::trigger() 
+  {
+    trigger_events_++; //We will keep track of the triggers we encounter
+    GADGET_DEBUG2("Trigger (%d) occurred, sending out %d buckets\n", trigger_events_, buckets_.size());
+    for (map_type_::iterator it = buckets_.begin(); it != buckets_.end(); it++) {
+      GadgetContainerMessage<IsmrmrdAcquisitionBucket>* bucket = it->second;
+      if (!bucket) continue;
+      //Detach the bucket from the map first: once it is queued or released below, a pointer
+      //left behind would be released a second time by the destructor or a later trigger.
+      it->second = 0;
+      if (this->next()->putq(bucket) == -1) {
+        bucket->release();
+        GADGET_DEBUG1("Failed to pass bucket down the chain\n");
+        return GADGET_FAIL; //Buckets not sent yet stay in the map and are released by the destructor
+      }
+    }
+    buckets_.clear();
+    prev_ = IsmrmrdAcquisitionData(); //Reset previous so that we don't end up triggering again
+    return GADGET_OK;
+  }
+
+  int AcquisitionAccumulateTriggerGadget::close(unsigned long flags)
+  {
+    // Let the base class close first; its status is what this method reports.
+    const int base_status = Gadget::close(flags);
+    if ( flags == 0 ) return base_status;
+
+    // Non-zero flags: push out whatever is still accumulated (the result of trigger() is not checked).
+    GADGET_DEBUG1("AcquisitionAccumulateTriggerGadget::close\n");
+    trigger();
+    return base_status;
+  }
+
+
+  GADGET_FACTORY_DECLARE(AcquisitionAccumulateTriggerGadget)
+
+}
+
+
diff --git a/gadgets/mri_core/AcquisitionAccumulateTriggerGadget.h b/gadgets/mri_core/AcquisitionAccumulateTriggerGadget.h
new file mode 100644
index 0000000..485b337
--- /dev/null
+++ b/gadgets/mri_core/AcquisitionAccumulateTriggerGadget.h
@@ -0,0 +1,47 @@
+#ifndef ACQUISITIONACCUMULATETRIGGERGADGET_H
+#define ACQUISITIONACCUMULATETRIGGERGADGET_H
+
+#include "Gadget.h"
+#include "hoNDArray.h"
+#include "gadgetron_mricore_export.h"
+
+#include <ismrmrd/ismrmrd.h>
+#include <complex>
+#include <map>
+#include "mri_core_data.h"
+
+namespace Gadgetron{
+
+
+  class EXPORTGADGETSMRICORE AcquisitionAccumulateTriggerGadget : 
+  public Gadget2<ISMRMRD::AcquisitionHeader,hoNDArray< std::complex<float> > >
+    {
+    public:
+      GADGET_DECLARE(AcquisitionAccumulateTriggerGadget);
+      // Buckets are stored per sorting index (the acquisition index selected by sort_).
+      typedef std::map< unsigned short int, GadgetContainerMessage<IsmrmrdAcquisitionBucket>* > map_type_;
+      // Releases any bucket that is still held in buckets_.
+      virtual ~AcquisitionAccumulateTriggerGadget();
+      // Calls Gadget::close(flags) and, when flags is non-zero, trigger().
+      int close(unsigned long flags);
+
+
+    protected:
+      IsmrmrdCONDITION trigger_; // index whose change between consecutive acquisitions fires trigger()
+      IsmrmrdCONDITION sort_; // index used as the key into buckets_
+      map_type_  buckets_; // buckets accumulated since the last trigger
+      IsmrmrdAcquisitionData prev_; // previous acquisition, compared against the current one
+      unsigned long trigger_events_; // number of trigger() calls so far
+      // Reads the "trigger_dimension" and "sorting_dimension" properties.
+      virtual int process_config(ACE_Message_Block* mb);
+      // Adds one acquisition to its bucket, triggering first if the trigger index changed.
+      virtual int process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
+			  GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2);
+      // Passes all buckets to the next gadget and clears buckets_.
+      virtual int trigger();
+
+    };
+
+  
+}
+#endif //ACQUISITIONACCUMULATETRIGGERGADGET_H
diff --git a/gadgets/mri_core/AcquisitionFinishGadget.h b/gadgets/mri_core/AcquisitionFinishGadget.h
index 99fccba..3d19143 100644
--- a/gadgets/mri_core/AcquisitionFinishGadget.h
+++ b/gadgets/mri_core/AcquisitionFinishGadget.h
@@ -6,7 +6,7 @@
 #include "GadgetMRIHeaders.h"
 #include "gadgetron_mricore_export.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <complex>
 
 namespace Gadgetron{
diff --git a/gadgets/mri_core/AcquisitionPassthroughGadget.h b/gadgets/mri_core/AcquisitionPassthroughGadget.h
index fe56836..9708de5 100644
--- a/gadgets/mri_core/AcquisitionPassthroughGadget.h
+++ b/gadgets/mri_core/AcquisitionPassthroughGadget.h
@@ -5,7 +5,7 @@
 #include "hoNDArray.h"
 #include "gadgetron_mricore_export.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <complex>
 
 namespace Gadgetron{
diff --git a/gadgets/mri_core/AsymmetricEchoAdjustROGadget.cpp b/gadgets/mri_core/AsymmetricEchoAdjustROGadget.cpp
new file mode 100644
index 0000000..fb5a367
--- /dev/null
+++ b/gadgets/mri_core/AsymmetricEchoAdjustROGadget.cpp
@@ -0,0 +1,142 @@
+#include "AsymmetricEchoAdjustROGadget.h"
+#include "ismrmrd/xml.h"
+
+namespace Gadgetron
+{
+
+AsymmetricEchoAdjustROGadget::AsymmetricEchoAdjustROGadget() : maxRO_(0)
+{
+    // maxRO_ stays 0 until process_config() reads the readout length from the header.
+}
+
+int AsymmetricEchoAdjustROGadget::process_config(ACE_Message_Block* mb)
+{
+  // Parse the header carried by the configuration message.
+  ISMRMRD::IsmrmrdHeader header;
+  deserialize(mb->rd_ptr(),header);
+  const bool single_encoding = (header.encoding.size() == 1);
+  if (!single_encoding) {
+    GADGET_DEBUG2("Number of encoding spaces: %d\n", header.encoding.size());
+    GADGET_DEBUG1("This partial fourier gadget only supports one encoding space\n");
+    return GADGET_FAIL;
+  }
+  // The x size of the encoded space matrix is the length process() pads short readouts to.
+  maxRO_ = header.encoding[0].encodedSpace.matrixSize.x;
+  GADGET_MSG("max RO : " << maxRO_);
+  return GADGET_OK;
+}
+
+int addPrePostZeros(size_t centre_column, size_t samples)
+{
+    // Compares twice the centre index with the number of samples to decide on
+    // which side of the readout zeros are needed.
+    // Return codes:
+    //   0 : no zeros (twice the centre equals the number of samples)
+    //   1 : pre zeros (twice the centre is below the number of samples)
+    //   2 : post zeros (twice the centre is above the number of samples)
+    const size_t doubled_centre = 2*centre_column;
+
+    int zero_mode = 0;
+    if ( doubled_centre < samples )
+    {
+        zero_mode = 1;
+    }
+    else if ( doubled_centre > samples )
+    {
+        zero_mode = 2;
+    }
+
+    return zero_mode;
+}
+
+// Re-centres asymmetric-echo readouts by zero padding them to maxRO_ samples.
+//
+// Noise scans, readouts whose centre sample already sits in the middle, and readouts
+// that are not shorter than maxRO_ are forwarded unchanged. For every other readout a
+// zero-filled array with first dimension maxRO_ is created, each of the active_channels
+// lines is copied to the end of its padded line (pre zeros) or to its start (post
+// zeros, as reported by addPrePostZeros), and the new array replaces m2 as the
+// continuation of m1.
+//
+// m1: acquisition header; number_of_samples is updated when padding is applied.
+// m2: sample data attached to m1; released here when it is replaced.
+// Returns GADGET_OK on success, GADGET_FAIL if the padded array cannot be created, and
+// -1 if m1 cannot be put on the next gadget's queue.
+int AsymmetricEchoAdjustROGadget
+::process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
+        GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
+{
+
+    bool is_noise = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_NOISE_MEASUREMENT).isSet(m1->getObjectPtr()->flags);
+    long long channels = (long long)m1->getObjectPtr()->active_channels;
+    size_t samples = m1->getObjectPtr()->number_of_samples;
+    size_t centre_column = m1->getObjectPtr()->center_sample;
+
+    // adjust the center echo; noise scans are never padded (0 : no zeros, 1 : pre, 2 : post)
+    int az = is_noise ? 0 : addPrePostZeros(centre_column, samples);
+
+    if ( az!= 0 && samples < maxRO_ )
+    {
+        GadgetContainerMessage< hoNDArray< std::complex<float> > >* m3 = new GadgetContainerMessage< hoNDArray< std::complex<float> > >();
+
+        std::vector<size_t> data_out_dims = *m2->getObjectPtr()->get_dimensions();
+        data_out_dims[0] = maxRO_;
+        try
+        {
+            m3->getObjectPtr()->create(&data_out_dims);
+        }
+        catch(...)
+        {
+            GADGET_DEBUG1("Unable to create new data array for zero padded data\n");
+            m3->release(); // m3 is not linked to m1 yet, so it would leak if not released here
+            return GADGET_FAIL;
+        }
+        m3->getObjectPtr()->fill(0);
+
+        std::complex<float>* pM3 = m3->getObjectPtr()->get_data_ptr();
+        std::complex<float>* pM2 = m2->getObjectPtr()->get_data_ptr();
+
+        long long c;
+        size_t numOfBytes = sizeof( std::complex<float> )*samples;
+
+        if ( az == 1 ) // pre zeros
+        {
+            // samples < maxRO_ holds in this branch, so maxRO_-samples cannot underflow
+            //#pragma omp parallel for default(none) private(c) shared(channels, pM3, pM2, samples, numOfBytes)
+            for ( c=0; c<channels; c++ )
+            {
+                memcpy(pM3+c*maxRO_+maxRO_-samples, pM2+c*samples, numOfBytes);
+            }
+        }
+
+        if ( az == 2 ) // post zeros
+        {
+            // the samples stay at the start of each padded line
+            //#pragma omp parallel for default(none) private(c) shared(channels, pM3, pM2, samples, numOfBytes)
+            for ( c=0; c<channels; c++ )
+            {
+                memcpy(pM3+c*maxRO_, pM2+c*samples, numOfBytes);
+            }
+        }
+
+        m2->release(); //We are done with this data
+
+        m1->cont(m3);
+        m1->getObjectPtr()->number_of_samples = data_out_dims[0];
+    }
+
+    // Padded or not, the acquisition always continues down the chain.
+    if (this->next()->putq(m1) == -1) 
+    {
+        ACE_ERROR_RETURN( (LM_ERROR,
+                ACE_TEXT("%p\n"),
+                ACE_TEXT("AsymmetricEchoAdjustROGadget::process, passing data on to next gadget")),
+                -1);
+    }
+
+    return GADGET_OK;
+}
+
+GADGET_FACTORY_DECLARE(AsymmetricEchoAdjustROGadget)
+
+}
diff --git a/gadgets/mri_core/AsymmetricEchoAdjustROGadget.h b/gadgets/mri_core/AsymmetricEchoAdjustROGadget.h
new file mode 100644
index 0000000..1c97492
--- /dev/null
+++ b/gadgets/mri_core/AsymmetricEchoAdjustROGadget.h
@@ -0,0 +1,32 @@
+
+#pragma once
+
+#include "Gadget.h"
+#include "hoNDArray.h"
+#include "ismrmrd/ismrmrd.h"
+#include "gadgetron_mricore_export.h"
+
+namespace Gadgetron
+{
+
+/// for incoming readout
+/// if not the noise scan and the partial fourier along readout is detected
+/// the readout data will be realigned with center of echo at the centre of incoming 1D array
+class EXPORTGADGETSMRICORE AsymmetricEchoAdjustROGadget : public Gadgetron::Gadget2<ISMRMRD::AcquisitionHeader, hoNDArray< std::complex<float> > >
+{
+public:
+
+    GADGET_DECLARE(AsymmetricEchoAdjustROGadget);
+    /// initialises maxRO_ to 0
+    AsymmetricEchoAdjustROGadget();
+
+protected:
+    /// process_config() sets maxRO_ from the header; process() zero pads shorter readouts to that length
+    virtual int process_config(ACE_Message_Block* mb);
+    virtual int process(Gadgetron::GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
+        Gadgetron::GadgetContainerMessage< Gadgetron::hoNDArray< std::complex<float> > >* m2);
+    /// x size of the encoded space matrix, taken from the single supported encoding
+    unsigned int maxRO_;
+};
+
+}
diff --git a/gadgets/mri_core/AutoScaleGadget.cpp b/gadgets/mri_core/AutoScaleGadget.cpp
index d77d82a..e4e301d 100644
--- a/gadgets/mri_core/AutoScaleGadget.cpp
+++ b/gadgets/mri_core/AutoScaleGadget.cpp
@@ -23,7 +23,7 @@ AutoScaleGadget::~AutoScaleGadget() {
 
 int AutoScaleGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader> *m1, GadgetContainerMessage<hoNDArray<float> > *m2)
 {
-	if (m1->getObjectPtr()->image_type == ISMRMRD::TYPE_MAGNITUDE) { //Only scale magnitude images for now
+	if (m1->getObjectPtr()->image_type == ISMRMRD::ISMRMRD_IMTYPE_MAGNITUDE) { //Only scale magnitude images for now
 		float max = 0.0f;
 		float* d = m2->getObjectPtr()->get_data_ptr();
 		for (unsigned long int i = 0; i < m2->getObjectPtr()->get_number_of_elements(); i++) {
@@ -47,10 +47,10 @@ int AutoScaleGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader> *m1, G
 		}
 
 		//Find 99th percentile
-		long cumsum = 0;
+		long long cumsum = 0;
 		size_t counter = 0;
 		while (cumsum < (0.99*m2->getObjectPtr()->get_number_of_elements())) {
-			cumsum += histogram_[counter++];
+			cumsum += (long long)(histogram_[counter++]);
 		}
 		max = (counter+1)*(max/histogram_bins_);
 
diff --git a/gadgets/mri_core/AutoScaleGadget.h b/gadgets/mri_core/AutoScaleGadget.h
index 2bbda2a..bcef06e 100644
--- a/gadgets/mri_core/AutoScaleGadget.h
+++ b/gadgets/mri_core/AutoScaleGadget.h
@@ -5,7 +5,7 @@
 #include "hoNDArray.h"
 #include "gadgetron_mricore_export.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 
 namespace Gadgetron{
 
diff --git a/gadgets/mri_core/BucketToBufferGadget.cpp b/gadgets/mri_core/BucketToBufferGadget.cpp
new file mode 100644
index 0000000..1701263
--- /dev/null
+++ b/gadgets/mri_core/BucketToBufferGadget.cpp
@@ -0,0 +1,628 @@
+#include "GadgetIsmrmrdReadWrite.h"
+#include "BucketToBufferGadget.h"
+#include "Gadgetron.h"
+#include "mri_core_data.h"
+#include "hoNDArray_elemwise.h"
+#include "hoNDArray_reductions.h"
+namespace Gadgetron{
+
+  BucketToBufferGadget::~BucketToBufferGadget()
+  {
+    //Intentionally empty: nothing is released here (process() keeps its ReconData buffer map as a local variable).
+  }
+
+  int BucketToBufferGadget
+  ::process_config(ACE_Message_Block* mb)
+  {
+    // Reads the gadget properties and keeps a deserialized copy of the ISMRMRD XML header found in mb; returns GADGET_OK.
+    std::string N_dimension = *this->get_string_value("N_dimension");
+    std::string S_dimension = *this->get_string_value("S_dimension");
+    // Map the N_dimension property onto N_; an empty or unrecognised string selects NONE.
+    if (N_dimension.size() == 0) {
+        N_ = NONE;
+    } else if (N_dimension.compare("average") == 0) {
+        N_ = AVERAGE;
+    } else if (N_dimension.compare("contrast") == 0) {
+        N_ = CONTRAST;
+    } else if (N_dimension.compare("phase") == 0) {
+        N_ = PHASE;
+    } else if (N_dimension.compare("repetition") == 0) {
+        N_ = REPETITION;
+    } else if (N_dimension.compare("set") == 0) {
+        N_ = SET;
+    } else if (N_dimension.compare("segment") == 0) {
+        N_ = SEGMENT;
+    } else if (N_dimension.compare("slice") == 0){
+        N_ = SLICE;
+    } else {
+        GADGET_DEBUG2("WARNING: Unknown N dimension (%s), N set to NONE\n", N_dimension.c_str());
+        N_ = NONE;
+    }
+
+    GADGET_DEBUG2("N DIMENSION IS: %s (%d)\n", N_dimension.c_str(), N_);
+    // Map the S_dimension property onto S_ using the same rules as for N_.
+    if (S_dimension.size() == 0) {
+        S_ = NONE;
+    } else if (S_dimension.compare("average") == 0) {
+        S_ = AVERAGE;
+    } else if (S_dimension.compare("contrast") == 0) {
+        S_ = CONTRAST;
+    } else if (S_dimension.compare("phase") == 0) {
+        S_ = PHASE;
+    } else if (S_dimension.compare("repetition") == 0) {
+        S_ = REPETITION;
+    } else if (S_dimension.compare("set") == 0) {
+        S_ = SET;
+    } else if (S_dimension.compare("segment") == 0) {
+        S_ = SEGMENT;
+    } else if (S_dimension.compare("slice") == 0){  // must test S_dimension here, not N_dimension
+        S_ = SLICE;
+    } else {
+        GADGET_DEBUG2("WARNING: Unknown sort dimension (%s), sorting set to NONE\n", S_dimension.c_str());
+        S_ = NONE;
+    }
+
+    GADGET_DEBUG2("S DIMENSION IS: %s (%d)\n", S_dimension.c_str(), S_);
+    // The bool flags are promoted to int when passed through varargs, hence the %d specifier below.
+    split_slices_  = this->get_bool_value("split_slices");
+    GADGET_DEBUG2("SPLIT SLICES IS: %d\n", split_slices_);
+
+    ignore_segment_  = this->get_bool_value("ignore_segment");
+    GADGET_DEBUG2("IGNORE SEGMENT IS: %d\n", ignore_segment_);
+
+    // keep a copy of the deserialized ismrmrd xml header for runtime
+    ISMRMRD::deserialize(mb->rd_ptr(), hdr_);
+
+    return GADGET_OK;
+  }
+
+  int BucketToBufferGadget
+  ::process(GadgetContainerMessage<IsmrmrdAcquisitionBucket>* m1)
+  {
+    // Sorts one bucket's acquisitions into ReconData buffers (one per getKey value), passes them on; throws std::runtime_error if putq fails.
+    size_t key;
+    std::map<size_t, GadgetContainerMessage<IsmrmrdReconData>* > recon_data_buffers;
+
+    //GADGET_DEBUG1("BucketToBufferGadget::process\n");
+
+    //Some information about the bucket
+    //std::cout << "The Reference part: " << m1->getObjectPtr()->refstats_.size() << std::endl;
+    //std::cout << "   nslices: " << m1->getObjectPtr()->refstats_[0].slice.size() << std::endl;
+    //for (int e=0; e<m1->getObjectPtr()->refstats_.size() ; e++) {
+    //    for (std::set<uint16_t>::iterator it = m1->getObjectPtr()->refstats_[e].kspace_encode_step_1.begin();
+    //         it != m1->getObjectPtr()->refstats_[e].kspace_encode_step_1.end(); ++it) {
+    //        std::cout << "   K1: " <<  *it << std::endl;
+    //    }
+    //}
+    //std::cout << "The data part: " << m1->getObjectPtr()->datastats_.size() << std::endl;
+    //std::cout << "   nslices: " << m1->getObjectPtr()->datastats_[0].slice.size() << std::endl;
+    //for (int e=0; e<m1->getObjectPtr()->datastats_.size() ; e++) {
+    //    for (std::set<uint16_t>::iterator it = m1->getObjectPtr()->datastats_[e].kspace_encode_step_1.begin();
+    //         it != m1->getObjectPtr()->datastats_[e].kspace_encode_step_1.end(); ++it) {
+    //        std::cout << "   K1: " <<  *it << std::endl;
+    //    }
+    //}
+
+    //Iterate over the reference data of the bucket
+    for(std::vector<IsmrmrdAcquisitionData>::iterator it = m1->getObjectPtr()->ref_.begin();
+        it != m1->getObjectPtr()->ref_.end(); ++it)
+      {
+        //Get a reference to the header for this acquisition
+        ISMRMRD::AcquisitionHeader & acqhdr = *it->head_->getObjectPtr();
+
+        //Generate the key to the corresponding ReconData buffer
+        key = getKey(acqhdr.idx);
+
+        //The storage is based on the encoding space
+        uint16_t espace = acqhdr.encoding_space_ref;
+
+        //std::cout << "espace: " << acqhdr.encoding_space_ref << std::endl;
+        //std::cout << "slice: " << acqhdr.idx.slice << std::endl;
+        //std::cout << "rep: " << acqhdr.idx.repetition << std::endl;
+        //std::cout << "k1: " << acqhdr.idx.kspace_encode_step_1 << std::endl;
+        //std::cout << "k2: " << acqhdr.idx.kspace_encode_step_2 << std::endl;
+        //std::cout << "seg: " << acqhdr.idx.segment << std::endl;
+        //std::cout << "key: " << key << std::endl;
+
+        //Get some references to simplify the notation
+        //the reconstruction bit corresponding to this ReconDataBuffer and encoding space
+        IsmrmrdReconBit & rbit = getRBit(recon_data_buffers, key, espace);
+        //and the corresponding data buffer for the reference data
+        IsmrmrdDataBuffered & dataBuffer = rbit.ref_;
+        //this encoding space's xml header info
+        ISMRMRD::Encoding & encoding = hdr_.encoding[espace];
+        //this bucket's reference stats
+        IsmrmrdAcquisitionBucketStats & stats = m1->getObjectPtr()->refstats_[espace];
+
+        //Fill the sampling description for this data buffer
+        fillSamplingDescription(dataBuffer.sampling_, encoding, stats);
+
+        //Make sure that the data storage for this data buffer has been allocated
+        //TODO should this check the limits, or should that be done in the stuff function?
+        allocateDataArrays(dataBuffer, acqhdr, encoding, stats);
+
+        // Stuff the data, header and trajectory into this data buffer
+        stuff(it, dataBuffer, encoding);
+      }
+
+
+    //Iterate over the imaging data of the bucket
+    // this is exactly the same code as for the reference data except for
+    // the chunk of the data buffer.
+    for(std::vector<IsmrmrdAcquisitionData>::iterator it = m1->getObjectPtr()->data_.begin();
+        it != m1->getObjectPtr()->data_.end(); ++it)
+      {
+        //Get a reference to the header for this acquisition
+        ISMRMRD::AcquisitionHeader & acqhdr = *it->head_->getObjectPtr();
+
+        //Generate the key to the corresponding ReconData buffer
+        key = getKey(acqhdr.idx);
+
+        //The storage is based on the encoding space
+        uint16_t espace = acqhdr.encoding_space_ref;
+
+        //std::cout << "espace: " << acqhdr.encoding_space_ref << std::endl;
+        //std::cout << "slice: " << acqhdr.idx.slice << std::endl;
+        //std::cout << "rep: " << acqhdr.idx.repetition << std::endl;
+        //std::cout << "k1: " << acqhdr.idx.kspace_encode_step_1 << std::endl;
+        //std::cout << "k2: " << acqhdr.idx.kspace_encode_step_2 << std::endl;
+        //std::cout << "seg: " << acqhdr.idx.segment << std::endl;
+        //std::cout << "key: " << key << std::endl;
+
+        //Get some references to simplify the notation
+        //the reconstruction bit corresponding to this ReconDataBuffer and encoding space
+        IsmrmrdReconBit & rbit = getRBit(recon_data_buffers, key, espace);
+        //and the corresponding data buffer for the imaging data
+        IsmrmrdDataBuffered & dataBuffer = rbit.data_;
+        //this encoding space's xml header info
+        ISMRMRD::Encoding & encoding = hdr_.encoding[espace];
+        //this bucket's imaging data stats
+        IsmrmrdAcquisitionBucketStats & stats = m1->getObjectPtr()->datastats_[espace];
+
+        //Fill the sampling description for this data buffer
+        fillSamplingDescription(dataBuffer.sampling_, encoding, stats);
+
+        //Make sure that the data storage for this data buffer has been allocated
+        //TODO should this check the limits, or should that be done in the stuff function?
+        allocateDataArrays(dataBuffer, acqhdr, encoding, stats);
+
+        // Stuff the data, header and trajectory into this data buffer
+        stuff(it, dataBuffer, encoding);
+      }
+
+
+    //Send all the ReconData messages (the count is cast because %d expects an int, not a size_t)
+    GADGET_DEBUG2("End of bucket reached, sending out %d ReconData buffers\n", static_cast<int>(recon_data_buffers.size()));
+    for(std::map<size_t, GadgetContainerMessage<IsmrmrdReconData>* >::iterator it = recon_data_buffers.begin(); it != recon_data_buffers.end(); it++)
+      {
+        if (it->second) {
+            if (this->next()->putq(it->second) == -1) {
+                //Release the rejected buffer and every buffer not yet sent; only this local map refers to them
+                for (; it != recon_data_buffers.end(); ++it) { if (it->second) { it->second->release(); } }
+                throw std::runtime_error("Failed to pass bucket down the chain\n");
+            }
+        }
+      }
+
+    //Clear the recondata buffer map
+    recon_data_buffers.clear();  // not strictly needed: the map is a local and is destroyed on return
+
+    //We can release the incoming bucket now. This will release all of the data it contains.
+    m1->release();
+
+    return GADGET_OK;
+  }
+
+  int BucketToBufferGadget::close(unsigned long flags)
+  {
+    // Forwards to Gadget::close(flags), logs a debug message, and returns the base-class result unchanged.
+    int ret = Gadget::close(flags);
+    GADGET_DEBUG1("BucketToBufferGadget::close\n");
+
+    return ret;
+  }
+
+  size_t BucketToBufferGadget::getSlice(ISMRMRD::ISMRMRD_EncodingCounters idx)
+  {
+    size_t index;
+    // Returns idx.slice when split_slices_ is set, otherwise 0.
+    if( split_slices_ ) {
+        index = idx.slice;
+    } else {
+        index = 0;
+    }
+
+    return index;
+  }
+
+  size_t BucketToBufferGadget::getN(ISMRMRD::ISMRMRD_EncodingCounters idx)
+  {
+    size_t index;
+    // Returns the counter of idx selected by N_; any value of N_ not listed below yields 0.
+    if (N_ == AVERAGE) {
+        index = idx.average;
+    } else if (N_ == CONTRAST) {
+        index = idx.contrast;
+    } else if (N_ == PHASE) {
+        index = idx.phase;
+    } else if (N_ == REPETITION) {
+        index = idx.repetition;
+    } else if (N_ == SET) {
+        index = idx.set;
+    } else if (N_ == SEGMENT) {
+        index = idx.segment;
+    } else {
+        index = 0;
+    }
+    // NOTE(review): SLICE is not handled here although allocateDataArrays sizes N for SLICE - confirm this is intended.
+    return index;
+  }
+
+  size_t BucketToBufferGadget::getS(ISMRMRD::ISMRMRD_EncodingCounters idx)
+  {
+    size_t index;
+    // Returns the counter of idx selected by S_; any value of S_ not listed below yields 0.
+    if (S_ == AVERAGE) {
+        index = idx.average;
+    } else if (S_ == CONTRAST) {
+        index = idx.contrast;
+    } else if (S_ == PHASE) {
+        index = idx.phase;
+    } else if (S_ == REPETITION) {
+        index = idx.repetition;
+    } else if (S_ == SET) {
+        index = idx.set;
+    } else if (S_ == SEGMENT) {
+        index = idx.segment;
+    } else {
+        index = 0;
+    }
+    // NOTE(review): SLICE is not handled here although allocateDataArrays sizes S for SLICE - confirm this is intended.
+    return index;
+  }
+
+  size_t BucketToBufferGadget::getKey(ISMRMRD::ISMRMRD_EncodingCounters idx)
+  {
+    //Builds the map key that selects the ReconData buffer for the given encoding counters.
+    //Counters considered: [SLC, PHS, CON, REP, SET, SEG, AVE]
+    //A counter that is used as the N or S dimension is collapsed to 0 so it does not split buffers.
+
+    size_t slice, phase, contrast, repetition, set, segment, average;
+    // Slices only separate buffers when split_slices_ is set.
+    if (split_slices_) {
+        slice = idx.slice;
+    } else {
+        slice = 0;
+    }
+
+    if ((N_ == PHASE) || (S_ == PHASE)) {
+        phase = 0;
+    } else {
+        phase = idx.phase;
+    }
+
+    if ((N_ == CONTRAST) || (S_ == CONTRAST)) {
+        contrast = 0;
+    } else {
+        contrast = idx.contrast;
+    }
+
+    if ((N_ == REPETITION) || (S_ == REPETITION)) {
+        repetition = 0;
+    } else {
+        repetition = idx.repetition;
+    }
+
+    if ((N_ == SET) || (S_ == SET)) {
+        set = 0;
+    } else {
+        set = idx.set;
+    }
+    // The segment counter is additionally ignored when ignore_segment_ is set.
+    if ((N_ == SEGMENT) || (S_ == SEGMENT) || ignore_segment_) {
+        segment = 0;
+    } else {
+        segment = idx.segment;
+    }
+
+    if ((S_ == AVERAGE) || (N_ == AVERAGE)) {
+        average = 0;
+    } else {
+        average = idx.average;
+    }
+    // Pack the counters into one integer; consecutive fields are 0x100 (256) apart.
+    size_t key = 0;
+    key += slice      * 0x1;
+    key += phase      * 0x100;
+    key += contrast   * 0x10000;
+    key += repetition * 0x1000000;
+    key += set        * 0x100000000;
+    key += segment    * 0x10000000000;
+    key += average    * 0x1000000000000;
+    // NOTE(review): a counter >= 256 spills into the next field, and multipliers above 0xFFFFFFFF presumably need a 64-bit size_t - confirm.
+    return key;
+  }
+
+  IsmrmrdReconBit & BucketToBufferGadget::getRBit(std::map<size_t, GadgetContainerMessage<IsmrmrdReconData>* > & recon_data_buffers, size_t key, uint16_t espace)
+  {
+    //Look up the ReconData buffer for this key; returns a reference to its rbit_[espace] entry
+    if (recon_data_buffers.find(key) == recon_data_buffers.end())
+      {
+        //ReconData buffer does not exist, create it (the new message is stored in the caller's map)
+        recon_data_buffers[key] = new GadgetContainerMessage<IsmrmrdReconData>;
+      }
+
+    //Look up the recon bit entry corresponding to this encoding space
+    // and grow rbit_ if needed so that index espace is valid
+    if ( recon_data_buffers[key]->getObjectPtr()->rbit_.size() < (espace+1) )
+      {
+        recon_data_buffers[key]->getObjectPtr()->rbit_.resize(espace+1);
+      }
+
+    return recon_data_buffers[key]->getObjectPtr()->rbit_[espace];
+
+  }
+
+  void BucketToBufferGadget::allocateDataArrays(IsmrmrdDataBuffered & dataBuffer, ISMRMRD::AcquisitionHeader & acqhdr, ISMRMRD::Encoding encoding, IsmrmrdAcquisitionBucketStats & stats)
+  {
+    if (dataBuffer.data_.get_number_of_elements() == 0)  // only allocate while the data array is still empty
+      {
+        //Allocate the data array (process() calls this for both the reference and the imaging buffer)
+        //7D,  fixed order [RO, E1, E2, CHA, SLC, N, S]
+        //11D, fixed order [RO, E1, E2, CHA, SLC, PHS, CON, REP, SET, SEG, AVE]
+        uint16_t NRO;
+        if (encoding.trajectory.compare("cartesian") == 0) {
+            NRO = encoding.reconSpace.matrixSize.x;
+        } else {
+            NRO = acqhdr.number_of_samples - acqhdr.discard_pre - acqhdr.discard_post;
+        }
+        // E1: encoded matrix size for cartesian; otherwise the encoding limits if present, else the range seen in stats
+        uint16_t NE1;
+        if (encoding.trajectory.compare("cartesian") == 0) {
+            NE1 = encoding.encodedSpace.matrixSize.y;
+        } else {
+            if (encoding.encodingLimits.kspace_encoding_step_1.is_present()) {
+                NE1 = encoding.encodingLimits.kspace_encoding_step_1->maximum - encoding.encodingLimits.kspace_encoding_step_1->minimum + 1;
+            } else {
+                NE1 = *stats.kspace_encode_step_1.rbegin() - *stats.kspace_encode_step_1.begin() + 1;
+            }
+        }
+        // E2: same rule as E1, using the z matrix size and kspace_encoding_step_2
+        uint16_t NE2;
+        if (encoding.trajectory.compare("cartesian") == 0) {
+            NE2 = encoding.encodedSpace.matrixSize.z;
+        } else {
+            if (encoding.encodingLimits.kspace_encoding_step_2.is_present()) {
+                NE2 = encoding.encodingLimits.kspace_encoding_step_2->maximum - encoding.encodingLimits.kspace_encoding_step_2->minimum + 1;
+            } else {
+                NE2 = *stats.kspace_encode_step_2.rbegin() - *stats.kspace_encode_step_2.begin() + 1;
+            }
+        }
+        // NOTE(review): the stats sets are dereferenced through begin()/rbegin() without an emptiness check - confirm they are never empty.
+        uint16_t NCHA = acqhdr.active_channels;
+
+        uint16_t NSLC;
+        if (split_slices_) {
+            NSLC = 1;
+        } else {
+            if (encoding.encodingLimits.slice.is_present()) {
+                NSLC = encoding.encodingLimits.slice->maximum - encoding.encodingLimits.slice->minimum + 1;
+            } else {
+                NSLC = *stats.slice.rbegin() - *stats.slice.begin() + 1;
+            }
+        }
+        // N size: range (max - min + 1) seen in stats for the counter selected by N_, or 1 otherwise
+        uint16_t NN;
+        switch (N_) {
+        case PHASE:
+          NN = *stats.phase.rbegin() - *stats.phase.begin() + 1;
+          break;
+        case CONTRAST:
+          NN = *stats.contrast.rbegin() - *stats.contrast.begin() + 1;
+          break;
+        case REPETITION:
+          NN = *stats.repetition.rbegin() - *stats.repetition.begin() + 1;
+          break;
+        case SET:
+          NN = *stats.set.rbegin() - *stats.set.begin() + 1;
+          break;
+        case SEGMENT:
+          NN = *stats.segment.rbegin() - *stats.segment.begin() + 1;
+          break;
+        case AVERAGE:
+          NN = *stats.average.rbegin() - *stats.average.begin() + 1;
+          break;
+        case SLICE:
+          NN =  *stats.slice.rbegin() - *stats.slice.begin() + 1;
+          break;
+        default:
+          NN = 1;
+        }
+        // S size: same rule as for N, driven by S_
+        uint16_t NS;
+        switch (S_) {
+        case PHASE:
+          NS = *stats.phase.rbegin() - *stats.phase.begin() + 1;
+          break;
+        case CONTRAST:
+          NS = *stats.contrast.rbegin() - *stats.contrast.begin() + 1;
+          break;
+        case REPETITION:
+          NS = *stats.repetition.rbegin() - *stats.repetition.begin() + 1;
+          break;
+        case SET:
+          NS = *stats.set.rbegin() - *stats.set.begin() + 1;
+          break;
+        case SEGMENT:
+          NS = *stats.segment.rbegin() - *stats.segment.begin() + 1;
+          break;
+        case AVERAGE:
+          NS = *stats.average.rbegin() - *stats.average.begin() + 1;
+          break;
+        case SLICE:
+          NS =  *stats.slice.rbegin() - *stats.slice.begin() + 1;
+          break;
+        default:
+          NS = 1;
+        }
+
+        //std::cout << "Data dimensions:" << std::endl;
+        //std::cout << "   NRO:  " << NRO  << std::endl;
+        //std::cout << "   NE1:  " << NE1  << std::endl;
+        //std::cout << "   NE2:  " << NE2  << std::endl;
+        //std::cout << "   NSLC: " << NSLC << std::endl;
+        //std::cout << "   NCHA: " << NCHA << std::endl;
+        //std::cout << "   NN:   " << NN   << std::endl;
+        //std::cout << "   NS:   " << NS   << std::endl;
+
+        //Allocate the array for the data
+        dataBuffer.data_.create(NRO, NE1, NE2, NCHA, NSLC, NN, NS);
+        clear(&dataBuffer.data_);
+
+        //Allocate the array for the headers (one header per [E1, E2, SLC, N, S] position)
+        dataBuffer.headers_.create(NE1, NE2, NSLC, NN, NS);
+
+        //Allocate the array for the trajectories (only when the acquisition reports trajectory dimensions)
+        uint16_t TRAJDIM = acqhdr.trajectory_dimensions;
+        if (TRAJDIM > 0)
+          {
+            dataBuffer.trajectory_.create(TRAJDIM, NRO, NE1, NE2, NSLC, NN, NS);
+            clear(&dataBuffer.trajectory_);
+          }
+
+        //boost::shared_ptr< std::vector<size_t> > dims =  dataBuffer.data_.get_dimensions();
+        //std::cout << "NDArray dims: ";
+        //for( std::vector<size_t>::const_iterator i = dims->begin(); i != dims->end(); ++i) {
+        //    std::cout << *i << ' ';
+        //}
+        //std::cout << std::endl;
+      }
+
+  }
+
+  void BucketToBufferGadget::fillSamplingDescription(SamplingDescription & sampling, ISMRMRD::Encoding & encoding, IsmrmrdAcquisitionBucketStats & stats)
+  {
+    // For cartesian trajectories, assume that any oversampling has been removed.
+    if (encoding.trajectory.compare("cartesian") == 0) {
+        sampling.encoded_FOV_[0] = encoding.reconSpace.fieldOfView_mm.x;
+        sampling.encoded_matrix_[0] = encoding.reconSpace.matrixSize.x;
+    } else {
+        sampling.encoded_FOV_[0] = encoding.encodedSpace.fieldOfView_mm.x;
+        sampling.encoded_matrix_[0] = encoding.encodedSpace.matrixSize.x;
+    }
+    // The y and z entries are always taken from the encoded space.
+    sampling.encoded_FOV_[1] = encoding.encodedSpace.fieldOfView_mm.y;
+    sampling.encoded_FOV_[2] = encoding.encodedSpace.fieldOfView_mm.z;
+
+    sampling.encoded_matrix_[1] = encoding.encodedSpace.matrixSize.y;
+    sampling.encoded_matrix_[2] = encoding.encodedSpace.matrixSize.z;
+    // Recon FOV and matrix are copied from the recon space as they are.
+    sampling.recon_FOV_[0] = encoding.reconSpace.fieldOfView_mm.x;
+    sampling.recon_FOV_[1] = encoding.reconSpace.fieldOfView_mm.y;
+    sampling.recon_FOV_[2] = encoding.reconSpace.fieldOfView_mm.z;
+
+    sampling.recon_matrix_[0] = encoding.reconSpace.matrixSize.x;
+    sampling.recon_matrix_[1] = encoding.reconSpace.matrixSize.y;
+    sampling.recon_matrix_[2] = encoding.reconSpace.matrixSize.z;
+
+    // For cartesian trajectories, assume that any oversampling has been removed.
+    if (encoding.trajectory.compare("cartesian") == 0) {
+        sampling.sampling_limits_[0].min_ = 0;
+        sampling.sampling_limits_[0].max_ = encoding.reconSpace.matrixSize.x - 1;
+        sampling.sampling_limits_[0].center_ = encoding.reconSpace.matrixSize.x / 2;
+    } else {
+        sampling.sampling_limits_[0].min_ = 0;
+        sampling.sampling_limits_[0].max_ = encoding.encodedSpace.matrixSize.x - 1;
+        sampling.sampling_limits_[0].center_ = encoding.encodedSpace.matrixSize.x / 2;
+    }
+    // NOTE(review): the step_1/step_2 limits are read without is_present() checks (allocateDataArrays does check); stats is unused here - confirm.
+    sampling.sampling_limits_[1].min_ =
+        encoding.encodingLimits.kspace_encoding_step_1->minimum;
+    sampling.sampling_limits_[1].max_ =
+        encoding.encodingLimits.kspace_encoding_step_1->maximum;
+    sampling.sampling_limits_[1].center_ =
+        encoding.encodingLimits.kspace_encoding_step_1->center;
+
+    sampling.sampling_limits_[2].min_ =
+        encoding.encodingLimits.kspace_encoding_step_2->minimum;
+    sampling.sampling_limits_[2].max_ =
+        encoding.encodingLimits.kspace_encoding_step_2->maximum;
+    sampling.sampling_limits_[2].center_ =
+        encoding.encodingLimits.kspace_encoding_step_2->center;
+  }
+
+  void BucketToBufferGadget::stuff(std::vector<IsmrmrdAcquisitionData>::iterator it, IsmrmrdDataBuffered & dataBuffer, ISMRMRD::Encoding encoding)
+  {
+    // Copies one acquisition (samples, header and, if present, trajectory) into its place in dataBuffer.
+    // The acquisition header and data
+    ISMRMRD::AcquisitionHeader & acqhdr = *it->head_->getObjectPtr();
+    hoNDArray< std::complex<float> > & acqdata = *it->data_->getObjectPtr();
+    // we make one for the trajectory down below if we need it
+    // With split slices each buffer uses slice position 0; otherwise the slice counter is the position.
+    size_t slice_loc;
+    if (split_slices_)
+      {
+        slice_loc = 0;
+      }
+    else
+      {
+        slice_loc = acqhdr.idx.slice;
+      }
+
+    //Stuff the data
+    uint16_t npts_to_copy = acqhdr.number_of_samples - acqhdr.discard_pre - acqhdr.discard_post;
+    long long offset;
+    if (encoding.trajectory.compare("cartesian") == 0) {
+        offset  = (long long) dataBuffer.sampling_.sampling_limits_[0].center_ - (long long) acqhdr.center_sample;
+    } else {
+        //TODO what about EPI with asymmetric readouts?
+        //TODO any other sort of trajectory?
+        offset = 0;
+    }
+    long long roffset = (long long) dataBuffer.data_.get_size(0) - npts_to_copy - offset;
+    // offset is the first readout position written; roffset is the space left after the last copied sample.
+    //std::cout << "Num_samp: "<< acqhdr.number_of_samples << ", pre: " << acqhdr.discard_pre << ", post" << acqhdr.discard_post << std::endl;
+    //std::cout << "Sampling limits: "
+    //    << "  min: " << dataBuffer.sampling_.sampling_limits_[0].min_
+    //    << "  max: " << dataBuffer.sampling_.sampling_limits_[0].max_
+    //    << "  center: " << dataBuffer.sampling_.sampling_limits_[0].center_
+    //    << std::endl;
+    //std::cout << "npts_to_copy = " << npts_to_copy  << std::endl;
+    //std::cout << "offset = " << offset  << std::endl;
+    //std::cout << "loffset = " << roffset << std::endl;
+    // NOTE(review): the message below says "reference data" although process() also calls stuff() for imaging data.
+    if ((offset < 0) | (roffset < 0) )
+      {
+        throw std::runtime_error("Acquired reference data does not fit into the reference data buffer.\n");
+      }
+
+    std::complex<float> *dataptr;
+    uint16_t NCHA = dataBuffer.data_.get_size(3);
+    for (uint16_t cha = 0; cha < NCHA; cha++)
+      {
+        dataptr = & dataBuffer.data_(
+            offset, acqhdr.idx.kspace_encode_step_1, acqhdr.idx.kspace_encode_step_2, cha, slice_loc, getN(acqhdr.idx),  getS(acqhdr.idx));
+
+        // Copy this channel's samples, skipping the first discard_pre samples of the acquisition.
+        memcpy(dataptr, &acqdata(acqhdr.discard_pre, cha), sizeof(std::complex<float>)*npts_to_copy);
+      }
+
+    //Stuff the header
+    dataBuffer.headers_(acqhdr.idx.kspace_encode_step_1,
+        acqhdr.idx.kspace_encode_step_2, slice_loc, getN(acqhdr.idx),  getS(acqhdr.idx)) = acqhdr;
+
+    //Stuff the trajectory
+    if (acqhdr.trajectory_dimensions > 0) {
+        hoNDArray< float > & acqtraj = *it->traj_->getObjectPtr();  // TODO do we need to check this?
+
+        float * trajptr;
+        trajptr = &dataBuffer.trajectory_(0,
+            offset, acqhdr.idx.kspace_encode_step_1, acqhdr.idx.kspace_encode_step_2, slice_loc, getN(acqhdr.idx),  getS(acqhdr.idx));
+        memcpy(trajptr, & acqtraj(0,acqhdr.discard_pre ), sizeof(float)*npts_to_copy*acqhdr.trajectory_dimensions);
+
+    }
+  }
+
+  GADGET_FACTORY_DECLARE(BucketToBufferGadget)
+
+}
diff --git a/gadgets/mri_core/BucketToBufferGadget.h b/gadgets/mri_core/BucketToBufferGadget.h
new file mode 100644
index 0000000..189debc
--- /dev/null
+++ b/gadgets/mri_core/BucketToBufferGadget.h
@@ -0,0 +1,52 @@
+#ifndef BUCKETTOBUFFER_H
+#define BUCKETTOBUFFER_H
+
+#include "Gadget.h"
+#include "hoNDArray.h"
+#include "gadgetron_mricore_export.h"
+
+#include <ismrmrd/ismrmrd.h>
+#include <ismrmrd/xml.h>
+#include <complex>
+#include <map>
+#include "mri_core_data.h"
+
+namespace Gadgetron{
+
+    // TODO the ignore_segment_ flag is a hack for some EPI sequences
+    // should be fixed on the converter side.
+
+  class EXPORTGADGETSMRICORE BucketToBufferGadget : 
+  public Gadget1<IsmrmrdAcquisitionBucket>
+    {
+    public:
+      GADGET_DECLARE(BucketToBufferGadget);
+
+      virtual ~BucketToBufferGadget();
+
+      int close(unsigned long flags);
+      
+    protected:
+      IsmrmrdCONDITION N_;            // counter used as the N dimension, set in process_config
+      IsmrmrdCONDITION S_;            // counter used as the S dimension, set in process_config
+      bool split_slices_;             // "split_slices" property
+      bool ignore_segment_;           // "ignore_segment" property
+      ISMRMRD::IsmrmrdHeader hdr_;    // deserialized ISMRMRD XML header, filled in process_config
+      // Gadget entry points
+      virtual int process_config(ACE_Message_Block* mb);
+      virtual int process(GadgetContainerMessage<IsmrmrdAcquisitionBucket>* m1);
+      size_t getKey(ISMRMRD::ISMRMRD_EncodingCounters idx);
+      size_t getSlice(ISMRMRD::ISMRMRD_EncodingCounters idx);
+      size_t getN(ISMRMRD::ISMRMRD_EncodingCounters idx);
+      size_t getS(ISMRMRD::ISMRMRD_EncodingCounters idx);
+      // Helpers used by process() to locate, allocate and fill the ReconData buffers
+      IsmrmrdReconBit & getRBit(std::map<size_t, GadgetContainerMessage<IsmrmrdReconData>* > & recon_data_buffers, size_t key, uint16_t espace);
+      void allocateDataArrays(IsmrmrdDataBuffered &  dataBuffer, ISMRMRD::AcquisitionHeader & acqhdr, ISMRMRD::Encoding encoding, IsmrmrdAcquisitionBucketStats & stats);
+      void fillSamplingDescription(SamplingDescription & sampling, ISMRMRD::Encoding & encoding, IsmrmrdAcquisitionBucketStats & stats);
+      void stuff(std::vector<IsmrmrdAcquisitionData>::iterator it, IsmrmrdDataBuffered & dataBuffer, ISMRMRD::Encoding encoding);
+
+    };
+
+  
+}
+#endif //BUCKETTOBUFFER_H
diff --git a/gadgets/mri_core/CMakeLists.txt b/gadgets/mri_core/CMakeLists.txt
index 09336be..7082c13 100644
--- a/gadgets/mri_core/CMakeLists.txt
+++ b/gadgets/mri_core/CMakeLists.txt
@@ -1,10 +1,8 @@
 IF (WIN32)
-  ADD_DEFINITIONS(-D__BUILD_GADGETRON_MRICORE__)
+    ADD_DEFINITIONS(-D__BUILD_GADGETRON_MRICORE__)
 ENDIF (WIN32)
 
 find_package(Ismrmrd REQUIRED)
-find_package(XSD REQUIRED)
-find_package(XercesC REQUIRED)
 
 if (MKL_FOUND)
     # This is a fix for the bug in SVD when MKL is multi-threaded
@@ -19,97 +17,149 @@ if (MKL_FOUND)
 endif (MKL_FOUND)
 
 include_directories(
-  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/arma_math
-  ${CMAKE_SOURCE_DIR}/toolboxes/gadgettools
-  ${HDF5_INCLUDE_DIR}
-  ${HDF5_INCLUDE_DIR}/cpp
-  ${ARMADILLO_INCLUDE_DIRS}
-  ${MKL_INCLUDE_DIR}
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
+    ${CMAKE_SOURCE_DIR}/toolboxes/core
+    ${CMAKE_SOURCE_DIR}/toolboxes/mri_core
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/algorithm
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/hostutils
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/mri/pmri/gpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/fft/cpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/fft/gpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/gadgettools
+    ${CMAKE_SOURCE_DIR}/toolboxes/gtplus
+    ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/util
+    ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/workflow
+    ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/algorithm
+    ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/solver
+    ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/application
+    ${HDF5_INCLUDE_DIR}
+    ${HDF5_INCLUDE_DIR}/cpp
+    ${ARMADILLO_INCLUDE_DIRS}
+    ${MKL_INCLUDE_DIR}
+    ${ISMRMRD_INCLUDE_DIR}
 )
 
 if (ARMADILLO_FOUND)
-  list(APPEND OPTIMIZED_GADGETS NoiseAdjustGadget.cpp)
-  list(APPEND OPTIMIZED_GADGETS PCACoilGadget.cpp)
-  list(APPEND OPTIMIZED_GADGET_HEADERS NoiseAdjustGadget.h)
-  list(APPEND OPTIMIZED_GADGET_HEADERS PCACoilGadget.h)
-  list(APPEND OPTIMIZED_GADGET_LIBS cpucore_math ${ARMADILLO_LIBRARIES})
+    list(APPEND OPTIMIZED_GADGETS NoiseAdjustGadget.cpp)
+    list(APPEND OPTIMIZED_GADGETS PCACoilGadget.cpp)
+    list(APPEND OPTIMIZED_GADGET_HEADERS NoiseAdjustGadget.h)
+    list(APPEND OPTIMIZED_GADGET_HEADERS PCACoilGadget.h)
+    list(APPEND OPTIMIZED_GADGET_LIBS gadgetron_toolbox_cpucore_math ${ARMADILLO_LIBRARIES})
-elseif (ARMADILLO_FOUND)
-  MESSAGE("Armadillo not found, i.e. not compiling Blas/Lapack optimized MRI Gadgets")
+else (ARMADILLO_FOUND) # Armadillo missing: the optimized gadgets above are left out of the build
+    MESSAGE("Armadillo not found, i.e. not compiling Blas/Lapack optimized MRI Gadgets")
 endif (ARMADILLO_FOUND)
 
+set( gadgetron_mricore_header_files GadgetMRIHeaders.h 
+                                    AcquisitionPassthroughGadget.h 
+                                    AcquisitionFinishGadget.h 
+                                    AccumulatorGadget.h 
+                                    FFTGadget.h 
+                                    ImageFinishGadget.h 
+                                    ImageFinishAttribGadget.h 
+                                    CombineGadget.h
+                                    CropAndCombineGadget.h  
+                                    ImageWriterGadget.h 
+                                    MRIImageWriter.h 
+                                    MRIImageAttribWriter.h 
+                                    NoiseAdjustGadget_unoptimized.h 
+                                    ExtractGadget.h 
+                                    FloatToUShortGadget.h 
+                                    FloatToUShortAttribGadget.h 
+                                    RemoveROOversamplingGadget.h 
+                                    CoilReductionGadget.h 
+                                    AutoScaleGadget.h 
+                                    FlowPhaseSubtractionGadget.h 
+                                    GadgetIsmrmrdReadWrite.h 
+                                    PhysioInterpolationGadget.h 
+                                    IsmrmrdDumpGadget.h 
+                                    AsymmetricEchoAdjustROGadget.h 
+                                    MaxwellCorrectionGadget.h 
+                                    CplxDumpGadget.h 
+                                    DependencyQueryGadget.h 
+                                    DependencyQueryWriter.h 
+                                    ComplexToFloatAttribGadget.h
+                                    AcquisitionAccumulateTriggerGadget.h
+                                    BucketToBufferGadget.h )
+
+set( gadgetron_mricore_src_files AcquisitionPassthroughGadget.cpp 
+                                AcquisitionFinishGadget.cpp 
+                                AccumulatorGadget.cpp 
+                                FFTGadget.cpp 
+                                ImageFinishGadget.cpp 
+                                ImageFinishAttribGadget.cpp 
+                                CombineGadget.cpp 
+                                CropAndCombineGadget.cpp 
+                                ImageWriterGadget.cpp 
+                                MRIImageWriter.cpp 
+                                MRIImageAttribWriter.cpp 
+                                NoiseAdjustGadget_unoptimized.cpp 
+                                ExtractGadget.cpp 
+                                FloatToUShortGadget.cpp 
+                                FloatToUShortAttribGadget.cpp 
+                                RemoveROOversamplingGadget.cpp
+                                CoilReductionGadget.cpp
+                                AutoScaleGadget.cpp
+                                FlowPhaseSubtractionGadget.cpp
+                                GadgetIsmrmrdReadWrite.cpp
+                                PhysioInterpolationGadget.cpp
+                                IsmrmrdDumpGadget.cpp
+                                AsymmetricEchoAdjustROGadget.cpp
+                                MaxwellCorrectionGadget.cpp
+                                CplxDumpGadget.cpp 
+                                DependencyQueryGadget.cpp 
+                                DependencyQueryWriter.cpp 
+                                ComplexToFloatAttribGadget.cpp 
+                                AcquisitionAccumulateTriggerGadget.cpp
+                                BucketToBufferGadget.cpp )
+
+if (WIN32)
+    set( gadgetron_mricore_header_files ${gadgetron_mricore_header_files} WhiteNoiseInjectorGadget.h )
+    set( gadgetron_mricore_src_files ${gadgetron_mricore_src_files} WhiteNoiseInjectorGadget.cpp )
+endif (WIN32)
+
+set( gadgetron_mricore_config_files
+    default.xml
+    default_short.xml
+    default_optimized.xml
+    default_measurement_dependencies.xml
+)
+
 add_library(gadgetron_mricore SHARED 
-  GadgetMRIHeaders.h 
-  AcquisitionPassthroughGadget.h AcquisitionPassthroughGadget.cpp
-  AcquisitionFinishGadget.h AcquisitionFinishGadget.cpp 
-  AccumulatorGadget.h AccumulatorGadget.cpp
-  FFTGadget.h FFTGadget.cpp
-  ImageFinishGadget.h ImageFinishGadget.cpp
-  CropAndCombineGadget.h CropAndCombineGadget.cpp
-  ImageWriterGadget.h ImageWriterGadget.cpp
-  MRIImageWriter.h MRIImageWriter.cpp
-  NoiseAdjustGadget_unoptimized.h NoiseAdjustGadget_unoptimized.cpp
-  ExtractGadget.h ExtractGadget.cpp
-  FloatToUShortGadget.h FloatToUShortGadget.cpp
-  RemoveROOversamplingGadget.h RemoveROOversamplingGadget.cpp
-  CoilReductionGadget.h CoilReductionGadget.cpp
-  AutoScaleGadget.h AutoScaleGadget.cpp
-  FlowPhaseSubtractionGadget.h FlowPhaseSubtractionGadget.cpp
-  GadgetIsmrmrdReadWrite.h GadgetIsmrmrdReadWrite.cpp
-  PhysioInterpolationGadget.h PhysioInterpolationGadget.cpp
-  IsmrmrdDumpGadget.h IsmrmrdDumpGadget.cpp
-  PartialFourierAdjustROGadget.h PartialFourierAdjustROGadget.cpp
-  MaxwellCorrectionGadget.h MaxwellCorrectionGadget.cpp
-  CplxDumpGadget.h CplxDumpGadget.cpp
-  ${OPTIMIZED_GADGETS}
-  ${OPTIMIZED_GADGET_HEADERS}
-  ${ISMRMRD_XSD_SOURCE}
-  )
+    gadgetron_mricore_export.h 
+    ${gadgetron_mricore_header_files} 
+    ${gadgetron_mricore_src_files}
+    ${gadgetron_mricore_config_files}
+    ${OPTIMIZED_GADGETS}
+    ${OPTIMIZED_GADGET_HEADERS}
+)
+
+set_target_properties(gadgetron_mricore PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})                                                                                                                                                                                                      
 
 target_link_libraries(gadgetron_mricore 
-  cpucore
-  ${ISMRMRD_LIBRARIES} 
-  ${FFTW3_LIBRARIES} 
-  optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY} 
-  ${BOOST_LIBRARIES}
-  ${XERCESC_LIBRARIES} 
-  ${OPTIMIZED_GADGET_LIBS}
-  ${MKL_LIBRARIES}
-  ${EXTRA_MKL_LIBRARIES}
-  )
+    gadgetron_gadgetbase
+    gadgetron_toolbox_cpucore
+    gadgetron_toolbox_cpufft
+    ${ISMRMRD_LIBRARIES} 
+    ${FFTW3_LIBRARIES} 
+    optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY} 
+    ${BOOST_LIBRARIES}
+    ${OPTIMIZED_GADGET_LIBS}
+)
 
 install(FILES 
-  AccumulatorGadget.h
-  AcquisitionFinishGadget.h
-  AcquisitionPassthroughGadget.h
-  CropAndCombineGadget.h
-  ExtractGadget.h
-  FloatToUShortGadget.h
-  FFTGadget.h
-  GadgetMRIHeaders.h
-  ImageFinishGadget.h
-  ImageWriterGadget.h
-  MRIImageWriter.h
-  MaxwellCorrectionGadget.h
-  NoiseAdjustGadget.h
-  RemoveROOversamplingGadget.h
-  CoilReductionGadget.h
-  AutoScaleGadget.h
-  FlowPhaseSubtractionGadget.h
-  GadgetIsmrmrdReadWrite.h
-  PhysioInterpolationGadget.h
-  IsmrmrdDumpGadget.h
-  CplxDumpGadget.h
-  MaxwellCorrectionGadget.h
-  PartialFourierAdjustROGadget.h
-  gadgetron_mricore_export.h
-  ${OPTIMIZED_GADGET_HEADERS}
-  DESTINATION include)
+    gadgetron_mricore_export.h
+    ${gadgetron_mricore_header_files}
+    ${OPTIMIZED_GADGET_HEADERS}
+    DESTINATION include COMPONENT main)
 
 if (ARMADILLO_FOUND)
-  install(FILES default.xml default_short.xml default_optimized.xml DESTINATION config)
+    install(FILES ${gadgetron_mricore_config_files} DESTINATION ${GADGETRON_INSTALL_CONFIG_PATH} COMPONENT main)
 endif (ARMADILLO_FOUND)
 
-install(FILES ismrmrd_dump.xml DESTINATION config)
+install(FILES ismrmrd_dump.xml DESTINATION ${GADGETRON_INSTALL_CONFIG_PATH} COMPONENT main)
 
-install(TARGETS gadgetron_mricore DESTINATION lib)
-install(FILES ${ISMRMRD_SCHEMA_DIR}/ismrmrd.xsd DESTINATION schema)
+install(TARGETS gadgetron_mricore DESTINATION lib COMPONENT main)
diff --git a/gadgets/mri_core/CoilReductionGadget.cpp b/gadgets/mri_core/CoilReductionGadget.cpp
index 2b84313..8559eca 100644
--- a/gadgets/mri_core/CoilReductionGadget.cpp
+++ b/gadgets/mri_core/CoilReductionGadget.cpp
@@ -6,10 +6,9 @@
 */
 
 #include "CoilReductionGadget.h"
-#include "GadgetIsmrmrdReadWrite.h"
-
 #include <boost/algorithm/string.hpp>
 #include <boost/algorithm/string/split.hpp>
+#include "ismrmrd/xml.h"
 
 namespace Gadgetron{
 
@@ -21,52 +20,53 @@ namespace Gadgetron{
 
     int CoilReductionGadget::process_config(ACE_Message_Block *mb)
     {
-        boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg = parseIsmrmrdXMLHeader(std::string(mb->rd_ptr()));
-
-        coils_in_ = cfg->acquisitionSystemInformation().get().receiverChannels().present() ? cfg->acquisitionSystemInformation().get().receiverChannels().get() : 128;
+      ISMRMRD::IsmrmrdHeader h;
+      ISMRMRD::deserialize(mb->rd_ptr(),h); // parse the XML text carried by the config message into h
+      
+      coils_in_ = h.acquisitionSystemInformation->receiverChannels ? *h.acquisitionSystemInformation->receiverChannels : 128; // header value if set, otherwise 128
 
-        boost::shared_ptr<std::string> coil_mask = this->get_string_value("coil_mask");
+      boost::shared_ptr<std::string> coil_mask = this->get_string_value("coil_mask");
 
-        if (coil_mask->compare(std::string("")) == 0) {
-            int coils_out = this->get_int_value("coils_out");
-            if (coils_out <= 0) {
-                GADGET_DEBUG2("Invalid number of output coils %d\n", coils_out);
-                return GADGET_FAIL;
-            }
-            coil_mask_ = std::vector<unsigned short>(coils_out,1);
-        } else {
-            std::vector<std::string> chm;
-            boost::split(chm, *coil_mask, boost::is_any_of(" "));
-            for (size_t i = 0; i < chm.size(); i++) {
-                std::string ch = boost::algorithm::trim_copy(chm[i]);
-                if (ch.size() > 0) {
-                    size_t mv = static_cast<size_t>(ACE_OS::atoi(ch.c_str()));
-                    //GADGET_DEBUG2("Coil mask value: %d\n", mv);
-                    if (mv > 0) {
-                        coil_mask_.push_back(1);
-                    } else {
-                        coil_mask_.push_back(0);
-                    }
-                }
-            }
-        }
-
-        while (coil_mask_.size() < coils_in_) coil_mask_.push_back(0);
-        while (coil_mask_.size() > coils_in_) coil_mask_.pop_back();
-
-        if (coil_mask_.size() != coils_in_) {
-            GADGET_DEBUG1("Error configuring coils for coil reduction\n");
-            return GADGET_FAIL;
-        }
-
-        coils_out_ = 0;
-        for (size_t i = 0; i < coil_mask_.size(); i++) {
-            if (coil_mask_[i]) coils_out_++;
-        }
-
-        GADGET_DEBUG2("Coil reduction from %d to %d\n", coils_in_, coils_out_);
-
-        return GADGET_OK;
+      if (coil_mask->compare(std::string("")) == 0) { // no "coil_mask" property: the mask is coils_out ones
+	int coils_out = this->get_int_value("coils_out");
+	if (coils_out <= 0) {
+	  GADGET_DEBUG2("Invalid number of output coils %d\n", coils_out);
+	  return GADGET_FAIL;
+	}
+	coil_mask_ = std::vector<unsigned short>(coils_out,1);
+      } else { // explicit mask: space-separated integers, each appended as 1 (value > 0) or 0
+	std::vector<std::string> chm;
+	boost::split(chm, *coil_mask, boost::is_any_of(" "));
+	for (size_t i = 0; i < chm.size(); i++) {
+	  std::string ch = boost::algorithm::trim_copy(chm[i]);
+	  if (ch.size() > 0) { // empty tokens (e.g. from repeated spaces) are skipped
+	    size_t mv = static_cast<size_t>(ACE_OS::atoi(ch.c_str()));
+	    //GADGET_DEBUG2("Coil mask value: %d\n", mv);
+	    if (mv > 0) {
+	      coil_mask_.push_back(1);
+	    } else {
+	      coil_mask_.push_back(0);
+	    }
+	  }
+	}
+      }
+      
+      while (coil_mask_.size() < coils_in_) coil_mask_.push_back(0); // pad with zeros up to coils_in_ entries
+      while (coil_mask_.size() > coils_in_) coil_mask_.pop_back();   // or drop the surplus entries
+      
+      if (coil_mask_.size() != coils_in_) { // NOTE(review): cannot be true after the two loops above
+	GADGET_DEBUG1("Error configuring coils for coil reduction\n");
+	return GADGET_FAIL;
+      }
+      
+      coils_out_ = 0; // becomes the number of non-zero mask entries
+      for (size_t i = 0; i < coil_mask_.size(); i++) {
+	if (coil_mask_[i]) coils_out_++;
+      }
+      
+      GADGET_DEBUG2("Coil reduction from %d to %d\n", coils_in_, coils_out_);
+      
+      return GADGET_OK;
     }
 
 
diff --git a/gadgets/mri_core/CoilReductionGadget.h b/gadgets/mri_core/CoilReductionGadget.h
index 3654ab8..ca0ff77 100644
--- a/gadgets/mri_core/CoilReductionGadget.h
+++ b/gadgets/mri_core/CoilReductionGadget.h
@@ -5,7 +5,7 @@
 #include "hoNDArray.h"
 #include "gadgetron_mricore_export.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <complex>
 
 namespace Gadgetron{
diff --git a/gadgets/mri_core/CombineGadget.cpp b/gadgets/mri_core/CombineGadget.cpp
new file mode 100644
index 0000000..ad40572
--- /dev/null
+++ b/gadgets/mri_core/CombineGadget.cpp
@@ -0,0 +1,69 @@
+#include "GadgetIsmrmrdReadWrite.h"
+#include "CombineGadget.h"
+
+namespace Gadgetron{
+
+  CombineGadget::CombineGadget() {}  // nothing to initialise
+  CombineGadget::~CombineGadget() {} // nothing to release
+
+int CombineGadget::
+process( GadgetContainerMessage<ISMRMRD::ImageHeader>* m1,
+	 GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
+{
+  // Combines the channels of the complex image in m2 (indexed as x, y, z, channel) into
+  // one complex x-y-z image: magnitude is the square root of the summed squared channel
+  // magnitudes, phase is the sum of the channel phases weighted by squared magnitude.
+  // m1's channel count is set to 1, the combined array replaces m2 as m1's continuation
+  // and m1 is put on the next gadget's queue. Returns -1 if the new array cannot be created.
+  size_t nx = m2->getObjectPtr()->get_size(0);
+  size_t ny = m2->getObjectPtr()->get_size(1);
+  size_t nz = m2->getObjectPtr()->get_size(2);
+  size_t nc = m2->getObjectPtr()->get_size(3);
+
+  // Create a new message with an hoNDArray for the combined image
+  GadgetContainerMessage< hoNDArray<std::complex<float> > >* m3 = 
+    new GadgetContainerMessage< hoNDArray<std::complex<float> > >();
+
+  std::vector<size_t> dimensions(3);
+  dimensions[0] = nx;
+  dimensions[1] = ny; 
+  dimensions[2] = nz;
+
+  try{m3->getObjectPtr()->create(&dimensions);}
+  catch (std::runtime_error &err){
+    GADGET_DEBUG_EXCEPTION(err,"CombineGadget, failed to allocate new array\n");
+    m3->release(); // m3 is not attached to m1 yet, so it has to be freed here
+    return -1;
+  }
+
+  std::complex<float>* d1 = m2->getObjectPtr()->get_data_ptr();
+  std::complex<float>* d2 = m3->getObjectPtr()->get_data_ptr();
+  size_t img_block = nx*ny*nz; // number of elements in one channel
+
+  for (size_t z = 0; z < nz; z++) {
+    for (size_t y = 0; y < ny; y++) {
+      for (size_t x = 0; x < nx; x++) {
+	float mag = 0;
+	float phase = 0;
+	size_t offset = z*ny*nx+y*nx+x;
+	for (size_t c = 0; c < nc; c++) {
+	  float mag_tmp = norm(d1[offset + c*img_block]); // squared magnitude
+	  phase += mag_tmp*arg(d1[offset + c*img_block]);
+	  mag += mag_tmp;
+	}
+	d2[offset] = std::polar(std::sqrt(mag),phase); // NOTE(review): phase is not divided by mag - confirm this is intended
+      }
+    }
+  }
+
+  // The outgoing image has a single, combined channel (the data itself stays complex)
+  m1->getObjectPtr()->channels = 1;
+  // Now add the new array to the outgoing message
+  m1->cont(m3);
+  // Release the old data
+  m2->release();
+  return this->next()->putq(m1);
+}
+
+GADGET_FACTORY_DECLARE(CombineGadget)
+}
diff --git a/gadgets/mri_core/CombineGadget.h b/gadgets/mri_core/CombineGadget.h
new file mode 100644
index 0000000..fc3e89b
--- /dev/null
+++ b/gadgets/mri_core/CombineGadget.h
@@ -0,0 +1,27 @@
+#ifndef COMBINEGADGET_H
+#define COMBINEGADGET_H
+
+#include "Gadget.h"
+#include "hoNDArray.h"
+#include "hoArmadillo.h"
+#include "gadgetron_mricore_export.h"
+
+#include <ismrmrd/ismrmrd.h>
+#include <complex>
+
+namespace Gadgetron{
+  
+  class  EXPORTGADGETSMRICORE CombineGadget : 
+  public Gadget2<ISMRMRD::ImageHeader, hoNDArray< std::complex<float> > > // receives an image header plus complex image data
+    {
+    public:
+      CombineGadget();
+      virtual ~CombineGadget();
+      // process() is defined in CombineGadget.cpp: m1 is the image header, m2 the complex image data.
+    protected:
+      virtual int process( GadgetContainerMessage<ISMRMRD::ImageHeader>* m1,
+			   GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2);     
+    };
+}
+
+#endif //COMBINEGADGET_H
diff --git a/gadgets/mri_core/ComplexToFloatAttribGadget.cpp b/gadgets/mri_core/ComplexToFloatAttribGadget.cpp
new file mode 100644
index 0000000..cd5dbea
--- /dev/null
+++ b/gadgets/mri_core/ComplexToFloatAttribGadget.cpp
@@ -0,0 +1,88 @@
+/*
+*       ComplexToFloatAttribGadget.cpp
+*       Author: Hui Xue
+*/
+
+#include "GadgetIsmrmrdReadWrite.h"
+#include "ComplexToFloatAttribGadget.h"
+#include "hoNDArray_elemwise.h"
+
+namespace Gadgetron
+{
+    ComplexToFloatAttribGadget::ComplexToFloatAttribGadget()
+    { // nothing to initialise
+    }
+
+    ComplexToFloatAttribGadget::~ComplexToFloatAttribGadget()
+    { // nothing to release
+    }
+
+    int ComplexToFloatAttribGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1, GadgetContainerMessage< hoNDArray< ValueType > >* m2, GadgetContainerMessage<ISMRMRD::MetaContainer>* m3)
+    {
+        // Converts the complex image m2 to a float image selected by m1's image_type (magnitude,
+        // real, imaginary or phase) and forwards m1 -> float data -> m3 with data_type set to
+        // ISMRMRD_FLOAT. Returns GADGET_FAIL on any failure or on an image_type outside those four.
+        GadgetContainerMessage<hoNDArray< float > > *cm2 = new GadgetContainerMessage<hoNDArray< float > >();
+        boost::shared_ptr< std::vector<size_t> > dims = m2->getObjectPtr()->get_dimensions();
+
+        try
+        {
+            cm2->getObjectPtr()->create(dims);
+        }
+        catch (std::runtime_error &err)
+        {
+            GADGET_DEBUG_EXCEPTION(err,"Unable to create float storage in ComplexToFloatAttribGadget");
+            cm2->release(); // not chained to m1 yet, so it has to be freed here
+            return GADGET_FAIL;
+        }
+
+        switch (m1->getObjectPtr()->image_type) // NOTE(review): if a GADGET_CHECK_EXCEPTION_RETURN below returns early, cm2 is presumably not released - confirm against the macro
+        {
+            case ISMRMRD::ISMRMRD_IMTYPE_MAGNITUDE:
+            {
+                GADGET_CHECK_EXCEPTION_RETURN(Gadgetron::abs(*m2->getObjectPtr(), *cm2->getObjectPtr()), GADGET_FAIL);
+            }
+            break;
+
+            case ISMRMRD::ISMRMRD_IMTYPE_REAL:
+            {
+                GADGET_CHECK_EXCEPTION_RETURN(Gadgetron::complex_to_real(*m2->getObjectPtr(), *cm2->getObjectPtr()), GADGET_FAIL);
+            }
+            break;
+
+            case ISMRMRD::ISMRMRD_IMTYPE_IMAG:
+            {
+                GADGET_CHECK_EXCEPTION_RETURN(Gadgetron::complex_to_imag(*m2->getObjectPtr(), *cm2->getObjectPtr()), GADGET_FAIL);
+            }
+            break;
+
+            case ISMRMRD::ISMRMRD_IMTYPE_PHASE:
+            {
+                GADGET_CHECK_EXCEPTION_RETURN(Gadgetron::argument(*m2->getObjectPtr(), *cm2->getObjectPtr()), GADGET_FAIL);
+            }
+            break;
+
+            default:
+                GADGET_DEBUG2("Unknown image type %d, bailing out\n",m1->getObjectPtr()->image_type);
+                m1->release();
+                cm2->release();
+                return GADGET_FAIL;
+        }
+
+        m1->cont(cm2);  // outgoing chain: m1 -> cm2 -> m3
+        cm2->cont(m3);
+        m2->cont(NULL); // detach m2 from the chain before it is released
+        m2->release();
+        m1->getObjectPtr()->data_type = ISMRMRD::ISMRMRD_FLOAT;
+
+        if (this->next()->putq(m1) == -1)
+        {
+            m1->release();
+            GADGET_DEBUG1("Unable to put float image on next gadgets queue");
+            return GADGET_FAIL;
+        }
+        return GADGET_OK;
+    }
+
+    GADGET_FACTORY_DECLARE(ComplexToFloatAttribGadget)
+}
diff --git a/gadgets/mri_core/ComplexToFloatAttribGadget.h b/gadgets/mri_core/ComplexToFloatAttribGadget.h
new file mode 100644
index 0000000..01f6d71
--- /dev/null
+++ b/gadgets/mri_core/ComplexToFloatAttribGadget.h
@@ -0,0 +1,34 @@
+/** \file   ComplexToFloatAttribGadget.h
+    \brief  This Gadget converts complex float values to float format.
+    \author Hui Xue
+*/
+
+#ifndef ComplexToFloatAttribGadget_H_
+#define ComplexToFloatAttribGadget_H_
+
+#include "Gadget.h"
+#include "hoNDArray.h"
+#include "ismrmrd/meta.h"
+#include "gadgetron_mricore_export.h"
+
+#include <ismrmrd/ismrmrd.h>
+
+namespace Gadgetron
+{
+    class EXPORTGADGETSMRICORE ComplexToFloatAttribGadget:public Gadget3<ISMRMRD::ImageHeader, hoNDArray< std::complex<float> >, ISMRMRD::MetaContainer >
+    {
+    public:
+
+        GADGET_DECLARE(ComplexToFloatAttribGadget);
+        // Element type of the incoming image data.
+        typedef std::complex<float> ValueType;
+
+        ComplexToFloatAttribGadget();
+        virtual ~ComplexToFloatAttribGadget();
+        // m1: image header, m2: complex image data, m3: meta attributes (types per the signature below).
+    protected:
+        virtual int process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1, GadgetContainerMessage< hoNDArray< ValueType > >* m2, GadgetContainerMessage<ISMRMRD::MetaContainer>* m3);
+    };
+}
+
+#endif // ComplexToFloatAttribGadget
diff --git a/gadgets/mri_core/CplxDumpGadget.cpp b/gadgets/mri_core/CplxDumpGadget.cpp
index 0ddcd7e..6fd5d65 100644
--- a/gadgets/mri_core/CplxDumpGadget.cpp
+++ b/gadgets/mri_core/CplxDumpGadget.cpp
@@ -112,7 +112,7 @@ namespace Gadgetron{
     // Noise should have been consumed by the noise adjust, but just in case...
     //
     
-    bool is_noise = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_NOISE_MEASUREMENT).isSet(m1->getObjectPtr()->flags);
+    bool is_noise = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_NOISE_MEASUREMENT).isSet(m1->getObjectPtr()->flags);
     if (is_noise) {
       m1->release();
       return GADGET_OK;
diff --git a/gadgets/mri_core/CplxDumpGadget.h b/gadgets/mri_core/CplxDumpGadget.h
index 1b37b46..aa24458 100644
--- a/gadgets/mri_core/CplxDumpGadget.h
+++ b/gadgets/mri_core/CplxDumpGadget.h
@@ -4,8 +4,7 @@
 #include "hoNDArray.h"
 #include "gadgetron_mricore_export.h"
 
-#include <ismrmrd.h>
-#include <ismrmrd_hdf5.h>
+#include <ismrmrd/ismrmrd.h>
 
 namespace Gadgetron{
 
diff --git a/gadgets/mri_core/CropAndCombineGadget.cpp b/gadgets/mri_core/CropAndCombineGadget.cpp
index 0b446fd..7460d6e 100644
--- a/gadgets/mri_core/CropAndCombineGadget.cpp
+++ b/gadgets/mri_core/CropAndCombineGadget.cpp
@@ -22,27 +22,27 @@ process( GadgetContainerMessage<ISMRMRD::ImageHeader>* m1,
     return -1;
   }
 
-  int dimx     = m3->getObjectPtr()->get_size(0);
-  int dimx_old = m2->getObjectPtr()->get_size(0);
+  size_t dimx     = m3->getObjectPtr()->get_size(0);
+  size_t dimx_old = m2->getObjectPtr()->get_size(0);
 
-  int dimy = m3->getObjectPtr()->get_size(1);
-  int dimz = m3->getObjectPtr()->get_size(2);
+  size_t dimy = m3->getObjectPtr()->get_size(1);
+  size_t dimz = m3->getObjectPtr()->get_size(2);
 
-  int channels = m2->getObjectPtr()->get_size(3);
+  size_t channels = m2->getObjectPtr()->get_size(3);
 
   std::complex<float>* d1 = m2->getObjectPtr()->get_data_ptr();
   std::complex<float>* d2 = m3->getObjectPtr()->get_data_ptr();
 
   size_t img_block_old = dimx_old*dimy*dimz;
 
-  for (int z = 0; z < dimz; z++) {
-    for (int y = 0; y < dimy; y++) {
-      for (int x = 0; x < dimx; x++) {
+  for (size_t z = 0; z < dimz; z++) {
+    for (size_t y = 0; y < dimy; y++) {
+      for (size_t x = 0; x < dimx; x++) {
 	float mag = 0;
 	float phase = 0;
 	size_t offset_1 = z*dimy*dimx_old+y*dimx_old+x+((dimx_old-dimx)>>1);
 	size_t offset_2 = z*dimy*dimx+y*dimx+x;
-	for (int c = 0; c < channels; c++) {
+	for (size_t c = 0; c < channels; c++) {
 	  float mag_tmp = norm(d1[offset_1 + c*img_block_old]);
 	  phase += mag_tmp*arg(d1[offset_1 + c*img_block_old]);
 	  mag += mag_tmp;
diff --git a/gadgets/mri_core/CropAndCombineGadget.h b/gadgets/mri_core/CropAndCombineGadget.h
index dda1ed6..14bcac6 100644
--- a/gadgets/mri_core/CropAndCombineGadget.h
+++ b/gadgets/mri_core/CropAndCombineGadget.h
@@ -5,7 +5,7 @@
 #include "hoNDArray.h"
 #include "gadgetron_mricore_export.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <complex>
 
 namespace Gadgetron{
diff --git a/gadgets/mri_core/DependencyQueryGadget.cpp b/gadgets/mri_core/DependencyQueryGadget.cpp
new file mode 100644
index 0000000..095d9b5
--- /dev/null
+++ b/gadgets/mri_core/DependencyQueryGadget.cpp
@@ -0,0 +1,200 @@
+#include "Gadgetron.h"
+#include "GadgetIsmrmrdReadWrite.h"
+#include "DependencyQueryGadget.h"
+#include "gtPlusUtil.h"
+
+#include <boost/version.hpp>
+#include <boost/filesystem.hpp>
+using namespace boost::filesystem;
+
+namespace Gadgetron
+{
+    DependencyQueryGadget::DependencyQueryGadget()
+    {
+        processed_in_close_ = false;
+        // Defaults; close() overrides them with the gadget's property values where those are set.
+        noise_dependency_prefix_ = "GadgetronNoisePreWhitener";
+
+        noise_dependency_attrib_name_ = "NoiseDependencies";
+
+        clean_storage_while_query_ = true;
+        time_limit_in_storage_ = 24.0; // hours
+
+        // get current time
+        std::time(&curr_time_UTC_);
+        struct tm* currTm = std::gmtime(&curr_time_UTC_);
+        curr_time_UTC_ = std::mktime(currTm); // mktime() reads the UTC fields as local time; close() converts file times the same way
+    }
+
+    DependencyQueryGadget::~DependencyQueryGadget()
+    { // nothing to release
+    }
+
+    int DependencyQueryGadget::process_config(ACE_Message_Block* mb)
+    { // the configuration message is not inspected; the gadget's properties are read in close()
+        return GADGET_OK;
+    }
+
+    int DependencyQueryGadget::process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, GadgetContainerMessage< hoNDArray< ValueType > >* m2)
+    { // Acquisition data plays no part in a dependency query: it is consumed and discarded.
+        m1->release(); // the message is not passed on, so free it here (m2 is presumably chained to m1 - confirm)
+        return GADGET_OK;
+    }
+    int DependencyQueryGadget::close(unsigned long flags)
+    {
+        typedef unsigned long long size_t_type; // NOTE(review): not used anywhere in this function
+        // Runs once: optionally deletes old files from the working directory, then sends the names of the files matching the noise prefix to the controller.
+        if ( BaseClass::close(flags) != GADGET_OK ) return GADGET_FAIL;
+
+        if ( !processed_in_close_ ) // only the first call does the work
+        {
+            processed_in_close_ = true;
+
+            boost::shared_ptr<std::string> str = this->get_string_value("workingDirectory");
+            if ( !str->empty() )
+            {
+                noise_dependency_folder_ = *str;
+            }
+            else
+            {
+	      //This is an error, we should not be writing dependencies without having a working directory
+	      return GADGET_FAIL;
+            }
+            GADGET_MSG("Folder to store noise dependencies is " << noise_dependency_folder_);
+
+            str = this->get_string_value("noise_dependency_prefix");
+
+            if ( !str->empty() )
+            {
+                noise_dependency_prefix_ = *str;
+            }
+
+            str = this->get_string_value("noise_dependency_attrib_name");
+
+            if ( !str->empty() )
+            {
+                noise_dependency_attrib_name_ = *str;
+            }
+
+            clean_storage_while_query_ = this->get_bool_value("clean_storage_while_query");
+            GADGET_MSG( "clean_storage_while_query_ is " << clean_storage_while_query_);
+
+            time_limit_in_storage_ = this->get_double_value("time_limit_in_storage");
+            if ( time_limit_in_storage_ < 0 ) // negative values fall back to 24 hours
+            {
+                time_limit_in_storage_ = 24.0;
+            }
+            GADGET_MSG( "time_limit_in_storage_ is " << time_limit_in_storage_);
+
+            // list the content in the noise dependency folder
+            path p (noise_dependency_folder_);
+
+            try
+            {
+                if ( exists(p) )
+                {
+                    if ( is_directory(p) )
+                    {
+                        typedef std::vector<path> vec;
+                        vec v;
+                        v.reserve(100);
+
+                        copy(directory_iterator(p), directory_iterator(), back_inserter(v));
+                        sort(v.begin(), v.end());
+
+                        GADGET_MSG( "A total of " << v.size() << " dependency measurements are found ... ");
+
+                        // if needed, clean the storage first
+                        std::string filename;
+
+                        if ( clean_storage_while_query_ )
+                        {
+                            Gadgetron::gtPlus::gtPlusUtil<ValueType> gt_util; // NOTE(review): gt_util is never used below
+
+                            for (vec::const_iterator it (v.begin()); it != v.end(); ++it)
+                            {
+                                filename = it->string(); // NOTE(review): overwritten in the second loop before it is ever read
+
+                                // find the file creation/modification time
+                                std::time_t lastWriteTime = last_write_time(*it);
+                                struct tm* lastWriteTm = std::gmtime(&lastWriteTime);
+                                lastWriteTime = std::mktime(lastWriteTm); // same gmtime()/mktime() conversion as curr_time_UTC_ in the constructor
+
+                                if ( GT_ABS( (double)lastWriteTime - (double)curr_time_UTC_ ) > time_limit_in_storage_*3600.0 ) // limit is in hours, times in seconds
+                                {
+                                    remove(*it); // NOTE(review): applies to every entry in the folder, not only names containing noise_dependency_prefix_
+                                }
+                            }
+
+                            // update the file list
+                            v.clear();
+                            copy(directory_iterator(p), directory_iterator(), back_inserter(v));
+                            sort(v.begin(), v.end());
+
+                            GADGET_MSG( "A total of " << v.size() << " dependency measurements are found after cleaning ... ");
+                        }
+
+                        // declare the attributes
+                        Gadgetron::GadgetContainerMessage<ISMRMRD::MetaContainer>* m1 = new Gadgetron::GadgetContainerMessage<ISMRMRD::MetaContainer>();
+
+                        size_t count = 0;
+                        size_t ind;
+
+                        for (vec::const_iterator it (v.begin()); it != v.end(); ++it)
+                        {
+#                       if BOOST_VERSION < 104600
+                            filename = it->filename();
+#                       else
+                            filename = it->filename().string();
+#                       endif
+                            ind = filename.find(noise_dependency_prefix_); // the prefix may occur anywhere in the name
+
+                            if ( ind != std::string::npos )
+                            {
+                                m1->getObjectPtr()->append(noise_dependency_attrib_name_.c_str(), filename.c_str());
+                                count++;
+                            }
+                        }
+
+                        GADGET_MSG( "A total of " << count << " noise dependency measurements are found ... ");
+
+                        if ( count == 0 )
+                        {
+                            // put into a dummy item
+                            m1->getObjectPtr()->set(noise_dependency_attrib_name_.c_str(), "Dummy");
+                        }
+
+                        // send the found dependencies
+                        GadgetContainerMessage<GadgetMessageIdentifier>* mb = new GadgetContainerMessage<GadgetMessageIdentifier>();
+                        mb->getObjectPtr()->id = GADGET_MESSAGE_DEPENDENCY_QUERY;
+                        mb->cont(m1);
+
+                        int ret =  this->controller_->output_ready(mb);
+                        if ( (ret < 0) )
+                        {
+                            GADGET_DEBUG1("Failed to return massage to controller\n");
+                            return GADGET_FAIL; // NOTE(review): mb/m1 are not released here - confirm who owns them when output_ready() fails
+                        }
+                    }
+                    else
+                    {
+                        GADGET_ERROR_MSG( noise_dependency_folder_ << " is not a valid folder ... ");
+                    }
+                }
+                else
+                {
+                    GADGET_ERROR_MSG("Cannot find dependency folder : " << noise_dependency_folder_);
+                }
+            }
+            catch (const filesystem_error& ex)
+            {
+                GADGET_ERROR_MSG( ex.what() );
+            }
+        }
+
+        return GADGET_OK; // also reached when the folder is missing or invalid, or a filesystem_error was caught (logged only)
+    }
+
+    GADGET_FACTORY_DECLARE(DependencyQueryGadget)
+
+} // namespace Gadgetron
diff --git a/gadgets/mri_core/DependencyQueryGadget.h b/gadgets/mri_core/DependencyQueryGadget.h
new file mode 100644
index 0000000..d42674b
--- /dev/null
+++ b/gadgets/mri_core/DependencyQueryGadget.h
@@ -0,0 +1,53 @@
+#pragma once
+
+#include "Gadget.h"
+#include "hoNDArray.h"
+#include "gadgetron_mricore_export.h"
+
+#include <ismrmrd/ismrmrd.h>
+#include <complex>
+#include <ctime>
+#include "GadgetMRIHeaders.h"
+#include "ismrmrd/meta.h"
+#include "GadgetStreamController.h"
+
+namespace Gadgetron
+{
+    class EXPORTGADGETSMRICORE DependencyQueryGadget : public Gadget2<ISMRMRD::AcquisitionHeader,hoNDArray< std::complex<float> > >
+    {
+    public:
+        GADGET_DECLARE(DependencyQueryGadget);
+        // Sample type of the data array that accompanies each acquisition header in process().
+        typedef std::complex<float> ValueType;
+        // Shorthand for the Gadget2 base class.
+        typedef Gadget2<ISMRMRD::AcquisitionHeader,hoNDArray< ValueType > > BaseClass;
+
+        DependencyQueryGadget();
+        virtual ~DependencyQueryGadget();
+
+        virtual int close(unsigned long flags);
+
+    protected:
+
+        // If true, old stored files are deleted while querying.
+        bool clean_storage_while_query_;
+
+        // How long, in hours, a file is allowed to stay in the storage.
+        double time_limit_in_storage_;
+
+        // Current time, year/month/day/hour/min/second (UTC, going by the member name).
+        std::time_t curr_time_UTC_;
+        // NOTE(review): presumably records that the query was already handled in close() - confirm in the .cpp.
+        bool processed_in_close_;
+        // Folder that is searched for noise dependency measurements; the prefix presumably selects files by name (TODO confirm).
+        std::string noise_dependency_folder_;
+        std::string noise_dependency_prefix_;
+        // Name of the meta attribute under which the found file names are reported.
+        std::string noise_dependency_attrib_name_;
+
+        virtual int process_config(ACE_Message_Block* mb);
+
+        virtual int process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
+            GadgetContainerMessage< hoNDArray< ValueType > >* m2);
+    };
+}
diff --git a/gadgets/mri_core/DependencyQueryWriter.cpp b/gadgets/mri_core/DependencyQueryWriter.cpp
new file mode 100644
index 0000000..84b8af7
--- /dev/null
+++ b/gadgets/mri_core/DependencyQueryWriter.cpp
@@ -0,0 +1,76 @@
+#include "GadgetIsmrmrdReadWrite.h"
+#include "DependencyQueryWriter.h"
+#include "GadgetContainerMessage.h"
+
+namespace Gadgetron{
+
+// Sends a dependency query result to the peer as three consecutive pieces:
+// the GADGET_MESSAGE_DEPENDENCY_QUERY identifier, the payload length as an
+// unsigned long long, and the serialized meta attributes including their
+// terminating '\0'.
+//
+// sock  stream the message is written to
+// mb    message block expected to hold an ISMRMRD::MetaContainer
+//
+// Returns 0 on success and -1 when mb is not a MetaContainer message, when
+// serialization fails, or when any of the sends fails.
+int DependencyQueryWriter::write(ACE_SOCK_Stream* sock, ACE_Message_Block* mb)
+{
+    typedef unsigned long long size_t_type;
+
+    GadgetContainerMessage<ISMRMRD::MetaContainer>* attribmb = AsContainerMessage<ISMRMRD::MetaContainer>(mb);
+    if (!attribmb)
+    {
+        ACE_DEBUG( (LM_ERROR, ACE_TEXT("(%P,%l), DependencyQueryWriter::write, invalid meta attribute message objects\n")) );
+        return -1;
+    }
+
+    // Serialize before anything is written, so that a serialization failure
+    // does not leave a lone message identifier on the stream.
+    std::string attribContent;
+    try
+    {
+        std::stringstream str;
+        ISMRMRD::serialize( *attribmb->getObjectPtr(), str);
+        attribContent = str.str();
+    }
+    catch(...)
+    {
+        ACE_DEBUG ((LM_ERROR, ACE_TEXT ("(%P|%t) Unable to serialize dependency query meta attributes \n")));
+        return -1;
+    }
+
+    // The payload is the serialized text plus its terminating '\0'; c_str()
+    // already provides exactly that, so no temporary copy has to be managed.
+    const size_t_type len = attribContent.length()+1;
+
+    // Wire order: identifier, payload length, payload.
+    ssize_t send_cnt = 0;
+    GadgetMessageIdentifier id;
+    id.id = GADGET_MESSAGE_DEPENDENCY_QUERY;
+
+    if ((send_cnt = sock->send_n (&id, sizeof(GadgetMessageIdentifier))) <= 0)
+    {
+        ACE_DEBUG ((LM_ERROR,
+                ACE_TEXT ("(%P|%t) Unable to send dependency query message identifier\n")));
+        return -1;
+    }
+
+    if ( (send_cnt = sock->send_n (&len, sizeof(size_t_type))) <= 0 )
+    {
+        ACE_DEBUG ((LM_ERROR, ACE_TEXT ("(%P|%t) Unable to send dependency query meta attributes length \n")));
+        return -1;
+    }
+
+    if ( (send_cnt = sock->send_n (attribContent.c_str(), len)) <= 0 )
+    {
+        ACE_DEBUG ((LM_ERROR,
+                ACE_TEXT ("(%P|%t) Unable to send dependency query meta attributes\n")));
+        return -1;
+    }
+
+    return 0;
+}
+
+GADGETRON_WRITER_FACTORY_DECLARE(DependencyQueryWriter)
+}
diff --git a/gadgets/mri_core/DependencyQueryWriter.h b/gadgets/mri_core/DependencyQueryWriter.h
new file mode 100644
index 0000000..6e31abe
--- /dev/null
+++ b/gadgets/mri_core/DependencyQueryWriter.h
@@ -0,0 +1,28 @@
+/** \file   DependencyQueryWriter.h
+    \brief  Writer that sends dependency query results (serialized meta attributes) over the socket.
+    \author Hui Xue
+*/
+
+#ifndef DependencyQueryWriter_H
+#define DependencyQueryWriter_H
+
+#include "GadgetMessageInterface.h"
+#include "GadgetMRIHeaders.h"
+#include "ismrmrd/meta.h"
+#include "gadgetron_mricore_export.h"
+
+#include <ismrmrd/ismrmrd.h>
+#include <complex>
+
+namespace Gadgetron
+{
+
+    class EXPORTGADGETSMRICORE DependencyQueryWriter : public GadgetMessageWriter
+    {   // Message writer for dependency query results.
+    public:
+        GADGETRON_WRITER_DECLARE(DependencyQueryWriter)
+        virtual int write(ACE_SOCK_Stream* sock, ACE_Message_Block* mb); // writes mb to sock; 0 on success, -1 on failure (see the .cpp)
+    };
+
+}
+#endif
diff --git a/gadgets/mri_core/ExtractGadget.cpp b/gadgets/mri_core/ExtractGadget.cpp
index 0418466..5d04ff0 100644
--- a/gadgets/mri_core/ExtractGadget.cpp
+++ b/gadgets/mri_core/ExtractGadget.cpp
@@ -76,22 +76,22 @@ int ExtractGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader> *m1, Gad
 			}
 
 			cm1->cont(cm2);
-			cm1->getObjectPtr()->image_data_type = ISMRMRD::DATA_FLOAT;//GADGET_IMAGE_REAL_FLOAT;
+			cm1->getObjectPtr()->data_type = ISMRMRD::ISMRMRD_FLOAT;//GADGET_IMAGE_REAL_FLOAT;
 
 			switch (m) {
 			case GADGET_EXTRACT_MAGNITUDE:
-				cm1->getObjectPtr()->image_type = ISMRMRD::TYPE_MAGNITUDE;//GADGET_IMAGE_MAGNITUDE;
+				cm1->getObjectPtr()->image_type = ISMRMRD::ISMRMRD_IMTYPE_MAGNITUDE;//GADGET_IMAGE_MAGNITUDE;
 				break;
 			case GADGET_EXTRACT_REAL:
-				cm1->getObjectPtr()->image_type = ISMRMRD::TYPE_REAL;
+				cm1->getObjectPtr()->image_type = ISMRMRD::ISMRMRD_IMTYPE_REAL;
 				cm1->getObjectPtr()->image_series_index += 1000; //Ensure that this will go in a different series
 				break;
 			case GADGET_EXTRACT_IMAG:
-				cm1->getObjectPtr()->image_type = ISMRMRD::TYPE_IMAG;
+				cm1->getObjectPtr()->image_type = ISMRMRD::ISMRMRD_IMTYPE_IMAG;
 				cm1->getObjectPtr()->image_series_index += 2000; //Ensure that this will go in a different series
 				break;
 			case GADGET_EXTRACT_PHASE:
-				cm1->getObjectPtr()->image_type = ISMRMRD::TYPE_PHASE;
+				cm1->getObjectPtr()->image_type = ISMRMRD::ISMRMRD_IMTYPE_PHASE;
 				cm1->getObjectPtr()->image_series_index += 3000; //Ensure that this will go in a different series
 				break;
 			default:
diff --git a/gadgets/mri_core/ExtractGadget.h b/gadgets/mri_core/ExtractGadget.h
index 35e046d..c521369 100644
--- a/gadgets/mri_core/ExtractGadget.h
+++ b/gadgets/mri_core/ExtractGadget.h
@@ -6,7 +6,7 @@
 #include "GadgetMRIHeaders.h"
 #include "gadgetron_mricore_export.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <complex>
 
 #define MAX_UNSIGNED_SHORT_IMAGE_VALUE
diff --git a/gadgets/mri_core/FFTGadget.cpp b/gadgets/mri_core/FFTGadget.cpp
index cc06e6b..f019465 100644
--- a/gadgets/mri_core/FFTGadget.cpp
+++ b/gadgets/mri_core/FFTGadget.cpp
@@ -1,22 +1,105 @@
-#include "GadgetIsmrmrdReadWrite.h"
 #include "FFTGadget.h"
 #include "hoNDFFT.h"
 
 namespace Gadgetron{
 
-  int FFTGadget::process( GadgetContainerMessage< ISMRMRD::ImageHeader>* m1,
-			  GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
-  {
-    hoNDFFT<float>::instance()->ifft(m2->getObjectPtr(),0);
-    hoNDFFT<float>::instance()->ifft(m2->getObjectPtr(),1);
-    hoNDFFT<float>::instance()->ifft(m2->getObjectPtr(),2);
+FFTGadget::FFTGadget()
+  : image_counter_(0)
+{
+    // image_counter_ starts at 0; process() pre-increments it for every image it emits.
+}
+
+
+//Emits one [RO,E1,E2,CHA] image per (loc, n, s) of every recon bit, inverse FFT'd along RO, E1
+//and (when E2 > 1) E2, and queues it on the next gadget. Returns GADGET_FAIL if an image array cannot
+//be allocated or queued. NOTE(review): m1 is neither released nor forwarded here - confirm who owns it.
+int FFTGadget::process( GadgetContainerMessage<IsmrmrdReconData>* m1)
+{
     
-    if (this->next()->putq(m1) < 0) {
-      return GADGET_FAIL;
+    //Iterate over all the recon bits
+    for(std::vector<IsmrmrdReconBit>::iterator it = m1->getObjectPtr()->rbit_.begin();
+        it != m1->getObjectPtr()->rbit_.end(); ++it)
+    {
+        //Grab a reference to the buffer containing the imaging data
+        IsmrmrdDataBuffered & dbuff = it->data_;
+
+        //7D, fixed order [RO, E1, E2, CHA, LOC, N, S]
+        uint16_t RO = dbuff.data_.get_size(0);
+        uint16_t E1 = dbuff.data_.get_size(1);
+        uint16_t E2 = dbuff.data_.get_size(2);
+        uint16_t CHA = dbuff.data_.get_size(3);
+        uint16_t LOC = dbuff.data_.get_size(4);
+        uint16_t N = dbuff.data_.get_size(5);
+        uint16_t S = dbuff.data_.get_size(6);
+
+        //Each image will be [RO,E1,E2,CHA] big
+        std::vector<size_t> img_dims(4);
+        img_dims[0] = RO;
+        img_dims[1] = E1;
+        img_dims[2] = E2;
+        img_dims[3] = CHA;
+
+        //Loop over S and N and LOC
+        for (uint16_t s=0; s < S; s++) {
+            for (uint16_t n=0; n < N; n++) {
+                for (uint16_t loc=0; loc < LOC; loc++) {
+                    //Create a new image
+                    GadgetContainerMessage<ISMRMRD::ImageHeader>* cm1 =
+                            new GadgetContainerMessage<ISMRMRD::ImageHeader>();
+                    GadgetContainerMessage< hoNDArray< std::complex<float> > >* cm2 =
+                            new GadgetContainerMessage<hoNDArray< std::complex<float> > >();
+                    cm1->cont(cm2);
+                    //TODO do we want an image attribute string?
+                    try{cm2->getObjectPtr()->create(&img_dims);}
+                    catch (std::runtime_error &err){
+                        GADGET_DEBUG_EXCEPTION(err,"Unable to allocate new image array\n");
+                        cm1->release();
+                        return GADGET_FAIL;
+                    }
+
+                    //Set some information into the image header
+                    //Use the middle header for some info
+                    //[E1, E2, LOC, N, S]
+                    ISMRMRD::AcquisitionHeader & acqhdr = dbuff.headers_(dbuff.sampling_.sampling_limits_[1].center_,
+                                                                         dbuff.sampling_.sampling_limits_[2].center_,
+                                                                         loc, n, s);
+
+                    cm1->getObjectPtr()->matrix_size[0]     = RO;
+                    cm1->getObjectPtr()->matrix_size[1]     = E1;
+                    cm1->getObjectPtr()->matrix_size[2]     = E2;
+                    cm1->getObjectPtr()->field_of_view[0]   = dbuff.sampling_.recon_FOV_[0];
+                    cm1->getObjectPtr()->field_of_view[1]   = dbuff.sampling_.recon_FOV_[1];
+                    cm1->getObjectPtr()->field_of_view[2]   = dbuff.sampling_.recon_FOV_[2];
+                    cm1->getObjectPtr()->channels           = CHA;
+                    cm1->getObjectPtr()->slice   = acqhdr.idx.slice;
+                    memcpy(cm1->getObjectPtr()->position, acqhdr.position, sizeof(float)*3);
+                    memcpy(cm1->getObjectPtr()->read_dir, acqhdr.read_dir, sizeof(float)*3);
+                    memcpy(cm1->getObjectPtr()->phase_dir, acqhdr.phase_dir, sizeof(float)*3);
+                    memcpy(cm1->getObjectPtr()->slice_dir, acqhdr.slice_dir, sizeof(float)*3);
+                    memcpy(cm1->getObjectPtr()->patient_table_position, acqhdr.patient_table_position, sizeof(float)*3);
+                    cm1->getObjectPtr()->data_type = ISMRMRD::ISMRMRD_CXFLOAT;
+                    cm1->getObjectPtr()->image_index = ++image_counter_;
+
+                    //Copy the 4D data block [RO,E1,E2,CHA] for this loc, n, and s into the output image (sizeof first: the product is formed in size_t, not int)
+                    memcpy(cm2->getObjectPtr()->get_data_ptr(), &dbuff.data_(0,0,0,0,loc,n,s), sizeof(std::complex<float>)*RO*E1*E2*CHA);
+
+                    //Do the FFTs in place
+                    hoNDFFT<float>::instance()->ifft(cm2->getObjectPtr(),0);
+                    hoNDFFT<float>::instance()->ifft(cm2->getObjectPtr(),1);
+                    if (E2>1) {
+                        hoNDFFT<float>::instance()->ifft(cm2->getObjectPtr(),2);
+                    }
+
+                    //Pass the image down the chain
+                    if (this->next()->putq(cm1) < 0) {
+                        return GADGET_FAIL;
+                    }
+                }
+            }
+        }
     }
-    
-    return GADGET_OK;    
-  }
-  
-  GADGET_FACTORY_DECLARE(FFTGadget)
+    return GADGET_OK;
+}
+
+GADGET_FACTORY_DECLARE(FFTGadget)
 }
diff --git a/gadgets/mri_core/FFTGadget.h b/gadgets/mri_core/FFTGadget.h
index 6031ba4..5e703c6 100644
--- a/gadgets/mri_core/FFTGadget.h
+++ b/gadgets/mri_core/FFTGadget.h
@@ -5,20 +5,22 @@
 #include "hoNDArray.h"
 #include "gadgetron_mricore_export.h"
 
-#include <ismrmrd.h>
-#include <complex>
+#include <ismrmrd/ismrmrd.h>
+#include <ismrmrd/xml.h>
+#include "mri_core_data.h"
 
 namespace Gadgetron{
 
   class EXPORTGADGETSMRICORE FFTGadget : 
-  public Gadget2<ISMRMRD::ImageHeader, hoNDArray< std::complex<float> > >
+  public Gadget1<IsmrmrdReconData>
     {
     public:
       GADGET_DECLARE(FFTGadget)
+      FFTGadget();
 	
-	protected:
-      virtual int process( GadgetContainerMessage< ISMRMRD::ImageHeader>* m1,
-			   GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2);      
+    protected:
+      virtual int process(GadgetContainerMessage<IsmrmrdReconData>* m1);
+      long long image_counter_;      
     };
 }
 #endif //FFTGADGET_H
diff --git a/gadgets/mri_core/FloatToUShortAttribGadget.cpp b/gadgets/mri_core/FloatToUShortAttribGadget.cpp
new file mode 100644
index 0000000..7cb74be
--- /dev/null
+++ b/gadgets/mri_core/FloatToUShortAttribGadget.cpp
@@ -0,0 +1,130 @@
+/*
+*       FloatToUShortAttribGadget.cpp
+*
+*       Created on: March 10, 2014
+*       Author: Hui Xue
+*/
+
+#include "GadgetIsmrmrdReadWrite.h"
+#include "FloatToUShortAttribGadget.h"
+#include "GtPlusDefinition.h"
+
+namespace Gadgetron
+{
+    FloatToUShortAttribGadget::FloatToUShortAttribGadget() : max_intensity_value_(4095), intensity_offset_value_(2048)
+    {   // Initial values only: process_config() assigns both members again from the gadget parameters.
+    }
+
+    FloatToUShortAttribGadget::~FloatToUShortAttribGadget()
+    {   // Empty body: the destructor performs no work of its own.
+    }
+
+    int FloatToUShortAttribGadget::process_config(ACE_Message_Block* mb)
+    {
+        // Read the gadget parameters; mb itself is not used. A max_intensity of 0 falls back to 4095.
+        max_intensity_value_ = this->get_int_value("max_intensity");
+        if ( max_intensity_value_ == 0 ) max_intensity_value_ = 4095;
+        // The offset is stored as returned (no fallback), replacing the constructor's 2048.
+        intensity_offset_value_ = this->get_int_value("intensity_offset");
+        // Always reports success.
+        return GADGET_OK;
+    }
+
+    // Converts the float image in m2 to unsigned short, depending on m1's image type:
+    // magnitude -> |v| clamped to max_intensity_value_; real/imag -> v + intensity_offset_value_ clamped to
+    // [0, max_intensity_value_] (a window center attribute in m3, if present, is shifted by the same offset);
+    // phase -> v * intensity_offset_value_/pi + intensity_offset_value_, clamped. Unknown types fail.
+    int FloatToUShortAttribGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1, GadgetContainerMessage< hoNDArray< float > >* m2, GadgetContainerMessage<ISMRMRD::MetaContainer>* m3)
+    {
+        GadgetContainerMessage<hoNDArray< ACE_UINT16 > > *cm2 =
+            new GadgetContainerMessage<hoNDArray< ACE_UINT16 > >();
+        boost::shared_ptr< std::vector<size_t> > dims = m2->getObjectPtr()->get_dimensions();
+        try {cm2->getObjectPtr()->create(dims);}
+        catch (std::runtime_error &err){
+            GADGET_DEBUG_EXCEPTION(err,"Unable to create unsigned short storage in FloatToUShortAttribGadget");
+            cm2->release(); // cm2 is not chained to anything yet, so it has to be freed here
+            return GADGET_FAIL;
+        }
+
+        float* src = m2->getObjectPtr()->get_data_ptr();
+        ACE_UINT16* dst = cm2->getObjectPtr()->get_data_ptr();
+        long long i;
+        long long numOfPixels = (long long)cm2->getObjectPtr()->get_number_of_elements();
+
+        switch (m1->getObjectPtr()->image_type)
+        {
+            case ISMRMRD::ISMRMRD_IMTYPE_MAGNITUDE:
+            {
+                #pragma omp parallel for default(none) private(i) shared(numOfPixels, src, dst)
+                for (i=0; i<numOfPixels; i++)
+                {
+                    float pix_val = src[i];
+                    pix_val = std::abs(pix_val);
+                    if (pix_val > max_intensity_value_) pix_val = max_intensity_value_;
+                    dst[i] = static_cast<unsigned short>(pix_val+0.5);
+                }
+            }
+            break;
+
+            case ISMRMRD::ISMRMRD_IMTYPE_REAL:
+            case ISMRMRD::ISMRMRD_IMTYPE_IMAG:
+            {
+                #pragma omp parallel for default(none) private(i) shared(numOfPixels, src, dst)
+                for (i=0; i<numOfPixels; i++)
+                {
+                    float pix_val = src[i];
+                    pix_val = pix_val + intensity_offset_value_;
+                    if (pix_val < 0) pix_val = 0;
+                    if (pix_val > max_intensity_value_) pix_val = max_intensity_value_;
+                    dst[i] = static_cast<unsigned short>(pix_val+0.5);
+                }
+
+                if ( m3->getObjectPtr()->length(GTPLUS_IMAGE_WINDOWCENTER) > 0 )
+                {
+                    long windowCenter;
+                    windowCenter = m3->getObjectPtr()->as_long(GTPLUS_IMAGE_WINDOWCENTER, 0);
+                    m3->getObjectPtr()->set(GTPLUS_IMAGE_WINDOWCENTER, windowCenter+(long)intensity_offset_value_);
+                }
+            }
+            break;
+
+            case ISMRMRD::ISMRMRD_IMTYPE_PHASE:
+            {
+                #pragma omp parallel for default(none) private(i) shared(numOfPixels, src, dst)
+                for (i=0; i<numOfPixels; i++)
+                {
+                    float pix_val = src[i];
+                    pix_val *= (float)(intensity_offset_value_/3.14159265);
+                    pix_val += intensity_offset_value_;
+                    if (pix_val < 0) pix_val = 0;
+                    if (pix_val > max_intensity_value_) pix_val = max_intensity_value_;
+                    dst[i] = static_cast<unsigned short>(pix_val);
+                }
+            }
+            break;
+
+            default:
+                GADGET_DEBUG2("Unknown image type %d, bailing out\n",m1->getObjectPtr()->image_type);
+                m1->release();
+                cm2->release();
+                return GADGET_FAIL;
+        }
+        // Re-chain as m1 -> cm2 -> m3, then detach m2 from m3 before releasing it.
+        m1->cont(cm2);
+        cm2->cont(m3);
+        m2->cont(NULL);
+        m2->release();
+        m1->getObjectPtr()->data_type = ISMRMRD::ISMRMRD_USHORT;
+
+        if (this->next()->putq(m1) == -1)
+        {
+            m1->release();
+            GADGET_DEBUG1("Unable to put unsigned short magnitude image on next gadgets queue");
+            return GADGET_FAIL;
+        }
+
+        return GADGET_OK;
+    }
+
+    GADGET_FACTORY_DECLARE(FloatToUShortAttribGadget)
+}
diff --git a/gadgets/mri_core/FloatToUShortAttribGadget.h b/gadgets/mri_core/FloatToUShortAttribGadget.h
new file mode 100644
index 0000000..c453abd
--- /dev/null
+++ b/gadgets/mri_core/FloatToUShortAttribGadget.h
@@ -0,0 +1,43 @@
+#ifndef FloatToUShortAttribGadget_H_
+#define FloatToUShortAttribGadget_H_
+
+#include "Gadget.h"
+#include "hoNDArray.h"
+#include "ismrmrd/meta.h"
+#include "gadgetron_mricore_export.h"
+
+#include <ismrmrd/ismrmrd.h>
+
+namespace Gadgetron
+{
+
+    /**
+    * This Gadget converts float values to unsigned short int format.
+    *
+    * How the conversion is done will depend on the image type:
+    * Magnitude images: Values above 4095 will be clamped.
+    * Real or Imag: Values below -2048 and above 2047 will be clamped. Zero will be 2048.
+    * Phase: -pi will be 0, +pi will be 4095.
+    *
+    */
+
+    class EXPORTGADGETSMRICORE FloatToUShortAttribGadget:public Gadget3<ISMRMRD::ImageHeader, hoNDArray< float >, ISMRMRD::MetaContainer >
+    {
+    public:
+
+        GADGET_DECLARE(FloatToUShortAttribGadget);
+        // The constructor starts with a maximum intensity of 4095 and an offset of 2048.
+        FloatToUShortAttribGadget();
+        virtual ~FloatToUShortAttribGadget();
+
+    protected:
+        // Upper clamp for converted values, and the offset added to real/imag/phase values before clamping.
+        ACE_UINT16 max_intensity_value_;
+        ACE_UINT16 intensity_offset_value_;
+        // process_config() reads "max_intensity" / "intensity_offset"; process() converts the image and passes it on.
+        virtual int process_config(ACE_Message_Block* mb);
+        virtual int process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1, GadgetContainerMessage< hoNDArray< float > >* m2, GadgetContainerMessage<ISMRMRD::MetaContainer>* m3);
+    };
+}
+
+#endif /* FloatToUShortAttribGadget_H_ */
diff --git a/gadgets/mri_core/FloatToUShortGadget.cpp b/gadgets/mri_core/FloatToUShortGadget.cpp
index 512650f..24bf663 100644
--- a/gadgets/mri_core/FloatToUShortGadget.cpp
+++ b/gadgets/mri_core/FloatToUShortGadget.cpp
@@ -40,18 +40,18 @@ int FloatToUShortGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader> *m
 	for (unsigned long i = 0; i < cm2->getObjectPtr()->get_number_of_elements(); i++) {
 		float pix_val = src[i];
 		switch (m1->getObjectPtr()->image_type) {
-		case ISMRMRD::TYPE_MAGNITUDE:
+		case ISMRMRD::ISMRMRD_IMTYPE_MAGNITUDE:
 			pix_val = std::abs(pix_val);
 			if (pix_val > 4095) pix_val = 4095;
 			break;
-		case ISMRMRD::TYPE_REAL:
-		case ISMRMRD::TYPE_IMAG:
+		case ISMRMRD::ISMRMRD_IMTYPE_REAL:
+		case ISMRMRD::ISMRMRD_IMTYPE_IMAG:
 			pix_val = pix_val + 2048;
 			if (pix_val < 0) pix_val = 0;
 			if (pix_val > 4095) pix_val = 4095;
 			break;
-		case ISMRMRD::TYPE_PHASE:
-			pix_val *= 2048.0/3.14159265;
+		case ISMRMRD::ISMRMRD_IMTYPE_PHASE:
+			pix_val *= (float)(2048.0/3.14159265);
 			pix_val += 2048;
 			if (pix_val < 0) pix_val = 0;
 			if (pix_val > 4095) pix_val = 4095;
@@ -67,7 +67,7 @@ int FloatToUShortGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader> *m
 
 	m1->cont(cm2);
 	m2->release();
-	m1->getObjectPtr()->image_data_type = ISMRMRD::DATA_UNSIGNED_SHORT;
+	m1->getObjectPtr()->data_type = ISMRMRD::ISMRMRD_USHORT;
 
 	if (this->next()->putq(m1) == -1) {
 		m1->release();
diff --git a/gadgets/mri_core/FloatToUShortGadget.h b/gadgets/mri_core/FloatToUShortGadget.h
index c1900ad..261b0dc 100644
--- a/gadgets/mri_core/FloatToUShortGadget.h
+++ b/gadgets/mri_core/FloatToUShortGadget.h
@@ -5,7 +5,7 @@
 #include "hoNDArray.h"
 #include "gadgetron_mricore_export.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 
 namespace Gadgetron{
   
diff --git a/gadgets/mri_core/FlowPhaseSubtractionGadget.cpp b/gadgets/mri_core/FlowPhaseSubtractionGadget.cpp
index 90ab8bd..d7d3f7e 100644
--- a/gadgets/mri_core/FlowPhaseSubtractionGadget.cpp
+++ b/gadgets/mri_core/FlowPhaseSubtractionGadget.cpp
@@ -1,6 +1,6 @@
 #include "FlowPhaseSubtractionGadget.h"
 #include "Gadgetron.h"
-#include "GadgetIsmrmrdReadWrite.h"
+#include "ismrmrd/xml.h"
 
 #ifdef USE_OMP
 #include <omp.h>
@@ -14,38 +14,38 @@ namespace Gadgetron{
 
   int FlowPhaseSubtractionGadget::process_config(ACE_Message_Block* mb)
   {
-    boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg = parseIsmrmrdXMLHeader(std::string(mb->rd_ptr()));
 
-    std::vector<long> dims;
-    ISMRMRD::ismrmrdHeader::encoding_sequence e_seq = cfg->encoding();
-    if (e_seq.size() != 1) {
-      GADGET_DEBUG2("Number of encoding spaces: %d\n", e_seq.size());
+    ISMRMRD::IsmrmrdHeader h;
+    ISMRMRD::deserialize(mb->rd_ptr(),h);
+    
+    if (h.encoding.size() != 1) {
+      GADGET_DEBUG2("Number of encoding spaces: %d\n", h.encoding.size());
       GADGET_DEBUG1("This Gadget only supports one encoding space\n");
       return GADGET_FAIL;
     }
 
-    ISMRMRD::encodingSpaceType e_space = (*e_seq.begin()).encodedSpace();
-    ISMRMRD::encodingSpaceType r_space = (*e_seq.begin()).reconSpace();
-    ISMRMRD::encodingLimitsType e_limits = (*e_seq.begin()).encodingLimits();
-
-    sets_ = e_limits.set().present() ? e_limits.set().get().maximum() + 1 : 1;
-
-    if (sets_ > 2) {
-      GADGET_DEBUG1("Phase subtraction only implemented for two sets for now\n");
-      GADGET_DEBUG2("Number of sets detected: %d, bailing out.\n", sets_);
-      return GADGET_FAIL;
-    }
-
-    buffer_ = boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>[sets_]); 
-
-    size_t bsize = sizeof(GadgetContainerMessage< GadgetContainerMessage<ISMRMRD::ImageHeader> >)*10000;
-
-    for( size_t i=0; i<sets_; i++ ){
-      buffer_[i].high_water_mark(bsize);
-      buffer_[i].low_water_mark(bsize);
-    }
-
-    return GADGET_OK;
+  ISMRMRD::EncodingSpace e_space = h.encoding[0].encodedSpace;
+  ISMRMRD::EncodingSpace r_space = h.encoding[0].reconSpace;
+  ISMRMRD::EncodingLimits e_limits = h.encoding[0].encodingLimits;
+  
+  sets_ = e_limits.set ? e_limits.set->maximum + 1 : 1;
+  
+  if (sets_ > 2) {
+    GADGET_DEBUG1("Phase subtraction only implemented for two sets for now\n");
+    GADGET_DEBUG2("Number of sets detected: %d, bailing out.\n", sets_);
+    return GADGET_FAIL;
+  }
+  
+  buffer_ = boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>[sets_]); 
+  
+  size_t bsize = sizeof(GadgetContainerMessage< GadgetContainerMessage<ISMRMRD::ImageHeader> >)*10000;
+  
+  for( size_t i=0; i<sets_; i++ ){
+    buffer_[i].high_water_mark(bsize);
+    buffer_[i].low_water_mark(bsize);
+  }
+  
+  return GADGET_OK;
   }
 
   int FlowPhaseSubtractionGadget::
diff --git a/gadgets/mri_core/FlowPhaseSubtractionGadget.h b/gadgets/mri_core/FlowPhaseSubtractionGadget.h
index 9d1c1fa..69c884a 100644
--- a/gadgets/mri_core/FlowPhaseSubtractionGadget.h
+++ b/gadgets/mri_core/FlowPhaseSubtractionGadget.h
@@ -6,7 +6,7 @@
 #include "hoNDArray.h"
 #include "gadgetron_mricore_export.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <complex>
 #include <boost/shared_ptr.hpp>
 #include <boost/shared_array.hpp>
diff --git a/gadgets/mri_core/GadgetIsmrmrdReadWrite.h b/gadgets/mri_core/GadgetIsmrmrdReadWrite.h
index 87eb3d6..9fca852 100644
--- a/gadgets/mri_core/GadgetIsmrmrdReadWrite.h
+++ b/gadgets/mri_core/GadgetIsmrmrdReadWrite.h
@@ -9,10 +9,7 @@
 #include "url_encode.h"
 #include "gadgetron_mricore_export.h"
 
-#include <ismrmrd.h>
-#ifndef EXCLUDE_ISMRMRD_XSD
-#include <ismrmrd.hxx>
-#endif
+#include <ismrmrd/ismrmrd.h>
 
 #include <ace/SOCK_Stream.h>
 #include <ace/Task.h>
@@ -46,7 +43,8 @@ namespace Gadgetron{
                 return -1;
             }
 
-            if ((send_cnt = sock->send_n (&acqmb->getObjectPtr()->getHead(), sizeof(ISMRMRD::AcquisitionHeader))) <= 0) {
+            ISMRMRD::ISMRMRD_AcquisitionHeader acqHead = acqmb->getObjectPtr()->getHead();
+            if ((send_cnt = sock->send_n (&acqHead, sizeof(ISMRMRD::ISMRMRD_AcquisitionHeader))) <= 0) {
                 ACE_DEBUG ((LM_ERROR,
                     ACE_TEXT ("(%P|%t) Unable to send acquisition header\n")));
 
@@ -57,7 +55,7 @@ namespace Gadgetron{
             unsigned long data_elements = acqmb->getObjectPtr()->getHead().active_channels*acqmb->getObjectPtr()->getHead().number_of_samples;
 
             if (trajectory_elements) {
-                if ((send_cnt = sock->send_n (&acqmb->getObjectPtr()->getTraj()[0], sizeof(float)*trajectory_elements)) <= 0) {
+                if ((send_cnt = sock->send_n (&acqmb->getObjectPtr()->getTrajPtr()[0], sizeof(float)*trajectory_elements)) <= 0) {
                     ACE_DEBUG ((LM_ERROR,
                         ACE_TEXT ("(%P|%t) Unable to send acquisition trajectory elements\n")));
 
@@ -66,7 +64,7 @@ namespace Gadgetron{
             }
 
             if (data_elements) {
-                if ((send_cnt = sock->send_n (&acqmb->getObjectPtr()->getData()[0], 2*sizeof(float)*data_elements)) <= 0) {
+                if ((send_cnt = sock->send_n (&acqmb->getObjectPtr()->getDataPtr()[0], 2*sizeof(float)*data_elements)) <= 0) {
                     ACE_DEBUG ((LM_ERROR,
                         ACE_TEXT ("(%P|%t) Unable to send acquisition data elements\n")));
 
@@ -169,34 +167,5 @@ namespace Gadgetron{
 
     };
 
-#ifndef EXCLUDE_ISMRMRD_XSD
-    inline boost::shared_ptr<ISMRMRD::ismrmrdHeader> parseIsmrmrdXMLHeader(std::string xml) {
-        char * gadgetron_home = ACE_OS::getenv("GADGETRON_HOME");
-        ACE_TCHAR schema_file_name[4096];
-        ACE_OS::sprintf(schema_file_name, "%s/schema/ismrmrd.xsd", gadgetron_home);
-
-        std::string tmp(schema_file_name);
-        tmp = url_encode(tmp);
-        ACE_OS_String::strncpy(schema_file_name,tmp.c_str(), 4096);
-
-        xml_schema::properties props;
-        props.schema_location (
-            "http://www.ismrm.org/ISMRMRD",
-            std::string (schema_file_name));
-
-
-        std::istringstream str_stream(xml, std::stringstream::in);
-
-        boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg;
-
-        try {
-            cfg = boost::shared_ptr<ISMRMRD::ismrmrdHeader>(ISMRMRD::ismrmrdHeader_ (str_stream,0,props));
-        }  catch (const xml_schema::exception& e) {
-            GADGET_DEBUG2("Failed to parse XML Parameters: %s\n", e.what());
-        }
-
-        return cfg;
-    }
-#endif
 }
 #endif //GADGETISMRMRDREADWRITE_H
diff --git a/gadgets/mri_core/GadgetMRIHeaders.h b/gadgets/mri_core/GadgetMRIHeaders.h
index f235df7..ab4c998 100644
--- a/gadgets/mri_core/GadgetMRIHeaders.h
+++ b/gadgets/mri_core/GadgetMRIHeaders.h
@@ -36,22 +36,27 @@ enum GadgetImageTypes
 namespace Gadgetron{
 
 enum GadgetMessageID {
-  GADGET_MESSAGE_EXT_ID_MIN                = 1000,
-  GADGET_MESSAGE_ACQUISITION               = 1001, /**< DEPRECATED */
-  GADGET_MESSAGE_NEW_MEASUREMENT           = 1002, /**< DEPRECATED */
-  GADGET_MESSAGE_END_OF_SCAN               = 1003, /**< DEPRECATED */
-  GADGET_MESSAGE_IMAGE_CPLX_FLOAT          = 1004, /**< DEPRECATED */
-  GADGET_MESSAGE_IMAGE_REAL_FLOAT          = 1005, /**< DEPRECATED */
-  GADGET_MESSAGE_IMAGE_REAL_USHORT         = 1006, /**< DEPRECATED */
-  GADGET_MESSAGE_EMPTY                     = 1007, /**< DEPRECATED */
-  GADGET_MESSAGE_ISMRMRD_ACQUISITION       = 1008,
-  GADGET_MESSAGE_ISMRMRD_IMAGE_CPLX_FLOAT  = 1009,
-  GADGET_MESSAGE_ISMRMRD_IMAGE_REAL_FLOAT  = 1010,
-  GADGET_MESSAGE_ISMRMRD_IMAGE_REAL_USHORT = 1011,
-  GADGET_MESSAGE_DICOM                     = 1012,
-  GADGET_MESSAGE_CLOUD_JOB                 = 1013,
-  GADGET_MESSAGE_GADGETCLOUD_JOB           = 1014,
-  GADGET_MESSAGE_EXT_ID_MAX                = 4096
+  GADGET_MESSAGE_EXT_ID_MIN                             = 1000,
+  GADGET_MESSAGE_ACQUISITION                            = 1001, /**< DEPRECATED */
+  GADGET_MESSAGE_NEW_MEASUREMENT                        = 1002, /**< DEPRECATED */
+  GADGET_MESSAGE_END_OF_SCAN                            = 1003, /**< DEPRECATED */
+  GADGET_MESSAGE_IMAGE_CPLX_FLOAT                       = 1004, /**< DEPRECATED */
+  GADGET_MESSAGE_IMAGE_REAL_FLOAT                       = 1005, /**< DEPRECATED */
+  GADGET_MESSAGE_IMAGE_REAL_USHORT                      = 1006, /**< DEPRECATED */
+  GADGET_MESSAGE_EMPTY                                  = 1007, /**< DEPRECATED */
+  GADGET_MESSAGE_ISMRMRD_ACQUISITION                    = 1008,
+  GADGET_MESSAGE_ISMRMRD_IMAGE_CPLX_FLOAT               = 1009,
+  GADGET_MESSAGE_ISMRMRD_IMAGE_REAL_FLOAT               = 1010,
+  GADGET_MESSAGE_ISMRMRD_IMAGE_REAL_USHORT              = 1011,
+  GADGET_MESSAGE_DICOM                                  = 1012,
+  GADGET_MESSAGE_CLOUD_JOB                              = 1013,
+  GADGET_MESSAGE_GADGETCLOUD_JOB                        = 1014,
+  GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_CPLX_FLOAT     = 1015,
+  GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_REAL_FLOAT     = 1016,
+  GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_REAL_USHORT    = 1017,
+  GADGET_MESSAGE_DICOM_WITHNAME                         = 1018,
+  GADGET_MESSAGE_DEPENDENCY_QUERY                       = 1019,
+  GADGET_MESSAGE_EXT_ID_MAX                             = 4096
 };
 
 
diff --git a/gadgets/mri_core/ImageFinishAttribGadget.cpp b/gadgets/mri_core/ImageFinishAttribGadget.cpp
new file mode 100644
index 0000000..2dd2337
--- /dev/null
+++ b/gadgets/mri_core/ImageFinishAttribGadget.cpp
@@ -0,0 +1,53 @@
+#include "GadgetIsmrmrdReadWrite.h"
+#include "ImageFinishAttribGadget.h"
+
+namespace Gadgetron
+{
+    /** Prefixes the image chain with its message identifier and passes it to the controller.
+        \param m1 image header, head of the chain; m2 (pixel data) and m3 (meta attributes) are not accessed here.
+        \return GADGET_OK on success, -1 without a controller, GADGET_FAIL on a bad element size or a failed hand-over. */
+    template <typename T>
+    int ImageFinishAttribGadget<T>::process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1, GadgetContainerMessage< hoNDArray< T > >* m2, GadgetContainerMessage<ISMRMRD::MetaContainer>* m3)
+    {
+        if (!this->controller_)
+        {
+            ACE_DEBUG( (LM_DEBUG,
+                ACE_TEXT("Cannot return result to controller, no controller set")) );
+            return -1;
+        }
+
+        GadgetContainerMessage<GadgetMessageIdentifier>* mb = new GadgetContainerMessage<GadgetMessageIdentifier>();
+        switch (sizeof(T)) // the element size selects the message identifier
+        {
+        case 2: //Unsigned short
+            mb->getObjectPtr()->id = GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_REAL_USHORT;
+            break;
+        case 4: //Float
+            mb->getObjectPtr()->id = GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_REAL_FLOAT;
+            break;
+        case 8: //Complex float
+            mb->getObjectPtr()->id = GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_CPLX_FLOAT;
+            break;
+        default:
+            // sizeof yields a size_t; cast it so the argument really matches the %d conversion.
+            GADGET_DEBUG2("Wrong data size detected: %d\n", static_cast<int>(sizeof(T)));
+            mb->release();
+            m1->release();
+            return GADGET_FAIL;
+        }
+
+        mb->cont(m1); // the identifier block now heads the chain
+        if (this->controller_->output_ready(mb) < 0)
+        {
+            GADGET_DEBUG1("Failed to return message to controller\n");
+            return GADGET_FAIL;
+        }
+
+        return GADGET_OK;
+    }
+
+    //Declare factories for the various template instances
+    GADGET_FACTORY_DECLARE(ImageFinishAttribGadgetFLOAT)
+    GADGET_FACTORY_DECLARE(ImageFinishAttribGadgetUSHORT)
+    GADGET_FACTORY_DECLARE(ImageFinishAttribGadgetCPLX)
+}
diff --git a/gadgets/mri_core/ImageFinishAttribGadget.h b/gadgets/mri_core/ImageFinishAttribGadget.h
new file mode 100644
index 0000000..d0103c1
--- /dev/null
+++ b/gadgets/mri_core/ImageFinishAttribGadget.h
@@ -0,0 +1,49 @@
+/** \file   ImageFinishAttribGadget.h
+    \brief  Image finish gadget with meta attributes support.
+    \author Hui Xue
+*/
+
+#ifndef ImageFinishAttribGadget_H
+#define ImageFinishAttribGadget_H
+
+#include "Gadget.h"
+#include "hoNDArray.h"
+#include "GadgetMRIHeaders.h"
+#include "GadgetStreamController.h"
+#include "ismrmrd/meta.h"
+#include "gadgetron_mricore_export.h"
+
+#include <ismrmrd/ismrmrd.h>
+#include <complex>
+
+namespace Gadgetron
+{
+    /** Gadget3 over (ISMRMRD::ImageHeader, hoNDArray<T>, ISMRMRD::MetaContainer); T is the pixel element type. */
+    template <typename T>
+    class EXPORTGADGETSMRICORE ImageFinishAttribGadget
+        : public Gadget3<ISMRMRD::ImageHeader, hoNDArray< T >, ISMRMRD::MetaContainer >
+    {
+    protected:
+        virtual int process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1, GadgetContainerMessage< hoNDArray< T > >* m2, GadgetContainerMessage<ISMRMRD::MetaContainer>* m3);
+    };
+
+    /** Concrete gadget for ACE_UINT16 pixels. */
+    class EXPORTGADGETSMRICORE ImageFinishAttribGadgetUSHORT : public ImageFinishAttribGadget<ACE_UINT16> {
+    public:
+        GADGET_DECLARE(ImageFinishAttribGadgetUSHORT);
+    };
+
+    /** Concrete gadget for float pixels. */
+    class EXPORTGADGETSMRICORE ImageFinishAttribGadgetFLOAT : public ImageFinishAttribGadget<float> {
+    public:
+        GADGET_DECLARE(ImageFinishAttribGadgetFLOAT);
+    };
+
+    /** Concrete gadget for std::complex<float> pixels. */
+    class EXPORTGADGETSMRICORE ImageFinishAttribGadgetCPLX : public ImageFinishAttribGadget< std::complex<float> > {
+    public:
+        GADGET_DECLARE(ImageFinishAttribGadgetCPLX);
+    };
+}
+
+#endif //ImageFinishAttribGadget_H
diff --git a/gadgets/mri_core/ImageFinishGadget.cpp b/gadgets/mri_core/ImageFinishGadget.cpp
index c5b1752..8a7bc24 100644
--- a/gadgets/mri_core/ImageFinishGadget.cpp
+++ b/gadgets/mri_core/ImageFinishGadget.cpp
@@ -45,7 +45,7 @@ int ImageFinishGadget<T>
 }
 
 //Declare factories for the various template instances
-GADGET_FACTORY_DECLARE(ImageFinishGadgetFLOAT)
-GADGET_FACTORY_DECLARE(ImageFinishGadgetUSHORT)
-GADGET_FACTORY_DECLARE(ImageFinishGadgetCPLX)
+GADGET_FACTORY_DECLARE(ImageFinishGadgetFLOAT);
+GADGET_FACTORY_DECLARE(ImageFinishGadgetUSHORT);
+GADGET_FACTORY_DECLARE(ImageFinishGadgetCPLX);
 }
diff --git a/gadgets/mri_core/ImageFinishGadget.h b/gadgets/mri_core/ImageFinishGadget.h
index da86e96..bfd5950 100644
--- a/gadgets/mri_core/ImageFinishGadget.h
+++ b/gadgets/mri_core/ImageFinishGadget.h
@@ -7,7 +7,7 @@
 #include "GadgetStreamController.h"
 #include "gadgetron_mricore_export.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <complex>
 
 namespace Gadgetron{
diff --git a/gadgets/mri_core/ImageWriterGadget.h b/gadgets/mri_core/ImageWriterGadget.h
index 9ee52eb..f944fe8 100644
--- a/gadgets/mri_core/ImageWriterGadget.h
+++ b/gadgets/mri_core/ImageWriterGadget.h
@@ -5,7 +5,7 @@
 #include "hoNDArray.h"
 #include "gadgetron_mricore_export.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <complex>
 
 namespace Gadgetron{
diff --git a/gadgets/mri_core/IsmrmrdDumpGadget.cpp b/gadgets/mri_core/IsmrmrdDumpGadget.cpp
index 7d55225..58e055e 100644
--- a/gadgets/mri_core/IsmrmrdDumpGadget.cpp
+++ b/gadgets/mri_core/IsmrmrdDumpGadget.cpp
@@ -1,134 +1,138 @@
 #include "GadgetIsmrmrdReadWrite.h"
 #include "IsmrmrdDumpGadget.h"
 #include "Gadgetron.h"
-namespace Gadgetron{
-
-
-  std::string get_date_time_string()
-  {
-    time_t rawtime;
-    struct tm * timeinfo;
-    time ( &rawtime );
-    timeinfo = localtime ( &rawtime );
-
-    
-    std::stringstream str;
-    str << timeinfo->tm_year+1900
-	<< std::setw(2) << std::setfill('0') << timeinfo->tm_mon+1
-	<< std::setw(2) << std::setfill('0') << timeinfo->tm_mday
-	<< "-"
-	<< std::setw(2) << std::setfill('0') << timeinfo->tm_hour
-	<< std::setw(2) << std::setfill('0') << timeinfo->tm_min
-	<< std::setw(2) << std::setfill('0') << timeinfo->tm_sec;
-    
-    std::string ret = str.str();
-    
-    return ret;
-  }
-
-  IsmrmrdDumpGadget::IsmrmrdDumpGadget()
-    : Gadget2<ISMRMRD::AcquisitionHeader,hoNDArray< std::complex<float> > >()
-    , file_prefix_("ISMRMRD_DUMP")
-    , ismrmrd_file_name_("ISMRMRD_DUMP.h5") //This will be reset during configuration
-    , append_timestamp_(true)
-  {
-    set_parameter("file_prefix","ISMRMRD_DUMP",false);
-    set_parameter("append_timestamp","1",false);
-
-  }
-
-int IsmrmrdDumpGadget
-::process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
-	  GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
-{
-
-
-  ISMRMRD::Acquisition ismrmrd_acq;
-
-  ismrmrd_acq.setHead(*m1->getObjectPtr());
-  
-  std::valarray<float> d(reinterpret_cast<float*>(m2->getObjectPtr()->get_data_ptr()),
-			 m2->getObjectPtr()->get_number_of_elements()*2);
-
-  ismrmrd_acq.setData(d);
 
-  if (m2->cont()) {
-    //Write trajectory
-    if (ismrmrd_acq.getTrajectoryDimensions() == 0) {
-      GADGET_DEBUG1("Malformed dataset. Trajectory attached but trajectory dimensions == 0\n");
-      return GADGET_FAIL;
-    }
-    
-    GadgetContainerMessage< hoNDArray<float> >* m3 = AsContainerMessage< hoNDArray<float> >(m2->cont());
-
-    if (!m3) {
-      GADGET_DEBUG1("Error casting trajectory data package");
-      return GADGET_FAIL;
-    } 
-
-    std::valarray<float> t(m3->getObjectPtr()->get_data_ptr(),
-			   m3->getObjectPtr()->get_number_of_elements());
-    
-    ismrmrd_acq.setTraj(t);
-
-  } else {
-    if (ismrmrd_acq.getTrajectoryDimensions() != 0) {
-      GADGET_DEBUG1("Malformed dataset. Trajectory dimensions not zero but no trajectory attached\n");
-      return GADGET_FAIL;
+namespace Gadgetron
+{
+    /** Builds a timestamp from the current local time: year, then two-digit month and day,
+        a dash, then two-digit hour, minute and second. */
+    std::string get_date_time_string()
+    {
+        time_t now;
+        time ( &now );
+        struct tm * local = localtime ( &now );
+
+        // The fill character sticks to the stream; only the width has to be repeated per field.
+        std::stringstream out;
+        out << local->tm_year+1900 << std::setfill('0');
+        out << std::setw(2) << local->tm_mon+1;
+        out << std::setw(2) << local->tm_mday;
+        out << "-";
+        out << std::setw(2) << local->tm_hour;
+        out << std::setw(2) << local->tm_min;
+        out << std::setw(2) << local->tm_sec;
+
+        return out.str();
     }
-  }
 
-
-  {
-    ISMRMRD::HDF5Exclusive lock;
-    if (ismrmrd_dataset_->appendAcquisition(&ismrmrd_acq) < 0) {
-      GADGET_DEBUG1("Error appending ISMRMRD Dataset\n");
-      return GADGET_FAIL;
+    /** Starts with the prefix "ISMRMRD_DUMP", the file name "ISMRMRD_DUMP.h5" and timestamping switched on. */
+    IsmrmrdDumpGadget::IsmrmrdDumpGadget() :
+        Gadget2<ISMRMRD::AcquisitionHeader,hoNDArray< std::complex<float> > >(),
+        file_prefix_("ISMRMRD_DUMP"),
+        ismrmrd_file_name_("ISMRMRD_DUMP.h5"), //This will be reset during configuration
+        append_timestamp_(true)
+    {
+        // Nothing left to do: the initializer list already gave every member its default.
+    }
-  }
-
 
-  //It is enough to put the first one, since they are linked
-  if (this->next()->putq(m1) == -1) {
-    m1->release();
-    ACE_ERROR_RETURN( (LM_ERROR,
-		       ACE_TEXT("%p\n"),
-		       ACE_TEXT("IsmrmrdDumpGadget::process, passing data on to next gadget")),
-		      -1);
-  }
-
-  return 0;
-}
-
-int IsmrmrdDumpGadget
-::process_config(ACE_Message_Block* mb)
-{
+    /** Reads "file_prefix" and "append_timestamp", derives the output file name, creates the dataset and stores the XML header.
+        \param mb message block whose read pointer is interpreted as a NUL-terminated XML configuration string.
+        \return GADGET_OK on success, GADGET_FAIL when creating the dataset or writing the header throws. */
+    int IsmrmrdDumpGadget::process_config(ACE_Message_Block* mb)
+    {
+        file_prefix_ = *(get_string_value("file_prefix").get());
+        if ( file_prefix_.empty() )
+        {
+            file_prefix_ = "ISMRMRD_DUMP";
+        }
+        append_timestamp_ = get_bool_value("append_timestamp");
+
+        //Generate filename
+        ismrmrd_file_name_ = file_prefix_;
+        if (append_timestamp_)
+        {
+            ismrmrd_file_name_ += std::string("_") + get_date_time_string();
+        }
+        ismrmrd_file_name_ += std::string(".h5");
+
+        std::string xml_config(mb->rd_ptr());
+
+        // Constructing the dataset may throw as well (ISMRMRD API), so it is guarded together with the header write.
+        try {
+            ismrmrd_dataset_ = boost::shared_ptr<ISMRMRD::Dataset>(new ISMRMRD::Dataset(ismrmrd_file_name_.c_str(), "dataset"));
+            ismrmrd_dataset_->writeHeader(xml_config);
+        }
+        catch (...)
+        {
+            GADGET_DEBUG2("Failed to create HDF file %s or to write its XML header\n", ismrmrd_file_name_.c_str());
+            return GADGET_FAIL;
+        }
+
+        return GADGET_OK;
+    }
 
-  file_prefix_ = *(get_string_value("file_prefix").get());
-  append_timestamp_ = (get_int_value("append_timestamp") > 0);
-
-  //Generate filename
-  if (append_timestamp_) {
-    ismrmrd_file_name_ = file_prefix_ + std::string("_") + get_date_time_string() + std::string(".h5");
-  } else {
-    ismrmrd_file_name_ = file_prefix_ + std::string(".h5");
-  }
-  
-  
-  ISMRMRD::HDF5Exclusive lock; //This will ensure threadsafe access to HDF5
-  ismrmrd_dataset_ = boost::shared_ptr<ISMRMRD::IsmrmrdDataset>(new ISMRMRD::IsmrmrdDataset(ismrmrd_file_name_.c_str(), "dataset"));
- 
-  std::string xml_config(mb->rd_ptr());
-
-  if (ismrmrd_dataset_->writeHeader(xml_config) < 0 ) {
-    GADGET_DEBUG1("Failed to write XML header to HDF file\n");
-    return GADGET_FAIL;
-  }
- 
-  return GADGET_OK;
-}
+    /** Appends one acquisition (header, samples, optional trajectory) to the dump dataset and forwards m1 to the next gadget.
+        \return 0 on success, GADGET_FAIL on malformed input or a failed append, -1 when the next gadget's queue rejects m1. */
+    int IsmrmrdDumpGadget::process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
+    {
+        ISMRMRD::Acquisition ismrmrd_acq;
+        ismrmrd_acq.setHead(*m1->getObjectPtr());
+        // The destination is sized from the header; refuse to copy more samples than it can hold.
+        if (m2->getObjectPtr()->get_number_of_elements() > ismrmrd_acq.getNumberOfDataElements())
+        {
+            GADGET_DEBUG1("Malformed dataset. More samples attached than the acquisition header describes\n");
+            return GADGET_FAIL;
+        }
+        memcpy((void *)ismrmrd_acq.getDataPtr(), m2->getObjectPtr()->get_data_ptr(),
+               sizeof(float)*m2->getObjectPtr()->get_number_of_elements()*2);
+
+        if (m2->cont())
+        {
+            //Write trajectory
+            if (ismrmrd_acq.trajectory_dimensions() == 0)
+            {
+                GADGET_DEBUG1("Malformed dataset. Trajectory attached but trajectory dimensions == 0\n");
+                return GADGET_FAIL;
+            }
+            GadgetContainerMessage< hoNDArray<float> >* m3 = AsContainerMessage< hoNDArray<float> >(m2->cont());
+            if (!m3)
+            {
+                GADGET_DEBUG1("Error casting trajectory data package");
+                return GADGET_FAIL;
+            }
+            if (m3->getObjectPtr()->get_number_of_elements() > ismrmrd_acq.getNumberOfTrajElements())
+            {
+                GADGET_DEBUG1("Malformed dataset. More trajectory values attached than the acquisition header describes\n");
+                return GADGET_FAIL;
+            }
+            memcpy((void *)ismrmrd_acq.getTrajPtr(), m3->getObjectPtr()->get_data_ptr(),
+                   sizeof(float)*m3->getObjectPtr()->get_number_of_elements());
+        }
+        else if (ismrmrd_acq.trajectory_dimensions() != 0)
+        {
+            GADGET_DEBUG1("Malformed dataset. Trajectory dimensions not zero but no trajectory attached\n");
+            return GADGET_FAIL;
+        }
+
+        try {
+            ismrmrd_dataset_->appendAcquisition(ismrmrd_acq);
+        }
+        catch (...)
+        {
+            GADGET_DEBUG1("Error appending ISMRMRD Dataset\n");
+            return GADGET_FAIL;
+        }
+        //It is enough to put the first one, since they are linked
+        if (this->next()->putq(m1) == -1)
+        {
+            m1->release();
+            ACE_ERROR_RETURN( (LM_ERROR,
+                ACE_TEXT("%p\n"),
+                ACE_TEXT("IsmrmrdDumpGadget::process, passing data on to next gadget")),
+                -1);
+        }
+        return 0;
+    }
 
-GADGET_FACTORY_DECLARE(IsmrmrdDumpGadget)
+    GADGET_FACTORY_DECLARE(IsmrmrdDumpGadget)
 }
-
-
diff --git a/gadgets/mri_core/IsmrmrdDumpGadget.h b/gadgets/mri_core/IsmrmrdDumpGadget.h
index 26ea993..d982da7 100644
--- a/gadgets/mri_core/IsmrmrdDumpGadget.h
+++ b/gadgets/mri_core/IsmrmrdDumpGadget.h
@@ -5,8 +5,8 @@
 #include "hoNDArray.h"
 #include "gadgetron_mricore_export.h"
 
-#include <ismrmrd.h>
-#include <ismrmrd_hdf5.h>
+#include <ismrmrd/ismrmrd.h>
+#include <ismrmrd/dataset.h>
 
 #include <complex>
 
@@ -29,7 +29,7 @@ namespace Gadgetron{
     private:
       std::string file_prefix_;
       std::string ismrmrd_file_name_;
-      boost::shared_ptr<ISMRMRD::IsmrmrdDataset>  ismrmrd_dataset_;
+      boost::shared_ptr<ISMRMRD::Dataset>  ismrmrd_dataset_;
       bool append_timestamp_;
     };
 }
diff --git a/gadgets/mri_core/MRIImageAttribWriter.cpp b/gadgets/mri_core/MRIImageAttribWriter.cpp
new file mode 100644
index 0000000..86cccab
--- /dev/null
+++ b/gadgets/mri_core/MRIImageAttribWriter.cpp
@@ -0,0 +1,154 @@
+#include "GadgetIsmrmrdReadWrite.h"
+#include "MRIImageAttribWriter.h"
+#include "GadgetContainerMessage.h"
+#include "hoNDArray.h"
+
+#include <complex>
+
+namespace Gadgetron{
+
+/** Sends one image with meta attributes over the socket, in this order: message identifier, image header,
+    attribute length, serialized attributes (including the terminating NUL) and the pixel data.
+    \param sock stream the image is written to.
+    \param mb   head of a chain that must cast to ImageHeader -> hoNDArray<T> -> MetaContainer.
+    \return 0 on success, GADGET_FAIL for an unsupported element size, -1 on any other failure. */
+template <typename T>
+int MRIImageAttribWriter<T>::write(ACE_SOCK_Stream* sock, ACE_Message_Block* mb)
+{
+    typedef unsigned long long size_t_type;
+
+    GadgetContainerMessage<ISMRMRD::ImageHeader>* imagemb =
+            AsContainerMessage<ISMRMRD::ImageHeader>(mb);
+
+    if (!imagemb)
+    {
+        ACE_DEBUG( (LM_ERROR, ACE_TEXT("(%P,%l), MRIImageAttribWriter::write, invalid image message objects, 1\n")) );
+        return -1;
+    }
+
+    GadgetContainerMessage< hoNDArray< T > >* datamb =
+            AsContainerMessage< hoNDArray< T > >(imagemb->cont());
+
+    if (!datamb)
+    {
+        ACE_DEBUG( (LM_ERROR, ACE_TEXT("(%P,%l), MRIImageAttribWriter::write, invalid image message objects\n")) );
+        return -1;
+    }
+
+    GadgetContainerMessage<ISMRMRD::MetaContainer>* attribmb =
+            AsContainerMessage<ISMRMRD::MetaContainer>(datamb->cont());
+
+    if (!attribmb)
+    {
+        ACE_DEBUG( (LM_ERROR, ACE_TEXT("(%P,%l), MRIImageAttribWriter::write, invalid image attribute message objects\n")) );
+        return -1;
+    }
+
+    ssize_t send_cnt = 0;
+    GadgetMessageIdentifier id;
+    // The element size selects the message identifier.
+    switch (sizeof(T))
+    {
+    case 2: //Unsigned short
+        id.id = GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_REAL_USHORT;
+        break;
+    case 4: //Float
+        id.id = GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_REAL_FLOAT;
+        break;
+    case 8: //Complex float
+        id.id = GADGET_MESSAGE_ISMRMRD_IMAGEWITHATTRIB_CPLX_FLOAT;
+        break;
+    default:
+        ACE_DEBUG ((LM_ERROR,
+                ACE_TEXT ("(%P|%t) MRIImageAttribWriter Wrong data size detected\n")));
+        return GADGET_FAIL;
+    }
+
+    //Let's check if the image header is consistent with the data array size before sending:
+    uint16_t RO = imagemb->getObjectPtr()->matrix_size[0];
+    uint16_t E1 = imagemb->getObjectPtr()->matrix_size[1];
+    uint16_t E2 = imagemb->getObjectPtr()->matrix_size[2];
+
+    // Widen before multiplying: three uint16_t factors are otherwise multiplied as int and can overflow.
+    size_t_type expected_elements = static_cast<size_t_type>(RO)*E1*E2;
+
+    if (expected_elements !=  datamb->getObjectPtr()->get_number_of_elements())
+    {
+        GADGET_DEBUG2("Number of header elements %d is inconsistent with number of elements in NDArray %d\n",static_cast<int>(expected_elements), static_cast<int>(datamb->getObjectPtr()->get_number_of_elements()));
+        GADGET_DEBUG2("Header dimensions: %d, %d, %d\n",RO,E1,E2);
+        GADGET_DEBUG2("Number of array dimensions: %d:\n", static_cast<int>(datamb->getObjectPtr()->get_number_of_dimensions()));
+        for (size_t i = 0; i < datamb->getObjectPtr()->get_number_of_dimensions(); i++)
+        {
+            GADGET_DEBUG2("Dimensions %d: %d\n", static_cast<int>(i), static_cast<int>(datamb->getObjectPtr()->get_size(i)));
+        }
+        return -1;
+    }
+
+    if ((send_cnt = sock->send_n (&id, sizeof(GadgetMessageIdentifier))) <= 0)
+    {
+        ACE_DEBUG ((LM_ERROR,
+                ACE_TEXT ("(%P|%t) Unable to send image message identifier\n")));
+
+        return -1;
+    }
+
+    // The serialized attributes are kept in a std::string, so none of the early returns
+    // below has to free a buffer by hand.
+    std::string attribContent;
+
+    try
+    {
+        std::stringstream str;
+        ISMRMRD::serialize( *attribmb->getObjectPtr(), str);
+        attribContent = str.str();
+    }
+    catch(...)
+    {
+        ACE_DEBUG ((LM_ERROR, ACE_TEXT ("(%P|%t) Unable to serialize image meta attributes \n")));
+
+        return -1;
+    }
+
+    // The terminating NUL is transmitted as part of the payload, hence the +1.
+    size_t_type len = attribContent.length()+1;
+    imagemb->getObjectPtr()->attribute_string_len = len;
+
+    if ((send_cnt = sock->send_n ( imagemb->getObjectPtr(), sizeof(ISMRMRD::ImageHeader))) <= 0)
+    {
+        ACE_DEBUG ((LM_ERROR,
+                ACE_TEXT ("(%P|%t) Unable to send image header\n")));
+
+        return -1;
+    }
+
+    if ( (send_cnt = sock->send_n (&len, sizeof(size_t_type))) <= 0 )
+    {
+        ACE_DEBUG ((LM_ERROR, ACE_TEXT ("(%P|%t) Unable to send image meta attributes length \n")));
+
+        return -1;
+    }
+
+    // c_str() is NUL-terminated, so len bytes cover the text plus its terminator.
+    if ( (send_cnt = sock->send_n (attribContent.c_str(), len)) <= 0 )
+    {
+        ACE_DEBUG ((LM_ERROR,
+                ACE_TEXT ("(%P|%t) Unable to send image meta attributes\n")));
+
+        return -1;
+    }
+
+    if ((send_cnt = sock->send_n (datamb->getObjectPtr()->get_data_ptr(), sizeof(T)*datamb->getObjectPtr()->get_number_of_elements())) <= 0)
+    {
+        ACE_DEBUG ((LM_ERROR,
+                ACE_TEXT ("(%P|%t) Unable to send image data\n")));
+
+        return -1;
+    }
+
+    return 0;
+}
+
+GADGETRON_WRITER_FACTORY_DECLARE(MRIImageAttribWriterFLOAT)
+GADGETRON_WRITER_FACTORY_DECLARE(MRIImageAttribWriterUSHORT)
+GADGETRON_WRITER_FACTORY_DECLARE(MRIImageAttribWriterCPLX)
+}
diff --git a/gadgets/mri_core/MRIImageAttribWriter.h b/gadgets/mri_core/MRIImageAttribWriter.h
new file mode 100644
index 0000000..ee7b604
--- /dev/null
+++ b/gadgets/mri_core/MRIImageAttribWriter.h
@@ -0,0 +1,43 @@
+/** \file   MRIImageAttribWriter.h
+    \brief  MRI image writer with meta attributes.
+    \author Hui Xue
+*/
+
+#ifndef MRIImageAttribWriter_H
+#define MRIImageAttribWriter_H
+
+#include "GadgetMessageInterface.h"
+#include "GadgetMRIHeaders.h"
+#include "ismrmrd/meta.h"
+#include "gadgetron_mricore_export.h"
+
+#include <ismrmrd/ismrmrd.h>
+#include <complex>
+
+namespace Gadgetron
+{
+    /** GadgetMessageWriter for images that carry meta attributes; T is the pixel element type. */
+    template<typename T> class MRIImageAttribWriter : public GadgetMessageWriter {
+    public:
+        virtual int write(ACE_SOCK_Stream* sock, ACE_Message_Block* mb);
+    };
+
+    /** Concrete writer for ACE_UINT16 pixels. */
+    class EXPORTGADGETSMRICORE MRIImageAttribWriterUSHORT : public MRIImageAttribWriter<ACE_UINT16> {
+    public:
+        GADGETRON_WRITER_DECLARE(MRIImageAttribWriterUSHORT);
+    };
+
+    /** Concrete writer for float pixels. */
+    class EXPORTGADGETSMRICORE MRIImageAttribWriterFLOAT : public MRIImageAttribWriter<float> {
+    public:
+        GADGETRON_WRITER_DECLARE(MRIImageAttribWriterFLOAT);
+    };
+
+    /** Concrete writer for std::complex<float> pixels. */
+    class EXPORTGADGETSMRICORE MRIImageAttribWriterCPLX : public MRIImageAttribWriter< std::complex<float> > {
+    public:
+        GADGETRON_WRITER_DECLARE(MRIImageAttribWriterCPLX);
+    };
+}
+#endif
diff --git a/gadgets/mri_core/MRIImageWriter.cpp b/gadgets/mri_core/MRIImageWriter.cpp
index c684ce0..1bcccca 100644
--- a/gadgets/mri_core/MRIImageWriter.cpp
+++ b/gadgets/mri_core/MRIImageWriter.cpp
@@ -7,83 +7,83 @@
 
 namespace Gadgetron{
 
-template <typename T>
-int MRIImageWriter<T>::write(ACE_SOCK_Stream* sock, ACE_Message_Block* mb)
-{
-	GadgetContainerMessage<ISMRMRD::ImageHeader>* imagemb =
+	template <typename T>
+	int MRIImageWriter<T>::write(ACE_SOCK_Stream* sock, ACE_Message_Block* mb)
+	{
+		GadgetContainerMessage<ISMRMRD::ImageHeader>* imagemb =
 			AsContainerMessage<ISMRMRD::ImageHeader>(mb);
 
-	if (!imagemb) {
-		ACE_DEBUG( (LM_ERROR, ACE_TEXT("(%P,%l), MRIImageWriter::write, invalid image message objects, 1\n")) );
-		return -1;
-	}
+		if (!imagemb) {
+			ACE_DEBUG( (LM_ERROR, ACE_TEXT("(%P,%l), MRIImageWriter::write, invalid image message objects, 1\n")) );
+			return -1;
+		}
 
-	GadgetContainerMessage< hoNDArray< T > >* datamb =
+		GadgetContainerMessage< hoNDArray< T > >* datamb =
 			AsContainerMessage< hoNDArray< T > >(imagemb->cont());
 
-	if (!datamb) {
-		ACE_DEBUG( (LM_ERROR, ACE_TEXT("(%P,%l), MRIImageWriter::write, invalid image message objects\n")) );
-		return -1;
-	}
+		if (!datamb) {
+			ACE_DEBUG( (LM_ERROR, ACE_TEXT("(%P,%l), MRIImageWriter::write, invalid image message objects\n")) );
+			return -1;
+		}
 
-	ssize_t send_cnt = 0;
-	GadgetMessageIdentifier id;
-	switch (sizeof(T)) {
-	case 2: //Unsigned short
-		id.id = GADGET_MESSAGE_ISMRMRD_IMAGE_REAL_USHORT;
-		break;
-	case 4: //Float
-		id.id = GADGET_MESSAGE_ISMRMRD_IMAGE_REAL_FLOAT;
-		break;
-	case 8: //Complex float
-		id.id = GADGET_MESSAGE_ISMRMRD_IMAGE_CPLX_FLOAT;
-		break;
-	default:
-		ACE_DEBUG ((LM_ERROR,
+		ssize_t send_cnt = 0;
+		GadgetMessageIdentifier id;
+		switch (sizeof(T)) {
+		case 2: //Unsigned short
+			id.id = GADGET_MESSAGE_ISMRMRD_IMAGE_REAL_USHORT;
+			break;
+		case 4: //Float
+			id.id = GADGET_MESSAGE_ISMRMRD_IMAGE_REAL_FLOAT;
+			break;
+		case 8: //Complex float
+			id.id = GADGET_MESSAGE_ISMRMRD_IMAGE_CPLX_FLOAT;
+			break;
+		default:
+			ACE_DEBUG ((LM_ERROR,
 				ACE_TEXT ("(%P|%t) MRIImageWriter Wrong data size detected\n")));
-		return GADGET_FAIL;
-	}
+			return GADGET_FAIL;
+		}
 
 
-	//Let's check if the image header is consistent with the data array size before sending:
-	unsigned long expected_elements = imagemb->getObjectPtr()->matrix_size[0]*
-			imagemb->getObjectPtr()->matrix_size[1] *  imagemb->getObjectPtr()->matrix_size[2];
+		//Let's check if the image header is consistent with the data array size before sending:
+		unsigned long expected_elements = imagemb->getObjectPtr()->matrix_size[0]*
+			imagemb->getObjectPtr()->matrix_size[1] *  imagemb->getObjectPtr()->matrix_size[2]*imagemb->getObjectPtr()->channels;
 
-	if (expected_elements !=  datamb->getObjectPtr()->get_number_of_elements()) {
-		GADGET_DEBUG2("Number of header elements %d is inconsistent with number of elements in NDArray %d\n",expected_elements, datamb->getObjectPtr()->get_number_of_elements());
-		GADGET_DEBUG2("Header dimensions: %d, %d, %d\n",imagemb->getObjectPtr()->matrix_size[0],imagemb->getObjectPtr()->matrix_size[1],imagemb->getObjectPtr()->matrix_size[2]);
-		GADGET_DEBUG2("Number of array dimensions: %d:\n", datamb->getObjectPtr()->get_number_of_dimensions());
-		for (size_t i = 0; i < datamb->getObjectPtr()->get_number_of_dimensions(); i++) {
-			GADGET_DEBUG2("Dimensions %d: %d\n", i, datamb->getObjectPtr()->get_size(i));
+		if (expected_elements !=  datamb->getObjectPtr()->get_number_of_elements()) {
+			GADGET_DEBUG2("Number of header elements %d is inconsistent with number of elements in NDArray %d\n",expected_elements, datamb->getObjectPtr()->get_number_of_elements());
+			GADGET_DEBUG2("Header dimensions: %d, %d, %d, %d\n",imagemb->getObjectPtr()->matrix_size[0],imagemb->getObjectPtr()->matrix_size[1],imagemb->getObjectPtr()->matrix_size[2], imagemb->getObjectPtr()->channels);
+			GADGET_DEBUG2("Number of array dimensions: %d:\n", datamb->getObjectPtr()->get_number_of_dimensions());
+			for (size_t i = 0; i < datamb->getObjectPtr()->get_number_of_dimensions(); i++) {
+				GADGET_DEBUG2("Dimensions %d: %d\n", i, datamb->getObjectPtr()->get_size(i));
+			}
+			return -1;
 		}
-		return -1;
-	}
 
-	if ((send_cnt = sock->send_n (&id, sizeof(GadgetMessageIdentifier))) <= 0) {
-		ACE_DEBUG ((LM_ERROR,
+		if ((send_cnt = sock->send_n (&id, sizeof(GadgetMessageIdentifier))) <= 0) {
+			ACE_DEBUG ((LM_ERROR,
 				ACE_TEXT ("(%P|%t) Unable to send image message identifier\n")));
 
-		return -1;
-	}
+			return -1;
+		}
 
-	if ((send_cnt = sock->send_n (imagemb->getObjectPtr(), sizeof(ISMRMRD::ImageHeader))) <= 0) {
-		ACE_DEBUG ((LM_ERROR,
+        if ((send_cnt = sock->send_n ( imagemb->getObjectPtr(), sizeof(ISMRMRD::ImageHeader))) <= 0) {
+			ACE_DEBUG ((LM_ERROR,
 				ACE_TEXT ("(%P|%t) Unable to send image header\n")));
 
-		return -1;
-	}
+			return -1;
+		}
 
-	if ((send_cnt = sock->send_n (datamb->getObjectPtr()->get_data_ptr(), sizeof(T)*datamb->getObjectPtr()->get_number_of_elements())) <= 0) {
-		ACE_DEBUG ((LM_ERROR,
+		if ((send_cnt = sock->send_n (datamb->getObjectPtr()->get_data_ptr(), sizeof(T)*datamb->getObjectPtr()->get_number_of_elements())) <= 0) {
+			ACE_DEBUG ((LM_ERROR,
 				ACE_TEXT ("(%P|%t) Unable to send image data\n")));
 
-		return -1;
-	}
+			return -1;
+		}
 
-	return 0;
-}
+		return 0;
+	}
 
-GADGETRON_WRITER_FACTORY_DECLARE(MRIImageWriterFLOAT)
-GADGETRON_WRITER_FACTORY_DECLARE(MRIImageWriterUSHORT)
-GADGETRON_WRITER_FACTORY_DECLARE(MRIImageWriterCPLX)
+	GADGETRON_WRITER_FACTORY_DECLARE(MRIImageWriterFLOAT);
+	GADGETRON_WRITER_FACTORY_DECLARE(MRIImageWriterUSHORT);
+	GADGETRON_WRITER_FACTORY_DECLARE(MRIImageWriterCPLX);
 }
diff --git a/gadgets/mri_core/MRIImageWriter.h b/gadgets/mri_core/MRIImageWriter.h
index ef13221..3d82fb6 100644
--- a/gadgets/mri_core/MRIImageWriter.h
+++ b/gadgets/mri_core/MRIImageWriter.h
@@ -5,33 +5,33 @@
 #include "GadgetMRIHeaders.h"
 #include "gadgetron_mricore_export.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <complex>
 
 namespace Gadgetron{
 
-  template<typename T> class MRIImageWriter : public GadgetMessageWriter
-  {
-  public:
-    virtual int write(ACE_SOCK_Stream* sock, ACE_Message_Block* mb);
-  };
+    template<typename T> class MRIImageWriter : public GadgetMessageWriter
+    {
+    public:
+        virtual int write(ACE_SOCK_Stream* sock, ACE_Message_Block* mb);
+    };
 
-  class EXPORTGADGETSMRICORE MRIImageWriterUSHORT : public MRIImageWriter<ACE_UINT16>
-  {
-  public:
-    GADGETRON_WRITER_DECLARE(GadgetMessageWriterUSHORT);
-  };
+    class EXPORTGADGETSMRICORE MRIImageWriterUSHORT : public MRIImageWriter<ACE_UINT16>
+    {
+    public:
+        GADGETRON_WRITER_DECLARE(MRIImageWriterUSHORT);
+    };
 
-  class EXPORTGADGETSMRICORE MRIImageWriterFLOAT : public MRIImageWriter<float>
-  {
-  public:
-    GADGETRON_WRITER_DECLARE(GadgetMessageWriterFLOAT);
-  };
+    class EXPORTGADGETSMRICORE MRIImageWriterFLOAT : public MRIImageWriter<float>
+    {
+    public:
+        GADGETRON_WRITER_DECLARE(MRIImageWriterFLOAT);
+    };
 
-  class EXPORTGADGETSMRICORE MRIImageWriterCPLX : public MRIImageWriter< std::complex<float> >
-  {
-  public:
-    GADGETRON_WRITER_DECLARE(GadgetMessageWriterCPLX);
-  };
+    class EXPORTGADGETSMRICORE MRIImageWriterCPLX : public MRIImageWriter< std::complex<float> >
+    {
+    public:
+        GADGETRON_WRITER_DECLARE(MRIImageWriterCPLX);
+    };
 }
 #endif
diff --git a/gadgets/mri_core/MaxwellCorrectionGadget.cpp b/gadgets/mri_core/MaxwellCorrectionGadget.cpp
index 86bb75e..b2d18f4 100644
--- a/gadgets/mri_core/MaxwellCorrectionGadget.cpp
+++ b/gadgets/mri_core/MaxwellCorrectionGadget.cpp
@@ -1,144 +1,142 @@
 #include "MaxwellCorrectionGadget.h"
 #include "Gadgetron.h"
-#include "GadgetIsmrmrdReadWrite.h"
 #include "GadgetronTimer.h"
 #include "Spline.h"
+#include "ismrmrd/xml.h"
 
 #include <numeric>
+#include <cstring>
+
 #ifdef USE_OMP
 #include <omp.h>
 #endif 
 
 namespace Gadgetron{
 
-  #ifdef M_PI
-    #undef M_PI
-  #endif // M_PI
-  #define M_PI 3.14159265358979323846
-
-  MaxwellCorrectionGadget::MaxwellCorrectionGadget()
-    : maxwell_coefficients_present_(false)
-    , maxwell_coefficients_(4,0)
-  {
-  }
-
-  MaxwellCorrectionGadget::~MaxwellCorrectionGadget() {}
-
-  int MaxwellCorrectionGadget::process_config(ACE_Message_Block* mb)
-  {
-    // Start parsing the ISMRMRD XML header
-    //
+#ifdef M_PI
+#undef M_PI
+#endif // M_PI
+#define M_PI 3.14159265358979323846
 
-    boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg = parseIsmrmrdXMLHeader(std::string(mb->rd_ptr()));
-
-    if( cfg.get() == 0x0 ){
-      GADGET_DEBUG1("Unable to parse Ismrmrd header\n");
-      return GADGET_FAIL;
-    }
-
-    if (cfg->userParameters().present()) {
-      for (ISMRMRD::userParameters::userParameterDouble_iterator 
-	     i (cfg->userParameters().get().userParameterDouble().begin ()); i != cfg->userParameters().get().userParameterDouble().end(); ++i) {
-	if (std::strcmp(i->name().c_str(),"MaxwellCoefficient_0") == 0) {
-	  maxwell_coefficients_[0] = i->value();
-	} else if (std::strcmp(i->name().c_str(),"MaxwellCoefficient_1") == 0) {
-	  maxwell_coefficients_[1] = i->value();
-	} else if (std::strcmp(i->name().c_str(),"MaxwellCoefficient_2") == 0) {
-	  maxwell_coefficients_[2] = i->value();
-	} else if (std::strcmp(i->name().c_str(),"MaxwellCoefficient_3") == 0) {
-	  maxwell_coefficients_[3] = i->value();
-	} else {
-	  GADGET_DEBUG2("WARNING: unused user parameter parameter %s found\n", i->name().c_str());
-	}
-      }
-    } else {
-      GADGET_DEBUG1("MaxwellCorrection coefficients are supposed to be in the UserParameters. No user parameter section found\n");
-      return GADGET_OK;
+    MaxwellCorrectionGadget::MaxwellCorrectionGadget() // Starts with the correction disabled and all coefficients at zero.
+        : maxwell_coefficients_present_(false) // switched to true by process_config() when the header has a userParameters section
+        , maxwell_coefficients_(4,0) // four entries, each initialised to 0
+    {
     }
 
-    maxwell_coefficients_present_ = true;
-
-    GADGET_DEBUG2("Maxwell Coefficients: %f, %f, %f, %f\n", maxwell_coefficients_[0], maxwell_coefficients_[1], maxwell_coefficients_[2], maxwell_coefficients_[3]);
-
-    return GADGET_OK;
-  }
-
-  int MaxwellCorrectionGadget::
-  process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1,
-	  GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
-  {
-    if (maxwell_coefficients_present_) {
-      //GADGET_DEBUG1("Got coefficients\n");
-
-      int Nx = m2->getObjectPtr()->get_size(0);
-      int Ny = m2->getObjectPtr()->get_size(1);
-      int Nz = m2->getObjectPtr()->get_size(2);
-
-      float dx = m1->getObjectPtr()->field_of_view[0] / Nx;
-      float dy = m1->getObjectPtr()->field_of_view[1] / Ny;
-      float dz = m1->getObjectPtr()->field_of_view[2] / Nz;
-
-      /*
-      GADGET_DEBUG2("Nx = %d, Ny = %d, Nz = %d\n", Nx, Ny, Nz);
-      GADGET_DEBUG2("dx = %f, dy = %f, dz = %f\n", dx, dy, dz);
-      GADGET_DEBUG2("img_pos_x = %f, img_pos_y = %f, img_pos_z = %f\n", m1->getObjectPtr()->position[0], m1->getObjectPtr()->position[1], m1->getObjectPtr()->position[2]);
-      */
-
-      std::vector<float> dR(3,0);
-      std::vector<float> dP(3,0);
-      std::vector<float> dS(3,0);
-      std::vector<float> p(3,0);
-
-      for (int z = 0; z < Nz; z++) {
-	for (int y = 0; y < Ny; y++) {
-	  for (int x = 0; x < Nx; x++) {
-	   
-	    dR[0] = (x-Nx/2+0.5) * dx * m1->getObjectPtr()->read_dir[0];
-	    dR[1] = (x-Nx/2+0.5) * dx * m1->getObjectPtr()->read_dir[1];
-	    dR[2] = (x-Nx/2+0.5) * dx * m1->getObjectPtr()->read_dir[2];
-	    
-	    dP[0] = (y-Ny/2+0.5) * dy * m1->getObjectPtr()->phase_dir[0];
-	    dP[1] = (y-Ny/2+0.5) * dy * m1->getObjectPtr()->phase_dir[1];
-	    dP[2] = (y-Ny/2+0.5) * dy * m1->getObjectPtr()->phase_dir[2];
-	    
-	    if (Nz > 1) {
-	      dS[0] = (z-Nz/2+0.5) * dz * m1->getObjectPtr()->slice_dir[0];
-	      dS[1] = (z-Nz/2+0.5) * dz * m1->getObjectPtr()->slice_dir[1];
-	      dS[2] = (z-Nz/2+0.5) * dz * m1->getObjectPtr()->slice_dir[2];
-	    }
-
-	    p[0] = m1->getObjectPtr()->position[0] + dP[0] + dR[0] + dS[0];
-	    p[1] = m1->getObjectPtr()->position[1] + dP[1] + dR[1] + dS[1];
-	    p[2] = m1->getObjectPtr()->position[2] + dP[1] + dR[2] + dS[2];
-
-	    //Convert to centimeters
-	    p[0] = p[0]/1000.0;
-	    p[1] = p[1]/1000.0;
-	    p[2] = p[2]/1000.0;
-
-	    float delta_phi = maxwell_coefficients_[0]*p[2]*p[2] +
-	      maxwell_coefficients_[1]*(p[0]*p[0] + p[1]*p[1]) + 
-	      maxwell_coefficients_[2]*p[0]*p[2] + 
-	      maxwell_coefficients_[3]*p[1]*p[2];
-
-	    long index = z*Ny*Nx+y*Nx+x;
-	    std::complex<float>* data_ptr = m2->getObjectPtr()->get_data_ptr();
-
-	    std::complex<float> correction = std::polar(1.0f,static_cast<float>(2*M_PI*delta_phi));
-
-	    //data_ptr[index] *= correction;
-	  }
-	}
-      }
-
+    MaxwellCorrectionGadget::~MaxwellCorrectionGadget() {} // Destructor with an empty body.
+
+    // Parses the ISMRMRD XML header in mb and copies the double-valued user parameters
+    // MaxwellCoefficient_0 .. MaxwellCoefficient_3 into maxwell_coefficients_. Always returns
+    // GADGET_OK; the correction is switched on whenever a userParameters section exists.
+    int MaxwellCorrectionGadget::process_config(ACE_Message_Block* mb)
+    {
+        static const char* const coefficient_names[4] = {
+            "MaxwellCoefficient_0", "MaxwellCoefficient_1", "MaxwellCoefficient_2", "MaxwellCoefficient_3"
+        };
+
+        ISMRMRD::IsmrmrdHeader header;
+        ISMRMRD::deserialize(mb->rd_ptr(), header);
+
+        if (!header.userParameters) {
+            GADGET_DEBUG1("MaxwellCorrection coefficients are supposed to be in the UserParameters. No user parameter section found\n");
+            return GADGET_OK;
+        }
+
+        const std::vector<ISMRMRD::UserParameterDouble>& params = header.userParameters->userParameterDouble;
+        for (size_t k = 0; k < params.size(); ++k) {
+            // Position of the parameter's name in the table, or 4 when it is not a Maxwell coefficient.
+            size_t slot = 0;
+            while (slot < 4 && std::strcmp(params[k].name.c_str(), coefficient_names[slot]) != 0) ++slot;
+
+            if (slot < 4) {
+                maxwell_coefficients_[slot] = params[k].value;
+            } else {
+                GADGET_DEBUG2("WARNING: unused user parameter parameter %s found\n", params[k].name.c_str());
+            }
+        }
+
+        maxwell_coefficients_present_ = true;
+        GADGET_DEBUG2("Maxwell Coefficients: %f, %f, %f, %f\n", maxwell_coefficients_[0], maxwell_coefficients_[1], maxwell_coefficients_[2], maxwell_coefficients_[3]);
+        return GADGET_OK;
     }
 
-    if (this->next()->putq(m1) < 0) {
-      GADGET_DEBUG1("Unable to put data on next Gadgets Q\n");
-      return GADGET_FAIL;
+    // Multiplies every voxel of the image in m2 by exp(i*2*pi*delta_phi), where delta_phi is
+    // a quadratic form of the voxel position weighted by the four Maxwell coefficients, and
+    // then queues m1 on the next gadget. Nothing is modified when no coefficients were
+    // configured. Returns GADGET_FAIL only if the hand-off to the next gadget fails.
+    int MaxwellCorrectionGadget::
+        process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1,
+        GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
+    {
+        if (maxwell_coefficients_present_) {
+            const ISMRMRD::ImageHeader& hdr = *m1->getObjectPtr();
+            hoNDArray< std::complex<float> >& image = *m2->getObjectPtr();
+
+            const int Nx = image.get_size(0);
+            const int Ny = image.get_size(1);
+            const int Nz = image.get_size(2);
+
+            const float dx = hdr.field_of_view[0] / Nx;
+            const float dy = hdr.field_of_view[1] / Ny;
+            const float dz = hdr.field_of_view[2] / Nz;
+
+            // The buffer does not change inside the loops, so look it up once.
+            std::complex<float>* const data_ptr = image.get_data_ptr();
+
+            float dR[3] = {0, 0, 0};
+            float dP[3] = {0, 0, 0};
+            float dS[3] = {0, 0, 0};
+            float p[3]  = {0, 0, 0};
+
+            for (int z = 0; z < Nz; z++) {
+                // Slice offset from the volume centre; left at zero for a single slice.
+                if (Nz > 1) {
+                    const double sz = (z-Nz/2+0.5) * dz;
+                    for (int d = 0; d < 3; d++) dS[d] = sz * hdr.slice_dir[d];
+                }
+
+                for (int y = 0; y < Ny; y++) {
+                    const double sy = (y-Ny/2+0.5) * dy;
+                    for (int d = 0; d < 3; d++) dP[d] = sy * hdr.phase_dir[d];
+
+                    for (int x = 0; x < Nx; x++) {
+                        const double sx = (x-Nx/2+0.5) * dx;
+                        for (int d = 0; d < 3; d++) dR[d] = sx * hdr.read_dir[d];
+
+                        p[0] = hdr.position[0] + dP[0] + dR[0] + dS[0];
+                        p[1] = hdr.position[1] + dP[1] + dR[1] + dS[1];
+                        p[2] = hdr.position[2] + dP[2] + dR[2] + dS[2];
+
+                        // Scale by 1/1000. NOTE(review): this was labelled "convert to centimeters",
+                        // but dividing millimetres by 1000 gives metres - confirm the unit expected
+                        // by the coefficients (and that position/field_of_view are in mm).
+                        p[0] = p[0]/1000.0;
+                        p[1] = p[1]/1000.0;
+                        p[2] = p[2]/1000.0;
+
+                        float delta_phi = maxwell_coefficients_[0]*p[2]*p[2] +
+                            maxwell_coefficients_[1]*(p[0]*p[0] + p[1]*p[1]) +
+                            maxwell_coefficients_[2]*p[0]*p[2] +
+                            maxwell_coefficients_[3]*p[1]*p[2];
+
+                        // Widen before multiplying so large volumes cannot overflow int arithmetic.
+                        const size_t index = (static_cast<size_t>(z)*Ny + y)*Nx + x;
+                        std::complex<float> correction = std::polar(1.0f,static_cast<float>(2*M_PI*delta_phi));
+
+                        data_ptr[index] *= correction;
+                    }
+                }
+            }
+        }
+
+        // m2 is not queued separately. NOTE(review): presumably it is chained to m1 - confirm.
+        if (this->next()->putq(m1) < 0) {
+            GADGET_DEBUG1("Unable to put data on next Gadgets Q\n");
+            return GADGET_FAIL;
+        }
+        return GADGET_OK;
     }
-    return GADGET_OK;
-  }
 
-  GADGET_FACTORY_DECLARE(MaxwellCorrectionGadget)
+    GADGET_FACTORY_DECLARE(MaxwellCorrectionGadget)
 }
diff --git a/gadgets/mri_core/MaxwellCorrectionGadget.h b/gadgets/mri_core/MaxwellCorrectionGadget.h
index 931eea2..5121715 100644
--- a/gadgets/mri_core/MaxwellCorrectionGadget.h
+++ b/gadgets/mri_core/MaxwellCorrectionGadget.h
@@ -6,7 +6,7 @@
 #include "hoNDArray.h"
 #include "gadgetron_mricore_export.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <complex>
 
 namespace Gadgetron{  
diff --git a/gadgets/mri_core/NoiseAdjustGadget.cpp b/gadgets/mri_core/NoiseAdjustGadget.cpp
index c9c6762..5401b7a 100644
--- a/gadgets/mri_core/NoiseAdjustGadget.cpp
+++ b/gadgets/mri_core/NoiseAdjustGadget.cpp
@@ -1,130 +1,597 @@
 #include "NoiseAdjustGadget.h"
 #include "Gadgetron.h"
-#include "GadgetIsmrmrdReadWrite.h"
 #include "hoArmadillo.h"
 #include "hoNDArray_elemwise.h"
+#include "GadgetronCommon.h"
+#include "hoMatrix.h"
+#include "hoNDArray_linalg.h"
+#include "hoNDArray_elemwise.h"
+#include "ismrmrd/xml.h"
+
+#ifdef USE_OMP
+    #include "omp.h"
+#endif // USE_OMP
+
+#ifndef _WIN32
+    #include <sys/types.h>
+    #include <sys/stat.h>
+#endif // _WIN32
 
 namespace Gadgetron{
 
-  NoiseAdjustGadget::NoiseAdjustGadget()
-  : noise_decorrelation_calculated_(false)
-  , number_of_noise_samples_(0)
-  , noise_bw_scale_factor_(1.0f)
-  , noise_dwell_time_us_(0.0f)
-  , is_configured_(false)
-  {
-  }
-
-  int NoiseAdjustGadget::process_config(ACE_Message_Block* mb)
-  {
-    boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg = parseIsmrmrdXMLHeader(std::string(mb->rd_ptr()));
- 
-    receiver_noise_bandwidth_ = cfg->acquisitionSystemInformation().get().relativeReceiverNoiseBandwidth().present() ?
-      cfg->acquisitionSystemInformation().get().relativeReceiverNoiseBandwidth().get() : 1.0;
-
-    return GADGET_OK;
-  }
-
-  int NoiseAdjustGadget
-  ::process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
-	    GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
-  {
-
-    bool is_noise = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_NOISE_MEASUREMENT).isSet(m1->getObjectPtr()->flags);
-    unsigned int channels = m1->getObjectPtr()->active_channels;
-    unsigned int samples = m1->getObjectPtr()->number_of_samples;
-
-    if (is_noise) {
-      noise_dwell_time_us_ = m1->getObjectPtr()->sample_time_us;
-
-      //If noise covariance matrix is not allocated
-      if (noise_covariance_matrix_.get_number_of_elements() != channels*channels) {
-	std::vector<size_t> dims(2, channels);
-			
-	try{noise_covariance_matrix_.create(&dims);}
-	catch (std::runtime_error& err)	{
-	  GADGET_DEBUG_EXCEPTION(err, "Unable to allocate storage for noise covariance matrix\n" );
-	  return GADGET_FAIL;
-	}
-	noise_covariance_matrix_.fill(std::complex<double>(0.0,0.0));
-	number_of_noise_samples_ = 0;
-      }
-
-      std::complex<double>* cc_ptr = noise_covariance_matrix_.get_data_ptr();
-      std::complex<float>* data_ptr = m2->getObjectPtr()->get_data_ptr();
-
-      for (unsigned int s = 0; s < samples; s++) {
-	for (unsigned int i = 0; i < channels; i++) {
-	  for (unsigned int j = 0; j < channels; j++) {
-	    cc_ptr[i*channels + j] += (data_ptr[i * samples + s] * conj(data_ptr[j * samples + s]));
-	  }
-	}
-	number_of_noise_samples_++;
-      }
-
-    } else {
-      acquisition_dwell_time_us_ = m1->getObjectPtr()->sample_time_us;
-      if (!is_configured_) {
-	if ((noise_dwell_time_us_ == 0.0f) || (acquisition_dwell_time_us_ == 0.0f)) {
-	  noise_bw_scale_factor_ = 1.0f;
-	} else {
-	  noise_bw_scale_factor_ = std::sqrt(2*acquisition_dwell_time_us_/noise_dwell_time_us_*receiver_noise_bandwidth_);
-	}
-
-	GADGET_DEBUG2("Noise dwell time: %f\n", noise_dwell_time_us_);
-	GADGET_DEBUG2("Acquisition dwell time: %f\n", acquisition_dwell_time_us_);
-	GADGET_DEBUG2("receiver_noise_bandwidth: %f\n", receiver_noise_bandwidth_);
-	GADGET_DEBUG2("noise_bw_scale_factor: %f\n", noise_bw_scale_factor_);
-	is_configured_ = true;
-      }
-
-      if (number_of_noise_samples_ > 0) {
-	if (!noise_decorrelation_calculated_) {
-	  GADGET_DEBUG1("Calculating noise decorrelation\n");
-	  
-	  std::vector<size_t> dims(2, channels);
-	  try{noise_covariance_matrixf_.create(&dims);}
-	  catch (std::runtime_error& err){
-	    GADGET_DEBUG_EXCEPTION(err,"Unable to allocate storage for noise covariance matrix (float)\n");
-	    return GADGET_FAIL;
-	  }
-	  
-	  // Armadillo can best do its template magic when we concatenate all the operations...
-	  // 1. scale for number of samples
-	  // 2. Cholesky decomposition
-	  // 3. Invert lower triangular
-	  // 4. Scale for noise BW
-
-	  arma::cx_mat noise_cov = as_arma_matrix(&noise_covariance_matrix_);	  
-	  arma::cx_fmat noise_covf = as_arma_matrix(&noise_covariance_matrixf_);
-
-	  {	  
-	    noise_covf = arma::conv_to<arma::cx_fmat>::from
-	      (noise_bw_scale_factor_*arma::inv(arma::trimatu(arma::chol(noise_cov/number_of_noise_samples_))));
-	  }
-	  
-	  noise_decorrelation_calculated_ = true;
-	}
-		
-	if (noise_decorrelation_calculated_) {
-	  arma::cx_fmat noise_covf = as_arma_matrix(&noise_covariance_matrixf_);
-	  arma::cx_fmat am2 = as_arma_matrix(m2->getObjectPtr());	  
-	  am2 = am2*arma::trimatu(noise_covf);
-	}
-      }
-      
-      //It is enough to put the first one, since they are linked
-      if (this->next()->putq(m1) == -1) {
-	ACE_ERROR_RETURN( (LM_ERROR,
-			   ACE_TEXT("%p\n"),
-			   ACE_TEXT("NoiseAdjustGadget::process, passing data on to next gadget")),
-			  -1);
-      }
+    // Puts every setting into its default state; process_config() may override them later.
+    NoiseAdjustGadget::NoiseAdjustGadget()
+        : noise_decorrelation_calculated_(false)
+        , number_of_noise_samples_(0)
+        , number_of_noise_samples_per_acquisition_(0)
+        , noise_bw_scale_factor_(1.0f)
+        , noise_dwell_time_us_(-1.0f)
+        , is_configured_(false)
+        , computed_in_close_(false)
+        , use_stored_noise_prewhitener_(false)
+    {
+        noise_dependency_prefix_ = "GadgetronNoisePreWhitener";
+        patient_id_.clear();
+        study_id_.clear();
+        measurement_id_.clear();
+        measurement_id_of_noise_dependency_.clear();
+        noise_dwell_time_us_preset_ = 5;
+        perform_noise_adjust_ = true;
+        // process_config() assigns this only when the header has an acquisitionSystemInformation
+        // section, so start from the same fallback it uses instead of an indeterminate value.
+        receiver_noise_bandwidth_ = 0.793f;
+        gt_timer_.set_timing_in_destruction(false);
+        performTiming_ = false;
     }
-    
-    return GADGET_OK;
-  }
-  
-  GADGET_FACTORY_DECLARE(NoiseAdjustGadget)
-  
+
+    NoiseAdjustGadget::~NoiseAdjustGadget()
+    {
+        // Intentionally empty body.
+    }
+    // Reads the gadget properties and the ISMRMRD XML header in mb, and tries to load a stored noise prewhitener when the header lists a noise dependency. Always returns GADGET_OK.
+    int NoiseAdjustGadget::process_config(ACE_Message_Block* mb)
+    {
+        boost::shared_ptr<std::string> str = this->get_string_value("workingDirectory");
+        if ( !str->empty() )
+        {
+            noise_dependency_folder_ = *str;
+        }
+        else
+        {
+            #ifdef _WIN32
+                noise_dependency_folder_ = std::string("c:\\temp\\gadgetron\\");
+            #else
+                noise_dependency_folder_ =  std::string("/tmp/gadgetron/");
+            #endif // _WIN32
+        }
+        GADGET_MSG("Folder to store noise dependencies is " << noise_dependency_folder_);
+        // An empty "noise_dependency_prefix" property keeps the prefix set in the constructor.
+        str = this->get_string_value("noise_dependency_prefix");
+
+        if ( !str->empty() )
+        {
+            noise_dependency_prefix_ = *str;
+        }
+
+        performTiming_ = this->get_bool_value("performTiming");
+        // Noise adjustment stays enabled unless the property is present and evaluates to false.
+        str = get_string_value("perform_noise_adjust");
+        if ( !str->empty() )
+        {
+            perform_noise_adjust_ = this->get_bool_value("perform_noise_adjust");
+        }
+        else
+        {
+            perform_noise_adjust_ = true;
+        }
+        GADGET_MSG("NoiseAdjustGadget::perform_noise_adjust_ is " << perform_noise_adjust_);
+        // A preset of exactly 0 falls back to 5. NOTE(review): presumably 0 is what an unset property yields - confirm.
+        noise_dwell_time_us_preset_ = (float)this->get_double_value("noise_dwell_time_us_preset");
+        if ( noise_dwell_time_us_preset_ == 0 ) noise_dwell_time_us_preset_ = 5;
+
+        ISMRMRD::IsmrmrdHeader h;
+        ISMRMRD::deserialize(mb->rd_ptr(),h);
+        // NOTE(review): receiver_noise_bandwidth_ is not assigned here when the section below is absent.
+        if ( h.acquisitionSystemInformation )
+        {
+            receiver_noise_bandwidth_ = (float)(h.acquisitionSystemInformation->relativeReceiverNoiseBandwidth ?
+                *h.acquisitionSystemInformation->relativeReceiverNoiseBandwidth : 0.793f);
+
+            GADGET_MSG("receiver_noise_bandwidth_ is " << receiver_noise_bandwidth_);
+        }
+
+        // find the patient ID
+        if ( h.subjectInformation )
+        {
+            if ( h.subjectInformation->patientID )
+            {
+                patient_id_ = *h.subjectInformation->patientID;
+                GADGET_MSG("Patient ID is " << patient_id_);
+                // Replace every '-' and ':' in the stored ID with '_' (the message above shows the raw ID).
+                size_t len = patient_id_.length();
+                for ( size_t n=0; n<len; n++ )
+                {
+                    if ( patient_id_[n] == '-' ) patient_id_[n] = '_';
+                    if ( patient_id_[n] == ':' ) patient_id_[n] = '_';
+                }
+            }
+        }
+
+        // find the study ID
+        if ( h.studyInformation )
+        {
+            if ( h.studyInformation->studyID )
+            {
+                study_id_ = *h.studyInformation->studyID;
+                GADGET_MSG("Study ID is " << study_id_);
+                // Same character replacement as for the patient ID.
+                size_t len = study_id_.length();
+                for ( size_t n=0; n<len; n++ )
+                {
+                    if ( study_id_[n] == '-' ) study_id_[n] = '_';
+                    if ( study_id_[n] == ':' ) study_id_[n] = '_';
+                }
+            }
+        }
+
+        // find the measurementID of this scan
+        if ( h.measurementInformation )
+        {
+            if ( h.measurementInformation->measurementID )
+            {
+                measurement_id_ = *h.measurementInformation->measurementID;
+                GADGET_MSG("Measurement ID is " << measurement_id_);
+            }
+
+            // find the noise dependencies if any
+            if ( h.measurementInformation->measurementDependency.size() > 0 )
+            {
+                measurement_id_of_noise_dependency_.clear();
+                // When several dependencies of type "Noise"/"noise" are listed, the last one wins.
+        std::vector<ISMRMRD::MeasurementDependency>::const_iterator iter = h.measurementInformation->measurementDependency.begin();
+                for ( ; iter!= h.measurementInformation->measurementDependency.end(); iter++ )
+                {
+                    std::string dependencyType = iter->dependencyType;
+                    std::string dependencyID = iter->measurementID;
+
+                    GADGET_MSG("Found dependency measurement : " << dependencyType << " with ID " << dependencyID);
+            
+                    if ( dependencyType=="Noise" || dependencyType=="noise" )
+              {
+                        measurement_id_of_noise_dependency_ = dependencyID;
+                    }
+                }
+                // Only attempt a load when a noise dependency was actually named.
+                if ( !measurement_id_of_noise_dependency_.empty() )
+          {
+                    GADGET_MSG("Measurement ID of noise dependency is " << measurement_id_of_noise_dependency_);
+            
+                    full_name_stored_noise_dependency_ = this->generateFullNameWhenLoadNoiseDependency(measurement_id_of_noise_dependency_);
+                    GADGET_MSG("Stored noise dependency is " << full_name_stored_noise_dependency_);
+
+                    // try to load the precomputed noise prewhitener
+                    if ( !this->loadNoisePrewhitener(noise_dwell_time_us_, noise_covariance_matrixf_) )
+                    {
+                        GADGET_MSG("Stored noise dependency is NOT found : " << full_name_stored_noise_dependency_);
+                        use_stored_noise_prewhitener_ = false;
+                        noise_dwell_time_us_ = -1;
+                        noise_covariance_matrixf_.clear();
+                    }
+                    else
+                    {
+                        GADGET_MSG("Stored noise dependency is found : " << full_name_stored_noise_dependency_);
+                        GADGET_MSG("Stored noise dwell time in us is " << noise_dwell_time_us_);
+                        GADGET_MSG("Stored noise channel number is " << noise_covariance_matrixf_.get_size(0));
+                        use_stored_noise_prewhitener_ = true;
+                    }
+                }
+            }
+        }
+
+        // limit the number of threads used to be 1
+#ifdef USE_OMP
+        omp_set_num_threads(1);
+        GADGET_MSG("NoiseAdjustGadget:omp_set_num_threads(1) ... ");
+#endif // USE_OMP
+
+        return GADGET_OK;
+    }
+
+    // Returns the path of the stored noise prewhitener file that belongs to the noise
+    // measurement measurement_id_of_noise:
+    //     <noise_dependency_folder_>/<noise_dependency_prefix_>_<scan name>
+    std::string NoiseAdjustGadget::generateFullNameWhenLoadNoiseDependency(const std::string& measurement_id_of_noise)
+    {
+        // The scan name reuses the current measurement ID up to and including its last '_'
+        // and appends the noise measurement ID. When the current ID contains no '_', it is
+        // used as is and measurement_id_of_noise does not enter the name at all.
+        const size_t last_underscore = measurement_id_.rfind('_');
+
+        std::string scan_name;
+        if ( last_underscore == std::string::npos )
+        {
+            scan_name = measurement_id_;
+        }
+        else
+        {
+            scan_name = measurement_id_.substr(0, last_underscore + 1) + measurement_id_of_noise;
+        }
+
+        return noise_dependency_folder_ + "/" + noise_dependency_prefix_ + "_" + scan_name;
+    }
+
+    // Returns the path under which the prewhitener of measurement_id is stored:
+    //     <noise_dependency_folder_>/<noise_dependency_prefix_>_<measurement_id>
+    std::string NoiseAdjustGadget::generateFullNameWhenStoreNoiseDependency(const std::string& measurement_id)
+    {
+        std::string path(noise_dependency_folder_);
+        path += "/";
+        path += noise_dependency_prefix_;
+        path += "_";
+        path += measurement_id;
+
+        return path;
+    }
+
+    // Loads a stored prewhitener from full_name_stored_noise_dependency_. The file holds the
+    // noise dwell time (float), the payload length (size_t) and the serialized matrix, read
+    // back as raw bytes. Returns false when the file cannot be opened, is truncated, or
+    // cannot be deserialized; the output arguments may already be partially written then.
+    bool NoiseAdjustGadget::loadNoisePrewhitener(float& noise_dwell_time_us, hoNDArray< ValueType >& noise_covariance_matrixf)
+    {
+        std::ifstream infile(full_name_stored_noise_dependency_.c_str(), std::ios::in|std::ios::binary);
+        if ( !infile.good() )
+        {
+            GADGET_ERROR_MSG("Noise prewhitener file is not good for reading");
+            return false;
+        }
+
+        size_t len = 0;
+        infile.read( reinterpret_cast<char*>(&noise_dwell_time_us), sizeof(float));
+        infile.read( reinterpret_cast<char*>(&len), sizeof(size_t));
+        if ( !infile.good() || len == 0 )
+        {
+            GADGET_ERROR_MSG("Noise prewhitener file header is incomplete");
+            return false;
+        }
+
+        // The vector owns the buffer, so every return path (and any exception) releases it.
+        std::vector<char> buf(len);
+        infile.read(&buf[0], static_cast<std::streamsize>(len));
+        if ( !infile )
+        {
+            GADGET_ERROR_MSG("Noise prewhitener file is truncated");
+            return false;
+        }
+
+        // Success is whatever deserialize() reports for the payload.
+        return noise_covariance_matrixf.deserialize(&buf[0], len);
+    }
+
+    // Writes dwell time (float), payload length (size_t) and the serialized matrix to the given
+    // file. Returns true only when serialization succeeded and the stream reported no error.
+    bool NoiseAdjustGadget::saveNoisePrewhitener(const std::string& full_name_stored_noise_dependency, float& noise_dwell_time_us, hoNDArray< ValueType >& noise_covariance_matrixf)
+    {
+        char* buf = NULL;
+        size_t len(0);
+        if ( !noise_covariance_matrixf.serialize(buf, len) )
+        {
+            GADGET_ERROR_MSG("Noise prewhitener serialization failed ...");
+            return false;
+        }
+        std::ofstream outfile(full_name_stored_noise_dependency.c_str(), std::ios::out|std::ios::binary);
+        if ( !outfile.good() )
+        {
+            delete [] buf;
+            GADGET_ERROR_MSG("Noise prewhitener file is not good for writing");
+            return false;
+        }
+        GADGET_MSG("write out the noise dependency file : " << full_name_stored_noise_dependency);
+        outfile.write( reinterpret_cast<char*>(&noise_dwell_time_us), sizeof(float));
+        outfile.write( reinterpret_cast<char*>(&len), sizeof(size_t));
+        outfile.write(buf, len);
+        outfile.close();
+        delete [] buf;
+        // close() flushes; a failed write, flush or close leaves the stream in a failed state.
+        if ( outfile.fail() )
+        {
+            GADGET_ERROR_MSG("Writing noise prewhitener file failed ...");
+            return false;
+        }
+
+        // set the permission for the noise file to be rewritable (a chmod failure is only logged)
+        #ifndef _WIN32
+            if ( chmod(full_name_stored_noise_dependency.c_str(), S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IWGRP|S_IXGRP|S_IROTH|S_IWOTH|S_IXOTH) != 0 )
+            {
+                GADGET_ERROR_MSG("Changing noise prewhitener file permission failed ...");
+            }
+        #endif // _WIN32
+        return true;
+    }
+
+    // Converts the accumulated noise covariance into the prewhitening matrix and, when
+    // savePrewhitener is set, stores it under the name derived from measurement_id_.
+    // Does nothing beyond timing when no usable noise data has been collected.
+    void NoiseAdjustGadget::computeNoisePrewhitener(bool savePrewhitener)
+    {
+        GADGET_START_TIMING_CONDITION(gt_timer_, "compute noise prewhitener ... ", performTiming_);
+
+        // A positive noise dwell time and at least two noise samples are required.
+        const bool has_noise_data = (noise_dwell_time_us_ > 0) && (number_of_noise_samples_ > 1);
+
+        if ( has_noise_data )
+        {
+            GADGET_MSG("Noise dwell time: " << noise_dwell_time_us_);
+            GADGET_MSG("receiver_noise_bandwidth: " << receiver_noise_bandwidth_);
+        }
+
+        if ( has_noise_data && !noise_decorrelation_calculated_ )
+        {
+            GADGET_MSG("Calculating noise decorrelation");
+
+            // Done as one Armadillo expression:
+            //   1. divide by (number of samples - 1)
+            //   2. Cholesky decomposition
+            //   3. invert the triangular factor
+            // NOTE(review): noise_covf presumably aliases noise_covariance_matrixf_'s memory - confirm as_arma_matrix.
+            const float normalization = (float)number_of_noise_samples_-1;
+            arma::cx_fmat noise_covf = as_arma_matrix(&noise_covariance_matrixf_);
+            noise_covf = arma::inv(arma::trimatu(arma::chol(noise_covf/normalization)));
+
+            // save the noise prewhitener
+            if ( savePrewhitener )
+            {
+                const std::string target = this->generateFullNameWhenStoreNoiseDependency(measurement_id_);
+                this->saveNoisePrewhitener(target, noise_dwell_time_us_, noise_covariance_matrixf_);
+            }
+
+            noise_decorrelation_calculated_ = true;
+        }
+
+        GADGET_STOP_TIMING_CONDITION(gt_timer_, performTiming_);
+    }
+
+    int NoiseAdjustGadget::process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
+    {
+        // GADGET_START_TIMING_CONDITION(gt_timer_, "in noise process ... ", performTiming_);
+
+        bool is_scc_correction = m1->getObjectPtr()->isFlagSet(ISMRMRD::ISMRMRD_ACQ_IS_SURFACECOILCORRECTIONSCAN_DATA);
+        bool is_noise = m1->getObjectPtr()->isFlagSet(ISMRMRD::ISMRMRD_ACQ_IS_NOISE_MEASUREMENT);
+
+        unsigned int channels = m1->getObjectPtr()->active_channels;
+        unsigned int samples = m1->getObjectPtr()->number_of_samples;
+
+        if ( measurement_id_.empty() )
+        {
+            unsigned int muid = m1->getObjectPtr()->measurement_uid;
+            std::ostringstream ostr;
+            ostr << muid;
+            measurement_id_ = ostr.str();
+        }
+
+        if ( !perform_noise_adjust_ )
+        {
+            if ( !is_noise )
+            {
+                if (this->next()->putq(m1) == -1)
+                {
+                    ACE_ERROR_RETURN( (LM_ERROR,
+                        ACE_TEXT("%p\n"),
+                        ACE_TEXT("NoiseAdjustGadget::process, passing data on to next gadget")),
+                        -1);
+                }
+            }
+            else
+            {
+                m1->release();
+            }
+
+            return GADGET_OK;
+        }
+
+        if ( use_stored_noise_prewhitener_ )
+        {
+            if ( !is_noise )
+            {
+                acquisition_dwell_time_us_ = m1->getObjectPtr()->sample_time_us;
+                if (!is_configured_)
+                {
+                    if ((noise_dwell_time_us_ == 0.0f) || (acquisition_dwell_time_us_ == 0.0f))
+                    {
+                        noise_bw_scale_factor_ = 1.0f;
+                    }
+                    else
+                    {
+                        noise_bw_scale_factor_ = (float)std::sqrt(2.0*acquisition_dwell_time_us_/noise_dwell_time_us_*receiver_noise_bandwidth_);
+                    }
+
+                    GADGET_MSG("Noise dwell time: " << noise_dwell_time_us_);
+                    GADGET_MSG("Acquisition dwell time:" << acquisition_dwell_time_us_);
+                    GADGET_MSG("receiver_noise_bandwidth: " << receiver_noise_bandwidth_);
+                    GADGET_MSG("noise_bw_scale_factor: " << noise_bw_scale_factor_);
+                    is_configured_ = true;
+                }
+
+                if ( !noise_decorrelation_calculated_ )
+                {
+                    arma::cx_fmat noise_covf = as_arma_matrix(&noise_covariance_matrixf_);
+                    noise_covf *= noise_bw_scale_factor_;
+                    noise_decorrelation_calculated_ = true;
+                }
+
+                if (noise_decorrelation_calculated_)
+                {
+                    // GADGET_START_TIMING_CONDITION(gt_timer_, "apply noise prewhitener ... ", performTiming_);
+
+                    if ( data_prewhitened_.get_size(0)!=m2->getObjectPtr()->get_size(0) 
+                        || data_prewhitened_.get_size(1)!=m2->getObjectPtr()->get_size(1) )
+                    {
+                        data_prewhitened_.create(m2->getObjectPtr()->get_dimensions());
+                    }
+
+                    memcpy(data_prewhitened_.begin(), m2->getObjectPtr()->begin(), m2->getObjectPtr()->get_number_of_bytes());
+
+                    gemm(*m2->getObjectPtr(), data_prewhitened_, noise_covariance_matrixf_);
+
+                    // GADGET_STOP_TIMING_CONDITION(gt_timer_, performTiming_);
+                }
+
+                if (this->next()->putq(m1) == -1)
+                {
+                    ACE_ERROR_RETURN( (LM_ERROR,
+                        ACE_TEXT("%p\n"),
+                        ACE_TEXT("NoiseAdjustGadget::process, passing data on to next gadget")),
+                        -1);
+                }
+            }
+            else
+            {
+                m1->release();
+            }
+        }
+        else
+        {
+            if ( is_noise )
+            {
+                // this noise can be from a noise scan or it can be from the built-in noise
+                if ( number_of_noise_samples_per_acquisition_ == 0 )
+                {
+                    number_of_noise_samples_per_acquisition_ = samples;
+                }
+
+                if ( noise_dwell_time_us_ < 0 )
+                {
+                    if ( !is_scc_correction && number_of_noise_samples_per_acquisition_>0 )
+                    {
+                        noise_dwell_time_us_ = m1->getObjectPtr()->sample_time_us;
+                    }
+                    else
+                    {
+                        noise_dwell_time_us_ = noise_dwell_time_us_preset_;
+                    }
+                }
+
+                //If noise covariance matrix is not allocated
+                if (noise_covariance_matrixf_.get_number_of_elements() != channels*channels)
+                {
+                    std::vector<size_t> dims(2, channels);
+
+                    try
+                    {
+                        noise_covariance_matrixf_.create(&dims);
+                        noise_covariance_matrixf_once_.create(&dims);
+                    }
+                    catch (std::runtime_error& err)
+                    {
+                        GADGET_DEBUG_EXCEPTION(err, "Unable to allocate storage for noise covariance matrix\n" );
+                        return GADGET_FAIL;
+                    }
+                    Gadgetron::clear(noise_covariance_matrixf_);
+                    Gadgetron::clear(noise_covariance_matrixf_once_);
+                    number_of_noise_samples_ = 0;
+                }
+
+                std::complex<float>* cc_ptr = noise_covariance_matrixf_.get_data_ptr();
+                std::complex<float>* data_ptr = m2->getObjectPtr()->get_data_ptr();
+
+                readout_ = *m2->getObjectPtr();
+                gemm(noise_covariance_matrixf_once_, readout_, true, *m2->getObjectPtr(), false);
+                Gadgetron::add(noise_covariance_matrixf_once_, noise_covariance_matrixf_, noise_covariance_matrixf_);
+
+                number_of_noise_samples_ += samples;
+                m1->release();
+            }
+            else
+            {
+                if ( noise_dwell_time_us_ > 0 )
+                {
+                    acquisition_dwell_time_us_ = m1->getObjectPtr()->sample_time_us;
+                    if (!is_configured_)
+                    {
+                        if ((noise_dwell_time_us_ == 0.0f) || (acquisition_dwell_time_us_ == 0.0f))
+                        {
+                            noise_bw_scale_factor_ = 1.0f;
+                        }
+                        else
+                        {
+                            noise_bw_scale_factor_ = (float)std::sqrt(2.0*acquisition_dwell_time_us_/noise_dwell_time_us_*receiver_noise_bandwidth_);
+                        }
+
+                        GADGET_MSG("Noise dwell time: " << noise_dwell_time_us_);
+                        GADGET_MSG("Acquisition dwell time:" << acquisition_dwell_time_us_);
+                        GADGET_MSG("receiver_noise_bandwidth: " << receiver_noise_bandwidth_);
+                        GADGET_MSG("noise_bw_scale_factor: " << noise_bw_scale_factor_);
+                        is_configured_ = true;
+                    }
+
+                    if (number_of_noise_samples_ > 0)
+                    {
+                        if (!noise_decorrelation_calculated_)
+                        {
+                            if ( is_scc_correction )
+                            {
+                                this->computeNoisePrewhitener(true);
+                            }
+                            else
+                            {
+                                this->computeNoisePrewhitener(false);
+                            }
+
+                            // apply the scaling
+                            arma::cx_fmat noise_covf = as_arma_matrix(&noise_covariance_matrixf_);
+                            noise_covf *= noise_bw_scale_factor_;
+                        }
+                        else
+                        {
+                            if ( m2->getObjectPtr()->get_size(1) == noise_covariance_matrixf_.get_size(0) )
+                            {
+                                if ( data_prewhitened_.get_size(0)!=m2->getObjectPtr()->get_size(0) 
+                                    || data_prewhitened_.get_size(1)!=m2->getObjectPtr()->get_size(1) )
+                                {
+                                    data_prewhitened_.create(m2->getObjectPtr()->get_dimensions());
+                                }
+
+                                memcpy(data_prewhitened_.begin(), m2->getObjectPtr()->begin(), m2->getObjectPtr()->get_number_of_bytes());
+
+                                gemm(*m2->getObjectPtr(), data_prewhitened_, noise_covariance_matrixf_);
+                            }
+                        }
+                    }
+                }
+
+                //It is enough to put the first one, since they are linked
+                if (this->next()->putq(m1) == -1)
+                {
+                    ACE_ERROR_RETURN( (LM_ERROR,
+                        ACE_TEXT("%p\n"),
+                        ACE_TEXT("NoiseAdjustGadget::process, passing data on to next gadget")),
+                        -1);
+                }
+            }
+        }
+
+        // GADGET_STOP_TIMING_CONDITION(gt_timer_, performTiming_);
+
+        return GADGET_OK;
+    }
+
+    // Closes the base gadget first; the first close that succeeds then calls
+    // computeNoisePrewhitener(true), unless a stored prewhitener is in use.
+    int NoiseAdjustGadget::close(unsigned long flags)
+    {
+        if ( BaseClass::close(flags) != GADGET_OK ) { return GADGET_FAIL; }
+        if ( computed_in_close_ ) { return GADGET_OK; }
+
+        computed_in_close_ = true; // latch: any later close() skips the work below
+        if ( this->use_stored_noise_prewhitener_ ) { return GADGET_OK; }
+
+        // this scan is a noise measurement: a dwell time that is still negative takes the preset
+        if ( noise_dwell_time_us_ < 0 ) { noise_dwell_time_us_ = noise_dwell_time_us_preset_; }
+
+        this->computeNoisePrewhitener(true);
+        return GADGET_OK;
+    }
+
+    GADGET_FACTORY_DECLARE(NoiseAdjustGadget)
+
 } // namespace Gadgetron
diff --git a/gadgets/mri_core/NoiseAdjustGadget.h b/gadgets/mri_core/NoiseAdjustGadget.h
index ac72125..0ab312a 100644
--- a/gadgets/mri_core/NoiseAdjustGadget.h
+++ b/gadgets/mri_core/NoiseAdjustGadget.h
@@ -3,8 +3,9 @@
 #include "Gadget.h"
 #include "hoNDArray.h"
 #include "gadgetron_mricore_export.h"
+#include "GadgetronTimer.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <complex>
 
 namespace Gadgetron {
@@ -15,25 +16,63 @@ namespace Gadgetron {
     public:
         GADGET_DECLARE(NoiseAdjustGadget);
 
+        typedef Gadget2<ISMRMRD::AcquisitionHeader,hoNDArray< std::complex<float> > > BaseClass;
+
         typedef std::complex<float> ValueType;
         typedef std::complex<double> PerwhitenerValueType;
 
         NoiseAdjustGadget();
+        virtual ~NoiseAdjustGadget();
+
+        virtual int close(unsigned long flags);
 
     protected:
         bool noise_decorrelation_calculated_;
-        hoNDArray< PerwhitenerValueType > noise_covariance_matrix_;
         hoNDArray< ValueType > noise_covariance_matrixf_;
-        unsigned long int number_of_noise_samples_;
+        hoNDArray< ValueType > noise_covariance_matrixf_once_;
+
+        hoNDArray< ValueType > data_prewhitened_;
+
+        hoNDArray< ValueType > readout_;
+
+        unsigned long long number_of_noise_samples_;
+        unsigned long long number_of_noise_samples_per_acquisition_;
         float noise_dwell_time_us_;
         float acquisition_dwell_time_us_;
         float noise_bw_scale_factor_;
         float receiver_noise_bandwidth_;
         bool is_configured_;
-        hoNDArray< ValueType > prewhitened_buf_;
+        bool computed_in_close_;
+
+        std::string noise_dependency_folder_;
+
+        std::string noise_dependency_prefix_;
+
+        std::string patient_id_;
+        std::string study_id_;
+        std::string measurement_id_;
+        std::string measurement_id_of_noise_dependency_;
+
+        std::string full_name_stored_noise_dependency_;
+
+        float noise_dwell_time_us_preset_;
+
+        bool perform_noise_adjust_;
 
         virtual int process_config(ACE_Message_Block* mb);
         virtual int process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
             GadgetContainerMessage< hoNDArray< ValueType > >* m2);
+
+        std::string generateFullNameWhenLoadNoiseDependency(const std::string& measurement_id_of_noise);
+        std::string generateFullNameWhenStoreNoiseDependency(const std::string& measurement_id);
+
+        bool use_stored_noise_prewhitener_;
+        bool loadNoisePrewhitener(float& noise_dwell_time_us, hoNDArray< ValueType >& noise_covariance_matrixf);
+        bool saveNoisePrewhitener(const std::string& full_name_stored_noise_dependency, float& noise_dwell_time_us, hoNDArray< ValueType >& noise_covariance_matrixf);
+
+        void computeNoisePrewhitener(bool savePrewhitener=true);
+
+        Gadgetron::GadgetronTimer gt_timer_;
+        bool performTiming_;
     };
 }
diff --git a/gadgets/mri_core/NoiseAdjustGadget_unoptimized.cpp b/gadgets/mri_core/NoiseAdjustGadget_unoptimized.cpp
index b0515b4..90c8d7b 100644
--- a/gadgets/mri_core/NoiseAdjustGadget_unoptimized.cpp
+++ b/gadgets/mri_core/NoiseAdjustGadget_unoptimized.cpp
@@ -1,7 +1,7 @@
 #include "NoiseAdjustGadget_unoptimized.h"
 #include "Gadgetron.h"
-#include "GadgetIsmrmrdReadWrite.h"
 #include "hoNDArray_fileio.h"
+#include "ismrmrd/xml.h"
 
 namespace Gadgetron{
 
@@ -103,15 +103,17 @@ NoiseAdjustGadget_unoptimized::NoiseAdjustGadget_unoptimized()
 
 int NoiseAdjustGadget_unoptimized::process_config(ACE_Message_Block* mb)
 {
-
-	boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg = parseIsmrmrdXMLHeader(std::string(mb->rd_ptr()));
-
-
-	receiver_noise_bandwidth_ = cfg->acquisitionSystemInformation().get().relativeReceiverNoiseBandwidth().present() ?
-								cfg->acquisitionSystemInformation().get().relativeReceiverNoiseBandwidth().get() : 1.0;
-
-
-	return GADGET_OK;
+  // Parse the ISMRMRD XML header carried by the configuration message.
+  ISMRMRD::IsmrmrdHeader hdr;
+  ISMRMRD::deserialize(mb->rd_ptr(), hdr);
+
+  // No system information: the member is left untouched. Missing bandwidth entry: 1.0 is used.
+  if ( hdr.acquisitionSystemInformation ) {
+    const bool has_bw = static_cast<bool>(hdr.acquisitionSystemInformation->relativeReceiverNoiseBandwidth);
+    receiver_noise_bandwidth_ = has_bw ? (float)(*hdr.acquisitionSystemInformation->relativeReceiverNoiseBandwidth) : 1.0f;
+    GADGET_MSG("receiver_noise_bandwidth_ is " << receiver_noise_bandwidth_);
+  }
+  return GADGET_OK;
 }
 
 int NoiseAdjustGadget_unoptimized
@@ -119,7 +121,7 @@ int NoiseAdjustGadget_unoptimized
 		GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
 {
 
-	bool is_noise = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_NOISE_MEASUREMENT).isSet(m1->getObjectPtr()->flags);
+	bool is_noise = m1->getObjectPtr()->isFlagSet(ISMRMRD::ISMRMRD_ACQ_IS_NOISE_MEASUREMENT);
 	unsigned int channels = m1->getObjectPtr()->active_channels;
 	unsigned int samples = m1->getObjectPtr()->number_of_samples;
 
diff --git a/gadgets/mri_core/NoiseAdjustGadget_unoptimized.h b/gadgets/mri_core/NoiseAdjustGadget_unoptimized.h
index 3bf340e..26088da 100644
--- a/gadgets/mri_core/NoiseAdjustGadget_unoptimized.h
+++ b/gadgets/mri_core/NoiseAdjustGadget_unoptimized.h
@@ -4,7 +4,7 @@
 #include "hoNDArray.h"
 #include "gadgetron_mricore_export.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <complex>
 
 namespace Gadgetron{
diff --git a/gadgets/mri_core/PCACoilGadget.cpp b/gadgets/mri_core/PCACoilGadget.cpp
index cb36eea..504c269 100644
--- a/gadgets/mri_core/PCACoilGadget.cpp
+++ b/gadgets/mri_core/PCACoilGadget.cpp
@@ -9,10 +9,12 @@
 #include "GadgetIsmrmrdReadWrite.h"
 #include "hoArmadillo.h"
 #include "hoNDArray_elemwise.h"
+#include "ismrmrd/xml.h"
+#include "hoNDArray_fileio.h"
 
-#ifdef HAVE_MKL
-#include "mkl_service.h"
-#endif
+#include <ace/OS_NS_stdlib.h>
+#include <boost/algorithm/string.hpp>
+#include <boost/algorithm/string/split.hpp>
 
 namespace Gadgetron {
 
@@ -20,11 +22,6 @@ namespace Gadgetron {
         : max_buffered_profiles_(100)
         , samples_to_use_(16)
     {
-        // There is a bug in the MKL SVD when running in multi-threaded mode.
-        // Set the number of threads to 1 in this gadget.
-        #ifdef HAVE_MKL
-                mkl_set_num_threads(1);
-        #endif
     }
 
     PCACoilGadget::~PCACoilGadget()
@@ -42,14 +39,58 @@ namespace Gadgetron {
 
     int PCACoilGadget::process_config(ACE_Message_Block *mb)
     {
-        return GADGET_OK;
+      // Parse the ISMRMRD XML header carried by the configuration message.
+      ISMRMRD::IsmrmrdHeader h;
+      ISMRMRD::deserialize(mb->rd_ptr(),h);
+
+      // A user parameter string named "COIL_<n>" maps its value (used as the coil name) to channel <n>.
+      if (h.userParameters) {
+        for (size_t i = 0; i < h.userParameters->userParameterString.size(); i++) {
+          std::string name = h.userParameters->userParameterString[i].name;
+          std::string value = h.userParameters->userParameterString[i].value;
+          if (name.substr(0,5) == std::string("COIL_")) {
+            channel_map_[value] = std::atoi(name.substr(5,name.size()-5).c_str());
+          }
+        }
+      }
+
+      // "uncombined_channels_by_name": comma separated coil names. Unknown names are ignored and a repeated
+      // channel is recorded once, so the count published below (also used as a column offset in process()) is exact.
+      boost::shared_ptr<std::string> uncomb_str = this->get_string_value("uncombined_channels_by_name");
+      std::vector<std::string> uncomb;
+      if (uncomb_str->size()) {
+        GADGET_DEBUG2("uncomb_str: %s\n",  uncomb_str->c_str());
+        boost::split(uncomb, *uncomb_str, boost::is_any_of(","));
+        for (unsigned int i = 0; i < uncomb.size(); i++) {
+          coil_map_type_::iterator it = channel_map_.find(boost::algorithm::trim_copy(uncomb[i]));
+          if (it == channel_map_.end()) continue;
+          unsigned int channel_id = static_cast<unsigned int>(it->second);
+          if (std::find(uncombined_channels_.begin(), uncombined_channels_.end(), channel_id) != uncombined_channels_.end()) continue;
+          GADGET_DEBUG2("Device channel: %s (%d)\n",  uncomb[i].c_str(), channel_id);
+          uncombined_channels_.push_back(channel_id);
+        }
+      }
+
+      char val[32];
+      sprintf(val,"%d",(int)uncombined_channels_.size());
+      this->set_parameter("present_uncombined_channels",val);
+      return GADGET_OK;
     }
 
     int PCACoilGadget::process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader> *m1, GadgetContainerMessage<hoNDArray<std::complex<float> > > *m2)
     {
+      bool is_noise = m1->getObjectPtr()->isFlagSet(ISMRMRD::ISMRMRD_ACQ_IS_NOISE_MEASUREMENT);
+
+      //We should not be receiving noise here
+      if (is_noise) {
+	m1->release();
+	return GADGET_OK;
+      }
+
+
         std::map<int, bool>::iterator it;
         int location = m1->getObjectPtr()->idx.slice;
-        bool is_last_scan_in_slice = (ISMRMRD::FlagBit(ISMRMRD::ACQ_LAST_IN_SLICE).isSet(m1->getObjectPtr()->flags));
+        bool is_last_scan_in_slice = m1->getObjectPtr()->isFlagSet(ISMRMRD::ISMRMRD_ACQ_LAST_IN_SLICE);
         int samples_per_profile = m1->getObjectPtr()->number_of_samples;
         int channels = m1->getObjectPtr()->active_channels;
 
@@ -124,16 +165,21 @@ namespace Gadgetron {
 
                     std::complex<float>* d = m_tmp->getObjectPtr()->get_data_ptr();
 
-                    for (unsigned s = 0; s < samples_to_use; s++) {
-                        for (size_t c = 0; c < channels; c++) {
-                            //We use the conjugate of the data so that the output VT of the SVD is the actual PCA coefficient matrix
-                            A_ptr[c + sample_counter*channels] = d[c*samples_per_profile + data_offset + s];
-                            means_ptr[c] += d[c*samples_per_profile + data_offset + s];
-                        }
-
-                        sample_counter++;
-                        //GADGET_DEBUG2("Sample counter = %d/%d\n", sample_counter, total_samples);
-                    }
+		      for (unsigned s = 0; s < samples_to_use; s++) {
+			for (size_t c = 0; c < channels; c++) {
+			  bool uncombined_channel = std::find(uncombined_channels_.begin(),uncombined_channels_.end(), c) != uncombined_channels_.end();
+			  //We use the conjugate of the data so that the output VT of the SVD is the actual PCA coefficient matrix
+			  if (uncombined_channel) {
+			    A_ptr[c + sample_counter*channels] = std::complex<float>(0.0,0.0);
+			  } else {
+			    A_ptr[c + sample_counter*channels] = d[c*samples_per_profile + data_offset + s];
+			    means_ptr[c] += d[c*samples_per_profile + data_offset + s];
+			  }
+			}
+			
+			sample_counter++;
+			//GADGET_DEBUG2("Sample counter = %d/%d\n", sample_counter, total_samples);
+		      }
                 }
 
                 //Subtract off mean
@@ -150,7 +196,7 @@ namespace Gadgetron {
                 VT_dims.push_back(channels);
                 pca_coefficients_[location] = new hoNDArray< std::complex<float> >;
                 hoNDArray< std::complex<float> >* VT = pca_coefficients_[location];
-
+		
                 try {VT->create(&VT_dims);}
                 catch (std::runtime_error& err){
                     GADGET_DEBUG_EXCEPTION(err,"Failed to create array for VT\n");
@@ -162,10 +208,55 @@ namespace Gadgetron {
                 arma::cx_fmat Um;
                 arma::fvec Sv;
 
+
                 if( !arma::svd_econ(Um,Sv,Vm,Am.st(),'r') ){
                     GADGET_DEBUG1("Failed to compute SVD\n");
                     return GADGET_FAIL;
                 }
+		
+		// Build a replacement coefficient matrix that explicitly preserves the uncombined channels:
+		// each uncombined channel gets a unit-vector column at the front, and the columns of Vm at
+		// combined-channel indices follow, with the rows of uncombined channels set to zero.
+		if (uncombined_channels_.size()) {
+		  hoNDArray< std::complex<float> >* VT_new = new hoNDArray< std::complex<float> >;
+		  try {VT_new->create(&VT_dims);}
+		  catch (std::runtime_error& err){
+		    GADGET_DEBUG_EXCEPTION(err,"Failed to create array for VT (new)\n");
+		    delete VT_new; // not stored anywhere yet, so it would leak on this early return
+		    return GADGET_FAIL;
+		  }
+
+		  arma::cx_fmat Vm_new = as_arma_matrix(VT_new);
+
+		  size_t uncomb_count = 0;
+		  size_t comb_count = 0;
+		  for (size_t c = 0; c < Vm_new.n_cols; c++) {
+		    bool uncombined_channel = std::find(uncombined_channels_.begin(),uncombined_channels_.end(), c) != uncombined_channels_.end();
+		    if (uncombined_channel) {
+		      for (size_t r = 0; r < Vm_new.n_rows; r++) {
+			Vm_new(r,uncomb_count) = (r == c) ? 1 : 0;
+		      }
+		      uncomb_count++;
+		    } else {
+		      for (size_t r = 0; r < Vm_new.n_rows; r++) {
+			bool uncombined_channel_row = std::find(uncombined_channels_.begin(),uncombined_channels_.end(), r) != uncombined_channels_.end();
+			if (uncombined_channel_row) {
+			  Vm_new(r,comb_count+uncombined_channels_.size()) = 0;
+			} else {
+			  Vm_new(r,comb_count+uncombined_channels_.size()) = Vm(r,c);
+			}
+		      }
+		      comb_count++;
+		    }
+		  }
+		  // %d expects an int; the counters are size_t, so convert explicitly.
+		  GADGET_DEBUG2("uncomb_count = %d, comb_count = %d\n", static_cast<int>(uncomb_count), static_cast<int>(comb_count));
+
+		  //Delete the old one and set the new one
+		  delete pca_coefficients_[location];
+		  pca_coefficients_[location] = VT_new;
+		}
+
 
                 //Switch off buffering for this slice
                 buffering_mode_[location] = false;
diff --git a/gadgets/mri_core/PCACoilGadget.h b/gadgets/mri_core/PCACoilGadget.h
index 63fcd62..93cf044 100644
--- a/gadgets/mri_core/PCACoilGadget.h
+++ b/gadgets/mri_core/PCACoilGadget.h
@@ -4,7 +4,7 @@
 #include "gadgetron_mricore_export.h"
 #include "Gadget.h"
 #include "hoNDArray.h"
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
 
 #include <complex>
 #include <map>
@@ -27,6 +27,11 @@ namespace Gadgetron {
 			GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2);
 
   private:
+    typedef std::map< std::string, int > coil_map_type_;
+    coil_map_type_ channel_map_;
+    std::vector<unsigned int> uncombined_channels_;
+
+    
     //Map containing buffers, one for each location
     std::map< int, std::vector< ACE_Message_Block* > > buffer_;
 
diff --git a/gadgets/mri_core/PartialFourierAdjustROGadget.cpp b/gadgets/mri_core/PartialFourierAdjustROGadget.cpp
index 8d6d53b..6c5940e 100644
--- a/gadgets/mri_core/PartialFourierAdjustROGadget.cpp
+++ b/gadgets/mri_core/PartialFourierAdjustROGadget.cpp
@@ -1,6 +1,5 @@
-
 #include "PartialFourierAdjustROGadget.h"
-#include "GadgetIsmrmrdReadWrite.h"
+#include "ismrmrd/xml.h"
 
 namespace Gadgetron
 {
@@ -12,25 +11,19 @@ PartialFourierAdjustROGadget::PartialFourierAdjustROGadget() : maxRO_(0)
 
 int PartialFourierAdjustROGadget::process_config(ACE_Message_Block* mb)
 {
-    boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg = parseIsmrmrdXMLHeader(std::string(mb->rd_ptr()));
-
-    ISMRMRD::ismrmrdHeader::encoding_sequence e_seq = cfg->encoding();
-    if (e_seq.size() != 1)
-    {
-        GADGET_DEBUG2("Number of encoding spaces: %d\n", e_seq.size());
-        GADGET_DEBUG1("This simple partial fourier gadget only supports one encoding space\n");
-        return GADGET_FAIL;
-    }
-
-    ISMRMRD::encodingSpaceType e_space = (*e_seq.begin()).encodedSpace();
-    ISMRMRD::encodingSpaceType r_space = (*e_seq.begin()).reconSpace();
-    ISMRMRD::encodingLimitsType e_limits = (*e_seq.begin()).encodingLimits();
-
-    maxRO_ = e_space.matrixSize().x();
-
-    GADGET_MSG("max RO : " << maxRO_);
-
-    return GADGET_OK;
+  // Parse the ISMRMRD XML header carried by the configuration message.
+  ISMRMRD::IsmrmrdHeader h;
+  ISMRMRD::deserialize(mb->rd_ptr(),h);
+  if (h.encoding.size() != 1) {
+    // %d expects an int; size() returns size_t, so convert explicitly.
+    GADGET_DEBUG2("Number of encoding spaces: %d\n", static_cast<int>(h.encoding.size()));
+    GADGET_DEBUG1("This partial fourier gadget only supports one encoding space\n");
+    return GADGET_FAIL;
+  }
+
+  maxRO_ = h.encoding[0].encodedSpace.matrixSize.x; // x extent of the encoded-space matrix
+  GADGET_MSG("max RO : " << maxRO_);
+  return GADGET_OK;
 }
 
 int addPrePostZeros(size_t centre_column, size_t samples)
@@ -61,7 +54,7 @@ int PartialFourierAdjustROGadget
         GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
 {
 
-    bool is_noise = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_NOISE_MEASUREMENT).isSet(m1->getObjectPtr()->flags);
+    bool is_noise = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_NOISE_MEASUREMENT).isSet(m1->getObjectPtr()->flags);
     size_t channels = m1->getObjectPtr()->active_channels;
     size_t samples = m1->getObjectPtr()->number_of_samples;
     size_t centre_column = m1->getObjectPtr()->center_sample;
diff --git a/gadgets/mri_core/PartialFourierAdjustROGadget.h b/gadgets/mri_core/PartialFourierAdjustROGadget.h
index 7477fd6..a2b873c 100644
--- a/gadgets/mri_core/PartialFourierAdjustROGadget.h
+++ b/gadgets/mri_core/PartialFourierAdjustROGadget.h
@@ -3,7 +3,7 @@
 
 #include "Gadget.h"
 #include "hoNDArray.h"
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
 #include "gadgetron_mricore_export.h"
 
 namespace Gadgetron
@@ -16,8 +16,6 @@ class EXPORTGADGETSMRICORE PartialFourierAdjustROGadget : public Gadgetron::Gadg
 {
 public:
 
-    GADGET_DECLARE(PartialFourierAdjustROGadget);
-
     PartialFourierAdjustROGadget();
 
 protected:
diff --git a/gadgets/mri_core/PhysioInterpolationGadget.cpp b/gadgets/mri_core/PhysioInterpolationGadget.cpp
index d93de2a..e470952 100644
--- a/gadgets/mri_core/PhysioInterpolationGadget.cpp
+++ b/gadgets/mri_core/PhysioInterpolationGadget.cpp
@@ -1,8 +1,11 @@
 #include "PhysioInterpolationGadget.h"
 #include "Gadgetron.h"
-#include "GadgetIsmrmrdReadWrite.h"
 #include "GadgetronTimer.h"
 #include "Spline.h"
+#include "GtPlusDefinition.h"
+#include "ismrmrd/meta.h"
+#include "hoNDBSpline.h"
+#include "ismrmrd/xml.h"
 
 #include <numeric>
 #ifdef USE_OMP
@@ -11,249 +14,403 @@
 
 namespace Gadgetron{
 
-  PhysioInterpolationGadget::PhysioInterpolationGadget() 
-    : phys_time_index_(0)
-    , phases_to_reconstruct_(30)
-    , buffer_(ACE_Message_Queue_Base::DEFAULT_HWM * 10, ACE_Message_Queue_Base::DEFAULT_LWM * 10)
-  {
-    set_parameter(std::string("physiology_time_index").c_str(), "0");
-    set_parameter(std::string("mode").c_str(), "0");
-    set_parameter(std::string("phases").c_str(), "30");
-  }
-
-  PhysioInterpolationGadget::~PhysioInterpolationGadget() {}
-
-  int PhysioInterpolationGadget::process_config(ACE_Message_Block* mb)
-  {
-    phys_time_index_ = get_int_value("physiology_time_index");
-    phases_to_reconstruct_ = get_int_value("phases");
-    mode_ = get_int_value("mode");
-    return GADGET_OK;
-  }
-
-  int PhysioInterpolationGadget::close(unsigned long flags) {
-    
-    int ret = Gadget::close(flags);
-
-    GADGET_DEBUG1("PhysioInterpolationGadget::close...\n");
-
-    GADGET_DEBUG2("Number of items on Q: %d\n", buffer_.message_count());
-
-    if (time_stamps_.size() != buffer_.message_count()) {
-      GADGET_DEBUG1("Inconsistent number of messages and time stamps\n");
-      buffer_.flush();
-      return GADGET_FAIL;
-    }
-    
-    float previous = -100.0;
-    float sum_int  = 0.0; 
-    std::vector<float> intervals;
-    float int_count = 0.0;
-    std::vector<size_t> cycle_starts;
-    for (size_t i = 0; i < time_stamps_.size(); i++) {
-      //GADGET_DEBUG2("Time %d, %f\n", i, time_stamps_[i]);
-      if (time_stamps_[i] < previous) {
-	cycle_starts.push_back(i);
-      } else if (i > 0 ) {
-	sum_int += time_stamps_[i]-time_stamps_[i-1];
-	intervals.push_back(time_stamps_[i]-time_stamps_[i-1]);
-	int_count += 1.0;
-      }
-      previous = time_stamps_[i];
+    // Sets member defaults and registers default values for the three parameters
+    // that process_config() reads back.
+    PhysioInterpolationGadget::PhysioInterpolationGadget()
+        : phys_time_index_(0), phases_to_reconstruct_(30)
+        , image_with_attrib_(false), first_beat_on_trigger_(false)
+        , interp_method_("Spline")
+    {
+        set_parameter("physiology_time_index", "0");
+        set_parameter("mode", "0");
+        set_parameter("phases", "30");
+    }
 
-    std::sort(intervals.begin(),intervals.end());
+    PhysioInterpolationGadget::~PhysioInterpolationGadget() {} // empty body: no explicit cleanup here
 
-    float mean_interval = sum_int/int_count;
-    float median_interval = intervals[(intervals.size()>>1)];
+    int PhysioInterpolationGadget::process_config(ACE_Message_Block* mb)
+    {
+        phys_time_index_ = get_int_value("physiology_time_index");
+        phases_to_reconstruct_ = get_int_value("phases");
+        mode_ = get_int_value("mode");
+        first_beat_on_trigger_ = get_bool_value("first_beat_on_trigger");
 
-    float average_cycle_length = 0.0;
-    std::vector<float> cycle_lengths;
-    float count = 0;
-    for (size_t i = 1; i < cycle_starts.size(); i++) {
-      float clength = time_stamps_[cycle_starts[i]-1] + median_interval - time_stamps_[cycle_starts[i]];
-      //GADGET_DEBUG2("clength: %f\n", clength);
-      cycle_lengths.push_back(clength);
-    }
+        ISMRMRD::IsmrmrdHeader h;
+        ISMRMRD::deserialize(mb->rd_ptr(),h);
 
-    //GADGET_DEBUG2("Cycle starts: %d, cycle_lengths: %d\n", cycle_starts.size(), cycle_lengths.size());
-    //for (unsigned int i = 0; i < cycle_starts.size(); i++) {
-    //  GADGET_DEBUG2("\t%d,%f\n",cycle_starts[i], cycle_lengths[i]);
-    //} 
+        boost::shared_ptr<std::string> str = get_string_value("interp_method");
+        interp_method_ = *str;
+        if ( interp_method_.empty() ) interp_method_ = "Spline";
 
-    std::sort(cycle_lengths.begin(),cycle_lengths.end());
-    float mean_cycle_length = std::accumulate(cycle_lengths.begin(), cycle_lengths.end(), 0.0)/cycle_lengths.size();
-    float median_cycle_length = cycle_lengths[(cycle_lengths.size()>>1)];
+        if (h.encoding.size() == 0) { // h.encoding[0] is read right after this check
+            GADGET_DEBUG1("Missing encoding section\n"); // newline-terminated like the file's other debug messages
+            return GADGET_FAIL;
+        }
 
-    GADGET_DEBUG2("We have %d full cyles, first one starting at %d\n", cycle_starts.size()-1, cycle_starts[0]);
-    GADGET_DEBUG2("Mean/Median frame width %f/%f\n", mean_interval,median_interval);
-    GADGET_DEBUG2("Mean/Median cycle_length %f/%f\n", mean_cycle_length,median_cycle_length);
+        ISMRMRD::EncodingLimits e_limits = h.encoding[0].encodingLimits;
+        slc_limit_ = e_limits.slice ? e_limits.slice->maximum+1 : 1; 
 
-    //Correct the first cycle assuming it is of median length:
-    float first_cycle_offset = (median_cycle_length-median_interval)+time_stamps_[cycle_starts[0]]-time_stamps_[cycle_starts[0]-1];
-    for (size_t i = 0; i < cycle_starts[0]; i++) {
-      time_stamps_[i] += first_cycle_offset;
-    }
+        buffer_.resize(slc_limit_);
 
-    //Calculate relative time stamps
-    size_t current_cycle = 0;
-    std::vector<float> relative_cycle_time;
-
-    //Make sure we have cycle lengths for all the cycles we have covered
-    cycle_lengths.insert(cycle_lengths.begin(),median_cycle_length);
-    cycle_lengths.push_back(median_cycle_length);
-
-    for (size_t i = 0; i < time_stamps_.size(); i++) {
-      if ((i >= cycle_starts[current_cycle]) && (current_cycle < cycle_starts.size())) {
-      //GADGET_DEBUG2("Incrementing current_cycle, %d,%d\n",i,cycle_starts[current_cycle]);
-    current_cycle++;
-      }
-      relative_cycle_time.push_back(time_stamps_[i]/cycle_lengths[current_cycle] + current_cycle);
-	//GADGET_DEBUG2("Corrected time stamps: %d, %f  (%d)\n",i,relative_cycle_time[i],current_cycle);
-    }
-    
-    //Make a temporary list of all the data pointers from the Q
-    std::vector< ISMRMRD::ImageHeader* > hptrs;
-    std::vector< hoNDArray< std::complex<float> > * > aptrs;
-    
-    ACE_Message_Queue<ACE_MT_SYNCH>::ITERATOR it(buffer_);
-    for (ACE_Message_Block* entry = 0;
-	 it.next (entry) != 0;
-         it.advance ()) 
-      {
-	GadgetContainerMessage< ISMRMRD::ImageHeader >* tmpm1 =
-	  AsContainerMessage< ISMRMRD::ImageHeader >(entry);
-
-	GadgetContainerMessage< hoNDArray< std::complex<float> > > * tmpm2 = 
-	  AsContainerMessage< hoNDArray< std::complex<float> >  >(entry->cont());
-	
-	if (!tmpm1 || !tmpm2) {
-	  GADGET_DEBUG1("Failed to cast data on Q, bailing out\n");
-	  buffer_.flush();
-	  return GADGET_FAIL;
-	}
-	hptrs.push_back(tmpm1->getObjectPtr());
-	aptrs.push_back(tmpm2->getObjectPtr());	
-      }
-
-    //Let's figure out which time points we would like to interpolate on:
-    ///TODO: Deal with mode 1 and other future modes, we are only implementing mode 0 at the moment
-    float phase_interval = 1.0f/static_cast<float>(phases_to_reconstruct_);
-    float max_time = floor(relative_cycle_time[relative_cycle_time.size()-1]);
-    std::vector<float> recon_cycle_time;
-    for (float t=1.0;t<(max_time-0.001);t+=phase_interval) {
-      recon_cycle_time.push_back(t);
-    }
-    
-
-    //Now we can loop over each pixel and estimate the new frames, but first we have to have somewhere to put the data
-    std::vector< GadgetContainerMessage< ISMRMRD::ImageHeader >* > out_heads;
-    std::vector< GadgetContainerMessage< hoNDArray< std::complex<float> > > * > out_data;
-    
-    for (size_t i = 0; i < recon_cycle_time.size(); i++) {
-      GadgetContainerMessage<ISMRMRD::ImageHeader>* tmpm1 = new GadgetContainerMessage<ISMRMRD::ImageHeader>;
-      GadgetContainerMessage< hoNDArray< std::complex<float> > >* tmpm2 = new GadgetContainerMessage< hoNDArray< std::complex<float> > >;
-      
-      tmpm1->cont(tmpm2);
-
-      (*tmpm1->getObjectPtr()) = (*hptrs[0]);
-      tmpm2->getObjectPtr()->create(aptrs[0]->get_dimensions());
-
-      out_heads.push_back(tmpm1);
-      out_data.push_back(tmpm2);
-
-      unsigned short current_cycle = static_cast<unsigned short>(floor(recon_cycle_time[i] + 0.0001));
-      unsigned short current_phase = static_cast<unsigned short>((recon_cycle_time[i]+0.0001-current_cycle)/(1.0/static_cast<float>(phases_to_reconstruct_)) + 0.0001);
-
-      tmpm1->getObjectPtr()->physiology_time_stamp[phys_time_index_] = static_cast<unsigned>(floor((recon_cycle_time[i]+0.0001-current_cycle)*cycle_lengths[current_cycle])); 
-      tmpm1->getObjectPtr()->phase = current_phase;
-      tmpm1->getObjectPtr()->image_index = current_phase+1;
-      tmpm1->getObjectPtr()->image_series_index = current_cycle*10;
-      
-      /*
-      GADGET_DEBUG2("new_time: %f, %d, time_stamp: %d, phase: %d, index: %d, series: %d\n",
-		    recon_cycle_time[i],
-		    current_cycle,
-		    tmpm1->getObjectPtr()->physiology_time_stamp[phys_time_index_],
-		    tmpm1->getObjectPtr()->phase,
-		    tmpm1->getObjectPtr()->image_index,
-		    tmpm1->getObjectPtr()->image_series_index);
-
-      */
-    }
+        size_t slc;
+        for ( slc=0; slc<slc_limit_; slc++ )
+        {
+            buffer_[slc] = boost::shared_ptr< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>(ACE_Message_Queue_Base::DEFAULT_HWM * 10, ACE_Message_Queue_Base::DEFAULT_LWM * 10) );
+        }
 
+        time_stamps_.resize(slc_limit_);
 
-    //Let's interpolate the images
-    unsigned inelem = relative_cycle_time.size();
-    unsigned outelem = recon_cycle_time.size();
-    unsigned imageelem = aptrs[0]->get_number_of_elements();
+        return GADGET_OK;
+    }
 
+    int PhysioInterpolationGadget::close(unsigned long flags) // when flags != 0: interpolate the buffered frames of every slice and send the results downstream
     {
-
-      GadgetronTimer interptime("Interpolation Time");
+        int ret = Gadget::close(flags); // base-class result; returned at the end unless a slice fails
+
+        if ( flags != 0 )
+        {
+            GADGET_DEBUG1("PhysioInterpolationGadget::close...\n");
+
+            size_t slc;
+            for ( slc=0; slc<slc_limit_; slc ++ )
+            {
+                GADGET_DEBUG2("Processing slice: %d ... \n", slc); // NOTE(review): slc is size_t but is printed with %d
+                GADGET_DEBUG2("Number of items on Q: %d\n", buffer_[slc]->message_count());
+                GADGET_DEBUG2("Image with attribute flag : %d\n", image_with_attrib_);
+
+                if (time_stamps_[slc].size() != buffer_[slc]->message_count()) // process() stores one time stamp per buffered image
+                {
+                    GADGET_DEBUG1("Inconsistent number of messages and time stamps\n");
+                    buffer_[slc]->flush();
+                    return GADGET_FAIL;
+                }
+
+                float previous = -100.0; // sentinel; assumes no time stamp is below -100 - TODO confirm
+                float sum_int  = 0.0; 
+                std::vector<float> intervals;
+                float int_count = 0.0;
+                std::vector<size_t> cycle_starts; // indices of the frames that open a new cycle
+                for (size_t i = 0; i < time_stamps_[slc].size(); i++)
+                {
+                    if ( (time_stamps_[slc][i] < previous) || (first_beat_on_trigger_ && i==0) ) // a drop in the time stamp marks a new cycle
+                    {
+                        cycle_starts.push_back(i);
+                    }
+                    else if (i > 0 )
+                    {
+                        sum_int += time_stamps_[slc][i]-time_stamps_[slc][i-1];
+                        intervals.push_back(time_stamps_[slc][i]-time_stamps_[slc][i-1]);
+                        int_count += 1.0;
+                    }
+                    previous = time_stamps_[slc][i];
+                }
+
+                if ( intervals.empty() ) continue; // no frame-to-frame interval found: skip this slice (its buffer is not flushed here)
+
+                std::sort(intervals.begin(),intervals.end());
+
+                float mean_interval = sum_int/int_count;
+                float median_interval = intervals[(intervals.size()>>1)]; // upper middle element when the count is even
+
+                float average_cycle_length = 0.0f; // NOTE(review): not used in the rest of this function
+                std::vector<float> cycle_lengths;
+                float count = 0; // NOTE(review): not used in the rest of this function
+                for (size_t i = 1; i < cycle_starts.size(); i++)
+                {
+                    float clength = time_stamps_[slc][cycle_starts[i]-1] + median_interval - time_stamps_[slc][cycle_starts[i]];
+                    cycle_lengths.push_back(clength);
+                }
+
+                if ( cycle_lengths.empty() ) // fewer than two cycle starts: use the last time stamp as the only cycle length
+                {
+                    size_t phs = time_stamps_[slc].size();
+                    float clength = time_stamps_[slc][phs-1];
+                    cycle_lengths.push_back(clength);
+                }
+
+                std::sort(cycle_lengths.begin(),cycle_lengths.end());
+                float mean_cycle_length = std::accumulate(cycle_lengths.begin(), cycle_lengths.end(), 0.0f)/cycle_lengths.size();
+                float median_cycle_length = cycle_lengths[(cycle_lengths.size()>>1)];
+
+                GADGET_DEBUG2("We have %d full cyles, first one starting at %d\n", cycle_starts.size()-1, cycle_starts[0]); // NOTE(review): cycle_starts[0] is read without checking that cycle_starts is non-empty
+                GADGET_DEBUG2("Mean/Median frame width %f/%f\n", mean_interval,median_interval);
+                GADGET_DEBUG2("Mean/Median cycle_length %f/%f\n", mean_cycle_length,median_cycle_length);
+
+                //Correct the first cycle assuming it is of median length:
+                if ( !first_beat_on_trigger_ ) // NOTE(review): the body reads cycle_starts[0] and cycle_starts[0]-1 without a non-empty / non-zero check
+                {
+                    float first_cycle_offset = (median_cycle_length-median_interval)+time_stamps_[slc][cycle_starts[0]]-time_stamps_[slc][cycle_starts[0]-1];
+                    for (size_t i = 0; i < cycle_starts[0]; i++)
+                    {
+                        time_stamps_[slc][i] += first_cycle_offset;
+                    }
+                }
+
+                //Calculate relative time stamps
+                size_t current_cycle = 0;
+                std::vector<float> relative_cycle_time;
+
+                //Make sure we have cycle lengths for all the cycles we have covered
+                cycle_lengths.insert(cycle_lengths.begin(),median_cycle_length);
+                cycle_lengths.push_back(median_cycle_length);
+
+                for (size_t i = 0; i < time_stamps_[slc].size(); i++)
+                {
+                    if ((current_cycle<cycle_starts.size()) && (i >= cycle_starts[current_cycle]) && (current_cycle < cycle_starts.size())) // NOTE(review): the bound on current_cycle is tested twice
+                    {
+                        current_cycle++;
+                    }
+                    relative_cycle_time.push_back(time_stamps_[slc][i]/cycle_lengths[current_cycle-1] + current_cycle); // NOTE(review): current_cycle-1 wraps around for frames that precede the first cycle start
+                }
+
+                //Make a temporary list of all the data pointers from the Q
+                std::vector< ISMRMRD::ImageHeader* > hptrs;
+                std::vector< hoNDArray< std::complex<float> > * > aptrs;
+                std::vector< ISMRMRD::MetaContainer* > attribptrs;
+
+                ACE_Message_Queue<ACE_MT_SYNCH>::ITERATOR it( *buffer_[slc] ); // walks the queue in place; nothing is dequeued until the flush at the end
+                for (ACE_Message_Block* entry = 0;
+                    it.next (entry) != 0;
+                    it.advance ()) 
+                {
+                    GadgetContainerMessage< ISMRMRD::ImageHeader >* tmpm1 =
+                        AsContainerMessage< ISMRMRD::ImageHeader >(entry);
+
+                    GadgetContainerMessage< hoNDArray< std::complex<float> > > * tmpm2 = 
+                        AsContainerMessage< hoNDArray< std::complex<float> >  >(entry->cont());
+
+                    if (!tmpm1 || !tmpm2 )
+                    {
+                        GADGET_DEBUG1("Failed to cast data on Q, bailing out\n");
+                        buffer_[slc]->flush();
+                        return GADGET_FAIL;
+                    }
+
+                    hptrs.push_back(tmpm1->getObjectPtr());
+                    aptrs.push_back(tmpm2->getObjectPtr());
+
+                    if ( image_with_attrib_ ) // the attributes are the third message of each buffered chain
+                    {
+                        GadgetContainerMessage< ISMRMRD::MetaContainer > * tmpm3 = 
+                            AsContainerMessage< ISMRMRD::MetaContainer >(entry->cont()->cont());
+
+                        if ( !tmpm3 )
+                        {
+                            GADGET_DEBUG1("Failed to cast data on Q, bailing out\n");
+                            buffer_[slc]->flush();
+                            return GADGET_FAIL;
+                        }
+
+                        attribptrs.push_back(tmpm3->getObjectPtr());
+                    }
+                }
+
+                //Let's figure out which time points we would like to interpolate on:
+                ///TODO: Deal with mode 1 and other future modes, we are only implementing mode 0 at the moment
+                float phase_interval = 1.0f/static_cast<float>(phases_to_reconstruct_);
+                float max_time = floor(relative_cycle_time[relative_cycle_time.size()-1]);
+                std::vector<float> recon_cycle_time;
+                for (float t=1.0;t<(max_time-0.001);t+=phase_interval) // output times start at relative time 1.0 and stop before max_time
+                {
+                    recon_cycle_time.push_back(t);
+                }
+
+                if ( mode_ == 1 ) // keep only the first phases_to_reconstruct_ output times
+                {
+                    std::vector<float> recon_cycle_time_first_beat(phases_to_reconstruct_);
+                    memcpy(&recon_cycle_time_first_beat[0], &recon_cycle_time[0], sizeof(float)*phases_to_reconstruct_); // NOTE(review): assumes recon_cycle_time holds at least phases_to_reconstruct_ entries
+                    recon_cycle_time = recon_cycle_time_first_beat;
+                }
+
+                //Now we can loop over each pixel and estimate the new frames, but first we have to have somewhere to put the data
+                std::vector< GadgetContainerMessage< ISMRMRD::ImageHeader >* > out_heads;
+                std::vector< GadgetContainerMessage< hoNDArray< std::complex<float> > > * > out_data;
+                std::vector< GadgetContainerMessage< ISMRMRD::MetaContainer> * > out_attrib;
+
+                for (size_t i = 0; i < recon_cycle_time.size(); i++)
+                {
+                    GadgetContainerMessage<ISMRMRD::ImageHeader>* tmpm1 = new GadgetContainerMessage<ISMRMRD::ImageHeader>;
+                    GadgetContainerMessage< hoNDArray< std::complex<float> > >* tmpm2 = new GadgetContainerMessage< hoNDArray< std::complex<float> > >;
+
+                    tmpm1->cont(tmpm2);
+
+                    (*tmpm1->getObjectPtr()) = (*hptrs[0]); // every output header starts as a copy of the first buffered header
+                    tmpm2->getObjectPtr()->create(aptrs[0]->get_dimensions());
+
+                    out_heads.push_back(tmpm1);
+                    out_data.push_back(tmpm2);
+
+                    unsigned short current_cycle = static_cast<unsigned short>(floor(recon_cycle_time[i] + 0.0001)); // the small offsets guard against float rounding just below an integer
+                    unsigned short current_phase = static_cast<unsigned short>((recon_cycle_time[i]+0.0001-current_cycle)/(1.0/static_cast<float>(phases_to_reconstruct_)) + 0.0001);
+
+                    tmpm1->getObjectPtr()->physiology_time_stamp[phys_time_index_] = static_cast<unsigned>(floor((recon_cycle_time[i]+0.0001-current_cycle)*cycle_lengths[current_cycle])); 
+                    tmpm1->getObjectPtr()->phase = current_phase;
+                    tmpm1->getObjectPtr()->image_index = current_phase+1 + (uint16_t)slc*phases_to_reconstruct_;
+                    tmpm1->getObjectPtr()->image_series_index = current_cycle*10;
+
+                    // make sure the phase is within the acquisition limit
+                    if ( tmpm1->getObjectPtr()->phase+1 >= time_stamps_[slc].size() )
+                    {
+                        tmpm1->getObjectPtr()->phase = (uint16_t)(time_stamps_[slc].size()-1);
+                    }
+
+                    if ( image_with_attrib_ )
+                    {
+                        GadgetContainerMessage< ISMRMRD::MetaContainer >* tmpm3 = new GadgetContainerMessage< ISMRMRD::MetaContainer >;
+
+                        tmpm2->cont(tmpm3);
+                        (*tmpm3->getObjectPtr()) = (*attribptrs[0]); // attributes start as a copy of the first buffered image's attributes
+                        out_attrib.push_back(tmpm3);
+
+                        tmpm3->getObjectPtr()->set(GTPLUS_PHASE,      (long)tmpm1->getObjectPtr()->phase);
+                        tmpm3->getObjectPtr()->set(GTPLUS_IMAGENUMBER, (long)tmpm1->getObjectPtr()->image_index);
+
+                        tmpm3->getObjectPtr()->append(GTPLUS_DATA_ROLE, "PhysioInterp");
+                        tmpm3->getObjectPtr()->append(GTPLUS_IMAGECOMMENT, "PhysioInterp");
+                        tmpm3->getObjectPtr()->append(GTPLUS_SEQUENCEDESCRIPTION, "_PhysioInterp");
+
+                        tmpm3->getObjectPtr()->append(GTPLUS_IMAGEPROCESSINGHISTORY, "Interp");
+                    }
+                }
+
+                //Let's interpolate the images
+                size_t inelem = relative_cycle_time.size();
+                size_t outelem = recon_cycle_time.size();
+                size_t imageelem = aptrs[0]->get_number_of_elements();
+
+                if ( (interp_method_ == "Spline") || (mode_ != 1) ) // the BSpline branch runs only for a non-"Spline" method combined with mode 1
+                {
+                    GadgetronTimer interptime("Interpolation Time");
 
 #ifdef USE_OMP
 #pragma omp parallel for
 #endif
-    for (int p = 0; p < (int)imageelem; p++) {
-      std::vector< std::complex<float> > data_in(inelem);
-
-      //Get the input data for this pixel
-      for (size_t i = 0; i < inelem; i++) data_in[i] = aptrs[i]->get_data_ptr()[p];
-      
-      //Interpolate the data
-      Spline<float, std::complex<float> > sp(relative_cycle_time, data_in);
-      std::vector<std::complex<float> > data_out = sp[recon_cycle_time];
-
-      //Copy it to the images
-      for (size_t i = 0; i < outelem; i++) out_data[i]->getObjectPtr()->get_data_ptr()[p] = data_out[i];
-    }
-
+                    for (long long p = 0; p < (long long)imageelem; p++) // each pixel is interpolated independently along time
+                    {
+                        std::vector< std::complex<float> > data_in(inelem);
+
+                        //Get the input data for this pixel
+                        for (size_t i = 0; i < inelem; i++) data_in[i] = aptrs[i]->get_data_ptr()[p];
+
+                        //Interpolate the data
+                        Spline<float, std::complex<float> > sp(relative_cycle_time, data_in);
+                        std::vector<std::complex<float> > data_out = sp[recon_cycle_time];
+
+                        //Copy it to the images
+                        for (size_t i = 0; i < outelem; i++) out_data[i]->getObjectPtr()->get_data_ptr()[p] = data_out[i];
+                    }
+                }
+                else
+                {
+                    GadgetronTimer interptime("Interpolation Time using BSpline");
+
+                    size_t SplineDegree = 5;
+
+                    long long p; // NOTE(review): the pragma below is not wrapped in #ifdef USE_OMP, unlike the Spline branch
+#pragma omp parallel default(none) shared(SplineDegree, imageelem, inelem, outelem, aptrs, relative_cycle_time, recon_cycle_time, out_data) private(p)
+                    {
+                        hoNDArray< std::complex<float> > data_in(inelem); // scratch arrays declared inside the parallel block, reused for every pixel
+                        hoNDArray< std::complex<float> > data_out(outelem);
+
+                        hoNDArray< std::complex<float> > coeff(inelem);
+
+                        hoNDBSpline< std::complex<float>, 1 > interp;
+
+                        size_t i;
+
+                        size_t num = relative_cycle_time.size();
+
+#pragma omp for
+                        for (p = 0; p < (long long)imageelem; p++)
+                        {
+                            //Get the input data for this pixel
+                            for (i = 0; i < inelem; i++) data_in(i) = aptrs[i]->get_data_ptr()[p];
+
+                            // compute the coefficient
+                            interp.computeBSplineCoefficients(data_in, SplineDegree, coeff);
+
+                            //Interpolate the data
+                            for (i = 0; i < outelem; i++)
+                            {
+                                float x = (num-1)*(recon_cycle_time[i]-relative_cycle_time[0])/(relative_cycle_time[num-1] - relative_cycle_time[0]); // rescale the time to a fractional sample index in [0, num-1]; presumably assumes evenly spaced input frames - TODO confirm
+                                data_out(i) = interp.evaluateBSpline(coeff.begin(), inelem, SplineDegree, 0, x);
+                            }
+
+                            //Copy it to the images
+                            for (i = 0; i < outelem; i++) out_data[i]->getObjectPtr()->get_data_ptr()[p] = data_out[i];
+                        }
+                    }
+                }
+
+                //Send out the images
+                for (size_t i = 0; i < out_heads.size(); i++)
+                {
+                    if (this->next()->putq(out_heads[i]) < 0)
+                    {
+                        GADGET_DEBUG1("Unable to put data on next Gadgets Q\n");
+                        return GADGET_FAIL; // NOTE(review): output messages not yet queued are not released on this path, and the slice buffer is not flushed
+                    }
+                }
+
+                //We can get rid of the buffered data now
+                buffer_[slc]->flush();
+            }
+        }
+
+        return ret;
     }
 
-    //Send out the images
-    for (size_t i = 0; i < out_heads.size(); i++) {
-      if (this->next()->putq(out_heads[i]) < 0) {
-	GADGET_DEBUG1("Unable to put data on next Gadgets Q\n");
-	return GADGET_FAIL;
-      }
-    }
-
-
-    //We can get rid of the buffered data now
-    buffer_.flush();
-
-    return ret;
-  }
-
-  int PhysioInterpolationGadget::
-  process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1,
-	  GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
-  {
-        
-    GadgetContainerMessage<ISMRMRD::ImageHeader>* m3 = new GadgetContainerMessage<ISMRMRD::ImageHeader>;
-    GadgetContainerMessage< hoNDArray< std::complex<float> > >* m4 = new GadgetContainerMessage< hoNDArray< std::complex<float> > >;
-
-    
-    (*m3->getObjectPtr()) = (*m1->getObjectPtr());
-    (*m4->getObjectPtr()) = (*m2->getObjectPtr());
-    m3->cont(m4);
-
-    if (buffer_.enqueue_tail(m3) < 0) {
-      GADGET_DEBUG1("Failed to add image to buffer\n");
-      m3->release();
-      return GADGET_FAIL;
-    }
-
-    time_stamps_.push_back(m1->getObjectPtr()->physiology_time_stamp[phys_time_index_]);
-
-    if (this->next()->putq(m1) < 0) {
-      GADGET_DEBUG1("Unable to put data on next Gadgets Q\n");
-      return GADGET_FAIL;
+    int PhysioInterpolationGadget::
+        process(GadgetContainerMessage< ISMRMRD::ImageHeader >* m1, GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
+    {
+        // Buffers a deep copy of the image (header, pixels and, when present, meta attributes) in
+        // the queue of its slice for close(), then forwards the original message downstream.
+        if ( !m1 || !m2 )
+        {
+            GADGET_DEBUG1("Received an incomplete image message\n");
+            return GADGET_FAIL;
+        }
+        // The slice indexes buffer_ and time_stamps_, so validate it before allocating anything.
+        const uint16_t slc = m1->getObjectPtr()->slice;
+        if ( slc >= buffer_.size() || slc >= time_stamps_.size() )
+        {
+            GADGET_DEBUG2("Slice index %d is outside the configured slice range\n", (int)slc);
+            return GADGET_FAIL;
+        }
+
+        GadgetContainerMessage<ISMRMRD::ImageHeader>* header = new GadgetContainerMessage<ISMRMRD::ImageHeader>;
+        GadgetContainerMessage< hoNDArray< std::complex<float> > >* img = new GadgetContainerMessage< hoNDArray< std::complex<float> > >;
+
+        (*header->getObjectPtr()) = (*m1->getObjectPtr());
+        (*img->getObjectPtr()) = (*m2->getObjectPtr());
+        header->cont(img);
+
+        GadgetContainerMessage<ISMRMRD::MetaContainer>* m3 = AsContainerMessage<ISMRMRD::MetaContainer>(m2->cont()); // optional third message: meta attributes
+        image_with_attrib_ = (m3 != 0);
+        if ( image_with_attrib_ )
+        {
+            GadgetContainerMessage< ISMRMRD::MetaContainer >* attrib = new GadgetContainerMessage< ISMRMRD::MetaContainer >;
+            (*attrib->getObjectPtr()) = *m3->getObjectPtr();
+            img->cont(attrib);
+        }
+
+        if (buffer_[slc]->enqueue_tail(header) < 0)
+        {
+            GADGET_DEBUG1("Failed to add image to buffer\n");
+            header->release(); // releases the copied header together with its continuation chain
+            return GADGET_FAIL;
+        }
+
+        time_stamps_[slc].push_back( (float)(m1->getObjectPtr()->physiology_time_stamp[phys_time_index_]) );
+
+        if (this->next()->putq(m1) < 0)
+        {
+            GADGET_DEBUG1("Unable to put data on next Gadgets Q\n");
+            return GADGET_FAIL;
+        }
+
+        return GADGET_OK;
     }
 
-    return GADGET_OK;
-  }
-
-  GADGET_FACTORY_DECLARE(PhysioInterpolationGadget)
+    GADGET_FACTORY_DECLARE(PhysioInterpolationGadget)
 }
diff --git a/gadgets/mri_core/PhysioInterpolationGadget.h b/gadgets/mri_core/PhysioInterpolationGadget.h
index 38c5972..c7128bb 100644
--- a/gadgets/mri_core/PhysioInterpolationGadget.h
+++ b/gadgets/mri_core/PhysioInterpolationGadget.h
@@ -6,38 +6,48 @@
 #include "hoNDArray.h"
 #include "gadgetron_mricore_export.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <complex>
 
 namespace Gadgetron{  
 
-    class EXPORTGADGETSMRICORE PhysioInterpolationGadget :
-        public Gadget2< ISMRMRD::ImageHeader, hoNDArray< std::complex<float> > >
+    class EXPORTGADGETSMRICORE PhysioInterpolationGadget : public Gadget2<ISMRMRD::ImageHeader, hoNDArray< std::complex<float> > >
     {
-
     public:
         GADGET_DECLARE(PhysioInterpolationGadget);
 
         PhysioInterpolationGadget();
         virtual ~PhysioInterpolationGadget();
 
-	inline unsigned short get_number_of_phases() { return phases_to_reconstruct_; }
+        inline unsigned short get_number_of_phases() { return phases_to_reconstruct_; }
 
     protected:
         virtual int process_config(ACE_Message_Block* mb);
 
-        virtual int process(GadgetContainerMessage< ISMRMRD::ImageHeader >* m1,
-            GadgetContainerMessage< hoNDArray< std::complex<float> > > * m2);
-	
-	virtual int close(unsigned long flags); //All the work is done here in this Gadget
+        virtual int process(GadgetContainerMessage< ISMRMRD::ImageHeader >* m1, GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2); // buffers a copy of each image per slice
+
+        virtual int close(unsigned long flags); //All the work is done here in this Gadget
+
+        unsigned short phys_time_index_; // index into ImageHeader::physiology_time_stamp that is used as the cycle clock
+        unsigned short phases_to_reconstruct_; // number of output phases per cycle
+        unsigned short mode_; //0=separate series for each complete RR,
+                              //1=First complete RR interval only
+
+        // true: the first beat is on trigger
+        // false: the first beat will be ignored
+        bool first_beat_on_trigger_;
+
+        // interpolation method, "Spline" or "BSpline"
+        std::string interp_method_;
 
-	unsigned short phys_time_index_;
-	unsigned short phases_to_reconstruct_;
-	unsigned short mode_; //0=seperate series for each complete RR,
-	                      //1=First complete RR interval only	
     private:
-	ACE_Message_Queue<ACE_MT_SYNCH> buffer_;
-	std::vector<float> time_stamps_;
+
+        std::vector< boost::shared_ptr< ACE_Message_Queue<ACE_MT_SYNCH> > > buffer_; // one queue of buffered image messages per slice
+        std::vector< std::vector<float> > time_stamps_; // per slice: one time stamp per buffered image
+
+        size_t slc_limit_; // number of slices the per-slice containers are set up for
+
+        bool image_with_attrib_; // true when the most recently processed image carried a MetaContainer
     };
 }
 
diff --git a/gadgets/mri_core/RemoveROOversamplingGadget.cpp b/gadgets/mri_core/RemoveROOversamplingGadget.cpp
index 0f860ad..e295be9 100644
--- a/gadgets/mri_core/RemoveROOversamplingGadget.cpp
+++ b/gadgets/mri_core/RemoveROOversamplingGadget.cpp
@@ -1,59 +1,164 @@
-#include "GadgetIsmrmrdReadWrite.h"
 #include "RemoveROOversamplingGadget.h"
 #include "Gadgetron.h"
 #include "hoNDFFT.h"
+#include "ismrmrd/xml.h"
+
+#ifdef USE_OMP
+    #include "omp.h"
+#endif // USE_OMP
 
 namespace Gadgetron{
 
-int RemoveROOversamplingGadget
-::process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
-	  GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
-{
-  GadgetContainerMessage< hoNDArray< std::complex<float> > >* m3 
-    = new GadgetContainerMessage< hoNDArray< std::complex<float> > >();
-
-  if (!m3) {
-    return GADGET_FAIL;
-  }
-
-  std::vector<size_t> data_out_dims = *m2->getObjectPtr()->get_dimensions();
-  data_out_dims[0] = data_out_dims[0]/2;
-
-  try{ m3->getObjectPtr()->create(&data_out_dims);}
-  catch (std::runtime_error &err){
-    GADGET_DEBUG_EXCEPTION(err,"Unable to create new data array for downsampled data\n");
-    return GADGET_FAIL;
-  }
-
-  hoNDFFT<float>::instance()->ifft(m2->getObjectPtr(),0);
-  
-  std::complex<float>* data_in  = m2->getObjectPtr()->get_data_ptr();
-  std::complex<float>* data_out = m3->getObjectPtr()->get_data_ptr();
-
-  for (unsigned int c = 0; c < data_out_dims[1]; c++) {
-    size_t offset_in = c*m2->getObjectPtr()->get_size(0) +  (m2->getObjectPtr()->get_size(0)-data_out_dims[0])/2;
-    size_t offset_out = c*m3->getObjectPtr()->get_size(0);
-    memcpy(data_out+offset_out,data_in+offset_in,data_out_dims[0]*sizeof(std::complex<float>));
-  }
-
-  hoNDFFT<float>::instance()->fft(m3->getObjectPtr(),0);
-  
-  m2->release(); //We are done with this data
-
-  m1->cont(m3);
-  m1->getObjectPtr()->number_of_samples = data_out_dims[0];
-  m1->getObjectPtr()->center_sample /= 2;
-
-  if (this->next()->putq(m1) == -1) {
+    RemoveROOversamplingGadget::RemoveROOversamplingGadget() : constant_noise_variance_(false)
+    { // the remaining members are assigned in process_config()
+    }
+
+    RemoveROOversamplingGadget::~RemoveROOversamplingGadget()
+    { // nothing is released explicitly here
+    }
+
+    int RemoveROOversamplingGadget::process_config(ACE_Message_Block* mb) // reads the readout geometry from the ISMRMRD header and decides whether cropping is needed
+    {
+        constant_noise_variance_ = this->get_bool_value("constant_noise_variance");
+
+	ISMRMRD::IsmrmrdHeader h;
+	ISMRMRD::deserialize(mb->rd_ptr(),h);
+
+	if (h.encoding.size() == 0) {
+	  GADGET_DEBUG2("Number of encoding spaces: %d\n", h.encoding.size());
+	  GADGET_DEBUG1("This Gadget needs an encoding description\n");
+	  return GADGET_FAIL;
+	}
+
+	
+	ISMRMRD::EncodingSpace e_space = h.encoding[0].encodedSpace; // only the first encoding space is considered
+	ISMRMRD::EncodingSpace r_space = h.encoding[0].reconSpace;
+
+        encodeNx_  = e_space.matrixSize.x;
+        encodeFOV_ = e_space.fieldOfView_mm.x;
+        reconNx_   = r_space.matrixSize.x;
+        reconFOV_  = r_space.fieldOfView_mm.x;
+
+        // limit the number of threads used to be 1
+#ifdef USE_OMP
+        omp_set_num_threads(1);
+        GADGET_MSG("RemoveROOversamplingGadget:omp_set_num_threads(1) ... ");
+#endif // USE_OMP
+
+    // If the encoding and recon matrix size and FOV are the same
+    // then the data is not oversampled and we can safely pass
+    // the data onto the next gadget
+    if ( (encodeNx_ == reconNx_) && (encodeFOV_ == reconFOV_) ) // exact floating-point comparison of the two FOV values
+    {
+      dowork_ = false;
+    }
+    else {
+      dowork_ = true;
+    }
+
+        return GADGET_OK;
+    }
+
+    int RemoveROOversamplingGadget
+        ::process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
+        GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
+    {
+      // Removes readout oversampling: every channel is inverse-transformed along the first
+      // dimension, cropped to its central 1/(encodeFOV_/reconFOV_) part and transformed back;
+      // the header is updated and the message is passed on. If dowork_ is false the data is
+      // forwarded untouched. Returns GADGET_OK, or GADGET_FAIL on allocation/queue failure.
+      if (dowork_) {
+
+        std::vector<size_t> data_out_dims = *m2->getObjectPtr()->get_dimensions();
+        if ( !ifft_buf_.dimensions_equal(&data_out_dims) )
+        {
+            ifft_buf_.create(data_out_dims);
+            ifft_res_.create(data_out_dims);
+        }
+
+        float ratioFOV = encodeFOV_/reconFOV_;
+
+        data_out_dims[0] = (size_t)(data_out_dims[0]/ratioFOV);
+        if ( !fft_buf_.dimensions_equal(&data_out_dims) )
+        {
+            fft_buf_.create(data_out_dims);
+            fft_res_.create(data_out_dims);
+        }
+
+        // operator new throws on failure, so the result needs no null check
+        GadgetContainerMessage< hoNDArray< std::complex<float> > >* m3
+            = new GadgetContainerMessage< hoNDArray< std::complex<float> > >();
+
+        try{ m3->getObjectPtr()->create(&data_out_dims);}
+        catch (std::runtime_error &err)
+        {
+            GADGET_DEBUG_EXCEPTION(err,"Unable to create new data array for downsampled data\n");
+            m3->release(); // m3 is not attached to m1 yet, so nothing else would free it
+            return GADGET_FAIL;
+        }
+
+        size_t sRO = m2->getObjectPtr()->get_size(0);
+        // keep the central samples: half of the discarded samples lie on each side, whatever the ratio
+        size_t start = (sRO-data_out_dims[0])/2;
+
+        size_t dRO = m3->getObjectPtr()->get_size(0);
+        size_t numOfBytes = data_out_dims[0]*sizeof(std::complex<float>);
+
+        int c;
+
+        int CHA = (int)(data_out_dims[1]);
+
+        std::complex<float>* data_in, *data_out;
+
+        if ( constant_noise_variance_ )
+        {
+            hoNDFFT<float>::instance()->ifft1c(*m2->getObjectPtr(), ifft_res_, ifft_buf_);
+
+            data_in  = ifft_res_.get_data_ptr();
+            data_out = fft_res_.get_data_ptr();
+
+            // #pragma omp parallel for default(none) private(c) shared(CHA, sRO, start, dRO, data_in, data_out, numOfBytes)
+            for ( c=0; c<CHA; c++)
+            {
+                memcpy( data_out+c*dRO, data_in+c*sRO+start, numOfBytes );
+            }
+
+            hoNDFFT<float>::instance()->fft1c(fft_res_, *m3->getObjectPtr(), fft_buf_);
+        }
+        else
+        {
+            hoNDFFT<float>::instance()->ifft(m2->getObjectPtr(), 0);
+            data_in  = m2->getObjectPtr()->get_data_ptr();
+            data_out = m3->getObjectPtr()->get_data_ptr();
+
+            // #pragma omp parallel for default(none) private(c) shared(CHA, sRO, start, dRO, data_in, data_out, numOfBytes)
+            for ( c=0; c<CHA; c++)
+            {
+                memcpy( data_out+c*dRO, data_in+c*sRO+start, numOfBytes );
+            }
+
+            hoNDFFT<float>::instance()->fft(m3->getObjectPtr(), 0);
+        }
+
+        m2->release(); //We are done with this data
+
+        m1->cont(m3);
+        m1->getObjectPtr()->number_of_samples = data_out_dims[0];
+        m1->getObjectPtr()->center_sample = (uint16_t)(m1->getObjectPtr()->center_sample/ratioFOV);
+
+      } // end if (dowork_)
+
+      if (this->next()->putq(m1) == -1)
+      {
     ACE_ERROR_RETURN( (LM_ERROR,
-		       ACE_TEXT("%p\n"),
-		       ACE_TEXT("RemoveROOversamplingGadget::process, passing data on to next gadget")),
-		      GADGET_FAIL);
-  }
+                ACE_TEXT("%p\n"),
+                ACE_TEXT("RemoveROOversamplingGadget::process, passing data on to next gadget")),
+                GADGET_FAIL);
+      }
 
-  return GADGET_OK;
-}
+      return GADGET_OK;
+    }
 
 
-GADGET_FACTORY_DECLARE(RemoveROOversamplingGadget)
+    GADGET_FACTORY_DECLARE(RemoveROOversamplingGadget)
 }
diff --git a/gadgets/mri_core/RemoveROOversamplingGadget.h b/gadgets/mri_core/RemoveROOversamplingGadget.h
index 103422c..83e6c39 100644
--- a/gadgets/mri_core/RemoveROOversamplingGadget.h
+++ b/gadgets/mri_core/RemoveROOversamplingGadget.h
@@ -4,19 +4,43 @@
 #include "hoNDArray.h"
 #include "gadgetron_mricore_export.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <complex>
 
 namespace Gadgetron{
 
-  class EXPORTGADGETSMRICORE RemoveROOversamplingGadget :
-  public Gadget2<ISMRMRD::AcquisitionHeader,hoNDArray< std::complex<float> > >
+    class EXPORTGADGETSMRICORE RemoveROOversamplingGadget :
+        public Gadget2<ISMRMRD::AcquisitionHeader,hoNDArray< std::complex<float> > >
     {
     public:
-      GADGET_DECLARE(RemoveROOversamplingGadget);
-      
+        GADGET_DECLARE(RemoveROOversamplingGadget);
+
+        RemoveROOversamplingGadget();
+        virtual ~RemoveROOversamplingGadget();
+
     protected:
-      virtual int process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
-			  GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2);
-    };  
+
+        virtual int process_config(ACE_Message_Block* mb); // reads the readout matrix size and FOV of the encoded and recon spaces
+
+        virtual int process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
+            GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2);
+
+        // if true, the noise variance is kept unchanged in this process
+        bool constant_noise_variance_;
+
+        hoNDArray< std::complex<float> > fft_res_; // cropped data before the forward transform (constant_noise_variance_ path)
+        hoNDArray< std::complex<float> > ifft_res_; // result of the inverse transform (constant_noise_variance_ path)
+
+        hoNDArray< std::complex<float> > fft_buf_; // scratch buffer handed to fft1c
+        hoNDArray< std::complex<float> > ifft_buf_; // scratch buffer handed to ifft1c
+
+        int   encodeNx_; // readout (x) matrix size of the encoded space
+        float encodeFOV_; // readout (x) field of view of the encoded space, from fieldOfView_mm
+        int   reconNx_; // readout (x) matrix size of the recon space
+        float reconFOV_; // readout (x) field of view of the recon space, from fieldOfView_mm
+
+	// if true the gadget performs the operation
+	// otherwise, it just passes the data on
+	bool dowork_;
+    };
 }
diff --git a/gadgets/mri_core/WhiteNoiseInjectorGadget.cpp b/gadgets/mri_core/WhiteNoiseInjectorGadget.cpp
new file mode 100644
index 0000000..934074c
--- /dev/null
+++ b/gadgets/mri_core/WhiteNoiseInjectorGadget.cpp
@@ -0,0 +1,197 @@
+#include "WhiteNoiseInjectorGadget.h"
+#include "gtPlusUtil.h"
+#include <array>
+#include "ismrmrd/xml.h"
+
+namespace Gadgetron
+{
+
+// Defaults: zero-mean, unit-std noise, noise also added to reference acquisitions,
+// acceleration factors of 1, and every calibration-mode flag cleared.
+WhiteNoiseInjectorGadget::WhiteNoiseInjectorGadget()
+    : add_noise_ref_(true)
+    , noise_mean_(0)
+    , noise_std_(1.0f)
+    , acceFactorE1_(1)
+    , acceFactorE2_(1)
+    , is_interleaved_(false), is_embeded_(false)
+    , is_seperate_(false), is_external_(false)
+    , is_other_(false), is_no_acceleration_(false)
+{
+    // heap-allocated generator; the destructor deletes it
+    randn_ = new RandGenType();
+}
+
+WhiteNoiseInjectorGadget::~WhiteNoiseInjectorGadget()
+{
+    delete randn_; // release the generator allocated with new in the constructor
+}
+
+int WhiteNoiseInjectorGadget::process_config(ACE_Message_Block* mb)
+{
+    // Read the noise properties, seed the generator, and parse the ISMRMRD header in mb; GADGET_OK on success.
+    noise_mean_ = (float)this->get_double_value("noise_mean");
+    noise_std_ = (float)this->get_double_value("noise_std");
+    add_noise_ref_ = this->get_bool_value("add_noise_ref");
+
+    GADGET_MSG("noise mean is " << noise_mean_);
+    GADGET_MSG("noise std is " << noise_std_);
+    GADGET_MSG("add_noise_ref is " << add_noise_ref_);
+
+    randn_->setPara(noise_mean_, noise_std_);
+
+    // get the current time and generate a seed
+    time_t rawtime;
+    time ( &rawtime );
+    struct tm * timeinfo = localtime ( &rawtime );
+
+    long long seed = (long long)(1e10*(timeinfo->tm_year+1900) + 1e8*(timeinfo->tm_mon+1) + 1e6*timeinfo->tm_mday + 1e4*timeinfo->tm_hour + 1e2*timeinfo->tm_min + timeinfo->tm_sec + std::rand());
+
+    std::array<unsigned int, 10> sequence;
+    // 1e10*year exceeds UINT_MAX; casting that double to unsigned is undefined, so wrap through integer arithmetic
+    sequence[0] = (unsigned int)(10000000000ULL*(unsigned long long)(timeinfo->tm_year+1900));
+    sequence[1] = (unsigned int)(1e8*(timeinfo->tm_mon+1));
+    sequence[2] = (unsigned int)(1e6*timeinfo->tm_mday);
+    sequence[3] = (unsigned int)(1e4*timeinfo->tm_hour);
+    sequence[4] = (unsigned int)(1e2*timeinfo->tm_min);
+    sequence[5] = (unsigned int)(timeinfo->tm_sec);
+
+    std::srand( (unsigned int)seed );
+    sequence[6] = (unsigned int)(std::rand());
+    sequence[7] = (unsigned int)(std::rand());
+    sequence[8] = (unsigned int)(std::rand());
+    sequence[9] = (unsigned int)(std::rand());
+
+    std::seed_seq seedSeq(sequence.begin(), sequence.end());
+    randn_->getRandomer().seed(seedSeq);
+    // NOTE(review): if seed() reseeds the engine returned by getRandomer(), it overrides the seed_seq above - confirm
+    randn_->seed( (unsigned long)seed );
+
+    // parse the ISMRMRD XML header carried by the message block
+    ISMRMRD::IsmrmrdHeader h;
+    try {
+      deserialize(mb->rd_ptr(),h);
+    } catch (...) {
+      GADGET_DEBUG1("Error parsing ISMRMRD Header");
+      throw; // propagate the original exception; nothing after this statement can run
+    }
+
+    if( h.encoding.size() != 1)
+    {
+      GADGET_DEBUG2("Number of encoding spaces: %d\n", (int)h.encoding.size());
+      GADGET_DEBUG1("This simple WhiteNoiseInjectorGadget only supports one encoding space\n");
+      return GADGET_FAIL;
+    }
+    if (!h.encoding[0].parallelImaging) {
+      GADGET_DEBUG1("Parallel Imaging section not found in header");
+      return GADGET_FAIL;
+    }
+
+    ISMRMRD::ParallelImaging p_imaging = *h.encoding[0].parallelImaging;
+
+    acceFactorE1_ = (size_t)(p_imaging.accelerationFactor.kspace_encoding_step_1);
+    acceFactorE2_ = (size_t)(p_imaging.accelerationFactor.kspace_encoding_step_2);
+
+    GADGET_MSG("acceFactorE1_ is " << acceFactorE1_);
+    GADGET_MSG("acceFactorE2_ is " << acceFactorE2_);
+
+    if ( !p_imaging.calibrationMode.is_present() )
+    {
+        GADGET_DEBUG1("Parallel Imaging calibrationMode not found in header");
+        return GADGET_FAIL;
+    }
+
+    std::string calib = *p_imaging.calibrationMode;
+    if ( calib.compare("interleaved") == 0 )
+    {
+      is_interleaved_ = true;
+      GADGET_MSG("Calibration mode is interleaved");
+    } else if ( calib.compare("embedded") == 0 ) {
+      is_embeded_ = true;
+      GADGET_MSG("Calibration mode is embedded");
+    } else if ( calib.compare("separate") == 0 ) {
+      is_seperate_ = true;
+      GADGET_MSG("Calibration mode is separate");
+    } else if ( calib.compare("external") == 0 ) {
+      is_external_ = true;
+      GADGET_MSG("Calibration mode is external");
+    } else if ( (calib.compare("other") == 0)) {
+      is_other_ = true;
+      GADGET_MSG("Calibration mode is other");
+    } else {
+      GADGET_DEBUG1("Failed to process parallel imaging calibration mode");
+      return GADGET_FAIL;
+    }
+
+    return GADGET_OK;
+}
+
+int WhiteNoiseInjectorGadget::process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1, GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
+{
+    // Adds generated noise to the samples in m2, except for noise-measurement and
+    // surface-coil-correction acquisitions, then hands m1 to the next gadget.
+    // Returns GADGET_OK, GADGET_FAIL if generating/adding noise fails, or -1 if the hand-off fails.
+    bool is_noise = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_NOISE_MEASUREMENT).isSet(m1->getObjectPtr()->flags);
+    bool is_scc_correction = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_SURFACECOILCORRECTIONSCAN_DATA).isSet(m1->getObjectPtr()->flags);
+
+    bool is_ref = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_PARALLEL_CALIBRATION).isSet(m1->getObjectPtr()->flags);
+    bool is_ref_kspace = ISMRMRD::FlagBit(ISMRMRD::ISMRMRD_ACQ_IS_PARALLEL_CALIBRATION_AND_IMAGING).isSet(m1->getObjectPtr()->flags);
+
+    if (!is_noise && !is_scc_correction )
+    {
+        bool add_noise = true;
+        // pure reference lines of a separate/external calibration follow the add_noise_ref property
+        if ( is_ref && !is_ref_kspace && (is_seperate_||is_external_) )
+        {
+            add_noise = add_noise_ref_;
+
+            if ( !add_noise )
+            {
+                GADGET_MSG("WhiteNoiseInjectorGadget, noise is not added to the ref acquisitions ... ");
+            }
+        }
+
+        if ( add_noise )
+        {
+            // (re)allocate the scratch buffers only when the incoming array shape changes
+            if ( !noise_.dimensions_equal(m2->getObjectPtr()) )
+            {
+                noise_.create(m2->getObjectPtr()->get_dimensions());
+                noise_fl_.create(m2->getObjectPtr()->get_dimensions());
+            }
+
+            if ( !randn_->gen(noise_) )
+            {
+                GADGET_ERROR_MSG("WhiteNoiseInjectorGadget, randn_->gen(noise_) failed ... ");
+                return GADGET_FAIL;
+            }
+
+            // noise is drawn as complex<double>; copy into the complex<float> buffer before adding
+            if ( !noise_fl_.copyFrom(noise_) )
+            {
+                GADGET_ERROR_MSG("WhiteNoiseInjectorGadget, noise_fl_.copyFrom(noise_) failed ... ");
+                return GADGET_FAIL;
+            }
+
+            try
+            {
+                Gadgetron::add(*m2->getObjectPtr(), noise_fl_, *m2->getObjectPtr());
+            }
+            catch(...)
+            {
+                GADGET_ERROR_MSG("WhiteNoiseInjectorGadget, Gadgetron::add(*m2->getObjectPtr(), noise_fl_, *m2->getObjectPtr()) failed ... ");
+                return GADGET_FAIL;
+            }
+        }
+    }
+
+    if (this->next()->putq(m1) == -1) 
+    {
+        ACE_ERROR_RETURN( (LM_ERROR,
+                ACE_TEXT("%p\n"),
+                ACE_TEXT("WhiteNoiseInjectorGadget::process, passing data on to next gadget")),
+                -1);
+    }
+
+    return GADGET_OK;
+}
+
+GADGET_FACTORY_DECLARE(WhiteNoiseInjectorGadget)
+}
diff --git a/gadgets/mri_core/WhiteNoiseInjectorGadget.h b/gadgets/mri_core/WhiteNoiseInjectorGadget.h
new file mode 100644
index 0000000..dc15147
--- /dev/null
+++ b/gadgets/mri_core/WhiteNoiseInjectorGadget.h
@@ -0,0 +1,62 @@
+
+#pragma once
+
+#include "Gadget.h"
+#include "hoNDArray.h"
+#include "ismrmrd/ismrmrd.h"
+#include "GadgetIsmrmrdReadWrite.h"
+#include "gadgetron_mricore_export.h"
+
+namespace Gadgetron { namespace gtPlus {
+    template <typename T> class gtPlusRandNorm;
+}}
+
+namespace Gadgetron
+{
+
+/// Gadget that adds white noise to incoming k-space acquisitions (properties: noise_mean, noise_std, add_noise_ref)
+class EXPORTGADGETSMRICORE WhiteNoiseInjectorGadget : public Gadgetron::Gadget2<ISMRMRD::AcquisitionHeader, hoNDArray< std::complex<float> > >
+{
+public:
+
+    GADGET_DECLARE(WhiteNoiseInjectorGadget);
+
+    typedef Gadgetron::gtPlus::gtPlusRandNorm<double> RandGenType;
+
+    WhiteNoiseInjectorGadget();
+    virtual ~WhiteNoiseInjectorGadget();
+
+protected:
+    /// reads the gadget properties, seeds the generator and parses the ISMRMRD header
+    virtual int process_config(ACE_Message_Block* mb);
+    /// adds noise to the data in m2 (unless the acquisition is exempt) and passes m1 on
+    virtual int process(Gadgetron::GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
+        Gadgetron::GadgetContainerMessage< Gadgetron::hoNDArray< std::complex<float> > >* m2);
+
+    /// whether to add noise to reference acquisitions; consulted only for separate/external calibration
+    bool add_noise_ref_;
+
+    /// noise mean and standard deviation
+    float noise_mean_;
+    float noise_std_;
+
+    /// random noise generator; owned by this gadget (new in the constructor, delete in the destructor)
+    RandGenType* randn_;
+
+    /// helper memory to store noise: generated into noise_ (double), copied to noise_fl_ (float) before adding
+    hoNDArray< std::complex<double> > noise_;
+    hoNDArray< std::complex<float> > noise_fl_;
+
+    /// acceleration factors along E1 and E2, read from the ISMRMRD header in process_config()
+    size_t acceFactorE1_;
+    size_t acceFactorE2_;
+    /// calibration-mode flags; process_config() sets the one matching the header's calibrationMode
+    bool is_interleaved_;
+    bool is_embeded_;
+    bool is_seperate_;
+    bool is_external_;
+    bool is_other_;
+    bool is_no_acceleration_;
+};
+
+}
diff --git a/gadgets/mri_core/default.xml b/gadgets/mri_core/default.xml
index b0f32a4..1f5c4f9 100644
--- a/gadgets/mri_core/default.xml
+++ b/gadgets/mri_core/default.xml
@@ -26,25 +26,65 @@
     </writer>
   
     <gadget>
-      <name>Acc</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>AccumulatorGadget</classname>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+	<classname>RemoveROOversamplingGadget</classname>
+        <property>
+            <name>constant_noise_variance</name>
+            <value>false</value>
+        </property>
+    </gadget>
+
+    <gadget>
+        <name>AccTrig</name>
+        <dll>gadgetron_mricore</dll>
+	<classname>AcquisitionAccumulateTriggerGadget</classname>
+        <property>
+            <name>trigger_dimension</name>
+            <value>repetition</value>
+        </property>
+        <property>
+          <name>sorting_dimension</name>
+          <value>slice</value>
+        </property>
     </gadget>
+
     <gadget>
+        <name>Buff</name>
+        <dll>gadgetron_mricore</dll>
+	<classname>BucketToBufferGadget</classname>
+        <property>
+            <name>N_dimension</name>
+            <value></value>
+        </property>
+        <property>
+          <name>S_dimension</name>
+          <value></value>
+        </property>
+        <property>
+          <name>split_slices</name>
+          <value>true</value>
+        </property>
+    </gadget>
+
+     <gadget>
       <name>FFT</name>
       <dll>gadgetron_mricore</dll>
       <classname>FFTGadget</classname>
-    </gadget>
+     </gadget>
+
     <gadget>
-      <name>CropCombine</name>
+      <name>Combine</name>
       <dll>gadgetron_mricore</dll>
-      <classname>CropAndCombineGadget</classname>
+      <classname>CombineGadget</classname>
     </gadget>
+
     <gadget>
       <name>Extract</name>
       <dll>gadgetron_mricore</dll>
       <classname>ExtractGadget</classname>
     </gadget>  
+
     <gadget>
       <name>ImageFinishFLOAT</name>
       <dll>gadgetron_mricore</dll>
diff --git a/gadgets/mri_core/default_measurement_dependencies.xml b/gadgets/mri_core/default_measurement_dependencies.xml
new file mode 100644
index 0000000..44a0919
--- /dev/null
+++ b/gadgets/mri_core/default_measurement_dependencies.xml
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+                  xmlns="http://gadgetron.sf.net/gadgetron"
+                  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+    <reader>
+        <slot>1008</slot>
+        <dll>gadgetron_mricore</dll>
+        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+
+    <!-- SNR unit noise adjust gadget.
+         If the scan has an asymmetric readout, the center of the echo will be shifted to index 0.
+         Zeros will be filled into the readout data.
+    -->
+    <gadget>
+        <name>NoiseAdjust</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>NoiseAdjustGadget</classname>
+
+        <!-- File prefix for stored noise prewhitener matrix -->
+        <property>
+            <name>noise_dependency_prefix</name>
+            <value>GadgetronNoisePreWhitener</value>
+        </property>
+
+        <!-- Preset noise dwell time; for noise dependency measurements -->
+        <property>
+            <name>noise_dwell_time_us_preset</name>
+            <value>5.0</value>
+        </property>
+
+        <!-- Whether to perform timing -->
+        <property>
+            <name>performTiming</name>
+            <value>true</value>
+        </property>
+    </gadget>
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/mri_core/default_optimized.xml b/gadgets/mri_core/default_optimized.xml
index 4bf7adc..0c62a8b 100644
--- a/gadgets/mri_core/default_optimized.xml
+++ b/gadgets/mri_core/default_optimized.xml
@@ -45,17 +45,53 @@
   </gadget>
   
   <gadget>
-    <name>Acc</name>
+    <name>RemoveROOversampling</name>
     <dll>gadgetron_mricore</dll>
-    <classname>AccumulatorGadget</classname>
-  </gadget>
-  
-  <gadget>
-    <name>FFT</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>FFTGadget</classname>
+    <classname>RemoveROOversamplingGadget</classname>
+    <property>
+      <name>constant_noise_variance</name>
+      <value>false</value>
+    </property>
   </gadget>
 
+    <gadget>
+        <name>AccTrig</name>
+        <dll>gadgetron_mricore</dll>
+	<classname>AcquisitionAccumulateTriggerGadget</classname>
+        <property>
+            <name>trigger_dimension</name>
+            <value>repetition</value>
+        </property>
+        <property>
+          <name>sorting_dimension</name>
+          <value>slice</value>
+        </property>
+    </gadget>
+
+    <gadget>
+        <name>Buff</name>
+        <dll>gadgetron_mricore</dll>
+	<classname>BucketToBufferGadget</classname>
+        <property>
+            <name>N_dimension</name>
+            <value></value>
+        </property>
+        <property>
+          <name>S_dimension</name>
+          <value></value>
+        </property>
+        <property>
+          <name>split_slices</name>
+          <value>true</value>
+        </property>
+    </gadget>
+
+     <gadget>
+      <name>FFT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>FFTGadget</classname>
+     </gadget>
+
   <!--
       <gadget>
       <name>ImageWrite</name>
@@ -65,9 +101,9 @@
   -->
   
   <gadget>
-    <name>CropCombine</name>
+    <name>Combine</name>
     <dll>gadgetron_mricore</dll>
-    <classname>CropAndCombineGadget</classname>
+    <classname>CombineGadget</classname>
   </gadget>
 
   <gadget>
diff --git a/gadgets/mri_core/default_short.xml b/gadgets/mri_core/default_short.xml
index 098c6d3..9e42a16 100644
--- a/gadgets/mri_core/default_short.xml
+++ b/gadgets/mri_core/default_short.xml
@@ -26,21 +26,57 @@
     </writer>
 
     <gadget>
-      <name>Acc</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>AccumulatorGadget</classname>
+        <name>RemoveROOversampling</name>
+        <dll>gadgetron_mricore</dll>
+        <classname>RemoveROOversamplingGadget</classname>
+        <property>
+            <name>constant_noise_variance</name>
+            <value>false</value>
+        </property>
+    </gadget>
+
+    <gadget>
+        <name>AccTrig</name>
+        <dll>gadgetron_mricore</dll>
+	<classname>AcquisitionAccumulateTriggerGadget</classname>
+        <property>
+            <name>trigger_dimension</name>
+            <value>repetition</value>
+        </property>
+        <property>
+          <name>sorting_dimension</name>
+          <value>slice</value>
+        </property>
     </gadget>
 
     <gadget>
+        <name>Buff</name>
+        <dll>gadgetron_mricore</dll>
+	<classname>BucketToBufferGadget</classname>
+        <property>
+            <name>N_dimension</name>
+            <value></value>
+        </property>
+        <property>
+          <name>S_dimension</name>
+          <value></value>
+        </property>
+        <property>
+          <name>split_slices</name>
+          <value>true</value>
+        </property>
+    </gadget>
+
+     <gadget>
       <name>FFT</name>
       <dll>gadgetron_mricore</dll>
       <classname>FFTGadget</classname>
-    </gadget>
+     </gadget>
 
     <gadget>
-      <name>CropCombine</name>
+      <name>Combine</name>
       <dll>gadgetron_mricore</dll>
-      <classname>CropAndCombineGadget</classname>
+      <classname>CombineGadget</classname>
     </gadget>
 
     <gadget>
diff --git a/gadgets/octave/CMakeLists.txt b/gadgets/octave/CMakeLists.txt
deleted file mode 100644
index 64725e4..0000000
--- a/gadgets/octave/CMakeLists.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-find_package(Ismrmrd REQUIRED)
-find_package(Octave REQUIRED)
-
-link_directories(${OCTAVE_LINK_DIRS})
-
-add_library(gadgetron_octavecommunicator SHARED OctaveCommunicator.cpp)
-target_link_libraries(gadgetron_octavecommunicator ${OCTAVE_LIBRARY} optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY})
-
-add_library(GadgetronReturnIsmrmrdAcquisition MODULE GadgetronReturnIsmrmrdAcquisition.cpp)
-SET_TARGET_PROPERTIES(GadgetronReturnIsmrmrdAcquisition PROPERTIES SUFFIX .oct PREFIX "")
-target_link_libraries(GadgetronReturnIsmrmrdAcquisition ${OCTAVE_LIBRARY} gadgetron_octavecommunicator optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY})
-
-add_library(XMLGetXPath MODULE pugixml.cpp XMLGetXPath.cpp)
-SET_TARGET_PROPERTIES(XMLGetXPath PROPERTIES SUFFIX .oct PREFIX "")
-target_link_libraries(XMLGetXPath ${OCTAVE_LIBRARY})
-
-add_library(GadgetronReturnIsmrmrdImage MODULE GadgetronReturnIsmrmrdImage.cpp)
-SET_TARGET_PROPERTIES(GadgetronReturnIsmrmrdImage PROPERTIES SUFFIX .oct PREFIX "")
-target_link_libraries(GadgetronReturnIsmrmrdImage ${OCTAVE_LIBRARY} gadgetron_octavecommunicator optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY})
-
-add_library(gadgetron_octave SHARED OctaveGadget.cpp)
-target_link_libraries(gadgetron_octave ${OCTAVE_LIBRARY} gadgetron_octavecommunicator cpucore)
-	
-install(TARGETS gadgetron_octavecommunicator DESTINATION lib)
-install(TARGETS gadgetron_octave DESTINATION lib)
-install(TARGETS GadgetronReturnIsmrmrdAcquisition DESTINATION octave)
-install(TARGETS GadgetronReturnIsmrmrdImage DESTINATION octave)
-install(TARGETS XMLGetXPath DESTINATION octave)
-
-install(FILES octave/my_gadget_reference.m 
-              octave/my_recon_function.m 
-              octave/my_config_function.m 
-              octave/gadget_reference_downsample_2x.m 
-              octave/downsample_2x.m 
-              octave/configure_downsample_2x.m 
-              octave/ismrm_transform_kspace_to_image.m 
-              octave/ismrm_transform_image_to_kspace.m 
-              octave/gadget_reference_accumulator.m 
-              octave/accumulator.m 
-              octave/configure_accumulator.m 
-        DESTINATION octave)
-
-install(FILES octave.xml DESTINATION config)
diff --git a/gadgets/octave/GadgetronReturnIsmrmrdAcquisition.cpp b/gadgets/octave/GadgetronReturnIsmrmrdAcquisition.cpp
deleted file mode 100644
index d3e2129..0000000
--- a/gadgets/octave/GadgetronReturnIsmrmrdAcquisition.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-#include <octave/oct.h>
-#include <octave/ov-struct.h>
-
-#include "GadgetContainerMessage.h"
-#include "ismrmrd.h"
-#include "hoNDArray.h"
-#include "OctaveCommunicator.h"
-
-using namespace Gadgetron;
-
-DEFUN_DLD (GadgetronReturnIsmrmrdAcquisition, args, nargout,
-	   "GadgetronReturnIsmrmrdAcquisition Returns Acquisition to the Gadgetron")
-{
-  int nargin = args.length ();
-
-  octave_value retval;
-     
-  if (nargin != 3) {
-    print_usage(); 
-  } else {
-    std::string id(args(0).string_value());
-    Octave_map h(args(1).map_value());
-    FloatComplexNDArray d(args(2).complex_array_value());
-
-    GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1 =
-    		new GadgetContainerMessage<ISMRMRD::AcquisitionHeader>();
-
-    ISMRMRD::AcquisitionHeader* head = m1->getObjectPtr();
-
-    head->version = octave_value(h.contents("version")(0)).uint16_scalar_value();
-    head->flags = octave_value(h.contents("flags")(0)).uint64_scalar_value();
-    head->measurement_uid = octave_value(h.contents("measurement_uid")(0)).uint32_scalar_value();
-    head->scan_counter = octave_value(h.contents("scan_counter")(0)).uint32_scalar_value();
-    head->acquisition_time_stamp = octave_value(h.contents("acquisition_time_stamp")(0)).uint32_scalar_value();
-    head->measurement_uid = octave_value(h.contents("measurement_uid")(0)).uint32_scalar_value();
-    head->physiology_time_stamp[0] = octave_value(h.contents("physiology_time_stamp")(0)).uint32_array_value()(0);
-    head->physiology_time_stamp[1] = octave_value(h.contents("physiology_time_stamp")(0)).uint32_array_value()(1);
-    head->physiology_time_stamp[2] = octave_value(h.contents("physiology_time_stamp")(0)).uint32_array_value()(2);
-    head->number_of_samples = octave_value(h.contents("number_of_samples")(0)).uint16_scalar_value();
-    head->available_channels = octave_value(h.contents("available_channels")(0)).uint16_scalar_value();
-    head->active_channels = octave_value(h.contents("active_channels")(0)).uint16_scalar_value();
-    head->channel_mask[0] = octave_value(h.contents("channel_mask")(0)).uint64_array_value()(0);
-    head->channel_mask[1] = octave_value(h.contents("channel_mask")(0)).uint64_array_value()(1);
-    head->channel_mask[2] = octave_value(h.contents("channel_mask")(0)).uint64_array_value()(2);
-    head->channel_mask[3] = octave_value(h.contents("channel_mask")(0)).uint64_array_value()(3);
-    head->channel_mask[4] = octave_value(h.contents("channel_mask")(0)).uint64_array_value()(4);
-    head->channel_mask[5] = octave_value(h.contents("channel_mask")(0)).uint64_array_value()(5);
-    head->channel_mask[6] = octave_value(h.contents("channel_mask")(0)).uint64_array_value()(6);
-    head->channel_mask[7] = octave_value(h.contents("channel_mask")(0)).uint64_array_value()(7);
-    head->channel_mask[8] = octave_value(h.contents("channel_mask")(0)).uint64_array_value()(8);
-    head->channel_mask[9] = octave_value(h.contents("channel_mask")(0)).uint64_array_value()(9);
-    head->channel_mask[10] = octave_value(h.contents("channel_mask")(0)).uint64_array_value()(10);
-    head->channel_mask[11] = octave_value(h.contents("channel_mask")(0)).uint64_array_value()(11);
-    head->channel_mask[12] = octave_value(h.contents("channel_mask")(0)).uint64_array_value()(12);
-    head->channel_mask[13] = octave_value(h.contents("channel_mask")(0)).uint64_array_value()(13);
-    head->channel_mask[14] = octave_value(h.contents("channel_mask")(0)).uint64_array_value()(14);
-    head->channel_mask[15] = octave_value(h.contents("channel_mask")(0)).uint64_array_value()(15);
-    head->discard_pre = octave_value(h.contents("discard_pre")(0)).uint16_scalar_value();
-    head->discard_post = octave_value(h.contents("discard_post")(0)).uint16_scalar_value();
-    head->center_sample = octave_value(h.contents("center_sample")(0)).uint16_scalar_value();
-    head->encoding_space_ref = octave_value(h.contents("encoding_space_ref")(0)).uint16_scalar_value();
-    head->trajectory_dimensions = octave_value(h.contents("trajectory_dimensions")(0)).uint16_scalar_value();
-    head->sample_time_us = octave_value(h.contents("sample_time_us")(0)).float_scalar_value();
-    head->position[0] = octave_value(h.contents("position")(0)).float_array_value()(0);
-    head->position[1] = octave_value(h.contents("position")(0)).float_array_value()(1);
-    head->position[2] = octave_value(h.contents("position")(0)).float_array_value()(2);
-    head->read_dir[0] = octave_value(h.contents("read_dir")(0)).float_array_value()(0);
-    head->read_dir[1] = octave_value(h.contents("read_dir")(0)).float_array_value()(1);
-    head->read_dir[2] = octave_value(h.contents("read_dir")(0)).float_array_value()(2);
-    head->phase_dir[0] = octave_value(h.contents("phase_dir")(0)).float_array_value()(0);
-    head->phase_dir[1] = octave_value(h.contents("phase_dir")(0)).float_array_value()(1);
-    head->phase_dir[2] = octave_value(h.contents("phase_dir")(0)).float_array_value()(2);
-    head->slice_dir[0] = octave_value(h.contents("read_dir")(0)).float_array_value()(0);
-    head->slice_dir[1] = octave_value(h.contents("read_dir")(0)).float_array_value()(1);
-    head->slice_dir[2] = octave_value(h.contents("read_dir")(0)).float_array_value()(2);
-    head->patient_table_position[0] = octave_value(h.contents("patient_table_position")(0)).float_array_value()(0);
-    head->patient_table_position[1] = octave_value(h.contents("patient_table_position")(0)).float_array_value()(1);
-    head->patient_table_position[2] = octave_value(h.contents("patient_table_position")(0)).float_array_value()(2);
-    head->idx.kspace_encode_step_1 = octave_value(octave_value(h.contents("idx")(0)).map_value().contents("kspace_encode_step_1")(0)).uint16_scalar_value();
-    head->idx.kspace_encode_step_2 = octave_value(octave_value(h.contents("idx")(0)).map_value().contents("kspace_encode_step_2")(0)).uint16_scalar_value();
-    head->idx.average              = octave_value(octave_value(h.contents("idx")(0)).map_value().contents("average")(0)).uint16_scalar_value();
-    head->idx.slice                = octave_value(octave_value(h.contents("idx")(0)).map_value().contents("slice")(0)).uint16_scalar_value();
-    head->idx.contrast             = octave_value(octave_value(h.contents("idx")(0)).map_value().contents("contrast")(0)).uint16_scalar_value();
-    head->idx.phase                = octave_value(octave_value(h.contents("idx")(0)).map_value().contents("phase")(0)).uint16_scalar_value();
-    head->idx.repetition           = octave_value(octave_value(h.contents("idx")(0)).map_value().contents("repetition")(0)).uint16_scalar_value();
-    head->idx.set                  = octave_value(octave_value(h.contents("idx")(0)).map_value().contents("set")(0)).uint16_scalar_value();
-    head->idx.segment              = octave_value(octave_value(h.contents("idx")(0)).map_value().contents("segment")(0)).uint16_scalar_value();
-    head->idx.user[0]              = octave_value(octave_value(h.contents("idx")(0)).map_value().contents("user")(0)).uint16_array_value()(0);
-    head->idx.user[1]              = octave_value(octave_value(h.contents("idx")(0)).map_value().contents("user")(0)).uint16_array_value()(1);
-    head->idx.user[2]              = octave_value(octave_value(h.contents("idx")(0)).map_value().contents("user")(0)).uint16_array_value()(2);
-    head->idx.user[3]              = octave_value(octave_value(h.contents("idx")(0)).map_value().contents("user")(0)).uint16_array_value()(3);
-    head->idx.user[4]              = octave_value(octave_value(h.contents("idx")(0)).map_value().contents("user")(0)).uint16_array_value()(4);
-    head->idx.user[5]              = octave_value(octave_value(h.contents("idx")(0)).map_value().contents("user")(0)).uint16_array_value()(5);
-    head->idx.user[6]              = octave_value(octave_value(h.contents("idx")(0)).map_value().contents("user")(0)).uint16_array_value()(6);
-    head->idx.user[7]              = octave_value(octave_value(h.contents("idx")(0)).map_value().contents("user")(0)).uint16_array_value()(7);
-    head->user_int[0]              = octave_value(h.contents("user_int")(0)).int32_array_value()(0);
-    head->user_int[1]              = octave_value(h.contents("user_int")(0)).int32_array_value()(1);
-    head->user_int[2]              = octave_value(h.contents("user_int")(0)).int32_array_value()(2);
-    head->user_int[3]              = octave_value(h.contents("user_int")(0)).int32_array_value()(3);
-    head->user_int[4]              = octave_value(h.contents("user_int")(0)).int32_array_value()(4);
-    head->user_int[5]              = octave_value(h.contents("user_int")(0)).int32_array_value()(5);
-    head->user_int[6]              = octave_value(h.contents("user_int")(0)).int32_array_value()(6);
-    head->user_int[7]              = octave_value(h.contents("user_int")(0)).int32_array_value()(7);
-    head->user_float[0]            = octave_value(h.contents("user_float")(0)).int32_array_value()(0);
-    head->user_float[1]            = octave_value(h.contents("user_float")(0)).int32_array_value()(1);
-    head->user_float[2]            = octave_value(h.contents("user_float")(0)).int32_array_value()(2);
-    head->user_float[3]            = octave_value(h.contents("user_float")(0)).int32_array_value()(3);
-    head->user_float[4]            = octave_value(h.contents("user_float")(0)).int32_array_value()(4);
-    head->user_float[5]            = octave_value(h.contents("user_float")(0)).int32_array_value()(5);
-    head->user_float[6]            = octave_value(h.contents("user_float")(0)).int32_array_value()(6);
-    head->user_float[7]            = octave_value(h.contents("user_float")(0)).int32_array_value()(7);
-
-
-    GadgetContainerMessage< hoNDArray<std::complex<float> > >* m2 =
-    		new GadgetContainerMessage< hoNDArray<std::complex<float> > >();
-
-    std::vector<unsigned int> dims;
-    for (unsigned int i = 0; i < d.dims().length(); i++) {
-    	dims.push_back(d.dims()(i));
-    }
-
-    try {
-        m2->getObjectPtr()->create(&dims);
-    } catch (...) {
-        GADGET_DEBUG1("Failed to allocate return array\n");
-        m1->release();
-    }
-
-    memcpy(m2->getObjectPtr()->get_data_ptr(), &d(0), sizeof(float)*2*d.nelem());
-
-    m1->cont(m2);
-
-    OctaveCommunicator::instance()->message_gadget(id, m1);
-  }
-  return octave_value_list ();
-}
diff --git a/gadgets/octave/GadgetronReturnIsmrmrdImage.cpp b/gadgets/octave/GadgetronReturnIsmrmrdImage.cpp
deleted file mode 100644
index 7d8c8b6..0000000
--- a/gadgets/octave/GadgetronReturnIsmrmrdImage.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-#include <octave/oct.h>
-#include <octave/ov-struct.h>
-
-#include "OctaveCommunicator.h"
-#include "ismrmrd.h"
-#include "hoNDArray.h"
-
-using namespace Gadgetron;
-
-DEFUN_DLD (GadgetronReturnIsmrmrdImage, args, nargout,
-	   "GadgetronReturnIsmrmrdImage return Image to the Gadgetron")
-{
-	 int nargin = args.length ();
-
-	  octave_value retval;
-
-	  if (nargin != 3) {
-	    print_usage();
-	  } else {
-	    std::string id(args(0).string_value());
-	    Octave_map h(args(1).map_value());
-	    FloatComplexNDArray d(args(2).complex_array_value());
-
-	    GadgetContainerMessage<ISMRMRD::ImageHeader>* m1 =
-	    		new GadgetContainerMessage<ISMRMRD::ImageHeader>();
-
-	    ISMRMRD::ImageHeader* head = m1->getObjectPtr();
-
-	    head->version = octave_value(h.contents("version")(0)).uint16_scalar_value();
-	    head->flags = octave_value(h.contents("flags")(0)).uint64_scalar_value();
-	    head->measurement_uid = octave_value(h.contents("measurement_uid")(0)).uint32_scalar_value();
-	    head->matrix_size[0] = octave_value(h.contents("matrix_size")(0)).uint16_array_value()(0);
-	    head->matrix_size[1] = octave_value(h.contents("matrix_size")(0)).uint16_array_value()(1);
-	    head->matrix_size[2] = octave_value(h.contents("matrix_size")(0)).uint16_array_value()(2);
-		head->field_of_view[0] = octave_value(h.contents("field_of_view")(0)).float_array_value()(0);
-	    head->field_of_view[1] = octave_value(h.contents("field_of_view")(0)).float_array_value()(1);
-	    head->field_of_view[2] = octave_value(h.contents("field_of_view")(0)).float_array_value()(2);
-		head->channels = octave_value(h.contents("channels")(0)).uint16_scalar_value();
-	    head->position[0] = octave_value(h.contents("position")(0)).float_array_value()(0);
-	    head->position[1] = octave_value(h.contents("position")(0)).float_array_value()(1);
-	    head->position[2] = octave_value(h.contents("position")(0)).float_array_value()(2);
-	    head->read_dir[0] = octave_value(h.contents("read_dir")(0)).float_array_value()(0);
-	    head->read_dir[1] = octave_value(h.contents("read_dir")(0)).float_array_value()(1);
-	    head->read_dir[2] = octave_value(h.contents("read_dir")(0)).float_array_value()(2);
-	    head->phase_dir[0] = octave_value(h.contents("phase_dir")(0)).float_array_value()(0);
-	    head->phase_dir[1] = octave_value(h.contents("phase_dir")(0)).float_array_value()(1);
-	    head->phase_dir[2] = octave_value(h.contents("phase_dir")(0)).float_array_value()(2);
-	    head->slice_dir[0] = octave_value(h.contents("slice_dir")(0)).float_array_value()(0);
-	    head->slice_dir[1] = octave_value(h.contents("slice_dir")(0)).float_array_value()(1);
-	    head->slice_dir[2] = octave_value(h.contents("slice_dir")(0)).float_array_value()(2);
-	    head->patient_table_position[0] = octave_value(h.contents("patient_table_position")(0)).float_array_value()(0);
-	    head->patient_table_position[1] = octave_value(h.contents("patient_table_position")(0)).float_array_value()(1);
-	    head->patient_table_position[2] = octave_value(h.contents("patient_table_position")(0)).float_array_value()(2);
-	    head->average = octave_value(h.contents("average")(0)).uint16_scalar_value();
-	    head->slice = octave_value(h.contents("slice")(0)).uint16_scalar_value();
-	    head->contrast = octave_value(h.contents("contrast")(0)).uint16_scalar_value();
-	    head->phase = octave_value(h.contents("phase")(0)).uint16_scalar_value();
-	    head->repetition = octave_value(h.contents("repetition")(0)).uint16_scalar_value();
-	    head->set = octave_value(h.contents("set")(0)).uint16_scalar_value();
-	    head->acquisition_time_stamp = octave_value(h.contents("acquisition_time_stamp")(0)).uint32_scalar_value();
-	    head->physiology_time_stamp[0] = octave_value(h.contents("physiology_time_stamp")(0)).uint32_array_value()(0);
-	    head->physiology_time_stamp[1] = octave_value(h.contents("physiology_time_stamp")(0)).uint32_array_value()(1);
-	    head->physiology_time_stamp[2] = octave_value(h.contents("physiology_time_stamp")(0)).uint32_array_value()(2);
-	    head->image_data_type          = octave_value(h.contents("image_data_type")(0)).uint16_scalar_value();
-	    head->image_data_type          = octave_value(h.contents("image_data_type")(0)).uint16_scalar_value();
-	    head->image_data_type          = octave_value(h.contents("image_data_type")(0)).uint16_scalar_value();
-	    head->image_data_type          = octave_value(h.contents("image_data_type")(0)).uint16_scalar_value();
-	    head->user_int[0]              = octave_value(h.contents("user_int")(0)).int32_array_value()(0);
-	    head->user_int[1]              = octave_value(h.contents("user_int")(0)).int32_array_value()(1);
-	    head->user_int[2]              = octave_value(h.contents("user_int")(0)).int32_array_value()(2);
-	    head->user_int[3]              = octave_value(h.contents("user_int")(0)).int32_array_value()(3);
-	    head->user_int[4]              = octave_value(h.contents("user_int")(0)).int32_array_value()(4);
-	    head->user_int[5]              = octave_value(h.contents("user_int")(0)).int32_array_value()(5);
-	    head->user_int[6]              = octave_value(h.contents("user_int")(0)).int32_array_value()(6);
-	    head->user_int[7]              = octave_value(h.contents("user_int")(0)).int32_array_value()(7);
-	    head->user_float[0]            = octave_value(h.contents("user_float")(0)).int32_array_value()(0);
-	    head->user_float[1]            = octave_value(h.contents("user_float")(0)).int32_array_value()(1);
-	    head->user_float[2]            = octave_value(h.contents("user_float")(0)).int32_array_value()(2);
-	    head->user_float[3]            = octave_value(h.contents("user_float")(0)).int32_array_value()(3);
-	    head->user_float[4]            = octave_value(h.contents("user_float")(0)).int32_array_value()(4);
-	    head->user_float[5]            = octave_value(h.contents("user_float")(0)).int32_array_value()(5);
-	    head->user_float[6]            = octave_value(h.contents("user_float")(0)).int32_array_value()(6);
-	    head->user_float[7]            = octave_value(h.contents("user_float")(0)).int32_array_value()(7);
-
-	    GadgetContainerMessage< hoNDArray<std::complex<float> > >* m2 =
-	    		new GadgetContainerMessage< hoNDArray<std::complex<float> > >();
-
-	    std::vector<unsigned int> dims;
-	    for (unsigned int i = 0; i < d.dims().length(); i++) {
-	    	dims.push_back(d.dims()(i));
-	    }
-
-	    try {
-	        m2->getObjectPtr()->create(&dims);
-	    } catch (...) {
-	    	GADGET_DEBUG1("Failed to allocate return array\n");
-	    	m1->release();
-	    }
-
-	    memcpy(m2->getObjectPtr()->get_data_ptr(), &d(0), sizeof(float)*2*d.nelem());
-
-	    m1->cont(m2);
-
-	    OctaveCommunicator::instance()->message_gadget(id, m1);
-	  }
-
-	  return octave_value_list ();
-}
diff --git a/gadgets/octave/OctaveCommunicator.cpp b/gadgets/octave/OctaveCommunicator.cpp
deleted file mode 100644
index c0aa5f2..0000000
--- a/gadgets/octave/OctaveCommunicator.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-#include "OctaveCommunicator.h"
-
-
-#include <iostream>
-
-using namespace Gadgetron;
-
-OctaveCommunicator* OctaveCommunicator::instance()
-{
-  if (!instance_) instance_ = new OctaveCommunicator();
-  return instance_;
-}
-
-OctaveCommunicator::OctaveCommunicator()
-  : mutex_("OctaveCommunicatorMutex")
-{
-  const char * argvv [] = {"" /* name of program, not relevant */, "--silent"}; 
-  octave_main (2, (char **) argvv, true /* embedded */);
-  octave_value_list in;
-  octave_value_list out;
-
-  const char* gadgetron_home = ACE_OS::getenv("GADGETRON_HOME");
-  std::string path_name = std::string(gadgetron_home) + std::string("/octave");
-  
-  in = octave_value (path_name.c_str());
-  out = feval ("addpath", in, 1);
-  
-}
-
-OctaveCommunicator::~OctaveCommunicator()
-{
-}
-
-void OctaveCommunicator::register_gadget(Gadget* g)
-{
-  mutex_.acquire();
-  gadget_map_[g->module()->name()] = g;
-  mutex_.release();
-}
-
-bool OctaveCommunicator::message_gadget(std::string gadget, ACE_Message_Block* m)
-{
-  std::map<std::string, Gadget*>::iterator it = gadget_map_.find(gadget);
-
-  if (it != gadget_map_.end()) {
-	  if (it->second->putq(m) < 0) {
-		  return false;
-	  } else {
-		  return true;
-	  }
-  } else {
-    std::cout << "Gadget with ID = " << gadget << " NOT FOUND!" << std::endl;
-    m->release();
-    return false;
-  }
-  return false;
-}
-
-octave_value_list OctaveCommunicator::octave_feval (const std::string &name, const octave_value_list &args, int nargout)
-{
-  mutex_.acquire();
-  octave_value_list out = feval(name,args,nargout);
-  mutex_.release();
-
-  return out;
-}
-
-OctaveCommunicator* OctaveCommunicator::instance_ = NULL;
diff --git a/gadgets/octave/OctaveCommunicator.h b/gadgets/octave/OctaveCommunicator.h
deleted file mode 100644
index 2fd2145..0000000
--- a/gadgets/octave/OctaveCommunicator.h
+++ /dev/null
@@ -1,40 +0,0 @@
-#ifndef OCTAVECOMMUNICATOR_H
-#define OCTAVECOMMUNICATOR_H
-
-#include <ace/Synch.h>
-#include <ace/Mutex.h>
-
-#include <octave/oct.h>
-#include <octave/octave.h>
-#include <octave/parse.h>
-
-
-#include "Gadget.h"
-
-#include <map>
-#include <string>
-
-#include "gadgetronoctavecommunicator_export.h"
-
-class EXPORTGADGETSOCTAVECOMMUNICATOR OctaveCommunicator
-{
-
- public:
-  static OctaveCommunicator* instance(); 
-  
-  void register_gadget(Gadgetron::Gadget* g);
-  bool message_gadget(std::string g, ACE_Message_Block* m);
-  octave_value_list octave_feval (const std::string &name, const octave_value_list &args=octave_value_list(), int nargout=0);
-
- private:
-  OctaveCommunicator();
-  ~OctaveCommunicator();
-  
-  static OctaveCommunicator* instance_;
-  ACE_Thread_Mutex mutex_;
-  
-  std::map<std::string, Gadgetron::Gadget*> gadget_map_;
-};
-
-
-#endif
diff --git a/gadgets/octave/OctaveGadget.cpp b/gadgets/octave/OctaveGadget.cpp
deleted file mode 100644
index ea63574..0000000
--- a/gadgets/octave/OctaveGadget.cpp
+++ /dev/null
@@ -1,232 +0,0 @@
-#include "OctaveGadget.h"
-
-namespace Gadgetron {
-
- int AcquisitionOctaveGadget::process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
-	      GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
- {
-
-   //We want to avoid a deadlock for the Python GIL if this python call results in an output that the GadgetReference will not be able to get rid of.
-   //This is kind of a nasty busy wait, maybe we should add an event handler to the NotificationStrategy of the Q or something, but for now, this will do it.
-   while (this->next()->msg_queue()->is_full()) {
-     //GADGET_DEBUG2("Gadget (%s) sleeping while downstream Gadget (%s) does some work\n", this->module()->name(), this->next()->module()->name());
-     ACE_Time_Value tv(0,10000); //Sleep for 10ms while the downstream Gadget does some work
-      ACE_OS::sleep(tv);
-   }
-
-
-	Octave_map m;
-	ISMRMRD::AcquisitionHeader h = *m1->getObjectPtr();
-
-	m.assign("version",                    h.version);
-	m.assign("flags",                      h.flags);
-	m.assign("measurement_uid",            h.measurement_uid);
-	m.assign("scan_counter",               h.scan_counter);
-	m.assign("acquisition_time_stamp",     h.acquisition_time_stamp);
-
-	dim_vector d(1); d(0) = 3;
-	uint32NDArray phys_time(d);
-	memcpy(&phys_time(0),h.physiology_time_stamp,sizeof(uint32_t)*3);
-	m.assign("physiology_time_stamp",            octave_value(phys_time));
-
-
-	m.assign("number_of_samples",                h.number_of_samples);
-	m.assign("available_channels",               h.available_channels);
-	m.assign("active_channels",                  h.active_channels);
-
-	d(0) = 16;
-	uint64NDArray channel_mask(d);
-	memcpy(&channel_mask(0),h.channel_mask,sizeof(uint64_t)*16);
-	m.assign("channel_mask",                     octave_value(channel_mask));
-
-	m.assign("discard_pre",                      h.discard_pre);
-	m.assign("discard_post",                     h.discard_post);
-	m.assign("center_sample",                    h.center_sample);
-	m.assign("encoding_space_ref",               h.encoding_space_ref);
-	m.assign("trajectory_dimensions",            h.trajectory_dimensions);
-	m.assign("sample_time_us",                   h.sample_time_us);
-
-	d(0) = 3;
-	FloatNDArray position(d);
-	memcpy(&position(0),h.position,sizeof(float)*3);
-	m.assign("position",                         octave_value(position));
-
-	d(0) = 3;
-	FloatNDArray read_dir(d);
-	memcpy(&read_dir(0),h.read_dir,sizeof(float)*3);
-	m.assign("read_dir",            octave_value(read_dir));
-
-	d(0) = 3;
-	FloatNDArray phase_dir(d);
-	memcpy(&phase_dir(0),h.phase_dir,sizeof(float)*3);
-	m.assign("phase_dir",            octave_value(phase_dir));
-
-	d(0) = 3;
-	FloatNDArray slice_dir(d);
-	memcpy(&slice_dir(0),h.slice_dir,sizeof(float)*3);
-	m.assign("slice_dir",            octave_value(slice_dir));
-
-	d(0) = 3;
-	FloatNDArray patient_table_position(d);
-	memcpy(&patient_table_position(0),h.patient_table_position,sizeof(float)*3);
-	m.assign("patient_table_position",         octave_value(patient_table_position));
-
-	Octave_map idx;
-
-	idx.assign("kspace_encode_step_1",       h.idx.kspace_encode_step_1);
-	idx.assign("kspace_encode_step_2",       h.idx.kspace_encode_step_2);
-	idx.assign("average",                    h.idx.average);
-	idx.assign("slice",                      h.idx.slice);
-	idx.assign("contrast",                   h.idx.contrast);
-	idx.assign("phase",                      h.idx.phase);
-	idx.assign("repetition",                 h.idx.phase);
-	idx.assign("phase",                      h.idx.repetition);
-	idx.assign("set",                        h.idx.set);
-	idx.assign("segment",                    h.idx.segment);
-
-	d(0) = 8;
-	uint16NDArray user(d);
-	memcpy(&user(0),h.idx.user,sizeof(uint16_t)*8);
-	idx.assign("user",                    octave_value(user));
-	m.assign("idx",                         octave_value(idx));
-
-	d(0) = 8;
-	int32NDArray user_int(d);
-	memcpy(&user_int(0),h.user_int,sizeof(int32_t)*8);
-	m.assign("user_int",                         octave_value(user_int));
-
-	d(0) = 8;
-	FloatNDArray user_float(d);
-	memcpy(&user_float(0),h.user_float,sizeof(float)*8);
-	m.assign("user_float",                         octave_value(user_float));
-
-	//Make a copy of the data for sending to Octave.
-    dim_vector dims;
-    for (unsigned int i =0; i < m2->getObjectPtr()->get_number_of_dimensions(); i++) {
-    	dims(i) = m2->getObjectPtr()->get_size(i);
-    }
-    FloatComplexNDArray data(dims);
-    memcpy(data.fortran_vec(),m2->getObjectPtr()->get_data_ptr(),sizeof(float)*2*data.nelem());
-
-    octave_value_list in;
-    in(0) = m;
-    in(1) = data;
-
-    octave_value_list out = OctaveCommunicator::instance()->octave_feval (datafunc_->c_str(), in, 2);
-
-    //We are now done with the data
-    m1->release();
-
-    return GADGET_OK;
- }
-
- int ImageOctaveGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1,
-	      GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2)
- {
-
-   //We want to avoid a deadlock for the Python GIL if this python call results in an output that the GadgetReference will not be able to get rid of.
-   //This is kind of a nasty busy wait, maybe we should add an event handler to the NotificationStrategy of the Q or something, but for now, this will do it.
-   while (this->next()->msg_queue()->is_full()) {
-     //GADGET_DEBUG2("Gadget (%s) sleeping while downstream Gadget (%s) does some work\n", this->module()->name(), this->next()->module()->name());
-     ACE_Time_Value tv(0,10000); //Sleep for 10ms while the downstream Gadget does some work
-     ACE_OS::sleep(tv);
-   }
-
-
-	Octave_map m;
-	ISMRMRD::ImageHeader h = *m1->getObjectPtr();
-
-	m.assign("version",                    h.version);
-	m.assign("flags",                      h.flags);
-	m.assign("measurement_uid",            h.measurement_uid);
-
-	dim_vector d(1);
-	d(0) = 3;
-	uint16NDArray matrix_size(d);
-	memcpy(&matrix_size(0),h.matrix_size,sizeof(uint16_t)*3);
-	m.assign("matrix_size",            octave_value(matrix_size));
-
-	d(0) = 3;
-	FloatNDArray field_of_view(d);
-	memcpy(&field_of_view(0),h.field_of_view,sizeof(float)*3);
-	m.assign("field_of_view",            octave_value(field_of_view));
-
-	m.assign("channels",                    h.channels);
-
-	d(0) = 3;
-	FloatNDArray position(d);
-	memcpy(&position(0),h.position,sizeof(float)*3);
-	m.assign("position",            octave_value(position));
-
-	d(0) = 3;
-	FloatNDArray read_dir(d);
-	memcpy(&read_dir(0),h.read_dir,sizeof(float)*3);
-	m.assign("read_dir",            octave_value(read_dir));
-
-	d(0) = 3;
-	FloatNDArray phase_dir(d);
-	memcpy(&phase_dir(0),h.phase_dir,sizeof(float)*3);
-	m.assign("phase_dir",            octave_value(phase_dir));
-
-	d(0) = 3;
-	FloatNDArray slice_dir(d);
-	memcpy(&slice_dir(0),h.slice_dir,sizeof(float)*3);
-	m.assign("slice_dir",            octave_value(slice_dir));
-
-	d(0) = 3;
-	FloatNDArray patient_table_position(d);
-	memcpy(&patient_table_position(0),h.patient_table_position,sizeof(float)*3);
-	m.assign("patient_table_position",            octave_value(patient_table_position));
-
-	m.assign("average",                    h.average);
-	m.assign("slice",                    h.slice);
-	m.assign("contrast",                    h.contrast);
-	m.assign("phase",                    h.phase);
-	m.assign("repetition",                    h.repetition);
-	m.assign("set",                    h.set);
-
-	d(0) = 3;
-	uint32NDArray physiology_time_stamp(d);
-	memcpy(&physiology_time_stamp(0),h.physiology_time_stamp,sizeof(uint32_t)*3);
-	m.assign("physiology_time_stamp",            octave_value(physiology_time_stamp));
-
-	m.assign("image_data_type",                    h.image_data_type);
-	m.assign("image_type",                    h.image_type);
-
-	m.assign("image_index",                    h.image_index);
-	m.assign("image_series_index",            h.image_series_index);
-
-	d(0) = 8;
-	int32NDArray user_int(d);
-	memcpy(&user_int(0),h.user_int,sizeof(int32_t)*8);
-	m.assign("user_int",                         octave_value(user_int));
-
-	d(0) = 8;
-	FloatNDArray user_float(d);
-	memcpy(&user_float(0),h.user_float,sizeof(float)*8);
-	m.assign("user_float",                         octave_value(user_float));
-
-    dim_vector dims;
-    for (unsigned int i =0; i < m2->getObjectPtr()->get_number_of_dimensions(); i++) {
-    	dims(i) = m2->getObjectPtr()->get_size(i);
-    }
-
-    FloatComplexNDArray data(dims);
-    memcpy(&data(0),m2->getObjectPtr()->get_data_ptr(),sizeof(float)*2*data.nelem());
-
-    octave_value_list in;
-    in(0) = m; //octave_value (this->next()->module()->name());
-    in(1) = data;
-
-    octave_value_list out = OctaveCommunicator::instance()->octave_feval (datafunc_->c_str(), in, 2);
-
-    m1->release();
-
-    return GADGET_OK;
- }
-
-
-GADGET_FACTORY_DECLARE(AcquisitionOctaveGadget)
-GADGET_FACTORY_DECLARE(ImageOctaveGadget)
-
-}
diff --git a/gadgets/octave/OctaveGadget.h b/gadgets/octave/OctaveGadget.h
deleted file mode 100644
index 647de7b..0000000
--- a/gadgets/octave/OctaveGadget.h
+++ /dev/null
@@ -1,95 +0,0 @@
-#pragma once 
-
-#include <octave/oct.h>
-#include <octave/octave.h>
-#include <octave/parse.h>
-#include <octave/ov-struct.h>
-
-#include "gadgetronoctave_export.h"
-#include "Gadget.h"
-#include "Gadgetron.h"
-#include "hoNDArray.h"
-#include "ismrmrd.h"
-
-#include "OctaveCommunicator.h"
-#include "GadgetStreamController.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include <complex>
-
-namespace Gadgetron
-{
-
-template <class T> class OctaveGadget :
-public Gadgetron::Gadget2<T, hoNDArray< std::complex<float> > >
-{
- public:
-  //GADGET_DECLARE(OctaveGadget);
-  //virtual ~OctaveGadget();
-
- protected:
-
-  int process_config(ACE_Message_Block* mb)
-  {
-
-    path_        = this->get_string_value("path");
-    reffunc_     = this->get_string_value("gadget_reference_function");
-    datafunc_    = this->get_string_value("input_function");
-    configfunc_  = this->get_string_value("config_function");
-
-    GADGET_DEBUG2("OCTAVE Ref Function    : %s\n", reffunc_.get()->c_str());
-    GADGET_DEBUG2("OCTAVE Data Function   : %s\n", datafunc_.get()->c_str());
-    GADGET_DEBUG2("OCTAVE Config Function : %s\n", configfunc_.get()->c_str());
-
-    OctaveCommunicator::instance()->register_gadget(this);
-    OctaveCommunicator::instance()->register_gadget(this->controller_->find_gadget(this->next()->module()->name()));
-
-    octave_value_list in = octave_value (path_->c_str());
-    octave_value_list out = OctaveCommunicator::instance()->octave_feval ("addpath", in, 1);
-
-    in(0) = octave_value(this->module()->name());
-    in(1) = octave_value(this->next()->module()->name());
-    out = OctaveCommunicator::instance()->octave_feval(reffunc_->c_str(), in, 2);
-
-    in(0) = octave_value(std::string(mb->rd_ptr(),mb->length()));
-    out = OctaveCommunicator::instance()->octave_feval(configfunc_->c_str(), in, 1);
-
-    return GADGET_OK;
-  }
-
-protected:
-  boost::shared_ptr<std::string> path_;
-  boost::shared_ptr<std::string> reffunc_;
-  boost::shared_ptr<std::string> datafunc_;
-  boost::shared_ptr<std::string> configfunc_;
-
-
-};
-
-
-
-class EXPORTGADGETSOCTAVE AcquisitionOctaveGadget :
-public OctaveGadget<ISMRMRD::AcquisitionHeader>
-{
- public:
-  GADGET_DECLARE(AcquisitionOctaveGadget);
-  
-  int process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* m1,
-  	      GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2);
-
-};
-
-class EXPORTGADGETSOCTAVE ImageOctaveGadget :
-public OctaveGadget<ISMRMRD::ImageHeader>
-{
- public:
-  GADGET_DECLARE(ImageOctaveGadget);
-
-  int process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1,
-  	      GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2);
-
-};
-
-}
diff --git a/gadgets/octave/XMLGetXPath.cpp b/gadgets/octave/XMLGetXPath.cpp
deleted file mode 100644
index 4a2b3d1..0000000
--- a/gadgets/octave/XMLGetXPath.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-#include <octave/oct.h>
-#include <iostream>
-
-#include "pugixml.hpp"
-     
-DEFUN_DLD (XMLGetXPath, args, nargout,
-	   "XMLGetXPath: Returns the text contents of the xml node with the given XPATH")
-{
-  int nargin = args.length ();
-
-  octave_value retval;
-     
-  if (nargin != 2) {
-    print_usage(); 
-  } else {
-    std::string xml(args(0).string_value());
-    std::string xpath(args(1).string_value());
-
-    pugi::xml_document doc;
-    
-    pugi::xml_parse_result result = doc.load_buffer_inplace(const_cast<char*>(xml.c_str()), xml.length());
-
-    if (!result) {
-      std::cout << "XML parsed with errors." << std::endl;
-      std::cout << "Error description: " << result.description() << std::endl;
-      return retval;
-    }
-
-    pugi::xpath_node target_node = doc.select_single_node(xpath.c_str());
-
-    retval = octave_value(target_node.node().child_value());
-  }
-
-  return retval;
-}
diff --git a/gadgets/octave/gadgetron_octave_export.h b/gadgets/octave/gadgetron_octave_export.h
deleted file mode 100644
index 5ca4185..0000000
--- a/gadgets/octave/gadgetron_octave_export.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * gadgetroncore_export.h
- *
- *  Created on: Jan 28, 2013
- *      Author: Michael S. Hansen
- */
-
-#ifndef GADGETRONOCTAVE_EXPORT_H_
-#define GADGETRONOCTAVE_EXPORT_H_
-
-
-#if defined (WIN32)
-#if defined (__BUILD_GADGETRON_OCTAVE__) || defined (gadgetronoctave_EXPORTS)
-#define EXPORTGADGETSOCTAVE __declspec(dllexport)
-#else
-#define EXPORTGADGETSOCTAVE __declspec(dllimport)
-#endif
-#else
-#define EXPORTGADGETSOCTAVE
-#endif
-
-
-#endif /* GADGETRONOCTAVE_EXPORT_H_ */
diff --git a/gadgets/octave/gadgetron_octavecommunicator_export.h b/gadgets/octave/gadgetron_octavecommunicator_export.h
deleted file mode 100644
index 7a04095..0000000
--- a/gadgets/octave/gadgetron_octavecommunicator_export.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * gadgetroncore_export.h
- *
- *  Created on: Jan 28, 2013
- *      Author: Michael S. Hansen
- */
-
-#ifndef GADGETRONOCTAVECOMMUNICATOR_EXPORT_H_
-#define GADGETRONOCTAVECOMMUNICATOR_EXPORT_H_
-
-
-#if defined (WIN32)
-#if defined (__BUILD_GADGETRON_OCTAVECOMMUNICATOR__) || defined (gadgetronOctaveCommunicator_EXPORTS)
-#define EXPORTGADGETSOCTAVECOMMUNICATOR __declspec(dllexport)
-#else
-#define EXPORTGADGETSOCTAVECOMMUNICATOR __declspec(dllimport)
-#endif
-#else
-#define EXPORTGADGETSOCTAVECOMMUNICATOR
-#endif
-
-
-#endif /* GADGETRONOCTAVE_EXPORT_H_ */
diff --git a/gadgets/octave/octave.xml b/gadgets/octave/octave.xml
deleted file mode 100644
index 5e749c6..0000000
--- a/gadgets/octave/octave.xml
+++ /dev/null
@@ -1,84 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-        xmlns="http://gadgetron.sf.net/gadgetron"
-        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-        
-    <reader>
-      <slot>1008</slot>
-      <dll>gadgetron_mricore</dll>
-      <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-    </reader>
-  
-    <writer>
-      <slot>1004</slot>
-      <dll>gadgetron_mricore</dll>
-      <classname>MRIImageWriterCPLX</classname>
-    </writer>
-    <writer>
-      <slot>1005</slot>
-      <dll>gadgetron_mricore</dll>
-      <classname>MRIImageWriterFLOAT</classname>
-    </writer>
-    <writer>
-      <slot>1006</slot>
-      <dll>gadgetron_mricore</dll>
-      <classname>MRIImageWriterUSHORT</classname>
-    </writer>
-    
-    <gadget>
-      <name>OctaveDownsample</name>
-      <dll>gadgetron_octave</dll>
-      <classname>AcquisitionOctaveGadget</classname>
-      <!--
-   	  <property><name>path</name>                  <value>/home/myuser/scripts/python</value></property>
-      -->
-      <property><name>gadget_reference_function</name>    <value>gadget_reference_downsample_2x</value></property>
-      <property><name>input_function</name>               <value>downsample_2x</value></property>
-      <property><name>config_function</name>              <value>configure_downsample_2x</value></property> 
-    </gadget>
-
-    <gadget>
-      <name>OctaveAccumulate</name>
-      <dll>gadgetron_octave</dll>
-      <classname>AcquisitionOctaveGadget</classname>
-      <!--
-   	  <property><name>path</name>                  <value>/home/myuser/scripts/python</value></property>
-      -->
-      <property><name>gadget_reference_function</name>    <value>gadget_reference_accumulator</value></property>
-      <property><name>input_function</name>               <value>accumulator</value></property>
-      <property><name>config_function</name>              <value>configure_accumulator</value></property> 
-    </gadget>
-
-    <!--
-    <gadget>
-      <name>Acc</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>AccumulatorGadget</classname>
-    </gadget>
-
-    <gadget>
-      <name>FFT</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>FFTGadget</classname>
-    </gadget>
-
-    <gadget>
-      <name>CropCombine</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>CropAndCombineGadget</classname>
-    </gadget>
-    -->
-
-    <gadget>
-      <name>Extract</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ExtractGadget</classname>
-    </gadget>  
-  
-    <gadget>
-      <name>ImageFinishFLOAT</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetFLOAT</classname>
-    </gadget>
-
-</gadgetronStreamConfiguration>
diff --git a/gadgets/octave/octave/accumulator.m b/gadgets/octave/octave/accumulator.m
deleted file mode 100644
index daca008..0000000
--- a/gadgets/octave/octave/accumulator.m
+++ /dev/null
@@ -1,29 +0,0 @@
-function accumulator(head, data)
-  global accumulator_calling_gadget;
-  global accumulator_next_gadget;
-  global accumulator_buffer;
-  global accumulator_center_line;
-
-   line_offset = bitshift(size(accumulator_buffer,2),-1) - accumulator_center_line;
-   %size(accumulator_buffer(:,head.idx.kspace_encode_step_1+line_offset+1,1,:))
-   %size(reshape(data,[size(data,1),1,1,size(data,2))])
-   %size(data)
-   
-   accumulator_buffer(:,head.idx.kspace_encode_step_1+line_offset+1,1,:) = reshape(data,size(data,1),1,1,size(data,2));
-   
-  if (bitand(head.flags, bitshift(1,7)) > 0),
-
-
-    img = ismrm_transform_kspace_to_image(accumulator_buffer,[1,2,3]);
-    img = sqrt(sum(abs(img).^2,4));
-
-    img_head = struct();
-  
-    img_head.version = h.version;
-    img_head.flags = 0;
-    img_head.measurement_uid = h.measurement_uid;
-    img_head.matrix_size = [size(img,1),size(img,2),size(img,3)];
-    img_head.channels = 1; 
-    GadgetronReturnIsmrmrdImage(accumulator_next_gadget, head, single(img));
-  end
-end
diff --git a/gadgets/octave/octave/configure_accumulator.m b/gadgets/octave/octave/configure_accumulator.m
deleted file mode 100644
index 8bea5b8..0000000
--- a/gadgets/octave/octave/configure_accumulator.m
+++ /dev/null
@@ -1,18 +0,0 @@
-function configure_accumulator(XMLconfig)
-   global accumulator_buffer
-   global accumulator_center_line 
-
-   matrix_size = [str2num(XMLGetXPath(XMLconfig, '//ismrmrdHeader/encoding/reconSpace/matrixSize/x')), ...
-			 str2num(XMLGetXPath(XMLconfig, '//ismrmrdHeader/encoding/reconSpace/matrixSize/y')), ...
-			 str2num(XMLGetXPath(XMLconfig, '//ismrmrdHeader/encoding/reconSpace/matrixSize/z')), ...
-                         str2num(XMLGetXPath(XMLconfig, '//ismrmrdHeader/acquisitionSystemInformation/receiverChannels'))];
-
-
-   accumulator_center_line = str2num(XMLGetXPath(XMLconfig, '//ismrmrdHeader/encoding/encodingLimits/kspace_encoding_step_1/center'));
-
-   fprintf('Accumulator: Reconstructing on matrix [%d, %d, %d]\n', matrix_size(1), matrix_size(2), matrix_size(3));
-
-
-   accumulator_buffer = single(zeros(matrix_size));
-   
-end
diff --git a/gadgets/octave/octave/configure_downsample_2x.m b/gadgets/octave/octave/configure_downsample_2x.m
deleted file mode 100644
index 688a8ca..0000000
--- a/gadgets/octave/octave/configure_downsample_2x.m
+++ /dev/null
@@ -1,3 +0,0 @@
-function configure_downsample_2x(XMLconfig)
- fprintf("Skipping configuration for downsampling\n");
-end
diff --git a/gadgets/octave/octave/downsample_2x.m b/gadgets/octave/octave/downsample_2x.m
deleted file mode 100644
index 67a8a40..0000000
--- a/gadgets/octave/octave/downsample_2x.m
+++ /dev/null
@@ -1,11 +0,0 @@
-function downsample_2x(head, data)
-  global downsample_2x_calling_gadget;
-  global downsample_2x_next_gadget;
-
-  data = ismrm_transform_kspace_to_image(data,1);
-  data = data([1:bitshift(size(data,1),-1)]+bitshift(size(data,1),-2),:);
-  data = ismrm_transform_image_to_kspace(data,1);
-  head.number_of_samples = size(data,1);
-
-  GadgetronReturnIsmrmrdAcquisition(downsample_2x_next_gadget, head, single(data));
-end
diff --git a/gadgets/octave/octave/gadget_reference_accumulator.m b/gadgets/octave/octave/gadget_reference_accumulator.m
deleted file mode 100644
index bc25de7..0000000
--- a/gadgets/octave/octave/gadget_reference_accumulator.m
+++ /dev/null
@@ -1,7 +0,0 @@
-function gadget_reference_accumulator(calling_gadget,next_gadget)
-  global accumulator_calling_gadget;
-  global accumulator_next_gadget;
-	
-  accumulator_calling_gadget = calling_gadget
-  accumulator_next_gadget = next_gadget
-end
diff --git a/gadgets/octave/octave/gadget_reference_downsample_2x.m b/gadgets/octave/octave/gadget_reference_downsample_2x.m
deleted file mode 100644
index 08ad380..0000000
--- a/gadgets/octave/octave/gadget_reference_downsample_2x.m
+++ /dev/null
@@ -1,7 +0,0 @@
-function gadget_reference_downsample_2x(calling_gadget,next_gadget)
-  global downsample_2x_calling_gadget;
-  global downsample_2x_next_gadget;
-	
-  downsample_2x_calling_gadget = calling_gadget
-  downsample_2x_next_gadget = next_gadget
-end
diff --git a/gadgets/octave/octave/ismrm_transform_image_to_kspace.m b/gadgets/octave/octave/ismrm_transform_image_to_kspace.m
deleted file mode 100644
index 2c22f78..0000000
--- a/gadgets/octave/octave/ismrm_transform_image_to_kspace.m
+++ /dev/null
@@ -1,34 +0,0 @@
-function [k] = ismrm_transform_image_to_kspace(img, dim)
-%
-%  [k] = ismrm_transform_image_to_kspace(img, dim)
-%
-%  Fourier transform from image space to k-space space along a given or all 
-%  dimensions
-%
-%  INPUT:
-%    - img     [x,y,..]      : image space data
-%    - dim     vector        : Vector with dimensions to transform
-%
-%  OUPUT:
-%    - k       [kx,ky,...]   : Data in k-space (along transformed dimensions)
-%
-%   Code made available for the ISMRM 2013 Sunrise Educational Course
-% 
-%   Michael S. Hansen (michael.hansen at nih.gov)
-%   Philip Beatty (philip.beatty at sri.utoronto.ca)
-%
-
-if nargin < 2,
-    dim = [];
-end    
-   
-if isempty(dim),
-    k = fftshift(fftn(ifftshift(img))) ./ sqrt(numel(img));
-else
-   k = img;
-   for d=1:length(dim),
-      k = fftshift(fft(ifftshift(k,dim(d)),[],dim(d)),dim(d)) ./ sqrt(size(k,d)); 
-   end
-end
-
-return
\ No newline at end of file
diff --git a/gadgets/octave/octave/ismrm_transform_kspace_to_image.m b/gadgets/octave/octave/ismrm_transform_kspace_to_image.m
deleted file mode 100644
index a8ce058..0000000
--- a/gadgets/octave/octave/ismrm_transform_kspace_to_image.m
+++ /dev/null
@@ -1,35 +0,0 @@
-function [img] = ismrm_transform_kspace_to_image(k, dim)
-%
-%  [img] = ismrm_transform_kspace_to_image(k, dim)
-%
-%  Fourier transform from k-space to image space along a given or all 
-%  dimensions
-%
-%  INPUT:
-%    - k       [kx,ky,..]    : k-space data
-%    - dim     vector        : Vector with dimensions to transform
-%
-%  OUPUT:
-%    - img    [x,y,...]      : Data in image space (along transformed
-%                                                   dimensions)
-%
-%   Code made available for the ISMRM 2013 Sunrise Educational Course
-% 
-%   Michael S. Hansen (michael.hansen at nih.gov)
-%   Philip Beatty (philip.beatty at sri.utoronto.ca)
-%
-
-if nargin < 2,
-    dim = [];
-end    
-   
-if isempty(dim),
-    img = fftshift(ifftn(ifftshift(k))) .* sqrt(numel(k));
-else
-   img = k;
-   for d=1:length(dim),
-      img = fftshift(ifft(ifftshift(img,dim(d)),[],dim(d)),dim(d)) .* sqrt(size(img,d)); 
-   end
-end
-
-return
\ No newline at end of file
diff --git a/gadgets/octave/octave/my_config_function.m b/gadgets/octave/octave/my_config_function.m
deleted file mode 100644
index 1a05ad7..0000000
--- a/gadgets/octave/octave/my_config_function.m
+++ /dev/null
@@ -1,3 +0,0 @@
-function my_config_function(XMLconfig)
-   XMLconfig
-end
diff --git a/gadgets/octave/octave/my_gadget_reference.m b/gadgets/octave/octave/my_gadget_reference.m
deleted file mode 100644
index b1ce45e..0000000
--- a/gadgets/octave/octave/my_gadget_reference.m
+++ /dev/null
@@ -1,7 +0,0 @@
-function my_gadget_reference(calling_gadget,next_gadget)
-  global my_calling_gadget;
-  global my_next_gadget;
-	
-  my_calling_gadget = calling_gadget
-  my_next_gadget = next_gadget
-end
diff --git a/gadgets/octave/octave/my_recon_function.m b/gadgets/octave/octave/my_recon_function.m
deleted file mode 100644
index c092a76..0000000
--- a/gadgets/octave/octave/my_recon_function.m
+++ /dev/null
@@ -1,8 +0,0 @@
-function my_recon_function(head, data)
-  global my_calling_gadget;
-  global my_next_gadget;
-  f = hamming(size(data,1));
-  f = repmat(f,1,size(data,2));
-  data = data .* f;
-  GadgetronReturnIsmrmrdAcquisition(my_next_gadget, head, data);
-end
diff --git a/gadgets/octave/pugiconfig.hpp b/gadgets/octave/pugiconfig.hpp
deleted file mode 100644
index c219671..0000000
--- a/gadgets/octave/pugiconfig.hpp
+++ /dev/null
@@ -1,69 +0,0 @@
-/**
- * pugixml parser - version 1.2
- * --------------------------------------------------------
- * Copyright (C) 2006-2012, by Arseny Kapoulkine (arseny.kapoulkine at gmail.com)
- * Report bugs and download new versions at http://pugixml.org/
- *
- * This library is distributed under the MIT License. See notice at the end
- * of this file.
- *
- * This work is based on the pugxml parser, which is:
- * Copyright (C) 2003, by Kristen Wegner (kristen at tima.net)
- */
-
-#ifndef HEADER_PUGICONFIG_HPP
-#define HEADER_PUGICONFIG_HPP
-
-// Uncomment this to enable wchar_t mode
-// #define PUGIXML_WCHAR_MODE
-
-// Uncomment this to disable XPath
-// #define PUGIXML_NO_XPATH
-
-// Uncomment this to disable STL
-// #define PUGIXML_NO_STL
-
-// Uncomment this to disable exceptions
-// #define PUGIXML_NO_EXCEPTIONS
-
-// Set this to control attributes for public classes/functions, i.e.:
-// #define PUGIXML_API __declspec(dllexport) // to export all public symbols from DLL
-// #define PUGIXML_CLASS __declspec(dllimport) // to import all classes from DLL
-// #define PUGIXML_FUNCTION __fastcall // to set calling conventions to all public functions to fastcall
-// In absence of PUGIXML_CLASS/PUGIXML_FUNCTION definitions PUGIXML_API is used instead
-
-// Uncomment this to switch to header-only version
-// #define PUGIXML_HEADER_ONLY
-// #include "pugixml.cpp"
-
-// Tune these constants to adjust memory-related behavior
-// #define PUGIXML_MEMORY_PAGE_SIZE 32768
-// #define PUGIXML_MEMORY_OUTPUT_STACK 10240
-// #define PUGIXML_MEMORY_XPATH_PAGE_SIZE 4096
-
-#endif
-
-/**
- * Copyright (c) 2006-2012 Arseny Kapoulkine
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
diff --git a/gadgets/octave/pugixml.cpp b/gadgets/octave/pugixml.cpp
deleted file mode 100644
index 4035ab1..0000000
--- a/gadgets/octave/pugixml.cpp
+++ /dev/null
@@ -1,10250 +0,0 @@
-/**
- * pugixml parser - version 1.2
- * --------------------------------------------------------
- * Copyright (C) 2006-2012, by Arseny Kapoulkine (arseny.kapoulkine at gmail.com)
- * Report bugs and download new versions at http://pugixml.org/
- *
- * This library is distributed under the MIT License. See notice at the end
- * of this file.
- *
- * This work is based on the pugxml parser, which is:
- * Copyright (C) 2003, by Kristen Wegner (kristen at tima.net)
- */
-
-#ifndef SOURCE_PUGIXML_CPP
-#define SOURCE_PUGIXML_CPP
-
-#include "pugixml.hpp"
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <wchar.h>
-
-#ifndef PUGIXML_NO_XPATH
-#	include <math.h>
-#	include <float.h>
-#	ifdef PUGIXML_NO_EXCEPTIONS
-#		include <setjmp.h>
-#	endif
-#endif
-
-#ifndef PUGIXML_NO_STL
-#	include <istream>
-#	include <ostream>
-#	include <string>
-#endif
-
-// For placement new
-#include <new>
-
-#ifdef _MSC_VER
-#	pragma warning(push)
-#	pragma warning(disable: 4127) // conditional expression is constant
-#	pragma warning(disable: 4324) // structure was padded due to __declspec(align())
-#	pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
-#	pragma warning(disable: 4702) // unreachable code
-#	pragma warning(disable: 4996) // this function or variable may be unsafe
-#	pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
-#endif
-
-#ifdef __INTEL_COMPILER
-#	pragma warning(disable: 177) // function was declared but never referenced 
-#	pragma warning(disable: 279) // controlling expression is constant
-#	pragma warning(disable: 1478 1786) // function was declared "deprecated"
-#	pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
-#endif
-
-#if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
-#	pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
-#endif
-
-#ifdef __BORLANDC__
-#	pragma option push
-#	pragma warn -8008 // condition is always false
-#	pragma warn -8066 // unreachable code
-#endif
-
-#ifdef __SNC__
-// Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
-#	pragma diag_suppress=178 // function was declared but never referenced
-#	pragma diag_suppress=237 // controlling expression is constant
-#endif
-
-// Inlining controls
-#if defined(_MSC_VER) && _MSC_VER >= 1300
-#	define PUGI__NO_INLINE __declspec(noinline)
-#elif defined(__GNUC__)
-#	define PUGI__NO_INLINE __attribute__((noinline))
-#else
-#	define PUGI__NO_INLINE 
-#endif
-
-// Simple static assertion
-#define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
-
-// Digital Mars C++ bug workaround for passing char loaded from memory via stack
-#ifdef __DMC__
-#	define PUGI__DMC_VOLATILE volatile
-#else
-#	define PUGI__DMC_VOLATILE
-#endif
-
-// Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
-#if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
-using std::memcpy;
-using std::memmove;
-#endif
-
-// In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
-#if defined(_MSC_VER) && !defined(__S3E__)
-#	define PUGI__MSVC_CRT_VERSION _MSC_VER
-#endif
-
-#ifdef PUGIXML_HEADER_ONLY
-#	define PUGI__NS_BEGIN namespace pugi { namespace impl {
-#	define PUGI__NS_END } }
-#	define PUGI__FN inline
-#	define PUGI__FN_NO_INLINE inline
-#else
-#	if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
-#		define PUGI__NS_BEGIN namespace pugi { namespace impl {
-#		define PUGI__NS_END } }
-#	else
-#		define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
-#		define PUGI__NS_END } } }
-#	endif
-#	define PUGI__FN
-#	define PUGI__FN_NO_INLINE PUGI__NO_INLINE
-#endif
-
-// uintptr_t
-#if !defined(_MSC_VER) || _MSC_VER >= 1600
-#	include <stdint.h>
-#else
-#	ifndef _UINTPTR_T_DEFINED
-// No native uintptr_t in MSVC6 and in some WinCE versions
-typedef size_t uintptr_t;
-#define _UINTPTR_T_DEFINED
-#	endif
-PUGI__NS_BEGIN
-	typedef unsigned __int8 uint8_t;
-	typedef unsigned __int16 uint16_t;
-	typedef unsigned __int32 uint32_t;
-PUGI__NS_END
-#endif
-
-// Memory allocation
-PUGI__NS_BEGIN
-	PUGI__FN void* default_allocate(size_t size)
-	{
-		return malloc(size);
-	}
-
-	PUGI__FN void default_deallocate(void* ptr)
-	{
-		free(ptr);
-	}
-
-	template <typename T>
-	struct xml_memory_management_function_storage
-	{
-		static allocation_function allocate;
-		static deallocation_function deallocate;
-	};
-
-	template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
-	template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
-
-	typedef xml_memory_management_function_storage<int> xml_memory;
-PUGI__NS_END
-
-// String utilities
-PUGI__NS_BEGIN
-	// Get string length
-	PUGI__FN size_t strlength(const char_t* s)
-	{
-		assert(s);
-
-	#ifdef PUGIXML_WCHAR_MODE
-		return wcslen(s);
-	#else
-		return strlen(s);
-	#endif
-	}
-
-	// Compare two strings
-	PUGI__FN bool strequal(const char_t* src, const char_t* dst)
-	{
-		assert(src && dst);
-
-	#ifdef PUGIXML_WCHAR_MODE
-		return wcscmp(src, dst) == 0;
-	#else
-		return strcmp(src, dst) == 0;
-	#endif
-	}
-
-	// Compare lhs with [rhs_begin, rhs_end)
-	PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
-	{
-		for (size_t i = 0; i < count; ++i)
-			if (lhs[i] != rhs[i])
-				return false;
-	
-		return lhs[count] == 0;
-	}
-	
-#ifdef PUGIXML_WCHAR_MODE
-	// Convert string to wide string, assuming all symbols are ASCII
-	PUGI__FN void widen_ascii(wchar_t* dest, const char* source)
-	{
-		for (const char* i = source; *i; ++i) *dest++ = *i;
-		*dest = 0;
-	}
-#endif
-PUGI__NS_END
-
-#if !defined(PUGIXML_NO_STL) || !defined(PUGIXML_NO_XPATH)
-// auto_ptr-like buffer holder for exception recovery
-PUGI__NS_BEGIN
-	struct buffer_holder
-	{
-		void* data;
-		void (*deleter)(void*);
-
-		buffer_holder(void* data_, void (*deleter_)(void*)): data(data_), deleter(deleter_)
-		{
-		}
-
-		~buffer_holder()
-		{
-			if (data) deleter(data);
-		}
-
-		void* release()
-		{
-			void* result = data;
-			data = 0;
-			return result;
-		}
-	};
-PUGI__NS_END
-#endif
-
-PUGI__NS_BEGIN
-	static const size_t xml_memory_page_size =
-	#ifdef PUGIXML_MEMORY_PAGE_SIZE
-		PUGIXML_MEMORY_PAGE_SIZE
-	#else
-		32768
-	#endif
-		;
-
-	static const uintptr_t xml_memory_page_alignment = 32;
-	static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1);
-	static const uintptr_t xml_memory_page_name_allocated_mask = 16;
-	static const uintptr_t xml_memory_page_value_allocated_mask = 8;
-	static const uintptr_t xml_memory_page_type_mask = 7;
-
-	struct xml_allocator;
-
-	struct xml_memory_page
-	{
-		static xml_memory_page* construct(void* memory)
-		{
-			if (!memory) return 0; //$ redundant, left for performance
-
-			xml_memory_page* result = static_cast<xml_memory_page*>(memory);
-
-			result->allocator = 0;
-			result->memory = 0;
-			result->prev = 0;
-			result->next = 0;
-			result->busy_size = 0;
-			result->freed_size = 0;
-
-			return result;
-		}
-
-		xml_allocator* allocator;
-
-		void* memory;
-
-		xml_memory_page* prev;
-		xml_memory_page* next;
-
-		size_t busy_size;
-		size_t freed_size;
-
-		char data[1];
-	};
-
-	struct xml_memory_string_header
-	{
-		uint16_t page_offset; // offset from page->data
-		uint16_t full_size; // 0 if string occupies whole page
-	};
-
-	struct xml_allocator
-	{
-		xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
-		{
-		}
-
-		xml_memory_page* allocate_page(size_t data_size)
-		{
-			size_t size = offsetof(xml_memory_page, data) + data_size;
-
-			// allocate block with some alignment, leaving memory for worst-case padding
-			void* memory = xml_memory::allocate(size + xml_memory_page_alignment);
-			if (!memory) return 0;
-
-			// align upwards to page boundary
-			void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1));
-
-			// prepare page structure
-			xml_memory_page* page = xml_memory_page::construct(page_memory);
-
-			page->memory = memory;
-			page->allocator = _root->allocator;
-
-			return page;
-		}
-
-		static void deallocate_page(xml_memory_page* page)
-		{
-			xml_memory::deallocate(page->memory);
-		}
-
-		void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
-
-		void* allocate_memory(size_t size, xml_memory_page*& out_page)
-		{
-			if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page);
-
-			void* buf = _root->data + _busy_size;
-
-			_busy_size += size;
-
-			out_page = _root;
-
-			return buf;
-		}
-
-		void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
-		{
-			if (page == _root) page->busy_size = _busy_size;
-
-			assert(ptr >= page->data && ptr < page->data + page->busy_size);
-			(void)!ptr;
-
-			page->freed_size += size;
-			assert(page->freed_size <= page->busy_size);
-
-			if (page->freed_size == page->busy_size)
-			{
-				if (page->next == 0)
-				{
-					assert(_root == page);
-
-					// top page freed, just reset sizes
-					page->busy_size = page->freed_size = 0;
-					_busy_size = 0;
-				}
-				else
-				{
-					assert(_root != page);
-					assert(page->prev);
-
-					// remove from the list
-					page->prev->next = page->next;
-					page->next->prev = page->prev;
-
-					// deallocate
-					deallocate_page(page);
-				}
-			}
-		}
-
-		char_t* allocate_string(size_t length)
-		{
-			// allocate memory for string and header block
-			size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
-			
-			// round size up to pointer alignment boundary
-			size_t full_size = (size + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1);
-
-			xml_memory_page* page;
-			xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
-
-			if (!header) return 0;
-
-			// setup header
-			ptrdiff_t page_offset = reinterpret_cast<char*>(header) - page->data;
-
-			assert(page_offset >= 0 && page_offset < (1 << 16));
-			header->page_offset = static_cast<uint16_t>(page_offset);
-
-			// full_size == 0 for large strings that occupy the whole page
-			assert(full_size < (1 << 16) || (page->busy_size == full_size && page_offset == 0));
-			header->full_size = static_cast<uint16_t>(full_size < (1 << 16) ? full_size : 0);
-
-			// round-trip through void* to avoid 'cast increases required alignment of target type' warning
-			// header is guaranteed a pointer-sized alignment, which should be enough for char_t
-			return static_cast<char_t*>(static_cast<void*>(header + 1));
-		}
-
-		void deallocate_string(char_t* string)
-		{
-			// this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
-			// we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
-
-			// get header
-			xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
-
-			// deallocate
-			size_t page_offset = offsetof(xml_memory_page, data) + header->page_offset;
-			xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
-
-			// if full_size == 0 then this string occupies the whole page
-			size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size;
-
-			deallocate_memory(header, full_size, page);
-		}
-
-		xml_memory_page* _root;
-		size_t _busy_size;
-	};
-
-	PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
-	{
-		const size_t large_allocation_threshold = xml_memory_page_size / 4;
-
-		xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
-		out_page = page;
-
-		if (!page) return 0;
-
-		if (size <= large_allocation_threshold)
-		{
-			_root->busy_size = _busy_size;
-
-			// insert page at the end of linked list
-			page->prev = _root;
-			_root->next = page;
-			_root = page;
-
-			_busy_size = size;
-		}
-		else
-		{
-			// insert page before the end of linked list, so that it is deleted as soon as possible
-			// the last page is not deleted even if it's empty (see deallocate_memory)
-			assert(_root->prev);
-
-			page->prev = _root->prev;
-			page->next = _root;
-
-			_root->prev->next = page;
-			_root->prev = page;
-		}
-
-		// allocate inside page
-		page->busy_size = size;
-
-		return page->data;
-	}
-PUGI__NS_END
-
-namespace pugi
-{
-	/// A 'name=value' XML attribute structure.
-	struct xml_attribute_struct
-	{
-		/// Default ctor
-		xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0)
-		{
-		}
-
-		uintptr_t header;
-
-		char_t* name;	///< Pointer to attribute name.
-		char_t*	value;	///< Pointer to attribute value.
-
-		xml_attribute_struct* prev_attribute_c;	///< Previous attribute (cyclic list)
-		xml_attribute_struct* next_attribute;	///< Next attribute
-	};
-
-	/// An XML document tree node.
-	struct xml_node_struct
-	{
-		/// Default ctor
-		/// \param type - node type
-		xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
-		{
-		}
-
-		uintptr_t header;
-
-		xml_node_struct*		parent;					///< Pointer to parent
-
-		char_t*					name;					///< Pointer to element name.
-		char_t*					value;					///< Pointer to any associated string data.
-
-		xml_node_struct*		first_child;			///< First child
-		
-		xml_node_struct*		prev_sibling_c;			///< Left brother (cyclic list)
-		xml_node_struct*		next_sibling;			///< Right brother
-		
-		xml_attribute_struct*	first_attribute;		///< First attribute
-	};
-}
-
-PUGI__NS_BEGIN
-	struct xml_document_struct: public xml_node_struct, public xml_allocator
-	{
-		xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0)
-		{
-		}
-
-		const char_t* buffer;
-	};
-
-	inline xml_allocator& get_allocator(const xml_node_struct* node)
-	{
-		assert(node);
-
-		return *reinterpret_cast<xml_memory_page*>(node->header & xml_memory_page_pointer_mask)->allocator;
-	}
-PUGI__NS_END
-
-// Low-level DOM operations
-PUGI__NS_BEGIN
-	inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
-	{
-		xml_memory_page* page;
-		void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page);
-
-		return new (memory) xml_attribute_struct(page);
-	}
-
-	inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
-	{
-		xml_memory_page* page;
-		void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page);
-
-		return new (memory) xml_node_struct(page, type);
-	}
-
-	inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
-	{
-		uintptr_t header = a->header;
-
-		if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name);
-		if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value);
-
-		alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));
-	}
-
-	inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
-	{
-		uintptr_t header = n->header;
-
-		if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name);
-		if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value);
-
-		for (xml_attribute_struct* attr = n->first_attribute; attr; )
-		{
-			xml_attribute_struct* next = attr->next_attribute;
-
-			destroy_attribute(attr, alloc);
-
-			attr = next;
-		}
-
-		for (xml_node_struct* child = n->first_child; child; )
-		{
-			xml_node_struct* next = child->next_sibling;
-
-			destroy_node(child, alloc);
-
-			child = next;
-		}
-
-		alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));
-	}
-
-	PUGI__FN_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
-	{
-		xml_node_struct* child = allocate_node(alloc, type);
-		if (!child) return 0;
-
-		child->parent = node;
-
-		xml_node_struct* first_child = node->first_child;
-			
-		if (first_child)
-		{
-			xml_node_struct* last_child = first_child->prev_sibling_c;
-
-			last_child->next_sibling = child;
-			child->prev_sibling_c = last_child;
-			first_child->prev_sibling_c = child;
-		}
-		else
-		{
-			node->first_child = child;
-			child->prev_sibling_c = child;
-		}
-			
-		return child;
-	}
-
-	PUGI__FN_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc)
-	{
-		xml_attribute_struct* a = allocate_attribute(alloc);
-		if (!a) return 0;
-
-		xml_attribute_struct* first_attribute = node->first_attribute;
-
-		if (first_attribute)
-		{
-			xml_attribute_struct* last_attribute = first_attribute->prev_attribute_c;
-
-			last_attribute->next_attribute = a;
-			a->prev_attribute_c = last_attribute;
-			first_attribute->prev_attribute_c = a;
-		}
-		else
-		{
-			node->first_attribute = a;
-			a->prev_attribute_c = a;
-		}
-			
-		return a;
-	}
-PUGI__NS_END
-
-// Helper classes for code generation
-PUGI__NS_BEGIN
-	struct opt_false
-	{
-		enum { value = 0 };
-	};
-
-	struct opt_true
-	{
-		enum { value = 1 };
-	};
-PUGI__NS_END
-
-// Unicode utilities
-PUGI__NS_BEGIN
-	inline uint16_t endian_swap(uint16_t value)
-	{
-		return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8));
-	}
-
-	inline uint32_t endian_swap(uint32_t value)
-	{
-		return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24);
-	}
-
-	struct utf8_counter
-	{
-		typedef size_t value_type;
-
-		static value_type low(value_type result, uint32_t ch)
-		{
-			// U+0000..U+007F
-			if (ch < 0x80) return result + 1;
-			// U+0080..U+07FF
-			else if (ch < 0x800) return result + 2;
-			// U+0800..U+FFFF
-			else return result + 3;
-		}
-
-		static value_type high(value_type result, uint32_t)
-		{
-			// U+10000..U+10FFFF
-			return result + 4;
-		}
-	};
-
-	struct utf8_writer
-	{
-		typedef uint8_t* value_type;
-
-		static value_type low(value_type result, uint32_t ch)
-		{
-			// U+0000..U+007F
-			if (ch < 0x80)
-			{
-				*result = static_cast<uint8_t>(ch);
-				return result + 1;
-			}
-			// U+0080..U+07FF
-			else if (ch < 0x800)
-			{
-				result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
-				result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
-				return result + 2;
-			}
-			// U+0800..U+FFFF
-			else
-			{
-				result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
-				result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
-				result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
-				return result + 3;
-			}
-		}
-
-		static value_type high(value_type result, uint32_t ch)
-		{
-			// U+10000..U+10FFFF
-			result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
-			result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
-			result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
-			result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
-			return result + 4;
-		}
-
-		static value_type any(value_type result, uint32_t ch)
-		{
-			return (ch < 0x10000) ? low(result, ch) : high(result, ch);
-		}
-	};
-
-	struct utf16_counter
-	{
-		typedef size_t value_type;
-
-		static value_type low(value_type result, uint32_t)
-		{
-			return result + 1;
-		}
-
-		static value_type high(value_type result, uint32_t)
-		{
-			return result + 2;
-		}
-	};
-
-	struct utf16_writer
-	{
-		typedef uint16_t* value_type;
-
-		static value_type low(value_type result, uint32_t ch)
-		{
-			*result = static_cast<uint16_t>(ch);
-
-			return result + 1;
-		}
-
-		static value_type high(value_type result, uint32_t ch)
-		{
-			uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10;
-			uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff;
-
-			result[0] = static_cast<uint16_t>(0xD800 + msh);
-			result[1] = static_cast<uint16_t>(0xDC00 + lsh);
-
-			return result + 2;
-		}
-
-		static value_type any(value_type result, uint32_t ch)
-		{
-			return (ch < 0x10000) ? low(result, ch) : high(result, ch);
-		}
-	};
-
-	struct utf32_counter
-	{
-		typedef size_t value_type;
-
-		static value_type low(value_type result, uint32_t)
-		{
-			return result + 1;
-		}
-
-		static value_type high(value_type result, uint32_t)
-		{
-			return result + 1;
-		}
-	};
-
-	struct utf32_writer
-	{
-		typedef uint32_t* value_type;
-
-		static value_type low(value_type result, uint32_t ch)
-		{
-			*result = ch;
-
-			return result + 1;
-		}
-
-		static value_type high(value_type result, uint32_t ch)
-		{
-			*result = ch;
-
-			return result + 1;
-		}
-
-		static value_type any(value_type result, uint32_t ch)
-		{
-			*result = ch;
-
-			return result + 1;
-		}
-	};
-
-	struct latin1_writer
-	{
-		typedef uint8_t* value_type;
-
-		static value_type low(value_type result, uint32_t ch)
-		{
-			*result = static_cast<uint8_t>(ch > 255 ? '?' : ch);
-
-			return result + 1;
-		}
-
-		static value_type high(value_type result, uint32_t ch)
-		{
-			(void)ch;
-
-			*result = '?';
-
-			return result + 1;
-		}
-	};
-
-	template <size_t size> struct wchar_selector;
-
-	template <> struct wchar_selector<2>
-	{
-		typedef uint16_t type;
-		typedef utf16_counter counter;
-		typedef utf16_writer writer;
-	};
-
-	template <> struct wchar_selector<4>
-	{
-		typedef uint32_t type;
-		typedef utf32_counter counter;
-		typedef utf32_writer writer;
-	};
-
-	typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
-	typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
-
-	template <typename Traits, typename opt_swap = opt_false> struct utf_decoder
-	{
-		static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result)
-		{
-			const uint8_t utf8_byte_mask = 0x3f;
-
-			while (size)
-			{
-				uint8_t lead = *data;
-
-				// 0xxxxxxx -> U+0000..U+007F
-				if (lead < 0x80)
-				{
-					result = Traits::low(result, lead);
-					data += 1;
-					size -= 1;
-
-					// process aligned single-byte (ascii) blocks
-					if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
-					{
-						// round-trip through void* to silence 'cast increases required alignment of target type' warnings
-						while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
-						{
-							result = Traits::low(result, data[0]);
-							result = Traits::low(result, data[1]);
-							result = Traits::low(result, data[2]);
-							result = Traits::low(result, data[3]);
-							data += 4;
-							size -= 4;
-						}
-					}
-				}
-				// 110xxxxx -> U+0080..U+07FF
-				else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
-				{
-					result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
-					data += 2;
-					size -= 2;
-				}
-				// 1110xxxx -> U+0800-U+FFFF
-				else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
-				{
-					result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
-					data += 3;
-					size -= 3;
-				}
-				// 11110xxx -> U+10000..U+10FFFF
-				else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
-				{
-					result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
-					data += 4;
-					size -= 4;
-				}
-				// 10xxxxxx or 11111xxx -> invalid
-				else
-				{
-					data += 1;
-					size -= 1;
-				}
-			}
-
-			return result;
-		}
-
-		static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result)
-		{
-			const uint16_t* end = data + size;
-
-			while (data < end)
-			{
-				uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
-
-				// U+0000..U+D7FF
-				if (lead < 0xD800)
-				{
-					result = Traits::low(result, lead);
-					data += 1;
-				}
-				// U+E000..U+FFFF
-				else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
-				{
-					result = Traits::low(result, lead);
-					data += 1;
-				}
-				// surrogate pair lead
-				else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && data + 1 < end)
-				{
-					uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
-
-					if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
-					{
-						result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
-						data += 2;
-					}
-					else
-					{
-						data += 1;
-					}
-				}
-				else
-				{
-					data += 1;
-				}
-			}
-
-			return result;
-		}
-
-		static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result)
-		{
-			const uint32_t* end = data + size;
-
-			while (data < end)
-			{
-				uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
-
-				// U+0000..U+FFFF
-				if (lead < 0x10000)
-				{
-					result = Traits::low(result, lead);
-					data += 1;
-				}
-				// U+10000..U+10FFFF
-				else
-				{
-					result = Traits::high(result, lead);
-					data += 1;
-				}
-			}
-
-			return result;
-		}
-
-		static inline typename Traits::value_type decode_latin1_block(const uint8_t* data, size_t size, typename Traits::value_type result)
-		{
-			for (size_t i = 0; i < size; ++i)
-			{
-				result = Traits::low(result, data[i]);
-			}
-
-			return result;
-		}
-
-		static inline typename Traits::value_type decode_wchar_block_impl(const uint16_t* data, size_t size, typename Traits::value_type result)
-		{
-			return decode_utf16_block(data, size, result);
-		}
-
-		static inline typename Traits::value_type decode_wchar_block_impl(const uint32_t* data, size_t size, typename Traits::value_type result)
-		{
-			return decode_utf32_block(data, size, result);
-		}
-
-		static inline typename Traits::value_type decode_wchar_block(const wchar_t* data, size_t size, typename Traits::value_type result)
-		{
-			return decode_wchar_block_impl(reinterpret_cast<const wchar_selector<sizeof(wchar_t)>::type*>(data), size, result);
-		}
-	};
-
-	template <typename T> PUGI__FN void convert_utf_endian_swap(T* result, const T* data, size_t length)
-	{
-		for (size_t i = 0; i < length; ++i) result[i] = endian_swap(data[i]);
-	}
-
-#ifdef PUGIXML_WCHAR_MODE
-	PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
-	{
-		for (size_t i = 0; i < length; ++i) result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
-	}
-#endif
-PUGI__NS_END
-
-PUGI__NS_BEGIN
-	enum chartype_t
-	{
-		ct_parse_pcdata = 1,	// \0, &, \r, <
-		ct_parse_attr = 2,		// \0, &, \r, ', "
-		ct_parse_attr_ws = 4,	// \0, &, \r, ', ", \n, tab
-		ct_space = 8,			// \r, \n, space, tab
-		ct_parse_cdata = 16,	// \0, ], >, \r
-		ct_parse_comment = 32,	// \0, -, >, \r
-		ct_symbol = 64,			// Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
-		ct_start_symbol = 128	// Any symbol > 127, a-z, A-Z, _, :
-	};
-
-	static const unsigned char chartype_table[256] =
-	{
-		55,  0,   0,   0,   0,   0,   0,   0,      0,   12,  12,  0,   0,   63,  0,   0,   // 0-15
-		0,   0,   0,   0,   0,   0,   0,   0,      0,   0,   0,   0,   0,   0,   0,   0,   // 16-31
-		8,   0,   6,   0,   0,   0,   7,   6,      0,   0,   0,   0,   0,   96,  64,  0,   // 32-47
-		64,  64,  64,  64,  64,  64,  64,  64,     64,  64,  192, 0,   1,   0,   48,  0,   // 48-63
-		0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 64-79
-		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0,   0,   16,  0,   192, // 80-95
-		0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 96-111
-		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0, 0, 0, 0, 0,           // 112-127
-
-		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 128+
-		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
-		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
-		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
-		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
-		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
-		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
-		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192
-	};
-
-	enum chartypex_t
-	{
-		ctx_special_pcdata = 1,   // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
-		ctx_special_attr = 2,     // Any symbol >= 0 and < 32 (except \t), &, <, >, "
-		ctx_start_symbol = 4,	  // Any symbol > 127, a-z, A-Z, _
-		ctx_digit = 8,			  // 0-9
-		ctx_symbol = 16			  // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
-	};
-	
-	static const unsigned char chartypex_table[256] =
-	{
-		3,  3,  3,  3,  3,  3,  3,  3,     3,  0,  2,  3,  3,  2,  3,  3,     // 0-15
-		3,  3,  3,  3,  3,  3,  3,  3,     3,  3,  3,  3,  3,  3,  3,  3,     // 16-31
-		0,  0,  2,  0,  0,  0,  3,  0,     0,  0,  0,  0,  0, 16, 16,  0,     // 32-47
-		24, 24, 24, 24, 24, 24, 24, 24,    24, 24, 0,  0,  3,  0,  3,  0,     // 48-63
-
-		0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 64-79
-		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  20,    // 80-95
-		0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 96-111
-		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  0,     // 112-127
-
-		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 128+
-		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
-		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
-		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
-		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
-		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
-		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
-		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20
-	};
-	
-#ifdef PUGIXML_WCHAR_MODE
-	#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
-#else
-	#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
-#endif
-
-	#define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
-	#define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
-
-	PUGI__FN bool is_little_endian()
-	{
-		unsigned int ui = 1;
-
-		return *reinterpret_cast<unsigned char*>(&ui) == 1;
-	}
-
-	PUGI__FN xml_encoding get_wchar_encoding()
-	{
-		PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
-
-		if (sizeof(wchar_t) == 2)
-			return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
-		else 
-			return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
-	}
-
-	PUGI__FN xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3)
-	{
-		// look for BOM in first few bytes
-		if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
-		if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
-		if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
-		if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
-		if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
-
-		// look for <, <? or <?xm in various encodings
-		if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
-		if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
-		if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
-		if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
-		if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return encoding_utf8;
-
-		// look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
-		if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
-		if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
-
-		// no known BOM detected, assume utf8
-		return encoding_utf8;
-	}
-
-	PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
-	{
-		// replace wchar encoding with utf implementation
-		if (encoding == encoding_wchar) return get_wchar_encoding();
-
-		// replace utf16 encoding with utf16 with specific endianness
-		if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
-
-		// replace utf32 encoding with utf32 with specific endianness
-		if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
-
-		// only do autodetection if no explicit encoding is requested
-		if (encoding != encoding_auto) return encoding;
-
-		// skip encoding autodetection if input buffer is too small
-		if (size < 4) return encoding_utf8;
-
-		// try to guess encoding (based on XML specification, Appendix F.1)
-		const uint8_t* data = static_cast<const uint8_t*>(contents);
-
-		PUGI__DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
-
-		return guess_buffer_encoding(d0, d1, d2, d3);
-	}
-
-	PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
-	{
-		if (is_mutable)
-		{
-			out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
-		}
-		else
-		{
-			void* buffer = xml_memory::allocate(size > 0 ? size : 1);
-			if (!buffer) return false;
-
-			memcpy(buffer, contents, size);
-
-			out_buffer = static_cast<char_t*>(buffer);
-		}
-
-		out_length = size / sizeof(char_t);
-
-		return true;
-	}
-
-#ifdef PUGIXML_WCHAR_MODE
-	PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
-	{
-		return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
-			   (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
-	}
-
-	PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
-	{
-		const char_t* data = static_cast<const char_t*>(contents);
-	
-		if (is_mutable)
-		{
-			out_buffer = const_cast<char_t*>(data);
-		}
-		else
-		{
-			out_buffer = static_cast<char_t*>(xml_memory::allocate(size > 0 ? size : 1));
-			if (!out_buffer) return false;
-		}
-
-		out_length = size / sizeof(char_t);
-
-		convert_wchar_endian_swap(out_buffer, data, out_length);
-
-		return true;
-	}
-
-	PUGI__FN bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
-	{
-		const uint8_t* data = static_cast<const uint8_t*>(contents);
-
-		// first pass: get length in wchar_t units
-		out_length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
-
-		// allocate buffer of suitable length
-		out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
-		if (!out_buffer) return false;
-
-		// second pass: convert utf8 input to wchar_t
-		wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
-		wchar_writer::value_type out_end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, out_begin);
-
-		assert(out_end == out_begin + out_length);
-		(void)!out_end;
-
-		return true;
-	}
-
-	template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
-	{
-		const uint16_t* data = static_cast<const uint16_t*>(contents);
-		size_t length = size / sizeof(uint16_t);
-
-		// first pass: get length in wchar_t units
-		out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf16_block(data, length, 0);
-
-		// allocate buffer of suitable length
-		out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
-		if (!out_buffer) return false;
-
-		// second pass: convert utf16 input to wchar_t
-		wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
-		wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf16_block(data, length, out_begin);
-
-		assert(out_end == out_begin + out_length);
-		(void)!out_end;
-
-		return true;
-	}
-
-	template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
-	{
-		const uint32_t* data = static_cast<const uint32_t*>(contents);
-		size_t length = size / sizeof(uint32_t);
-
-		// first pass: get length in wchar_t units
-		out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf32_block(data, length, 0);
-
-		// allocate buffer of suitable length
-		out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
-		if (!out_buffer) return false;
-
-		// second pass: convert utf32 input to wchar_t
-		wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
-		wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf32_block(data, length, out_begin);
-
-		assert(out_end == out_begin + out_length);
-		(void)!out_end;
-
-		return true;
-	}
-
-	PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
-	{
-		const uint8_t* data = static_cast<const uint8_t*>(contents);
-
-		// get length in wchar_t units
-		out_length = size;
-
-		// allocate buffer of suitable length
-		out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
-		if (!out_buffer) return false;
-
-		// convert latin1 input to wchar_t
-		wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
-		wchar_writer::value_type out_end = utf_decoder<wchar_writer>::decode_latin1_block(data, size, out_begin);
-
-		assert(out_end == out_begin + out_length);
-		(void)!out_end;
-
-		return true;
-	}
-
-	PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
-	{
-		// get native encoding
-		xml_encoding wchar_encoding = get_wchar_encoding();
-
-		// fast path: no conversion required
-		if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
-
-		// only endian-swapping is required
-		if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
-
-		// source encoding is utf8
-		if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size);
-
-		// source encoding is utf16
-		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
-		{
-			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
-
-			return (native_encoding == encoding) ?
-				convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
-				convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
-		}
-
-		// source encoding is utf32
-		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
-		{
-			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
-
-			return (native_encoding == encoding) ?
-				convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
-				convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
-		}
-
-		// source encoding is latin1
-		if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size);
-
-		assert(!"Invalid encoding");
-		return false;
-	}
-#else
-	template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
-	{
-		const uint16_t* data = static_cast<const uint16_t*>(contents);
-		size_t length = size / sizeof(uint16_t);
-
-		// first pass: get length in utf8 units
-		out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf16_block(data, length, 0);
-
-		// allocate buffer of suitable length
-		out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
-		if (!out_buffer) return false;
-
-		// second pass: convert utf16 input to utf8
-		uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
-		uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf16_block(data, length, out_begin);
-
-		assert(out_end == out_begin + out_length);
-		(void)!out_end;
-
-		return true;
-	}
-
-	template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
-	{
-		const uint32_t* data = static_cast<const uint32_t*>(contents);
-		size_t length = size / sizeof(uint32_t);
-
-		// first pass: get length in utf8 units
-		out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf32_block(data, length, 0);
-
-		// allocate buffer of suitable length
-		out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
-		if (!out_buffer) return false;
-
-		// second pass: convert utf32 input to utf8
-		uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
-		uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf32_block(data, length, out_begin);
-
-		assert(out_end == out_begin + out_length);
-		(void)!out_end;
-
-		return true;
-	}
-
-	PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
-	{
-		for (size_t i = 0; i < size; ++i)
-			if (data[i] > 127)
-				return i;
-
-		return size;
-	}
-
-	PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
-	{
-		const uint8_t* data = static_cast<const uint8_t*>(contents);
-
-		// get size of prefix that does not need utf8 conversion
-		size_t prefix_length = get_latin1_7bit_prefix_length(data, size);
-		assert(prefix_length <= size);
-
-		const uint8_t* postfix = data + prefix_length;
-		size_t postfix_length = size - prefix_length;
-
-		// if no conversion is needed, just return the original buffer
-		if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
-
-		// first pass: get length in utf8 units
-		out_length = prefix_length + utf_decoder<utf8_counter>::decode_latin1_block(postfix, postfix_length, 0);
-
-		// allocate buffer of suitable length
-		out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
-		if (!out_buffer) return false;
-
-		// second pass: convert latin1 input to utf8
-		memcpy(out_buffer, data, prefix_length);
-
-		uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
-		uint8_t* out_end = utf_decoder<utf8_writer>::decode_latin1_block(postfix, postfix_length, out_begin + prefix_length);
-
-		assert(out_end == out_begin + out_length);
-		(void)!out_end;
-
-		return true;
-	}
-
-	PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
-	{
-		// fast path: no conversion required
-		if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
-
-		// source encoding is utf16
-		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
-		{
-			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
-
-			return (native_encoding == encoding) ?
-				convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
-				convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
-		}
-
-		// source encoding is utf32
-		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
-		{
-			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
-
-			return (native_encoding == encoding) ?
-				convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
-				convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
-		}
-
-		// source encoding is latin1
-		if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
-
-		assert(!"Invalid encoding");
-		return false;
-	}
-#endif
-
-	PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
-	{
-		// get length in utf8 characters
-		return utf_decoder<utf8_counter>::decode_wchar_block(str, length, 0);
-	}
-
-	PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
-	{
-		// convert to utf8
-		uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
-		uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(str, length, begin);
-	
-		assert(begin + size == end);
-		(void)!end;
-
-		// zero-terminate
-		buffer[size] = 0;
-	}
-	
-#ifndef PUGIXML_NO_STL
-	PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
-	{
-		// first pass: get length in utf8 characters
-		size_t size = as_utf8_begin(str, length);
-
-		// allocate resulting string
-		std::string result;
-		result.resize(size);
-
-		// second pass: convert to utf8
-		if (size > 0) as_utf8_end(&result[0], size, str, length);
-
-		return result;
-	}
-
-	PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
-	{
-		const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
-
-		// first pass: get length in wchar_t units
-		size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
-
-		// allocate resulting string
-		std::basic_string<wchar_t> result;
-		result.resize(length);
-
-		// second pass: convert to wchar_t
-		if (length > 0)
-		{
-			wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
-			wchar_writer::value_type end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, begin);
-
-			assert(begin + length == end);
-			(void)!end;
-		}
-
-		return result;
-	}
-#endif
-
-	inline bool strcpy_insitu_allow(size_t length, uintptr_t allocated, char_t* target)
-	{
-		assert(target);
-		size_t target_length = strlength(target);
-
-		// always reuse document buffer memory if possible
-		if (!allocated) return target_length >= length;
-
-		// reuse heap memory if waste is not too great
-		const size_t reuse_threshold = 32;
-
-		return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
-	}
-
-	PUGI__FN bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source)
-	{
-		size_t source_length = strlength(source);
-
-		if (source_length == 0)
-		{
-			// empty string and null pointer are equivalent, so just deallocate old memory
-			xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
-
-			if (header & header_mask) alloc->deallocate_string(dest);
-			
-			// mark the string as not allocated
-			dest = 0;
-			header &= ~header_mask;
-
-			return true;
-		}
-		else if (dest && strcpy_insitu_allow(source_length, header & header_mask, dest))
-		{
-			// we can reuse old buffer, so just copy the new data (including zero terminator)
-			memcpy(dest, source, (source_length + 1) * sizeof(char_t));
-			
-			return true;
-		}
-		else
-		{
-			xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
-
-			// allocate new buffer
-			char_t* buf = alloc->allocate_string(source_length + 1);
-			if (!buf) return false;
-
-			// copy the string (including zero terminator)
-			memcpy(buf, source, (source_length + 1) * sizeof(char_t));
-
-			// deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
-			if (header & header_mask) alloc->deallocate_string(dest);
-			
-			// the string is now allocated, so set the flag
-			dest = buf;
-			header |= header_mask;
-
-			return true;
-		}
-	}
-
-	struct gap
-	{
-		char_t* end;
-		size_t size;
-			
-		gap(): end(0), size(0)
-		{
-		}
-			
-		// Push new gap, move s count bytes further (skipping the gap).
-		// Collapse previous gap.
-		void push(char_t*& s, size_t count)
-		{
-			if (end) // there was a gap already; collapse it
-			{
-				// Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
-				assert(s >= end);
-				memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
-			}
-				
-			s += count; // end of current gap
-				
-			// "merge" two gaps
-			end = s;
-			size += count;
-		}
-			
-		// Collapse all gaps, return past-the-end pointer
-		char_t* flush(char_t* s)
-		{
-			if (end)
-			{
-				// Move [old_gap_end, current_pos) to [old_gap_start, ...)
-				assert(s >= end);
-				memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
-
-				return s - size;
-			}
-			else return s;
-		}
-	};
-	
-	PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
-	{
-		char_t* stre = s + 1;
-
-		switch (*stre)
-		{
-			case '#':	// &#...
-			{
-				unsigned int ucsc = 0;
-
-				if (stre[1] == 'x') // &#x... (hex code)
-				{
-					stre += 2;
-
-					char_t ch = *stre;
-
-					if (ch == ';') return stre;
-
-					for (;;)
-					{
-						if (static_cast<unsigned int>(ch - '0') <= 9)
-							ucsc = 16 * ucsc + (ch - '0');
-						else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
-							ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
-						else if (ch == ';')
-							break;
-						else // cancel
-							return stre;
-
-						ch = *++stre;
-					}
-					
-					++stre;
-				}
-				else	// &#... (dec code)
-				{
-					char_t ch = *++stre;
-
-					if (ch == ';') return stre;
-
-					for (;;)
-					{
-						if (static_cast<unsigned int>(ch - '0') <= 9)
-							ucsc = 10 * ucsc + (ch - '0');
-						else if (ch == ';')
-							break;
-						else // cancel
-							return stre;
-
-						ch = *++stre;
-					}
-					
-					++stre;
-				}
-
-			#ifdef PUGIXML_WCHAR_MODE
-				s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
-			#else
-				s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
-			#endif
-					
-				g.push(s, stre - s);
-				return stre;
-			}
-
-			case 'a':	// &a
-			{
-				++stre;
-
-				if (*stre == 'm') // &am
-				{
-					if (*++stre == 'p' && *++stre == ';') // &
-					{
-						*s++ = '&';
-						++stre;
-							
-						g.push(s, stre - s);
-						return stre;
-					}
-				}
-				else if (*stre == 'p') // &ap
-				{
-					if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // '
-					{
-						*s++ = '\'';
-						++stre;
-
-						g.push(s, stre - s);
-						return stre;
-					}
-				}
-				break;
-			}
-
-			case 'g': // &g
-			{
-				if (*++stre == 't' && *++stre == ';') // >
-				{
-					*s++ = '>';
-					++stre;
-					
-					g.push(s, stre - s);
-					return stre;
-				}
-				break;
-			}
-
-			case 'l': // &l
-			{
-				if (*++stre == 't' && *++stre == ';') // <
-				{
-					*s++ = '<';
-					++stre;
-						
-					g.push(s, stre - s);
-					return stre;
-				}
-				break;
-			}
-
-			case 'q': // &q
-			{
-				if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // "
-				{
-					*s++ = '"';
-					++stre;
-					
-					g.push(s, stre - s);
-					return stre;
-				}
-				break;
-			}
-
-			default:
-				break;
-		}
-		
-		return stre;
-	}
-
-	// Utility macro for last character handling
-	#define ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
-
-	PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
-	{
-		gap g;
-		
-		while (true)
-		{
-			while (!PUGI__IS_CHARTYPE(*s, ct_parse_comment)) ++s;
-		
-			if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
-			{
-				*s++ = '\n'; // replace first one with 0x0a
-				
-				if (*s == '\n') g.push(s, 1);
-			}
-			else if (s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')) // comment ends here
-			{
-				*g.flush(s) = 0;
-				
-				return s + (s[2] == '>' ? 3 : 2);
-			}
-			else if (*s == 0)
-			{
-				return 0;
-			}
-			else ++s;
-		}
-	}
-
-	PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
-	{
-		gap g;
-			
-		while (true)
-		{
-			while (!PUGI__IS_CHARTYPE(*s, ct_parse_cdata)) ++s;
-			
-			if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
-			{
-				*s++ = '\n'; // replace first one with 0x0a
-				
-				if (*s == '\n') g.push(s, 1);
-			}
-			else if (s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')) // CDATA ends here
-			{
-				*g.flush(s) = 0;
-				
-				return s + 1;
-			}
-			else if (*s == 0)
-			{
-				return 0;
-			}
-			else ++s;
-		}
-	}
-	
-	typedef char_t* (*strconv_pcdata_t)(char_t*);
-		
-	template <typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
-	{
-		static char_t* parse(char_t* s)
-		{
-			gap g;
-			
-			while (true)
-			{
-				while (!PUGI__IS_CHARTYPE(*s, ct_parse_pcdata)) ++s;
-					
-				if (*s == '<') // PCDATA ends here
-				{
-					*g.flush(s) = 0;
-					
-					return s + 1;
-				}
-				else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
-				{
-					*s++ = '\n'; // replace first one with 0x0a
-					
-					if (*s == '\n') g.push(s, 1);
-				}
-				else if (opt_escape::value && *s == '&')
-				{
-					s = strconv_escape(s, g);
-				}
-				else if (*s == 0)
-				{
-					return s;
-				}
-				else ++s;
-			}
-		}
-	};
-	
-	PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
-	{
-		PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20);
-
-		switch ((optmask >> 4) & 3) // get bitmask for flags (eol escapes)
-		{
-		case 0: return strconv_pcdata_impl<opt_false, opt_false>::parse;
-		case 1: return strconv_pcdata_impl<opt_false, opt_true>::parse;
-		case 2: return strconv_pcdata_impl<opt_true, opt_false>::parse;
-		case 3: return strconv_pcdata_impl<opt_true, opt_true>::parse;
-		default: return 0; // should not get here
-		}
-	}
-
-	typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
-	
-	template <typename opt_escape> struct strconv_attribute_impl
-	{
-		static char_t* parse_wnorm(char_t* s, char_t end_quote)
-		{
-			gap g;
-
-			// trim leading whitespaces
-			if (PUGI__IS_CHARTYPE(*s, ct_space))
-			{
-				char_t* str = s;
-				
-				do ++str;
-				while (PUGI__IS_CHARTYPE(*str, ct_space));
-				
-				g.push(s, str - s);
-			}
-
-			while (true)
-			{
-				while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr_ws | ct_space)) ++s;
-				
-				if (*s == end_quote)
-				{
-					char_t* str = g.flush(s);
-					
-					do *str-- = 0;
-					while (PUGI__IS_CHARTYPE(*str, ct_space));
-				
-					return s + 1;
-				}
-				else if (PUGI__IS_CHARTYPE(*s, ct_space))
-				{
-					*s++ = ' ';
-		
-					if (PUGI__IS_CHARTYPE(*s, ct_space))
-					{
-						char_t* str = s + 1;
-						while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
-						
-						g.push(s, str - s);
-					}
-				}
-				else if (opt_escape::value && *s == '&')
-				{
-					s = strconv_escape(s, g);
-				}
-				else if (!*s)
-				{
-					return 0;
-				}
-				else ++s;
-			}
-		}
-
-		static char_t* parse_wconv(char_t* s, char_t end_quote)
-		{
-			gap g;
-
-			while (true)
-			{
-				while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr_ws)) ++s;
-				
-				if (*s == end_quote)
-				{
-					*g.flush(s) = 0;
-				
-					return s + 1;
-				}
-				else if (PUGI__IS_CHARTYPE(*s, ct_space))
-				{
-					if (*s == '\r')
-					{
-						*s++ = ' ';
-				
-						if (*s == '\n') g.push(s, 1);
-					}
-					else *s++ = ' ';
-				}
-				else if (opt_escape::value && *s == '&')
-				{
-					s = strconv_escape(s, g);
-				}
-				else if (!*s)
-				{
-					return 0;
-				}
-				else ++s;
-			}
-		}
-
-		static char_t* parse_eol(char_t* s, char_t end_quote)
-		{
-			gap g;
-
-			while (true)
-			{
-				while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr)) ++s;
-				
-				if (*s == end_quote)
-				{
-					*g.flush(s) = 0;
-				
-					return s + 1;
-				}
-				else if (*s == '\r')
-				{
-					*s++ = '\n';
-					
-					if (*s == '\n') g.push(s, 1);
-				}
-				else if (opt_escape::value && *s == '&')
-				{
-					s = strconv_escape(s, g);
-				}
-				else if (!*s)
-				{
-					return 0;
-				}
-				else ++s;
-			}
-		}
-
-		static char_t* parse_simple(char_t* s, char_t end_quote)
-		{
-			gap g;
-
-			while (true)
-			{
-				while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr)) ++s;
-				
-				if (*s == end_quote)
-				{
-					*g.flush(s) = 0;
-				
-					return s + 1;
-				}
-				else if (opt_escape::value && *s == '&')
-				{
-					s = strconv_escape(s, g);
-				}
-				else if (!*s)
-				{
-					return 0;
-				}
-				else ++s;
-			}
-		}
-	};
-
-	PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
-	{
-		PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
-		
-		switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes)
-		{
-		case 0:  return strconv_attribute_impl<opt_false>::parse_simple;
-		case 1:  return strconv_attribute_impl<opt_true>::parse_simple;
-		case 2:  return strconv_attribute_impl<opt_false>::parse_eol;
-		case 3:  return strconv_attribute_impl<opt_true>::parse_eol;
-		case 4:  return strconv_attribute_impl<opt_false>::parse_wconv;
-		case 5:  return strconv_attribute_impl<opt_true>::parse_wconv;
-		case 6:  return strconv_attribute_impl<opt_false>::parse_wconv;
-		case 7:  return strconv_attribute_impl<opt_true>::parse_wconv;
-		case 8:  return strconv_attribute_impl<opt_false>::parse_wnorm;
-		case 9:  return strconv_attribute_impl<opt_true>::parse_wnorm;
-		case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
-		case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
-		case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
-		case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
-		case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
-		case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
-		default: return 0; // should not get here
-		}
-	}
-
-	inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
-	{
-		xml_parse_result result;
-		result.status = status;
-		result.offset = offset;
-
-		return result;
-	}
-
-	struct xml_parser
-	{
-		xml_allocator alloc;
-		char_t* error_offset;
-		xml_parse_status error_status;
-		
-		// Parser utilities.
-		#define PUGI__SKIPWS()			{ while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
-		#define PUGI__OPTSET(OPT)			( optmsk & (OPT) )
-		#define PUGI__PUSHNODE(TYPE)		{ cursor = append_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
-		#define PUGI__POPNODE()			{ cursor = cursor->parent; }
-		#define PUGI__SCANFOR(X)			{ while (*s != 0 && !(X)) ++s; }
-		#define PUGI__SCANWHILE(X)		{ while ((X)) ++s; }
-		#define PUGI__ENDSEG()			{ ch = *s; *s = 0; ++s; }
-		#define PUGI__THROW_ERROR(err, m)	return error_offset = m, error_status = err, static_cast<char_t*>(0)
-		#define PUGI__CHECK_ERROR(err, m)	{ if (*s == 0) PUGI__THROW_ERROR(err, m); }
-		
-		xml_parser(const xml_allocator& alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
-		{
-		}
-
-		// DOCTYPE consists of nested sections of the following possible types:
-		// <!-- ... -->, <? ... ?>, "...", '...'
-		// <![...]]>
-		// <!...>
-		// First group can not contain nested groups
-		// Second group can contain nested groups of the same type
-		// Third group can contain all other groups
-		char_t* parse_doctype_primitive(char_t* s)
-		{
-			if (*s == '"' || *s == '\'')
-			{
-				// quoted string
-				char_t ch = *s++;
-				PUGI__SCANFOR(*s == ch);
-				if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
-
-				s++;
-			}
-			else if (s[0] == '<' && s[1] == '?')
-			{
-				// <? ... ?>
-				s += 2;
-				PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
-				if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
-
-				s += 2;
-			}
-			else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
-			{
-				s += 4;
-				PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
-				if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
-
-				s += 4;
-			}
-			else PUGI__THROW_ERROR(status_bad_doctype, s);
-
-			return s;
-		}
-
-		char_t* parse_doctype_ignore(char_t* s)
-		{
-			assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
-			s++;
-
-			while (*s)
-			{
-				if (s[0] == '<' && s[1] == '!' && s[2] == '[')
-				{
-					// nested ignore section
-					s = parse_doctype_ignore(s);
-					if (!s) return s;
-				}
-				else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
-				{
-					// ignore section end
-					s += 3;
-
-					return s;
-				}
-				else s++;
-			}
-
-			PUGI__THROW_ERROR(status_bad_doctype, s);
-		}
-
-		char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel)
-		{
-			assert(s[0] == '<' && s[1] == '!');
-			s++;
-
-			while (*s)
-			{
-				if (s[0] == '<' && s[1] == '!' && s[2] != '-')
-				{
-					if (s[2] == '[')
-					{
-						// ignore
-						s = parse_doctype_ignore(s);
-						if (!s) return s;
-					}
-					else
-					{
-						// some control group
-						s = parse_doctype_group(s, endch, false);
-						if (!s) return s;
-					}
-				}
-				else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
-				{
-					// unknown tag (forbidden), or some primitive group
-					s = parse_doctype_primitive(s);
-					if (!s) return s;
-				}
-				else if (*s == '>')
-				{
-					s++;
-
-					return s;
-				}
-				else s++;
-			}
-
-			if (!toplevel || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
-
-			return s;
-		}
-
-		char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
-		{
-			// parse node contents, starting with exclamation mark
-			++s;
-
-			if (*s == '-') // '<!-...'
-			{
-				++s;
-
-				if (*s == '-') // '<!--...'
-				{
-					++s;
-
-					if (PUGI__OPTSET(parse_comments))
-					{
-						PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
-						cursor->value = s; // Save the offset.
-					}
-
-					if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
-					{
-						s = strconv_comment(s, endch);
-
-						if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
-					}
-					else
-					{
-						// Scan for terminating '-->'.
-						PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>'));
-						PUGI__CHECK_ERROR(status_bad_comment, s);
-
-						if (PUGI__OPTSET(parse_comments))
-							*s = 0; // Zero-terminate this segment at the first terminating '-'.
-
-						s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
-					}
-				}
-				else PUGI__THROW_ERROR(status_bad_comment, s);
-			}
-			else if (*s == '[')
-			{
-				// '<![CDATA[...'
-				if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
-				{
-					++s;
-
-					if (PUGI__OPTSET(parse_cdata))
-					{
-						PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
-						cursor->value = s; // Save the offset.
-
-						if (PUGI__OPTSET(parse_eol))
-						{
-							s = strconv_cdata(s, endch);
-
-							if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
-						}
-						else
-						{
-							// Scan for terminating ']]>'.
-							PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
-							PUGI__CHECK_ERROR(status_bad_cdata, s);
-
-							*s++ = 0; // Zero-terminate this segment.
-						}
-					}
-					else // Flagged for discard, but we still have to scan for the terminator.
-					{
-						// Scan for terminating ']]>'.
-						PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
-						PUGI__CHECK_ERROR(status_bad_cdata, s);
-
-						++s;
-					}
-
-					s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
-				}
-				else PUGI__THROW_ERROR(status_bad_cdata, s);
-			}
-			else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && ENDSWITH(s[6], 'E'))
-			{
-				s -= 2;
-
-				if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
-
-				char_t* mark = s + 9;
-
-				s = parse_doctype_group(s, endch, true);
-				if (!s) return s;
-
-				if (PUGI__OPTSET(parse_doctype))
-				{
-					while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
-
-					PUGI__PUSHNODE(node_doctype);
-
-					cursor->value = mark;
-
-					assert((s[0] == 0 && endch == '>') || s[-1] == '>');
-					s[*s == 0 ? 0 : -1] = 0;
-
-					PUGI__POPNODE();
-				}
-			}
-			else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
-			else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
-			else PUGI__THROW_ERROR(status_unrecognized_tag, s);
-
-			return s;
-		}
-
-		char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
-		{
-			// load into registers
-			xml_node_struct* cursor = ref_cursor;
-			char_t ch = 0;
-
-			// parse node contents, starting with question mark
-			++s;
-
-			// read PI target
-			char_t* target = s;
-
-			if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
-
-			PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
-			PUGI__CHECK_ERROR(status_bad_pi, s);
-
-			// determine node type; stricmp / strcasecmp is not portable
-			bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
-
-			if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
-			{
-				if (declaration)
-				{
-					// disallow non top-level declarations
-					if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
-
-					PUGI__PUSHNODE(node_declaration);
-				}
-				else
-				{
-					PUGI__PUSHNODE(node_pi);
-				}
-
-				cursor->name = target;
-
-				PUGI__ENDSEG();
-
-				// parse value/attributes
-				if (ch == '?')
-				{
-					// empty node
-					if (!ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
-					s += (*s == '>');
-
-					PUGI__POPNODE();
-				}
-				else if (PUGI__IS_CHARTYPE(ch, ct_space))
-				{
-					PUGI__SKIPWS();
-
-					// scan for tag end
-					char_t* value = s;
-
-					PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
-					PUGI__CHECK_ERROR(status_bad_pi, s);
-
-					if (declaration)
-					{
-						// replace ending ? with / so that 'element' terminates properly
-						*s = '/';
-
-						// we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
-						s = value;
-					}
-					else
-					{
-						// store value and step over >
-						cursor->value = value;
-						PUGI__POPNODE();
-
-						PUGI__ENDSEG();
-
-						s += (*s == '>');
-					}
-				}
-				else PUGI__THROW_ERROR(status_bad_pi, s);
-			}
-			else
-			{
-				// scan for tag end
-				PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
-				PUGI__CHECK_ERROR(status_bad_pi, s);
-
-				s += (s[1] == '>' ? 2 : 1);
-			}
-
-			// store from registers
-			ref_cursor = cursor;
-
-			return s;
-		}
-
-		char_t* parse(char_t* s, xml_node_struct* xmldoc, unsigned int optmsk, char_t endch)
-		{
-			strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
-			strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
-			
-			char_t ch = 0;
-			xml_node_struct* cursor = xmldoc;
-			char_t* mark = s;
-
-			while (*s != 0)
-			{
-				if (*s == '<')
-				{
-					++s;
-
-				LOC_TAG:
-					if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
-					{
-						PUGI__PUSHNODE(node_element); // Append a new node to the tree.
-
-						cursor->name = s;
-
-						PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
-						PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
-
-						if (ch == '>')
-						{
-							// end of tag
-						}
-						else if (PUGI__IS_CHARTYPE(ch, ct_space))
-						{
-						LOC_ATTRIBUTES:
-							while (true)
-							{
-								PUGI__SKIPWS(); // Eat any whitespace.
-						
-								if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
-								{
-									xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute.
-									if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
-
-									a->name = s; // Save the offset.
-
-									PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
-									PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
-
-									PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
-									PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
-
-									if (PUGI__IS_CHARTYPE(ch, ct_space))
-									{
-										PUGI__SKIPWS(); // Eat any whitespace.
-										PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
-
-										ch = *s;
-										++s;
-									}
-									
-									if (ch == '=') // '<... #=...'
-									{
-										PUGI__SKIPWS(); // Eat any whitespace.
-
-										if (*s == '"' || *s == '\'') // '<... #="...'
-										{
-											ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
-											++s; // Step over the quote.
-											a->value = s; // Save the offset.
-
-											s = strconv_attribute(s, ch);
-										
-											if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
-
-											// After this line the loop continues from the start;
-											// Whitespaces, / and > are ok, symbols and EOF are wrong,
-											// everything else will be detected
-											if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
-										}
-										else PUGI__THROW_ERROR(status_bad_attribute, s);
-									}
-									else PUGI__THROW_ERROR(status_bad_attribute, s);
-								}
-								else if (*s == '/')
-								{
-									++s;
-									
-									if (*s == '>')
-									{
-										PUGI__POPNODE();
-										s++;
-										break;
-									}
-									else if (*s == 0 && endch == '>')
-									{
-										PUGI__POPNODE();
-										break;
-									}
-									else PUGI__THROW_ERROR(status_bad_start_element, s);
-								}
-								else if (*s == '>')
-								{
-									++s;
-
-									break;
-								}
-								else if (*s == 0 && endch == '>')
-								{
-									break;
-								}
-								else PUGI__THROW_ERROR(status_bad_start_element, s);
-							}
-
-							// !!!
-						}
-						else if (ch == '/') // '<#.../'
-						{
-							if (!ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
-
-							PUGI__POPNODE(); // Pop.
-
-							s += (*s == '>');
-						}
-						else if (ch == 0)
-						{
-							// we stepped over null terminator, backtrack & handle closing tag
-							--s;
-							
-							if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
-						}
-						else PUGI__THROW_ERROR(status_bad_start_element, s);
-					}
-					else if (*s == '/')
-					{
-						++s;
-
-						char_t* name = cursor->name;
-						if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s);
-						
-						while (PUGI__IS_CHARTYPE(*s, ct_symbol))
-						{
-							if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s);
-						}
-
-						if (*name)
-						{
-							if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
-							else PUGI__THROW_ERROR(status_end_element_mismatch, s);
-						}
-							
-						PUGI__POPNODE(); // Pop.
-
-						PUGI__SKIPWS();
-
-						if (*s == 0)
-						{
-							if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
-						}
-						else
-						{
-							if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
-							++s;
-						}
-					}
-					else if (*s == '?') // '<?...'
-					{
-						s = parse_question(s, cursor, optmsk, endch);
-						if (!s) return s;
-
-						assert(cursor);
-						if ((cursor->header & xml_memory_page_type_mask) + 1 == node_declaration) goto LOC_ATTRIBUTES;
-					}
-					else if (*s == '!') // '<!...'
-					{
-						s = parse_exclamation(s, cursor, optmsk, endch);
-						if (!s) return s;
-					}
-					else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
-					else PUGI__THROW_ERROR(status_unrecognized_tag, s);
-				}
-				else
-				{
-					mark = s; // Save this offset while searching for a terminator.
-
-					PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
-
-					if (*s == '<')
-					{
-						// We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
-						assert(mark != s);
-
-						if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single))
-						{
-							continue;
-						}
-						else if (PUGI__OPTSET(parse_ws_pcdata_single))
-						{
-							if (s[1] != '/' || cursor->first_child) continue;
-						}
-					}
-
-					s = mark;
-							
-					if (cursor->parent)
-					{
-						PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
-						cursor->value = s; // Save the offset.
-
-						s = strconv_pcdata(s);
-								
-						PUGI__POPNODE(); // Pop since this is a standalone.
-						
-						if (!*s) break;
-					}
-					else
-					{
-						PUGI__SCANFOR(*s == '<'); // '...<'
-						if (!*s) break;
-						
-						++s;
-					}
-
-					// We're after '<'
-					goto LOC_TAG;
-				}
-			}
-
-			// check that last tag is closed
-			if (cursor != xmldoc) PUGI__THROW_ERROR(status_end_element_mismatch, s);
-
-			return s;
-		}
-
-		static xml_parse_result parse(char_t* buffer, size_t length, xml_node_struct* root, unsigned int optmsk)
-		{
-			xml_document_struct* xmldoc = static_cast<xml_document_struct*>(root);
-
-			// store buffer for offset_debug
-			xmldoc->buffer = buffer;
-
-			// early-out for empty documents
-			if (length == 0) return make_parse_result(status_ok);
-
-			// create parser on stack
-			xml_parser parser(*xmldoc);
-
-			// save last character and make buffer zero-terminated (speeds up parsing)
-			char_t endch = buffer[length - 1];
-			buffer[length - 1] = 0;
-			
-			// perform actual parsing
-			parser.parse(buffer, xmldoc, optmsk, endch);
-
-			xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
-			assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
-
-			// update allocator state
-			*static_cast<xml_allocator*>(xmldoc) = parser.alloc;
-
-			// since we removed last character, we have to handle the only possible false positive
-			if (result && endch == '<')
-			{
-				// there's no possible well-formed document with < at the end
-				return make_parse_result(status_unrecognized_tag, length);
-			}
-
-			return result;
-		}
-	};
-
-	// Output facilities
-	PUGI__FN xml_encoding get_write_native_encoding()
-	{
-	#ifdef PUGIXML_WCHAR_MODE
-		return get_wchar_encoding();
-	#else
-		return encoding_utf8;
-	#endif
-	}
-
-	PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
-	{
-		// replace wchar encoding with utf implementation
-		if (encoding == encoding_wchar) return get_wchar_encoding();
-
-		// replace utf16 encoding with utf16 with specific endianness
-		if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
-
-		// replace utf32 encoding with utf32 with specific endianness
-		if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
-
-		// only do autodetection if no explicit encoding is requested
-		if (encoding != encoding_auto) return encoding;
-
-		// assume utf8 encoding
-		return encoding_utf8;
-	}
-
-#ifdef PUGIXML_WCHAR_MODE
-	PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
-	{
-		assert(length > 0);
-
-		// discard last character if it's the lead of a surrogate pair 
-		return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
-	}
-
-	PUGI__FN size_t convert_buffer(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
-	{
-		// only endian-swapping is required
-		if (need_endian_swap_utf(encoding, get_wchar_encoding()))
-		{
-			convert_wchar_endian_swap(r_char, data, length);
-
-			return length * sizeof(char_t);
-		}
-	
-		// convert to utf8
-		if (encoding == encoding_utf8)
-		{
-			uint8_t* dest = r_u8;
-			uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(data, length, dest);
-
-			return static_cast<size_t>(end - dest);
-		}
-
-		// convert to utf16
-		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
-		{
-			uint16_t* dest = r_u16;
-
-			// convert to native utf16
-			uint16_t* end = utf_decoder<utf16_writer>::decode_wchar_block(data, length, dest);
-
-			// swap if necessary
-			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
-
-			if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
-
-			return static_cast<size_t>(end - dest) * sizeof(uint16_t);
-		}
-
-		// convert to utf32
-		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
-		{
-			uint32_t* dest = r_u32;
-
-			// convert to native utf32
-			uint32_t* end = utf_decoder<utf32_writer>::decode_wchar_block(data, length, dest);
-
-			// swap if necessary
-			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
-
-			if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
-
-			return static_cast<size_t>(end - dest) * sizeof(uint32_t);
-		}
-
-		// convert to latin1
-		if (encoding == encoding_latin1)
-		{
-			uint8_t* dest = r_u8;
-			uint8_t* end = utf_decoder<latin1_writer>::decode_wchar_block(data, length, dest);
-
-			return static_cast<size_t>(end - dest);
-		}
-
-		assert(!"Invalid encoding");
-		return 0;
-	}
-#else
-	PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
-	{
-		assert(length > 4);
-
-		for (size_t i = 1; i <= 4; ++i)
-		{
-			uint8_t ch = static_cast<uint8_t>(data[length - i]);
-
-			// either a standalone character or a leading one
-			if ((ch & 0xc0) != 0x80) return length - i;
-		}
-
-		// there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
-		return length;
-	}
-
-	PUGI__FN size_t convert_buffer(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
-	{
-		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
-		{
-			uint16_t* dest = r_u16;
-
-			// convert to native utf16
-			uint16_t* end = utf_decoder<utf16_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
-
-			// swap if necessary
-			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
-
-			if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
-
-			return static_cast<size_t>(end - dest) * sizeof(uint16_t);
-		}
-
-		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
-		{
-			uint32_t* dest = r_u32;
-
-			// convert to native utf32
-			uint32_t* end = utf_decoder<utf32_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
-
-			// swap if necessary
-			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
-
-			if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
-
-			return static_cast<size_t>(end - dest) * sizeof(uint32_t);
-		}
-
-		if (encoding == encoding_latin1)
-		{
-			uint8_t* dest = r_u8;
-			uint8_t* end = utf_decoder<latin1_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
-
-			return static_cast<size_t>(end - dest);
-		}
-
-		assert(!"Invalid encoding");
-		return 0;
-	}
-#endif
-
-	class xml_buffered_writer
-	{
-		xml_buffered_writer(const xml_buffered_writer&);
-		xml_buffered_writer& operator=(const xml_buffered_writer&);
-
-	public:
-		xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
-		{
-			PUGI__STATIC_ASSERT(bufcapacity >= 8);
-		}
-
-		~xml_buffered_writer()
-		{
-			flush();
-		}
-
-		void flush()
-		{
-			flush(buffer, bufsize);
-			bufsize = 0;
-		}
-
-		void flush(const char_t* data, size_t size)
-		{
-			if (size == 0) return;
-
-			// fast path, just write data
-			if (encoding == get_write_native_encoding())
-				writer.write(data, size * sizeof(char_t));
-			else
-			{
-				// convert chunk
-				size_t result = convert_buffer(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
-				assert(result <= sizeof(scratch));
-
-				// write data
-				writer.write(scratch.data_u8, result);
-			}
-		}
-
-		void write(const char_t* data, size_t length)
-		{
-			if (bufsize + length > bufcapacity)
-			{
-				// flush the remaining buffer contents
-				flush();
-
-				// handle large chunks
-				if (length > bufcapacity)
-				{
-					if (encoding == get_write_native_encoding())
-					{
-						// fast path, can just write data chunk
-						writer.write(data, length * sizeof(char_t));
-						return;
-					}
-
-					// need to convert in suitable chunks
-					while (length > bufcapacity)
-					{
-						// get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
-						// and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
-						size_t chunk_size = get_valid_length(data, bufcapacity);
-
-						// convert chunk and write
-						flush(data, chunk_size);
-
-						// iterate
-						data += chunk_size;
-						length -= chunk_size;
-					}
-
-					// small tail is copied below
-					bufsize = 0;
-				}
-			}
-
-			memcpy(buffer + bufsize, data, length * sizeof(char_t));
-			bufsize += length;
-		}
-
-		void write(const char_t* data)
-		{
-			write(data, strlength(data));
-		}
-
-		void write(char_t d0)
-		{
-			if (bufsize + 1 > bufcapacity) flush();
-
-			buffer[bufsize + 0] = d0;
-			bufsize += 1;
-		}
-
-		void write(char_t d0, char_t d1)
-		{
-			if (bufsize + 2 > bufcapacity) flush();
-
-			buffer[bufsize + 0] = d0;
-			buffer[bufsize + 1] = d1;
-			bufsize += 2;
-		}
-
-		void write(char_t d0, char_t d1, char_t d2)
-		{
-			if (bufsize + 3 > bufcapacity) flush();
-
-			buffer[bufsize + 0] = d0;
-			buffer[bufsize + 1] = d1;
-			buffer[bufsize + 2] = d2;
-			bufsize += 3;
-		}
-
-		void write(char_t d0, char_t d1, char_t d2, char_t d3)
-		{
-			if (bufsize + 4 > bufcapacity) flush();
-
-			buffer[bufsize + 0] = d0;
-			buffer[bufsize + 1] = d1;
-			buffer[bufsize + 2] = d2;
-			buffer[bufsize + 3] = d3;
-			bufsize += 4;
-		}
-
-		void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
-		{
-			if (bufsize + 5 > bufcapacity) flush();
-
-			buffer[bufsize + 0] = d0;
-			buffer[bufsize + 1] = d1;
-			buffer[bufsize + 2] = d2;
-			buffer[bufsize + 3] = d3;
-			buffer[bufsize + 4] = d4;
-			bufsize += 5;
-		}
-
-		void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
-		{
-			if (bufsize + 6 > bufcapacity) flush();
-
-			buffer[bufsize + 0] = d0;
-			buffer[bufsize + 1] = d1;
-			buffer[bufsize + 2] = d2;
-			buffer[bufsize + 3] = d3;
-			buffer[bufsize + 4] = d4;
-			buffer[bufsize + 5] = d5;
-			bufsize += 6;
-		}
-
-		// utf8 maximum expansion: x4 (-> utf32)
-		// utf16 maximum expansion: x2 (-> utf32)
-		// utf32 maximum expansion: x1
-		enum
-		{
-			bufcapacitybytes =
-			#ifdef PUGIXML_MEMORY_OUTPUT_STACK
-				PUGIXML_MEMORY_OUTPUT_STACK
-			#else
-				10240
-			#endif
-			,
-			bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
-		};
-
-		char_t buffer[bufcapacity];
-
-		union
-		{
-			uint8_t data_u8[4 * bufcapacity];
-			uint16_t data_u16[2 * bufcapacity];
-			uint32_t data_u32[bufcapacity];
-			char_t data_char[bufcapacity];
-		} scratch;
-
-		xml_writer& writer;
-		size_t bufsize;
-		xml_encoding encoding;
-	};
-
-	PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
-	{
-		while (*s)
-		{
-			const char_t* prev = s;
-			
-			// While *s is a usual symbol
-			while (!PUGI__IS_CHARTYPEX(*s, type)) ++s;
-		
-			writer.write(prev, static_cast<size_t>(s - prev));
-
-			switch (*s)
-			{
-				case 0: break;
-				case '&':
-					writer.write('&', 'a', 'm', 'p', ';');
-					++s;
-					break;
-				case '<':
-					writer.write('&', 'l', 't', ';');
-					++s;
-					break;
-				case '>':
-					writer.write('&', 'g', 't', ';');
-					++s;
-					break;
-				case '"':
-					writer.write('&', 'q', 'u', 'o', 't', ';');
-					++s;
-					break;
-				default: // s is not a usual symbol
-				{
-					unsigned int ch = static_cast<unsigned int>(*s++);
-					assert(ch < 32);
-
-					writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
-				}
-			}
-		}
-	}
-
-	PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
-	{
-		if (flags & format_no_escapes)
-			writer.write(s);
-		else
-			text_output_escaped(writer, s, type);
-	}
-
-	PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
-	{
-		do
-		{
-			writer.write('<', '!', '[', 'C', 'D');
-			writer.write('A', 'T', 'A', '[');
-
-			const char_t* prev = s;
-
-			// look for ]]> sequence - we can't output it as is since it terminates CDATA
-			while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
-
-			// skip ]] if we stopped at ]]>, > will go to the next CDATA section
-			if (*s) s += 2;
-
-			writer.write(prev, static_cast<size_t>(s - prev));
-
-			writer.write(']', ']', '>');
-		}
-		while (*s);
-	}
-
-	PUGI__FN void node_output_attributes(xml_buffered_writer& writer, const xml_node& node, unsigned int flags)
-	{
-		const char_t* default_name = PUGIXML_TEXT(":anonymous");
-
-		for (xml_attribute a = node.first_attribute(); a; a = a.next_attribute())
-		{
-			writer.write(' ');
-			writer.write(a.name()[0] ? a.name() : default_name);
-			writer.write('=', '"');
-
-			text_output(writer, a.value(), ctx_special_attr, flags);
-
-			writer.write('"');
-		}
-	}
-
-	PUGI__FN void node_output(xml_buffered_writer& writer, const xml_node& node, const char_t* indent, unsigned int flags, unsigned int depth)
-	{
-		const char_t* default_name = PUGIXML_TEXT(":anonymous");
-
-		if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
-			for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
-
-		switch (node.type())
-		{
-		case node_document:
-		{
-			for (xml_node n = node.first_child(); n; n = n.next_sibling())
-				node_output(writer, n, indent, flags, depth);
-			break;
-		}
-			
-		case node_element:
-		{
-			const char_t* name = node.name()[0] ? node.name() : default_name;
-
-			writer.write('<');
-			writer.write(name);
-
-			node_output_attributes(writer, node, flags);
-
-			if (flags & format_raw)
-			{
-				if (!node.first_child())
-					writer.write(' ', '/', '>');
-				else
-				{
-					writer.write('>');
-
-					for (xml_node n = node.first_child(); n; n = n.next_sibling())
-						node_output(writer, n, indent, flags, depth + 1);
-
-					writer.write('<', '/');
-					writer.write(name);
-					writer.write('>');
-				}
-			}
-			else if (!node.first_child())
-				writer.write(' ', '/', '>', '\n');
-			else if (node.first_child() == node.last_child() && (node.first_child().type() == node_pcdata || node.first_child().type() == node_cdata))
-			{
-				writer.write('>');
-
-				if (node.first_child().type() == node_pcdata)
-					text_output(writer, node.first_child().value(), ctx_special_pcdata, flags);
-				else
-					text_output_cdata(writer, node.first_child().value());
-
-				writer.write('<', '/');
-				writer.write(name);
-				writer.write('>', '\n');
-			}
-			else
-			{
-				writer.write('>', '\n');
-				
-				for (xml_node n = node.first_child(); n; n = n.next_sibling())
-					node_output(writer, n, indent, flags, depth + 1);
-
-				if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
-					for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
-				
-				writer.write('<', '/');
-				writer.write(name);
-				writer.write('>', '\n');
-			}
-
-			break;
-		}
-		
-		case node_pcdata:
-			text_output(writer, node.value(), ctx_special_pcdata, flags);
-			if ((flags & format_raw) == 0) writer.write('\n');
-			break;
-
-		case node_cdata:
-			text_output_cdata(writer, node.value());
-			if ((flags & format_raw) == 0) writer.write('\n');
-			break;
-
-		case node_comment:
-			writer.write('<', '!', '-', '-');
-			writer.write(node.value());
-			writer.write('-', '-', '>');
-			if ((flags & format_raw) == 0) writer.write('\n');
-			break;
-
-		case node_pi:
-		case node_declaration:
-			writer.write('<', '?');
-			writer.write(node.name()[0] ? node.name() : default_name);
-
-			if (node.type() == node_declaration)
-			{
-				node_output_attributes(writer, node, flags);
-			}
-			else if (node.value()[0])
-			{
-				writer.write(' ');
-				writer.write(node.value());
-			}
-
-			writer.write('?', '>');
-			if ((flags & format_raw) == 0) writer.write('\n');
-			break;
-
-		case node_doctype:
-			writer.write('<', '!', 'D', 'O', 'C');
-			writer.write('T', 'Y', 'P', 'E');
-
-			if (node.value()[0])
-			{
-				writer.write(' ');
-				writer.write(node.value());
-			}
-
-			writer.write('>');
-			if ((flags & format_raw) == 0) writer.write('\n');
-			break;
-
-		default:
-			assert(!"Invalid node type");
-		}
-	}
-
-	inline bool has_declaration(const xml_node& node)
-	{
-		for (xml_node child = node.first_child(); child; child = child.next_sibling())
-		{
-			xml_node_type type = child.type();
-
-			if (type == node_declaration) return true;
-			if (type == node_element) return false;
-		}
-
-		return false;
-	}
-
-	inline bool allow_insert_child(xml_node_type parent, xml_node_type child)
-	{
-		if (parent != node_document && parent != node_element) return false;
-		if (child == node_document || child == node_null) return false;
-		if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
-
-		return true;
-	}
-
-	PUGI__FN void recursive_copy_skip(xml_node& dest, const xml_node& source, const xml_node& skip)
-	{
-		assert(dest.type() == source.type());
-
-		switch (source.type())
-		{
-		case node_element:
-		{
-			dest.set_name(source.name());
-
-			for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())
-				dest.append_attribute(a.name()).set_value(a.value());
-
-			for (xml_node c = source.first_child(); c; c = c.next_sibling())
-			{
-				if (c == skip) continue;
-
-				xml_node cc = dest.append_child(c.type());
-				assert(cc);
-
-				recursive_copy_skip(cc, c, skip);
-			}
-
-			break;
-		}
-
-		case node_pcdata:
-		case node_cdata:
-		case node_comment:
-		case node_doctype:
-			dest.set_value(source.value());
-			break;
-
-		case node_pi:
-			dest.set_name(source.name());
-			dest.set_value(source.value());
-			break;
-
-		case node_declaration:
-		{
-			dest.set_name(source.name());
-
-			for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())
-				dest.append_attribute(a.name()).set_value(a.value());
-
-			break;
-		}
-
-		default:
-			assert(!"Invalid node type");
-		}
-	}
-
-	inline bool is_text_node(xml_node_struct* node)
-	{
-		xml_node_type type = static_cast<xml_node_type>((node->header & impl::xml_memory_page_type_mask) + 1);
-
-		return type == node_pcdata || type == node_cdata;
-	}
-
-	// get value with conversion functions
-	PUGI__FN int get_value_int(const char_t* value, int def)
-	{
-		if (!value) return def;
-
-	#ifdef PUGIXML_WCHAR_MODE
-		return static_cast<int>(wcstol(value, 0, 10));
-	#else
-		return static_cast<int>(strtol(value, 0, 10));
-	#endif
-	}
-
-	PUGI__FN unsigned int get_value_uint(const char_t* value, unsigned int def)
-	{
-		if (!value) return def;
-
-	#ifdef PUGIXML_WCHAR_MODE
-		return static_cast<unsigned int>(wcstoul(value, 0, 10));
-	#else
-		return static_cast<unsigned int>(strtoul(value, 0, 10));
-	#endif
-	}
-
-	PUGI__FN double get_value_double(const char_t* value, double def)
-	{
-		if (!value) return def;
-
-	#ifdef PUGIXML_WCHAR_MODE
-		return wcstod(value, 0);
-	#else
-		return strtod(value, 0);
-	#endif
-	}
-
-	PUGI__FN float get_value_float(const char_t* value, float def)
-	{
-		if (!value) return def;
-
-	#ifdef PUGIXML_WCHAR_MODE
-		return static_cast<float>(wcstod(value, 0));
-	#else
-		return static_cast<float>(strtod(value, 0));
-	#endif
-	}
-
-	PUGI__FN bool get_value_bool(const char_t* value, bool def)
-	{
-		if (!value) return def;
-
-		// only look at first char
-		char_t first = *value;
-
-		// 1*, t* (true), T* (True), y* (yes), Y* (YES)
-		return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
-	}
-
-	// set value with conversion functions
-	PUGI__FN bool set_value_buffer(char_t*& dest, uintptr_t& header, uintptr_t header_mask, char (&buf)[128])
-	{
-	#ifdef PUGIXML_WCHAR_MODE
-		char_t wbuf[128];
-		impl::widen_ascii(wbuf, buf);
-
-		return strcpy_insitu(dest, header, header_mask, wbuf);
-	#else
-		return strcpy_insitu(dest, header, header_mask, buf);
-	#endif
-	}
-
-	PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, int value)
-	{
-		char buf[128];
-		sprintf(buf, "%d", value);
-	
-		return set_value_buffer(dest, header, header_mask, buf);
-	}
-
-	PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned int value)
-	{
-		char buf[128];
-		sprintf(buf, "%u", value);
-
-		return set_value_buffer(dest, header, header_mask, buf);
-	}
-
-	PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, double value)
-	{
-		char buf[128];
-		sprintf(buf, "%g", value);
-
-		return set_value_buffer(dest, header, header_mask, buf);
-	}
-	
-	PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, bool value)
-	{
-		return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
-	}
-
-	// we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
-	PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
-	{
-	#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
-		// there are 64-bit versions of fseek/ftell, let's use them
-		typedef __int64 length_type;
-
-		_fseeki64(file, 0, SEEK_END);
-		length_type length = _ftelli64(file);
-		_fseeki64(file, 0, SEEK_SET);
-	#elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && !defined(__STRICT_ANSI__)
-		// there are 64-bit versions of fseek/ftell, let's use them
-		typedef off64_t length_type;
-
-		fseeko64(file, 0, SEEK_END);
-		length_type length = ftello64(file);
-		fseeko64(file, 0, SEEK_SET);
-	#else
-		// if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
-		typedef long length_type;
-
-		fseek(file, 0, SEEK_END);
-		length_type length = ftell(file);
-		fseek(file, 0, SEEK_SET);
-	#endif
-
-		// check for I/O errors
-		if (length < 0) return status_io_error;
-		
-		// check for overflow
-		size_t result = static_cast<size_t>(length);
-
-		if (static_cast<length_type>(result) != length) return status_out_of_memory;
-
-		// finalize
-		out_result = result;
-
-		return status_ok;
-	}
-
-	PUGI__FN xml_parse_result load_file_impl(xml_document& doc, FILE* file, unsigned int options, xml_encoding encoding)
-	{
-		if (!file) return make_parse_result(status_file_not_found);
-
-		// get file size (can result in I/O errors)
-		size_t size = 0;
-		xml_parse_status size_status = get_file_size(file, size);
-
-		if (size_status != status_ok)
-		{
-			fclose(file);
-			return make_parse_result(size_status);
-		}
-		
-		// allocate buffer for the whole file
-		char* contents = static_cast<char*>(xml_memory::allocate(size > 0 ? size : 1));
-
-		if (!contents)
-		{
-			fclose(file);
-			return make_parse_result(status_out_of_memory);
-		}
-
-		// read file in memory
-		size_t read_size = fread(contents, 1, size, file);
-		fclose(file);
-
-		if (read_size != size)
-		{
-			xml_memory::deallocate(contents);
-			return make_parse_result(status_io_error);
-		}
-		
-		return doc.load_buffer_inplace_own(contents, size, options, encoding);
-	}
-
-#ifndef PUGIXML_NO_STL
-	template <typename T> struct xml_stream_chunk
-	{
-		static xml_stream_chunk* create()
-		{
-			void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
-			
-			return new (memory) xml_stream_chunk();
-		}
-
-		static void destroy(void* ptr)
-		{
-			xml_stream_chunk* chunk = static_cast<xml_stream_chunk*>(ptr);
-
-			// free chunk chain
-			while (chunk)
-			{
-				xml_stream_chunk* next = chunk->next;
-				xml_memory::deallocate(chunk);
-				chunk = next;
-			}
-		}
-
-		xml_stream_chunk(): next(0), size(0)
-		{
-		}
-
-		xml_stream_chunk* next;
-		size_t size;
-
-		T data[xml_memory_page_size / sizeof(T)];
-	};
-
-	template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
-	{
-		buffer_holder chunks(0, xml_stream_chunk<T>::destroy);
-
-		// read file to a chunk list
-		size_t total = 0;
-		xml_stream_chunk<T>* last = 0;
-
-		while (!stream.eof())
-		{
-			// allocate new chunk
-			xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
-			if (!chunk) return status_out_of_memory;
-
-			// append chunk to list
-			if (last) last = last->next = chunk;
-			else chunks.data = last = chunk;
-
-			// read data to chunk
-			stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
-			chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
-
-			// read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
-			if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
-
-			// guard against huge files (chunk size is small enough to make this overflow check work)
-			if (total + chunk->size < total) return status_out_of_memory;
-			total += chunk->size;
-		}
-
-		// copy chunk list to a contiguous buffer
-		char* buffer = static_cast<char*>(xml_memory::allocate(total));
-		if (!buffer) return status_out_of_memory;
-
-		char* write = buffer;
-
-		for (xml_stream_chunk<T>* chunk = static_cast<xml_stream_chunk<T>*>(chunks.data); chunk; chunk = chunk->next)
-		{
-			assert(write + chunk->size <= buffer + total);
-			memcpy(write, chunk->data, chunk->size);
-			write += chunk->size;
-		}
-
-		assert(write == buffer + total);
-
-		// return buffer
-		*out_buffer = buffer;
-		*out_size = total;
-
-		return status_ok;
-	}
-
-	template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
-	{
-		// get length of remaining data in stream
-		typename std::basic_istream<T>::pos_type pos = stream.tellg();
-		stream.seekg(0, std::ios::end);
-		std::streamoff length = stream.tellg() - pos;
-		stream.seekg(pos);
-
-		if (stream.fail() || pos < 0) return status_io_error;
-
-		// guard against huge files
-		size_t read_length = static_cast<size_t>(length);
-
-		if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
-
-		// read stream data into memory (guard against stream exceptions with buffer holder)
-		buffer_holder buffer(xml_memory::allocate((read_length > 0 ? read_length : 1) * sizeof(T)), xml_memory::deallocate);
-		if (!buffer.data) return status_out_of_memory;
-
-		stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
-
-		// read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
-		if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
-
-		// return buffer
-		size_t actual_length = static_cast<size_t>(stream.gcount());
-		assert(actual_length <= read_length);
-
-		*out_buffer = buffer.release();
-		*out_size = actual_length * sizeof(T);
-
-		return status_ok;
-	}
-
-	template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding)
-	{
-		void* buffer = 0;
-		size_t size = 0;
-
-		// load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
-		xml_parse_status status = (stream.tellg() < 0) ? load_stream_data_noseek(stream, &buffer, &size) : load_stream_data_seek(stream, &buffer, &size);
-		if (status != status_ok) return make_parse_result(status);
-
-		return doc.load_buffer_inplace_own(buffer, size, options, encoding);
-	}
-#endif
-
-#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && !defined(__STRICT_ANSI__))
-	PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
-	{
-		return _wfopen(path, mode);
-	}
-#else
-	PUGI__FN char* convert_path_heap(const wchar_t* str)
-	{
-		assert(str);
-
-		// first pass: get length in utf8 characters
-		size_t length = wcslen(str);
-		size_t size = as_utf8_begin(str, length);
-
-		// allocate resulting string
-		char* result = static_cast<char*>(xml_memory::allocate(size + 1));
-		if (!result) return 0;
-
-		// second pass: convert to utf8
-		as_utf8_end(result, size, str, length);
-
-		return result;
-	}
-
-	PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
-	{
-		// there is no standard function to open wide paths, so our best bet is to try utf8 path
-		char* path_utf8 = convert_path_heap(path);
-		if (!path_utf8) return 0;
-
-		// convert mode to ASCII (we mirror _wfopen interface)
-		char mode_ascii[4] = {0};
-		for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
-
-		// try to open the utf8 path
-		FILE* result = fopen(path_utf8, mode_ascii);
-
-		// free dummy buffer
-		xml_memory::deallocate(path_utf8);
-
-		return result;
-	}
-#endif
-
-	PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
-	{
-		if (!file) return false;
-
-		xml_writer_file writer(file);
-		doc.save(writer, indent, flags, encoding);
-
-		int result = ferror(file);
-
-		fclose(file);
-
-		return result == 0;
-	}
-PUGI__NS_END
-
-namespace pugi
-{
-	PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
-	{
-	}
-
-	PUGI__FN void xml_writer_file::write(const void* data, size_t size)
-	{
-		size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
-		(void)!result; // unfortunately we can't do proper error handling here
-	}
-
-#ifndef PUGIXML_NO_STL
-	PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
-	{
-	}
-
-	PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
-	{
-	}
-
-	PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
-	{
-		if (narrow_stream)
-		{
-			assert(!wide_stream);
-			narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
-		}
-		else
-		{
-			assert(wide_stream);
-			assert(size % sizeof(wchar_t) == 0);
-
-			wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
-		}
-	}
-#endif
-
-	PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
-	{
-	}
-	
-	PUGI__FN xml_tree_walker::~xml_tree_walker()
-	{
-	}
-
-	PUGI__FN int xml_tree_walker::depth() const
-	{
-		return _depth;
-	}
-
-	PUGI__FN bool xml_tree_walker::begin(xml_node&)
-	{
-		return true;
-	}
-
-	PUGI__FN bool xml_tree_walker::end(xml_node&)
-	{
-		return true;
-	}
-
-	PUGI__FN xml_attribute::xml_attribute(): _attr(0)
-	{
-	}
-
-	PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
-	{
-	}
-
-	PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
-	{
-	}
-
-	PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
-	{
-		return _attr ? unspecified_bool_xml_attribute : 0;
-	}
-
-	PUGI__FN bool xml_attribute::operator!() const
-	{
-		return !_attr;
-	}
-
-	PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
-	{
-		return (_attr == r._attr);
-	}
-	
-	PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
-	{
-		return (_attr != r._attr);
-	}
-
-	PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
-	{
-		return (_attr < r._attr);
-	}
-	
-	PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
-	{
-		return (_attr > r._attr);
-	}
-	
-	PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
-	{
-		return (_attr <= r._attr);
-	}
-	
-	PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
-	{
-		return (_attr >= r._attr);
-	}
-
-	PUGI__FN xml_attribute xml_attribute::next_attribute() const
-	{
-		return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
-	}
-
-	PUGI__FN xml_attribute xml_attribute::previous_attribute() const
-	{
-		return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
-	}
-
-	PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
-	{
-		return (_attr && _attr->value) ? _attr->value : def;
-	}
-
-	PUGI__FN int xml_attribute::as_int(int def) const
-	{
-		return impl::get_value_int(_attr ? _attr->value : 0, def);
-	}
-
-	PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
-	{
-		return impl::get_value_uint(_attr ? _attr->value : 0, def);
-	}
-
-	PUGI__FN double xml_attribute::as_double(double def) const
-	{
-		return impl::get_value_double(_attr ? _attr->value : 0, def);
-	}
-
-	PUGI__FN float xml_attribute::as_float(float def) const
-	{
-		return impl::get_value_float(_attr ? _attr->value : 0, def);
-	}
-
-	PUGI__FN bool xml_attribute::as_bool(bool def) const
-	{
-		return impl::get_value_bool(_attr ? _attr->value : 0, def);
-	}
-
-	PUGI__FN bool xml_attribute::empty() const
-	{
-		return !_attr;
-	}
-
-	PUGI__FN const char_t* xml_attribute::name() const
-	{
-		return (_attr && _attr->name) ? _attr->name : PUGIXML_TEXT("");
-	}
-
-	PUGI__FN const char_t* xml_attribute::value() const
-	{
-		return (_attr && _attr->value) ? _attr->value : PUGIXML_TEXT("");
-	}
-
-	PUGI__FN size_t xml_attribute::hash_value() const
-	{
-		return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
-	}
-
-	PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
-	{
-		return _attr;
-	}
-
-	PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
-	{
-		set_value(rhs);
-		return *this;
-	}
-	
-	PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
-	{
-		set_value(rhs);
-		return *this;
-	}
-
-	PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
-	{
-		set_value(rhs);
-		return *this;
-	}
-
-	PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
-	{
-		set_value(rhs);
-		return *this;
-	}
-	
-	PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
-	{
-		set_value(rhs);
-		return *this;
-	}
-
-	PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
-	{
-		if (!_attr) return false;
-		
-		return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs);
-	}
-		
-	PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
-	{
-		if (!_attr) return false;
-
-		return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
-	}
-
-	PUGI__FN bool xml_attribute::set_value(int rhs)
-	{
-		if (!_attr) return false;
-
-		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
-	}
-
-	PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
-	{
-		if (!_attr) return false;
-
-		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
-	}
-
-	PUGI__FN bool xml_attribute::set_value(double rhs)
-	{
-		if (!_attr) return false;
-
-		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
-	}
-	
-	PUGI__FN bool xml_attribute::set_value(bool rhs)
-	{
-		if (!_attr) return false;
-
-		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
-	}
-
-#ifdef __BORLANDC__
-	PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
-	{
-		return (bool)lhs && rhs;
-	}
-
-	PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
-	{
-		return (bool)lhs || rhs;
-	}
-#endif
-
-	PUGI__FN xml_node::xml_node(): _root(0)
-	{
-	}
-
-	PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
-	{
-	}
-	
-	PUGI__FN static void unspecified_bool_xml_node(xml_node***)
-	{
-	}
-
-	PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
-	{
-		return _root ? unspecified_bool_xml_node : 0;
-	}
-
-	PUGI__FN bool xml_node::operator!() const
-	{
-		return !_root;
-	}
-
-	PUGI__FN xml_node::iterator xml_node::begin() const
-	{
-		return iterator(_root ? _root->first_child : 0, _root);
-	}
-
-	PUGI__FN xml_node::iterator xml_node::end() const
-	{
-		return iterator(0, _root);
-	}
-	
-	PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
-	{
-		return attribute_iterator(_root ? _root->first_attribute : 0, _root);
-	}
-
-	PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
-	{
-		return attribute_iterator(0, _root);
-	}
-	
-	PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
-	{
-		return xml_object_range<xml_node_iterator>(begin(), end());
-	}
-
-	PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
-	{
-		return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_), name_), xml_named_node_iterator());
-	}
-
-	PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
-	{
-		return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
-	}
-
-	PUGI__FN bool xml_node::operator==(const xml_node& r) const
-	{
-		return (_root == r._root);
-	}
-
-	PUGI__FN bool xml_node::operator!=(const xml_node& r) const
-	{
-		return (_root != r._root);
-	}
-
-	PUGI__FN bool xml_node::operator<(const xml_node& r) const
-	{
-		return (_root < r._root);
-	}
-	
-	PUGI__FN bool xml_node::operator>(const xml_node& r) const
-	{
-		return (_root > r._root);
-	}
-	
-	PUGI__FN bool xml_node::operator<=(const xml_node& r) const
-	{
-		return (_root <= r._root);
-	}
-	
-	PUGI__FN bool xml_node::operator>=(const xml_node& r) const
-	{
-		return (_root >= r._root);
-	}
-
-	PUGI__FN bool xml_node::empty() const
-	{
-		return !_root;
-	}
-	
-	PUGI__FN const char_t* xml_node::name() const
-	{
-		return (_root && _root->name) ? _root->name : PUGIXML_TEXT("");
-	}
-
-	PUGI__FN xml_node_type xml_node::type() const
-	{
-		return _root ? static_cast<xml_node_type>((_root->header & impl::xml_memory_page_type_mask) + 1) : node_null;
-	}
-	
-	PUGI__FN const char_t* xml_node::value() const
-	{
-		return (_root && _root->value) ? _root->value : PUGIXML_TEXT("");
-	}
-	
-	PUGI__FN xml_node xml_node::child(const char_t* name_) const
-	{
-		if (!_root) return xml_node();
-
-		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
-			if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
-
-		return xml_node();
-	}
-
-	PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
-	{
-		if (!_root) return xml_attribute();
-
-		for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
-			if (i->name && impl::strequal(name_, i->name))
-				return xml_attribute(i);
-		
-		return xml_attribute();
-	}
-	
-	PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
-	{
-		if (!_root) return xml_node();
-		
-		for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
-			if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
-
-		return xml_node();
-	}
-
-	PUGI__FN xml_node xml_node::next_sibling() const
-	{
-		if (!_root) return xml_node();
-		
-		if (_root->next_sibling) return xml_node(_root->next_sibling);
-		else return xml_node();
-	}
-
-	PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
-	{
-		if (!_root) return xml_node();
-		
-		for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
-			if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
-
-		return xml_node();
-	}
-
-	PUGI__FN xml_node xml_node::previous_sibling() const
-	{
-		if (!_root) return xml_node();
-		
-		if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
-		else return xml_node();
-	}
-
-	PUGI__FN xml_node xml_node::parent() const
-	{
-		return _root ? xml_node(_root->parent) : xml_node();
-	}
-
-	PUGI__FN xml_node xml_node::root() const
-	{
-		if (!_root) return xml_node();
-
-		impl::xml_memory_page* page = reinterpret_cast<impl::xml_memory_page*>(_root->header & impl::xml_memory_page_pointer_mask);
-
-		return xml_node(static_cast<impl::xml_document_struct*>(page->allocator));
-	}
-
-	PUGI__FN xml_text xml_node::text() const
-	{
-		return xml_text(_root);
-	}
-
-	PUGI__FN const char_t* xml_node::child_value() const
-	{
-		if (!_root) return PUGIXML_TEXT("");
-		
-		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
-			if (i->value && impl::is_text_node(i))
-				return i->value;
-
-		return PUGIXML_TEXT("");
-	}
-
-	PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
-	{
-		return child(name_).child_value();
-	}
-
-	PUGI__FN xml_attribute xml_node::first_attribute() const
-	{
-		return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
-	}
-
-	PUGI__FN xml_attribute xml_node::last_attribute() const
-	{
-		return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
-	}
-
-	PUGI__FN xml_node xml_node::first_child() const
-	{
-		return _root ? xml_node(_root->first_child) : xml_node();
-	}
-
-	PUGI__FN xml_node xml_node::last_child() const
-	{
-		return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
-	}
-
-	PUGI__FN bool xml_node::set_name(const char_t* rhs)
-	{
-		switch (type())
-		{
-		case node_pi:
-		case node_declaration:
-		case node_element:
-			return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs);
-
-		default:
-			return false;
-		}
-	}
-		
-	PUGI__FN bool xml_node::set_value(const char_t* rhs)
-	{
-		switch (type())
-		{
-		case node_pi:
-		case node_cdata:
-		case node_pcdata:
-		case node_comment:
-		case node_doctype:
-			return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs);
-
-		default:
-			return false;
-		}
-	}
-
-	PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
-	{
-		if (type() != node_element && type() != node_declaration) return xml_attribute();
-		
-		xml_attribute a(impl::append_attribute_ll(_root, impl::get_allocator(_root)));
-		a.set_name(name_);
-		
-		return a;
-	}
-
-	PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
-	{
-		if (type() != node_element && type() != node_declaration) return xml_attribute();
-		
-		xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
-		if (!a) return xml_attribute();
-
-		a.set_name(name_);
-		
-		xml_attribute_struct* head = _root->first_attribute;
-
-		if (head)
-		{
-			a._attr->prev_attribute_c = head->prev_attribute_c;
-			head->prev_attribute_c = a._attr;
-		}
-		else
-			a._attr->prev_attribute_c = a._attr;
-		
-		a._attr->next_attribute = head;
-		_root->first_attribute = a._attr;
-				
-		return a;
-	}
-
-	PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
-	{
-		if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();
-		
-		// check that attribute belongs to *this
-		xml_attribute_struct* cur = attr._attr;
-
-		while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;
-
-		if (cur != _root->first_attribute) return xml_attribute();
-
-		xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
-		if (!a) return xml_attribute();
-
-		a.set_name(name_);
-
-		if (attr._attr->prev_attribute_c->next_attribute)
-			attr._attr->prev_attribute_c->next_attribute = a._attr;
-		else
-			_root->first_attribute = a._attr;
-		
-		a._attr->prev_attribute_c = attr._attr->prev_attribute_c;
-		a._attr->next_attribute = attr._attr;
-		attr._attr->prev_attribute_c = a._attr;
-				
-		return a;
-	}
-
-	PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
-	{
-		if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();
-		
-		// check that attribute belongs to *this
-		xml_attribute_struct* cur = attr._attr;
-
-		while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;
-
-		if (cur != _root->first_attribute) return xml_attribute();
-
-		xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
-		if (!a) return xml_attribute();
-
-		a.set_name(name_);
-
-		if (attr._attr->next_attribute)
-			attr._attr->next_attribute->prev_attribute_c = a._attr;
-		else
-			_root->first_attribute->prev_attribute_c = a._attr;
-		
-		a._attr->next_attribute = attr._attr->next_attribute;
-		a._attr->prev_attribute_c = attr._attr;
-		attr._attr->next_attribute = a._attr;
-
-		return a;
-	}
-
-	PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
-	{
-		if (!proto) return xml_attribute();
-
-		xml_attribute result = append_attribute(proto.name());
-		result.set_value(proto.value());
-
-		return result;
-	}
-
-	PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
-	{
-		if (!proto) return xml_attribute();
-
-		xml_attribute result = prepend_attribute(proto.name());
-		result.set_value(proto.value());
-
-		return result;
-	}
-
-	PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
-	{
-		if (!proto) return xml_attribute();
-
-		xml_attribute result = insert_attribute_after(proto.name(), attr);
-		result.set_value(proto.value());
-
-		return result;
-	}
-
-	PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
-	{
-		if (!proto) return xml_attribute();
-
-		xml_attribute result = insert_attribute_before(proto.name(), attr);
-		result.set_value(proto.value());
-
-		return result;
-	}
-
-	PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
-	{
-		if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
-		
-		xml_node n(impl::append_node(_root, impl::get_allocator(_root), type_));
-
-		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
-
-		return n;
-	}
-
-	PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
-	{
-		if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
-		
-		xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
-		if (!n) return xml_node();
-
-		n._root->parent = _root;
-
-		xml_node_struct* head = _root->first_child;
-
-		if (head)
-		{
-			n._root->prev_sibling_c = head->prev_sibling_c;
-			head->prev_sibling_c = n._root;
-		}
-		else
-			n._root->prev_sibling_c = n._root;
-		
-		n._root->next_sibling = head;
-		_root->first_child = n._root;
-				
-		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
-
-		return n;
-	}
-
-	PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
-	{
-		if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
-		if (!node._root || node._root->parent != _root) return xml_node();
-	
-		xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
-		if (!n) return xml_node();
-
-		n._root->parent = _root;
-		
-		if (node._root->prev_sibling_c->next_sibling)
-			node._root->prev_sibling_c->next_sibling = n._root;
-		else
-			_root->first_child = n._root;
-		
-		n._root->prev_sibling_c = node._root->prev_sibling_c;
-		n._root->next_sibling = node._root;
-		node._root->prev_sibling_c = n._root;
-
-		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
-
-		return n;
-	}
-
-	PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
-	{
-		if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
-		if (!node._root || node._root->parent != _root) return xml_node();
-	
-		xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
-		if (!n) return xml_node();
-
-		n._root->parent = _root;
-	
-		if (node._root->next_sibling)
-			node._root->next_sibling->prev_sibling_c = n._root;
-		else
-			_root->first_child->prev_sibling_c = n._root;
-		
-		n._root->next_sibling = node._root->next_sibling;
-		n._root->prev_sibling_c = node._root;
-		node._root->next_sibling = n._root;
-
-		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
-
-		return n;
-	}
-
-	PUGI__FN xml_node xml_node::append_child(const char_t* name_)
-	{
-		xml_node result = append_child(node_element);
-
-		result.set_name(name_);
-
-		return result;
-	}
-
-	PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
-	{
-		xml_node result = prepend_child(node_element);
-
-		result.set_name(name_);
-
-		return result;
-	}
-
-	PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
-	{
-		xml_node result = insert_child_after(node_element, node);
-
-		result.set_name(name_);
-
-		return result;
-	}
-
-	PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
-	{
-		xml_node result = insert_child_before(node_element, node);
-
-		result.set_name(name_);
-
-		return result;
-	}
-
-	PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
-	{
-		xml_node result = append_child(proto.type());
-
-		if (result) impl::recursive_copy_skip(result, proto, result);
-
-		return result;
-	}
-
-	PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
-	{
-		xml_node result = prepend_child(proto.type());
-
-		if (result) impl::recursive_copy_skip(result, proto, result);
-
-		return result;
-	}
-
-	PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
-	{
-		xml_node result = insert_child_after(proto.type(), node);
-
-		if (result) impl::recursive_copy_skip(result, proto, result);
-
-		return result;
-	}
-
-	PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
-	{
-		xml_node result = insert_child_before(proto.type(), node);
-
-		if (result) impl::recursive_copy_skip(result, proto, result);
-
-		return result;
-	}
-
-	PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
-	{
-		return remove_attribute(attribute(name_));
-	}
-
-	PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
-	{
-		if (!_root || !a._attr) return false;
-
-		// check that attribute belongs to *this
-		xml_attribute_struct* attr = a._attr;
-
-		while (attr->prev_attribute_c->next_attribute) attr = attr->prev_attribute_c;
-
-		if (attr != _root->first_attribute) return false;
-
-		if (a._attr->next_attribute) a._attr->next_attribute->prev_attribute_c = a._attr->prev_attribute_c;
-		else if (_root->first_attribute) _root->first_attribute->prev_attribute_c = a._attr->prev_attribute_c;
-		
-		if (a._attr->prev_attribute_c->next_attribute) a._attr->prev_attribute_c->next_attribute = a._attr->next_attribute;
-		else _root->first_attribute = a._attr->next_attribute;
-
-		impl::destroy_attribute(a._attr, impl::get_allocator(_root));
-
-		return true;
-	}
-
-	PUGI__FN bool xml_node::remove_child(const char_t* name_)
-	{
-		return remove_child(child(name_));
-	}
-
-	PUGI__FN bool xml_node::remove_child(const xml_node& n)
-	{
-		if (!_root || !n._root || n._root->parent != _root) return false;
-
-		if (n._root->next_sibling) n._root->next_sibling->prev_sibling_c = n._root->prev_sibling_c;
-		else if (_root->first_child) _root->first_child->prev_sibling_c = n._root->prev_sibling_c;
-		
-		if (n._root->prev_sibling_c->next_sibling) n._root->prev_sibling_c->next_sibling = n._root->next_sibling;
-		else _root->first_child = n._root->next_sibling;
-		
-		impl::destroy_node(n._root, impl::get_allocator(_root));
-
-		return true;
-	}
-
-	PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
-	{
-		if (!_root) return xml_node();
-		
-		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
-			if (i->name && impl::strequal(name_, i->name))
-			{
-				for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
-					if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value))
-						return xml_node(i);
-			}
-
-		return xml_node();
-	}
-
-	PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
-	{
-		if (!_root) return xml_node();
-		
-		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
-			for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
-				if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value))
-					return xml_node(i);
-
-		return xml_node();
-	}
-
-#ifndef PUGIXML_NO_STL
-	PUGI__FN string_t xml_node::path(char_t delimiter) const
-	{
-		xml_node cursor = *this; // Make a copy.
-		
-		string_t result = cursor.name();
-
-		while (cursor.parent())
-		{
-			cursor = cursor.parent();
-			
-			string_t temp = cursor.name();
-			temp += delimiter;
-			temp += result;
-			result.swap(temp);
-		}
-
-		return result;
-	}
-#endif
-
-	PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
-	{
-		xml_node found = *this; // Current search context.
-
-		if (!_root || !path_ || !path_[0]) return found;
-
-		if (path_[0] == delimiter)
-		{
-			// Absolute path; e.g. '/foo/bar'
-			found = found.root();
-			++path_;
-		}
-
-		const char_t* path_segment = path_;
-
-		while (*path_segment == delimiter) ++path_segment;
-
-		const char_t* path_segment_end = path_segment;
-
-		while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
-
-		if (path_segment == path_segment_end) return found;
-
-		const char_t* next_segment = path_segment_end;
-
-		while (*next_segment == delimiter) ++next_segment;
-
-		if (*path_segment == '.' && path_segment + 1 == path_segment_end)
-			return found.first_element_by_path(next_segment, delimiter);
-		else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
-			return found.parent().first_element_by_path(next_segment, delimiter);
-		else
-		{
-			for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)
-			{
-				if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
-				{
-					xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
-
-					if (subsearch) return subsearch;
-				}
-			}
-
-			return xml_node();
-		}
-	}
-
-	PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
-	{
-		walker._depth = -1;
-		
-		xml_node arg_begin = *this;
-		if (!walker.begin(arg_begin)) return false;
-
-		xml_node cur = first_child();
-				
-		if (cur)
-		{
-			++walker._depth;
-
-			do 
-			{
-				xml_node arg_for_each = cur;
-				if (!walker.for_each(arg_for_each))
-					return false;
-						
-				if (cur.first_child())
-				{
-					++walker._depth;
-					cur = cur.first_child();
-				}
-				else if (cur.next_sibling())
-					cur = cur.next_sibling();
-				else
-				{
-					// Borland C++ workaround
-					while (!cur.next_sibling() && cur != *this && !cur.parent().empty())
-					{
-						--walker._depth;
-						cur = cur.parent();
-					}
-						
-					if (cur != *this)
-						cur = cur.next_sibling();
-				}
-			}
-			while (cur && cur != *this);
-		}
-
-		assert(walker._depth == -1);
-
-		xml_node arg_end = *this;
-		return walker.end(arg_end);
-	}
-
-	PUGI__FN size_t xml_node::hash_value() const
-	{
-		return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
-	}
-
-	PUGI__FN xml_node_struct* xml_node::internal_object() const
-	{
-		return _root;
-	}
-
-	PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
-	{
-		if (!_root) return;
-
-		impl::xml_buffered_writer buffered_writer(writer, encoding);
-
-		impl::node_output(buffered_writer, *this, indent, flags, depth);
-	}
-
-#ifndef PUGIXML_NO_STL
-	PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
-	{
-		xml_writer_stream writer(stream);
-
-		print(writer, indent, flags, encoding, depth);
-	}
-
-	PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
-	{
-		xml_writer_stream writer(stream);
-
-		print(writer, indent, flags, encoding_wchar, depth);
-	}
-#endif
-
-	PUGI__FN ptrdiff_t xml_node::offset_debug() const
-	{
-		xml_node_struct* r = root()._root;
-
-		if (!r) return -1;
-
-		const char_t* buffer = static_cast<impl::xml_document_struct*>(r)->buffer;
-
-		if (!buffer) return -1;
-
-		switch (type())
-		{
-		case node_document:
-			return 0;
-
-		case node_element:
-		case node_declaration:
-		case node_pi:
-			return (_root->header & impl::xml_memory_page_name_allocated_mask) ? -1 : _root->name - buffer;
-
-		case node_pcdata:
-		case node_cdata:
-		case node_comment:
-		case node_doctype:
-			return (_root->header & impl::xml_memory_page_value_allocated_mask) ? -1 : _root->value - buffer;
-
-		default:
-			return -1;
-		}
-	}
-
-#ifdef __BORLANDC__
-	PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
-	{
-		return (bool)lhs && rhs;
-	}
-
-	PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
-	{
-		return (bool)lhs || rhs;
-	}
-#endif
-
-	PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
-	{
-	}
-
-	PUGI__FN xml_node_struct* xml_text::_data() const
-	{
-		if (!_root || impl::is_text_node(_root)) return _root;
-
-		for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
-			if (impl::is_text_node(node))
-				return node;
-
-		return 0;
-	}
-
-	PUGI__FN xml_node_struct* xml_text::_data_new()
-	{
-		xml_node_struct* d = _data();
-		if (d) return d;
-
-		return xml_node(_root).append_child(node_pcdata).internal_object();
-	}
-
-	PUGI__FN xml_text::xml_text(): _root(0)
-	{
-	}
-
-	PUGI__FN static void unspecified_bool_xml_text(xml_text***)
-	{
-	}
-
-	PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
-	{
-		return _data() ? unspecified_bool_xml_text : 0;
-	}
-
-	PUGI__FN bool xml_text::operator!() const
-	{
-		return !_data();
-	}
-
-	PUGI__FN bool xml_text::empty() const
-	{
-		return _data() == 0;
-	}
-
-	PUGI__FN const char_t* xml_text::get() const
-	{
-		xml_node_struct* d = _data();
-
-		return (d && d->value) ? d->value : PUGIXML_TEXT("");
-	}
-
-	PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
-	{
-		xml_node_struct* d = _data();
-
-		return (d && d->value) ? d->value : def;
-	}
-
-	PUGI__FN int xml_text::as_int(int def) const
-	{
-		xml_node_struct* d = _data();
-
-		return impl::get_value_int(d ? d->value : 0, def);
-	}
-
-	PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
-	{
-		xml_node_struct* d = _data();
-
-		return impl::get_value_uint(d ? d->value : 0, def);
-	}
-
-	PUGI__FN double xml_text::as_double(double def) const
-	{
-		xml_node_struct* d = _data();
-
-		return impl::get_value_double(d ? d->value : 0, def);
-	}
-
-	PUGI__FN float xml_text::as_float(float def) const
-	{
-		xml_node_struct* d = _data();
-
-		return impl::get_value_float(d ? d->value : 0, def);
-	}
-
-	PUGI__FN bool xml_text::as_bool(bool def) const
-	{
-		xml_node_struct* d = _data();
-
-		return impl::get_value_bool(d ? d->value : 0, def);
-	}
-
-	PUGI__FN bool xml_text::set(const char_t* rhs)
-	{
-		xml_node_struct* dn = _data_new();
-
-		return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
-	}
-
-	PUGI__FN bool xml_text::set(int rhs)
-	{
-		xml_node_struct* dn = _data_new();
-
-		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
-	}
-
-	PUGI__FN bool xml_text::set(unsigned int rhs)
-	{
-		xml_node_struct* dn = _data_new();
-
-		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
-	}
-
-	PUGI__FN bool xml_text::set(double rhs)
-	{
-		xml_node_struct* dn = _data_new();
-
-		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
-	}
-
-	PUGI__FN bool xml_text::set(bool rhs)
-	{
-		xml_node_struct* dn = _data_new();
-
-		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
-	}
-
-	PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
-	{
-		set(rhs);
-		return *this;
-	}
-
-	PUGI__FN xml_text& xml_text::operator=(int rhs)
-	{
-		set(rhs);
-		return *this;
-	}
-
-	PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
-	{
-		set(rhs);
-		return *this;
-	}
-
-	PUGI__FN xml_text& xml_text::operator=(double rhs)
-	{
-		set(rhs);
-		return *this;
-	}
-
-	PUGI__FN xml_text& xml_text::operator=(bool rhs)
-	{
-		set(rhs);
-		return *this;
-	}
-
-	PUGI__FN xml_node xml_text::data() const
-	{
-		return xml_node(_data());
-	}
-
-#ifdef __BORLANDC__
-	PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
-	{
-		return (bool)lhs && rhs;
-	}
-
-	PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
-	{
-		return (bool)lhs || rhs;
-	}
-#endif
-
-	PUGI__FN xml_node_iterator::xml_node_iterator()
-	{
-	}
-
-	PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
-	{
-	}
-
-	PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
-	{
-	}
-
-	PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
-	{
-		return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
-	}
-	
-	PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
-	{
-		return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
-	}
-
-	PUGI__FN xml_node& xml_node_iterator::operator*() const
-	{
-		assert(_wrap._root);
-		return _wrap;
-	}
-
-	PUGI__FN xml_node* xml_node_iterator::operator->() const
-	{
-		assert(_wrap._root);
-		return const_cast<xml_node*>(&_wrap); // BCC32 workaround
-	}
-
-	PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
-	{
-		assert(_wrap._root);
-		_wrap._root = _wrap._root->next_sibling;
-		return *this;
-	}
-
-	PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
-	{
-		xml_node_iterator temp = *this;
-		++*this;
-		return temp;
-	}
-
-	PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
-	{
-		_wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
-		return *this;
-	}
-
-	PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
-	{
-		xml_node_iterator temp = *this;
-		--*this;
-		return temp;
-	}
-
-	PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
-	{
-	}
-
-	PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
-	{
-	}
-
-	PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
-	{
-	}
-
-	PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
-	{
-		return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
-	}
-	
-	PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
-	{
-		return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
-	}
-
-	PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
-	{
-		assert(_wrap._attr);
-		return _wrap;
-	}
-
-	PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
-	{
-		assert(_wrap._attr);
-		return const_cast<xml_attribute*>(&_wrap); // BCC32 workaround
-	}
-
-	PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
-	{
-		assert(_wrap._attr);
-		_wrap._attr = _wrap._attr->next_attribute;
-		return *this;
-	}
-
-	PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
-	{
-		xml_attribute_iterator temp = *this;
-		++*this;
-		return temp;
-	}
-
-	PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
-	{
-		_wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
-		return *this;
-	}
-
-	PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
-	{
-		xml_attribute_iterator temp = *this;
-		--*this;
-		return temp;
-	}
-
-	PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
-	{
-	}
-
-	PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _node(node), _name(name)
-	{
-	}
-
-	PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
-	{
-		return _node == rhs._node;
-	}
-
-	PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
-	{
-		return _node != rhs._node;
-	}
-
-	PUGI__FN xml_node& xml_named_node_iterator::operator*() const
-	{
-		assert(_node._root);
-		return _node;
-	}
-
-	PUGI__FN xml_node* xml_named_node_iterator::operator->() const
-	{
-		assert(_node._root);
-		return const_cast<xml_node*>(&_node); // BCC32 workaround
-	}
-
-	PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
-	{
-		assert(_node._root);
-		_node = _node.next_sibling(_name);
-		return *this;
-	}
-
-	PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
-	{
-		xml_named_node_iterator temp = *this;
-		++*this;
-		return temp;
-	}
-
-	PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
-	{
-	}
-
-	PUGI__FN xml_parse_result::operator bool() const
-	{
-		return status == status_ok;
-	}
-
-	PUGI__FN const char* xml_parse_result::description() const
-	{
-		switch (status)
-		{
-		case status_ok: return "No error";
-
-		case status_file_not_found: return "File was not found";
-		case status_io_error: return "Error reading from file/stream";
-		case status_out_of_memory: return "Could not allocate memory";
-		case status_internal_error: return "Internal error occurred";
-
-		case status_unrecognized_tag: return "Could not determine tag type";
-
-		case status_bad_pi: return "Error parsing document declaration/processing instruction";
-		case status_bad_comment: return "Error parsing comment";
-		case status_bad_cdata: return "Error parsing CDATA section";
-		case status_bad_doctype: return "Error parsing document type declaration";
-		case status_bad_pcdata: return "Error parsing PCDATA section";
-		case status_bad_start_element: return "Error parsing start element tag";
-		case status_bad_attribute: return "Error parsing element attribute";
-		case status_bad_end_element: return "Error parsing end element tag";
-		case status_end_element_mismatch: return "Start-end tags mismatch";
-
-		default: return "Unknown error";
-		}
-	}
-
-	PUGI__FN xml_document::xml_document(): _buffer(0)
-	{
-		create();
-	}
-
-	PUGI__FN xml_document::~xml_document()
-	{
-		destroy();
-	}
-
-	PUGI__FN void xml_document::reset()
-	{
-		destroy();
-		create();
-	}
-
-	PUGI__FN void xml_document::reset(const xml_document& proto)
-	{
-		reset();
-
-		for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
-			append_copy(cur);
-	}
-
-	PUGI__FN void xml_document::create()
-	{
-		// initialize sentinel page
-		PUGI__STATIC_ASSERT(offsetof(impl::xml_memory_page, data) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment <= sizeof(_memory));
-
-		// align upwards to page boundary
-		void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(_memory) + (impl::xml_memory_page_alignment - 1)) & ~(impl::xml_memory_page_alignment - 1));
-
-		// prepare page structure
-		impl::xml_memory_page* page = impl::xml_memory_page::construct(page_memory);
-
-		page->busy_size = impl::xml_memory_page_size;
-
-		// allocate new root
-		_root = new (page->data) impl::xml_document_struct(page);
-		_root->prev_sibling_c = _root;
-
-		// setup sentinel page
-		page->allocator = static_cast<impl::xml_document_struct*>(_root);
-	}
-
-	PUGI__FN void xml_document::destroy()
-	{
-		// destroy static storage
-		if (_buffer)
-		{
-			impl::xml_memory::deallocate(_buffer);
-			_buffer = 0;
-		}
-
-		// destroy dynamic storage, leave sentinel page (it's in static memory)
-		if (_root)
-		{
-			impl::xml_memory_page* root_page = reinterpret_cast<impl::xml_memory_page*>(_root->header & impl::xml_memory_page_pointer_mask);
-			assert(root_page && !root_page->prev && !root_page->memory);
-
-			// destroy all pages
-			for (impl::xml_memory_page* page = root_page->next; page; )
-			{
-				impl::xml_memory_page* next = page->next;
-
-				impl::xml_allocator::deallocate_page(page);
-
-				page = next;
-			}
-
-			// cleanup root page
-			root_page->allocator = 0;
-			root_page->next = 0;
-			root_page->busy_size = root_page->freed_size = 0;
-
-			_root = 0;
-		}
-	}
-
-#ifndef PUGIXML_NO_STL
-	PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
-	{
-		reset();
-
-		return impl::load_stream_impl(*this, stream, options, encoding);
-	}
-
-	PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
-	{
-		reset();
-
-		return impl::load_stream_impl(*this, stream, options, encoding_wchar);
-	}
-#endif
-
-	PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
-	{
-		// Force native encoding (skip autodetection)
-	#ifdef PUGIXML_WCHAR_MODE
-		xml_encoding encoding = encoding_wchar;
-	#else
-		xml_encoding encoding = encoding_utf8;
-	#endif
-
-		return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
-	}
-
-	PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
-	{
-		reset();
-
-		FILE* file = fopen(path_, "rb");
-
-		return impl::load_file_impl(*this, file, options, encoding);
-	}
-
-	PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
-	{
-		reset();
-
-		FILE* file = impl::open_file_wide(path_, L"rb");
-
-		return impl::load_file_impl(*this, file, options, encoding);
-	}
-
-	PUGI__FN xml_parse_result xml_document::load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own)
-	{
-		reset();
-
-		// check input buffer
-		assert(contents || size == 0);
-
-		// get actual encoding
-		xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
-
-		// get private buffer
-		char_t* buffer = 0;
-		size_t length = 0;
-
-		if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
-		
-		// delete original buffer if we performed a conversion
-		if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
-
-		// parse
-		xml_parse_result res = impl::xml_parser::parse(buffer, length, _root, options);
-
-		// remember encoding
-		res.encoding = buffer_encoding;
-
-		// grab onto buffer if it's our buffer, user is responsible for deallocating contens himself
-		if (own || buffer != contents) _buffer = buffer;
-
-		return res;
-	}
-
-	PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
-	{
-		return load_buffer_impl(const_cast<void*>(contents), size, options, encoding, false, false);
-	}
-
-	PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
-	{
-		return load_buffer_impl(contents, size, options, encoding, true, false);
-	}
-		
-	PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
-	{
-		return load_buffer_impl(contents, size, options, encoding, true, true);
-	}
-
-	PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
-	{
-		impl::xml_buffered_writer buffered_writer(writer, encoding);
-
-		if ((flags & format_write_bom) && encoding != encoding_latin1)
-		{
-			// BOM always represents the codepoint U+FEFF, so just write it in native encoding
-		#ifdef PUGIXML_WCHAR_MODE
-			unsigned int bom = 0xfeff;
-			buffered_writer.write(static_cast<wchar_t>(bom));
-		#else
-			buffered_writer.write('\xef', '\xbb', '\xbf');
-		#endif
-		}
-
-		if (!(flags & format_no_declaration) && !impl::has_declaration(*this))
-		{
-			buffered_writer.write(PUGIXML_TEXT("<?xml version=\"1.0\""));
-			if (encoding == encoding_latin1) buffered_writer.write(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
-			buffered_writer.write('?', '>');
-			if (!(flags & format_raw)) buffered_writer.write('\n');
-		}
-
-		impl::node_output(buffered_writer, *this, indent, flags, 0);
-	}
-
-#ifndef PUGIXML_NO_STL
-	PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
-	{
-		xml_writer_stream writer(stream);
-
-		save(writer, indent, flags, encoding);
-	}
-
-	PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
-	{
-		xml_writer_stream writer(stream);
-
-		save(writer, indent, flags, encoding_wchar);
-	}
-#endif
-
-	PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
-	{
-		FILE* file = fopen(path_, (flags & format_save_file_text) ? "w" : "wb");
-		return impl::save_file_impl(*this, file, indent, flags, encoding);
-	}
-
-	PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
-	{
-		FILE* file = impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb");
-		return impl::save_file_impl(*this, file, indent, flags, encoding);
-	}
-
-	PUGI__FN xml_node xml_document::document_element() const
-	{
-		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
-			if ((i->header & impl::xml_memory_page_type_mask) + 1 == node_element)
-				return xml_node(i);
-
-		return xml_node();
-	}
-
-#ifndef PUGIXML_NO_STL
-	PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
-	{
-		assert(str);
-
-		return impl::as_utf8_impl(str, wcslen(str));
-	}
-
-	PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
-	{
-		return impl::as_utf8_impl(str.c_str(), str.size());
-	}
-	
-	PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
-	{
-		assert(str);
-
-		return impl::as_wide_impl(str, strlen(str));
-	}
-	
-	PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
-	{
-		return impl::as_wide_impl(str.c_str(), str.size());
-	}
-#endif
-
-	PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
-	{
-		impl::xml_memory::allocate = allocate;
-		impl::xml_memory::deallocate = deallocate;
-	}
-
-	PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
-	{
-		return impl::xml_memory::allocate;
-	}
-
-	PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
-	{
-		return impl::xml_memory::deallocate;
-	}
-}
-
-#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
-namespace std
-{
-	// Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
-	PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
-	{
-		return std::bidirectional_iterator_tag();
-	}
-
-	PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
-	{
-		return std::bidirectional_iterator_tag();
-	}
-
-	PUGI__FN std::forward_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
-	{
-		return std::forward_iterator_tag();
-	}
-}
-#endif
-
-#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
-namespace std
-{
-	// Workarounds for (non-standard) iterator category detection
-	PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
-	{
-		return std::bidirectional_iterator_tag();
-	}
-
-	PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
-	{
-		return std::bidirectional_iterator_tag();
-	}
-
-	PUGI__FN std::forward_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
-	{
-		return std::forward_iterator_tag();
-	}
-}
-#endif
-
-#ifndef PUGIXML_NO_XPATH
-
-// STL replacements
-PUGI__NS_BEGIN
-	struct equal_to
-	{
-		template <typename T> bool operator()(const T& lhs, const T& rhs) const
-		{
-			return lhs == rhs;
-		}
-	};
-
-	struct not_equal_to
-	{
-		template <typename T> bool operator()(const T& lhs, const T& rhs) const
-		{
-			return lhs != rhs;
-		}
-	};
-
-	struct less
-	{
-		template <typename T> bool operator()(const T& lhs, const T& rhs) const
-		{
-			return lhs < rhs;
-		}
-	};
-
-	struct less_equal
-	{
-		template <typename T> bool operator()(const T& lhs, const T& rhs) const
-		{
-			return lhs <= rhs;
-		}
-	};
-
-	template <typename T> void swap(T& lhs, T& rhs)
-	{
-		T temp = lhs;
-		lhs = rhs;
-		rhs = temp;
-	}
-
-	template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
-	{
-		I result = begin;
-
-		for (I it = begin + 1; it != end; ++it)
-			if (pred(*it, *result))
-				result = it;
-
-		return result;
-	}
-
-	template <typename I> void reverse(I begin, I end)
-	{
-		while (begin + 1 < end) swap(*begin++, *--end);
-	}
-
-	template <typename I> I unique(I begin, I end)
-	{
-		// fast skip head
-		while (begin + 1 < end && *begin != *(begin + 1)) begin++;
-
-		if (begin == end) return begin;
-
-		// last written element
-		I write = begin++; 
-
-		// merge unique elements
-		while (begin != end)
-		{
-			if (*begin != *write)
-				*++write = *begin++;
-			else
-				begin++;
-		}
-
-		// past-the-end (write points to live element)
-		return write + 1;
-	}
-
-	template <typename I> void copy_backwards(I begin, I end, I target)
-	{
-		while (begin != end) *--target = *--end;
-	}
-
-	template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
-	{
-		assert(begin != end);
-
-		for (I it = begin + 1; it != end; ++it)
-		{
-			T val = *it;
-
-			if (pred(val, *begin))
-			{
-				// move to front
-				copy_backwards(begin, it, it + 1);
-				*begin = val;
-			}
-			else
-			{
-				I hole = it;
-
-				// move hole backwards
-				while (pred(val, *(hole - 1)))
-				{
-					*hole = *(hole - 1);
-					hole--;
-				}
-
-				// fill hole with element
-				*hole = val;
-			}
-		}
-	}
-
-	// std variant for elements with ==
-	template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
-	{
-		I eqbeg = middle, eqend = middle + 1;
-
-		// expand equal range
-		while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg;
-		while (eqend != end && *eqend == *eqbeg) ++eqend;
-
-		// process outer elements
-		I ltend = eqbeg, gtbeg = eqend;
-
-		for (;;)
-		{
-			// find the element from the right side that belongs to the left one
-			for (; gtbeg != end; ++gtbeg)
-				if (!pred(*eqbeg, *gtbeg))
-				{
-					if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++);
-					else break;
-				}
-
-			// find the element from the left side that belongs to the right one
-			for (; ltend != begin; --ltend)
-				if (!pred(*(ltend - 1), *eqbeg))
-				{
-					if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg);
-					else break;
-				}
-
-			// scanned all elements
-			if (gtbeg == end && ltend == begin)
-			{
-				*out_eqbeg = eqbeg;
-				*out_eqend = eqend;
-				return;
-			}
-
-			// make room for elements by moving equal area
-			if (gtbeg == end)
-			{
-				if (--ltend != --eqbeg) swap(*ltend, *eqbeg);
-				swap(*eqbeg, *--eqend);
-			}
-			else if (ltend == begin)
-			{
-				if (eqend != gtbeg) swap(*eqbeg, *eqend);
-				++eqend;
-				swap(*gtbeg++, *eqbeg++);
-			}
-			else swap(*gtbeg++, *--ltend);
-		}
-	}
-
-	template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
-	{
-		if (pred(*middle, *first)) swap(*middle, *first);
-		if (pred(*last, *middle)) swap(*last, *middle);
-		if (pred(*middle, *first)) swap(*middle, *first);
-	}
-
-	template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
-	{
-		if (last - first <= 40)
-		{
-			// median of three for small chunks
-			median3(first, middle, last, pred);
-		}
-		else
-		{
-			// median of nine
-			size_t step = (last - first + 1) / 8;
-
-			median3(first, first + step, first + 2 * step, pred);
-			median3(middle - step, middle, middle + step, pred);
-			median3(last - 2 * step, last - step, last, pred);
-			median3(first + step, middle, last - step, pred);
-		}
-	}
-
-	template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
-	{
-		// sort large chunks
-		while (end - begin > 32)
-		{
-			// find median element
-			I middle = begin + (end - begin) / 2;
-			median(begin, middle, end - 1, pred);
-
-			// partition in three chunks (< = >)
-			I eqbeg, eqend;
-			partition(begin, middle, end, pred, &eqbeg, &eqend);
-
-			// loop on larger half
-			if (eqbeg - begin > end - eqend)
-			{
-				sort(eqend, end, pred);
-				end = eqbeg;
-			}
-			else
-			{
-				sort(begin, eqbeg, pred);
-				begin = eqend;
-			}
-		}
-
-		// insertion sort small chunk
-		if (begin != end) insertion_sort(begin, end, pred, &*begin);
-	}
-PUGI__NS_END
-
-// Allocator used for AST and evaluation stacks
-PUGI__NS_BEGIN
-	struct xpath_memory_block
-	{	
-		xpath_memory_block* next;
-
-		char data[
-	#ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
-			PUGIXML_MEMORY_XPATH_PAGE_SIZE
-	#else
-			4096
-	#endif
-		];
-	};
-		
-	class xpath_allocator
-	{
-		xpath_memory_block* _root;
-		size_t _root_size;
-
-	public:
-	#ifdef PUGIXML_NO_EXCEPTIONS
-		jmp_buf* error_handler;
-	#endif
-
-		xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size)
-		{
-		#ifdef PUGIXML_NO_EXCEPTIONS
-			error_handler = 0;
-		#endif
-		}
-		
-		void* allocate_nothrow(size_t size)
-		{
-			const size_t block_capacity = sizeof(_root->data);
-
-			// align size so that we're able to store pointers in subsequent blocks
-			size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
-
-			if (_root_size + size <= block_capacity)
-			{
-				void* buf = _root->data + _root_size;
-				_root_size += size;
-				return buf;
-			}
-			else
-			{
-				size_t block_data_size = (size > block_capacity) ? size : block_capacity;
-				size_t block_size = block_data_size + offsetof(xpath_memory_block, data);
-
-				xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
-				if (!block) return 0;
-				
-				block->next = _root;
-				
-				_root = block;
-				_root_size = size;
-				
-				return block->data;
-			}
-		}
-
-		void* allocate(size_t size)
-		{
-			void* result = allocate_nothrow(size);
-
-			if (!result)
-			{
-			#ifdef PUGIXML_NO_EXCEPTIONS
-				assert(error_handler);
-				longjmp(*error_handler, 1);
-			#else
-				throw std::bad_alloc();
-			#endif
-			}
-
-			return result;
-		}
-
-		void* reallocate(void* ptr, size_t old_size, size_t new_size)
-		{
-			// align size so that we're able to store pointers in subsequent blocks
-			old_size = (old_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
-			new_size = (new_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
-
-			// we can only reallocate the last object
-			assert(ptr == 0 || static_cast<char*>(ptr) + old_size == _root->data + _root_size);
-
-			// adjust root size so that we have not allocated the object at all
-			bool only_object = (_root_size == old_size);
-
-			if (ptr) _root_size -= old_size;
-
-			// allocate a new version (this will obviously reuse the memory if possible)
-			void* result = allocate(new_size);
-			assert(result);
-
-			// we have a new block
-			if (result != ptr && ptr)
-			{
-				// copy old data
-				assert(new_size > old_size);
-				memcpy(result, ptr, old_size);
-
-				// free the previous page if it had no other objects
-				if (only_object)
-				{
-					assert(_root->data == result);
-					assert(_root->next);
-
-					xpath_memory_block* next = _root->next->next;
-
-					if (next)
-					{
-						// deallocate the whole page, unless it was the first one
-						xml_memory::deallocate(_root->next);
-						_root->next = next;
-					}
-				}
-			}
-
-			return result;
-		}
-
-		void revert(const xpath_allocator& state)
-		{
-			// free all new pages
-			xpath_memory_block* cur = _root;
-
-			while (cur != state._root)
-			{
-				xpath_memory_block* next = cur->next;
-
-				xml_memory::deallocate(cur);
-
-				cur = next;
-			}
-
-			// restore state
-			_root = state._root;
-			_root_size = state._root_size;
-		}
-
-		void release()
-		{
-			xpath_memory_block* cur = _root;
-			assert(cur);
-
-			while (cur->next)
-			{
-				xpath_memory_block* next = cur->next;
-
-				xml_memory::deallocate(cur);
-
-				cur = next;
-			}
-		}
-	};
-
-	struct xpath_allocator_capture
-	{
-		xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
-		{
-		}
-
-		~xpath_allocator_capture()
-		{
-			_target->revert(_state);
-		}
-
-		xpath_allocator* _target;
-		xpath_allocator _state;
-	};
-
-	struct xpath_stack
-	{
-		xpath_allocator* result;
-		xpath_allocator* temp;
-	};
-
-	struct xpath_stack_data
-	{
-		xpath_memory_block blocks[2];
-		xpath_allocator result;
-		xpath_allocator temp;
-		xpath_stack stack;
-
-	#ifdef PUGIXML_NO_EXCEPTIONS
-		jmp_buf error_handler;
-	#endif
-
-		xpath_stack_data(): result(blocks + 0), temp(blocks + 1)
-		{
-			blocks[0].next = blocks[1].next = 0;
-
-			stack.result = &result;
-			stack.temp = &temp;
-
-		#ifdef PUGIXML_NO_EXCEPTIONS
-			result.error_handler = temp.error_handler = &error_handler;
-		#endif
-		}
-
-		~xpath_stack_data()
-		{
-			result.release();
-			temp.release();
-		}
-	};
-PUGI__NS_END
-
-// String class
-PUGI__NS_BEGIN
-	class xpath_string
-	{
-		const char_t* _buffer;
-		bool _uses_heap;
-
-		static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
-		{
-			char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
-			assert(result);
-
-			memcpy(result, string, length * sizeof(char_t));
-			result[length] = 0;
-
-			return result;
-		}
-
-		static char_t* duplicate_string(const char_t* string, xpath_allocator* alloc)
-		{
-			return duplicate_string(string, strlength(string), alloc);
-		}
-
-	public:
-		xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false)
-		{
-		}
-
-		explicit xpath_string(const char_t* str, xpath_allocator* alloc)
-		{
-			bool empty_ = (*str == 0);
-
-			_buffer = empty_ ? PUGIXML_TEXT("") : duplicate_string(str, alloc);
-			_uses_heap = !empty_;
-		}
-
-		explicit xpath_string(const char_t* str, bool use_heap): _buffer(str), _uses_heap(use_heap)
-		{
-		}
-
-		xpath_string(const char_t* begin, const char_t* end, xpath_allocator* alloc)
-		{
-			assert(begin <= end);
-
-			bool empty_ = (begin == end);
-
-			_buffer = empty_ ? PUGIXML_TEXT("") : duplicate_string(begin, static_cast<size_t>(end - begin), alloc);
-			_uses_heap = !empty_;
-		}
-
-		void append(const xpath_string& o, xpath_allocator* alloc)
-		{
-			// skip empty sources
-			if (!*o._buffer) return;
-
-			// fast append for constant empty target and constant source
-			if (!*_buffer && !_uses_heap && !o._uses_heap)
-			{
-				_buffer = o._buffer;
-			}
-			else
-			{
-				// need to make heap copy
-				size_t target_length = strlength(_buffer);
-				size_t source_length = strlength(o._buffer);
-				size_t result_length = target_length + source_length;
-
-				// allocate new buffer
-				char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
-				assert(result);
-
-				// append first string to the new buffer in case there was no reallocation
-				if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
-
-				// append second string to the new buffer
-				memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
-				result[result_length] = 0;
-
-				// finalize
-				_buffer = result;
-				_uses_heap = true;
-			}
-		}
-
-		const char_t* c_str() const
-		{
-			return _buffer;
-		}
-
-		size_t length() const
-		{
-			return strlength(_buffer);
-		}
-		
-		char_t* data(xpath_allocator* alloc)
-		{
-			// make private heap copy
-			if (!_uses_heap)
-			{
-				_buffer = duplicate_string(_buffer, alloc);
-				_uses_heap = true;
-			}
-
-			return const_cast<char_t*>(_buffer);
-		}
-
-		bool empty() const
-		{
-			return *_buffer == 0;
-		}
-
-		bool operator==(const xpath_string& o) const
-		{
-			return strequal(_buffer, o._buffer);
-		}
-
-		bool operator!=(const xpath_string& o) const
-		{
-			return !strequal(_buffer, o._buffer);
-		}
-
-		bool uses_heap() const
-		{
-			return _uses_heap;
-		}
-	};
-
-	PUGI__FN xpath_string xpath_string_const(const char_t* str)
-	{
-		return xpath_string(str, false);
-	}
-PUGI__NS_END
-
-PUGI__NS_BEGIN
-	PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
-	{
-		while (*pattern && *string == *pattern)
-		{
-			string++;
-			pattern++;
-		}
-
-		return *pattern == 0;
-	}
-
-	PUGI__FN const char_t* find_char(const char_t* s, char_t c)
-	{
-	#ifdef PUGIXML_WCHAR_MODE
-		return wcschr(s, c);
-	#else
-		return strchr(s, c);
-	#endif
-	}
-
-	PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
-	{
-	#ifdef PUGIXML_WCHAR_MODE
-		// MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
-		return (*p == 0) ? s : wcsstr(s, p);
-	#else
-		return strstr(s, p);
-	#endif
-	}
-
-	// Converts symbol to lower case, if it is an ASCII one
-	PUGI__FN char_t tolower_ascii(char_t ch)
-	{
-		return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
-	}
-
-	PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
-	{
-		if (na.attribute())
-			return xpath_string_const(na.attribute().value());
-		else
-		{
-			const xml_node& n = na.node();
-
-			switch (n.type())
-			{
-			case node_pcdata:
-			case node_cdata:
-			case node_comment:
-			case node_pi:
-				return xpath_string_const(n.value());
-			
-			case node_document:
-			case node_element:
-			{
-				xpath_string result;
-
-				xml_node cur = n.first_child();
-				
-				while (cur && cur != n)
-				{
-					if (cur.type() == node_pcdata || cur.type() == node_cdata)
-						result.append(xpath_string_const(cur.value()), alloc);
-
-					if (cur.first_child())
-						cur = cur.first_child();
-					else if (cur.next_sibling())
-						cur = cur.next_sibling();
-					else
-					{
-						while (!cur.next_sibling() && cur != n)
-							cur = cur.parent();
-
-						if (cur != n) cur = cur.next_sibling();
-					}
-				}
-				
-				return result;
-			}
-			
-			default:
-				return xpath_string();
-			}
-		}
-	}
-	
-	PUGI__FN unsigned int node_height(xml_node n)
-	{
-		unsigned int result = 0;
-		
-		while (n)
-		{
-			++result;
-			n = n.parent();
-		}
-		
-		return result;
-	}
-	
-	PUGI__FN bool node_is_before(xml_node ln, unsigned int lh, xml_node rn, unsigned int rh)
-	{
-		// normalize heights
-		for (unsigned int i = rh; i < lh; i++) ln = ln.parent();
-		for (unsigned int j = lh; j < rh; j++) rn = rn.parent();
-		
-		// one node is the ancestor of the other
-		if (ln == rn) return lh < rh;
-		
-		// find common ancestor
-		while (ln.parent() != rn.parent())
-		{
-			ln = ln.parent();
-			rn = rn.parent();
-		}
-
-		// there is no common ancestor (the shared parent is null), nodes are from different documents
-		if (!ln.parent()) return ln < rn;
-
-		// determine sibling order
-		for (; ln; ln = ln.next_sibling())
-			if (ln == rn)
-				return true;
-				
-		return false;
-	}
-
-	PUGI__FN bool node_is_ancestor(xml_node parent, xml_node node)
-	{
-		while (node && node != parent) node = node.parent();
-
-		return parent && node == parent;
-	}
-
-	PUGI__FN const void* document_order(const xpath_node& xnode)
-	{
-		xml_node_struct* node = xnode.node().internal_object();
-
-		if (node)
-		{
-			if (node->name && (node->header & xml_memory_page_name_allocated_mask) == 0) return node->name;
-			if (node->value && (node->header & xml_memory_page_value_allocated_mask) == 0) return node->value;
-			return 0;
-		}
-
-		xml_attribute_struct* attr = xnode.attribute().internal_object();
-
-		if (attr)
-		{
-			if ((attr->header & xml_memory_page_name_allocated_mask) == 0) return attr->name;
-			if ((attr->header & xml_memory_page_value_allocated_mask) == 0) return attr->value;
-			return 0;
-		}
-
-		return 0;
-	}
-	
-	struct document_order_comparator
-	{
-		bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
-		{
-			// optimized document order based check
-			const void* lo = document_order(lhs);
-			const void* ro = document_order(rhs);
-
-			if (lo && ro) return lo < ro;
-
-			// slow comparison
-			xml_node ln = lhs.node(), rn = rhs.node();
-
-			// compare attributes
-			if (lhs.attribute() && rhs.attribute())
-			{
-				// shared parent
-				if (lhs.parent() == rhs.parent())
-				{
-					// determine sibling order
-					for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
-						if (a == rhs.attribute())
-							return true;
-					
-					return false;
-				}
-				
-				// compare attribute parents
-				ln = lhs.parent();
-				rn = rhs.parent();
-			}
-			else if (lhs.attribute())
-			{
-				// attributes go after the parent element
-				if (lhs.parent() == rhs.node()) return false;
-				
-				ln = lhs.parent();
-			}
-			else if (rhs.attribute())
-			{
-				// attributes go after the parent element
-				if (rhs.parent() == lhs.node()) return true;
-				
-				rn = rhs.parent();
-			}
-
-			if (ln == rn) return false;
-			
-			unsigned int lh = node_height(ln);
-			unsigned int rh = node_height(rn);
-			
-			return node_is_before(ln, lh, rn, rh);
-		}
-	};
-
-	struct duplicate_comparator
-	{
-		bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
-		{
-			if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;
-			else return rhs.attribute() ? false : lhs.node() < rhs.node();
-		}
-	};
-	
-	PUGI__FN double gen_nan()
-	{
-	#if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
-		union { float f; uint32_t i; } u[sizeof(float) == sizeof(uint32_t) ? 1 : -1];
-		u[0].i = 0x7fc00000;
-		return u[0].f;
-	#else
-		// fallback
-		const volatile double zero = 0.0;
-		return zero / zero;
-	#endif
-	}
-	
-	PUGI__FN bool is_nan(double value)
-	{
-	#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
-		return !!_isnan(value);
-	#elif defined(fpclassify) && defined(FP_NAN)
-		return fpclassify(value) == FP_NAN;
-	#else
-		// fallback
-		const volatile double v = value;
-		return v != v;
-	#endif
-	}
-	
-	PUGI__FN const char_t* convert_number_to_string_special(double value)
-	{
-	#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
-		if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
-		if (_isnan(value)) return PUGIXML_TEXT("NaN");
-		return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
-	#elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
-		switch (fpclassify(value))
-		{
-		case FP_NAN:
-			return PUGIXML_TEXT("NaN");
-
-		case FP_INFINITE:
-			return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
-
-		case FP_ZERO:
-			return PUGIXML_TEXT("0");
-
-		default:
-			return 0;
-		}
-	#else
-		// fallback
-		const volatile double v = value;
-
-		if (v == 0) return PUGIXML_TEXT("0");
-		if (v != v) return PUGIXML_TEXT("NaN");
-		if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
-		return 0;
-	#endif
-	}
-	
-	PUGI__FN bool convert_number_to_boolean(double value)
-	{
-		return (value != 0 && !is_nan(value));
-	}
-	
-	PUGI__FN void truncate_zeros(char* begin, char* end)
-	{
-		while (begin != end && end[-1] == '0') end--;
-
-		*end = 0;
-	}
-
-	// gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
-#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
-	PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
-	{
-		// get base values
-		int sign, exponent;
-		_ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);
-
-		// truncate redundant zeros
-		truncate_zeros(buffer, buffer + strlen(buffer));
-
-		// fill results
-		*out_mantissa = buffer;
-		*out_exponent = exponent;
-	}
-#else
-	PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
-	{
-		// get a scientific notation value with IEEE DBL_DIG decimals
-		sprintf(buffer, "%.*e", DBL_DIG, value);
-		assert(strlen(buffer) < buffer_size);
-		(void)!buffer_size;
-
-		// get the exponent (possibly negative)
-		char* exponent_string = strchr(buffer, 'e');
-		assert(exponent_string);
-
-		int exponent = atoi(exponent_string + 1);
-
-		// extract mantissa string: skip sign
-		char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
-		assert(mantissa[0] != '0' && mantissa[1] == '.');
-
-		// divide mantissa by 10 to eliminate integer part
-		mantissa[1] = mantissa[0];
-		mantissa++;
-		exponent++;
-
-		// remove extra mantissa digits and zero-terminate mantissa
-		truncate_zeros(mantissa, exponent_string);
-
-		// fill results
-		*out_mantissa = mantissa;
-		*out_exponent = exponent;
-	}
-#endif
-
-	PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
-	{
-		// try special number conversion
-		const char_t* special = convert_number_to_string_special(value);
-		if (special) return xpath_string_const(special);
-
-		// get mantissa + exponent form
-		char mantissa_buffer[64];
-
-		char* mantissa;
-		int exponent;
-		convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);
-
-		// make the number!
-		char_t result[512];
-		char_t* s = result;
-
-		// sign
-		if (value < 0) *s++ = '-';
-
-		// integer part
-		if (exponent <= 0)
-		{
-			*s++ = '0';
-		}
-		else
-		{
-			while (exponent > 0)
-			{
-				assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9);
-				*s++ = *mantissa ? *mantissa++ : '0';
-				exponent--;
-			}
-		}
-
-		// fractional part
-		if (*mantissa)
-		{
-			// decimal point
-			*s++ = '.';
-
-			// extra zeroes from negative exponent
-			while (exponent < 0)
-			{
-				*s++ = '0';
-				exponent++;
-			}
-
-			// extra mantissa digits
-			while (*mantissa)
-			{
-				assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
-				*s++ = *mantissa++;
-			}
-		}
-
-		// zero-terminate
-		assert(s < result + sizeof(result) / sizeof(result[0]));
-		*s = 0;
-
-		return xpath_string(result, alloc);
-	}
-	
-	PUGI__FN bool check_string_to_number_format(const char_t* string)
-	{
-		// parse leading whitespace
-		while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
-
-		// parse sign
-		if (*string == '-') ++string;
-
-		if (!*string) return false;
-
-		// if there is no integer part, there should be a decimal part with at least one digit
-		if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
-
-		// parse integer part
-		while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
-
-		// parse decimal part
-		if (*string == '.')
-		{
-			++string;
-
-			while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
-		}
-
-		// parse trailing whitespace
-		while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
-
-		return *string == 0;
-	}
-
-	PUGI__FN double convert_string_to_number(const char_t* string)
-	{
-		// check string format
-		if (!check_string_to_number_format(string)) return gen_nan();
-
-		// parse string
-	#ifdef PUGIXML_WCHAR_MODE
-		return wcstod(string, 0);
-	#else
-		return atof(string);
-	#endif
-	}
-
-	PUGI__FN bool convert_string_to_number(const char_t* begin, const char_t* end, double* out_result)
-	{
-		char_t buffer[32];
-
-		size_t length = static_cast<size_t>(end - begin);
-		char_t* scratch = buffer;
-
-		if (length >= sizeof(buffer) / sizeof(buffer[0]))
-		{
-			// need to make dummy on-heap copy
-			scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
-			if (!scratch) return false;
-		}
-
-		// copy string to zero-terminated buffer and perform conversion
-		memcpy(scratch, begin, length * sizeof(char_t));
-		scratch[length] = 0;
-
-		*out_result = convert_string_to_number(scratch);
-
-		// free dummy buffer
-		if (scratch != buffer) xml_memory::deallocate(scratch);
-
-		return true;
-	}
-	
-	PUGI__FN double round_nearest(double value)
-	{
-		return floor(value + 0.5);
-	}
-
-	PUGI__FN double round_nearest_nzero(double value)
-	{
-		// same as round_nearest, but returns -0 for [-0.5, -0]
-		// ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
-		return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
-	}
-	
-	PUGI__FN const char_t* qualified_name(const xpath_node& node)
-	{
-		return node.attribute() ? node.attribute().name() : node.node().name();
-	}
-	
-	PUGI__FN const char_t* local_name(const xpath_node& node)
-	{
-		const char_t* name = qualified_name(node);
-		const char_t* p = find_char(name, ':');
-		
-		return p ? p + 1 : name;
-	}
-
-	struct namespace_uri_predicate
-	{
-		const char_t* prefix;
-		size_t prefix_length;
-
-		namespace_uri_predicate(const char_t* name)
-		{
-			const char_t* pos = find_char(name, ':');
-
-			prefix = pos ? name : 0;
-			prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
-		}
-
-		bool operator()(const xml_attribute& a) const
-		{
-			const char_t* name = a.name();
-
-			if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
-
-			return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
-		}
-	};
-
-	PUGI__FN const char_t* namespace_uri(const xml_node& node)
-	{
-		namespace_uri_predicate pred = node.name();
-		
-		xml_node p = node;
-		
-		while (p)
-		{
-			xml_attribute a = p.find_attribute(pred);
-			
-			if (a) return a.value();
-			
-			p = p.parent();
-		}
-		
-		return PUGIXML_TEXT("");
-	}
-
-	PUGI__FN const char_t* namespace_uri(const xml_attribute& attr, const xml_node& parent)
-	{
-		namespace_uri_predicate pred = attr.name();
-		
-		// Default namespace does not apply to attributes
-		if (!pred.prefix) return PUGIXML_TEXT("");
-		
-		xml_node p = parent;
-		
-		while (p)
-		{
-			xml_attribute a = p.find_attribute(pred);
-			
-			if (a) return a.value();
-			
-			p = p.parent();
-		}
-		
-		return PUGIXML_TEXT("");
-	}
-
-	PUGI__FN const char_t* namespace_uri(const xpath_node& node)
-	{
-		return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
-	}
-
-	PUGI__FN void normalize_space(char_t* buffer)
-	{
-		char_t* write = buffer;
-
-		for (char_t* it = buffer; *it; )
-		{
-			char_t ch = *it++;
-
-			if (PUGI__IS_CHARTYPE(ch, ct_space))
-			{
-				// replace whitespace sequence with single space
-				while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
-
-				// avoid leading spaces
-				if (write != buffer) *write++ = ' ';
-			}
-			else *write++ = ch;
-		}
-
-		// remove trailing space
-		if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
-
-		// zero-terminate
-		*write = 0;
-	}
-
-	PUGI__FN void translate(char_t* buffer, const char_t* from, const char_t* to)
-	{
-		size_t to_length = strlength(to);
-
-		char_t* write = buffer;
-
-		while (*buffer)
-		{
-			PUGI__DMC_VOLATILE char_t ch = *buffer++;
-
-			const char_t* pos = find_char(from, ch);
-
-			if (!pos)
-				*write++ = ch; // do not process
-			else if (static_cast<size_t>(pos - from) < to_length)
-				*write++ = to[pos - from]; // replace
-		}
-
-		// zero-terminate
-		*write = 0;
-	}
-
-	struct xpath_variable_boolean: xpath_variable
-	{
-		xpath_variable_boolean(): value(false)
-		{
-		}
-
-		bool value;
-		char_t name[1];
-	};
-
-	struct xpath_variable_number: xpath_variable
-	{
-		xpath_variable_number(): value(0)
-		{
-		}
-
-		double value;
-		char_t name[1];
-	};
-
-	struct xpath_variable_string: xpath_variable
-	{
-		xpath_variable_string(): value(0)
-		{
-		}
-
-		~xpath_variable_string()
-		{
-			if (value) xml_memory::deallocate(value);
-		}
-
-		char_t* value;
-		char_t name[1];
-	};
-
-	struct xpath_variable_node_set: xpath_variable
-	{
-		xpath_node_set value;
-		char_t name[1];
-	};
-
-	static const xpath_node_set dummy_node_set;
-
-	PUGI__FN unsigned int hash_string(const char_t* str)
-	{
-		// Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
-		unsigned int result = 0;
-
-		while (*str)
-		{
-			result += static_cast<unsigned int>(*str++);
-			result += result << 10;
-			result ^= result >> 6;
-		}
-	
-		result += result << 3;
-		result ^= result >> 11;
-		result += result << 15;
-	
-		return result;
-	}
-
-	template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
-	{
-		size_t length = strlength(name);
-		if (length == 0) return 0; // empty variable names are invalid
-
-		// $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
-		void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
-		if (!memory) return 0;
-
-		T* result = new (memory) T();
-
-		memcpy(result->name, name, (length + 1) * sizeof(char_t));
-
-		return result;
-	}
-
-	PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
-	{
-		switch (type)
-		{
-		case xpath_type_node_set:
-			return new_xpath_variable<xpath_variable_node_set>(name);
-
-		case xpath_type_number:
-			return new_xpath_variable<xpath_variable_number>(name);
-
-		case xpath_type_string:
-			return new_xpath_variable<xpath_variable_string>(name);
-
-		case xpath_type_boolean:
-			return new_xpath_variable<xpath_variable_boolean>(name);
-
-		default:
-			return 0;
-		}
-	}
-
-	template <typename T> PUGI__FN void delete_xpath_variable(T* var)
-	{
-		var->~T();
-		xml_memory::deallocate(var);
-	}
-
-	PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
-	{
-		switch (type)
-		{
-		case xpath_type_node_set:
-			delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
-			break;
-
-		case xpath_type_number:
-			delete_xpath_variable(static_cast<xpath_variable_number*>(var));
-			break;
-
-		case xpath_type_string:
-			delete_xpath_variable(static_cast<xpath_variable_string*>(var));
-			break;
-
-		case xpath_type_boolean:
-			delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
-			break;
-
-		default:
-			assert(!"Invalid variable type");
-		}
-	}
-
-	PUGI__FN xpath_variable* get_variable(xpath_variable_set* set, const char_t* begin, const char_t* end)
-	{
-		char_t buffer[32];
-
-		size_t length = static_cast<size_t>(end - begin);
-		char_t* scratch = buffer;
-
-		if (length >= sizeof(buffer) / sizeof(buffer[0]))
-		{
-			// need to make dummy on-heap copy
-			scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
-			if (!scratch) return 0;
-		}
-
-		// copy string to zero-terminated buffer and perform lookup
-		memcpy(scratch, begin, length * sizeof(char_t));
-		scratch[length] = 0;
-
-		xpath_variable* result = set->get(scratch);
-
-		// free dummy buffer
-		if (scratch != buffer) xml_memory::deallocate(scratch);
-
-		return result;
-	}
-PUGI__NS_END
-
-// Internal node set class
-PUGI__NS_BEGIN
-	PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
-	{
-		xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
-
-		if (type == xpath_node_set::type_unsorted)
-		{
-			sort(begin, end, document_order_comparator());
-
-			type = xpath_node_set::type_sorted;
-		}
-		
-		if (type != order) reverse(begin, end);
-			
-		return order;
-	}
-
-	PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
-	{
-		if (begin == end) return xpath_node();
-
-		switch (type)
-		{
-		case xpath_node_set::type_sorted:
-			return *begin;
-
-		case xpath_node_set::type_sorted_reverse:
-			return *(end - 1);
-
-		case xpath_node_set::type_unsorted:
-			return *min_element(begin, end, document_order_comparator());
-
-		default:
-			assert(!"Invalid node set type");
-			return xpath_node();
-		}
-	}
-
-	// Growable array of xpath_node values whose storage is obtained from an xpath_allocator.
-	// [_begin, _end) holds the stored nodes, [_begin, _eos) is the reserved storage, and
-	// _type records the ordering the contents are currently tagged with.
-	// No member of this class releases storage.
-	class xpath_node_set_raw
-	{
-		xpath_node_set::type_t _type;
-
-		xpath_node* _begin;
-		xpath_node* _end;
-		xpath_node* _eos;
-
-	public:
-		// Starts out empty, tagged unsorted, and without any storage.
-		xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
-		{
-		}
-
-		// Pointer to the first stored node (null while nothing has been allocated).
-		xpath_node* begin() const
-		{
-			return _begin;
-		}
-
-		// Pointer one past the last stored node.
-		xpath_node* end() const
-		{
-			return _end;
-		}
-
-		// True when no nodes are stored.
-		bool empty() const
-		{
-			return _begin == _end;
-		}
-
-		// Number of stored nodes.
-		size_t size() const
-		{
-			return static_cast<size_t>(_end - _begin);
-		}
-
-		// Delegates to xpath_first, which selects a node according to the current ordering tag.
-		xpath_node first() const
-		{
-			return xpath_first(_begin, _end, _type);
-		}
-
-		// Appends one node, growing the storage through alloc when it is full.
-		void push_back(const xpath_node& node, xpath_allocator* alloc)
-		{
-			if (_end == _eos)
-			{
-				size_t capacity = static_cast<size_t>(_eos - _begin);
-
-				// get new capacity (1.5x rule); the +1 makes an empty set grow to one element
-				size_t new_capacity = capacity + capacity / 2 + 1;
-
-				// reallocate the old array or allocate a new one
-				xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
-				assert(data);
-
-				// finalize; the set was full, so the element count equals the old capacity
-				_begin = data;
-				_end = data + capacity;
-				_eos = data + new_capacity;
-			}
-
-			*_end++ = node;
-		}
-
-		// Appends the nodes in [begin_, end_), growing the storage through alloc to exactly
-		// the required size when the current capacity is insufficient.
-		void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
-		{
-			// nothing to copy; this also keeps memcpy from being called with null pointers
-			// when both this set and the source range are empty
-			if (begin_ == end_) return;
-
-			size_t size_ = static_cast<size_t>(_end - _begin);
-			size_t capacity = static_cast<size_t>(_eos - _begin);
-			size_t count = static_cast<size_t>(end_ - begin_);
-
-			if (size_ + count > capacity)
-			{
-				// reallocate the old array or allocate a new one
-				xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
-				assert(data);
-
-				// finalize
-				_begin = data;
-				_end = data + size_;
-				_eos = data + size_ + count;
-			}
-
-			memcpy(_end, begin_, count * sizeof(xpath_node));
-			_end += count;
-		}
-
-		// Sorts the contents through xpath_sort and stores the ordering tag it returns.
-		void sort_do()
-		{
-			_type = xpath_sort(_begin, _end, _type, false);
-		}
-
-		// Drops every node from pos onwards; pos must lie within [_begin, _end].
-		void truncate(xpath_node* pos)
-		{
-			assert(_begin <= pos && pos <= _end);
-
-			_end = pos;
-		}
-
-		// Removes duplicate nodes. An unsorted set is first sorted with duplicate_comparator
-		// so that equal nodes become adjacent before unique() is applied.
-		void remove_duplicates()
-		{
-			if (_type == xpath_node_set::type_unsorted)
-				sort(_begin, _end, duplicate_comparator());
-
-			_end = unique(_begin, _end);
-		}
-
-		// Current ordering tag.
-		xpath_node_set::type_t type() const
-		{
-			return _type;
-		}
-
-		// Overrides the ordering tag without touching the contents.
-		void set_type(xpath_node_set::type_t value)
-		{
-			_type = value;
-		}
-	};
-PUGI__NS_END
-
-PUGI__NS_BEGIN
-	// Evaluation context handed to expression nodes: the context node plus its
-	// position within, and the size of, the node list currently being processed.
-	struct xpath_context
-	{
-		xpath_node n;
-		size_t position;
-		size_t size;
-
-		xpath_context(const xpath_node& n_, size_t position_, size_t size_)
-			: n(n_)
-			, position(position_)
-			, size(size_)
-		{
-		}
-	};
-
-	enum lexeme_t // token kinds produced by xpath_lexer::next()
-	{
-		lex_none = 0, // input that does not form a valid token (e.g. lone '!' or ':', unterminated string)
-		lex_equal, // =
-		lex_not_equal, // !=
-		lex_less, // <
-		lex_greater, // >
-		lex_less_or_equal, // <=
-		lex_greater_or_equal, // >=
-		lex_plus, // +
-		lex_minus, // -
-		lex_multiply, // *
-		lex_union, // |
-		lex_var_ref, // $name or $prefix:name; contents hold the name without '$'
-		lex_open_brace, // (
-		lex_close_brace, // )
-		lex_quoted_string, // '...' or "..."; contents exclude the quotes
-		lex_number, // digits with optional fraction, or a fraction starting with '.'
-		lex_slash, // /
-		lex_double_slash, // //
-		lex_open_square_brace, // [
-		lex_close_square_brace, // ]
-		lex_string, // name token: name, prefix:name or prefix:*
-		lex_comma, // ,
-		lex_axis_attribute, // @
-		lex_dot, // .
-		lex_double_dot, // ..
-		lex_double_colon, // ::
-		lex_eof // terminating zero character reached
-	};
-
-	// Non-owning [begin, end) view of the characters that make up a lexeme.
-	struct xpath_lexer_string
-	{
-		const char_t* begin;
-		const char_t* end;
-
-		// Creates an empty view with both pointers null.
-		xpath_lexer_string()
-			: begin(0)
-			, end(0)
-		{
-		}
-
-		// Compares the viewed character range against other by means of strequalrange.
-		bool operator==(const char_t* other) const
-		{
-			return strequalrange(other, begin, static_cast<size_t>(end - begin));
-		}
-	};
-
-	class xpath_lexer // tokenizer over a zero-terminated XPath query; next() advances to the following lexeme
-	{
-		const char_t* _cur; // first character after the current lexeme
-		const char_t* _cur_lexeme_pos; // start of the current lexeme (after skipped whitespace)
-		xpath_lexer_string _cur_lexeme_contents; // text of the current lexeme; only set for var_ref/number/string/quoted_string
-
-		lexeme_t _cur_lexeme; // kind of the current lexeme
-
-	public:
-		explicit xpath_lexer(const char_t* query): _cur(query) // reads the first lexeme immediately
-		{
-			next();
-		}
-		
-		const char_t* state() const // current read position in the query
-		{
-			return _cur;
-		}
-		
-		void next() // scans one lexeme starting at _cur and updates the current-lexeme members
-		{
-			const char_t* cur = _cur;
-
-			while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
-
-			// save lexeme position for error reporting
-			_cur_lexeme_pos = cur;
-
-			switch (*cur)
-			{
-			case 0: // end of query; cur is not advanced, so further calls keep yielding lex_eof
-				_cur_lexeme = lex_eof;
-				break;
-			
-			case '>':
-				if (*(cur+1) == '=')
-				{
-					cur += 2;
-					_cur_lexeme = lex_greater_or_equal;
-				}
-				else
-				{
-					cur += 1;
-					_cur_lexeme = lex_greater;
-				}
-				break;
-
-			case '<':
-				if (*(cur+1) == '=')
-				{
-					cur += 2;
-					_cur_lexeme = lex_less_or_equal;
-				}
-				else
-				{
-					cur += 1;
-					_cur_lexeme = lex_less;
-				}
-				break;
-
-			case '!':
-				if (*(cur+1) == '=')
-				{
-					cur += 2;
-					_cur_lexeme = lex_not_equal;
-				}
-				else
-				{
-					_cur_lexeme = lex_none; // lone '!' is not a token; cur is left on it
-				}
-				break;
-
-			case '=':
-				cur += 1;
-				_cur_lexeme = lex_equal;
-
-				break;
-			
-			case '+':
-				cur += 1;
-				_cur_lexeme = lex_plus;
-
-				break;
-
-			case '-':
-				cur += 1;
-				_cur_lexeme = lex_minus;
-
-				break;
-
-			case '*':
-				cur += 1;
-				_cur_lexeme = lex_multiply;
-
-				break;
-
-			case '|':
-				cur += 1;
-				_cur_lexeme = lex_union;
-
-				break;
-			
-			case '$': // variable reference: '$' followed by a name or prefix:name
-				cur += 1;
-
-				if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
-				{
-					_cur_lexeme_contents.begin = cur; // contents start after the '$'
-
-					while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
-
-					if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
-					{
-						cur++; // :
-
-						while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
-					}
-
-					_cur_lexeme_contents.end = cur;
-				
-					_cur_lexeme = lex_var_ref;
-				}
-				else
-				{
-					_cur_lexeme = lex_none; // '$' not followed by a name start character
-				}
-
-				break;
-
-			case '(':
-				cur += 1;
-				_cur_lexeme = lex_open_brace;
-
-				break;
-
-			case ')':
-				cur += 1;
-				_cur_lexeme = lex_close_brace;
-
-				break;
-			
-			case '[':
-				cur += 1;
-				_cur_lexeme = lex_open_square_brace;
-
-				break;
-
-			case ']':
-				cur += 1;
-				_cur_lexeme = lex_close_square_brace;
-
-				break;
-
-			case ',':
-				cur += 1;
-				_cur_lexeme = lex_comma;
-
-				break;
-
-			case '/':
-				if (*(cur+1) == '/')
-				{
-					cur += 2;
-					_cur_lexeme = lex_double_slash;
-				}
-				else
-				{
-					cur += 1;
-					_cur_lexeme = lex_slash;
-				}
-				break;
-		
-			case '.': // "..", a number that starts with its fraction (".5"), or a single dot
-				if (*(cur+1) == '.')
-				{
-					cur += 2;
-					_cur_lexeme = lex_double_dot;
-				}
-				else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
-				{
-					_cur_lexeme_contents.begin = cur; // .
-
-					++cur;
-
-					while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
-
-					_cur_lexeme_contents.end = cur;
-					
-					_cur_lexeme = lex_number;
-				}
-				else
-				{
-					cur += 1;
-					_cur_lexeme = lex_dot;
-				}
-				break;
-
-			case '@':
-				cur += 1;
-				_cur_lexeme = lex_axis_attribute;
-
-				break;
-
-			case '"':
-			case '\'':
-			{
-				char_t terminator = *cur; // the literal ends at the same quote character that opened it
-
-				++cur;
-
-				_cur_lexeme_contents.begin = cur;
-				while (*cur && *cur != terminator) cur++;
-				_cur_lexeme_contents.end = cur;
-				
-				if (!*cur) // query ended before the closing quote
-					_cur_lexeme = lex_none;
-				else
-				{
-					cur += 1;
-					_cur_lexeme = lex_quoted_string;
-				}
-
-				break;
-			}
-
-			case ':':
-				if (*(cur+1) == ':')
-				{
-					cur += 2;
-					_cur_lexeme = lex_double_colon;
-				}
-				else
-				{
-					_cur_lexeme = lex_none; // a single ':' outside of a name is not a token
-				}
-				break;
-
-			default: // number starting with a digit, a name token, or an unrecognized character
-				if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
-				{
-					_cur_lexeme_contents.begin = cur;
-
-					while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
-				
-					if (*cur == '.') // optional fractional part; digits after the dot may be absent
-					{
-						cur++;
-
-						while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
-					}
-
-					_cur_lexeme_contents.end = cur;
-
-					_cur_lexeme = lex_number;
-				}
-				else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
-				{
-					_cur_lexeme_contents.begin = cur;
-
-					while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
-
-					if (cur[0] == ':') // a ':' is consumed only when it continues the name; "::" is left for the next call
-					{
-						if (cur[1] == '*') // namespace test ncname:*
-						{
-							cur += 2; // :*
-						}
-						else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
-						{
-							cur++; // :
-
-							while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
-						}
-					}
-
-					_cur_lexeme_contents.end = cur;
-				
-					_cur_lexeme = lex_string;
-				}
-				else
-				{
-					_cur_lexeme = lex_none;
-				}
-			}
-
-			_cur = cur;
-		}
-
-		lexeme_t current() const // kind of the lexeme read by the last next()
-		{
-			return _cur_lexeme;
-		}
-
-		const char_t* current_pos() const // where the current lexeme starts in the query
-		{
-			return _cur_lexeme_pos;
-		}
-
-		const xpath_lexer_string& contents() const // text of the current lexeme; valid only for the kinds asserted below
-		{
-			assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
-
-			return _cur_lexeme_contents;
-		}
-	};
-
-	enum ast_type_t // kinds of xpath_ast_node; "left"/"right" refer to the node's child links
-	{
-		ast_op_or,						// left or right
-		ast_op_and,						// left and right
-		ast_op_equal,					// left = right
-		ast_op_not_equal,				// left != right
-		ast_op_less,					// left < right
-		ast_op_greater,					// left > right
-		ast_op_less_or_equal,			// left <= right
-		ast_op_greater_or_equal,		// left >= right
-		ast_op_add,						// left + right
-		ast_op_subtract,				// left - right
-		ast_op_multiply,				// left * right
-		ast_op_divide,					// left div right
-		ast_op_mod,						// left mod right
-		ast_op_negate,					// - left (unary minus)
-		ast_op_union,					// left | right
-		ast_predicate,					// apply predicate to set; next points to next predicate
-		ast_filter,						// select * from left where right
-		ast_filter_posinv,				// select * from left where right; proximity position invariant
-		ast_string_constant,			// string constant
-		ast_number_constant,			// number constant
-		ast_variable,					// variable
-		ast_func_last,					// last()
-		ast_func_position,				// position()
-		ast_func_count,					// count(left)
-		ast_func_id,					// id(left)
-		ast_func_local_name_0,			// local-name()
-		ast_func_local_name_1,			// local-name(left)
-		ast_func_namespace_uri_0,		// namespace-uri()
-		ast_func_namespace_uri_1,		// namespace-uri(left)
-		ast_func_name_0,				// name()
-		ast_func_name_1,				// name(left)
-		ast_func_string_0,				// string()
-		ast_func_string_1,				// string(left)
-		ast_func_concat,				// concat(left, right, siblings)
-		ast_func_starts_with,			// starts-with(left, right)
-		ast_func_contains,				// contains(left, right)
-		ast_func_substring_before,		// substring-before(left, right)
-		ast_func_substring_after,		// substring-after(left, right)
-		ast_func_substring_2,			// substring(left, right)
-		ast_func_substring_3,			// substring(left, right, third)
-		ast_func_string_length_0,		// string-length()
-		ast_func_string_length_1,		// string-length(left)
-		ast_func_normalize_space_0,		// normalize-space()
-		ast_func_normalize_space_1,		// normalize-space(left)
-		ast_func_translate,				// translate(left, right, third)
-		ast_func_boolean,				// boolean(left)
-		ast_func_not,					// not(left)
-		ast_func_true,					// true()
-		ast_func_false,					// false()
-		ast_func_lang,					// lang(left)
-		ast_func_number_0,				// number()
-		ast_func_number_1,				// number(left)
-		ast_func_sum,					// sum(left)
-		ast_func_floor,					// floor(left)
-		ast_func_ceiling,				// ceiling(left)
-		ast_func_round,					// round(left)
-		ast_step,						// process set left with step
-		ast_step_root					// select root node
-	};
-
-	enum axis_t // axis of a location step; stored in xpath_ast_node::_axis
-	{
-		axis_ancestor, // parent chain of the context node
-		axis_ancestor_or_self, // context node followed by its parent chain
-		axis_attribute, // attributes of the context node
-		axis_child, // direct children
-		axis_descendant, // whole subtree below the context node
-		axis_descendant_or_self, // context node plus its whole subtree
-		axis_following, // nodes after the context node, excluding its descendants
-		axis_following_sibling, // later siblings
-		axis_namespace, // NOTE(review): no traversal for this axis is visible in step_fill
-		axis_parent, // parent of the context node
-		axis_preceding, // nodes before the context node, excluding its ancestors
-		axis_preceding_sibling, // earlier siblings
-		axis_self // the context node itself
-	};
-	
-	enum nodetest_t // node test of a location step; stored in xpath_ast_node::_test and applied by step_push
-	{
-		nodetest_none, // no test set
-		nodetest_name, // name must equal the stored node test string
-		nodetest_type_node, // accepts every node
-		nodetest_type_comment, // accepts comment nodes
-		nodetest_type_pi, // accepts processing instructions
-		nodetest_type_text, // accepts pcdata and cdata nodes
-		nodetest_pi, // accepts processing instructions whose name equals the stored string
-		nodetest_all, // accepts every element (or every attribute on the attribute path)
-		nodetest_all_in_namespace // accepts elements/attributes whose name starts with the stored string
-	};
-
-	template <axis_t N> struct axis_to_type // tag type carrying an axis value; step_fill/step_do read it back as T::axis
-	{
-		static const axis_t axis;
-	};
-
-	template <axis_t N> const axis_t axis_to_type<N>::axis = N; // out-of-class definition of the static member
-		
-	class xpath_ast_node
-	{
-	private:
-		// node type
-		char _type;
-		char _rettype;
-
-		// for ast_step / ast_predicate
-		char _axis;
-		char _test;
-
-		// tree node structure
-		xpath_ast_node* _left;
-		xpath_ast_node* _right;
-		xpath_ast_node* _next;
-
-		union
-		{
-			// value for ast_string_constant
-			const char_t* string;
-			// value for ast_number_constant
-			double number;
-			// variable for ast_variable
-			xpath_variable* variable;
-			// node test for ast_step (node name/namespace/node type/pi target)
-			const char_t* nodetest;
-		} _data;
-
-		xpath_ast_node(const xpath_ast_node&);
-		xpath_ast_node& operator=(const xpath_ast_node&);
-
-		template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) // evaluates lhs and rhs and applies comp (equal_to / not_equal_to) after converting both sides based on their static return types
-		{
-			xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
-
-			if (lt != xpath_type_node_set && rt != xpath_type_node_set) // neither side is a node set: compare as boolean, else number, else string
-			{
-				if (lt == xpath_type_boolean || rt == xpath_type_boolean)
-					return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
-				else if (lt == xpath_type_number || rt == xpath_type_number)
-					return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
-				else if (lt == xpath_type_string || rt == xpath_type_string)
-				{
-					xpath_allocator_capture cr(stack.result);
-
-					xpath_string ls = lhs->eval_string(c, stack);
-					xpath_string rs = rhs->eval_string(c, stack);
-
-					return comp(ls, rs);
-				}
-			}
-			else if (lt == xpath_type_node_set && rt == xpath_type_node_set) // both node sets: true if any pair of string values satisfies comp
-			{
-				xpath_allocator_capture cr(stack.result);
-
-				xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
-				xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
-
-				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
-					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
-					{
-						xpath_allocator_capture cri(stack.result);
-
-						if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
-							return true;
-					}
-
-				return false;
-			}
-			else // exactly one side is a node set
-			{
-				if (lt == xpath_type_node_set) // normalize so that the node set is on the right
-				{
-					swap(lhs, rhs);
-					swap(lt, rt);
-				}
-
-				if (lt == xpath_type_boolean)
-					return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
-				else if (lt == xpath_type_number) // true if any node's string value, converted to a number, satisfies comp
-				{
-					xpath_allocator_capture cr(stack.result);
-
-					double l = lhs->eval_number(c, stack);
-					xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
-
-					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
-					{
-						xpath_allocator_capture cri(stack.result);
-
-						if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
-							return true;
-					}
-
-					return false;
-				}
-				else if (lt == xpath_type_string) // true if any node's string value satisfies comp against the string
-				{
-					xpath_allocator_capture cr(stack.result);
-
-					xpath_string l = lhs->eval_string(c, stack);
-					xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
-
-					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
-					{
-						xpath_allocator_capture cri(stack.result);
-
-						if (comp(l, string_value(*ri, stack.result)))
-							return true;
-					}
-
-					return false;
-				}
-			}
-
-			assert(!"Wrong types"); // reached only when no type combination above matched
-			return false;
-		}
-
-		template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) // relational comparison: every operand is compared as a number; node sets match if any member satisfies comp
-		{
-			xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
-
-			if (lt != xpath_type_node_set && rt != xpath_type_node_set) // two plain values
-				return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
-			else if (lt == xpath_type_node_set && rt == xpath_type_node_set) // two node sets: test every pair
-			{
-				xpath_allocator_capture cr(stack.result);
-
-				xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
-				xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
-
-				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
-				{
-					xpath_allocator_capture cri(stack.result);
-
-					double l = convert_string_to_number(string_value(*li, stack.result).c_str()); // converted once per left node
-
-					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
-					{
-						xpath_allocator_capture crii(stack.result);
-
-						if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
-							return true;
-					}
-				}
-
-				return false;
-			}
-			else if (lt != xpath_type_node_set && rt == xpath_type_node_set) // value on the left, node set on the right
-			{
-				xpath_allocator_capture cr(stack.result);
-
-				double l = lhs->eval_number(c, stack);
-				xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
-
-				for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
-				{
-					xpath_allocator_capture cri(stack.result);
-
-					if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
-						return true;
-				}
-
-				return false;
-			}
-			else if (lt == xpath_type_node_set && rt != xpath_type_node_set) // node set on the left, value on the right; operand order is kept because comp is not symmetric
-			{
-				xpath_allocator_capture cr(stack.result);
-
-				xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
-				double r = rhs->eval_number(c, stack);
-
-				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
-				{
-					xpath_allocator_capture cri(stack.result);
-
-					if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
-						return true;
-				}
-
-				return false;
-			}
-			else // the four cases above are exhaustive
-			{
-				assert(!"Wrong types");
-				return false;
-			}
-		}
-
-		// Filters the nodes of ns from index first onwards through one predicate expression.
-		// Survivors are compacted in place and the set is truncated behind them. Each node is
-		// evaluated with its 1-based position in the filtered range and the range's size.
-		// A predicate with a number return type keeps the node whose position equals its value;
-		// any other predicate keeps the nodes for which it evaluates to true.
-		void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
-		{
-			assert(ns.size() >= first);
-
-			xpath_node* const range_begin = ns.begin() + first;
-			xpath_node* const range_end = ns.end();
-			const size_t count = ns.size() - first;
-
-			// write cursor for nodes that pass; never runs ahead of the read cursor
-			xpath_node* out = range_begin;
-			size_t position = 1;
-
-			for (xpath_node* cur = range_begin; cur != range_end; ++cur, ++position)
-			{
-				xpath_context ctx(*cur, position, count);
-
-				bool keep;
-
-				if (expr->rettype() == xpath_type_number)
-					keep = (expr->eval_number(ctx, stack) == position);
-				else
-					keep = expr->eval_boolean(ctx, stack);
-
-				if (keep) *out++ = *cur;
-			}
-
-			ns.truncate(out);
-		}
-
-		// Runs every predicate chained off _right (linked through _next) over the nodes of ns
-		// starting at index first. Does nothing when no nodes were added past first.
-		void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack)
-		{
-			const bool nothing_to_filter = (ns.size() == first);
-			if (nothing_to_filter) return;
-
-			xpath_ast_node* predicate = _right;
-
-			while (predicate)
-			{
-				apply_predicate(ns, first, predicate->_left, stack);
-				predicate = predicate->_next;
-			}
-		}
-
-		// Appends attribute a (owned by parent) to ns if it passes this step's node test.
-		// Null attributes and namespace declarations are never added.
-		void step_push(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& parent, xpath_allocator* alloc)
-		{
-			if (!a) return;
-
-			const char_t* attr_name = a.name();
-
-			// Attributes that declare namespaces ("xmlns" or "xmlns:...") have no
-			// corresponding attribute node, so they are skipped
-			const bool declares_namespace = starts_with(attr_name, PUGIXML_TEXT("xmlns")) && (attr_name[5] == 0 || attr_name[5] == ':');
-			if (declares_namespace) return;
-
-			bool accepted = false;
-
-			if (_test == nodetest_name)
-				accepted = strequal(attr_name, _data.nodetest);
-			else if (_test == nodetest_type_node || _test == nodetest_all)
-				accepted = true;
-			else if (_test == nodetest_all_in_namespace)
-				accepted = starts_with(attr_name, _data.nodetest);
-
-			// every other node test rejects attributes
-			if (accepted) ns.push_back(xpath_node(a, parent), alloc);
-		}
-		
-		// Appends node n to ns if it passes this step's node test (_test).
-		// Null nodes are ignored. Name-based tests compare against _data.nodetest.
-		void step_push(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc)
-		{
-			if (!n) return;
-
-			switch (_test)
-			{
-			case nodetest_name:
-				// only elements can match a plain name test
-				if (n.type() == node_element && strequal(n.name(), _data.nodetest)) ns.push_back(n, alloc);
-				break;
-
-			case nodetest_type_node:
-				ns.push_back(n, alloc);
-				break;
-
-			case nodetest_type_comment:
-				if (n.type() == node_comment)
-					ns.push_back(n, alloc);
-				break;
-
-			case nodetest_type_text:
-				// both plain character data and CDATA sections count as text
-				if (n.type() == node_pcdata || n.type() == node_cdata)
-					ns.push_back(n, alloc);
-				break;
-
-			case nodetest_type_pi:
-				if (n.type() == node_pi)
-					ns.push_back(n, alloc);
-				break;
-
-			case nodetest_pi:
-				if (n.type() == node_pi && strequal(n.name(), _data.nodetest))
-					ns.push_back(n, alloc);
-				break;
-
-			case nodetest_all:
-				if (n.type() == node_element)
-					ns.push_back(n, alloc);
-				break;
-
-			case nodetest_all_in_namespace:
-				// _data.nodetest is matched as a prefix of the element name
-				if (n.type() == node_element && starts_with(n.name(), _data.nodetest))
-					ns.push_back(n, alloc);
-				break;
-
-			default:
-				// the switch is over the node test, not the axis
-				assert(!"Unknown node test");
-			}
-		}
-
-		template <class T> void step_fill(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc, T) // walks the axis T::axis from node n and offers every visited node/attribute to step_push
-		{
-			const axis_t axis = T::axis;
-
-			switch (axis)
-			{
-			case axis_attribute:
-			{
-				for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute())
-					step_push(ns, a, n, alloc);
-				
-				break;
-			}
-			
-			case axis_child:
-			{
-				for (xml_node c = n.first_child(); c; c = c.next_sibling())
-					step_push(ns, c, alloc);
-					
-				break;
-			}
-			
-			case axis_descendant:
-			case axis_descendant_or_self:
-			{
-				if (axis == axis_descendant_or_self)
-					step_push(ns, n, alloc);
-					
-				xml_node cur = n.first_child();
-				
-				while (cur && cur != n) // iterative pre-order walk of the subtree below n
-				{
-					step_push(ns, cur, alloc);
-					
-					if (cur.first_child())
-						cur = cur.first_child();
-					else if (cur.next_sibling())
-						cur = cur.next_sibling();
-					else
-					{
-						while (!cur.next_sibling() && cur != n) // climb until a sibling exists or we are back at n
-							cur = cur.parent();
-					
-						if (cur != n) cur = cur.next_sibling();
-					}
-				}
-				
-				break;
-			}
-			
-			case axis_following_sibling:
-			{
-				for (xml_node c = n.next_sibling(); c; c = c.next_sibling())
-					step_push(ns, c, alloc);
-				
-				break;
-			}
-			
-			case axis_preceding_sibling:
-			{
-				for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling())
-					step_push(ns, c, alloc);
-				
-				break;
-			}
-			
-			case axis_following:
-			{
-				xml_node cur = n;
-
-				// exit from this node so that we don't include descendants
-				while (cur && !cur.next_sibling()) cur = cur.parent();
-				cur = cur.next_sibling();
-
-				for (;;) // pre-order walk of everything after n; step_push ignores a null cur
-				{
-					step_push(ns, cur, alloc);
-
-					if (cur.first_child())
-						cur = cur.first_child();
-					else if (cur.next_sibling())
-						cur = cur.next_sibling();
-					else
-					{
-						while (cur && !cur.next_sibling()) cur = cur.parent();
-						cur = cur.next_sibling();
-
-						if (!cur) break;
-					}
-				}
-
-				break;
-			}
-
-			case axis_preceding:
-			{
-				xml_node cur = n;
-
-				while (cur && !cur.previous_sibling()) cur = cur.parent(); // move to the nearest node that has an earlier sibling
-				cur = cur.previous_sibling();
-
-				for (;;) // reverse walk: descend to the last leaf first, then move backwards/upwards
-				{
-					if (cur.last_child())
-						cur = cur.last_child();
-					else
-					{
-						// leaf node, can't be ancestor
-						step_push(ns, cur, alloc);
-
-						if (cur.previous_sibling())
-							cur = cur.previous_sibling();
-						else
-						{
-							do 
-							{
-								cur = cur.parent();
-								if (!cur) break;
-
-								if (!node_is_ancestor(cur, n)) step_push(ns, cur, alloc); // ancestors of n are not part of the preceding axis
-							}
-							while (!cur.previous_sibling());
-
-							cur = cur.previous_sibling();
-
-							if (!cur) break;
-						}
-					}
-				}
-
-				break;
-			}
-			
-			case axis_ancestor:
-			case axis_ancestor_or_self:
-			{
-				if (axis == axis_ancestor_or_self)
-					step_push(ns, n, alloc);
-
-				xml_node cur = n.parent();
-				
-				while (cur) // nearest ancestor first
-				{
-					step_push(ns, cur, alloc);
-					
-					cur = cur.parent();
-				}
-				
-				break;
-			}
-
-			case axis_self:
-			{
-				step_push(ns, n, alloc);
-
-				break;
-			}
-
-			case axis_parent:
-			{
-				if (n.parent()) step_push(ns, n.parent(), alloc);
-
-				break;
-			}
-				
-			default:
-				assert(!"Unimplemented axis"); // e.g. axis_namespace has no case above
-			}
-		}
-		
-		template <class T> void step_fill(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& p, xpath_allocator* alloc, T v) // walks the axis T::axis starting from attribute a, whose owning node is p
-		{
-			const axis_t axis = T::axis;
-
-			switch (axis)
-			{
-			case axis_ancestor:
-			case axis_ancestor_or_self:
-			{
-				if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
-					step_push(ns, a, p, alloc);
-
-				xml_node cur = p; // the owning node is the attribute's first ancestor
-				
-				while (cur)
-				{
-					step_push(ns, cur, alloc);
-					
-					cur = cur.parent();
-				}
-				
-				break;
-			}
-
-			case axis_descendant_or_self:
-			case axis_self:
-			{
-				if (_test == nodetest_type_node) // reject attributes based on principal node type test
-					step_push(ns, a, p, alloc);
-
-				break;
-			}
-
-			case axis_following:
-			{
-				xml_node cur = p;
-				
-				for (;;) // pre-order walk that starts inside p; p itself is never pushed because cur advances before step_push
-				{
-					if (cur.first_child())
-						cur = cur.first_child();
-					else if (cur.next_sibling())
-						cur = cur.next_sibling();
-					else
-					{
-						while (cur && !cur.next_sibling()) cur = cur.parent();
-						cur = cur.next_sibling();
-						
-						if (!cur) break;
-					}
-
-					step_push(ns, cur, alloc);
-				}
-
-				break;
-			}
-
-			case axis_parent:
-			{
-				step_push(ns, p, alloc);
-
-				break;
-			}
-
-			case axis_preceding:
-			{
-				// preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
-				step_fill(ns, p, alloc, v);
-				break;
-			}
-			
-			default:
-				assert(!"Unimplemented axis"); // step_do only calls this overload for the axes handled above
-			}
-		}
-		
-		template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, T v) // evaluates one location step: fills a node set along T::axis from each input node, filters it with the predicates, and removes duplicates where they can occur
-		{
-			const axis_t axis = T::axis;
-			bool attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self); // axes that are also walked when the input node is an attribute
-
-			xpath_node_set_raw ns;
-			ns.set_type((axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling) ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted); // initial ordering tag: reverse axes are tagged sorted_reverse
-
-			if (_left) // the step is applied to every node produced by the left subexpression
-			{
-				xpath_node_set_raw s = _left->eval_node_set(c, stack);
-
-				// self axis preserves the original order
-				if (axis == axis_self) ns.set_type(s.type());
-
-				for (const xpath_node* it = s.begin(); it != s.end(); ++it)
-				{
-					size_t size = ns.size(); // predicates below only see the nodes added for this input node
-
-					// in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
-					if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
-					
-					if (it->node())
-						step_fill(ns, it->node(), stack.result, v);
-					else if (attributes)
-						step_fill(ns, it->attribute(), it->parent(), stack.result, v);
-						
-					apply_predicates(ns, size, stack);
-				}
-			}
-			else // no left subexpression: the step is applied to the context node
-			{
-				if (c.n.node())
-					step_fill(ns, c.n.node(), stack.result, v);
-				else if (attributes)
-					step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, v);
-				
-				apply_predicates(ns, 0, stack);
-			}
-
-			// child, attribute and self axes always generate unique set of nodes
-			// for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
-			if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
-				ns.remove_duplicates();
-
-			return ns;
-		}
-		
-	public:
-		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value): // string constant node; stores the pointer, not a copy
-			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
-		{
-			assert(type == ast_string_constant);
-			_data.string = value;
-		}
-
-		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value): // number constant node
-			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
-		{
-			assert(type == ast_number_constant);
-			_data.number = value;
-		}
-		
-		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value): // variable reference node; stores the variable pointer
-			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
-		{
-			assert(type == ast_variable);
-			_data.variable = value;
-		}
-		
-		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0): // operator/function node with up to two children; _data is left unset
-			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
-		{
-		}
-
-		xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents): // step node; return type is always node set, contents is the node test string
-			_type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
-		{
-			_data.nodetest = contents;
-		}
-
-		void set_next(xpath_ast_node* value) // sets the _next link (followed by apply_predicates and eval_string_concat)
-		{
-			_next = value;
-		}
-
-		void set_right(xpath_ast_node* value) // replaces the right child link
-		{
-			_right = value;
-		}
-
-		bool eval_boolean(const xpath_context& c, const xpath_stack& stack) // evaluates this node as a boolean in context c; node kinds without a boolean case are converted from their own return type
-		{
-			switch (_type)
-			{
-			case ast_op_or:
-				return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
-				
-			case ast_op_and:
-				return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
-				
-			case ast_op_equal:
-				return compare_eq(_left, _right, c, stack, equal_to());
-
-			case ast_op_not_equal:
-				return compare_eq(_left, _right, c, stack, not_equal_to());
-	
-			case ast_op_less:
-				return compare_rel(_left, _right, c, stack, less());
-			
-			case ast_op_greater:
-				return compare_rel(_right, _left, c, stack, less()); // a > b is evaluated as b < a
-
-			case ast_op_less_or_equal:
-				return compare_rel(_left, _right, c, stack, less_equal());
-			
-			case ast_op_greater_or_equal:
-				return compare_rel(_right, _left, c, stack, less_equal()); // a >= b is evaluated as b <= a
-
-			case ast_func_starts_with:
-			{
-				xpath_allocator_capture cr(stack.result);
-
-				xpath_string lr = _left->eval_string(c, stack);
-				xpath_string rr = _right->eval_string(c, stack);
-
-				return starts_with(lr.c_str(), rr.c_str());
-			}
-
-			case ast_func_contains:
-			{
-				xpath_allocator_capture cr(stack.result);
-
-				xpath_string lr = _left->eval_string(c, stack);
-				xpath_string rr = _right->eval_string(c, stack);
-
-				return find_substring(lr.c_str(), rr.c_str()) != 0;
-			}
-
-			case ast_func_boolean:
-				return _left->eval_boolean(c, stack);
-				
-			case ast_func_not:
-				return !_left->eval_boolean(c, stack);
-				
-			case ast_func_true:
-				return true;
-				
-			case ast_func_false:
-				return false;
-
-			case ast_func_lang:
-			{
-				if (c.n.attribute()) return false; // always false when the context node is an attribute
-				
-				xpath_allocator_capture cr(stack.result);
-
-				xpath_string lang = _left->eval_string(c, stack);
-				
-				for (xml_node n = c.n.node(); n; n = n.parent()) // the nearest xml:lang on the node or its ancestors decides
-				{
-					xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
-					
-					if (a)
-					{
-						const char_t* value = a.value();
-						
-						// strnicmp / strncasecmp is not portable
-						for (const char_t* lit = lang.c_str(); *lit; ++lit)
-						{
-							if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
-							++value;
-						}
-						
-						return *value == 0 || *value == '-'; // exact match, or the argument is a prefix followed by '-'
-					}
-				}
-				
-				return false;
-			}
-
-			case ast_variable:
-			{
-				assert(_rettype == _data.variable->type());
-
-				if (_rettype == xpath_type_boolean)
-					return _data.variable->get_boolean();
-
-				// fallthrough to type conversion
-			}
-
-			default:
-			{
-				switch (_rettype) // convert from the node's own return type
-				{
-				case xpath_type_number:
-					return convert_number_to_boolean(eval_number(c, stack));
-					
-				case xpath_type_string:
-				{
-					xpath_allocator_capture cr(stack.result);
-
-					return !eval_string(c, stack).empty(); // a string is true when it is non-empty
-				}
-					
-				case xpath_type_node_set:				
-				{
-					xpath_allocator_capture cr(stack.result);
-
-					return !eval_node_set(c, stack).empty(); // a node set is true when it is non-empty
-				}
-
-				default:
-					assert(!"Wrong expression for return type boolean");
-					return false;
-				}
-			}
-			}
-		}
-
-		double eval_number(const xpath_context& c, const xpath_stack& stack) // evaluates this node as a number in context c; node kinds without a numeric case are converted from their own return type
-		{
-			switch (_type)
-			{
-			case ast_op_add:
-				return _left->eval_number(c, stack) + _right->eval_number(c, stack);
-				
-			case ast_op_subtract:
-				return _left->eval_number(c, stack) - _right->eval_number(c, stack);
-
-			case ast_op_multiply:
-				return _left->eval_number(c, stack) * _right->eval_number(c, stack);
-
-			case ast_op_divide:
-				return _left->eval_number(c, stack) / _right->eval_number(c, stack);
-
-			case ast_op_mod:
-				return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
-
-			case ast_op_negate:
-				return -_left->eval_number(c, stack);
-
-			case ast_number_constant:
-				return _data.number;
-
-			case ast_func_last:
-				return static_cast<double>(c.size); // last() is the context size
-			
-			case ast_func_position:
-				return static_cast<double>(c.position);
-
-			case ast_func_count:
-			{
-				xpath_allocator_capture cr(stack.result);
-
-				return static_cast<double>(_left->eval_node_set(c, stack).size());
-			}
-			
-			case ast_func_string_length_0: // no argument: uses the string value of the context node
-			{
-				xpath_allocator_capture cr(stack.result);
-
-				return static_cast<double>(string_value(c.n, stack.result).length());
-			}
-			
-			case ast_func_string_length_1:
-			{
-				xpath_allocator_capture cr(stack.result);
-
-				return static_cast<double>(_left->eval_string(c, stack).length());
-			}
-			
-			case ast_func_number_0: // no argument: converts the string value of the context node
-			{
-				xpath_allocator_capture cr(stack.result);
-
-				return convert_string_to_number(string_value(c.n, stack.result).c_str());
-			}
-			
-			case ast_func_number_1:
-				return _left->eval_number(c, stack);
-
-			case ast_func_sum: // adds up the numeric value of each node's string value
-			{
-				xpath_allocator_capture cr(stack.result);
-
-				double r = 0;
-				
-				xpath_node_set_raw ns = _left->eval_node_set(c, stack);
-				
-				for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
-				{
-					xpath_allocator_capture cri(stack.result);
-
-					r += convert_string_to_number(string_value(*it, stack.result).c_str());
-				}
-			
-				return r;
-			}
-
-			case ast_func_floor:
-			{
-				double r = _left->eval_number(c, stack);
-				
-				return r == r ? floor(r) : r; // r != r only for NaN, which is returned unchanged
-			}
-
-			case ast_func_ceiling:
-			{
-				double r = _left->eval_number(c, stack);
-				
-				return r == r ? ceil(r) : r; // r != r only for NaN, which is returned unchanged
-			}
-
-			case ast_func_round:
-				return round_nearest_nzero(_left->eval_number(c, stack));
-			
-			case ast_variable:
-			{
-				assert(_rettype == _data.variable->type());
-
-				if (_rettype == xpath_type_number)
-					return _data.variable->get_number();
-
-				// fallthrough to type conversion
-			}
-
-			default:
-			{
-				switch (_rettype) // convert from the node's own return type
-				{
-				case xpath_type_boolean:
-					return eval_boolean(c, stack) ? 1 : 0;
-					
-				case xpath_type_string:
-				{
-					xpath_allocator_capture cr(stack.result);
-
-					return convert_string_to_number(eval_string(c, stack).c_str());
-				}
-					
-				case xpath_type_node_set: // goes through the string conversion of the node set
-				{
-					xpath_allocator_capture cr(stack.result);
-
-					return convert_string_to_number(eval_string(c, stack).c_str());
-				}
-					
-				default:
-					assert(!"Wrong expression for return type number");
-					return 0;
-				}
-				
-			}
-			}
-		}
-		
-		// Evaluates a concat() node: every argument (_left, then the chain starting at _right) is
-		// evaluated as a string and the pieces are joined into one buffer taken from stack.result.
-		xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
-		{
-			assert(_type == ast_func_concat);
-
-			xpath_allocator_capture ct(stack.temp);
-
-			// number of arguments: _left plus every node linked from _right
-			size_t count = 1;
-
-			xpath_ast_node* counted = _right;
-			while (counted)
-			{
-				++count;
-				counted = counted->_next;
-			}
-
-			// small argument lists fit into a local array; larger ones borrow storage from stack.temp
-			xpath_string static_buffer[4];
-			const size_t static_capacity = sizeof(static_buffer) / sizeof(static_buffer[0]);
-
-			xpath_string* pieces = static_buffer;
-
-			if (count > static_capacity)
-			{
-				pieces = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
-				assert(pieces);
-			}
-
-			// the arguments themselves are evaluated with the two allocators exchanged
-			xpath_stack swapped_stack = {stack.temp, stack.result};
-
-			pieces[0] = _left->eval_string(c, swapped_stack);
-
-			size_t filled = 1;
-
-			xpath_ast_node* arg = _right;
-			while (arg)
-			{
-				pieces[filled] = arg->eval_string(c, swapped_stack);
-
-				arg = arg->_next;
-				++filled;
-			}
-
-			assert(filled == count);
-
-			// combined length of all pieces
-			size_t total = 0;
-
-			for (size_t k = 0; k < count; ++k)
-				total += pieces[k].length();
-
-			// output buffer, with room for the terminator
-			char_t* joined = static_cast<char_t*>(stack.result->allocate((total + 1) * sizeof(char_t)));
-			assert(joined);
-
-			char_t* out = joined;
-
-			for (size_t k = 0; k < count; ++k)
-			{
-				const char_t* in = pieces[k].c_str();
-
-				while (*in)
-				{
-					*out = *in;
-					++out;
-					++in;
-				}
-			}
-
-			*out = 0;
-
-			return xpath_string(joined, true);
-		}
-
-		// Evaluates this node as a string.
-		// Node types with a dedicated string evaluation are handled case by case; every other
-		// node type ends up in the default branch, which evaluates the node according to its own
-		// _rettype and converts that result to a string.
-		// Several cases build a swapped_stack from {stack.temp, stack.result} so that intermediate
-		// values are evaluated with the two allocators exchanged, while the value that is returned
-		// is created with stack.result.
-		xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
-		{
-			switch (_type)
-			{
-			case ast_string_constant:
-				return xpath_string_const(_data.string);
-			
-			// local-name(), name() and namespace-uri(): the *_0 forms use the context node,
-			// the *_1 forms use the first node of the evaluated argument
-			case ast_func_local_name_0:
-			{
-				xpath_node na = c.n;
-				
-				return xpath_string_const(local_name(na));
-			}
-
-			case ast_func_local_name_1:
-			{
-				xpath_allocator_capture cr(stack.result);
-
-				xpath_node_set_raw ns = _left->eval_node_set(c, stack);
-				xpath_node na = ns.first();
-				
-				return xpath_string_const(local_name(na));
-			}
-
-			case ast_func_name_0:
-			{
-				xpath_node na = c.n;
-				
-				return xpath_string_const(qualified_name(na));
-			}
-
-			case ast_func_name_1:
-			{
-				xpath_allocator_capture cr(stack.result);
-
-				xpath_node_set_raw ns = _left->eval_node_set(c, stack);
-				xpath_node na = ns.first();
-				
-				return xpath_string_const(qualified_name(na));
-			}
-
-			case ast_func_namespace_uri_0:
-			{
-				xpath_node na = c.n;
-				
-				return xpath_string_const(namespace_uri(na));
-			}
-
-			case ast_func_namespace_uri_1:
-			{
-				xpath_allocator_capture cr(stack.result);
-
-				xpath_node_set_raw ns = _left->eval_node_set(c, stack);
-				xpath_node na = ns.first();
-				
-				return xpath_string_const(namespace_uri(na));
-			}
-
-			case ast_func_string_0:
-				return string_value(c.n, stack.result);
-
-			case ast_func_string_1:
-				return _left->eval_string(c, stack);
-
-			case ast_func_concat:
-				return eval_string_concat(c, stack);
-
-			case ast_func_substring_before:
-			{
-				xpath_allocator_capture cr(stack.temp);
-
-				xpath_stack swapped_stack = {stack.temp, stack.result};
-
-				xpath_string s = _left->eval_string(c, swapped_stack);
-				xpath_string p = _right->eval_string(c, swapped_stack);
-
-				const char_t* pos = find_substring(s.c_str(), p.c_str());
-				
-				// found: copy the part of s in front of the match; not found: empty string
-				return pos ? xpath_string(s.c_str(), pos, stack.result) : xpath_string();
-			}
-			
-			case ast_func_substring_after:
-			{
-				xpath_allocator_capture cr(stack.temp);
-
-				xpath_stack swapped_stack = {stack.temp, stack.result};
-
-				xpath_string s = _left->eval_string(c, swapped_stack);
-				xpath_string p = _right->eval_string(c, swapped_stack);
-				
-				const char_t* pos = find_substring(s.c_str(), p.c_str());
-				if (!pos) return xpath_string();
-
-				// first character after the match
-				const char_t* result = pos + p.length();
-
-				// a heap-backed source is copied with stack.result; otherwise the tail is wrapped as a constant
-				return s.uses_heap() ? xpath_string(result, stack.result) : xpath_string_const(result);
-			}
-
-			case ast_func_substring_2:
-			{
-				xpath_allocator_capture cr(stack.temp);
-
-				xpath_stack swapped_stack = {stack.temp, stack.result};
-
-				xpath_string s = _left->eval_string(c, swapped_stack);
-				size_t s_length = s.length();
-
-				// start position is 1-based (see the pos - 1 below) and rounded to an integer
-				double first = round_nearest(_right->eval_number(c, stack));
-				
-				if (is_nan(first)) return xpath_string(); // NaN
-				else if (first >= s_length + 1) return xpath_string();
-				
-				// positions in front of the string are clamped to its first character
-				size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
-				assert(1 <= pos && pos <= s_length + 1);
-
-				const char_t* rbegin = s.c_str() + (pos - 1);
-				
-				return s.uses_heap() ? xpath_string(rbegin, stack.result) : xpath_string_const(rbegin);
-			}
-			
-			case ast_func_substring_3:
-			{
-				xpath_allocator_capture cr(stack.temp);
-
-				xpath_stack swapped_stack = {stack.temp, stack.result};
-
-				xpath_string s = _left->eval_string(c, swapped_stack);
-				size_t s_length = s.length();
-
-				// first is the 1-based start; last is one past the final position (first + rounded length)
-				double first = round_nearest(_right->eval_number(c, stack));
-				double last = first + round_nearest(_right->_next->eval_number(c, stack));
-				
-				if (is_nan(first) || is_nan(last)) return xpath_string();
-				else if (first >= s_length + 1) return xpath_string();
-				else if (first >= last) return xpath_string();
-				else if (last < 1) return xpath_string();
-				
-				// clamp both ends to the string
-				size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
-				size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);
-
-				assert(1 <= pos && pos <= end && end <= s_length + 1);
-				const char_t* rbegin = s.c_str() + (pos - 1);
-				const char_t* rend = s.c_str() + (end - 1);
-
-				// a range that runs to the end of a non-heap string needs no copy
-				return (end == s_length + 1 && !s.uses_heap()) ? xpath_string_const(rbegin) : xpath_string(rbegin, rend, stack.result);
-			}
-
-			case ast_func_normalize_space_0:
-			{
-				xpath_string s = string_value(c.n, stack.result);
-
-				// NOTE(review): s.data(stack.result) presumably yields a writable buffer for s - confirm in xpath_string
-				normalize_space(s.data(stack.result));
-
-				return s;
-			}
-
-			case ast_func_normalize_space_1:
-			{
-				xpath_string s = _left->eval_string(c, stack);
-
-				normalize_space(s.data(stack.result));
-			
-				return s;
-			}
-
-			case ast_func_translate:
-			{
-				xpath_allocator_capture cr(stack.temp);
-
-				xpath_stack swapped_stack = {stack.temp, stack.result};
-
-				// s is the value that gets returned, so it is evaluated with the regular stack;
-				// the two mapping strings are only needed inside this case
-				xpath_string s = _left->eval_string(c, stack);
-				xpath_string from = _right->eval_string(c, swapped_stack);
-				xpath_string to = _right->_next->eval_string(c, swapped_stack);
-
-				translate(s.data(stack.result), from.c_str(), to.c_str());
-
-				return s;
-			}
-
-			case ast_variable:
-			{
-				assert(_rettype == _data.variable->type());
-
-				if (_rettype == xpath_type_string)
-					return xpath_string_const(_data.variable->get_string());
-
-				// fallthrough to type conversion
-			}
-
-			default:
-			{
-				// no dedicated string evaluation: evaluate as the node's own type and convert
-				switch (_rettype)
-				{
-				case xpath_type_boolean:
-					return xpath_string_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
-					
-				case xpath_type_number:
-					return convert_number_to_string(eval_number(c, stack), stack.result);
-					
-				case xpath_type_node_set:
-				{
-					xpath_allocator_capture cr(stack.temp);
-
-					xpath_stack swapped_stack = {stack.temp, stack.result};
-
-					// string value of ns.first(), or the empty string for an empty set
-					xpath_node_set_raw ns = eval_node_set(c, swapped_stack);
-					return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
-				}
-				
-				default:
-					assert(!"Wrong expression for return type string");
-					return xpath_string();
-				}
-			}
-			}
-		}
-
-		// Evaluates this node as a node set.
-		// Handles union, filter, id(), location steps, the root step and node-set variables;
-		// any other node type is reported through an assertion and yields an empty set.
-		xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack)
-		{
-			switch (_type)
-			{
-			case ast_op_union:
-			{
-				xpath_allocator_capture cr(stack.temp);
-
-				xpath_stack swapped_stack = {stack.temp, stack.result};
-
-				// the right-hand set becomes the result, so it is evaluated with the regular stack;
-				// the left-hand set is evaluated with the swapped stack and then appended to it
-				xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack);
-				xpath_node_set_raw rs = _right->eval_node_set(c, stack);
-				
-				// we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
-				rs.set_type(xpath_node_set::type_unsorted);
-
-				rs.append(ls.begin(), ls.end(), stack.result);
-				rs.remove_duplicates();
-				
-				return rs;
-			}
-
-			case ast_filter:
-			case ast_filter_posinv:
-			{
-				xpath_node_set_raw set = _left->eval_node_set(c, stack);
-
-				// either expression is a number or it contains position() call; sort by document order
-				if (_type == ast_filter) set.sort_do();
-
-				apply_predicate(set, 0, _right, stack);
-			
-				return set;
-			}
-			
-			case ast_func_id:
-				// id() always produces an empty set here
-				return xpath_node_set_raw();
-			
-			case ast_step:
-			{
-				// the axis is turned into a tag type so that step_do is selected per axis at compile time
-				switch (_axis)
-				{
-				case axis_ancestor:
-					return step_do(c, stack, axis_to_type<axis_ancestor>());
-					
-				case axis_ancestor_or_self:
-					return step_do(c, stack, axis_to_type<axis_ancestor_or_self>());
-
-				case axis_attribute:
-					return step_do(c, stack, axis_to_type<axis_attribute>());
-
-				case axis_child:
-					return step_do(c, stack, axis_to_type<axis_child>());
-				
-				case axis_descendant:
-					return step_do(c, stack, axis_to_type<axis_descendant>());
-
-				case axis_descendant_or_self:
-					return step_do(c, stack, axis_to_type<axis_descendant_or_self>());
-
-				case axis_following:
-					return step_do(c, stack, axis_to_type<axis_following>());
-				
-				case axis_following_sibling:
-					return step_do(c, stack, axis_to_type<axis_following_sibling>());
-				
-				case axis_namespace:
-					// namespaced axis is not supported
-					return xpath_node_set_raw();
-				
-				case axis_parent:
-					return step_do(c, stack, axis_to_type<axis_parent>());
-				
-				case axis_preceding:
-					return step_do(c, stack, axis_to_type<axis_preceding>());
-
-				case axis_preceding_sibling:
-					return step_do(c, stack, axis_to_type<axis_preceding_sibling>());
-				
-				case axis_self:
-					return step_do(c, stack, axis_to_type<axis_self>());
-
-				default:
-					assert(!"Unknown axis");
-					return xpath_node_set_raw();
-				}
-			}
-
-			case ast_step_root:
-			{
-				assert(!_right); // root step can't have any predicates
-
-				xpath_node_set_raw ns;
-
-				ns.set_type(xpath_node_set::type_sorted);
-
-				// context is a node: use its root; context is an attribute: use the root of its parent;
-				// otherwise the set stays empty
-				if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
-				else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
-
-				return ns;
-			}
-
-			case ast_variable:
-			{
-				assert(_rettype == _data.variable->type());
-
-				if (_rettype == xpath_type_node_set)
-				{
-					const xpath_node_set& s = _data.variable->get_node_set();
-
-					// copy the variable's nodes into a raw set, keeping its ordering type
-					xpath_node_set_raw ns;
-
-					ns.set_type(s.type());
-					ns.append(s.begin(), s.end(), stack.result);
-
-					return ns;
-				}
-
-				// fallthrough to type conversion
-			}
-
-			default:
-				// nothing converts to a node set, so reaching this point is a usage error
-				assert(!"Wrong expression for return type node set");
-				return xpath_node_set_raw();
-			}
-		}
-		
-		// Returns false for position() and for any other expression whose operand or argument
-		// subtree returns false; constants, variables, steps, predicates and filters return true
-		// without looking at their children.
-		bool is_posinv()
-		{
-			switch (_type)
-			{
-			case ast_func_position:
-				return false;
-
-			case ast_string_constant:
-			case ast_number_constant:
-			case ast_variable:
-			case ast_step:
-			case ast_step_root:
-			case ast_predicate:
-			case ast_filter:
-			case ast_filter_posinv:
-				return true;
-
-			default:
-				break;
-			}
-
-			// generic node: check the left operand, then every node chained from _right
-			if (_left && !_left->is_posinv()) return false;
-
-			xpath_ast_node* arg = _right;
-
-			while (arg)
-			{
-				if (!arg->is_posinv()) return false;
-
-				arg = arg->_next;
-			}
-
-			return true;
-		}
-
-		// Returns the stored _rettype converted to xpath_value_type.
-		xpath_value_type rettype() const
-		{
-			const xpath_value_type stored = static_cast<xpath_value_type>(_rettype);
-
-			return stored;
-		}
-	};
-
-	struct xpath_parser
-	{
-		// allocator that provides the memory for AST nodes and copied strings
-		xpath_allocator* _alloc;
-		// token source for the query being parsed
-		xpath_lexer _lexer;
-
-		// start of the query text; error offsets are measured from here
-		const char_t* _query;
-		// variables that $name references are resolved against (may be null)
-		xpath_variable_set* _variables;
-
-		// receives the error message and offset when parsing fails
-		xpath_parse_result* _result;
-
-	#ifdef PUGIXML_NO_EXCEPTIONS
-		// longjmp target used by throw_error when exceptions are disabled
-		jmp_buf _error_handler;
-	#endif
-
-		// Stores the message and the offset of the lexer's current position (relative to the start
-		// of the query) in *_result, then leaves the parser: by throwing xpath_exception, or by
-		// longjmp to _error_handler when PUGIXML_NO_EXCEPTIONS is defined.
-		void throw_error(const char* message)
-		{
-			_result->offset = _lexer.current_pos() - _query;
-			_result->error = message;
-
-		#ifndef PUGIXML_NO_EXCEPTIONS
-			throw xpath_exception(*_result);
-		#else
-			longjmp(_error_handler, 1);
-		#endif
-		}
-
-		// Reports an allocation failure: std::bad_alloc when exceptions are available, otherwise
-		// the regular throw_error path with the message "Out of memory".
-		void throw_error_oom()
-		{
-		#ifndef PUGIXML_NO_EXCEPTIONS
-			throw std::bad_alloc();
-		#else
-			throw_error("Out of memory");
-		#endif
-		}
-
-		// Returns raw storage for one xpath_ast_node taken from _alloc; an allocation failure is
-		// reported through throw_error_oom().
-		void* alloc_node()
-		{
-			void* memory = _alloc->allocate_nothrow(sizeof(xpath_ast_node));
-
-			if (memory) return memory;
-
-			throw_error_oom();
-
-			return 0;
-		}
-
-		// Copies the character range [value.begin, value.end) into zero-terminated storage taken
-		// from _alloc. Returns 0 when value.begin is null; an allocation failure is reported
-		// through throw_error_oom().
-		const char_t* alloc_string(const xpath_lexer_string& value)
-		{
-			if (!value.begin) return 0;
-
-			const size_t char_count = static_cast<size_t>(value.end - value.begin);
-
-			void* memory = _alloc->allocate_nothrow((char_count + 1) * sizeof(char_t));
-			if (!memory) throw_error_oom();
-
-			char_t* copy = static_cast<char_t*>(memory);
-
-			memcpy(copy, value.begin, char_count * sizeof(char_t));
-			copy[char_count] = 0;
-
-			return copy;
-		}
-
-		// Builds the node for a string-valued function that takes an optional node-set argument:
-		// type0 is used when no argument was given, type1 otherwise. A single argument that is not
-		// a node set is rejected through throw_error.
-		xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2])
-		{
-			assert(argc <= 1);
-
-			if (argc == 1)
-			{
-				if (args[0]->rettype() != xpath_type_node_set)
-					throw_error("Function has to be applied to node set");
-			}
-
-			const ast_type_t node_type = (argc != 0) ? type1 : type0;
-
-			return new (alloc_node()) xpath_ast_node(node_type, xpath_type_string, args[0]);
-		}
-
-		// Builds the AST node for a call to one of the built-in functions.
-		// name is the function name as written in the query, argc the number of parsed arguments,
-		// and args holds the first two of them (further arguments are chained from args[1]).
-		// Each entry checks the argument count and, where a node set is required, the argument
-		// type; the node is created with the return type the XPath 1.0 core library assigns to the
-		// function. An unknown name or a wrong argument count is reported through throw_error.
-		xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
-		{
-			// the first character narrows down the candidates before the full name is compared
-			switch (name.begin[0])
-			{
-			case 'b':
-				if (name == PUGIXML_TEXT("boolean") && argc == 1)
-					return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
-					
-				break;
-			
-			case 'c':
-				if (name == PUGIXML_TEXT("count") && argc == 1)
-				{
-					if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
-					return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
-				}
-				else if (name == PUGIXML_TEXT("contains") && argc == 2)
-					// contains() yields a boolean; typing it as a string would make conversions of
-					// its result fall into the asserting default branches of the evaluators
-					return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
-				else if (name == PUGIXML_TEXT("concat") && argc >= 2)
-					return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
-				else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
-					return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
-					
-				break;
-			
-			case 'f':
-				if (name == PUGIXML_TEXT("false") && argc == 0)
-					return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);
-				else if (name == PUGIXML_TEXT("floor") && argc == 1)
-					return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
-					
-				break;
-			
-			case 'i':
-				if (name == PUGIXML_TEXT("id") && argc == 1)
-					return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
-					
-				break;
-			
-			case 'l':
-				if (name == PUGIXML_TEXT("last") && argc == 0)
-					return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);
-				else if (name == PUGIXML_TEXT("lang") && argc == 1)
-					return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
-				else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
-					return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args);
-			
-				break;
-			
-			case 'n':
-				if (name == PUGIXML_TEXT("name") && argc <= 1)
-					return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args);
-				else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
-					return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
-				else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
-					return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
-				else if (name == PUGIXML_TEXT("not") && argc == 1)
-					return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
-				else if (name == PUGIXML_TEXT("number") && argc <= 1)
-					return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
-			
-				break;
-			
-			case 'p':
-				if (name == PUGIXML_TEXT("position") && argc == 0)
-					return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);
-				
-				break;
-			
-			case 'r':
-				if (name == PUGIXML_TEXT("round") && argc == 1)
-					return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
-
-				break;
-			
-			case 's':
-				if (name == PUGIXML_TEXT("string") && argc <= 1)
-					return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
-				else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
-					// string-length() yields a number; the parser also relies on the number type to
-					// recognise numeric predicates
-					return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
-				else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
-					return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
-				else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
-					return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
-				else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
-					return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
-				else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
-					return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
-				else if (name == PUGIXML_TEXT("sum") && argc == 1)
-				{
-					if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
-					return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
-				}
-
-				break;
-			
-			case 't':
-				if (name == PUGIXML_TEXT("translate") && argc == 3)
-					return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
-				else if (name == PUGIXML_TEXT("true") && argc == 0)
-					return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
-					
-				break;
-
-			default:
-				break;
-			}
-
-			throw_error("Unrecognized function or wrong parameter count");
-
-			// not reached: throw_error leaves the parser; the return only satisfies the signature
-			return 0;
-		}
-
-		// Maps an axis name to its axis_t value. specified is set to true when the name is one of
-		// the known axes; otherwise it is set to false and axis_child is returned.
-		axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
-		{
-			struct axis_entry
-			{
-				const char_t* text;
-				axis_t axis;
-			};
-
-			static const axis_entry known_axes[] =
-			{
-				{PUGIXML_TEXT("ancestor"), axis_ancestor},
-				{PUGIXML_TEXT("ancestor-or-self"), axis_ancestor_or_self},
-				{PUGIXML_TEXT("attribute"), axis_attribute},
-				{PUGIXML_TEXT("child"), axis_child},
-				{PUGIXML_TEXT("descendant"), axis_descendant},
-				{PUGIXML_TEXT("descendant-or-self"), axis_descendant_or_self},
-				{PUGIXML_TEXT("following"), axis_following},
-				{PUGIXML_TEXT("following-sibling"), axis_following_sibling},
-				{PUGIXML_TEXT("namespace"), axis_namespace},
-				{PUGIXML_TEXT("parent"), axis_parent},
-				{PUGIXML_TEXT("preceding"), axis_preceding},
-				{PUGIXML_TEXT("preceding-sibling"), axis_preceding_sibling},
-				{PUGIXML_TEXT("self"), axis_self}
-			};
-
-			const size_t known_count = sizeof(known_axes) / sizeof(known_axes[0]);
-
-			for (size_t i = 0; i < known_count; ++i)
-			{
-				if (name == known_axes[i].text)
-				{
-					specified = true;
-					return known_axes[i].axis;
-				}
-			}
-
-			specified = false;
-			return axis_child;
-		}
-
-		// Maps a node type name (comment, node, processing-instruction, text) to the matching
-		// nodetest_t value; any other name yields nodetest_none.
-		nodetest_t parse_node_test_type(const xpath_lexer_string& name)
-		{
-			if (name == PUGIXML_TEXT("comment")) return nodetest_type_comment;
-
-			if (name == PUGIXML_TEXT("node")) return nodetest_type_node;
-
-			if (name == PUGIXML_TEXT("processing-instruction")) return nodetest_type_pi;
-
-			if (name == PUGIXML_TEXT("text")) return nodetest_type_text;
-
-			return nodetest_none;
-		}
-
-		// PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
-		// The current lexeme selects the alternative; anything else is reported through throw_error.
-		xpath_ast_node* parse_primary_expression()
-		{
-			const lexeme_t token = _lexer.current();
-
-			// $name: resolved against the variable set while parsing
-			if (token == lex_var_ref)
-			{
-				xpath_lexer_string var_name = _lexer.contents();
-
-				if (!_variables)
-					throw_error("Unknown variable: variable set is not provided");
-
-				xpath_variable* variable = get_variable(_variables, var_name.begin, var_name.end);
-
-				if (!variable)
-					throw_error("Unknown variable: variable set does not contain the given name");
-
-				_lexer.next();
-
-				return new (alloc_node()) xpath_ast_node(ast_variable, variable->type(), variable);
-			}
-
-			// '(' Expr ')'
-			if (token == lex_open_brace)
-			{
-				_lexer.next();
-
-				xpath_ast_node* inner = parse_expression();
-
-				if (_lexer.current() != lex_close_brace)
-					throw_error("Unmatched braces");
-
-				_lexer.next();
-
-				return inner;
-			}
-
-			// string literal: the text is copied so the node owns a zero-terminated string
-			if (token == lex_quoted_string)
-			{
-				const char_t* text = alloc_string(_lexer.contents());
-
-				xpath_ast_node* literal = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, text);
-				_lexer.next();
-
-				return literal;
-			}
-
-			// number literal
-			if (token == lex_number)
-			{
-				double number = 0;
-
-				if (!convert_string_to_number(_lexer.contents().begin, _lexer.contents().end, &number))
-					throw_error_oom();
-
-				xpath_ast_node* constant = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, number);
-				_lexer.next();
-
-				return constant;
-			}
-
-			// function call: name '(' (Expr (',' Expr)*)? ')'
-			if (token == lex_string)
-			{
-				xpath_ast_node* args[2] = {0};
-				size_t argc = 0;
-
-				xpath_lexer_string function = _lexer.contents();
-				_lexer.next();
-
-				// most recently parsed argument; arguments past the second are chained from it
-				xpath_ast_node* chain_tail = 0;
-
-				if (_lexer.current() != lex_open_brace)
-					throw_error("Unrecognized function call");
-				_lexer.next();
-
-				if (_lexer.current() != lex_close_brace)
-				{
-					args[argc] = parse_expression();
-					++argc;
-				}
-
-				while (_lexer.current() != lex_close_brace)
-				{
-					if (_lexer.current() != lex_comma)
-						throw_error("No comma between function arguments");
-					_lexer.next();
-
-					xpath_ast_node* arg = parse_expression();
-
-					if (argc >= 2) chain_tail->set_next(arg);
-					else args[argc] = arg;
-
-					++argc;
-					chain_tail = arg;
-				}
-
-				_lexer.next();
-
-				return parse_function(function, argc, args);
-			}
-
-			throw_error("Unrecognizable primary expression");
-
-			return 0;
-		}
-		
-		// FilterExpr ::= PrimaryExpr | FilterExpr Predicate
-		// Predicate ::= '[' PredicateExpr ']'
-		// PredicateExpr ::= Expr
-		// Every predicate wraps the expression parsed so far into a new filter node.
-		xpath_ast_node* parse_filter_expression()
-		{
-			xpath_ast_node* filtered = parse_primary_expression();
-
-			for (;;)
-			{
-				if (_lexer.current() != lex_open_square_brace) break;
-
-				_lexer.next();
-
-				xpath_ast_node* predicate = parse_expression();
-
-				if (filtered->rettype() != xpath_type_node_set)
-					throw_error("Predicate has to be applied to node set");
-
-				// ast_filter_posinv is chosen only for a non-numeric predicate for which is_posinv() holds
-				ast_type_t filter_type = ast_filter;
-
-				if (predicate->rettype() != xpath_type_number && predicate->is_posinv())
-					filter_type = ast_filter_posinv;
-
-				filtered = new (alloc_node()) xpath_ast_node(filter_type, xpath_type_node_set, filtered, predicate);
-
-				if (_lexer.current() != lex_close_square_brace)
-					throw_error("Unmatched square brace");
-
-				_lexer.next();
-			}
-
-			return filtered;
-		}
-		
-		// Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
-		// AxisSpecifier ::= AxisName '::' | '@'?
-		// NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
-		// NameTest ::= '*' | NCName ':' '*' | QName
-		// AbbreviatedStep ::= '.' | '..'
-		//
-		// Parses one location step that is applied to set (set may be null) and returns the step
-		// node. Predicates that follow the node test are attached to the step as a chain of
-		// ast_predicate nodes. Errors are reported through throw_error.
-		xpath_ast_node* parse_step(xpath_ast_node* set)
-		{
-			if (set && set->rettype() != xpath_type_node_set)
-				throw_error("Step has to be applied to node set");
-
-			bool axis_specified = false;
-			axis_t axis = axis_child; // implied child axis
-
-			if (_lexer.current() == lex_axis_attribute)
-			{
-				// '@' abbreviation for the attribute axis
-				axis = axis_attribute;
-				axis_specified = true;
-				
-				_lexer.next();
-			}
-			else if (_lexer.current() == lex_dot)
-			{
-				_lexer.next();
-				
-				// '.' is self::node(); returns right away, so no predicates are parsed for it
-				return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0);
-			}
-			else if (_lexer.current() == lex_double_dot)
-			{
-				_lexer.next();
-				
-				// '..' is parent::node(); returns right away, so no predicates are parsed for it
-				return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0);
-			}
-		
-			nodetest_t nt_type = nodetest_none;
-			xpath_lexer_string nt_name;
-			
-			if (_lexer.current() == lex_string)
-			{
-				// node name test
-				nt_name = _lexer.contents();
-				_lexer.next();
-
-				// was it an axis name?
-				if (_lexer.current() == lex_double_colon)
-				{
-					// parse axis name
-					if (axis_specified) throw_error("Two axis specifiers in one step");
-
-					axis = parse_axis_name(nt_name, axis_specified);
-
-					if (!axis_specified) throw_error("Unknown axis");
-
-					// read actual node test
-					_lexer.next();
-
-					if (_lexer.current() == lex_multiply)
-					{
-						nt_type = nodetest_all;
-						nt_name = xpath_lexer_string();
-						_lexer.next();
-					}
-					else if (_lexer.current() == lex_string)
-					{
-						nt_name = _lexer.contents();
-						_lexer.next();
-					}
-					else throw_error("Unrecognized node test");
-				}
-				
-				// still undecided: the name is a node type test, a processing-instruction test or a name test
-				if (nt_type == nodetest_none)
-				{
-					// node type test or processing-instruction
-					if (_lexer.current() == lex_open_brace)
-					{
-						_lexer.next();
-						
-						if (_lexer.current() == lex_close_brace)
-						{
-							_lexer.next();
-
-							nt_type = parse_node_test_type(nt_name);
-
-							if (nt_type == nodetest_none) throw_error("Unrecognized node type");
-							
-							// a node type test carries no name
-							nt_name = xpath_lexer_string();
-						}
-						else if (nt_name == PUGIXML_TEXT("processing-instruction"))
-						{
-							if (_lexer.current() != lex_quoted_string)
-								throw_error("Only literals are allowed as arguments to processing-instruction()");
-						
-							// the literal becomes the name the step is created with
-							nt_type = nodetest_pi;
-							nt_name = _lexer.contents();
-							_lexer.next();
-							
-							if (_lexer.current() != lex_close_brace)
-								throw_error("Unmatched brace near processing-instruction()");
-							_lexer.next();
-						}
-						else
-							throw_error("Unmatched brace near node type test");
-
-					}
-					// QName or NCName:*
-					else
-					{
-						if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
-						{
-							nt_name.end--; // erase *
-							
-							nt_type = nodetest_all_in_namespace;
-						}
-						else nt_type = nodetest_name;
-					}
-				}
-			}
-			else if (_lexer.current() == lex_multiply)
-			{
-				nt_type = nodetest_all;
-				_lexer.next();
-			}
-			else throw_error("Unrecognized node test");
-			
-			// alloc_string copies the name; it returns 0 when nt_name is empty (null begin)
-			xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name));
-			
-			// last predicate attached so far; the first one hangs off n, later ones off their predecessor
-			xpath_ast_node* last = 0;
-			
-			while (_lexer.current() == lex_open_square_brace)
-			{
-				_lexer.next();
-				
-				xpath_ast_node* expr = parse_expression();
-
-				xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, xpath_type_node_set, expr);
-				
-				if (_lexer.current() != lex_close_square_brace)
-					throw_error("Unmatched square brace");
-				_lexer.next();
-				
-				if (last) last->set_next(pred);
-				else n->set_right(pred);
-				
-				last = pred;
-			}
-			
-			return n;
-		}
-		
-		// RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
-		// Parses a chain of steps applied to set; '//' inserts a descendant-or-self::node() step
-		// in front of the step that follows it.
-		xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
-		{
-			xpath_ast_node* path = parse_step(set);
-
-			for (lexeme_t separator = _lexer.current(); separator == lex_slash || separator == lex_double_slash; separator = _lexer.current())
-			{
-				_lexer.next();
-
-				if (separator == lex_double_slash)
-					path = new (alloc_node()) xpath_ast_node(ast_step, path, axis_descendant_or_self, nodetest_type_node, 0);
-
-				path = parse_step(path);
-			}
-
-			return path;
-		}
-		
-		// LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
-		// AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
-		xpath_ast_node* parse_location_path()
-		{
-			const lexeme_t leading = _lexer.current();
-
-			// no leading slash: plain relative path without a starting set
-			if (leading != lex_slash && leading != lex_double_slash)
-				return parse_relative_location_path(0);
-
-			_lexer.next();
-
-			xpath_ast_node* root = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
-
-			if (leading == lex_double_slash)
-			{
-				// '//' always continues with a relative path below descendant-or-self::node()
-				xpath_ast_node* descend = new (alloc_node()) xpath_ast_node(ast_step, root, axis_descendant_or_self, nodetest_type_node, 0);
-
-				return parse_relative_location_path(descend);
-			}
-
-			// relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
-			const lexeme_t next = _lexer.current();
-
-			const bool has_relative_path =
-				next == lex_string || next == lex_axis_attribute || next == lex_dot || next == lex_double_dot || next == lex_multiply;
-
-			return has_relative_path ? parse_relative_location_path(root) : root;
-		}
-		
-		// PathExpr ::= LocationPath
-		//				| FilterExpr
-		//				| FilterExpr '/' RelativeLocationPath
-		//				| FilterExpr '//' RelativeLocationPath
-		xpath_ast_node* parse_path_expression()
-		{
-			// A FilterExpr starts with a PrimaryExpr, which in turn starts with a variable
-			// reference, '(', a string literal, a number or a function name. Everything else
-			// can only be a LocationPath.
-			const lexeme_t first = _lexer.current();
-
-			const bool may_be_filter =
-				first == lex_var_ref || first == lex_open_brace || first == lex_quoted_string ||
-				first == lex_number || first == lex_string;
-
-			if (!may_be_filter) return parse_location_path();
-
-			if (first == lex_string)
-			{
-				// a name is a function call only if '(' follows it (whitespace skipped)
-				const char_t* lookahead = _lexer.state();
-
-				while (PUGI__IS_CHARTYPE(*lookahead, ct_space)) ++lookahead;
-
-				if (*lookahead != '(') return parse_location_path();
-
-				// node type tests are also written as name '(' and belong to a location path
-				if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path();
-			}
-
-			xpath_ast_node* filtered = parse_filter_expression();
-
-			const lexeme_t separator = _lexer.current();
-
-			if (separator != lex_slash && separator != lex_double_slash) return filtered;
-
-			_lexer.next();
-
-			if (separator == lex_double_slash)
-			{
-				if (filtered->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set");
-
-				filtered = new (alloc_node()) xpath_ast_node(ast_step, filtered, axis_descendant_or_self, nodetest_type_node, 0);
-			}
-
-			// select from location path
-			return parse_relative_location_path(filtered);
-		}
-
-		// UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
-		xpath_ast_node* parse_union_expression()
-		{
-			xpath_ast_node* n = parse_path_expression();
-
-			while (_lexer.current() == lex_union)
-			{
-				_lexer.next();
-
-				xpath_ast_node* expr = parse_union_expression();
-
-				if (n->rettype() != xpath_type_node_set || expr->rettype() != xpath_type_node_set)
-					throw_error("Union operator has to be applied to node sets");
-
-				n = new (alloc_node()) xpath_ast_node(ast_op_union, xpath_type_node_set, n, expr);
-			}
-
-			return n;
-		}
-
-		// UnaryExpr ::= UnionExpr | '-' UnaryExpr
-		xpath_ast_node* parse_unary_expression()
-		{
-			if (_lexer.current() == lex_minus)
-			{
-				_lexer.next();
-
-				xpath_ast_node* expr = parse_unary_expression();
-
-				return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
-			}
-			else return parse_union_expression();
-		}
-		
-		// MultiplicativeExpr ::= UnaryExpr
-		//						  | MultiplicativeExpr '*' UnaryExpr
-		//						  | MultiplicativeExpr 'div' UnaryExpr
-		//						  | MultiplicativeExpr 'mod' UnaryExpr
-		xpath_ast_node* parse_multiplicative_expression()
-		{
-			xpath_ast_node* n = parse_unary_expression();
-
-			while (_lexer.current() == lex_multiply || (_lexer.current() == lex_string &&
-				   (_lexer.contents() == PUGIXML_TEXT("mod") || _lexer.contents() == PUGIXML_TEXT("div"))))
-			{
-				ast_type_t op = _lexer.current() == lex_multiply ? ast_op_multiply :
-					_lexer.contents().begin[0] == 'd' ? ast_op_divide : ast_op_mod;
-				_lexer.next();
-
-				xpath_ast_node* expr = parse_unary_expression();
-
-				n = new (alloc_node()) xpath_ast_node(op, xpath_type_number, n, expr);
-			}
-
-			return n;
-		}
-
-		// AdditiveExpr ::= MultiplicativeExpr
-		//					| AdditiveExpr '+' MultiplicativeExpr
-		//					| AdditiveExpr '-' MultiplicativeExpr
-		xpath_ast_node* parse_additive_expression()
-		{
-			xpath_ast_node* n = parse_multiplicative_expression();
-
-			while (_lexer.current() == lex_plus || _lexer.current() == lex_minus)
-			{
-				lexeme_t l = _lexer.current();
-
-				_lexer.next();
-
-				xpath_ast_node* expr = parse_multiplicative_expression();
-
-				n = new (alloc_node()) xpath_ast_node(l == lex_plus ? ast_op_add : ast_op_subtract, xpath_type_number, n, expr);
-			}
-
-			return n;
-		}
-
-		// RelationalExpr ::= AdditiveExpr
-		//					  | RelationalExpr '<' AdditiveExpr
-		//					  | RelationalExpr '>' AdditiveExpr
-		//					  | RelationalExpr '<=' AdditiveExpr
-		//					  | RelationalExpr '>=' AdditiveExpr
-		xpath_ast_node* parse_relational_expression()
-		{
-			xpath_ast_node* n = parse_additive_expression();
-
-			while (_lexer.current() == lex_less || _lexer.current() == lex_less_or_equal || 
-				   _lexer.current() == lex_greater || _lexer.current() == lex_greater_or_equal)
-			{
-				lexeme_t l = _lexer.current();
-				_lexer.next();
-
-				xpath_ast_node* expr = parse_additive_expression();
-
-				n = new (alloc_node()) xpath_ast_node(l == lex_less ? ast_op_less : l == lex_greater ? ast_op_greater :
-								l == lex_less_or_equal ? ast_op_less_or_equal : ast_op_greater_or_equal, xpath_type_boolean, n, expr);
-			}
-
-			return n;
-		}
-		
-		// EqualityExpr ::= RelationalExpr
-		//					| EqualityExpr '=' RelationalExpr
-		//					| EqualityExpr '!=' RelationalExpr
-		xpath_ast_node* parse_equality_expression()
-		{
-			xpath_ast_node* n = parse_relational_expression();
-
-			while (_lexer.current() == lex_equal || _lexer.current() == lex_not_equal)
-			{
-				lexeme_t l = _lexer.current();
-
-				_lexer.next();
-
-				xpath_ast_node* expr = parse_relational_expression();
-
-				n = new (alloc_node()) xpath_ast_node(l == lex_equal ? ast_op_equal : ast_op_not_equal, xpath_type_boolean, n, expr);
-			}
-
-			return n;
-		}
-		
-		// AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
-		xpath_ast_node* parse_and_expression()
-		{
-			xpath_ast_node* n = parse_equality_expression();
-
-			while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("and"))
-			{
-				_lexer.next();
-
-				xpath_ast_node* expr = parse_equality_expression();
-
-				n = new (alloc_node()) xpath_ast_node(ast_op_and, xpath_type_boolean, n, expr);
-			}
-
-			return n;
-		}
-
-		// OrExpr ::= AndExpr | OrExpr 'or' AndExpr
-		xpath_ast_node* parse_or_expression()
-		{
-			xpath_ast_node* n = parse_and_expression();
-
-			while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("or"))
-			{
-				_lexer.next();
-
-				xpath_ast_node* expr = parse_and_expression();
-
-				n = new (alloc_node()) xpath_ast_node(ast_op_or, xpath_type_boolean, n, expr);
-			}
-
-			return n;
-		}
-		
-		// Expr ::= OrExpr
-		xpath_ast_node* parse_expression()
-		{
-			return parse_or_expression();
-		}
-
-		xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)
-		{
-		}
-
-		xpath_ast_node* parse()
-		{
-			xpath_ast_node* result = parse_expression();
-			
-			if (_lexer.current() != lex_eof)
-			{
-				// there are still unparsed tokens left, error
-				throw_error("Incorrect query");
-			}
-			
-			return result;
-		}
-
-		static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
-		{
-			xpath_parser parser(query, variables, alloc, result);
-
-		#ifdef PUGIXML_NO_EXCEPTIONS
-			int error = setjmp(parser._error_handler);
-
-			return (error == 0) ? parser.parse() : 0;
-		#else
-			return parser.parse();
-		#endif
-		}
-	};
-
-	struct xpath_query_impl
-	{
-		static xpath_query_impl* create()
-		{
-			void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
-
-			return new (memory) xpath_query_impl();
-		}
-
-		static void destroy(void* ptr)
-		{
-			if (!ptr) return;
-			
-			// free all allocated pages
-			static_cast<xpath_query_impl*>(ptr)->alloc.release();
-
-			// free allocator memory (with the first page)
-			xml_memory::deallocate(ptr);
-		}
-
-		xpath_query_impl(): root(0), alloc(&block)
-		{
-			block.next = 0;
-		}
-
-		xpath_ast_node* root;
-		xpath_allocator alloc;
-		xpath_memory_block block;
-	};
-
-	PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd)
-	{
-		if (!impl) return xpath_string();
-
-	#ifdef PUGIXML_NO_EXCEPTIONS
-		if (setjmp(sd.error_handler)) return xpath_string();
-	#endif
-
-		xpath_context c(n, 1, 1);
-
-		return impl->root->eval_string(c, sd.stack);
-	}
-PUGI__NS_END
-
-namespace pugi
-{
-#ifndef PUGIXML_NO_EXCEPTIONS
-	PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
-	{
-		assert(_result.error);
-	}
-	
-	PUGI__FN const char* xpath_exception::what() const throw()
-	{
-		return _result.error;
-	}
-
-	PUGI__FN const xpath_parse_result& xpath_exception::result() const
-	{
-		return _result;
-	}
-#endif
-	
-	PUGI__FN xpath_node::xpath_node()
-	{
-	}
-		
-	PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
-	{
-	}
-		
-	PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
-	{
-	}
-
-	PUGI__FN xml_node xpath_node::node() const
-	{
-		return _attribute ? xml_node() : _node;
-	}
-		
-	PUGI__FN xml_attribute xpath_node::attribute() const
-	{
-		return _attribute;
-	}
-	
-	PUGI__FN xml_node xpath_node::parent() const
-	{
-		return _attribute ? _node : _node.parent();
-	}
-
-	PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
-	{
-	}
-
-	PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
-	{
-		return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
-	}
-	
-	PUGI__FN bool xpath_node::operator!() const
-	{
-		return !(_node || _attribute);
-	}
-
-	PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
-	{
-		return _node == n._node && _attribute == n._attribute;
-	}
-	
-	PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
-	{
-		return _node != n._node || _attribute != n._attribute;
-	}
-
-#ifdef __BORLANDC__
-	PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
-	{
-		return (bool)lhs && rhs;
-	}
-
-	PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
-	{
-		return (bool)lhs || rhs;
-	}
-#endif
-
-	PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_)
-	{
-		assert(begin_ <= end_);
-
-		size_t size_ = static_cast<size_t>(end_ - begin_);
-
-		if (size_ <= 1)
-		{
-			// deallocate old buffer
-			if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
-
-			// use internal buffer
-			if (begin_ != end_) _storage = *begin_;
-
-			_begin = &_storage;
-			_end = &_storage + size_;
-		}
-		else
-		{
-			// make heap copy
-			xpath_node* storage = static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
-
-			if (!storage)
-			{
-			#ifdef PUGIXML_NO_EXCEPTIONS
-				return;
-			#else
-				throw std::bad_alloc();
-			#endif
-			}
-
-			memcpy(storage, begin_, size_ * sizeof(xpath_node));
-			
-			// deallocate old buffer
-			if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
-
-			// finalize
-			_begin = storage;
-			_end = storage + size_;
-		}
-	}
-
-	PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage)
-	{
-	}
-
-	PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_), _begin(&_storage), _end(&_storage)
-	{
-		_assign(begin_, end_);
-	}
-
-	PUGI__FN xpath_node_set::~xpath_node_set()
-	{
-		if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
-	}
-		
-	PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(ns._type), _begin(&_storage), _end(&_storage)
-	{
-		_assign(ns._begin, ns._end);
-	}
-	
-	PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
-	{
-		if (this == &ns) return *this;
-		
-		_type = ns._type;
-		_assign(ns._begin, ns._end);
-
-		return *this;
-	}
-
-	PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
-	{
-		return _type;
-	}
-		
-	PUGI__FN size_t xpath_node_set::size() const
-	{
-		return _end - _begin;
-	}
-		
-	PUGI__FN bool xpath_node_set::empty() const
-	{
-		return _begin == _end;
-	}
-		
-	PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
-	{
-		assert(index < size());
-		return _begin[index];
-	}
-
-	PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
-	{
-		return _begin;
-	}
-		
-	PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
-	{
-		return _end;
-	}
-	
-	PUGI__FN void xpath_node_set::sort(bool reverse)
-	{
-		_type = impl::xpath_sort(_begin, _end, _type, reverse);
-	}
-
-	PUGI__FN xpath_node xpath_node_set::first() const
-	{
-		return impl::xpath_first(_begin, _end, _type);
-	}
-
-	PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
-	{
-	}
-
-	PUGI__FN xpath_parse_result::operator bool() const
-	{
-		return error == 0;
-	}
-
-	PUGI__FN const char* xpath_parse_result::description() const
-	{
-		return error ? error : "No error";
-	}
-
-	PUGI__FN xpath_variable::xpath_variable()
-	{
-	}
-
-	PUGI__FN const char_t* xpath_variable::name() const
-	{
-		switch (_type)
-		{
-		case xpath_type_node_set:
-			return static_cast<const impl::xpath_variable_node_set*>(this)->name;
-
-		case xpath_type_number:
-			return static_cast<const impl::xpath_variable_number*>(this)->name;
-
-		case xpath_type_string:
-			return static_cast<const impl::xpath_variable_string*>(this)->name;
-
-		case xpath_type_boolean:
-			return static_cast<const impl::xpath_variable_boolean*>(this)->name;
-
-		default:
-			assert(!"Invalid variable type");
-			return 0;
-		}
-	}
-
-	PUGI__FN xpath_value_type xpath_variable::type() const
-	{
-		return _type;
-	}
-
-	PUGI__FN bool xpath_variable::get_boolean() const
-	{
-		return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
-	}
-
-	PUGI__FN double xpath_variable::get_number() const
-	{
-		return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
-	}
-
-	PUGI__FN const char_t* xpath_variable::get_string() const
-	{
-		const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
-		return value ? value : PUGIXML_TEXT("");
-	}
-
-	PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
-	{
-		return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
-	}
-
-	PUGI__FN bool xpath_variable::set(bool value)
-	{
-		if (_type != xpath_type_boolean) return false;
-
-		static_cast<impl::xpath_variable_boolean*>(this)->value = value;
-		return true;
-	}
-
-	PUGI__FN bool xpath_variable::set(double value)
-	{
-		if (_type != xpath_type_number) return false;
-
-		static_cast<impl::xpath_variable_number*>(this)->value = value;
-		return true;
-	}
-
-	PUGI__FN bool xpath_variable::set(const char_t* value)
-	{
-		if (_type != xpath_type_string) return false;
-
-		impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
-
-		// duplicate string
-		size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
-
-		char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
-		if (!copy) return false;
-
-		memcpy(copy, value, size);
-
-		// replace old string
-		if (var->value) impl::xml_memory::deallocate(var->value);
-		var->value = copy;
-
-		return true;
-	}
-
-	PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
-	{
-		if (_type != xpath_type_node_set) return false;
-
-		static_cast<impl::xpath_variable_node_set*>(this)->value = value;
-		return true;
-	}
-
-	PUGI__FN xpath_variable_set::xpath_variable_set()
-	{
-		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) _data[i] = 0;
-	}
-
-	PUGI__FN xpath_variable_set::~xpath_variable_set()
-	{
-		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
-		{
-			xpath_variable* var = _data[i];
-
-			while (var)
-			{
-				xpath_variable* next = var->_next;
-
-				impl::delete_xpath_variable(var->_type, var);
-
-				var = next;
-			}
-		}
-	}
-
-	PUGI__FN xpath_variable* xpath_variable_set::find(const char_t* name) const
-	{
-		const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
-		size_t hash = impl::hash_string(name) % hash_size;
-
-		// look for existing variable
-		for (xpath_variable* var = _data[hash]; var; var = var->_next)
-			if (impl::strequal(var->name(), name))
-				return var;
-
-		return 0;
-	}
-
-	PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
-	{
-		const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
-		size_t hash = impl::hash_string(name) % hash_size;
-
-		// look for existing variable
-		for (xpath_variable* var = _data[hash]; var; var = var->_next)
-			if (impl::strequal(var->name(), name))
-				return var->type() == type ? var : 0;
-
-		// add new variable
-		xpath_variable* result = impl::new_xpath_variable(type, name);
-
-		if (result)
-		{
-			result->_type = type;
-			result->_next = _data[hash];
-
-			_data[hash] = result;
-		}
-
-		return result;
-	}
-
-	PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
-	{
-		xpath_variable* var = add(name, xpath_type_boolean);
-		return var ? var->set(value) : false;
-	}
-
-	PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
-	{
-		xpath_variable* var = add(name, xpath_type_number);
-		return var ? var->set(value) : false;
-	}
-
-	PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
-	{
-		xpath_variable* var = add(name, xpath_type_string);
-		return var ? var->set(value) : false;
-	}
-
-	PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
-	{
-		xpath_variable* var = add(name, xpath_type_node_set);
-		return var ? var->set(value) : false;
-	}
-
-	PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
-	{
-		return find(name);
-	}
-
-	PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
-	{
-		return find(name);
-	}
-
-	PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
-	{
-		impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
-
-		if (!qimpl)
-		{
-		#ifdef PUGIXML_NO_EXCEPTIONS
-			_result.error = "Out of memory";
-		#else
-			throw std::bad_alloc();
-		#endif
-		}
-		else
-		{
-			impl::buffer_holder impl_holder(qimpl, impl::xpath_query_impl::destroy);
-
-			qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
-
-			if (qimpl->root)
-			{
-				_impl = static_cast<impl::xpath_query_impl*>(impl_holder.release());
-				_result.error = 0;
-			}
-		}
-	}
-
-	PUGI__FN xpath_query::~xpath_query()
-	{
-		impl::xpath_query_impl::destroy(_impl);
-	}
-
-	PUGI__FN xpath_value_type xpath_query::return_type() const
-	{
-		if (!_impl) return xpath_type_none;
-
-		return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
-	}
-
-	PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
-	{
-		if (!_impl) return false;
-		
-		impl::xpath_context c(n, 1, 1);
-		impl::xpath_stack_data sd;
-
-	#ifdef PUGIXML_NO_EXCEPTIONS
-		if (setjmp(sd.error_handler)) return false;
-	#endif
-		
-		return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
-	}
-	
-	PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
-	{
-		if (!_impl) return impl::gen_nan();
-		
-		impl::xpath_context c(n, 1, 1);
-		impl::xpath_stack_data sd;
-
-	#ifdef PUGIXML_NO_EXCEPTIONS
-		if (setjmp(sd.error_handler)) return impl::gen_nan();
-	#endif
-
-		return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
-	}
-
-#ifndef PUGIXML_NO_STL
-	PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
-	{
-		impl::xpath_stack_data sd;
-
-		return impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd).c_str();
-	}
-#endif
-
-	PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
-	{
-		impl::xpath_stack_data sd;
-
-		impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
-
-		size_t full_size = r.length() + 1;
-		
-		if (capacity > 0)
-		{
-			size_t size = (full_size < capacity) ? full_size : capacity;
-			assert(size > 0);
-
-			memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
-			buffer[size - 1] = 0;
-		}
-		
-		return full_size;
-	}
-
-	PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
-	{
-		if (!_impl) return xpath_node_set();
-
-		impl::xpath_ast_node* root = static_cast<impl::xpath_query_impl*>(_impl)->root;
-
-		if (root->rettype() != xpath_type_node_set)
-		{
-		#ifdef PUGIXML_NO_EXCEPTIONS
-			return xpath_node_set();
-		#else
-			xpath_parse_result res;
-			res.error = "Expression does not evaluate to node set";
-
-			throw xpath_exception(res);
-		#endif
-		}
-		
-		impl::xpath_context c(n, 1, 1);
-		impl::xpath_stack_data sd;
-
-	#ifdef PUGIXML_NO_EXCEPTIONS
-		if (setjmp(sd.error_handler)) return xpath_node_set();
-	#endif
-
-		impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack);
-
-		return xpath_node_set(r.begin(), r.end(), r.type());
-	}
-
-	PUGI__FN const xpath_parse_result& xpath_query::result() const
-	{
-		return _result;
-	}
-
-	PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
-	{
-	}
-
-	PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
-	{
-		return _impl ? unspecified_bool_xpath_query : 0;
-	}
-
-	PUGI__FN bool xpath_query::operator!() const
-	{
-		return !_impl;
-	}
-
-	PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
-	{
-		xpath_query q(query, variables);
-		return select_single_node(q);
-	}
-
-	PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
-	{
-		xpath_node_set s = query.evaluate_node_set(*this);
-		return s.empty() ? xpath_node() : s.first();
-	}
-
-	PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
-	{
-		xpath_query q(query, variables);
-		return select_nodes(q);
-	}
-
-	PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
-	{
-		return query.evaluate_node_set(*this);
-	}
-}
-
-#endif
-
-#ifdef __BORLANDC__
-#	pragma option pop
-#endif
-
-// Intel C++ does not properly keep warning state for function templates,
-// so popping warning state at the end of translation unit leads to warnings in the middle.
-#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
-#	pragma warning(pop)
-#endif
-
-// Undefine all local macros (makes sure we're not leaking macros in header-only mode)
-#undef PUGI__NO_INLINE
-#undef PUGI__STATIC_ASSERT
-#undef PUGI__DMC_VOLATILE
-#undef PUGI__MSVC_CRT_VERSION
-#undef PUGI__NS_BEGIN
-#undef PUGI__NS_END
-#undef PUGI__FN
-#undef PUGI__FN_NO_INLINE
-#undef PUGI__IS_CHARTYPE_IMPL
-#undef PUGI__IS_CHARTYPE
-#undef PUGI__IS_CHARTYPEX
-#undef PUGI__SKIPWS
-#undef PUGI__OPTSET
-#undef PUGI__PUSHNODE
-#undef PUGI__POPNODE
-#undef PUGI__SCANFOR
-#undef PUGI__SCANWHILE
-#undef PUGI__ENDSEG
-#undef PUGI__THROW_ERROR
-#undef PUGI__CHECK_ERROR
-
-#endif
-
-/**
- * Copyright (c) 2006-2012 Arseny Kapoulkine
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
diff --git a/gadgets/octave/pugixml.hpp b/gadgets/octave/pugixml.hpp
deleted file mode 100644
index 77b4dcf..0000000
--- a/gadgets/octave/pugixml.hpp
+++ /dev/null
@@ -1,1265 +0,0 @@
-/**
- * pugixml parser - version 1.2
- * --------------------------------------------------------
- * Copyright (C) 2006-2012, by Arseny Kapoulkine (arseny.kapoulkine at gmail.com)
- * Report bugs and download new versions at http://pugixml.org/
- *
- * This library is distributed under the MIT License. See notice at the end
- * of this file.
- *
- * This work is based on the pugxml parser, which is:
- * Copyright (C) 2003, by Kristen Wegner (kristen at tima.net)
- */
-
-#ifndef PUGIXML_VERSION
-// Define version macro; evaluates to major * 100 + minor so that it's safe to use in less-than comparisons
-#	define PUGIXML_VERSION 120
-#endif
-
-// Include user configuration file (this can define various configuration macros)
-#include "pugiconfig.hpp"
-
-#ifndef HEADER_PUGIXML_HPP
-#define HEADER_PUGIXML_HPP
-
-// Include stddef.h for size_t and ptrdiff_t
-#include <stddef.h>
-
-// Include exception header for XPath
-#if !defined(PUGIXML_NO_XPATH) && !defined(PUGIXML_NO_EXCEPTIONS)
-#	include <exception>
-#endif
-
-// Include STL headers
-#ifndef PUGIXML_NO_STL
-#	include <iterator>
-#	include <iosfwd>
-#	include <string>
-#endif
-
-// Macro for deprecated features
-#ifndef PUGIXML_DEPRECATED
-#	if defined(__GNUC__)
-#		define PUGIXML_DEPRECATED __attribute__((deprecated))
-#	elif defined(_MSC_VER) && _MSC_VER >= 1300
-#		define PUGIXML_DEPRECATED __declspec(deprecated)
-#	else
-#		define PUGIXML_DEPRECATED
-#	endif
-#endif
-
-// If no API is defined, assume default
-#ifndef PUGIXML_API
-#	define PUGIXML_API
-#endif
-
-// If no API for classes is defined, assume default
-#ifndef PUGIXML_CLASS
-#	define PUGIXML_CLASS PUGIXML_API
-#endif
-
-// If no API for functions is defined, assume default
-#ifndef PUGIXML_FUNCTION
-#	define PUGIXML_FUNCTION PUGIXML_API
-#endif
-
-// Character interface macros
-#ifdef PUGIXML_WCHAR_MODE
-#	define PUGIXML_TEXT(t) L ## t
-#	define PUGIXML_CHAR wchar_t
-#else
-#	define PUGIXML_TEXT(t) t
-#	define PUGIXML_CHAR char
-#endif
-
-namespace pugi
-{
-	// Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE
-	typedef PUGIXML_CHAR char_t;
-
-#ifndef PUGIXML_NO_STL
-	// String type used for operations that work with STL string; depends on PUGIXML_WCHAR_MODE
-	typedef std::basic_string<PUGIXML_CHAR, std::char_traits<PUGIXML_CHAR>, std::allocator<PUGIXML_CHAR> > string_t;
-#endif
-}
-
-// The PugiXML namespace
-namespace pugi
-{
-	// Tree node types
-	enum xml_node_type
-	{
-		node_null,			// Empty (null) node handle
-		node_document,		// A document tree's absolute root
-		node_element,		// Element tag, i.e. '<node/>'
-		node_pcdata,		// Plain character data, i.e. 'text'
-		node_cdata,			// Character data, i.e. '<![CDATA[text]]>'
-		node_comment,		// Comment tag, i.e. '<!-- text -->'
-		node_pi,			// Processing instruction, i.e. '<?name?>'
-		node_declaration,	// Document declaration, i.e. '<?xml version="1.0"?>'
-		node_doctype		// Document type declaration, i.e. '<!DOCTYPE doc>'
-	};
-
-	// Parsing options
-
-	// Minimal parsing mode (equivalent to turning all other flags off).
-	// Only elements and PCDATA sections are added to the DOM tree, no text conversions are performed.
-	const unsigned int parse_minimal = 0x0000;
-
-	// This flag determines if processing instructions (node_pi) are added to the DOM tree. This flag is off by default.
-	const unsigned int parse_pi = 0x0001;
-
-	// This flag determines if comments (node_comment) are added to the DOM tree. This flag is off by default.
-	const unsigned int parse_comments = 0x0002;
-
-	// This flag determines if CDATA sections (node_cdata) are added to the DOM tree. This flag is on by default.
-	const unsigned int parse_cdata = 0x0004;
-
-	// This flag determines if plain character data (node_pcdata) that consist only of whitespace are added to the DOM tree.
-	// This flag is off by default; turning it on usually results in slower parsing and more memory consumption.
-	const unsigned int parse_ws_pcdata = 0x0008;
-
-	// This flag determines if character and entity references are expanded during parsing. This flag is on by default.
-	const unsigned int parse_escapes = 0x0010;
-
-	// This flag determines if EOL characters are normalized (converted to #xA) during parsing. This flag is on by default.
-	const unsigned int parse_eol = 0x0020;
-	
-	// This flag determines if attribute values are normalized using CDATA normalization rules during parsing. This flag is on by default.
-	const unsigned int parse_wconv_attribute = 0x0040;
-
-	// This flag determines if attribute values are normalized using NMTOKENS normalization rules during parsing. This flag is off by default.
-	const unsigned int parse_wnorm_attribute = 0x0080;
-	
-	// This flag determines if document declaration (node_declaration) is added to the DOM tree. This flag is off by default.
-	const unsigned int parse_declaration = 0x0100;
-
-	// This flag determines if document type declaration (node_doctype) is added to the DOM tree. This flag is off by default.
-	const unsigned int parse_doctype = 0x0200;
-
-	// This flag determines if plain character data (node_pcdata) that is the only child of the parent node and that consists only
-	// of whitespace is added to the DOM tree.
-	// This flag is off by default; turning it on may result in slower parsing and more memory consumption.
-	const unsigned int parse_ws_pcdata_single = 0x0400;
-
-	// The default parsing mode.
-	// Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,
-	// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
-	const unsigned int parse_default = parse_cdata | parse_escapes | parse_wconv_attribute | parse_eol;
-
-	// The full parsing mode.
-	// Nodes of all types are added to the DOM tree, character/reference entities are expanded,
-	// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
-	const unsigned int parse_full = parse_default | parse_pi | parse_comments | parse_declaration | parse_doctype;
-
-	// These flags determine the encoding of input data for XML document
-	enum xml_encoding
-	{
-		encoding_auto,		// Auto-detect input encoding using BOM or < / <? detection; use UTF8 if BOM is not found
-		encoding_utf8,		// UTF8 encoding
-		encoding_utf16_le,	// Little-endian UTF16
-		encoding_utf16_be,	// Big-endian UTF16
-		encoding_utf16,		// UTF16 with native endianness
-		encoding_utf32_le,	// Little-endian UTF32
-		encoding_utf32_be,	// Big-endian UTF32
-		encoding_utf32,		// UTF32 with native endianness
-		encoding_wchar,		// The same encoding wchar_t has (either UTF16 or UTF32)
-		encoding_latin1
-	};
-
-	// Formatting flags
-	
-	// Indent the nodes that are written to output stream with as many indentation strings as deep the node is in DOM tree. This flag is on by default.
-	const unsigned int format_indent = 0x01;
-	
-	// Write encoding-specific BOM to the output stream. This flag is off by default.
-	const unsigned int format_write_bom = 0x02;
-
-	// Use raw output mode (no indentation and no line breaks are written). This flag is off by default.
-	const unsigned int format_raw = 0x04;
-	
-	// Omit default XML declaration even if there is no declaration in the document. This flag is off by default.
-	const unsigned int format_no_declaration = 0x08;
-
-	// Don't escape attribute values and PCDATA contents. This flag is off by default.
-	const unsigned int format_no_escapes = 0x10;
-
-	// Open file using text mode in xml_document::save_file. This enables special character (i.e. new-line) conversions on some systems. This flag is off by default.
-	const unsigned int format_save_file_text = 0x20;
-
-	// The default set of formatting flags.
-	// Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none.
-	const unsigned int format_default = format_indent;
-		
-	// Forward declarations
-	struct xml_attribute_struct;
-	struct xml_node_struct;
-
-	class xml_node_iterator;
-	class xml_attribute_iterator;
-	class xml_named_node_iterator;
-
-	class xml_tree_walker;
-
-	class xml_node;
-
-	class xml_text;
-	
-	#ifndef PUGIXML_NO_XPATH
-	class xpath_node;
-	class xpath_node_set;
-	class xpath_query;
-	class xpath_variable_set;
-	#endif
-
-	// Range-based for loop support
-	template <typename It> class xml_object_range
-	{
-	public:
-		typedef It const_iterator;
-
-		xml_object_range(It b, It e): _begin(b), _end(e)
-		{
-		}
-
-		It begin() const { return _begin; }
-		It end() const { return _end; }
-
-	private:
-		It _begin, _end;
-	};
-
-	// Writer interface for node printing (see xml_node::print)
-	class PUGIXML_CLASS xml_writer
-	{
-	public:
-		virtual ~xml_writer() {}
-
-		// Write memory chunk into stream/file/whatever
-		virtual void write(const void* data, size_t size) = 0;
-	};
-
-	// xml_writer implementation for FILE*
-	class PUGIXML_CLASS xml_writer_file: public xml_writer
-	{
-	public:
-		// Construct writer from a FILE* object; void* is used to avoid header dependencies on stdio
-		xml_writer_file(void* file);
-
-		virtual void write(const void* data, size_t size);
-
-	private:
-		void* file;
-	};
-
-	#ifndef PUGIXML_NO_STL
-	// xml_writer implementation for streams
-	class PUGIXML_CLASS xml_writer_stream: public xml_writer
-	{
-	public:
-		// Construct writer from an output stream object
-		xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream);
-		xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream);
-
-		virtual void write(const void* data, size_t size);
-
-	private:
-		std::basic_ostream<char, std::char_traits<char> >* narrow_stream;
-		std::basic_ostream<wchar_t, std::char_traits<wchar_t> >* wide_stream;
-	};
-	#endif
-
-	// A light-weight handle for manipulating attributes in DOM tree
-	class PUGIXML_CLASS xml_attribute
-	{
-		friend class xml_attribute_iterator;
-		friend class xml_node;
-
-	private:
-		xml_attribute_struct* _attr;
-	
-		typedef void (*unspecified_bool_type)(xml_attribute***);
-
-	public:
-		// Default constructor. Constructs an empty attribute.
-		xml_attribute();
-		
-		// Constructs attribute from internal pointer
-		explicit xml_attribute(xml_attribute_struct* attr);
-
-		// Safe bool conversion operator
-		operator unspecified_bool_type() const;
-
-		// Borland C++ workaround
-		bool operator!() const;
-
-		// Comparison operators (compares wrapped attribute pointers)
-		bool operator==(const xml_attribute& r) const;
-		bool operator!=(const xml_attribute& r) const;
-		bool operator<(const xml_attribute& r) const;
-		bool operator>(const xml_attribute& r) const;
-		bool operator<=(const xml_attribute& r) const;
-		bool operator>=(const xml_attribute& r) const;
-
-		// Check if attribute is empty
-		bool empty() const;
-
-		// Get attribute name/value, or "" if attribute is empty
-		const char_t* name() const;
-		const char_t* value() const;
-
-		// Get attribute value, or the default value if attribute is empty
-		const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const;
-
-		// Get attribute value as a number, or the default value if conversion did not succeed or attribute is empty
-		int as_int(int def = 0) const;
-		unsigned int as_uint(unsigned int def = 0) const;
-		double as_double(double def = 0) const;
-		float as_float(float def = 0) const;
-
-		// Get attribute value as bool (returns true if first character is in '1tTyY' set), or the default value if attribute is empty
-		bool as_bool(bool def = false) const;
-
-		// Set attribute name/value (returns false if attribute is empty or there is not enough memory)
-		bool set_name(const char_t* rhs);
-		bool set_value(const char_t* rhs);
-
-		// Set attribute value with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
-		bool set_value(int rhs);
-		bool set_value(unsigned int rhs);
-		bool set_value(double rhs);
-		bool set_value(bool rhs);
-
-		// Set attribute value (equivalent to set_value without error checking)
-		xml_attribute& operator=(const char_t* rhs);
-		xml_attribute& operator=(int rhs);
-		xml_attribute& operator=(unsigned int rhs);
-		xml_attribute& operator=(double rhs);
-		xml_attribute& operator=(bool rhs);
-
-		// Get next/previous attribute in the attribute list of the parent node
-		xml_attribute next_attribute() const;
-		xml_attribute previous_attribute() const;
-
-		// Get hash value (unique for handles to the same object)
-		size_t hash_value() const;
-
-		// Get internal pointer
-		xml_attribute_struct* internal_object() const;
-	};
-
-#ifdef __BORLANDC__
-	// Borland C++ workaround
-	bool PUGIXML_FUNCTION operator&&(const xml_attribute& lhs, bool rhs);
-	bool PUGIXML_FUNCTION operator||(const xml_attribute& lhs, bool rhs);
-#endif
-
-	// A light-weight handle for manipulating nodes in DOM tree
-	class PUGIXML_CLASS xml_node
-	{
-		friend class xml_attribute_iterator;
-		friend class xml_node_iterator;
-		friend class xml_named_node_iterator;
-
-	protected:
-		xml_node_struct* _root;
-
-		typedef void (*unspecified_bool_type)(xml_node***);
-
-	public:
-		// Default constructor. Constructs an empty node.
-		xml_node();
-
-		// Constructs node from internal pointer
-		explicit xml_node(xml_node_struct* p);
-
-		// Safe bool conversion operator
-		operator unspecified_bool_type() const;
-
-		// Borland C++ workaround
-		bool operator!() const;
-	
-		// Comparison operators (compares wrapped node pointers)
-		bool operator==(const xml_node& r) const;
-		bool operator!=(const xml_node& r) const;
-		bool operator<(const xml_node& r) const;
-		bool operator>(const xml_node& r) const;
-		bool operator<=(const xml_node& r) const;
-		bool operator>=(const xml_node& r) const;
-
-		// Check if node is empty.
-		bool empty() const;
-
-		// Get node type
-		xml_node_type type() const;
-
-		// Get node name/value, or "" if node is empty or it has no name/value
-		const char_t* name() const;
-		const char_t* value() const;
-	
-		// Get attribute list
-		xml_attribute first_attribute() const;
-		xml_attribute last_attribute() const;
-
-		// Get children list
-		xml_node first_child() const;
-		xml_node last_child() const;
-
-		// Get next/previous sibling in the children list of the parent node
-		xml_node next_sibling() const;
-		xml_node previous_sibling() const;
-		
-		// Get parent node
-		xml_node parent() const;
-
-		// Get root of DOM tree this node belongs to
-		xml_node root() const;
-
-		// Get text object for the current node
-		xml_text text() const;
-
-		// Get child, attribute or next/previous sibling with the specified name
-		xml_node child(const char_t* name) const;
-		xml_attribute attribute(const char_t* name) const;
-		xml_node next_sibling(const char_t* name) const;
-		xml_node previous_sibling(const char_t* name) const;
-
-		// Get child value of current node; that is, value of the first child node of type PCDATA/CDATA
-		const char_t* child_value() const;
-
-		// Get child value of child with specified name. Equivalent to child(name).child_value().
-		const char_t* child_value(const char_t* name) const;
-
-		// Set node name/value (returns false if node is empty, there is not enough memory, or node can not have name/value)
-		bool set_name(const char_t* rhs);
-		bool set_value(const char_t* rhs);
-		
-		// Add attribute with specified name. Returns added attribute, or empty attribute on errors.
-		xml_attribute append_attribute(const char_t* name);
-		xml_attribute prepend_attribute(const char_t* name);
-		xml_attribute insert_attribute_after(const char_t* name, const xml_attribute& attr);
-		xml_attribute insert_attribute_before(const char_t* name, const xml_attribute& attr);
-
-		// Add a copy of the specified attribute. Returns added attribute, or empty attribute on errors.
-		xml_attribute append_copy(const xml_attribute& proto);
-		xml_attribute prepend_copy(const xml_attribute& proto);
-		xml_attribute insert_copy_after(const xml_attribute& proto, const xml_attribute& attr);
-		xml_attribute insert_copy_before(const xml_attribute& proto, const xml_attribute& attr);
-
-		// Add child node with specified type. Returns added node, or empty node on errors.
-		xml_node append_child(xml_node_type type = node_element);
-		xml_node prepend_child(xml_node_type type = node_element);
-		xml_node insert_child_after(xml_node_type type, const xml_node& node);
-		xml_node insert_child_before(xml_node_type type, const xml_node& node);
-
-		// Add child element with specified name. Returns added node, or empty node on errors.
-		xml_node append_child(const char_t* name);
-		xml_node prepend_child(const char_t* name);
-		xml_node insert_child_after(const char_t* name, const xml_node& node);
-		xml_node insert_child_before(const char_t* name, const xml_node& node);
-
-		// Add a copy of the specified node as a child. Returns added node, or empty node on errors.
-		xml_node append_copy(const xml_node& proto);
-		xml_node prepend_copy(const xml_node& proto);
-		xml_node insert_copy_after(const xml_node& proto, const xml_node& node);
-		xml_node insert_copy_before(const xml_node& proto, const xml_node& node);
-
-		// Remove specified attribute
-		bool remove_attribute(const xml_attribute& a);
-		bool remove_attribute(const char_t* name);
-
-		// Remove specified child
-		bool remove_child(const xml_node& n);
-		bool remove_child(const char_t* name);
-
-		// Find attribute using predicate. Returns first attribute for which predicate returned true.
-		template <typename Predicate> xml_attribute find_attribute(Predicate pred) const
-		{
-			if (!_root) return xml_attribute();
-			
-			for (xml_attribute attrib = first_attribute(); attrib; attrib = attrib.next_attribute())
-				if (pred(attrib))
-					return attrib;
-		
-			return xml_attribute();
-		}
-
-		// Find child node using predicate. Returns first child for which predicate returned true.
-		template <typename Predicate> xml_node find_child(Predicate pred) const
-		{
-			if (!_root) return xml_node();
-	
-			for (xml_node node = first_child(); node; node = node.next_sibling())
-				if (pred(node))
-					return node;
-		
-			return xml_node();
-		}
-
-		// Find node from subtree using predicate. Returns first node from subtree (depth-first), for which predicate returned true.
-		template <typename Predicate> xml_node find_node(Predicate pred) const
-		{
-			if (!_root) return xml_node();
-
-			xml_node cur = first_child();
-			
-			while (cur._root && cur._root != _root)
-			{
-				if (pred(cur)) return cur;
-
-				if (cur.first_child()) cur = cur.first_child();
-				else if (cur.next_sibling()) cur = cur.next_sibling();
-				else
-				{
-					while (!cur.next_sibling() && cur._root != _root) cur = cur.parent();
-
-					if (cur._root != _root) cur = cur.next_sibling();
-				}
-			}
-
-			return xml_node();
-		}
-
-		// Find child node by attribute name/value
-		xml_node find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const;
-		xml_node find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const;
-
-	#ifndef PUGIXML_NO_STL
-		// Get the absolute node path from root as a text string.
-		string_t path(char_t delimiter = '/') const;
-	#endif
-
-		// Search for a node by path consisting of node names and . or .. elements.
-		xml_node first_element_by_path(const char_t* path, char_t delimiter = '/') const;
-
-		// Recursively traverse subtree with xml_tree_walker
-		bool traverse(xml_tree_walker& walker);
-	
-	#ifndef PUGIXML_NO_XPATH
-		// Select single node by evaluating XPath query. Returns first node from the resulting node set.
-		xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
-		xpath_node select_single_node(const xpath_query& query) const;
-
-		// Select node set by evaluating XPath query
-		xpath_node_set select_nodes(const char_t* query, xpath_variable_set* variables = 0) const;
-		xpath_node_set select_nodes(const xpath_query& query) const;
-	#endif
-		
-		// Print subtree using a writer object
-		void print(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
-
-	#ifndef PUGIXML_NO_STL
-		// Print subtree to stream
-		void print(std::basic_ostream<char, std::char_traits<char> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
-		void print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, unsigned int depth = 0) const;
-	#endif
-
-		// Child nodes iterators
-		typedef xml_node_iterator iterator;
-
-		iterator begin() const;
-		iterator end() const;
-
-		// Attribute iterators
-		typedef xml_attribute_iterator attribute_iterator;
-
-		attribute_iterator attributes_begin() const;
-		attribute_iterator attributes_end() const;
-
-		// Range-based for support
-		xml_object_range<xml_node_iterator> children() const;
-		xml_object_range<xml_named_node_iterator> children(const char_t* name) const;
-		xml_object_range<xml_attribute_iterator> attributes() const;
-
-		// Get node offset in parsed file/string (in char_t units) for debugging purposes
-		ptrdiff_t offset_debug() const;
-
-		// Get hash value (unique for handles to the same object)
-		size_t hash_value() const;
-
-		// Get internal pointer
-		xml_node_struct* internal_object() const;
-	};
-
-#ifdef __BORLANDC__
-	// Borland C++ workaround
-	bool PUGIXML_FUNCTION operator&&(const xml_node& lhs, bool rhs);
-	bool PUGIXML_FUNCTION operator||(const xml_node& lhs, bool rhs);
-#endif
-
-	// A helper for working with text inside PCDATA nodes
-	class PUGIXML_CLASS xml_text
-	{
-		friend class xml_node;
-
-		xml_node_struct* _root;
-
-		typedef void (*unspecified_bool_type)(xml_text***);
-
-		explicit xml_text(xml_node_struct* root);
-
-		xml_node_struct* _data_new();
-		xml_node_struct* _data() const;
-
-	public:
-		// Default constructor. Constructs an empty object.
-		xml_text();
-
-		// Safe bool conversion operator
-		operator unspecified_bool_type() const;
-
-		// Borland C++ workaround
-		bool operator!() const;
-
-		// Check if text object is empty
-		bool empty() const;
-
-		// Get text, or "" if object is empty
-		const char_t* get() const;
-
-		// Get text, or the default value if object is empty
-		const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const;
-
-		// Get text as a number, or the default value if conversion did not succeed or object is empty
-		int as_int(int def = 0) const;
-		unsigned int as_uint(unsigned int def = 0) const;
-		double as_double(double def = 0) const;
-		float as_float(float def = 0) const;
-
-		// Get text as bool (returns true if first character is in '1tTyY' set), or the default value if object is empty
-		bool as_bool(bool def = false) const;
-
-		// Set text (returns false if object is empty or there is not enough memory)
-		bool set(const char_t* rhs);
-
-		// Set text with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
-		bool set(int rhs);
-		bool set(unsigned int rhs);
-		bool set(double rhs);
-		bool set(bool rhs);
-
-		// Set text (equivalent to set without error checking)
-		xml_text& operator=(const char_t* rhs);
-		xml_text& operator=(int rhs);
-		xml_text& operator=(unsigned int rhs);
-		xml_text& operator=(double rhs);
-		xml_text& operator=(bool rhs);
-
-		// Get the data node (node_pcdata or node_cdata) for this object
-		xml_node data() const;
-	};
-
-#ifdef __BORLANDC__
-	// Borland C++ workaround
-	bool PUGIXML_FUNCTION operator&&(const xml_text& lhs, bool rhs);
-	bool PUGIXML_FUNCTION operator||(const xml_text& lhs, bool rhs);
-#endif
-
-	// Child node iterator (a bidirectional iterator over a collection of xml_node)
-	class PUGIXML_CLASS xml_node_iterator
-	{
-		friend class xml_node;
-
-	private:
-		mutable xml_node _wrap;
-		xml_node _parent;
-
-		xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent);
-
-	public:
-		// Iterator traits
-		typedef ptrdiff_t difference_type;
-		typedef xml_node value_type;
-		typedef xml_node* pointer;
-		typedef xml_node& reference;
-
-	#ifndef PUGIXML_NO_STL
-		typedef std::bidirectional_iterator_tag iterator_category;
-	#endif
-
-		// Default constructor
-		xml_node_iterator();
-
-		// Construct an iterator which points to the specified node
-		xml_node_iterator(const xml_node& node);
-
-		// Iterator operators
-		bool operator==(const xml_node_iterator& rhs) const;
-		bool operator!=(const xml_node_iterator& rhs) const;
-
-		xml_node& operator*() const;
-		xml_node* operator->() const;
-
-		const xml_node_iterator& operator++();
-		xml_node_iterator operator++(int);
-
-		const xml_node_iterator& operator--();
-		xml_node_iterator operator--(int);
-	};
-
-	// Attribute iterator (a bidirectional iterator over a collection of xml_attribute)
-	class PUGIXML_CLASS xml_attribute_iterator
-	{
-		friend class xml_node;
-
-	private:
-		mutable xml_attribute _wrap;
-		xml_node _parent;
-
-		xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent);
-
-	public:
-		// Iterator traits
-		typedef ptrdiff_t difference_type;
-		typedef xml_attribute value_type;
-		typedef xml_attribute* pointer;
-		typedef xml_attribute& reference;
-
-	#ifndef PUGIXML_NO_STL
-		typedef std::bidirectional_iterator_tag iterator_category;
-	#endif
-
-		// Default constructor
-		xml_attribute_iterator();
-
-		// Construct an iterator which points to the specified attribute
-		xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent);
-
-		// Iterator operators
-		bool operator==(const xml_attribute_iterator& rhs) const;
-		bool operator!=(const xml_attribute_iterator& rhs) const;
-
-		xml_attribute& operator*() const;
-		xml_attribute* operator->() const;
-
-		const xml_attribute_iterator& operator++();
-		xml_attribute_iterator operator++(int);
-
-		const xml_attribute_iterator& operator--();
-		xml_attribute_iterator operator--(int);
-	};
-
-	// Named node range helper
-	class xml_named_node_iterator
-	{
-	public:
-		// Iterator traits
-		typedef ptrdiff_t difference_type;
-		typedef xml_node value_type;
-		typedef xml_node* pointer;
-		typedef xml_node& reference;
-
-	#ifndef PUGIXML_NO_STL
-		typedef std::forward_iterator_tag iterator_category;
-	#endif
-
-		// Default constructor
-		xml_named_node_iterator();
-
-		// Construct an iterator which points to the specified node
-		xml_named_node_iterator(const xml_node& node, const char_t* name);
-
-		// Iterator operators
-		bool operator==(const xml_named_node_iterator& rhs) const;
-		bool operator!=(const xml_named_node_iterator& rhs) const;
-
-		xml_node& operator*() const;
-		xml_node* operator->() const;
-
-		const xml_named_node_iterator& operator++();
-		xml_named_node_iterator operator++(int);
-
-	private:
-		mutable xml_node _node;
-		const char_t* _name;
-	};
-
-	// Abstract tree walker class (see xml_node::traverse)
-	class PUGIXML_CLASS xml_tree_walker
-	{
-		friend class xml_node;
-
-	private:
-		int _depth;
-	
-	protected:
-		// Get current traversal depth
-		int depth() const;
-	
-	public:
-		xml_tree_walker();
-		virtual ~xml_tree_walker();
-
-		// Callback that is called when traversal begins
-		virtual bool begin(xml_node& node);
-
-		// Callback that is called for each node traversed
-		virtual bool for_each(xml_node& node) = 0;
-
-		// Callback that is called when traversal ends
-		virtual bool end(xml_node& node);
-	};
-
-	// Parsing status, returned as part of xml_parse_result object
-	enum xml_parse_status
-	{
-		status_ok = 0,				// No error
-
-		status_file_not_found,		// File was not found during load_file()
-		status_io_error,			// Error reading from file/stream
-		status_out_of_memory,		// Could not allocate memory
-		status_internal_error,		// Internal error occurred
-
-		status_unrecognized_tag,	// Parser could not determine tag type
-
-		status_bad_pi,				// Parsing error occurred while parsing document declaration/processing instruction
-		status_bad_comment,			// Parsing error occurred while parsing comment
-		status_bad_cdata,			// Parsing error occurred while parsing CDATA section
-		status_bad_doctype,			// Parsing error occurred while parsing document type declaration
-		status_bad_pcdata,			// Parsing error occurred while parsing PCDATA section
-		status_bad_start_element,	// Parsing error occurred while parsing start element tag
-		status_bad_attribute,		// Parsing error occurred while parsing element attribute
-		status_bad_end_element,		// Parsing error occurred while parsing end element tag
-		status_end_element_mismatch // There was a mismatch of start-end tags (closing tag had incorrect name, some tag was not closed or there was an excessive closing tag)
-	};
-
-	// Parsing result
-	struct PUGIXML_CLASS xml_parse_result
-	{
-		// Parsing status (see xml_parse_status)
-		xml_parse_status status;
-
-		// Last parsed offset (in char_t units from start of input data)
-		ptrdiff_t offset;
-
-		// Source document encoding
-		xml_encoding encoding;
-
-		// Default constructor, initializes object to failed state
-		xml_parse_result();
-
-		// Cast to bool operator
-		operator bool() const;
-
-		// Get error description
-		const char* description() const;
-	};
-
-	// Document class (DOM tree root)
-	class PUGIXML_CLASS xml_document: public xml_node
-	{
-	private:
-		char_t* _buffer;
-
-		char _memory[192];
-		
-		// Non-copyable semantics
-		xml_document(const xml_document&);
-		const xml_document& operator=(const xml_document&);
-
-		void create();
-		void destroy();
-
-		xml_parse_result load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own);
-
-	public:
-		// Default constructor, makes empty document
-		xml_document();
-
-		// Destructor, invalidates all node/attribute handles to this document
-		~xml_document();
-
-		// Removes all nodes, leaving the empty document
-		void reset();
-
-		// Removes all nodes, then copies the entire contents of the specified document
-		void reset(const xml_document& proto);
-
-	#ifndef PUGIXML_NO_STL
-		// Load document from stream.
-		xml_parse_result load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
-		xml_parse_result load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options = parse_default);
-	#endif
-
-		// Load document from zero-terminated string. No encoding conversions are applied.
-		xml_parse_result load(const char_t* contents, unsigned int options = parse_default);
-
-		// Load document from file
-		xml_parse_result load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
-		xml_parse_result load_file(const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
-
-		// Load document from buffer. Copies/converts the buffer, so it may be deleted or changed after the function returns.
-		xml_parse_result load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
-
-		// Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data).
-		// You should ensure that buffer data will persist throughout the document's lifetime, and free the buffer memory manually once document is destroyed.
-		xml_parse_result load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
-
-		// Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data).
-		// You should allocate the buffer with pugixml allocation function; document will free the buffer when it is no longer needed (you can't use it anymore).
-		xml_parse_result load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
-
-		// Save XML document to writer (semantics is slightly different from xml_node::print, see documentation for details).
-		void save(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
-
-	#ifndef PUGIXML_NO_STL
-		// Save XML document to stream (semantics is slightly different from xml_node::print, see documentation for details).
-		void save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
-		void save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default) const;
-	#endif
-
-		// Save XML to file
-		bool save_file(const char* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
-		bool save_file(const wchar_t* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
-
-		// Get document element
-		xml_node document_element() const;
-	};
-
-#ifndef PUGIXML_NO_XPATH
-	// XPath query return type
-	enum xpath_value_type
-	{
-		xpath_type_none,	  // Unknown type (query failed to compile)
-		xpath_type_node_set,  // Node set (xpath_node_set)
-		xpath_type_number,	  // Number
-		xpath_type_string,	  // String
-		xpath_type_boolean	  // Boolean
-	};
-
-	// XPath parsing result
-	struct PUGIXML_CLASS xpath_parse_result
-	{
-		// Error message (0 if no error)
-		const char* error;
-
-		// Last parsed offset (in char_t units from string start)
-		ptrdiff_t offset;
-
-		// Default constructor, initializes object to failed state
-		xpath_parse_result();
-
-		// Cast to bool operator
-		operator bool() const;
-
-		// Get error description
-		const char* description() const;
-	};
-
-	// A single XPath variable
-	class PUGIXML_CLASS xpath_variable
-	{
-		friend class xpath_variable_set;
-
-	protected:
-		xpath_value_type _type;
-		xpath_variable* _next;
-
-		xpath_variable();
-
-		// Non-copyable semantics
-		xpath_variable(const xpath_variable&);
-		xpath_variable& operator=(const xpath_variable&);
-		
-	public:
-		// Get variable name
-		const char_t* name() const;
-
-		// Get variable type
-		xpath_value_type type() const;
-
-		// Get variable value; no type conversion is performed, default value (false, NaN, empty string, empty node set) is returned on type mismatch error
-		bool get_boolean() const;
-		double get_number() const;
-		const char_t* get_string() const;
-		const xpath_node_set& get_node_set() const;
-
-		// Set variable value; no type conversion is performed, false is returned on type mismatch error
-		bool set(bool value);
-		bool set(double value);
-		bool set(const char_t* value);
-		bool set(const xpath_node_set& value);
-	};
-
-	// A set of XPath variables
-	class PUGIXML_CLASS xpath_variable_set
-	{
-	private:
-		xpath_variable* _data[64];
-
-		// Non-copyable semantics
-		xpath_variable_set(const xpath_variable_set&);
-		xpath_variable_set& operator=(const xpath_variable_set&);
-
-		xpath_variable* find(const char_t* name) const;
-
-	public:
-		// Default constructor/destructor
-		xpath_variable_set();
-		~xpath_variable_set();
-
-		// Add a new variable or get the existing one, if the types match
-		xpath_variable* add(const char_t* name, xpath_value_type type);
-
-		// Set value of an existing variable; no type conversion is performed, false is returned if there is no such variable or if types mismatch
-		bool set(const char_t* name, bool value);
-		bool set(const char_t* name, double value);
-		bool set(const char_t* name, const char_t* value);
-		bool set(const char_t* name, const xpath_node_set& value);
-
-		// Get existing variable by name
-		xpath_variable* get(const char_t* name);
-		const xpath_variable* get(const char_t* name) const;
-	};
-
-	// A compiled XPath query object
-	class PUGIXML_CLASS xpath_query
-	{
-	private:
-		void* _impl;
-		xpath_parse_result _result;
-
-		typedef void (*unspecified_bool_type)(xpath_query***);
-
-		// Non-copyable semantics
-		xpath_query(const xpath_query&);
-		xpath_query& operator=(const xpath_query&);
-
-	public:
-		// Construct a compiled object from XPath expression.
-		// If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on compilation errors.
-		explicit xpath_query(const char_t* query, xpath_variable_set* variables = 0);
-
-		// Destructor
-		~xpath_query();
-
-		// Get query expression return type
-		xpath_value_type return_type() const;
-		
-		// Evaluate expression as boolean value in the specified context; performs type conversion if necessary.
-		// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
-		bool evaluate_boolean(const xpath_node& n) const;
-		
-		// Evaluate expression as double value in the specified context; performs type conversion if necessary.
-		// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
-		double evaluate_number(const xpath_node& n) const;
-		
-	#ifndef PUGIXML_NO_STL
-		// Evaluate expression as string value in the specified context; performs type conversion if necessary.
-		// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
-		string_t evaluate_string(const xpath_node& n) const;
-	#endif
-		
-		// Evaluate expression as string value in the specified context; performs type conversion if necessary.
-		// At most capacity characters are written to the destination buffer, full result size is returned (includes terminating zero).
-		// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
-		// If PUGIXML_NO_EXCEPTIONS is defined, returns empty  set instead.
-		size_t evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const;
-
-		// Evaluate expression as node set in the specified context.
-		// If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on type mismatch and std::bad_alloc on out of memory errors.
-		// If PUGIXML_NO_EXCEPTIONS is defined, returns empty node set instead.
-		xpath_node_set evaluate_node_set(const xpath_node& n) const;
-
-		// Get parsing result (used to get compilation errors in PUGIXML_NO_EXCEPTIONS mode)
-		const xpath_parse_result& result() const;
-
-		// Safe bool conversion operator
-		operator unspecified_bool_type() const;
-
-		// Borland C++ workaround
-		bool operator!() const;
-	};
-	
-	#ifndef PUGIXML_NO_EXCEPTIONS
-	// XPath exception class
-	class PUGIXML_CLASS xpath_exception: public std::exception
-	{
-	private:
-		xpath_parse_result _result;
-
-	public:
-		// Construct exception from parse result
-		explicit xpath_exception(const xpath_parse_result& result);
-
-		// Get error message
-		virtual const char* what() const throw();
-
-		// Get parse result
-		const xpath_parse_result& result() const;
-	};
-	#endif
-	
-	// XPath node class (either xml_node or xml_attribute)
-	class PUGIXML_CLASS xpath_node
-	{
-	private:
-		xml_node _node;
-		xml_attribute _attribute;
-	
-		typedef void (*unspecified_bool_type)(xpath_node***);
-
-	public:
-		// Default constructor; constructs empty XPath node
-		xpath_node();
-		
-		// Construct XPath node from XML node/attribute
-		xpath_node(const xml_node& node);
-		xpath_node(const xml_attribute& attribute, const xml_node& parent);
-
-		// Get node/attribute, if any
-		xml_node node() const;
-		xml_attribute attribute() const;
-		
-		// Get parent of contained node/attribute
-		xml_node parent() const;
-
-		// Safe bool conversion operator
-		operator unspecified_bool_type() const;
-		
-		// Borland C++ workaround
-		bool operator!() const;
-
-		// Comparison operators
-		bool operator==(const xpath_node& n) const;
-		bool operator!=(const xpath_node& n) const;
-	};
-
-#ifdef __BORLANDC__
-	// Borland C++ workaround
-	bool PUGIXML_FUNCTION operator&&(const xpath_node& lhs, bool rhs);
-	bool PUGIXML_FUNCTION operator||(const xpath_node& lhs, bool rhs);
-#endif
-
-	// A fixed-size collection of XPath nodes
-	class PUGIXML_CLASS xpath_node_set
-	{
-	public:
-		// Collection type
-		enum type_t
-		{
-			type_unsorted,			// Not ordered
-			type_sorted,			// Sorted by document order (ascending)
-			type_sorted_reverse		// Sorted by document order (descending)
-		};
-		
-		// Constant iterator type
-		typedef const xpath_node* const_iterator;
-	
-		// Default constructor. Constructs empty set.
-		xpath_node_set();
-
-		// Constructs a set from iterator range; data is not checked for duplicates and is not sorted according to provided type, so be careful
-		xpath_node_set(const_iterator begin, const_iterator end, type_t type = type_unsorted);
-
-		// Destructor
-		~xpath_node_set();
-		
-		// Copy constructor/assignment operator
-		xpath_node_set(const xpath_node_set& ns);
-		xpath_node_set& operator=(const xpath_node_set& ns);
-
-		// Get collection type
-		type_t type() const;
-		
-		// Get collection size
-		size_t size() const;
-
-		// Indexing operator
-		const xpath_node& operator[](size_t index) const;
-		
-		// Collection iterators
-		const_iterator begin() const;
-		const_iterator end() const;
-
-		// Sort the collection in ascending/descending order by document order
-		void sort(bool reverse = false);
-		
-		// Get first node in the collection by document order
-		xpath_node first() const;
-		
-		// Check if collection is empty
-		bool empty() const;
-	
-	private:
-		type_t _type;
-		
-		xpath_node _storage;
-		
-		xpath_node* _begin;
-		xpath_node* _end;
-
-		void _assign(const_iterator begin, const_iterator end);
-	};
-#endif
-
-#ifndef PUGIXML_NO_STL
-	// Convert wide string to UTF8
-	std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const wchar_t* str);
-	std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >& str);
-	
-	// Convert UTF8 to wide string
-	std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const char* str);
-	std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const std::basic_string<char, std::char_traits<char>, std::allocator<char> >& str);
-#endif
-
-	// Memory allocation function interface; returns pointer to allocated memory or NULL on failure
-	typedef void* (*allocation_function)(size_t size);
-	
-	// Memory deallocation function interface
-	typedef void (*deallocation_function)(void* ptr);
-
-	// Override default memory management functions. All subsequent allocations/deallocations will be performed via supplied functions.
-	void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate);
-	
-	// Get current memory management functions
-	allocation_function PUGIXML_FUNCTION get_memory_allocation_function();
-	deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function();
-}
-
-#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
-namespace std
-{
-	// Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
-	std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_node_iterator&);
-	std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_attribute_iterator&);
-	std::forward_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_named_node_iterator&);
-}
-#endif
-
-#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
-namespace std
-{
-	// Workarounds for (non-standard) iterator category detection
-	std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_node_iterator&);
-	std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_attribute_iterator&);
-	std::forward_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_named_node_iterator&);
-}
-#endif
-
-#endif
-
-/**
- * Copyright (c) 2006-2012 Arseny Kapoulkine
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
diff --git a/gadgets/pmri/CMakeLists.txt b/gadgets/pmri/CMakeLists.txt
new file mode 100644
index 0000000..6b5dc7a
--- /dev/null
+++ b/gadgets/pmri/CMakeLists.txt
@@ -0,0 +1,62 @@
+if (WIN32)
+  ADD_DEFINITIONS(-D__BUILD_GADGETRON_GADGET_GPUPMRI__)
+  # The two flags below work around Windows link problems caused by constructors in our headers being defined multiple times
+  SET (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /FORCE:MULTIPLE") 
+  SET (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /FORCE:MULTIPLE") 
+endif (WIN32)
+
+find_package(Ismrmrd REQUIRED)
+
+include_directories(   
+  ${CMAKE_SOURCE_DIR}/gadgets/mri_core
+  ${CMAKE_SOURCE_DIR}/toolboxes/mri/pmri/gpu
+  ${CMAKE_SOURCE_DIR}/toolboxes/nfft/gpu
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
+  ${CMAKE_SOURCE_DIR}/toolboxes/fft/gpu
+  ${CMAKE_SOURCE_DIR}/toolboxes/operators
+  ${CMAKE_SOURCE_DIR}/toolboxes/operators/gpu
+  ${CMAKE_SOURCE_DIR}/toolboxes/solvers
+  ${CMAKE_SOURCE_DIR}/toolboxes/solvers/gpu
+  ${HDF5_INCLUDE_DIR}
+  ${HDF5_INCLUDE_DIR}/cpp
+)
+
+add_library(gadgetron_gpuparallelmri SHARED 
+    gadgetron_gpupmri_export.h
+    GenericReconJob.h
+    gpuCgKtSenseGadget.h
+    gpuCgSenseGadget.h
+    gpuCgSpiritGadget.h
+    gpuGenericSensePrepGadget.h
+    gpuSbSenseGadget.h
+    gpuCgSenseGadget.cpp 
+    gpuCgKtSenseGadget.cpp 
+    gpuSbSenseGadget.cpp 
+    gpuGenericSensePrepGadget.cpp
+    gpuCgSpiritGadget.cpp 
+  )
+
+set_target_properties(gadgetron_gpuparallelmri PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+
+target_link_libraries(gadgetron_gpuparallelmri 
+  gadgetron_gadgetbase
+  gadgetron_toolbox_cpucore gadgetron_toolbox_gpucore gadgetron_toolbox_gpusolvers gadgetron_toolbox_gpuoperators
+  ${Boost_LIBRARIES} ${ISMRMRD_LIBRARIES} ${FFTW3_LIBRARIES} ${CUDA_LIBRARIES} 
+  optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY}
+  )
+
+target_link_libraries(gadgetron_gpuparallelmri gadgetron_toolbox_gpuparallelmri)
+
+install (TARGETS gadgetron_gpuparallelmri DESTINATION lib COMPONENT main)
+
+install (FILES  gadgetron_gpupmri_export.h
+                GenericReconJob.h
+                gpuCgKtSenseGadget.h
+                gpuCgSenseGadget.h
+                gpuCgSpiritGadget.h
+                gpuGenericSensePrepGadget.h
+                gpuSbSenseGadget.h
+                DESTINATION include COMPONENT main)
+
+add_subdirectory(config)
diff --git a/gadgets/pmri/GenericReconJob.h b/gadgets/pmri/GenericReconJob.h
new file mode 100644
index 0000000..0e1cf82
--- /dev/null
+++ b/gadgets/pmri/GenericReconJob.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include "hoNDArray.h"
+#include "vector_td.h"
+
+#include <ismrmrd/ismrmrd.h>
+#include <boost/shared_ptr.hpp>
+#include <boost/shared_array.hpp>
+
+namespace Gadgetron{
+  // Bundle of reference-counted buffers (hoNDArray, named *_host_) and image headers; every member is public.
+  class GenericReconJob
+  {
+  public:
+    
+    GenericReconJob() {}
+    ~GenericReconJob() {}
+
+    boost::shared_array<ISMRMRD::ImageHeader> image_headers_; // shared array of ISMRMRD image headers; its length is not stored in this class
+
+    boost::shared_ptr< hoNDArray<float_complext> >  dat_host_; // presumably the sample data -- TODO confirm
+    boost::shared_ptr< hoNDArray<floatd2>        >  tra_host_; // presumably the 2D trajectory -- TODO confirm
+    boost::shared_ptr< hoNDArray<float>          >  dcw_host_; // presumably density compensation weights -- TODO confirm
+    boost::shared_ptr< hoNDArray<float_complext> >  csm_host_; // presumably coil sensitivity maps -- TODO confirm
+    boost::shared_ptr< hoNDArray<float_complext> >  reg_host_; // presumably the regularization image -- TODO confirm
+  };
+}
diff --git a/gadgets/pmri/config/CMakeLists.txt b/gadgets/pmri/config/CMakeLists.txt
new file mode 100644
index 0000000..aa946c7
--- /dev/null
+++ b/gadgets/pmri/config/CMakeLists.txt
@@ -0,0 +1,16 @@
+if (ARMADILLO_FOUND)
+  install (FILES 
+    generic_gpusense_cg.xml 
+    generic_gpusense_cg_singleshot.xml 
+    generic_gpusense_sb_singleshot.xml 
+    generic_gpusense_nlcg_singleshot.xml
+    generic_gpu_ktsense_singleshot.xml 
+    DESTINATION ${GADGETRON_INSTALL_CONFIG_PATH} COMPONENT main)
+else (ARMADILLO_FOUND) # must be else: an elseif repeating the if() condition can never be taken
+  MESSAGE("Armadillo not found, only unoptimized generic trajectory config files will be available")
+endif (ARMADILLO_FOUND)
+
+#install (FILES 
+#  generic_gpusense_cg_unoptimized.xml 
+#  generic_gpusense_sb_unoptimized.xml 
+#  DESTINATION ${GADGETRON_INSTALL_CONFIG_PATH})
diff --git a/gadgets/pmri/config/generic_gpu_ktsense_singleshot.xml b/gadgets/pmri/config/generic_gpu_ktsense_singleshot.xml
new file mode 100644
index 0000000..396f6a0
--- /dev/null
+++ b/gadgets/pmri/config/generic_gpu_ktsense_singleshot.xml
@@ -0,0 +1,115 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+			      xmlns="http://gadgetron.sf.net/gadgetron"
+			      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+  
+  <reader>
+    <slot>1008</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+  </reader>
+  
+  <writer>
+    <slot>1004</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>MRIImageWriterCPLX</classname>
+  </writer>
+  <writer>
+    <slot>1005</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>MRIImageWriterFLOAT</classname>
+  </writer>
+  <writer>
+    <slot>1006</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>MRIImageWriterUSHORT</classname>
+  </writer>
+
+  <gadget>
+    <name>NoiseAdjust</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>NoiseAdjustGadget</classname>
+  </gadget>
+
+  <gadget>
+    <name>PCA</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>PCACoilGadget</classname>
+  </gadget>
+  
+  <gadget>
+    <name>CoilReduction</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>CoilReductionGadget</classname>
+    <property><name>coils_out</name><value>16</value></property>
+  </gadget>
+
+  <gadget>
+    <name>gpuGenericSensePrepGadget</name>
+    <dll>gadgetron_gpuparallelmri</dll>
+    <classname>gpuGenericSensePrepGadget</classname>
+      <property><name>deviceno</name><value>0</value></property>
+      <property><name>readouts_per_frame</name><value>1</value></property>
+      <property><name>frames_per_rotation</name><value>1</value></property>
+      <property><name>rotations_per_reconstruction</name><value>50</value></property>
+      <property><name>buffer_convolution_kernel_width</name><value>5.5</value></property>
+      <property><name>buffer_convolution_oversampling_factor</name><value>1.25</value></property>
+      <property><name>reconstruction_os_factor_x</name><value>1.5</value></property>
+      <property><name>reconstruction_os_factor_y</name><value>1.5</value></property>
+  </gadget>
+  
+  <gadget>
+    <name>gpuCgKtSenseGadget_slice0</name>
+    <dll>gadgetron_gpuparallelmri</dll>
+    <classname>gpuCgKtSenseGadget</classname>
+    <property><name>pass_on_undesired_data</name><value>true</value></property>
+    <property><name>deviceno</name>                <value>0</value></property>
+    <property><name>number_of_iterations</name>    <value>50</value></property>
+    <property><name>cg_limit</name>                <value>1e-6</value></property>
+    <property><name>oversampling_factor</name>     <value>1.25</value></property>
+    <property><name>kernel_width</name>            <value>5.5</value></property>
+    <property><name>kappa</name>                   <value>0.1</value></property>
+    <property><name>output_convergence</name>      <value>true</value></property>
+  </gadget>
+  
+  <gadget>
+    <name>Extract</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>ExtractGadget</classname>
+  </gadget>
+  
+  <!--
+      <gadget>
+      <name>ImageWrite</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageWriterGadgetFLOAT</classname>
+      </gadget>
+  -->    
+  
+  <gadget>
+    <name>AutoScaleGadget</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>AutoScaleGadget</classname>
+  </gadget> 
+
+  <gadget>
+    <name>FloatToShort</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>FloatToUShortGadget</classname>
+  </gadget>
+  
+  <!--
+      <gadget>
+      <name>ImageFinishCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishGadgetCPLX</classname>
+      </gadget>
+  -->
+  
+  <gadget>
+    <name>ImageFinishShort</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>ImageFinishGadgetUSHORT</classname>
+  </gadget>    
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/pmri/config/generic_gpusense_cg.xml b/gadgets/pmri/config/generic_gpusense_cg.xml
new file mode 100644
index 0000000..76f2750
--- /dev/null
+++ b/gadgets/pmri/config/generic_gpusense_cg.xml
@@ -0,0 +1,113 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+			      xmlns="http://gadgetron.sf.net/gadgetron"
+			      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+  
+  <reader>
+    <slot>1008</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+  </reader>
+  
+  <writer>
+    <slot>1004</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>MRIImageWriterCPLX</classname>
+  </writer>
+  <writer>
+    <slot>1005</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>MRIImageWriterFLOAT</classname>
+  </writer>
+  <writer>
+    <slot>1006</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>MRIImageWriterUSHORT</classname>
+  </writer>
+
+  <gadget>
+    <name>NoiseAdjust</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>NoiseAdjustGadget</classname>
+  </gadget>
+  
+  <gadget>
+    <name>PCA</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>PCACoilGadget</classname>
+  </gadget>
+  
+  <gadget>
+    <name>CoilReduction</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>CoilReductionGadget</classname>
+    <property><name>coils_out</name><value>16</value></property>
+  </gadget>
+
+  <gadget>
+    <name>gpuGenericSensePrepGadget</name>
+    <dll>gadgetron_gpuparallelmri</dll>
+    <classname>gpuGenericSensePrepGadget</classname>
+      <property><name>deviceno</name><value>0</value></property>
+      <property><name>rotations_per_reconstruction</name><value>4</value></property>
+      <property><name>buffer_convolution_kernel_width</name><value>5.5</value></property>
+      <property><name>buffer_convolution_oversampling_factor</name><value>1.25</value></property>
+      <property><name>reconstruction_os_factor_x</name><value>1.5</value></property>
+      <property><name>reconstruction_os_factor_y</name><value>1.5</value></property>
+  </gadget>
+  
+  <gadget>
+    <name>gpuCgSenseGadget</name>
+    <dll>gadgetron_gpuparallelmri</dll>
+    <classname>gpuCgSenseGadget</classname>
+    <property><name>pass_on_undesired_data</name>  <value>true</value></property>
+    <property><name>deviceno</name>                <value>0</value></property>
+    <property><name>number_of_iterations</name>    <value>30</value></property>
+    <property><name>cg_limit</name>                <value>1e-6</value></property>
+    <property><name>oversampling_factor</name>     <value>1.25</value></property>
+    <property><name>kernel_width</name>            <value>5.5</value></property>
+    <property><name>kappa</name>                   <value>0.1</value></property>
+    <property><name>output_convergence</name><value>true</value></property>
+  </gadget>
+
+  <gadget>
+    <name>Extract</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>ExtractGadget</classname>
+  </gadget>
+  
+  <!--
+      <gadget>
+      <name>ImageWrite</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageWriterGadgetFLOAT</classname>
+      </gadget>
+  -->    
+  
+  <gadget>
+    <name>AutoScaleGadget</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>AutoScaleGadget</classname>
+  </gadget> 
+
+  <gadget>
+    <name>FloatToShort</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>FloatToUShortGadget</classname>
+  </gadget>
+  
+  <!--
+      <gadget>
+      <name>ImageFinishCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishGadgetCPLX</classname>
+      </gadget>
+  -->
+  
+  <gadget>
+    <name>ImageFinishShort</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>ImageFinishGadgetUSHORT</classname>
+  </gadget>    
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/pmri/config/generic_gpusense_cg_singleshot.xml b/gadgets/pmri/config/generic_gpusense_cg_singleshot.xml
new file mode 100644
index 0000000..3f697b5
--- /dev/null
+++ b/gadgets/pmri/config/generic_gpusense_cg_singleshot.xml
@@ -0,0 +1,115 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+			      xmlns="http://gadgetron.sf.net/gadgetron"
+			      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+  
+  <reader>
+    <slot>1008</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+  </reader>
+  
+  <writer>
+    <slot>1004</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>MRIImageWriterCPLX</classname>
+  </writer>
+  <writer>
+    <slot>1005</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>MRIImageWriterFLOAT</classname>
+  </writer>
+  <writer>
+    <slot>1006</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>MRIImageWriterUSHORT</classname>
+  </writer>
+
+  <gadget>
+    <name>NoiseAdjust</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>NoiseAdjustGadget</classname>
+  </gadget>
+
+  <gadget>
+    <name>PCA</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>PCACoilGadget</classname>
+  </gadget>
+  
+  <gadget>
+    <name>CoilReduction</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>CoilReductionGadget</classname>
+    <property><name>coils_out</name><value>16</value></property>
+  </gadget>
+
+  <gadget>
+    <name>gpuGenericSensePrepGadget</name>
+    <dll>gadgetron_gpuparallelmri</dll>
+    <classname>gpuGenericSensePrepGadget</classname>
+      <property><name>deviceno</name><value>0</value></property>
+      <property><name>readouts_per_frame</name><value>1</value></property>
+      <property><name>frames_per_rotation</name><value>1</value></property>
+      <property><name>rotations_per_reconstruction</name><value>50</value></property>
+      <property><name>buffer_convolution_kernel_width</name><value>5.5</value></property>
+      <property><name>buffer_convolution_oversampling_factor</name><value>1.25</value></property>
+      <property><name>reconstruction_os_factor_x</name><value>1.5</value></property>
+      <property><name>reconstruction_os_factor_y</name><value>1.5</value></property>
+  </gadget>
+  
+  <gadget>
+    <name>gpuCgSenseGadget</name>
+    <dll>gadgetron_gpuparallelmri</dll>
+    <classname>gpuCgSenseGadget</classname>
+    <property><name>pass_on_undesired_data</name>  <value>true</value></property>
+    <property><name>deviceno</name>                <value>0</value></property>
+    <property><name>number_of_iterations</name>    <value>30</value></property>
+    <property><name>cg_limit</name>                <value>1e-6</value></property>
+    <property><name>oversampling_factor</name>     <value>1.25</value></property>
+    <property><name>kernel_width</name>            <value>5.5</value></property>
+    <property><name>kappa</name>                   <value>0.1</value></property>
+    <property><name>output_convergence</name><value>true</value></property>
+  </gadget>
+  
+  <gadget>
+    <name>Extract</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>ExtractGadget</classname>
+  </gadget>
+  
+  <!--
+      <gadget>
+      <name>ImageWrite</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageWriterGadgetFLOAT</classname>
+      </gadget>
+  -->    
+  
+  <gadget>
+    <name>AutoScaleGadget</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>AutoScaleGadget</classname>
+  </gadget> 
+
+  <gadget>
+    <name>FloatToShort</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>FloatToUShortGadget</classname>
+  </gadget>
+  
+  <!--
+      <gadget>
+      <name>ImageFinishCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishGadgetCPLX</classname>
+      </gadget>
+  -->
+  
+  <gadget>
+    <name>ImageFinishShort</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>ImageFinishGadgetUSHORT</classname>
+  </gadget>    
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/pmri/config/generic_gpusense_nlcg_singleshot.xml b/gadgets/pmri/config/generic_gpusense_nlcg_singleshot.xml
new file mode 100644
index 0000000..e1f275e
--- /dev/null
+++ b/gadgets/pmri/config/generic_gpusense_nlcg_singleshot.xml
@@ -0,0 +1,116 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+			      xmlns="http://gadgetron.sf.net/gadgetron"
+			      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+  
+  <reader>
+    <slot>1008</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+  </reader>
+  
+  <writer>
+    <slot>1004</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>MRIImageWriterCPLX</classname>
+  </writer>
+  <writer>
+    <slot>1005</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>MRIImageWriterFLOAT</classname>
+  </writer>
+  <writer>
+    <slot>1006</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>MRIImageWriterUSHORT</classname>
+  </writer>
+
+  <gadget>
+    <name>NoiseAdjust</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>NoiseAdjustGadget</classname>
+  </gadget>
+
+  <gadget>
+    <name>PCA</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>PCACoilGadget</classname>
+  </gadget>
+  
+  <gadget>
+    <name>CoilReduction</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>CoilReductionGadget</classname>
+    <property><name>coils_out</name><value>16</value></property>
+  </gadget>
+
+  <gadget>
+    <name>gpuGenericSensePrepGadget</name>
+    <dll>gadgetron_gpuparallelmri</dll>
+    <classname>gpuGenericSensePrepGadget</classname>
+      <property><name>deviceno</name><value>0</value></property>
+      <property><name>readouts_per_frame</name><value>1</value></property>
+      <property><name>frames_per_rotation</name><value>1</value></property>
+      <property><name>rotations_per_reconstruction</name><value>50</value></property>
+      <property><name>buffer_using_solver</name><value>true</value></property>
+      <property><name>buffer_convolution_kernel_width</name><value>5.5</value></property>
+      <property><name>buffer_convolution_oversampling_factor</name><value>1.25</value></property>
+      <property><name>reconstruction_os_factor_x</name><value>1.5</value></property>
+      <property><name>reconstruction_os_factor_y</name><value>1.5</value></property>
+  </gadget>
+  
+    <gadget>
+      <name>gpuNlcgSenseGadget_slice0</name>
+      <dll>gadgetron_gpuparallelmri</dll>
+      <classname>gpuNlcgSenseGadget</classname>
+      <property><name>pass_on_undesired_data</name>  <value>true</value></property>
+      <property><name>deviceno</name>                <value>0</value></property>
+      <property><name>number_of_cg_iterations</name> <value>30</value></property>
+      <property><name>cg_limit</name>                <value>1e-6</value></property>
+      <property><name>oversampling_factor</name>     <value>1.25</value></property>
+      <property><name>kernel_width</name>            <value>5.5</value></property>
+      <property><name>lambda</name>                  <value>0.01</value></property>
+      <property><name>alpha</name>                   <value>0.5</value></property>
+    </gadget>
+  
+  <gadget>
+    <name>Extract</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>ExtractGadget</classname>
+  </gadget>
+  
+  <!--
+      <gadget>
+      <name>ImageWrite</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageWriterGadgetFLOAT</classname>
+      </gadget>
+  -->    
+  
+  <gadget>
+    <name>AutoScaleGadget</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>AutoScaleGadget</classname>
+  </gadget> 
+
+  <gadget>
+    <name>FloatToShort</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>FloatToUShortGadget</classname>
+  </gadget>
+  
+  <!--
+      <gadget>
+      <name>ImageFinishCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishGadgetCPLX</classname>
+      </gadget>
+  -->
+  
+  <gadget>
+    <name>ImageFinishShort</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>ImageFinishGadgetUSHORT</classname>
+  </gadget>    
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/pmri/config/generic_gpusense_sb_singleshot.xml b/gadgets/pmri/config/generic_gpusense_sb_singleshot.xml
new file mode 100644
index 0000000..ff91974
--- /dev/null
+++ b/gadgets/pmri/config/generic_gpusense_sb_singleshot.xml
@@ -0,0 +1,119 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+			      xmlns="http://gadgetron.sf.net/gadgetron"
+			      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+  
+  <reader>
+    <slot>1008</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+  </reader>
+  
+  <writer>
+    <slot>1004</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>MRIImageWriterCPLX</classname>
+  </writer>
+  <writer>
+    <slot>1005</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>MRIImageWriterFLOAT</classname>
+  </writer>
+  <writer>
+    <slot>1006</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>MRIImageWriterUSHORT</classname>
+  </writer>
+
+  <gadget>
+    <name>NoiseAdjust</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>NoiseAdjustGadget</classname>
+  </gadget>
+
+  <gadget>
+    <name>PCA</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>PCACoilGadget</classname>
+  </gadget>
+  
+  <gadget>
+    <name>CoilReduction</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>CoilReductionGadget</classname>
+    <property><name>coils_out</name><value>16</value></property>
+  </gadget>
+
+  <gadget>
+    <name>gpuGenericSensePrepGadget</name>
+    <dll>gadgetron_gpuparallelmri</dll>
+    <classname>gpuGenericSensePrepGadget</classname>
+      <property><name>deviceno</name><value>0</value></property>
+      <property><name>readouts_per_frame</name><value>1</value></property>
+      <property><name>frames_per_rotation</name><value>1</value></property>
+      <property><name>rotations_per_reconstruction</name><value>50</value></property>
+      <property><name>buffer_using_solver</name><value>true</value></property>
+      <property><name>buffer_convolution_kernel_width</name><value>5.5</value></property>
+      <property><name>buffer_convolution_oversampling_factor</name><value>1.25</value></property>
+      <property><name>reconstruction_os_factor_x</name><value>1.5</value></property>
+      <property><name>reconstruction_os_factor_y</name><value>1.5</value></property>
+  </gadget>
+  
+    <gadget>
+      <name>gpuSbSenseGadget_slice0</name>
+      <dll>gadgetron_gpuparallelmri</dll>
+      <classname>gpuSbSenseGadget</classname>
+      <property><name>pass_on_undesired_data</name>  <value>true</value></property>
+      <property><name>deviceno</name>                <value>0</value></property>
+      <property><name>number_of_sb_iterations</name> <value>20</value></property>
+      <property><name>number_of_cg_iterations</name> <value>10</value></property>
+      <property><name>cg_limit</name>                <value>1e-6</value></property>
+      <property><name>oversampling_factor</name>     <value>1.25</value></property>
+      <property><name>kernel_width</name>            <value>5.5</value></property>
+      <property><name>mu</name>                      <value>0.1</value></property>
+      <property><name>lambda</name>                  <value>0.2</value></property>
+      <property><name>alpha</name>                   <value>0.5</value></property>
+      <property><name>output_convergence</name><value>true</value></property>
+    </gadget>
+  
+  <gadget>
+    <name>Extract</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>ExtractGadget</classname>
+  </gadget>
+  
+  <!--
+      <gadget>
+      <name>ImageWrite</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageWriterGadgetFLOAT</classname>
+      </gadget>
+  -->    
+  
+  <gadget>
+    <name>AutoScaleGadget</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>AutoScaleGadget</classname>
+  </gadget> 
+
+  <gadget>
+    <name>FloatToShort</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>FloatToUShortGadget</classname>
+  </gadget>
+  
+  <!--
+      <gadget>
+      <name>ImageFinishCPLX</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishGadgetCPLX</classname>
+      </gadget>
+  -->
+  
+  <gadget>
+    <name>ImageFinishShort</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>ImageFinishGadgetUSHORT</classname>
+  </gadget>    
+
+</gadgetronStreamConfiguration>
diff --git a/gadgets/pmri/gadgetron_gpupmri_export.h b/gadgets/pmri/gadgetron_gpupmri_export.h
new file mode 100644
index 0000000..bd71ffa
--- /dev/null
+++ b/gadgets/pmri/gadgetron_gpupmri_export.h
@@ -0,0 +1,14 @@
+#ifndef GADGETRON_GPUPMRI_EXPORT_H_
+#define GADGETRON_GPUPMRI_EXPORT_H_
+// EXPORTGADGETS_GPUPMRI: on WIN32, dllexport when __BUILD_GADGETRON_GADGET_GPUPMRI__ is defined and dllimport otherwise; empty on other platforms.
+#if defined (WIN32)
+#if defined (__BUILD_GADGETRON_GADGET_GPUPMRI__)
+#define EXPORTGADGETS_GPUPMRI __declspec(dllexport)
+#else
+#define EXPORTGADGETS_GPUPMRI __declspec(dllimport)
+#endif
+#else
+#define EXPORTGADGETS_GPUPMRI
+#endif
+
+#endif /* GADGETRON_GPUPMRI_EXPORT_H_ */
diff --git a/gadgets/pmri/gpuCgKtSenseGadget.cpp b/gadgets/pmri/gpuCgKtSenseGadget.cpp
new file mode 100644
index 0000000..eee7aaa
--- /dev/null
+++ b/gadgets/pmri/gpuCgKtSenseGadget.cpp
@@ -0,0 +1,375 @@
+#include "gpuCgKtSenseGadget.h"
+#include "cuNDArray_operators.h"
+#include "cuNDArray_elemwise.h"
+#include "cuNDArray_blas.h"
+#include "cuNDArray_utils.h"
+#include "cuNDArray_reductions.h"
+#include "cuNDFFT.h"
+#include "Gadgetron.h"
+#include "GadgetMRIHeaders.h"
+#include "b1_map.h"
+#include "GPUTimer.h"
+#include "vector_td_utilities.h"
+#include "ismrmrd/xml.h"
+
+namespace Gadgetron{
+
+  gpuCgKtSenseGadget::gpuCgKtSenseGadget()
+    : channels_(0)            // initializers are listed in the member declaration order of gpuCgKtSenseGadget.h
+    , is_configured_(false)
+    , frame_counter_(0)
+  {
+    // Default values of the gadget parameters; process_config() reads the effective values back
+    set_parameter("deviceno", "0");
+    set_parameter("setno", "0");
+    set_parameter("sliceno", "0");
+    set_parameter("number_of_iterations", "5");
+    set_parameter("cg_limit", "1e-6");
+    set_parameter("oversampling_factor", "1.25");
+    set_parameter("kernel_width", "5.5");
+    set_parameter("kappa", "0.3");
+    matrix_size_ = uint64d2(0,0);      // the three matrix sizes stay zero until process_config()
+    matrix_size_os_ = uint64d2(0,0);   // and process() have seen the header and the first job
+    matrix_size_seq_ = uint64d2(0,0);
+  }
+
+  gpuCgKtSenseGadget::~gpuCgKtSenseGadget() {} // empty: no explicit cleanup is performed here
+
+  // Selects the CUDA device, reads the gadget parameters and the ISMRMRD header and, on the first call,
+  // allocates the encoding, preconditioning and regularization operators and hands them to the CG solver.
+  int gpuCgKtSenseGadget::process_config( ACE_Message_Block* mb )
+  {
+    device_number_ = get_int_value("deviceno");
+
+    int number_of_devices = 0;
+    if (cudaGetDeviceCount(&number_of_devices)!= cudaSuccess) {
+      GADGET_DEBUG1( "Error: unable to query number of CUDA devices.\n" );
+      return GADGET_FAIL;
+    }
+
+    if (number_of_devices == 0) {
+      GADGET_DEBUG1( "Error: No available CUDA devices.\n" );
+      return GADGET_FAIL;
+    }
+
+    if (device_number_ >= number_of_devices) {
+      GADGET_DEBUG2("Adjusting device number from %d to %d\n", device_number_,  (device_number_%number_of_devices));
+      device_number_ = (device_number_%number_of_devices);
+    }
+
+    if (cudaSetDevice(device_number_)!= cudaSuccess) {
+      GADGET_DEBUG1( "Error: unable to set CUDA device.\n" );
+      return GADGET_FAIL;
+    }
+
+    pass_on_undesired_data_ = get_bool_value("pass_on_undesired_data");
+    set_number_ = get_int_value("setno");
+    slice_number_ = get_int_value("sliceno");
+    number_of_iterations_ = get_int_value("number_of_iterations");
+    cg_limit_ = get_double_value("cg_limit");
+    oversampling_factor_ = get_double_value("oversampling_factor");
+    kernel_width_ = get_double_value("kernel_width");
+    kappa_ = get_double_value("kappa");
+    shutter_radius_ = get_double_value("training_data_shutter_radius");
+    rotations_to_discard_ = get_int_value("rotations_to_discard");
+    output_convergence_ = get_bool_value("output_convergence");
+
+    if( (rotations_to_discard_%2) == 1 ){
+      GADGET_DEBUG1("#rotations to discard must be even.\n");
+      return GADGET_FAIL;
+    }
+
+    // Get the Ismrmrd header
+    //
+    ISMRMRD::IsmrmrdHeader h;
+    ISMRMRD::deserialize(mb->rd_ptr(),h);
+
+
+    if (h.encoding.size() != 1) {
+      GADGET_DEBUG1("This Gadget only supports one encoding space\n");
+      return GADGET_FAIL;
+    }
+
+    // Only the matrix size of the reconstruction space is used by this gadget, so the
+    // encoded space and the encoding limits are not fetched from the header.
+    const ISMRMRD::EncodingSpace& r_space = h.encoding[0].reconSpace;
+    matrix_size_seq_ = uint64d2( r_space.matrixSize.x, r_space.matrixSize.y );
+
+    // The operators and the solver are set up only once; a later call merely refreshes
+    // the parameters and the sequence matrix size read above.
+    if (!is_configured_) {
+
+      if (h.acquisitionSystemInformation) {
+        channels_ = h.acquisitionSystemInformation->receiverChannels ? *h.acquisitionSystemInformation->receiverChannels : 1;
+      } else {
+        channels_ = 1;
+      }
+
+      // Allocate encoding operator for non-Cartesian Sense
+      E_ = boost::shared_ptr< cuNonCartesianKtSenseOperator<float,2> >( new cuNonCartesianKtSenseOperator<float,2>() );
+
+      // Allocate preconditioner
+      D_ = boost::shared_ptr< cuCgPreconditioner<float_complext> >( new cuCgPreconditioner<float_complext>() );
+
+      // Allocate regularization image operator
+      R_ = boost::shared_ptr< cuImageOperator<float_complext> >( new cuImageOperator<float_complext>() );
+      R_->set_weight( kappa_ );
+
+      // Setup solver
+      cg_.set_encoding_operator( E_ );        // encoding matrix
+      cg_.add_regularization_operator( R_ );  // regularization matrix
+      cg_.set_preconditioner( D_ );           // preconditioning matrix
+      cg_.set_max_iterations( number_of_iterations_ );
+      cg_.set_tc_tolerance( cg_limit_ );
+      cg_.set_output_mode( (output_convergence_) ? cuCgSolver<float_complext>::OUTPUT_VERBOSE : cuCgSolver<float_complext>::OUTPUT_SILENT );
+
+      is_configured_ = true;
+    }
+
+    return GADGET_OK;
+  }
+
+  // Reconstructs one k-t SENSE job on the GPU and passes one complex image per retained frame downstream.
+  // Returns GADGET_OK/GADGET_FAIL; a job of another set/slice is forwarded and the putq() result returned.
+  int gpuCgKtSenseGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader> *m1, GadgetContainerMessage<GenericReconJob> *m2)
+  {
+    // Is this data for this gadget's set/slice?
+    //
+
+    if( m1->getObjectPtr()->set != set_number_ || m1->getObjectPtr()->slice != slice_number_ ) {
+      // No, pass it downstream...
+      return this->next()->putq(m1);
+    }
+
+    // Nothing can be reconstructed before process_config() has set up the operators and the solver
+    if (!is_configured_) {
+      GADGET_DEBUG1("Data received before configuration was completed\n");
+      return GADGET_FAIL;
+    }
+
+    GenericReconJob* j = m2->getObjectPtr();
+
+    // Some basic validation of the incoming Sense job (reg_host_ provides the recon matrix size below)
+    if (!j->csm_host_.get() || !j->dat_host_.get() || !j->tra_host_.get() || !j->dcw_host_.get() || !j->reg_host_.get()) {
+      GADGET_DEBUG1("Received an incomplete Sense job\n");
+      return GADGET_FAIL;
+    }
+
+    const size_t traj_elements = j->tra_host_->get_number_of_elements();
+    unsigned int samples = j->dat_host_->get_size(0);
+    if( traj_elements == 0 || samples%traj_elements ) { // checked first: it also guards the division below
+      GADGET_DEBUG2("Mismatch between number of samples (%d) and number of k-space coordinates (%d).\nThe first should be a multiplum of the latter.\n", 
+                    static_cast<int>(samples), static_cast<int>(traj_elements));
+      return GADGET_FAIL;
+    }
+
+    unsigned int rotations = samples / traj_elements;
+    unsigned int frames = j->tra_host_->get_size(1)*rotations;
+
+    boost::shared_ptr< cuNDArray<floatd2> > traj(new cuNDArray<floatd2> (j->tra_host_.get()));
+    boost::shared_ptr< cuNDArray<float> > dcw(new cuNDArray<float> (j->dcw_host_.get()));
+    sqrt_inplace(dcw.get()); //Take square root to use for weighting
+    boost::shared_ptr< cuNDArray<float_complext> > csm(new cuNDArray<float_complext> (j->csm_host_.get()));
+    boost::shared_ptr< cuNDArray<float_complext> > device_samples(new cuNDArray<float_complext> (j->dat_host_.get()));
+
+    cudaDeviceProp deviceProp;
+    if( cudaGetDeviceProperties( &deviceProp, device_number_ ) != cudaSuccess) {
+      GADGET_DEBUG1( "Error: unable to query device properties.\n" );
+      return GADGET_FAIL;
+    }
+
+    unsigned int warp_size = deviceProp.warpSize;
+
+    matrix_size_ = uint64d2( j->reg_host_->get_size(0), j->reg_host_->get_size(1) );
+
+    matrix_size_os_ = // oversampled matrix size, rounded up to a multiple of the warp size
+      uint64d2(((static_cast<unsigned int>(std::ceil(matrix_size_[0]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size,
+               ((static_cast<unsigned int>(std::ceil(matrix_size_[1]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size);
+
+    GADGET_DEBUG2("Matrix size    : [%d,%d] \n", static_cast<int>(matrix_size_[0]), static_cast<int>(matrix_size_[1]));
+    GADGET_DEBUG2("Matrix size OS : [%d,%d] \n", static_cast<int>(matrix_size_os_[0]), static_cast<int>(matrix_size_os_[1]));
+
+    std::vector<size_t> image_dims = to_std_vector(matrix_size_);
+    image_dims.push_back(frames);
+
+    E_->set_domain_dimensions(&image_dims);
+    E_->set_codomain_dimensions(device_samples->get_dimensions().get());
+    E_->set_dcw(dcw);
+    E_->set_csm(csm);
+
+    E_->setup( matrix_size_, matrix_size_os_, static_cast<float>(kernel_width_) );
+    E_->preprocess(traj.get());
+
+    R_->compute(compute_regularization_image(j).get());
+
+    // Define preconditioning weights
+    boost::shared_ptr< cuNDArray<float> > coil_power = sum(abs_square(csm.get()).get(), 2);
+    boost::shared_ptr< cuNDArray<float> > _precon_weights = expand<float>( coil_power.get(), frames );
+    boost::shared_ptr<cuNDArray<float> > R_diag = R_->get();
+    *R_diag *= float(kappa_);
+    *_precon_weights += *R_diag;
+    R_diag.reset();
+    reciprocal_sqrt_inplace(_precon_weights.get());
+    boost::shared_ptr< cuNDArray<float_complext> > precon_weights = real_to_complex<float_complext>( _precon_weights.get() );
+    coil_power.reset(); _precon_weights.reset();
+    D_->set_weights( precon_weights );
+
+    *device_samples *= *dcw; // weight the samples with the square-rooted density compensation
+    // Invoke solver
+    // 
+
+    boost::shared_ptr< cuNDArray<float_complext> > cgresult;
+
+    {
+      GPUTimer timer("gpuCgKtSenseGadget::solve()");
+      cgresult = cg_.solve(device_samples.get());
+    }
+
+    if (!cgresult.get()) {
+      GADGET_DEBUG1("Iterative_sense_compute failed\n");
+      return GADGET_FAIL;
+    }
+
+    // Go from x-f to x-t space (FFT along dimension 2, the frame dimension of the result)
+    cuNDFFT<float>::instance()->fft( cgresult.get(), 2 );
+
+    /*
+    static int counter = 0;
+    char filename[256];
+    sprintf((char*)filename, "recon_%d.real", counter);
+    write_nd_array<float>( abs(cgresult.get())->to_host().get(), filename );
+    counter++; */
+
+    // If the recon matrix size exceeds the sequence matrix size then crop
+    if( matrix_size_seq_ != matrix_size_ )
+      cgresult = crop<float_complext,2>( (matrix_size_-matrix_size_seq_)>>1, matrix_size_seq_, cgresult.get() );
+
+    // Now pass on the reconstructed images
+    //
+
+    unsigned int frames_per_rotation = frames/rotations;
+
+    if( rotations == 1 ){ // this is the case for golden ratio
+      rotations = frames;
+      frames_per_rotation = 1;
+    }
+
+    for( unsigned int frame=0; frame<frames; frame++ ){
+
+      unsigned int rotation_idx = frame/frames_per_rotation;
+
+      // Check if we should discard this frame (half of rotations_to_discard_ at either end)
+      if( rotation_idx < (rotations_to_discard_>>1) || rotation_idx >= rotations-(rotations_to_discard_>>1) )
+        continue;
+
+      GadgetContainerMessage<ISMRMRD::ImageHeader> *m = 
+        new GadgetContainerMessage<ISMRMRD::ImageHeader>();
+
+      GadgetContainerMessage< hoNDArray< std::complex<float> > > *cm = 
+        new GadgetContainerMessage< hoNDArray< std::complex<float> > >();
+
+      *m->getObjectPtr() = j->image_headers_[frame];
+      m->cont(cm);
+
+      std::vector<size_t> img_dims(2);
+      img_dims[0] = matrix_size_seq_[0];
+      img_dims[1] = matrix_size_seq_[1];
+
+      cm->getObjectPtr()->create(&img_dims);
+
+      size_t data_length = prod(matrix_size_seq_);
+
+      cudaMemcpy(cm->getObjectPtr()->get_data_ptr(),
+                 cgresult->get_data_ptr()+frame*data_length,
+                 data_length*sizeof(std::complex<float>),
+                 cudaMemcpyDeviceToHost);
+
+      cudaError_t err = cudaGetLastError();
+      if( err != cudaSuccess ){
+        GADGET_DEBUG2("Unable to copy result from device to host: %s\n", cudaGetErrorString(err));
+        m->release();
+        return GADGET_FAIL;
+      }
+
+      m->getObjectPtr()->matrix_size[0] = matrix_size_seq_[0];
+      m->getObjectPtr()->matrix_size[1] = matrix_size_seq_[1];
+      m->getObjectPtr()->matrix_size[2] = 1;
+      m->getObjectPtr()->channels       = 1;
+      m->getObjectPtr()->image_index    = frame_counter_ + frame;
+
+      if (this->next()->putq(m) < 0) {
+        GADGET_DEBUG1("Failed to put result image on to queue\n");
+        m->release();
+        return GADGET_FAIL;
+      }
+    }
+
+    frame_counter_ += frames;
+
+    m1->release();
+    return GADGET_OK;
+  }
+
+  boost::shared_ptr< cuNDArray<float_complext> > gpuCgKtSenseGadget::
+  compute_regularization_image( GenericReconJob *job )
+  {
+    // Estimates the training data from the job's own samples and turns it into the regularization
+    // image: convolve to Cartesian k-space, apply the shutter, transform back, remove the oversampling
+    // and reduce over the coil dimension. May store an estimated radius in shutter_radius_.
+
+    const unsigned int samples = job->dat_host_->get_size(0);
+    const unsigned int coils = job->dat_host_->get_size(1);
+    const unsigned int rotations = samples / job->tra_host_->get_number_of_elements();
+    const unsigned int frames = job->tra_host_->get_size(1)*rotations;
+
+    std::vector<size_t> os_dims = to_std_vector(matrix_size_os_);
+    os_dims.push_back(frames);
+    os_dims.push_back(coils);
+
+    cuNDArray<float_complext> image_os(&os_dims);
+    cuNDArray<float_complext> data((job->dat_host_).get());
+    cuNDArray<float> dcw((job->dcw_host_).get());
+
+    // Convolve the samples onto the oversampled Cartesian k-space grid
+    //
+
+    E_->get_plan()->convolve( &data, &image_os, &dcw, cuNFFT_plan<float,2>::NFFT_CONV_NC2C );
+
+    // Apply shutter
+    //
+
+    if( shutter_radius_ < 0.0001 ){ // No radius configured: estimate one and keep it for later jobs
+
+      // Profiles per frame is only an estimate here (the exact value is not known at this stage)
+      unsigned int profiles_per_frame = samples / (frames*matrix_size_os_[0]);
+      shutter_radius_ = (static_cast<float>(matrix_size_os_[0])/static_cast<float>(matrix_size_[0]))*static_cast<float>(profiles_per_frame)/static_cast<float>(M_PI);
+      GADGET_DEBUG2("Estimated training data shutter radius: %f\n", shutter_radius_);
+    }
+
+    fill_border<float_complext,2>( shutter_radius_, &image_os );
+    E_->get_plan()->fft( &image_os, cuNFFT_plan<float,2>::NFFT_BACKWARDS );
+    E_->get_plan()->deapodize( &image_os );
+
+    // Remove oversampling: crop the centre of the oversampled images
+    //
+
+    std::vector<size_t> img_dims = to_std_vector(matrix_size_);
+    img_dims.push_back(frames);
+    img_dims.push_back(coils);
+    cuNDArray<float_complext> image(&img_dims);
+    crop<float_complext,2>( (matrix_size_os_-matrix_size_)>>1, &image_os, &image );
+
+    // Compute regularization image: same dimensions as above without the trailing coil dimension
+    //
+
+    img_dims.pop_back();
+    boost::shared_ptr< cuNDArray<float_complext> > reg_image( new cuNDArray<float_complext>(&img_dims) );
+
+    E_->mult_csm_conj_sum( &image, reg_image.get() );
+    cuNDFFT<float>::instance()->ifft( reg_image.get(), 2, true );
+
+    return reg_image;
+  }
+
+  GADGET_FACTORY_DECLARE(gpuCgKtSenseGadget)
+}
diff --git a/gadgets/pmri/gpuCgKtSenseGadget.h b/gadgets/pmri/gpuCgKtSenseGadget.h
new file mode 100644
index 0000000..7c2bd98
--- /dev/null
+++ b/gadgets/pmri/gpuCgKtSenseGadget.h
@@ -0,0 +1,71 @@
+#ifndef gpuCgKtSenseGadget_H
+#define gpuCgKtSenseGadget_H
+#pragma once
+
+#include "gadgetron_gpupmri_export.h"
+#include "Gadget.h"
+#include "GenericReconJob.h"
+#include "GadgetMRIHeaders.h"
+#include "cuCgSolver.h"
+#include "cuNonCartesianKtSenseOperator.h"
+#include "cuCgPreconditioner.h"
+#include "cuNFFT.h"
+#include "cuImageOperator.h"
+
+#include <ismrmrd/ismrmrd.h>
+#include <complex>
+
+namespace Gadgetron{
+
+  class EXPORTGADGETS_GPUPMRI gpuCgKtSenseGadget : public Gadget2<ISMRMRD::ImageHeader, GenericReconJob>
+  {
+    // Gadget that reconstructs GenericReconJob messages with a preconditioned, regularized CG solver (k-t Sense).
+  public:
+    GADGET_DECLARE(gpuCgKtSenseGadget);
+
+    gpuCgKtSenseGadget();
+    virtual ~gpuCgKtSenseGadget();
+
+  protected:
+    // process(): reconstructs one job; process_config(): reads parameters and header, sets up the solver
+    virtual int process( GadgetContainerMessage< ISMRMRD::ImageHeader > *m1, GadgetContainerMessage< GenericReconJob > *m2 );
+    virtual int process_config( ACE_Message_Block* mb );
+    // Builds the regularization image from the samples of the job itself (training data estimate)
+    boost::shared_ptr< cuNDArray<float_complext> > compute_regularization_image( GenericReconJob *job );
+
+    int channels_;      // receiver channels from the ISMRMRD header (1 if not given)
+    int device_number_; // CUDA device ("deviceno"), wrapped modulo the number of devices
+    int set_number_;    // only jobs of this set ("setno") ...
+    int slice_number_;  // ... and this slice ("sliceno") are reconstructed
+
+    uint64d2 matrix_size_;     // recon matrix size, taken from the job's reg_host_ dimensions
+    uint64d2 matrix_size_os_;  // oversampled matrix size, rounded up to a multiple of the warp size
+    uint64d2 matrix_size_seq_; // recon space matrix size from the header; results are cropped to it
+
+    unsigned int number_of_iterations_; // maximum number of CG iterations
+    double cg_limit_;                   // CG tolerance (set_tc_tolerance)
+    double oversampling_factor_;        // scales matrix_size_ to matrix_size_os_
+    double kernel_width_;               // passed to the encoding operator's setup()
+    double kappa_;                      // weight of the regularization operator
+    double shutter_radius_;             // "training_data_shutter_radius"; estimated when below 0.0001
+    unsigned int rotations_to_discard_; // must be even; half is discarded at each end of a job
+
+    bool output_convergence_; // selects OUTPUT_VERBOSE instead of OUTPUT_SILENT for the solver
+    bool is_configured_;      // set once process_config() has created operators and solver setup
+
+    // Define conjugate gradient solver
+    cuCgSolver<float_complext> cg_;
+
+    // Define non-Cartesian Sense Encoding operator
+    boost::shared_ptr< cuNonCartesianKtSenseOperator<float,2> > E_;
+
+    // Define preconditioner
+    boost::shared_ptr< cuCgPreconditioner<float_complext> > D_;
+
+    // Define regularization image operator
+    boost::shared_ptr< cuImageOperator<float_complext> > R_;
+    // Running frame count over all processed jobs; offsets image_index of the output images
+    int frame_counter_;
+  };
+}
+#endif //gpuCgKtSenseGadget
diff --git a/gadgets/pmri/gpuCgSenseGadget.cpp b/gadgets/pmri/gpuCgSenseGadget.cpp
new file mode 100644
index 0000000..c47caef
--- /dev/null
+++ b/gadgets/pmri/gpuCgSenseGadget.cpp
@@ -0,0 +1,347 @@
+#include "gpuCgSenseGadget.h"
+#include "cuNDArray_operators.h"
+#include "cuNDArray_elemwise.h"
+#include "cuNDArray_blas.h"
+#include "cuNDArray_utils.h"
+#include "cuNDArray_reductions.h"
+#include "Gadgetron.h"
+#include "GadgetMRIHeaders.h"
+#include "b1_map.h"
+#include "GPUTimer.h"
+#include "vector_td_utilities.h"
+#include "hoNDArray_fileio.h"
+#include "ismrmrd/xml.h"
+
+namespace Gadgetron{
+
+  gpuCgSenseGadget::gpuCgSenseGadget()
+    : channels_(0)            // initializers are listed in the member declaration order of gpuCgSenseGadget.h
+    , matrix_size_reported_(false)
+    , is_configured_(false)
+    , frame_counter_(0)
+  {
+    // Default values of the gadget parameters; process_config() reads the effective values back
+    set_parameter("deviceno", "0");
+    set_parameter("setno", "0");
+    set_parameter("sliceno", "0");
+    set_parameter("number_of_iterations", "5");
+    set_parameter("cg_limit", "1e-6");
+    set_parameter("oversampling_factor", "1.25");
+    set_parameter("kernel_width", "5.5");
+    set_parameter("kappa", "0.3");
+    matrix_size_ = uint64d2(0,0);      // the three matrix sizes stay zero until process_config()
+    matrix_size_os_ = uint64d2(0,0);   // and process() have seen the header and the first job
+    matrix_size_seq_ = uint64d2(0,0);
+  }
+
+  gpuCgSenseGadget::~gpuCgSenseGadget() {} // empty: no explicit cleanup is performed here
+
+  // Selects the CUDA device, reads the gadget parameters and the ISMRMRD header and, on the first call,
+  // allocates the encoding, preconditioning and regularization operators and hands them to the CG solver.
+  int gpuCgSenseGadget::process_config( ACE_Message_Block* mb )
+  {
+    device_number_ = get_int_value("deviceno");
+
+    int number_of_devices = 0;
+    if (cudaGetDeviceCount(&number_of_devices)!= cudaSuccess) {
+      GADGET_DEBUG1( "Error: unable to query number of CUDA devices.\n" );
+      return GADGET_FAIL;
+    }
+
+    if (number_of_devices == 0) {
+      GADGET_DEBUG1( "Error: No available CUDA devices.\n" );
+      return GADGET_FAIL;
+    }
+
+    if (device_number_ >= number_of_devices) {
+      GADGET_DEBUG2("Adjusting device number from %d to %d\n", device_number_,  (device_number_%number_of_devices));
+      device_number_ = (device_number_%number_of_devices);
+    }
+
+    if (cudaSetDevice(device_number_)!= cudaSuccess) {
+      GADGET_DEBUG1( "Error: unable to set CUDA device.\n" );
+      return GADGET_FAIL;
+    }
+
+    pass_on_undesired_data_ = get_bool_value("pass_on_undesired_data");
+    set_number_ = get_int_value("setno");
+    slice_number_ = get_int_value("sliceno");
+    number_of_iterations_ = get_int_value("number_of_iterations");
+    cg_limit_ = get_double_value("cg_limit");
+    oversampling_factor_ = get_double_value("oversampling_factor");
+    kernel_width_ = get_double_value("kernel_width");
+    kappa_ = get_double_value("kappa");
+    output_convergence_ = get_bool_value("output_convergence");
+    output_timing_ = get_bool_value("output_timing");
+    rotations_to_discard_ = get_int_value("rotations_to_discard");
+
+    if( (rotations_to_discard_%2) == 1 ){
+      GADGET_DEBUG1("#rotations to discard must be even.\n");
+      return GADGET_FAIL;
+    }
+
+    // Get the Ismrmrd header
+    //
+    ISMRMRD::IsmrmrdHeader h;
+    ISMRMRD::deserialize(mb->rd_ptr(),h);
+
+
+    if (h.encoding.size() != 1) {
+      GADGET_DEBUG1("This Gadget only supports one encoding space\n");
+      return GADGET_FAIL;
+    }
+
+    // Only the matrix size of the reconstruction space is used by this gadget, so the
+    // encoded space and the encoding limits are not fetched from the header.
+    const ISMRMRD::EncodingSpace& r_space = h.encoding[0].reconSpace;
+    matrix_size_seq_ = uint64d2( r_space.matrixSize.x, r_space.matrixSize.y );
+
+    // The operators and the solver are set up only once; a later call merely refreshes
+    // the parameters and the sequence matrix size read above.
+    if (!is_configured_) {
+
+      if (h.acquisitionSystemInformation) {
+        channels_ = h.acquisitionSystemInformation->receiverChannels ? *h.acquisitionSystemInformation->receiverChannels : 1;
+      } else {
+        channels_ = 1;
+      }
+
+      // Allocate encoding operator for non-Cartesian Sense
+      E_ = boost::shared_ptr< cuNonCartesianSenseOperator<float,2> >( new cuNonCartesianSenseOperator<float,2>() );
+
+      // Allocate preconditioner
+      D_ = boost::shared_ptr< cuCgPreconditioner<float_complext> >( new cuCgPreconditioner<float_complext>() );
+
+      // Allocate regularization image operator
+      R_ = boost::shared_ptr< cuImageOperator<float_complext> >( new cuImageOperator<float_complext>() );
+      R_->set_weight( kappa_ );
+
+      // Setup solver
+      cg_.set_encoding_operator( E_ );        // encoding matrix
+      cg_.add_regularization_operator( R_ );  // regularization matrix
+      cg_.set_preconditioner( D_ );           // preconditioning matrix
+      cg_.set_max_iterations( number_of_iterations_ );
+      cg_.set_tc_tolerance( cg_limit_ );
+      cg_.set_output_mode( (output_convergence_) ? cuCgSolver<float_complext>::OUTPUT_VERBOSE : cuCgSolver<float_complext>::OUTPUT_SILENT);
+
+      is_configured_ = true;
+    }
+
+    return GADGET_OK;
+  }
+
+  // Reconstructs one Sense job on the GPU and passes one complex image per retained frame downstream.
+  // Returns GADGET_OK/GADGET_FAIL; a job of another set/slice is forwarded and the putq() result returned.
+  int gpuCgSenseGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader> *m1, GadgetContainerMessage<GenericReconJob> *m2)
+  {
+    // Is this data for this gadget's set/slice?
+    //
+
+    if( m1->getObjectPtr()->set != set_number_ || m1->getObjectPtr()->slice != slice_number_ ) {
+      // No, pass it downstream...
+      return this->next()->putq(m1);
+    }
+
+    boost::shared_ptr<GPUTimer> process_timer;
+    if( output_timing_ )
+      process_timer = boost::shared_ptr<GPUTimer>( new GPUTimer("gpuCgSenseGadget::process()") );
+
+    if (!is_configured_) {
+      GADGET_DEBUG1("Data received before configuration was completed\n");
+      return GADGET_FAIL;
+    }
+
+    GenericReconJob* j = m2->getObjectPtr();
+
+    // Some basic validation of the incoming Sense job
+    if (!j->csm_host_.get() || !j->dat_host_.get() || !j->tra_host_.get() || !j->dcw_host_.get() || !j->reg_host_.get()) {
+      GADGET_DEBUG1("Received an incomplete Sense job\n");
+      return GADGET_FAIL;
+    }
+
+    const size_t traj_elements = j->tra_host_->get_number_of_elements();
+    unsigned int samples = j->dat_host_->get_size(0);
+    if( traj_elements == 0 || samples%traj_elements ) { // checked first: it also guards the division below
+      GADGET_DEBUG2("Mismatch between number of samples (%d) and number of k-space coordinates (%d).\nThe first should be a multiplum of the latter.\n", 
+                    static_cast<int>(samples), static_cast<int>(traj_elements));
+      return GADGET_FAIL;
+    }
+
+    unsigned int rotations = samples / traj_elements;
+    unsigned int frames = j->tra_host_->get_size(1)*rotations;
+
+    boost::shared_ptr< cuNDArray<floatd2> > traj(new cuNDArray<floatd2> (j->tra_host_.get()));
+    boost::shared_ptr< cuNDArray<float> > dcw(new cuNDArray<float> (j->dcw_host_.get()));
+    sqrt_inplace(dcw.get()); //Take square root to use for weighting
+    boost::shared_ptr< cuNDArray<float_complext> > csm(new cuNDArray<float_complext> (j->csm_host_.get()));
+    boost::shared_ptr< cuNDArray<float_complext> > device_samples(new cuNDArray<float_complext> (j->dat_host_.get()));
+
+    cudaDeviceProp deviceProp;
+    if( cudaGetDeviceProperties( &deviceProp, device_number_ ) != cudaSuccess) {
+      GADGET_DEBUG1( "Error: unable to query device properties.\n" );
+      return GADGET_FAIL;
+    }
+
+    unsigned int warp_size = deviceProp.warpSize;
+
+    matrix_size_ = uint64d2( j->reg_host_->get_size(0), j->reg_host_->get_size(1) );
+
+    matrix_size_os_ = // oversampled matrix size, rounded up to a multiple of the warp size
+      uint64d2(((static_cast<unsigned int>(std::ceil(matrix_size_[0]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size,
+               ((static_cast<unsigned int>(std::ceil(matrix_size_[1]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size);
+
+    if( !matrix_size_reported_ ) { // report the sizes for the first job only
+      GADGET_DEBUG2("Matrix size    : [%d,%d] \n", static_cast<int>(matrix_size_[0]), static_cast<int>(matrix_size_[1]));
+      GADGET_DEBUG2("Matrix size OS : [%d,%d] \n", static_cast<int>(matrix_size_os_[0]), static_cast<int>(matrix_size_os_[1]));
+      matrix_size_reported_ = true;
+    }
+
+    std::vector<size_t> image_dims = to_std_vector(matrix_size_);
+    image_dims.push_back(frames);
+
+    E_->set_domain_dimensions(&image_dims);
+    E_->set_codomain_dimensions(device_samples->get_dimensions().get());
+    E_->set_dcw(dcw);
+    E_->set_csm(csm);
+
+    E_->setup( matrix_size_, matrix_size_os_, static_cast<float>(kernel_width_) );
+    E_->preprocess(traj.get());
+
+    boost::shared_ptr< cuNDArray<float_complext> > reg_image(new cuNDArray<float_complext> (j->reg_host_.get()));
+    R_->compute(reg_image.get());
+
+    // Define preconditioning weights
+    boost::shared_ptr< cuNDArray<float> > _precon_weights = sum(abs_square(csm.get()).get(), 2);
+    boost::shared_ptr<cuNDArray<float> > R_diag = R_->get();
+    *R_diag *= float(kappa_);
+    *_precon_weights += *R_diag;
+    R_diag.reset();
+    reciprocal_sqrt_inplace(_precon_weights.get());
+    boost::shared_ptr< cuNDArray<float_complext> > precon_weights = real_to_complex<float_complext>( _precon_weights.get() );
+    _precon_weights.reset();
+    D_->set_weights( precon_weights );
+
+    //Apply dcw weights
+    *device_samples *= *dcw;
+
+
+    /*{
+      static int counter = 0;
+      char filename[256];
+      sprintf((char*)filename, "_traj_%d.real", counter);
+      write_nd_array<floatd2>( traj->to_host().get(), filename );
+      sprintf((char*)filename, "_dcw_%d.real", counter);
+      write_nd_array<float>( dcw->to_host().get(), filename );
+      sprintf((char*)filename, "_csm_%d.cplx", counter);
+      write_nd_array<float_complext>( csm->to_host().get(), filename );
+      sprintf((char*)filename, "_samples_%d.cplx", counter);
+      write_nd_array<float_complext>( device_samples->to_host().get(), filename );
+      sprintf((char*)filename, "_reg_%d.cplx", counter);
+      write_nd_array<float_complext>( reg_image->to_host().get(), filename );
+      counter++; 
+      }*/
+
+    // Invoke solver
+    // 
+
+    boost::shared_ptr< cuNDArray<float_complext> > cgresult;
+
+    {
+      boost::shared_ptr<GPUTimer> solve_timer;
+      if( output_timing_ )
+        solve_timer = boost::shared_ptr<GPUTimer>( new GPUTimer("gpuCgSenseGadget::solve()") );
+
+      cgresult = cg_.solve(device_samples.get());
+
+      if( output_timing_ )
+        solve_timer.reset();
+    }
+
+    if (!cgresult.get()) {
+      GADGET_DEBUG1("Iterative_sense_compute failed\n");
+      return GADGET_FAIL;
+    }
+
+    /*
+      static int counter = 0;
+      char filename[256];
+      sprintf((char*)filename, "recon_%d.real", counter);
+      write_nd_array<float>( abs(cgresult.get())->to_host().get(), filename );
+      counter++; 
+    */
+
+    // If the recon matrix size exceeds the sequence matrix size then crop
+    if( matrix_size_seq_ != matrix_size_ )
+      cgresult = crop<float_complext,2>( (matrix_size_-matrix_size_seq_)>>1, matrix_size_seq_, cgresult.get() );
+
+    // Now pass on the reconstructed images
+    //
+
+    unsigned int frames_per_rotation = frames/rotations;
+
+    if( rotations == 1 ){ // this is the case for golden ratio
+      rotations = frames;
+      frames_per_rotation = 1;
+    }
+
+    for( unsigned int frame=0; frame<frames; frame++ ){
+
+      unsigned int rotation_idx = frame/frames_per_rotation;
+
+      // Check if we should discard this frame (half of rotations_to_discard_ at either end)
+      if( rotation_idx < (rotations_to_discard_>>1) || rotation_idx >= rotations-(rotations_to_discard_>>1) )
+        continue;
+
+      GadgetContainerMessage<ISMRMRD::ImageHeader> *m = 
+        new GadgetContainerMessage<ISMRMRD::ImageHeader>();
+
+      GadgetContainerMessage< hoNDArray< std::complex<float> > > *cm = 
+        new GadgetContainerMessage< hoNDArray< std::complex<float> > >();
+
+      *m->getObjectPtr() = j->image_headers_[frame];
+      m->cont(cm);
+
+      std::vector<size_t> img_dims(2);
+      img_dims[0] = matrix_size_seq_[0];
+      img_dims[1] = matrix_size_seq_[1];
+
+      cm->getObjectPtr()->create(&img_dims);
+
+      size_t data_length = prod(matrix_size_seq_);
+
+      cudaMemcpy(cm->getObjectPtr()->get_data_ptr(),
+                 cgresult->get_data_ptr()+frame*data_length,
+                 data_length*sizeof(std::complex<float>),
+                 cudaMemcpyDeviceToHost);
+
+      cudaError_t err = cudaGetLastError();
+      if( err != cudaSuccess ){
+        GADGET_DEBUG2("Unable to copy result from device to host: %s\n", cudaGetErrorString(err));
+        m->release();
+        return GADGET_FAIL;
+      }
+
+      m->getObjectPtr()->matrix_size[0] = matrix_size_seq_[0];
+      m->getObjectPtr()->matrix_size[1] = matrix_size_seq_[1];
+      m->getObjectPtr()->matrix_size[2] = 1;
+      m->getObjectPtr()->channels       = 1;
+      m->getObjectPtr()->image_index    = frame_counter_ + frame;
+
+      if (this->next()->putq(m) < 0) {
+        GADGET_DEBUG1("Failed to put result image on to queue\n");
+        m->release();
+        return GADGET_FAIL;
+      }
+    }
+
+    frame_counter_ += frames;
+
+    if( output_timing_ )
+      process_timer.reset();
+
+    m1->release();
+    return GADGET_OK;
+  }
+
+  GADGET_FACTORY_DECLARE(gpuCgSenseGadget)
+}
diff --git a/gadgets/pmri/gpuCgSenseGadget.h b/gadgets/pmri/gpuCgSenseGadget.h
new file mode 100644
index 0000000..3366fdb
--- /dev/null
+++ b/gadgets/pmri/gpuCgSenseGadget.h
@@ -0,0 +1,71 @@
+#ifndef gpuCgSenseGadget_H
+#define gpuCgSenseGadget_H
+#pragma once
+
+#include "gadgetron_gpupmri_export.h"
+#include "Gadget.h"
+#include "GenericReconJob.h"
+#include "GadgetMRIHeaders.h"
+#include "cuCgSolver.h"
+#include "cuNonCartesianSenseOperator.h"
+#include "cuCgPreconditioner.h"
+#include "cuNFFT.h"
+#include "cuImageOperator.h"
+
+#include <ismrmrd/ismrmrd.h>
+#include <complex>
+
+namespace Gadgetron{
+
+  class EXPORTGADGETS_GPUPMRI gpuCgSenseGadget : public Gadget2<ISMRMRD::ImageHeader, GenericReconJob>
+  {
+    // Gadget that reconstructs GenericReconJob messages with a preconditioned, regularized CG solver (Sense).
+  public:
+
+    GADGET_DECLARE(gpuCgSenseGadget);
+
+    gpuCgSenseGadget();
+    virtual ~gpuCgSenseGadget();
+
+  protected:
+    // process(): reconstructs one job; process_config(): reads parameters and header, sets up the solver
+    virtual int process( GadgetContainerMessage< ISMRMRD::ImageHeader > *m1, GadgetContainerMessage< GenericReconJob > *m2 );
+    virtual int process_config( ACE_Message_Block* mb );
+
+    int channels_;      // receiver channels from the ISMRMRD header (1 if not given)
+    int device_number_; // CUDA device ("deviceno"), wrapped modulo the number of devices
+    int set_number_;    // only jobs of this set ("setno") ...
+    int slice_number_;  // ... and this slice ("sliceno") are reconstructed
+
+    uint64d2 matrix_size_;     // recon matrix size, taken from the job's reg_host_ dimensions
+    uint64d2 matrix_size_os_;  // oversampled matrix size, rounded up to a multiple of the warp size
+    uint64d2 matrix_size_seq_; // recon space matrix size from the header; results are cropped to it
+
+    unsigned int number_of_iterations_; // maximum number of CG iterations
+    double cg_limit_;                   // CG tolerance (set_tc_tolerance)
+    double oversampling_factor_;        // scales matrix_size_ to matrix_size_os_
+    double kernel_width_;               // passed to the encoding operator's setup()
+    double kappa_;                      // weight of the regularization operator
+    unsigned int rotations_to_discard_; // must be even; half is discarded at each end of a job
+
+    bool output_convergence_;   // selects OUTPUT_VERBOSE instead of OUTPUT_SILENT for the solver
+    bool output_timing_;        // creates GPUTimer objects around process() and the solve
+    bool matrix_size_reported_; // the matrix sizes are logged for the first job only
+    bool is_configured_;        // set once process_config() has created operators and solver setup
+
+    // Define conjugate gradient solver
+    cuCgSolver<float_complext> cg_;
+
+    // Define non-Cartesian Sense Encoding operator
+    boost::shared_ptr< cuNonCartesianSenseOperator<float,2> > E_;
+
+    // Define preconditioner
+    boost::shared_ptr< cuCgPreconditioner<float_complext> > D_;
+
+    // Define regularization image operator
+    boost::shared_ptr< cuImageOperator<float_complext> > R_;
+    // Running frame count over all processed jobs; offsets image_index of the output images
+    unsigned int frame_counter_;
+  };
+}
+#endif //gpuCgSenseGadget
diff --git a/gadgets/pmri/gpuCgSpiritGadget.cpp b/gadgets/pmri/gpuCgSpiritGadget.cpp
new file mode 100644
index 0000000..db8984c
--- /dev/null
+++ b/gadgets/pmri/gpuCgSpiritGadget.cpp
@@ -0,0 +1,357 @@
+#include "gpuCgSpiritGadget.h"
+#include "cuNDArray_operators.h"
+#include "cuNDArray_elemwise.h"
+#include "cuNDArray_blas.h"
+#include "cuNDArray_utils.h"
+#include "cuNDArray_reductions.h"
+#include "Gadgetron.h"
+#include "GadgetMRIHeaders.h"
+#include "b1_map.h"
+#include "GPUTimer.h"
+#include "vector_td_utilities.h"
+#include "hoNDArray_fileio.h"
+#include "ismrmrd/xml.h"
+
+namespace Gadgetron{
+
+  gpuCgSpiritGadget::gpuCgSpiritGadget()   // gives every scalar member a defined value and registers default gadget parameters
+    : channels_(0), device_number_(0), set_number_(0), slice_number_(0)   // initialisers follow the declaration order in gpuCgSpiritGadget.h
+    , number_of_iterations_(0), cg_limit_(0.0), oversampling_factor_(0.0), kernel_width_(0.0), kappa_(0.0)
+    , rotations_to_discard_(0), output_convergence_(false), output_timing_(false)
+    , matrix_size_reported_(false), is_configured_(false), frame_counter_(0)
+  {
+    set_parameter(std::string("deviceno").c_str(), "0");
+    set_parameter(std::string("setno").c_str(), "0");
+    set_parameter(std::string("sliceno").c_str(), "0");
+    set_parameter(std::string("number_of_iterations").c_str(), "5");
+    set_parameter(std::string("cg_limit").c_str(), "1e-6");
+    set_parameter(std::string("oversampling_factor").c_str(), "1.25");
+    set_parameter(std::string("kernel_width").c_str(), "5.5");
+    set_parameter(std::string("kappa").c_str(), "0.3");
+    // process() reads set_number_, slice_number_ and output_timing_ before it checks is_configured_, hence the explicit zero-initialisation above.
+    matrix_size_ = uint64d2(0,0);
+    matrix_size_os_ = uint64d2(0,0);
+    matrix_size_seq_ = uint64d2(0,0);
+  }
+
+  gpuCgSpiritGadget::~gpuCgSpiritGadget() {} // empty body: no explicit cleanup is performed here
+
+  int gpuCgSpiritGadget::process_config( ACE_Message_Block* mb )
+  {
+    //GADGET_DEBUG1("gpuCgSpiritGadget::process_config\n");
+    // Selects the CUDA device, caches the gadget parameters, parses the ISMRMRD header carried by mb and, on the first call only, allocates the operators and configures the CG solver. Returns GADGET_OK or GADGET_FAIL.
+    device_number_ = get_int_value(std::string("deviceno").c_str());
+    // Validate the requested device against the CUDA devices reported by the runtime
+    int number_of_devices = 0;
+    if (cudaGetDeviceCount(&number_of_devices)!= cudaSuccess) {
+      GADGET_DEBUG1( "Error: unable to query number of CUDA devices.\n" );
+      return GADGET_FAIL;
+    }
+
+    if (number_of_devices == 0) {
+      GADGET_DEBUG1( "Error: No available CUDA devices.\n" );
+      return GADGET_FAIL;
+    }
+    // A too-large device number is wrapped modulo the device count. NOTE(review): a negative value is not caught by this test.
+    if (device_number_ >= number_of_devices) {
+      GADGET_DEBUG2("Adjusting device number from %d to %d\n", device_number_,  (device_number_%number_of_devices));
+      device_number_ = (device_number_%number_of_devices);
+    }
+
+    if (cudaSetDevice(device_number_)!= cudaSuccess) {
+      GADGET_DEBUG1( "Error: unable to set CUDA device.\n" );
+      return GADGET_FAIL;
+    }
+    // Cache the remaining gadget parameters in members
+    pass_on_undesired_data_ = get_bool_value(std::string("pass_on_undesired_data").c_str());
+    set_number_ = get_int_value(std::string("setno").c_str());
+    slice_number_ = get_int_value(std::string("sliceno").c_str());
+    number_of_iterations_ = get_int_value(std::string("number_of_iterations").c_str());
+    cg_limit_ = get_double_value(std::string("cg_limit").c_str());
+    oversampling_factor_ = get_double_value(std::string("oversampling_factor").c_str());
+    kernel_width_ = get_double_value(std::string("kernel_width").c_str());
+    kappa_ = get_double_value(std::string("kappa").c_str());
+    output_convergence_ = get_bool_value(std::string("output_convergence").c_str());
+    output_timing_ = get_bool_value(std::string("output_timing").c_str());
+    rotations_to_discard_ = get_int_value(std::string("rotations_to_discard").c_str());
+    // process() discards rotations_to_discard_/2 rotations at each end, so an odd count is rejected
+    if( (rotations_to_discard_%2) == 1 ){
+      GADGET_DEBUG1("#rotations to discard must be even.\n");
+      return GADGET_FAIL;
+    }
+
+    // Get the Ismrmrd header
+    // (deserialized from the bytes at mb->rd_ptr())
+    ISMRMRD::IsmrmrdHeader h;
+    ISMRMRD::deserialize(mb->rd_ptr(),h);
+    
+    
+    if (h.encoding.size() != 1) {
+      GADGET_DEBUG1("This Gadget only supports one encoding space\n");
+      return GADGET_FAIL;
+    }
+    
+    // Get the encoding space and trajectory description
+    ISMRMRD::EncodingSpace e_space = h.encoding[0].encodedSpace; // NOTE(review): not used below
+    ISMRMRD::EncodingSpace r_space = h.encoding[0].reconSpace;
+    ISMRMRD::EncodingLimits e_limits = h.encoding[0].encodingLimits; // NOTE(review): not used below
+    // The sequence matrix size is the (x,y) matrix of the recon space; process() crops to it
+    matrix_size_seq_ = uint64d2( r_space.matrixSize.x, r_space.matrixSize.y );
+
+    if (!is_configured_) { // one-time setup; skipped on any later call
+      // Receiver channel count from the header when present, otherwise 1
+      if (h.acquisitionSystemInformation) {
+	channels_ = h.acquisitionSystemInformation->receiverChannels ? *h.acquisitionSystemInformation->receiverChannels : 1;
+      } else {
+	channels_ = 1;
+      }
+      // Allocate Spirit operators
+      E_ = boost::shared_ptr< cuNFFTOperator<float,2> >( new cuNFFTOperator<float,2>() );
+      S_ = boost::shared_ptr< cuSpirit2DOperator<float> >( new cuSpirit2DOperator<float>() );
+      S_->set_weight( kappa_ );
+
+      // Allocate preconditioner
+      //D_ = boost::shared_ptr< cuCgPreconditioner<float_complext> >( new cuCgPreconditioner<float_complext>() );
+
+      // Allocate regularization image operator
+      //R_ = boost::shared_ptr< cuImageOperator<float_complext> >( new cuImageOperator<float_complext>() );
+      //R_->set_weight( kappa_ );
+
+      // Setup solver
+      cg_.set_encoding_operator( E_ );        // encoding matrix
+      if( kappa_ > 0.0f ) cg_.add_regularization_operator( S_ );  // regularization matrix
+      //cg_.add_regularization_operator( R_ );  // regularization matrix
+      //cg_.set_preconditioner( D_ );           // preconditioning matrix
+      cg_.set_max_iterations( number_of_iterations_ );
+      cg_.set_tc_tolerance( cg_limit_ );
+      cg_.set_output_mode( (output_convergence_) ? cuCgSolver<float_complext>::OUTPUT_VERBOSE : cuCgSolver<float_complext>::OUTPUT_SILENT);
+
+      is_configured_ = true;
+    }
+
+    return GADGET_OK;
+  }
+
+  int gpuCgSpiritGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader> *m1, GadgetContainerMessage<GenericReconJob> *m2)
+  {
+    // Reconstructs the GenericReconJob in m2 (CG solve, optional crop, RSS coil combination) and queues one 2D image per retained frame downstream.
+    // Returns GADGET_OK on success, GADGET_FAIL on invalid input, a CUDA error or a failed solve; data for other sets/slices returns the result of putq().
+    // Is this data for this gadget's set/slice?
+    if( m1->getObjectPtr()->set != set_number_ || m1->getObjectPtr()->slice != slice_number_ ) {      
+      // No, pass it downstream...
+      return this->next()->putq(m1);
+    }
+    
+    //GADGET_DEBUG1("gpuCgSpiritGadget::process\n");
+
+    boost::shared_ptr<GPUTimer> process_timer;
+    if( output_timing_ )
+      process_timer = boost::shared_ptr<GPUTimer>( new GPUTimer("gpuCgSpiritGadget::process()") );
+    
+    if (!is_configured_) {
+      GADGET_DEBUG1("Data received before configuration was completed\n");
+      return GADGET_FAIL;
+    }
+
+    GenericReconJob* j = m2->getObjectPtr();
+
+    // Some basic validation of the incoming Spirit job
+    if (!j->csm_host_.get() || !j->dat_host_.get() || !j->tra_host_.get() || !j->dcw_host_.get() || !j->reg_host_.get()) {
+      GADGET_DEBUG1("Received an incomplete Spirit job\n");
+      return GADGET_FAIL;
+    }
+
+    unsigned int samples = j->dat_host_->get_size(0);
+    unsigned int channels = j->dat_host_->get_size(1);
+    const size_t num_traj_coords = j->tra_host_->get_number_of_elements(); // number of k-space coordinates in the trajectory
+    // Validate BEFORE dividing: an empty trajectory, or zero samples (rotations == 0), would otherwise lead to a division by zero
+    if( num_traj_coords == 0 || samples == 0 || samples%num_traj_coords ) {
+      GADGET_DEBUG2("Mismatch between number of samples (%d) and number of k-space coordinates (%d).\nThe first should be a multiplum of the latter.\n", 
+                    static_cast<int>(samples), static_cast<int>(num_traj_coords)); // explicit int arguments to match the %d conversions
+      return GADGET_FAIL;
+    }
+    unsigned int rotations = samples / num_traj_coords; // >= 1 after the check above
+    unsigned int frames = j->tra_host_->get_size(1)*rotations;
+    boost::shared_ptr< cuNDArray<floatd2> > traj(new cuNDArray<floatd2> (j->tra_host_.get()));
+    boost::shared_ptr< cuNDArray<float> > dcw(new cuNDArray<float> (j->dcw_host_.get()));
+    sqrt_inplace(dcw.get()); //Take square root to use for weighting
+    boost::shared_ptr< cuNDArray<float_complext> > csm(new cuNDArray<float_complext> (j->csm_host_.get()));
+    boost::shared_ptr< cuNDArray<float_complext> > device_samples(new cuNDArray<float_complext> (j->dat_host_.get()));
+    
+    cudaDeviceProp deviceProp;
+    if( cudaGetDeviceProperties( &deviceProp, device_number_ ) != cudaSuccess) {
+      GADGET_DEBUG1( "Error: unable to query device properties.\n" );
+      return GADGET_FAIL;
+    }
+    
+    unsigned int warp_size = deviceProp.warpSize;
+    // The recon matrix size is taken from the first two dimensions of the regularization image
+    matrix_size_ = uint64d2( j->reg_host_->get_size(0), j->reg_host_->get_size(1) );    
+    // Oversampled matrix size: scaled by oversampling_factor_ and rounded up to a multiple of the warp size
+    matrix_size_os_ =
+      uint64d2(((static_cast<unsigned int>(std::ceil(matrix_size_[0]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size,
+               ((static_cast<unsigned int>(std::ceil(matrix_size_[1]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size);
+    // Report the sizes once only; the 64-bit values are cast to int to match the %d conversions
+    if( !matrix_size_reported_ ) {
+      GADGET_DEBUG2("Matrix size    : [%d,%d] \n", static_cast<int>(matrix_size_[0]), static_cast<int>(matrix_size_[1]));    
+      GADGET_DEBUG2("Matrix size OS : [%d,%d] \n", static_cast<int>(matrix_size_os_[0]), static_cast<int>(matrix_size_os_[1]));
+      matrix_size_reported_ = true;
+    }
+
+    std::vector<size_t> image_dims = to_std_vector(matrix_size_);
+    image_dims.push_back(channels);
+    //image_dims.push_back(frames);
+    
+    E_->set_domain_dimensions(&image_dims);
+    E_->set_codomain_dimensions(device_samples->get_dimensions().get());
+    E_->set_dcw(dcw);
+    E_->setup( matrix_size_, matrix_size_os_, static_cast<float>(kernel_width_) );
+    E_->preprocess(traj.get());
+    
+    boost::shared_ptr< cuNDArray<float_complext> > csm_device( new cuNDArray<float_complext>( csm.get() ));
+    S_->set_calibration_kernels(csm_device);
+    S_->set_domain_dimensions(&image_dims);
+    S_->set_codomain_dimensions(&image_dims);
+
+    /*
+    boost::shared_ptr< cuNDArray<float_complext> > reg_image(new cuNDArray<float_complext> (j->reg_host_.get()));
+    R_->compute(reg_image.get());
+
+    // Define preconditioning weights
+    boost::shared_ptr< cuNDArray<float> > _precon_weights = sum(abs_square(csm.get()).get(), 2);
+    boost::shared_ptr<cuNDArray<float> > R_diag = R_->get();
+    *R_diag *= float(kappa_);
+    *_precon_weights += *R_diag;
+    R_diag.reset();
+    reciprocal_sqrt_inplace(_precon_weights.get());	
+    boost::shared_ptr< cuNDArray<float_complext> > precon_weights = real_to_complex<float_complext>( _precon_weights.get() );
+    _precon_weights.reset();
+    D_->set_weights( precon_weights );
+    */
+
+    /*{
+      static int counter = 0;
+      char filename[256];
+      sprintf((char*)filename, "_traj_%d.real", counter);
+      write_nd_array<floatd2>( traj->to_host().get(), filename );
+      sprintf((char*)filename, "_dcw_%d.real", counter);
+      write_nd_array<float>( dcw->to_host().get(), filename );
+      sprintf((char*)filename, "_csm_%d.cplx", counter);
+      write_nd_array<float_complext>( csm->to_host().get(), filename );
+      sprintf((char*)filename, "_samples_%d.cplx", counter);
+      write_nd_array<float_complext>( device_samples->to_host().get(), filename );
+      sprintf((char*)filename, "_reg_%d.cplx", counter);
+      write_nd_array<float_complext>( reg_image->to_host().get(), filename );
+      counter++; 
+      }*/
+
+    // Invoke solver
+    // (the inner scope bounds the lifetime of the optional solve timer)
+
+    boost::shared_ptr< cuNDArray<float_complext> > cgresult;
+
+    {
+      boost::shared_ptr<GPUTimer> solve_timer;
+      if( output_timing_ )
+        solve_timer = boost::shared_ptr<GPUTimer>( new GPUTimer("gpuCgSpiritGadget::solve()") );
+      
+      cgresult = cg_.solve(device_samples.get());
+      
+      if( output_timing_ )
+        solve_timer.reset();
+    }
+    
+    if (!cgresult.get()) {
+      GADGET_DEBUG1("Iterative_spirit_compute failed\n");
+      return GADGET_FAIL;
+    }
+
+    /*
+      static int counter = 0;
+      char filename[256];
+      sprintf((char*)filename, "recon_%d.real", counter);
+      write_nd_array<float>( abs(cgresult.get())->to_host().get(), filename );
+      counter++; 
+    */
+
+    // If the recon matrix size exceeds the sequence matrix size then crop
+    if( matrix_size_seq_ != matrix_size_ )
+      cgresult = crop<float_complext,2>( (matrix_size_-matrix_size_seq_)>>1, matrix_size_seq_, cgresult.get() );    
+    
+    // Combine coil images
+    // (square root of the sum over dimension 2 of the squared magnitudes)
+
+    cgresult = real_to_complex<float_complext>(sqrt(sum(abs_square(cgresult.get()).get(), 2).get()).get()); // RSS
+    //cgresult = sum(cgresult.get(), 2);
+
+    // Pass on the reconstructed images
+    // (one ImageHeader + image message pair per frame that is not discarded)
+
+    unsigned int frames_per_rotation = frames/rotations;
+
+    if( rotations == 1 ){ // this is the case for golden ratio
+      rotations = frames;
+      frames_per_rotation = 1;
+    }
+
+    for( unsigned int frame=0; frame<frames; frame++ ){
+      
+      unsigned int rotation_idx = frame/frames_per_rotation;
+
+      // Check if we should discard this frame
+      if( rotation_idx < (rotations_to_discard_>>1) || rotation_idx >= rotations-(rotations_to_discard_>>1) )
+        continue;
+
+      GadgetContainerMessage<ISMRMRD::ImageHeader> *m = 
+        new GadgetContainerMessage<ISMRMRD::ImageHeader>();
+
+      GadgetContainerMessage< hoNDArray< std::complex<float> > > *cm = 
+        new GadgetContainerMessage< hoNDArray< std::complex<float> > >();      
+      
+      *m->getObjectPtr() = j->image_headers_[frame];
+      m->cont(cm);
+      
+      std::vector<size_t> img_dims(2);
+      img_dims[0] = matrix_size_seq_[0];
+      img_dims[1] = matrix_size_seq_[1];
+
+      cm->getObjectPtr()->create(&img_dims);
+
+      size_t data_length = prod(matrix_size_seq_);
+      // NOTE(review): image_dims above carries no frame dimension, so cgresult appears to hold a single image; confirm that the offset below stays in bounds for frame > 0.
+      cudaMemcpy(cm->getObjectPtr()->get_data_ptr(),
+                 cgresult->get_data_ptr()+frame*data_length,
+                 data_length*sizeof(std::complex<float>),
+                 cudaMemcpyDeviceToHost);
+      
+      cudaError_t err = cudaGetLastError();
+      if( err != cudaSuccess ){
+        GADGET_DEBUG2("Unable to copy result from device to host: %s\n", cudaGetErrorString(err));
+        m->release();
+        return GADGET_FAIL;
+      }
+
+      m->getObjectPtr()->matrix_size[0] = matrix_size_seq_[0];
+      m->getObjectPtr()->matrix_size[1] = matrix_size_seq_[1];
+      m->getObjectPtr()->matrix_size[2] = 1;
+      m->getObjectPtr()->channels       = 1;
+      m->getObjectPtr()->image_index    = frame_counter_ + frame;
+            
+      if (this->next()->putq(m) < 0) {
+        GADGET_DEBUG1("Failed to put result image on to queue\n");
+        m->release();
+        return GADGET_FAIL;
+      }
+    }
+    
+    frame_counter_ += frames;
+
+    if( output_timing_ )
+      process_timer.reset();
+
+    m1->release();
+    return GADGET_OK;
+  }
+
+  GADGET_FACTORY_DECLARE(gpuCgSpiritGadget) // macro defined outside this file; presumably emits the factory entry point for this gadget - confirm
+}
diff --git a/gadgets/pmri/gpuCgSpiritGadget.h b/gadgets/pmri/gpuCgSpiritGadget.h
new file mode 100644
index 0000000..af02553
--- /dev/null
+++ b/gadgets/pmri/gpuCgSpiritGadget.h
@@ -0,0 +1,75 @@
+#ifndef gpuCgSpiritGadget_H
+#define gpuCgSpiritGadget_H
+#pragma once
+
+#include "gadgetron_gpupmri_export.h"
+#include "Gadget.h"
+#include "GenericReconJob.h"
+#include "GadgetMRIHeaders.h"
+#include "cuCgSolver.h"
+#include "cuNFFTOperator.h"
+#include "cuSpiritOperator.h"
+#include "cuCgPreconditioner.h"
+#include "cuNFFT.h"
+#include "cuImageOperator.h"
+
+#include <ismrmrd/ismrmrd.h>
+#include <complex>
+
+namespace Gadgetron{
+
+  class EXPORTGADGETS_GPUPMRI gpuCgSpiritGadget : public Gadget2<ISMRMRD::ImageHeader, GenericReconJob>
+  {
+    // Gadget that runs a CG solve (NFFT encoding operator plus optional Spirit regularization) on each GenericReconJob and emits one image per retained frame.
+  public:
+
+    GADGET_DECLARE(gpuCgSpiritGadget);
+
+    gpuCgSpiritGadget();
+    virtual ~gpuCgSpiritGadget();
+
+  protected:
+
+    virtual int process( GadgetContainerMessage< ISMRMRD::ImageHeader > *m1, GadgetContainerMessage< GenericReconJob > *m2 ); // reconstructs one job; forwards headers of other sets/slices
+    virtual int process_config( ACE_Message_Block* mb ); // reads parameters and the ISMRMRD header, sets up the solver
+
+    int channels_;        // receiverChannels from the ISMRMRD header, or 1 when absent
+    int device_number_;   // "deviceno" parameter, wrapped modulo the CUDA device count
+    int set_number_;      // "setno" parameter; process() only reconstructs headers with this set
+    int slice_number_;    // "sliceno" parameter; process() only reconstructs headers with this slice
+
+    uint64d2 matrix_size_;      // first two dimensions of the job's regularization image
+    uint64d2 matrix_size_os_;   // matrix_size_ scaled by oversampling_factor_, rounded up to a warp-size multiple
+    uint64d2 matrix_size_seq_;  // recon-space matrix size from the header; results are cropped to it
+
+    unsigned int number_of_iterations_;   // "number_of_iterations": maximum CG iterations
+    double cg_limit_;                     // "cg_limit": passed to the solver's set_tc_tolerance()
+    double oversampling_factor_;          // "oversampling_factor"
+    double kernel_width_;                 // "kernel_width": passed to E_->setup()
+    double kappa_;                        // "kappa": weight of S_; S_ is only added to the solver when > 0
+    unsigned int rotations_to_discard_;   // "rotations_to_discard": must be even, half is dropped at each end
+
+    bool output_convergence_;    // selects OUTPUT_VERBOSE instead of OUTPUT_SILENT for the solver
+    bool output_timing_;         // enables the GPUTimer instances in process()
+    bool matrix_size_reported_;  // set once the matrix sizes have been logged
+    bool is_configured_;         // set at the end of the one-time setup in process_config()
+
+    // Define conjugate gradient solver
+    cuCgSolver<float_complext> cg_;
+
+    // Define Spirit encoding operator (NFFT)
+    boost::shared_ptr< cuNFFTOperator<float,2> > E_;
+
+    // Define Spirit regularization operator (convolution consistency)
+    boost::shared_ptr< cuSpirit2DOperator<float> > S_;
+
+    // Define preconditioner
+    //boost::shared_ptr< cuCgPreconditioner<float_complext> > D_;
+
+    // Define regularization image operator
+    //boost::shared_ptr< cuImageOperator<float_complext> > R_;
+    
+    unsigned int frame_counter_; // running offset added to image_index; advanced by the frame count after each job
+  };
+}
+#endif //gpuCgSpiritGadget
diff --git a/gadgets/pmri/gpuGenericSensePrepGadget.cpp b/gadgets/pmri/gpuGenericSensePrepGadget.cpp
new file mode 100644
index 0000000..90e1e41
--- /dev/null
+++ b/gadgets/pmri/gpuGenericSensePrepGadget.cpp
@@ -0,0 +1,941 @@
+#include "gpuGenericSensePrepGadget.h"
+#include "Gadgetron.h"
+#include "cuNonCartesianSenseOperator.h"
+#include "GenericReconJob.h"
+#include "cuNDArray_elemwise.h"
+#include "cuNDArray_utils.h"
+#include "hoNDArray_utils.h"
+#include "vector_td_operators.h"
+#include "b1_map.h"
+#include "GPUTimer.h"
+#include "check_CUDA.h"
+#include "hoNDArray_fileio.h"
+#include "ismrmrd/xml.h"
+
+#include <algorithm>
+#include <vector>
+#include <cmath>
+#include <stdexcept>
+
+namespace Gadgetron{
+
+  gpuGenericSensePrepGadget::gpuGenericSensePrepGadget()
+    : slices_(-1), sets_(-1), device_number_(-1), samples_per_readout_(-1)
+  {
+    // Fallback {name, value} pairs used when the config does not contain a specification.
+    // They are registered in the order listed.
+    const char* const fallback[][2] = {
+      { "deviceno",                               "0"     },
+      { "rotations_per_reconstruction",           "0"     },
+      { "propagate_csm_from_set",                 "-1"    },
+      { "buffer_length_in_rotations",             "0"     },
+      { "buffer_using_solver",                    "false" },
+      { "buffer_convolution_kernel_width",        "5.5"   },
+      { "buffer_convolution_oversampling_factor", "1.25"  },
+      { "reconstruction_os_factor_x",             "1.0"   },
+      { "reconstruction_os_factor_y",             "1.0"   }
+    };
+    for( unsigned int k=0; k<sizeof(fallback)/sizeof(fallback[0]); k++ )
+      set_parameter(fallback[k][0], fallback[k][1]);
+  }
+  
+  gpuGenericSensePrepGadget::~gpuGenericSensePrepGadget() {} // empty body: no explicit cleanup is performed here
+  
+  int gpuGenericSensePrepGadget::process_config(ACE_Message_Block* mb)
+  {
+    // Configures the gadget from its parameters and the ISMRMRD header carried by mb: validates the settings, selects the CUDA device and allocates all per slice/set state. Returns GADGET_OK or GADGET_FAIL.
+    // Get configuration values from config file
+
+    device_number_ = get_int_value(std::string("deviceno").c_str());
+    rotations_per_reconstruction_ = get_int_value(std::string("rotations_per_reconstruction").c_str());
+    buffer_length_in_rotations_ = get_int_value(std::string("buffer_length_in_rotations").c_str());
+    buffer_using_solver_ = get_bool_value(std::string("buffer_using_solver").c_str());
+    output_timing_ = get_bool_value(std::string("output_timing").c_str());
+
+    // Currently there are some restrictions on the allowed sliding window configurations
+    // (see the three checks below)
+    
+    sliding_window_readouts_ = get_int_value(std::string("sliding_window_readouts").c_str());
+    sliding_window_rotations_ = get_int_value(std::string("sliding_window_rotations").c_str());
+
+    if( sliding_window_readouts_>0 && sliding_window_rotations_>0 ){
+      GADGET_DEBUG1( "Error: Sliding window reconstruction is not yet supported for both readouts and frames simultaneously.\n" );
+      return GADGET_FAIL;
+    }
+
+    if( sliding_window_readouts_>0 && rotations_per_reconstruction_>0 ){
+      GADGET_DEBUG1( "Error: Sliding window reconstruction over readouts is not yet supported for multiframe reconstructions.\n" );
+      return GADGET_FAIL;
+    }
+    
+    if( sliding_window_rotations_ > 0 && sliding_window_rotations_ >= rotations_per_reconstruction_ ){
+      GADGET_DEBUG1( "Error: Illegal sliding window configuration.\n" );
+      return GADGET_FAIL;
+    }
+
+    // Setup and validate device configuration
+    // (a too-large device number is wrapped modulo the device count)
+
+    int number_of_devices;
+    if (cudaGetDeviceCount(&number_of_devices)!= cudaSuccess) {
+      GADGET_DEBUG1( "Error: unable to query number of CUDA devices.\n" );
+      return GADGET_FAIL;
+    }
+
+    if (number_of_devices == 0) {
+      GADGET_DEBUG1( "Error: No available CUDA devices.\n" );
+      return GADGET_FAIL;
+    }
+
+    if (device_number_ >= number_of_devices) {
+      GADGET_DEBUG2("Adjusting device number from %d to %d\n", device_number_,  (device_number_%number_of_devices));
+      device_number_ = (device_number_%number_of_devices);
+    }
+
+    if (cudaSetDevice(device_number_)!= cudaSuccess) {
+      GADGET_DEBUG1( "Error: unable to set CUDA device.\n" );
+      return GADGET_FAIL;
+    }
+
+    cudaDeviceProp deviceProp;
+    if( cudaGetDeviceProperties( &deviceProp, device_number_ ) != cudaSuccess) {
+      GADGET_DEBUG1( "Error: unable to query device properties.\n" );
+      return GADGET_FAIL;
+    }
+    
+    unsigned int warp_size = deviceProp.warpSize;
+
+    // It is possible to specify one set to use for csm propagation, and then propagate this to all sets
+    // (values greater than 0 are rejected below)
+
+    propagate_csm_from_set_ = get_int_value(std::string("propagate_csm_from_set").c_str());
+
+    if( propagate_csm_from_set_ > 0 ){
+      GADGET_DEBUG2("Currently, only set 0 can propagate coil sensitivity maps. Set %d was specified.\n", propagate_csm_from_set_ );
+      return GADGET_FAIL;
+    }
+
+    if( propagate_csm_from_set_ >= 0 ){
+      GADGET_DEBUG2("Propagating csm from set %d to all sets\n", propagate_csm_from_set_ );
+    }
+
+    // Convolution kernel width and oversampling ratio (for the buffer)
+    //
+
+    kernel_width_ = get_double_value(std::string("buffer_convolution_kernel_width").c_str());
+    oversampling_factor_ = get_double_value(std::string("buffer_convolution_oversampling_factor").c_str());
+
+    // Get the Ismrmrd header
+    // (deserialized from the bytes at mb->rd_ptr())
+    ISMRMRD::IsmrmrdHeader h;
+    ISMRMRD::deserialize(mb->rd_ptr(),h);
+    
+    if (h.encoding.size() != 1) {
+      GADGET_DEBUG1("This Gadget only supports one encoding space\n");
+      return GADGET_FAIL;
+    }
+    
+    // Get the encoding space and trajectory description
+    ISMRMRD::EncodingSpace e_space = h.encoding[0].encodedSpace;
+    ISMRMRD::EncodingSpace r_space = h.encoding[0].reconSpace;
+    ISMRMRD::EncodingLimits e_limits = h.encoding[0].encodingLimits;
+
+    // Matrix sizes (as a multiple of the GPU's warp size)
+    // (each size is rounded up to the next multiple of warp_size)
+    
+    image_dimensions_.push_back(((e_space.matrixSize.x+warp_size-1)/warp_size)*warp_size);
+    image_dimensions_.push_back(((e_space.matrixSize.y+warp_size-1)/warp_size)*warp_size);
+
+    image_dimensions_recon_.push_back(((static_cast<unsigned int>(std::ceil(e_space.matrixSize.x*get_double_value(std::string("reconstruction_os_factor_x").c_str())))+warp_size-1)/warp_size)*warp_size);  
+    image_dimensions_recon_.push_back(((static_cast<unsigned int>(std::ceil(e_space.matrixSize.y*get_double_value(std::string("reconstruction_os_factor_y").c_str())))+warp_size-1)/warp_size)*warp_size);
+    
+    image_dimensions_recon_os_ = uint64d2
+      (((static_cast<unsigned int>(std::ceil(image_dimensions_recon_[0]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size,
+       ((static_cast<unsigned int>(std::ceil(image_dimensions_recon_[1]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size);
+    
+    // In case the warp_size constraint kicked in
+    oversampling_factor_ = float(image_dimensions_recon_os_[0])/float(image_dimensions_recon_[0]); 
+    // The sizes are cast to int explicitly so that the arguments match the %d conversions
+    GADGET_DEBUG2("matrix_size_x : %d, recon: %d, recon_os: %d\n", 
+                  static_cast<int>(image_dimensions_[0]), static_cast<int>(image_dimensions_recon_[0]), static_cast<int>(image_dimensions_recon_os_[0]));
+
+    GADGET_DEBUG2("matrix_size_y : %d, recon: %d, recon_os: %d\n", 
+                  static_cast<int>(image_dimensions_[1]), static_cast<int>(image_dimensions_recon_[1]), static_cast<int>(image_dimensions_recon_os_[1]));
+    
+    fov_.push_back(r_space.fieldOfView_mm.x);
+    fov_.push_back(r_space.fieldOfView_mm.y);
+    fov_.push_back(r_space.fieldOfView_mm.z);
+    // Slice/set counts come from the encoding limits (maximum + 1), defaulting to 1 when a limit is absent
+    slices_ = e_limits.slice ? e_limits.slice->maximum + 1 : 1;
+    sets_ = e_limits.set ? e_limits.set->maximum + 1 : 1;
+    
+    // Allocate readout and trajectory queues
+    // - one queue for the currently incoming frame
+    // - one queue for the upcoming reconstruction
+
+    frame_readout_queue_ = boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>[slices_*sets_]);
+    recon_readout_queue_ = boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>[slices_*sets_]);
+    frame_traj_queue_ = boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>[slices_*sets_]);
+    recon_traj_queue_ = boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>[slices_*sets_]);
+    image_headers_queue_ = boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>[slices_*sets_]);
+    
+    size_t bsize = sizeof(GadgetContainerMessage< hoNDArray< std::complex<float> > >)*image_dimensions_[0]*10;
+    
+    for( unsigned int i=0; i<slices_*sets_; i++ ){
+      frame_readout_queue_[i].high_water_mark(bsize);
+      frame_readout_queue_[i].low_water_mark(bsize);
+      frame_traj_queue_[i].high_water_mark(bsize);
+      frame_traj_queue_[i].low_water_mark(bsize);
+    }
+    
+    bsize *= (rotations_per_reconstruction_+1);
+    
+    for( unsigned int i=0; i<slices_*sets_; i++ ){
+      recon_readout_queue_[i].high_water_mark(bsize);
+      recon_readout_queue_[i].low_water_mark(bsize);
+      recon_traj_queue_[i].high_water_mark(bsize);
+      recon_traj_queue_[i].low_water_mark(bsize);
+    }
+    
+    // Define various per slice/set variables
+    // (each array has slices_*sets_ entries)
+
+    previous_readout_no_ = boost::shared_array<long>(new long[slices_*sets_]);
+    acceleration_factor_ = boost::shared_array<long>(new long[slices_*sets_]);
+    image_counter_ = boost::shared_array<long>(new long[slices_*sets_]);
+    readout_counter_frame_= boost::shared_array<long>(new long[slices_*sets_]);
+    readout_counter_global_= boost::shared_array<long>(new long[slices_*sets_]);
+    readouts_per_frame_= boost::shared_array<long>(new long[slices_*sets_]);
+    frames_per_rotation_= boost::shared_array<long>(new long[slices_*sets_]);
+    buffer_frames_per_rotation_= boost::shared_array<long>(new long[slices_*sets_]);
+    buffer_update_needed_ = boost::shared_array<bool>(new bool[slices_*sets_]);
+    reconfigure_ = boost::shared_array<bool>(new bool[slices_*sets_]);
+    num_coils_ = boost::shared_array<unsigned int>(new unsigned int[slices_*sets_]);
+    
+    for( unsigned int i=0; i<slices_*sets_; i++ ){
+
+      previous_readout_no_[i] = -1;
+      acceleration_factor_[i] = -1;
+      image_counter_[i] = 0;
+      readout_counter_frame_[i] = 0;
+      readout_counter_global_[i] = 0;
+      readouts_per_frame_[i] = get_int_value(std::string("readouts_per_frame").c_str());
+      frames_per_rotation_[i] = get_int_value(std::string("frames_per_rotation").c_str());
+      buffer_frames_per_rotation_[i] = get_int_value(std::string("buffer_frames_per_rotation").c_str());
+      num_coils_[i] = 0;
+      buffer_update_needed_[i] = true;
+      reconfigure_[i] = true;
+
+      // Assign some default values ("upper bound estimates") of the (possibly) unknown entities
+      // (a value of 0 is treated as "not specified")
+      
+      if( readouts_per_frame_[i] == 0 ){
+        readouts_per_frame_[i] = image_dimensions_[0];
+      }
+      
+      if( frames_per_rotation_[i] == 0 ){
+        frames_per_rotation_[i] = image_dimensions_[0]/readouts_per_frame_[i];
+      }
+
+      // Also remember to set the high/low water marks of the ISMRMRD image header queue
+      //
+
+      bsize = sizeof(GadgetContainerMessage<ISMRMRD::ImageHeader>)*100*
+        std::max(1L, frames_per_rotation_[i]*rotations_per_reconstruction_);
+    
+      image_headers_queue_[i].high_water_mark(bsize);
+      image_headers_queue_[i].low_water_mark(bsize);
+    }
+
+    // If need be the following limitation can be lifted, but it would be a little tedious... 
+    // Both checks below reject the configuration with GADGET_FAIL.
+
+    if( buffer_using_solver_ && rotations_per_reconstruction_ < 1 ) {
+      GADGET_DEBUG1("Error: when buffering using a cg solver, 'rotations_per_reconstruction' must be specified (and strictly positive).\n");
+      return GADGET_FAIL;
+    }
+
+    if( buffer_using_solver_ && ( buffer_frames_per_rotation_[0] > 0 || buffer_length_in_rotations_ > 0 ) ){
+      GADGET_DEBUG1("Error: when buffering using a cg solver, we currently do not support specification of 'buffer_frames_per_rotation' or 'buffer_length_in_rotations'. These values are instead automatically set to match the reconstruction settings.\n");
+      return GADGET_FAIL;
+    }
+            
+    position_ = boost::shared_array<float[3]>(new float[slices_*sets_][3]);
+    read_dir_ = boost::shared_array<float[3]>(new float[slices_*sets_][3]);
+    phase_dir_ = boost::shared_array<float[3]>(new float[slices_*sets_][3]);
+    slice_dir_ = boost::shared_array<float[3]>(new float[slices_*sets_][3]);
+
+    for( unsigned int i=0; i<slices_*sets_; i++ ){
+      (position_[i])[0] = (position_[i])[1] = (position_[i])[2] = 0.0f;
+      (read_dir_[i])[0] = (read_dir_[i])[1] = (read_dir_[i])[2] = 0.0f;
+      (phase_dir_[i])[0] = (phase_dir_[i])[1] = (phase_dir_[i])[2] = 0.0f;
+      (slice_dir_[i])[0] = (slice_dir_[i])[1] = (slice_dir_[i])[2] = 0.0f;
+    }
+
+    // Allocate accumulation buffer
+    // (only the variant selected by buffer_using_solver_ is allocated)
+
+    if( buffer_using_solver_ )
+      acc_buffer_cg_ = boost::shared_array< cuSenseBufferCg<float,2> >(new cuSenseBufferCg<float,2>[slices_*sets_]);
+    else
+      acc_buffer_ = boost::shared_array< cuSenseBuffer<float,2> >(new cuSenseBuffer<float,2>[slices_*sets_]);
+    
+    // Allocate remaining shared_arrays
+    //
+    
+    csm_host_ = boost::shared_array< hoNDArray<float_complext> >(new hoNDArray<float_complext>[slices_*sets_]);
+    reg_host_ = boost::shared_array< hoNDArray<float_complext> >(new hoNDArray<float_complext>[slices_*sets_]);
+
+    return GADGET_OK;
+  }
+
+  int gpuGenericSensePrepGadget::
+  process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader> *m1,           // header
+          GadgetContainerMessage< hoNDArray< std::complex<float> > > *m2,   // data
+          GadgetContainerMessage< hoNDArray<float> > *m3)                   // traj/dcw
+  {
+    // Noise should have been consumed by the noise adjust (if in the gadget chain)
+    //
+    
+    bool is_noise = m1->getObjectPtr()->isFlagSet(ISMRMRD::ISMRMRD_ACQ_IS_NOISE_MEASUREMENT);
+    if (is_noise) { 
+      m1->release();
+      return GADGET_OK;
+    }
+
+    // Setup timer if asked for
+    //
+
+    boost::shared_ptr<GPUTimer> process_timer;
+    if( output_timing_ )
+      process_timer = boost::shared_ptr<GPUTimer>( new GPUTimer("gpuGenericSensePrepGadget::process()") );
+
+    // Some convienient utility variables
+    //
+
+    unsigned int set = m1->getObjectPtr()->idx.set;
+    unsigned int slice = m1->getObjectPtr()->idx.slice;
+    unsigned int readout = m1->getObjectPtr()->idx.kspace_encode_step_1;
+    unsigned int idx = set*slices_+slice;
+
+    // Get a pointer to the accumulation buffer. 
+    //
+
+    cuSenseBuffer<float,2> *acc_buffer = 
+      (buffer_using_solver_) ? &acc_buffer_cg_[idx] : &acc_buffer_[idx];
+
+    // Have the imaging plane changed?
+    //
+
+    if( !vec_equal(position_[idx], m1->getObjectPtr()->position) ||
+        !vec_equal(read_dir_[idx], m1->getObjectPtr()->read_dir) || 
+        !vec_equal(phase_dir_[idx], m1->getObjectPtr()->phase_dir) ||
+        !vec_equal(slice_dir_[idx], m1->getObjectPtr()->slice_dir) ){
+      
+      // Yes indeed, clear the accumulation buffer and update structs
+      //
+
+      acc_buffer->clear();
+      buffer_update_needed_[idx] = true;
+      
+      memcpy(position_[idx],m1->getObjectPtr()->position,3*sizeof(float));
+      memcpy(read_dir_[idx],m1->getObjectPtr()->read_dir,3*sizeof(float));
+      memcpy(phase_dir_[idx],m1->getObjectPtr()->phase_dir,3*sizeof(float));
+      memcpy(slice_dir_[idx],m1->getObjectPtr()->slice_dir,3*sizeof(float));
+    }
+    
+    // Only when the first readout arrives, do we know the #samples/readout
+    //
+
+    if( samples_per_readout_ == -1 )      
+      samples_per_readout_ = m1->getObjectPtr()->number_of_samples;
+    
+    if( samples_per_readout_ != m1->getObjectPtr()->number_of_samples ){
+      GADGET_DEBUG1("Unexpected change in the readout length\n");
+      return GADGET_FAIL;
+    }
+    
+    bool new_frame_detected = false;
+
+    // Reconfigure at first pass
+    // - or if the number of coil changes
+    // - or if the reconfigure_ flag is set
+
+    if( num_coils_[idx] != m1->getObjectPtr()->active_channels ){
+      GADGET_DEBUG1("Reconfiguring (the number of coils changed)\n");
+      num_coils_[idx] = m1->getObjectPtr()->active_channels;
+      reconfigure(set, slice);
+    }
+
+    if( reconfigure_[idx] ){
+      GADGET_DEBUG1("Reconfiguring (due to boolean indicator)\n");
+      reconfigure(set, slice);
+    }
+
+    // Keep track of the incoming readout ids
+    // - to determine the number of readouts per frame
+    // - to determine the number of frames per rotation
+
+    if (previous_readout_no_[idx] >= 0) {
+
+      if ( readout > previous_readout_no_[idx]) { 
+        // This is not the last readout in the frame.
+        // Make an estimate of the acceleration factor
+        //
+	
+        long tmp_accel = readout - previous_readout_no_[idx];
+
+        if( acceleration_factor_[idx] != tmp_accel )
+          GADGET_DEBUG2("Detected an acceleration factor of %d\n", tmp_accel);
+	
+        acceleration_factor_[idx] = tmp_accel;
+      }
+      else{ 
+
+        // This is the first readout in a new frame
+        //
+
+        if( get_int_value(std::string("readouts_per_frame").c_str()) == 0 &&
+            readout_counter_frame_[idx] > 0 &&
+            readout_counter_frame_[idx] != readouts_per_frame_[idx] ){ 
+
+          // A new acceleration factor is detected
+          //
+
+          GADGET_DEBUG1("Reconfiguring (acceleration factor changed)\n");
+
+          new_frame_detected = true;
+          readouts_per_frame_[idx] = readout_counter_frame_[idx];
+
+          // Assume that #frames/rotation equals the acceleration factor
+          // If not, or if we cannot deduce the acceleration factor from the difference
+          // of two subsequent readout ids, then 'frames_per_rotation' have to be specified in the config...
+          //
+	    
+          if( get_int_value(std::string("frames_per_rotation").c_str()) == 0 ) {
+            frames_per_rotation_[idx] = acceleration_factor_[idx];
+          }
+          reconfigure(set, slice);
+        }
+      }
+    }
+    previous_readout_no_[idx] = readout;
+
+    // Enqueue readout
+    // - unless 'new_frame_detected', then the current readout does not belong to the current frame and we delay enqueueing
+
+    if( !new_frame_detected ) {
+      
+      // Memory handling is easier if we make copies for our internal queues
+      frame_readout_queue_[idx].enqueue_tail(duplicate_array(m2));
+      recon_readout_queue_[idx].enqueue_tail(duplicate_array(m2));
+      frame_traj_queue_[idx].enqueue_tail(duplicate_array(m3));
+      recon_traj_queue_[idx].enqueue_tail(duplicate_array(m3));
+    }
+
+    // If the readout is the last of a "true frame" (ignoring any sliding window readouts)
+    // - then update the accumulation buffer
+
+    bool is_last_readout_in_frame = (readout_counter_frame_[idx] == readouts_per_frame_[idx]-1);
+    is_last_readout_in_frame |= new_frame_detected;
+
+    cuNDArray<floatd2> traj;
+    cuNDArray<float> dcw;
+    
+    if( is_last_readout_in_frame ){
+
+      // Get ready to update the csm/regularization buffer
+      //
+
+      // Extract this frame's samples 
+      //
+
+      boost::shared_ptr< hoNDArray<float_complext> > host_samples = 
+        extract_samples_from_queue( &frame_readout_queue_[idx], false, set, slice );
+            
+      cuNDArray<float_complext> samples( host_samples.get() );
+
+      // Extract this frame's trajectory and dcw.
+      //
+
+      extract_trajectory_and_dcw_from_queue( &frame_traj_queue_[idx], false, set, slice, 
+                                             samples_per_readout_*readouts_per_frame_[idx], 1,
+                                             &traj, &dcw );
+
+      // Scale dcw weights to the area of the oversampled recon matrix size
+      float scale_factor = float(prod(image_dimensions_recon_os_))/asum(&dcw);
+      dcw *= scale_factor;
+      
+      // Add this frame to the buffer
+      //
+
+      acc_buffer->set_dcw(boost::shared_ptr< cuNDArray<float> >(new cuNDArray<float>(&dcw)));
+      buffer_update_needed_[idx] |= acc_buffer->add_frame_data( &samples, &traj );
+    }
+
+    // Are we ready to reconstruct (downstream)?
+    //
+
+    long readouts_per_reconstruction = readouts_per_frame_[idx];
+
+    if( rotations_per_reconstruction_ > 0 )
+      readouts_per_reconstruction *= (frames_per_rotation_[idx]*rotations_per_reconstruction_);
+    
+    bool is_last_readout_in_reconstruction = ( recon_readout_queue_[idx].message_count() == readouts_per_reconstruction );
+
+    // Prepare the image header for this frame
+    // - if this is indeed the last profile of a new frame
+    // - or if we are about to reconstruct due to 'sliding_window_profiles_' > 0
+    
+    if( is_last_readout_in_frame || 
+        (is_last_readout_in_reconstruction && image_headers_queue_[idx].message_count() == 0) ){
+      
+      GadgetContainerMessage<ISMRMRD::ImageHeader> *header = new GadgetContainerMessage<ISMRMRD::ImageHeader>();
+      ISMRMRD::AcquisitionHeader *base_head = m1->getObjectPtr();
+
+      {
+        // Initialize header to all zeroes (there are a few fields we do not set yet)
+        ISMRMRD::ImageHeader tmp;
+        *(header->getObjectPtr()) = tmp;
+      }
+
+      header->getObjectPtr()->version = base_head->version;
+
+      header->getObjectPtr()->matrix_size[0] = image_dimensions_recon_[0];
+      header->getObjectPtr()->matrix_size[1] = image_dimensions_recon_[1];
+      header->getObjectPtr()->matrix_size[2] = std::max(1L,frames_per_rotation_[idx]*rotations_per_reconstruction_);
+
+      header->getObjectPtr()->field_of_view[0] = fov_[0];
+      header->getObjectPtr()->field_of_view[1] = fov_[1];
+      header->getObjectPtr()->field_of_view[2] = fov_[2];
+
+      header->getObjectPtr()->channels = num_coils_[idx];
+      header->getObjectPtr()->slice = base_head->idx.slice;
+      header->getObjectPtr()->set = base_head->idx.set;
+
+      header->getObjectPtr()->acquisition_time_stamp = base_head->acquisition_time_stamp;
+      memcpy(header->getObjectPtr()->physiology_time_stamp, base_head->physiology_time_stamp, sizeof(uint32_t)*ISMRMRD::ISMRMRD_PHYS_STAMPS);
+
+      memcpy(header->getObjectPtr()->position, base_head->position, sizeof(float)*3);
+      memcpy(header->getObjectPtr()->read_dir, base_head->read_dir, sizeof(float)*3);
+      memcpy(header->getObjectPtr()->phase_dir, base_head->phase_dir, sizeof(float)*3);
+      memcpy(header->getObjectPtr()->slice_dir, base_head->slice_dir, sizeof(float)*3);
+      memcpy(header->getObjectPtr()->patient_table_position, base_head->patient_table_position, sizeof(float)*3);
+
+      header->getObjectPtr()->data_type = ISMRMRD::ISMRMRD_CXFLOAT;
+      header->getObjectPtr()->image_index = image_counter_[idx]++; 
+      header->getObjectPtr()->image_series_index = idx;
+
+      image_headers_queue_[idx].enqueue_tail(header);
+    }
+    
+    // If it is time to reconstruct (downstream) then prepare the Sense job
+    // 
+
+    if( is_last_readout_in_reconstruction ){
+      
+      // Update csm and regularization images if the buffer has changed (completed a cycle) 
+      // - and at the first pass
+
+      if( buffer_update_needed_[idx] || 
+          csm_host_[idx].get_number_of_elements() == 0 || 
+          reg_host_[idx].get_number_of_elements() == 0 ){
+
+        // Get the accumulated coil images
+        //
+
+        boost::shared_ptr< cuNDArray<float_complext> > csm_data = acc_buffer->get_accumulated_coil_images();
+
+        // Estimate CSM
+        //
+
+        if( propagate_csm_from_set_ < 0 || propagate_csm_from_set_ == set ){	  	  
+          csm_ = estimate_b1_map<float,2>( csm_data.get() );
+        }
+        else{
+          GADGET_DEBUG2("Set %d is reusing the csm from set %d\n", set, propagate_csm_from_set_);
+          if( csm_.get() == 0x0 ){
+            GADGET_DEBUG1("Error: csm has not been computed, cannot propagate\n");
+            return GADGET_FAIL;
+          }	  
+        }
+
+        acc_buffer->set_csm(csm_);
+        csm_host_[idx] = *(csm_->to_host());
+	
+        // Compute regularization image
+        //
+
+        boost::shared_ptr< cuNDArray<float_complext> > reg_image;
+        std::vector<size_t> dims;
+    	
+        if( buffer_using_solver_ ){
+
+          //GPUTimer timer("\n\n AVOIDABLE PREPROCESSING. HOW EXPENSIVE?\n\n");
+
+          extract_trajectory_and_dcw_from_queue( &recon_traj_queue_[idx], true, set, slice, 
+                                                 samples_per_readout_*readouts_per_frame_[idx],
+                                                 std::max(1L, frames_per_rotation_[idx]*rotations_per_reconstruction_),
+                                                 &traj, &dcw );
+
+          // Scale dcw weights to the area of the oversampled recon matrix size
+          float scale_factor = float(prod(image_dimensions_recon_os_))/asum(&dcw);
+          dcw *= scale_factor;
+
+          dims = *traj.get_dimensions();
+
+          std::vector<size_t> tmp_dims;
+          tmp_dims.push_back(dims[0]*dims[1]);
+          tmp_dims.push_back(1);
+	  
+          traj.reshape(&tmp_dims);
+          dcw.reshape(&tmp_dims);
+	  
+          ((cuSenseBufferCg<float,2>*)acc_buffer)->preprocess(&traj);
+          ((cuSenseBufferCg<float,2>*)acc_buffer)->set_dcw_for_rhs(boost::shared_ptr< cuNDArray<float> >(new cuNDArray<float>(&dcw)));
+        }
+
+        reg_image = acc_buffer->get_combined_coil_image();	
+        reg_host_[idx] = *(reg_image->to_host());
+	
+        if( buffer_using_solver_ ){
+          traj.reshape(&dims);
+          dcw.reshape(&dims);
+        }
+	
+        /*
+          static int counter = 0;
+          char filename[256];
+          sprintf((char*)filename, "reg_%d.cplx", counter);
+          write_nd_array<float_complext>( &reg_host_[idx], filename );
+          counter++; */
+
+        buffer_update_needed_[idx] = false;
+      }
+
+      // Prepare data array for the downstream reconstruction
+      //
+      
+      boost::shared_ptr< hoNDArray<float_complext> > samples_host = 
+        extract_samples_from_queue( &recon_readout_queue_[idx], true, set, slice );
+      
+      // Prepare the trajectory and dcw arrays.
+      // They have already been computed above 
+      // - if 'rotations_per_reconstruction_' is 0
+      // - if 'buffer_using_solver_' is true
+      
+      if( !(/*rotations_per_reconstruction_ == 0 ||*/ buffer_using_solver_) ){
+      	extract_trajectory_and_dcw_from_queue( &recon_traj_queue_[idx], true, set, slice, 
+                                               samples_per_readout_*readouts_per_frame_[idx],
+                                               std::max(1L, frames_per_rotation_[idx]*rotations_per_reconstruction_),
+                                               &traj, &dcw );
+      }
+
+      // Set up the Sense job
+      //
+
+      GadgetContainerMessage< GenericReconJob > *sj = new GadgetContainerMessage<GenericReconJob>();
+      	
+      sj->getObjectPtr()->dat_host_ = samples_host;      
+      sj->getObjectPtr()->tra_host_ = traj.to_host();
+      sj->getObjectPtr()->dcw_host_ = dcw.to_host();
+      sj->getObjectPtr()->csm_host_ = boost::shared_ptr< hoNDArray<float_complext> >( new hoNDArray<float_complext>(csm_host_[idx]));
+      sj->getObjectPtr()->reg_host_ = boost::shared_ptr< hoNDArray<float_complext> >( new hoNDArray<float_complext>(reg_host_[idx]));
+      
+      // Pull the image headers out of the queue
+      //
+
+      long frames_per_reconstruction = 
+        std::max( 1L, frames_per_rotation_[idx]*rotations_per_reconstruction_ );
+      
+      if( image_headers_queue_[idx].message_count() != frames_per_reconstruction ){
+        sj->release();
+        GADGET_DEBUG2("Unexpected size of image header queue: %d, %d\n", 
+                      image_headers_queue_[idx].message_count(), frames_per_reconstruction);
+        return GADGET_FAIL;
+      }
+      
+      sj->getObjectPtr()->image_headers_ =
+        boost::shared_array<ISMRMRD::ImageHeader>( new ISMRMRD::ImageHeader[frames_per_reconstruction] );
+      
+      for( unsigned int i=0; i<frames_per_reconstruction; i++ ){	
+
+        ACE_Message_Block *mbq;
+
+        if( image_headers_queue_[idx].dequeue_head(mbq) < 0 ) {
+          sj->release();
+          GADGET_DEBUG1("Image header dequeue failed\n");
+          return GADGET_FAIL;
+        }
+	
+        GadgetContainerMessage<ISMRMRD::ImageHeader> *m = AsContainerMessage<ISMRMRD::ImageHeader>(mbq);
+        sj->getObjectPtr()->image_headers_[i] = *m->getObjectPtr();
+
+        // In sliding window mode the header might need to go back at the end of the queue for reuse
+        // 
+	
+        if( i >= frames_per_reconstruction-sliding_window_rotations_*frames_per_rotation_[idx] ){
+          image_headers_queue_[idx].enqueue_tail(m);
+        }
+        else {
+          m->release();
+        }
+      }
+      
+      // The Sense Job needs an image header as well. 
+      // Let us just copy the initial one...
+
+      GadgetContainerMessage<ISMRMRD::ImageHeader> *m4 = new GadgetContainerMessage<ISMRMRD::ImageHeader>;
+
+      *m4->getObjectPtr() = sj->getObjectPtr()->image_headers_[0];
+      m4->cont(sj);
+
+      // Pass the Sense job downstream
+      //
+      
+      if (this->next()->putq(m4) < 0) {
+        GADGET_DEBUG1("Failed to put job on queue.\n");
+        m4->release();
+        return GADGET_FAIL;
+      }
+    }
+    
+    if( is_last_readout_in_frame )
+      readout_counter_frame_[idx] = 0;
+    else{
+      readout_counter_frame_[idx]++;
+    }
+
+    if( new_frame_detected ){
+
+      // The incoming profile was actually the first readout of the next frame, enqueue.
+      //
+
+      frame_readout_queue_[idx].enqueue_tail(duplicate_array(m2));
+      recon_readout_queue_[idx].enqueue_tail(duplicate_array(m2)); 
+      frame_traj_queue_[idx].enqueue_tail(duplicate_array(m3));
+      recon_traj_queue_[idx].enqueue_tail(duplicate_array(m3)); 
+
+      readout_counter_frame_[idx]++;
+    }
+
+    readout_counter_global_[idx]++;
+
+    if( output_timing_ )
+      process_timer.reset();
+    
+    m1->release(); // this is safe, the internal queues hold copies
+    return GADGET_OK;
+  }
+  
+  // Dequeues every buffered readout from 'queue' and packs the samples into one host array of
+  // dimensions (samples_per_readout_*#readouts) x #coils. With 'sliding_window' set, the trailing
+  // readouts are re-enqueued for reuse. Throws std::runtime_error on queue or message errors.
+  boost::shared_ptr< hoNDArray<float_complext> > 
+  gpuGenericSensePrepGadget::extract_samples_from_queue ( ACE_Message_Queue<ACE_MT_SYNCH> *queue, 
+                                                          bool sliding_window, unsigned int set, unsigned int slice )
+  {    
+    const unsigned int idx = set*slices_+slice;
+    const unsigned int num_coils = num_coils_[idx];
+    const unsigned int readouts_buffered = queue->message_count();
+    
+    std::vector<size_t> dims;
+    dims.push_back(samples_per_readout_*readouts_buffered);
+    dims.push_back(num_coils);
+    
+    boost::shared_ptr< hoNDArray<float_complext> > host_samples(new hoNDArray<float_complext>(&dims));
+
+    // In sliding window mode the last readouts go back at the end of the queue. The threshold is
+    // computed once, in signed arithmetic, as the window may exceed the number of buffered readouts.
+    const long readouts_in_sliding_window = sliding_window_readouts_ + 
+      readouts_per_frame_[idx]*frames_per_rotation_[idx]*sliding_window_rotations_;
+    const long first_readout_to_requeue = long(readouts_buffered)-readouts_in_sliding_window;
+    
+    for (unsigned int p=0; p<readouts_buffered; p++) {
+      ACE_Message_Block* mbq;
+      if (queue->dequeue_head(mbq) < 0) {
+        GADGET_DEBUG1("Message dequeue failed\n");
+        throw std::runtime_error("gpuGenericSensePrepGadget::extract_samples_from_queue: dequeing failed");	
+      }
+      
+      GadgetContainerMessage< hoNDArray< std::complex<float> > > *daq = AsContainerMessage<hoNDArray< std::complex<float> > >(mbq);
+      if (!daq) {
+        mbq->release(); // already dequeued: release it here so that it does not leak
+        GADGET_DEBUG1("Unable to interpret data on message queue\n");
+        throw std::runtime_error("gpuGenericSensePrepGadget::extract_samples_from_queue: failed to interpret data");	
+      }
+
+      // Copy this readout coil by coil; the source stride is the first dimension of the incoming array
+      std::complex<float> *r_ptr = daq->getObjectPtr()->get_data_ptr();
+      float_complext *data_ptr = host_samples->get_data_ptr()+p*samples_per_readout_;
+      for (unsigned int c = 0; c < num_coils; c++) {
+        memcpy(data_ptr+c*samples_per_readout_*readouts_buffered, r_ptr+c*daq->getObjectPtr()->get_size(0), samples_per_readout_*sizeof(float_complext));
+      }
+
+      if( sliding_window && long(p) >= first_readout_to_requeue )
+        queue->enqueue_tail(mbq);
+      else
+        mbq->release();
+    } 
+    
+    return host_samples;
+  }
+  
+  // Dequeues every buffered trajectory/dcw message from 'queue' and packs them into one host array
+  // of dimensions 3 x samples_per_readout_ x #readouts. With 'sliding_window' set, the trailing
+  // readouts are re-enqueued for reuse. Throws std::runtime_error on invalid input or queue errors.
+  boost::shared_ptr< hoNDArray<float> > 
+  gpuGenericSensePrepGadget::extract_trajectory_from_queue ( ACE_Message_Queue<ACE_MT_SYNCH> *queue, 
+                                                             bool sliding_window, unsigned int set, unsigned int slice )
+  {    
+    if(!queue) {
+      GADGET_DEBUG1("Illegal queue pointer, cannot extract trajectory\n");
+      throw std::runtime_error("gpuGenericSensePrepGadget::extract_trajectory_from_queue: illegal queue pointer");	
+    }
+
+    if(queue->message_count()==0) {
+      GADGET_DEBUG1("Empty queue, cannot extract trajectory\n");
+      throw std::runtime_error("gpuGenericSensePrepGadget::extract_trajectory_from_queue: empty queue");	
+    }
+
+    if(samples_per_readout_ < 1) {
+      GADGET_DEBUG2("Invalid number of samples per readout (%d), cannot extract trajectory\n", int(samples_per_readout_));
+      throw std::runtime_error("gpuGenericSensePrepGadget::extract_trajectory_from_queue: invalid number of samples per readout");	
+    }
+    
+    const unsigned int idx = set*slices_+slice;
+    const unsigned int readouts_buffered = queue->message_count();
+    
+    std::vector<size_t> dims;
+    dims.push_back(3);
+    dims.push_back(samples_per_readout_);
+    dims.push_back(readouts_buffered);
+    
+    boost::shared_ptr< hoNDArray<float> > host_samples(new hoNDArray<float>(&dims));
+
+    // In sliding window mode the last readouts go back at the end of the queue. The threshold is
+    // computed once, in signed arithmetic, as the window may exceed the number of buffered readouts.
+    const long readouts_in_sliding_window = sliding_window_readouts_ + 
+      readouts_per_frame_[idx]*frames_per_rotation_[idx]*sliding_window_rotations_;
+    const long first_readout_to_requeue = long(readouts_buffered)-readouts_in_sliding_window;
+    
+    for (unsigned int p=0; p<readouts_buffered; p++) {      
+      ACE_Message_Block* mbq;
+      if (queue->dequeue_head(mbq) < 0) {
+        GADGET_DEBUG1("Message dequeue failed\n");
+        throw std::runtime_error("gpuGenericSensePrepGadget::extract_trajectory_from_queue: dequeing failed");	
+      }
+      GadgetContainerMessage< hoNDArray<float> > *daq = AsContainerMessage<hoNDArray<float> >(mbq);
+      if (!daq) {
+        mbq->release(); // already dequeued: release it here so that it does not leak
+        GADGET_DEBUG1("Unable to interpret data on message queue\n");
+        throw std::runtime_error("gpuGenericSensePrepGadget::extract_trajectory_from_queue: failed to interpret data");	
+      }
+
+      // Every message contributes 3 floats per sample, stored back to back in the output
+      float *data_ptr = host_samples->get_data_ptr()+3*samples_per_readout_*p;
+      memcpy(data_ptr, daq->getObjectPtr()->get_data_ptr(), 3*samples_per_readout_*sizeof(float));
+      
+      if( sliding_window && long(p) >= first_readout_to_requeue )
+        queue->enqueue_tail(mbq);
+      else
+        mbq->release();
+    } 
+    
+    return host_samples;
+  }
+  
+  // Drains 'queue' (via extract_trajectory_from_queue, which may throw) and splits the result into
+  // a floatd2 trajectory array and a float dcw array, both reshaped to samples_per_frame x num_frames.
+  void gpuGenericSensePrepGadget::extract_trajectory_and_dcw_from_queue
+  ( ACE_Message_Queue<ACE_MT_SYNCH> *queue, bool sliding_window, unsigned int set, unsigned int slice, 
+    unsigned int samples_per_frame, unsigned int num_frames,
+    cuNDArray<floatd2> *traj, cuNDArray<float> *dcw )
+  {
+    // The queued data come as a float array of dimensions: 3 x #samples_per_readout x #readouts.
+    //
+    
+    boost::shared_ptr< hoNDArray<float> > host_traj_dcw =
+      extract_trajectory_from_queue( queue, sliding_window, set, slice );
+    
+    // Permute to #samples_per_readout x #readouts x 3, i.e. three consecutive planes
+    std::vector<size_t> order;
+    order.push_back(1); order.push_back(2); order.push_back(0);
+    
+    boost::shared_ptr< hoNDArray<float> > host_traj_dcw_shifted =
+      permute( host_traj_dcw.get(), &order );
+    
+    std::vector<size_t> dims_1d;
+    dims_1d.push_back(host_traj_dcw_shifted->get_size(0)*host_traj_dcw_shifted->get_size(1));
+    
+    // The third plane is taken as the dcw
+    {
+      hoNDArray<float> host_dcw(&dims_1d, host_traj_dcw_shifted->get_data_ptr()+2*dims_1d[0]);
+      *dcw = host_dcw;
+    }
+    
+    // The first two planes are taken as the trajectory: they are permuted on the device so
+    // that the two components of a sample become adjacent, then reinterpreted as floatd2
+    std::vector<size_t> dims_2d = dims_1d;
+    dims_2d.push_back(2);
+    
+    order.clear();
+    order.push_back(1); order.push_back(0);
+
+    hoNDArray<float> host_traj(&dims_2d, host_traj_dcw_shifted->get_data_ptr());
+    cuNDArray<float> device_traj(&host_traj);
+    boost::shared_ptr< cuNDArray<float> > device_traj_permuted = permute( &device_traj, &order );
+    
+    cuNDArray<floatd2> traj_as_floatd2(&dims_1d, reinterpret_cast<floatd2*>(device_traj_permuted->get_data_ptr()));
+    *traj = traj_as_floatd2;
+    
+    // Final shape of both outputs: samples_per_frame x num_frames
+    dims_2d.clear();
+    dims_2d.push_back(samples_per_frame);
+    dims_2d.push_back(num_frames);
+
+    dcw->reshape(&dims_2d);
+    traj->reshape(&dims_2d);
+  }
+
+  template<class T> GadgetContainerMessage< hoNDArray<T> >*
+  gpuGenericSensePrepGadget::duplicate_array( GadgetContainerMessage< hoNDArray<T> > *array )
+  { // Returns a newly allocated message whose array is a copy of the one held by 'array'
+    GadgetContainerMessage< hoNDArray<T> > *duplicate = new GadgetContainerMessage< hoNDArray<T> >();
+    *duplicate->getObjectPtr() = *array->getObjectPtr();
+    return duplicate;
+  }
+
+  // Sets up the accumulation buffer in use for the given set/slice and clears its reconfigure flag
+  void gpuGenericSensePrepGadget::reconfigure(unsigned int set, unsigned int slice)
+  {    
+    unsigned int idx = set*slices_+slice;
+    // The counters are 'long' whereas '%d' expects an 'int', hence the explicit conversions
+    GADGET_DEBUG2("\nReconfiguring:\n#readouts/frame:%d\n#frames/rotation: %d\n#rotations/reconstruction:%d\n", 
+                  int(readouts_per_frame_[idx]), int(frames_per_rotation_[idx]), int(rotations_per_reconstruction_));
+    
+    // A configured value of 0 means: use frames_per_rotation_ for this set/slice
+    buffer_frames_per_rotation_[idx] = get_int_value(std::string("buffer_frames_per_rotation").c_str());
+    if( buffer_frames_per_rotation_[idx] == 0 ){
+      buffer_frames_per_rotation_[idx] = frames_per_rotation_[idx];
+    }
+    // Likewise, 0 means: use the number of rotations per reconstruction (at least one)
+    if( get_int_value(std::string("buffer_length_in_rotations").c_str()) == 0 ){
+      buffer_length_in_rotations_ = std::max(1L, rotations_per_reconstruction_);
+    }
+
+    cuSenseBuffer<float,2> *acc_buffer = 
+      (buffer_using_solver_) ? &acc_buffer_cg_[idx] : &acc_buffer_[idx];
+    
+    if( buffer_frames_per_rotation_[idx] == 1 ){ // Is this general enough to detect golden ratio type trajectories?
+      acc_buffer->setup( from_std_vector<size_t,2>(image_dimensions_recon_), image_dimensions_recon_os_, 
+                         kernel_width_, num_coils_[idx], 1, buffer_length_in_rotations_ );
+    }else{
+      acc_buffer->setup( from_std_vector<size_t,2>(image_dimensions_recon_), image_dimensions_recon_os_, 
+                         kernel_width_, num_coils_[idx], buffer_length_in_rotations_, buffer_frames_per_rotation_[idx] );
+    }
+    reconfigure_[idx] = false;
+  }
+
+  GADGET_FACTORY_DECLARE(gpuGenericSensePrepGadget)
+}
diff --git a/gadgets/pmri/gpuGenericSensePrepGadget.h b/gadgets/pmri/gpuGenericSensePrepGadget.h
new file mode 100644
index 0000000..4a79130
--- /dev/null
+++ b/gadgets/pmri/gpuGenericSensePrepGadget.h
@@ -0,0 +1,127 @@
+#pragma once
+
+#include "gadgetron_gpupmri_export.h"
+#include "Gadget.h"
+#include "GadgetMRIHeaders.h"
+#include "hoNDArray.h"
+#include "vector_td.h"
+#include "cuNFFT.h"
+#include "cuCgPreconditioner.h"
+#include "cuSenseBufferCg.h"
+
+#include <ismrmrd/ismrmrd.h>
+#include <complex>
+#include <boost/shared_ptr.hpp>
+#include <boost/shared_array.hpp>
+
+namespace Gadgetron{
+
+  class EXPORTGADGETS_GPUPMRI gpuGenericSensePrepGadget :
+    public Gadget3< ISMRMRD::AcquisitionHeader, hoNDArray< std::complex<float> >, hoNDArray<float> >
+  {
+    // Gadget whose process() receives (acquisition header, complex data, float traj/dcw) triples
+  public:
+    GADGET_DECLARE(gpuGenericSensePrepGadget);
+
+    gpuGenericSensePrepGadget();
+    virtual ~gpuGenericSensePrepGadget();
+
+  protected:
+    
+    virtual int process_config(ACE_Message_Block *mb);
+
+    virtual int process(GadgetContainerMessage< ISMRMRD::AcquisitionHeader > *m1,        // header
+			GadgetContainerMessage< hoNDArray< std::complex<float> > > *m2,  // data
+			GadgetContainerMessage< hoNDArray<float> > *m3 );                // traj/dcw
+
+  private:
+    // Exact component-wise comparison of two 3-element float arrays
+    inline bool vec_equal(float *in1, float *in2) {
+      for (unsigned int i = 0; i < 3; i++) {
+	if (in1[i] != in2[i]) return false;
+      }
+      return true;
+    }
+    // Per set/slice flag: when set, process() calls reconfigure(), which clears it again
+    boost::shared_array<bool> reconfigure_;
+    virtual void reconfigure(unsigned int set, unsigned int slice);
+    // Returns a newly allocated message holding a copy of the array contained in 'array'
+    template<class T> GadgetContainerMessage< hoNDArray<T> >* 
+      duplicate_array( GadgetContainerMessage< hoNDArray<T> > *array );
+    // Helpers draining an internal queue into arrays; with 'sliding_window' set, trailing entries are re-enqueued
+    boost::shared_ptr< hoNDArray<float_complext> > 
+      extract_samples_from_queue ( ACE_Message_Queue<ACE_MT_SYNCH> *queue, 
+				   bool sliding_window, unsigned int set, unsigned int slice );
+    
+    boost::shared_ptr< hoNDArray<float> > 
+      extract_trajectory_from_queue ( ACE_Message_Queue<ACE_MT_SYNCH> *queue, 
+				      bool sliding_window, unsigned int set, unsigned int slice );
+      
+    void extract_trajectory_and_dcw_from_queue
+      ( ACE_Message_Queue<ACE_MT_SYNCH> *queue, bool sliding_window, unsigned int set, unsigned int slice, 
+	unsigned int samples_per_frame, unsigned int num_frames,
+	cuNDArray<floatd2> *traj, cuNDArray<float> *dcw );
+    // The per set/slice arrays below are indexed by set*slices_+slice
+    int slices_;
+    int sets_;
+    int device_number_;
+    long samples_per_readout_; // process() compares against -1 to detect that it is not yet known
+
+    boost::shared_array<long> image_counter_;
+    boost::shared_array<long> readouts_per_frame_;  // for an undersampled frame
+    boost::shared_array<long> frames_per_rotation_; // representing a fully sampled frame
+
+    // The number of rotations to batch per reconstruction. 
+    // Set to '0' to reconstruct frames individually.
+    long rotations_per_reconstruction_; 
+
+    // The number of buffer cycles
+    long buffer_length_in_rotations_; 
+
+    boost::shared_array<long> buffer_frames_per_rotation_; // the number of buffer subcycles
+
+    // Internal book-keeping
+    boost::shared_array<long> previous_readout_no_;
+    boost::shared_array<long> acceleration_factor_;
+    boost::shared_array<long> readout_counter_frame_;
+    boost::shared_array<long> readout_counter_global_;
+    // Determine how many trailing readouts the extract_* helpers put back on a queue for reuse
+    long sliding_window_readouts_;
+    long sliding_window_rotations_;
+
+    float kernel_width_;
+    float oversampling_factor_;
+
+    boost::shared_array<unsigned int> num_coils_;
+    // Geometry last seen per set/slice; process() clears the accumulation buffer when it changes
+    boost::shared_array<float[3]> position_;
+    boost::shared_array<float[3]> read_dir_;
+    boost::shared_array<float[3]> phase_dir_;
+    boost::shared_array<float[3]> slice_dir_;
+
+    bool output_timing_;
+    bool buffer_using_solver_;
+    // Set whose csm the other sets reuse; if negative, the csm is estimated for every set
+    int propagate_csm_from_set_;
+    boost::shared_ptr< cuNDArray<float_complext> > csm_;
+
+    boost::shared_array<bool> buffer_update_needed_;
+
+    boost::shared_array< hoNDArray<float_complext> > csm_host_;
+    boost::shared_array< hoNDArray<float_complext> > reg_host_;
+    // Accumulation buffers per set/slice; buffer_using_solver_ selects acc_buffer_cg_ over acc_buffer_
+    boost::shared_array< cuSenseBuffer<float,2> > acc_buffer_;
+    boost::shared_array< cuSenseBufferCg<float,2> > acc_buffer_cg_;
+
+    std::vector<size_t> fov_;
+    std::vector<size_t> image_dimensions_;
+    std::vector<size_t> image_dimensions_recon_;
+    uint64d2 image_dimensions_recon_os_;
+    // Per set/slice queues: copies of the incoming data/trajectories, and the pending image headers
+    boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> > frame_readout_queue_;
+    boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> > recon_readout_queue_;
+    boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> > frame_traj_queue_;
+    boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> > recon_traj_queue_;
+    boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> > image_headers_queue_;
+  };
+}
diff --git a/gadgets/pmri/gpuNlcgSenseGadget.cpp b/gadgets/pmri/gpuNlcgSenseGadget.cpp
new file mode 100644
index 0000000..e5f68a8
--- /dev/null
+++ b/gadgets/pmri/gpuNlcgSenseGadget.cpp
@@ -0,0 +1,387 @@
+#include "gpuNlcgSenseGadget.h"
+#include "cuNDArray_operators.h"
+#include "cuNDArray_elemwise.h"
+#include "cuNDArray_blas.h"
+#include "cuNDArray_utils.h"
+#include "cuNDArray_reductions.h"
+#include "Gadgetron.h"
+#include "GadgetMRIHeaders.h"
+#include "b1_map.h"
+#include "GPUTimer.h"
+#include "vector_td_utilities.h"
+#include "hoNDArray_fileio.h"
+#include "ismrmrd/xml.h"
+#include <boost/thread/mutex.hpp>
+
+namespace Gadgetron{
+
+#define max_number_of_gpus 10 // number of entries in the per-device mutex table below
+  static boost::mutex _mutex[max_number_of_gpus]; // indexed by device_number_; locked around the solver call when exclusive_access_ is set
+
+  // Clears the state flags/counters and registers the default value of every gadget parameter.
+  gpuNlcgSenseGadget::gpuNlcgSenseGadget()
+    : channels_(0), is_configured_(false), prepared_(false), frame_counter_(0)
+  {
+    // Parameter defaults; process_config() reads the effective values back.
+    set_parameter("deviceno", "0");
+    set_parameter("setno", "0");
+    set_parameter("sliceno", "0");
+    set_parameter("number_of_cg_iterations", "10");
+    set_parameter("cg_limit", "1e-6");
+    set_parameter("oversampling_factor", "1.5");
+    set_parameter("kernel_width", "5.5");
+    set_parameter("lambda", "1e-6");
+    set_parameter("alpha", "0.5");
+    set_parameter("exclusive_access", "false");
+
+    // Matrix sizes stay zero until process_config()/process() fill them in.
+    matrix_size_ = uint64d2(0,0);
+    matrix_size_os_ = uint64d2(0,0);
+    matrix_size_seq_ = uint64d2(0,0);
+  }
+
+  gpuNlcgSenseGadget::~gpuNlcgSenseGadget() {} // empty body: no explicit cleanup is performed here
+
+  // Reads the gadget parameters, selects the CUDA device and, on the first call, allocates the
+  // operators and configures the NLCG solver. Returns GADGET_OK on success, GADGET_FAIL otherwise.
+  int gpuNlcgSenseGadget::process_config( ACE_Message_Block* mb )
+  {
+    GADGET_DEBUG1("gpuNlcgSenseGadget::process_config\n");
+
+    device_number_ = get_int_value("deviceno");
+
+    int number_of_devices = 0;
+    if (cudaGetDeviceCount(&number_of_devices)!= cudaSuccess) {
+      GADGET_DEBUG1( "Error: unable to query number of CUDA devices.\n" );
+      return GADGET_FAIL;
+    }
+
+    if (number_of_devices == 0) {
+      GADGET_DEBUG1( "Error: No available CUDA devices.\n" );
+      return GADGET_FAIL;
+    }
+
+    if (device_number_ >= number_of_devices) {
+      GADGET_DEBUG2("Adjusting device number from %d to %d\n", device_number_,  (device_number_%number_of_devices));
+      device_number_ = (device_number_%number_of_devices);
+    }
+
+    if (cudaSetDevice(device_number_)!= cudaSuccess) {
+      GADGET_DEBUG1( "Error: unable to set CUDA device.\n" );
+      return GADGET_FAIL;
+    }
+
+    pass_on_undesired_data_ = get_bool_value("pass_on_undesired_data");
+    set_number_ = get_int_value("setno");
+    slice_number_ = get_int_value("sliceno");
+
+    number_of_cg_iterations_ = get_int_value("number_of_cg_iterations");
+    cg_limit_ = get_double_value("cg_limit");
+    oversampling_factor_ = get_double_value("oversampling_factor");
+    kernel_width_ = get_double_value("kernel_width");
+
+    lambda_ = get_double_value("lambda");
+    alpha_ = get_double_value("alpha");
+    rotations_to_discard_ = get_int_value("rotations_to_discard");
+    output_convergence_ = get_bool_value("output_convergence");
+    exclusive_access_ = get_bool_value("exclusive_access");
+
+    if( exclusive_access_ && device_number_ >= max_number_of_gpus ){ // _mutex[] only has max_number_of_gpus entries
+      GADGET_DEBUG2("Error: exclusive access is limited to device numbers below %d\n", max_number_of_gpus);
+      return GADGET_FAIL;
+    }
+
+    if( (rotations_to_discard_%2) == 1 ){
+      GADGET_DEBUG1("#rotations to discard must be even.\n");
+      return GADGET_FAIL;
+    }
+
+    // Get the Ismrmrd header
+    //
+    ISMRMRD::IsmrmrdHeader h;
+    ISMRMRD::deserialize(mb->rd_ptr(),h);
+
+    if (h.encoding.size() != 1) {
+      GADGET_DEBUG1("This Gadget only supports one encoding space\n");
+      return GADGET_FAIL;
+    }
+
+    // Only the recon space is needed here: it defines the matrix size of the images passed on
+    const ISMRMRD::EncodingSpace& r_space = h.encoding[0].reconSpace;
+
+    matrix_size_seq_ = uint64d2( r_space.matrixSize.x, r_space.matrixSize.y );
+
+    if (!is_configured_) {
+      if (h.acquisitionSystemInformation) {
+        channels_ = h.acquisitionSystemInformation->receiverChannels ? *h.acquisitionSystemInformation->receiverChannels : 1;
+      } else {
+        channels_ = 1;
+      }
+
+      // Allocate encoding operator for non-Cartesian Sense
+      E_ = boost::shared_ptr< cuNonCartesianSenseOperator<float,2> >( new cuNonCartesianSenseOperator<float,2>() );
+
+      // Allocate preconditioner
+      D_ = boost::shared_ptr< cuCgPreconditioner<float_complext> >( new cuCgPreconditioner<float_complext>() );
+
+      TV_ = boost::shared_ptr<cuTvOperator<float_complext,3> >(new cuTvOperator<float_complext,3>);
+      PICS_ = boost::shared_ptr<cuTvPicsOperator<float_complext,3> >(new cuTvPicsOperator<float_complext,3>);
+
+      // Setup NLCG solver
+      solver_ = cuNlcgSolver<float_complext>();
+      solver_.set_encoding_operator( E_ );
+
+      solver_.set_output_mode( (output_convergence_) ? cuNlcgSolver<float_complext>::OUTPUT_VERBOSE : cuNlcgSolver<float_complext>::OUTPUT_SILENT );
+      solver_.set_max_iterations( number_of_cg_iterations_ );
+      solver_.set_tc_tolerance(cg_limit_);
+      solver_.set_preconditioner( D_ );
+
+      is_configured_ = true;
+    }
+
+    GADGET_DEBUG1("gpuNlcgSenseGadget::end of process_config\n");
+
+    return GADGET_OK;
+  }
+
+  // Reconstructs one GenericReconJob with the NLCG solver and queues one image per retained frame.
+  // Jobs for another set/slice are forwarded untouched. Returns GADGET_OK, or GADGET_FAIL when the
+  // gadget is unconfigured, the job is incomplete/inconsistent, the solver fails, or an image
+  // cannot be copied from the device or put on the next gadget's queue.
+  int gpuNlcgSenseGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader> *m1, GadgetContainerMessage<GenericReconJob> *m2)
+  {
+    // Is this data for this gadget's set/slice?
+    //
+    if( m1->getObjectPtr()->set != set_number_ || m1->getObjectPtr()->slice != slice_number_ ) {
+      // No, pass it downstream...
+      return this->next()->putq(m1);
+    }
+
+    if (!is_configured_) {
+      GADGET_DEBUG1("\nData received before configuration complete\n");
+      return GADGET_FAIL;
+    }
+
+    GenericReconJob* j = m2->getObjectPtr();
+
+    // Let's first check that this job has the required data (reg_host_ is dereferenced below as well)...
+    if (!j->csm_host_.get() || !j->dat_host_.get() || !j->tra_host_.get() || !j->dcw_host_.get() || !j->reg_host_.get()) {
+      GADGET_DEBUG1("Received an incomplete Sense job\n");
+      return GADGET_FAIL;
+    }
+
+    unsigned int samples = j->dat_host_->get_size(0);
+    const size_t num_coords = j->tra_host_->get_number_of_elements();
+
+    // Validate before dividing: an empty job would otherwise lead to a division by zero
+    if( samples == 0 || num_coords == 0 || samples%num_coords ) {
+      GADGET_DEBUG2("Mismatch between number of samples (%u) and number of k-space coordinates (%lu).\nThe first should be a multiplum of the latter.\n", samples, static_cast<unsigned long>(num_coords));
+      return GADGET_FAIL;
+    }
+    unsigned int rotations = samples / num_coords;
+    unsigned int frames = j->tra_host_->get_size(1)*rotations;
+
+    boost::shared_ptr< cuNDArray<floatd2> > traj(new cuNDArray<floatd2> (j->tra_host_.get()));
+    boost::shared_ptr< cuNDArray<float> > dcw(new cuNDArray<float> (j->dcw_host_.get()));
+    boost::shared_ptr< cuNDArray<float_complext> > csm(new cuNDArray<float_complext> (j->csm_host_.get()));
+    boost::shared_ptr< cuNDArray<float_complext> > device_samples(new cuNDArray<float_complext> (j->dat_host_.get()));
+
+    if( !prepared_){
+
+      // Take the reconstruction matrix size from the regularization image.
+      // It could be oversampled from the sequence specified size...
+
+      matrix_size_ = uint64d2( j->reg_host_->get_size(0), j->reg_host_->get_size(1) );
+
+      cudaDeviceProp deviceProp;
+      if( cudaGetDeviceProperties( &deviceProp, device_number_ ) != cudaSuccess) {
+        GADGET_DEBUG1( "\nError: unable to query device properties.\n" );
+        return GADGET_FAIL;
+      }
+
+      unsigned int warp_size = deviceProp.warpSize;
+
+      matrix_size_os_ =
+        uint64d2(((static_cast<unsigned int>(std::ceil(matrix_size_[0]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size,
+                 ((static_cast<unsigned int>(std::ceil(matrix_size_[1]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size);
+
+      GADGET_DEBUG2("Matrix size    : [%d,%d] \n", static_cast<int>(matrix_size_[0]), static_cast<int>(matrix_size_[1]));
+      GADGET_DEBUG2("Matrix size OS : [%d,%d] \n", static_cast<int>(matrix_size_os_[0]), static_cast<int>(matrix_size_os_[1]));
+
+      std::vector<size_t> image_dims = to_std_vector(matrix_size_);
+      image_dims.push_back(frames);
+
+      // The encoding operator maps the image series (domain) to the acquired samples (codomain)
+      E_->set_domain_dimensions(&image_dims);
+      E_->set_codomain_dimensions(device_samples->get_dimensions().get());
+
+      reg_image_ = boost::shared_ptr< cuNDArray<float_complext> >(new cuNDArray<float_complext>(&image_dims));
+
+      // These operators need their domain/codomain set before being added to the solver
+      //
+
+      // Add "TV" regularization
+      //
+
+      if( lambda_ > 0.0 ){
+        TV_->set_weight((1.0-alpha_)*lambda_);
+        solver_.add_nonlinear_operator(TV_);
+      }
+
+      // Add "PICCS" regularization
+      //
+
+      if( alpha_ > 0.0 ){
+        PICS_->set_prior(reg_image_);
+        PICS_->set_weight(alpha_*lambda_);
+        solver_.add_nonlinear_operator(PICS_);
+      }
+
+      prepared_ = true;
+    }
+
+    E_->set_dcw(dcw);
+    E_->set_csm(csm);
+    E_->setup( matrix_size_, matrix_size_os_, static_cast<float>(kernel_width_) );
+    E_->preprocess(traj.get());
+
+    // Expand the average image to the number of frames
+    //
+
+    {
+      cuNDArray<float_complext> tmp(*j->reg_host_);
+      *reg_image_ = *expand( &tmp, frames );
+    }
+
+    // Define preconditioning weights
+    //
+
+    boost::shared_ptr< cuNDArray<float> > _precon_weights = sum(abs_square(csm.get()).get(), 2);
+    reciprocal_sqrt_inplace(_precon_weights.get());
+    boost::shared_ptr< cuNDArray<float_complext> > precon_weights = real_to_complex<float_complext>( _precon_weights.get() );
+    _precon_weights.reset();
+    D_->set_weights( precon_weights );
+    precon_weights.reset();
+
+    //Apply weights
+    *device_samples *= *dcw;
+
+    // Invoke solver
+    //
+
+    boost::shared_ptr< cuNDArray<float_complext> > result;
+    {
+      GADGET_DEBUG1("Running NLCG solver\n");
+      GPUTimer timer("Running NLCG solver");
+
+      // Optionally, allow exclusive (per device) access to the solver
+      // This may not matter much in terms of speed, but it can in terms of memory consumption
+      //
+
+      if( exclusive_access_ ){
+        // Scoped lock: the mutex is released on scope exit, also if solve() throws
+        boost::mutex::scoped_lock lock(_mutex[device_number_]);
+        result = solver_.solve(device_samples.get());
+      }
+      else
+        result = solver_.solve(device_samples.get());
+    }
+
+    if (!result.get()) {
+      GADGET_DEBUG1("\nNon-linear conjugate gradient solver failed\n");
+      return GADGET_FAIL;
+    }
+
+    // Provide some info about the scaling between the regularization and reconstruction.
+    // If it is not close to one, PICCS does not work optimally...
+    // (Done only after the null check above, since it dereferences the result.)
+
+    if( alpha_ > 0.0 ){
+      cuNDArray<float_complext> gpureg(j->reg_host_.get());
+      boost::shared_ptr< cuNDArray<float_complext> > gpurec = sum(result.get(),2);
+      *gpurec /= float(result->get_size(2));
+      float scale = abs(dot(gpurec.get(), gpurec.get())/dot(gpurec.get(),&gpureg));
+      GADGET_DEBUG2("Scaling factor between regularization and reconstruction is %f.\n", scale);
+    }
+
+    /*
+      static int counter = 0;
+      char filename[256];
+      sprintf((char*)filename, "recon_sb_%d.cplx", counter);
+      write_nd_array<float_complext>( sbresult->to_host().get(), filename );
+      counter++; */
+
+    // If the recon matrix size exceeds the sequence matrix size then crop
+    if( matrix_size_seq_ != matrix_size_ )
+      result = crop<float_complext,2>( (matrix_size_-matrix_size_seq_)>>1, matrix_size_seq_, result.get() );
+
+    // Now pass on the reconstructed images
+    //
+
+    unsigned int frames_per_rotation = frames/rotations;
+
+    if( rotations == 1 ){ // this is the case for golden ratio
+      rotations = frames;
+      frames_per_rotation = 1;
+    }
+
+    for( unsigned int frame=0; frame<frames; frame++ ){
+
+      unsigned int rotation_idx = frame/frames_per_rotation;
+
+      // Check if we should discard this frame
+      if( rotation_idx < (rotations_to_discard_>>1) || rotation_idx >= rotations-(rotations_to_discard_>>1) )
+        continue;
+
+      GadgetContainerMessage< hoNDArray< std::complex<float> > > *cm =
+        new GadgetContainerMessage< hoNDArray< std::complex<float> > >();
+
+      GadgetContainerMessage<ISMRMRD::ImageHeader> *m =
+        new GadgetContainerMessage<ISMRMRD::ImageHeader>();
+
+      *m->getObjectPtr() = j->image_headers_[frame];
+      m->getObjectPtr()->matrix_size[0] = matrix_size_seq_[0];
+      m->getObjectPtr()->matrix_size[1] = matrix_size_seq_[1];
+      m->cont(cm);
+
+      std::vector<size_t> img_dims(2);
+      img_dims[0] = matrix_size_seq_[0];
+      img_dims[1] = matrix_size_seq_[1];
+
+      cm->getObjectPtr()->create(&img_dims);
+
+      size_t data_length = prod(matrix_size_seq_);
+
+      cudaMemcpy(cm->getObjectPtr()->get_data_ptr(),
+                 result->get_data_ptr()+frame*data_length,
+                 data_length*sizeof(std::complex<float>),
+                 cudaMemcpyDeviceToHost);
+
+      cudaError_t err = cudaGetLastError();
+      if( err != cudaSuccess ){
+        GADGET_DEBUG2("\nUnable to copy result from device to host: %s", cudaGetErrorString(err));
+        m->release();
+        return GADGET_FAIL;
+      }
+
+      m->getObjectPtr()->matrix_size[0] = img_dims[0];
+      m->getObjectPtr()->matrix_size[1] = img_dims[1];
+      m->getObjectPtr()->matrix_size[2] = 1;
+      m->getObjectPtr()->channels       = 1;
+      m->getObjectPtr()->image_index    = frame_counter_ + frame;
+
+      if (this->next()->putq(m) < 0) {
+        GADGET_DEBUG1("\nFailed to result image on to Q\n");
+        m->release();
+        return GADGET_FAIL;
+      }
+    }
+
+    frame_counter_ += frames;
+    m1->release();
+    return GADGET_OK;
+  }
+
+  GADGET_FACTORY_DECLARE(gpuNlcgSenseGadget) // factory hook for this gadget; macro is not defined in this file (presumably from Gadget.h — TODO confirm)
+}
+
diff --git a/gadgets/pmri/gpuNlcgSenseGadget.h b/gadgets/pmri/gpuNlcgSenseGadget.h
new file mode 100644
index 0000000..09b8b41
--- /dev/null
+++ b/gadgets/pmri/gpuNlcgSenseGadget.h
@@ -0,0 +1,82 @@
+#ifndef gpuNlcgSenseGadget_H
+#define gpuNlcgSenseGadget_H
+#pragma once
+
+#include <ace/Synch.h>
+#include <ace/Mutex.h>
+
+#include "gadgetron_gpupmri_export.h"
+#include "Gadget.h"
+#include "GenericReconJob.h"
+#include "GadgetMRIHeaders.h"
+#include "cuNlcgSolver.h"
+#include "cuNonCartesianSenseOperator.h"
+#include "cuCgPreconditioner.h"
+#include "cuPartialDerivativeOperator.h"
+#include "cuNFFT.h"
+#include "cuImageOperator.h"
+#include "ismrmrd/ismrmrd.h"
+#include "cuTvOperator.h"
+#include "cuTvPicsOperator.h"
+
+#include <complex>
+
+namespace Gadgetron{
+  // Gadget that reconstructs GenericReconJob data with a non-linear conjugate gradient Sense solver.
+  class EXPORTGADGETS_GPUPMRI gpuNlcgSenseGadget : public Gadget2< ISMRMRD::ImageHeader, GenericReconJob >
+  {
+
+  public:
+
+    gpuNlcgSenseGadget();
+    virtual ~gpuNlcgSenseGadget();
+
+  protected:
+
+    virtual int process( GadgetContainerMessage< ISMRMRD::ImageHeader >* m1, GadgetContainerMessage< GenericReconJob > * m2 );
+    virtual int process_config( ACE_Message_Block* mb );
+
+    int channels_;       // receiver channels from the ISMRMRD header (1 when not specified)
+    int device_number_;  // CUDA device ("deviceno" parameter, wrapped to the available devices)
+    int set_number_;     // only jobs with this set ("setno") are reconstructed
+    int slice_number_;   // only jobs with this slice ("sliceno") are reconstructed
+
+    uint64d2 matrix_size_;     // reconstruction matrix size, taken from the regularization image
+    uint64d2 matrix_size_os_;  // oversampled matrix size, rounded up to a multiple of the warp size
+    uint64d2 matrix_size_seq_; // recon-space matrix size from the ISMRMRD header (size of the output images)
+
+    unsigned int number_of_cg_iterations_; // maximum number of solver iterations
+
+    double cg_limit_;            // solver termination tolerance
+    double oversampling_factor_; // factor applied to matrix_size_ to obtain matrix_size_os_
+    double kernel_width_;        // kernel width passed to the encoding operator setup
+
+    double lambda_; // overall regularization weight
+    double alpha_;  // split of lambda_: (1-alpha_) weights TV, alpha_ weights the prior-image (PICCS) term
+    unsigned int rotations_to_discard_; // must be even; half is discarded at each end of the frame series
+
+    bool output_convergence_; // verbose solver output when true
+    bool exclusive_access_;   // serialize solver runs per device via a mutex when true
+    bool is_configured_;      // set once process_config() has built operators and solver
+    bool prepared_;           // set once the first job has fixed dimensions and regularizers
+
+    // Define non-linear conjugate gradient solver
+    cuNlcgSolver<float_complext> solver_;
+
+    // Define non-Cartesian Sense Encoding operator
+    boost::shared_ptr< cuNonCartesianSenseOperator<float,2> > E_;
+
+    // Define preconditioner
+    boost::shared_ptr< cuCgPreconditioner<float_complext> > D_;
+
+    // Average image for regularization
+    boost::shared_ptr< cuNDArray<float_complext> > reg_image_;
+
+    boost::shared_ptr<cuTvOperator<float_complext,3> > TV_;       // "TV" regularization operator
+    boost::shared_ptr<cuTvPicsOperator<float_complext,3> > PICS_; // "PICCS" regularization operator, uses reg_image_ as prior
+
+    int frame_counter_; // running offset for image_index across jobs
+  };
+}
+#endif //gpuNlcgSenseGadget_H
+
diff --git a/gadgets/pmri/gpuSbSenseGadget.cpp b/gadgets/pmri/gpuSbSenseGadget.cpp
new file mode 100644
index 0000000..ddfb399
--- /dev/null
+++ b/gadgets/pmri/gpuSbSenseGadget.cpp
@@ -0,0 +1,435 @@
+#include "gpuSbSenseGadget.h"
+#include "cuNDArray_operators.h"
+#include "cuNDArray_elemwise.h"
+#include "cuNDArray_blas.h"
+#include "cuNDArray_utils.h"
+#include "cuNDArray_reductions.h"
+#include "Gadgetron.h"
+#include "GadgetMRIHeaders.h"
+#include "b1_map.h"
+#include "GPUTimer.h"
+#include "vector_td_utilities.h"
+#include "hoNDArray_fileio.h"
+#include "ismrmrd/xml.h"
+
+#include <boost/thread/mutex.hpp>
+
+namespace Gadgetron{
+
+#define max_number_of_gpus 10 // number of entries in the per-device mutex table below
+  static boost::mutex _mutex[max_number_of_gpus]; // indexed by device_number_; locked around the solver call when exclusive_access_ is set
+  
+  // Clears the state flags/counters and registers the default value of every gadget parameter.
+  gpuSbSenseGadget::gpuSbSenseGadget()
+    : channels_(0), is_configured_(false), prepared_(false), frame_counter_(0)
+  {
+    // Parameter defaults; process_config() reads the effective values back.
+    set_parameter("deviceno", "0");
+    set_parameter("setno", "0");
+    set_parameter("sliceno", "0");
+    set_parameter("number_of_sb_iterations", "20");
+    set_parameter("number_of_cg_iterations", "10");
+    set_parameter("cg_limit", "1e-6");
+    set_parameter("oversampling_factor", "1.5");
+    set_parameter("kernel_width", "5.5");
+    set_parameter("mu", "1.0");
+    set_parameter("lambda", "2.0");
+    set_parameter("alpha", "0.5");
+    set_parameter("exclusive_access", "false");
+
+    // Matrix sizes stay zero until process_config()/process() fill them in.
+    matrix_size_ = uint64d2(0,0);
+    matrix_size_os_ = uint64d2(0,0);
+    matrix_size_seq_ = uint64d2(0,0);
+  }
+
+  gpuSbSenseGadget::~gpuSbSenseGadget() {} // empty body: no explicit cleanup is performed here
+
+  // Reads the gadget parameters, selects the CUDA device and, on the first call, allocates the
+  // operators and configures the split-Bregman solver. Returns GADGET_OK on success, GADGET_FAIL otherwise.
+  int gpuSbSenseGadget::process_config( ACE_Message_Block* mb )
+  {
+    GADGET_DEBUG1("gpuSbSenseGadget::process_config\n");
+
+    device_number_ = get_int_value("deviceno");
+
+    int number_of_devices = 0;
+    if (cudaGetDeviceCount(&number_of_devices)!= cudaSuccess) {
+      GADGET_DEBUG1( "Error: unable to query number of CUDA devices.\n" );
+      return GADGET_FAIL;
+    }
+
+    if (number_of_devices == 0) {
+      GADGET_DEBUG1( "Error: No available CUDA devices.\n" );
+      return GADGET_FAIL;
+    }
+
+    if (device_number_ >= number_of_devices) {
+      GADGET_DEBUG2("Adjusting device number from %d to %d\n", device_number_,  (device_number_%number_of_devices));
+      device_number_ = (device_number_%number_of_devices);
+    }
+
+    if (cudaSetDevice(device_number_)!= cudaSuccess) {
+      GADGET_DEBUG1( "Error: unable to set CUDA device.\n" );
+      return GADGET_FAIL;
+    }
+
+    pass_on_undesired_data_ = get_bool_value("pass_on_undesired_data");
+    set_number_ = get_int_value("setno");
+    slice_number_ = get_int_value("sliceno");
+    number_of_sb_iterations_ = get_int_value("number_of_sb_iterations");
+    number_of_cg_iterations_ = get_int_value("number_of_cg_iterations");
+    cg_limit_ = get_double_value("cg_limit");
+    oversampling_factor_ = get_double_value("oversampling_factor");
+    kernel_width_ = get_double_value("kernel_width");
+    mu_ = get_double_value("mu");
+    lambda_ = get_double_value("lambda");
+    alpha_ = get_double_value("alpha");
+    rotations_to_discard_ = get_int_value("rotations_to_discard");
+    output_convergence_ = get_bool_value("output_convergence");
+    exclusive_access_ = get_bool_value("exclusive_access");
+
+    if( exclusive_access_ && device_number_ >= max_number_of_gpus ){ // _mutex[] only has max_number_of_gpus entries
+      GADGET_DEBUG2("Error: exclusive access is limited to device numbers below %d\n", max_number_of_gpus);
+      return GADGET_FAIL;
+    }
+
+    if( (rotations_to_discard_%2) == 1 ){
+      GADGET_DEBUG1("#rotations to discard must be even.\n");
+      return GADGET_FAIL;
+    }
+
+    // Get the Ismrmrd header
+    ISMRMRD::IsmrmrdHeader h;
+    ISMRMRD::deserialize(mb->rd_ptr(),h);
+
+    if (h.encoding.size() != 1) {
+      GADGET_DEBUG1("This Gadget only supports one encoding space\n");
+      return GADGET_FAIL;
+    }
+
+    // Only the recon space is needed here: it defines the matrix size of the images passed on
+    const ISMRMRD::EncodingSpace& r_space = h.encoding[0].reconSpace;
+    matrix_size_seq_ = uint64d2( r_space.matrixSize.x, r_space.matrixSize.y );
+
+    if (!is_configured_) {
+      if (h.acquisitionSystemInformation) {
+        channels_ = h.acquisitionSystemInformation->receiverChannels ? *h.acquisitionSystemInformation->receiverChannels : 1;
+      } else {
+        channels_ = 1;
+      }
+
+      // Allocate encoding operator for non-Cartesian Sense
+      E_ = boost::shared_ptr< cuNonCartesianSenseOperator<float,2> >( new cuNonCartesianSenseOperator<float,2>() );
+      E_->set_weight(mu_);
+
+      // Allocate preconditioner
+      D_ = boost::shared_ptr< cuCgPreconditioner<float_complext> >( new cuCgPreconditioner<float_complext>() );
+
+      Rx1_ = boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> >
+        ( new cuPartialDerivativeOperator<float_complext,3>(0) );
+      Rx1_->set_weight( (1.0-alpha_)*lambda_ );
+
+      Ry1_ = boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> >
+        ( new cuPartialDerivativeOperator<float_complext,3>(1) );
+      Ry1_->set_weight( (1.0-alpha_)*lambda_ );
+
+      Rz1_ = boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> >
+        ( new cuPartialDerivativeOperator<float_complext,3>(2) );
+      Rz1_->set_weight( (1.0-alpha_)*lambda_ );
+
+      Rx2_ = boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> >
+        ( new cuPartialDerivativeOperator<float_complext,3>(0) );
+      Rx2_->set_weight( alpha_*lambda_ );
+
+      Ry2_ = boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> >
+        ( new cuPartialDerivativeOperator<float_complext,3>(1) );
+      Ry2_->set_weight( alpha_*lambda_ );
+
+      Rz2_ = boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> >
+        ( new cuPartialDerivativeOperator<float_complext,3>(2) );
+      Rz2_->set_weight( alpha_*lambda_ );
+
+      // Setup split-Bregman solver
+      sb_.set_encoding_operator( E_ );
+      sb_.set_max_outer_iterations(number_of_sb_iterations_);
+      sb_.set_max_inner_iterations(1);
+      sb_.set_output_mode( (output_convergence_) ? cuSbcCgSolver<float_complext>::OUTPUT_VERBOSE : cuSbcCgSolver<float_complext>::OUTPUT_SILENT );
+
+      sb_.get_inner_solver()->set_max_iterations( number_of_cg_iterations_ );
+      sb_.get_inner_solver()->set_tc_tolerance( cg_limit_ );
+      sb_.get_inner_solver()->set_output_mode( (output_convergence_) ? cuCgSolver<float_complext>::OUTPUT_VERBOSE : cuCgSolver<float_complext>::OUTPUT_SILENT );
+      sb_.get_inner_solver()->set_preconditioner( D_ );
+
+      is_configured_ = true;
+    }
+
+    GADGET_DEBUG1("gpuSbSenseGadget::end of process_config\n");
+
+    return GADGET_OK;
+  }
+
+  // Reconstructs one GenericReconJob with the split-Bregman solver and queues one image per retained
+  // frame. Jobs for another set/slice are forwarded untouched. Returns GADGET_OK, or GADGET_FAIL when
+  // the gadget is unconfigured, the job is incomplete/inconsistent, the solver fails, or an image
+  // cannot be copied from the device or put on the next gadget's queue.
+  int gpuSbSenseGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader> *m1, GadgetContainerMessage<GenericReconJob> *m2)
+  {
+    // Is this data for this gadget's set/slice?
+    //
+    if( m1->getObjectPtr()->set != set_number_ || m1->getObjectPtr()->slice != slice_number_ ) {
+      // No, pass it downstream...
+      return this->next()->putq(m1);
+    }
+
+    if (!is_configured_) {
+      GADGET_DEBUG1("\nData received before configuration complete\n");
+      return GADGET_FAIL;
+    }
+
+    GenericReconJob* j = m2->getObjectPtr();
+
+    // Let's first check that this job has the required data (reg_host_ is dereferenced below as well)...
+    if (!j->csm_host_.get() || !j->dat_host_.get() || !j->tra_host_.get() || !j->dcw_host_.get() || !j->reg_host_.get()) {
+      GADGET_DEBUG1("Received an incomplete Sense job\n");
+      return GADGET_FAIL;
+    }
+
+    unsigned int samples = j->dat_host_->get_size(0);
+    const size_t num_coords = j->tra_host_->get_number_of_elements();
+
+    // Validate before dividing: an empty job would otherwise lead to a division by zero
+    if( samples == 0 || num_coords == 0 || samples%num_coords ) {
+      GADGET_DEBUG2("Mismatch between number of samples (%u) and number of k-space coordinates (%lu).\nThe first should be a multiplum of the latter.\n", samples, static_cast<unsigned long>(num_coords));
+      return GADGET_FAIL;
+    }
+    unsigned int rotations = samples / num_coords;
+    unsigned int frames = j->tra_host_->get_size(1)*rotations;
+
+    boost::shared_ptr< cuNDArray<floatd2> > traj(new cuNDArray<floatd2> (j->tra_host_.get()));
+    boost::shared_ptr< cuNDArray<float> > dcw(new cuNDArray<float> (j->dcw_host_.get()));
+    sqrt_inplace(dcw.get());
+    boost::shared_ptr< cuNDArray<float_complext> > csm(new cuNDArray<float_complext> (j->csm_host_.get()));
+    boost::shared_ptr< cuNDArray<float_complext> > device_samples(new cuNDArray<float_complext> (j->dat_host_.get()));
+
+    if( !prepared_){
+
+      // Take the reconstruction matrix size from the regularization image.
+      // It could be oversampled from the sequence specified size...
+
+      matrix_size_ = uint64d2( j->reg_host_->get_size(0), j->reg_host_->get_size(1) );
+
+      cudaDeviceProp deviceProp;
+      if( cudaGetDeviceProperties( &deviceProp, device_number_ ) != cudaSuccess) {
+        GADGET_DEBUG1( "\nError: unable to query device properties.\n" );
+        return GADGET_FAIL;
+      }
+
+      unsigned int warp_size = deviceProp.warpSize;
+
+      matrix_size_os_ =
+        uint64d2(((static_cast<unsigned int>(std::ceil(matrix_size_[0]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size,
+                 ((static_cast<unsigned int>(std::ceil(matrix_size_[1]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size);
+
+      GADGET_DEBUG2("Matrix size    : [%d,%d] \n", static_cast<int>(matrix_size_[0]), static_cast<int>(matrix_size_[1]));
+      GADGET_DEBUG2("Matrix size OS : [%d,%d] \n", static_cast<int>(matrix_size_os_[0]), static_cast<int>(matrix_size_os_[1]));
+
+      std::vector<size_t> image_dims = to_std_vector(matrix_size_);
+      image_dims.push_back(frames);
+
+      E_->set_domain_dimensions(&image_dims);
+      E_->set_codomain_dimensions(device_samples->get_dimensions().get());
+
+      reg_image_ = boost::shared_ptr< cuNDArray<float_complext> >(new cuNDArray<float_complext>(&image_dims));
+
+      // These operators need their domain/codomain set before being added to the solver
+      //
+
+      Rx1_->set_domain_dimensions(&image_dims);
+      Rx1_->set_codomain_dimensions(&image_dims);
+
+      Ry1_->set_domain_dimensions(&image_dims);
+      Ry1_->set_codomain_dimensions(&image_dims);
+
+      Rz1_->set_domain_dimensions(&image_dims);
+      Rz1_->set_codomain_dimensions(&image_dims);
+
+      Rx2_->set_domain_dimensions(&image_dims);
+      Rx2_->set_codomain_dimensions(&image_dims);
+
+      Ry2_->set_domain_dimensions(&image_dims);
+      Ry2_->set_codomain_dimensions(&image_dims);
+
+      Rz2_->set_domain_dimensions(&image_dims);
+      Rz2_->set_codomain_dimensions(&image_dims);
+
+      // Add "TV" regularization
+      //
+
+      if( alpha_<1.0 ){
+        sb_.add_regularization_group_operator( Rx1_ );
+        sb_.add_regularization_group_operator( Ry1_ );
+        if(frames>1)
+          sb_.add_regularization_group_operator( Rz1_ );
+        sb_.add_group();
+      }
+
+      // Add "PICCS" regularization
+      //
+
+      if( alpha_ > 0.0 ){
+        sb_.add_regularization_group_operator( Rx2_ );
+        sb_.add_regularization_group_operator( Ry2_ );
+        if(frames>1)
+          sb_.add_regularization_group_operator( Rz2_ );
+        sb_.add_group(reg_image_);
+      }
+
+      prepared_ = true;
+    }
+
+    E_->set_dcw(dcw);
+    E_->set_csm(csm);
+    E_->setup( matrix_size_, matrix_size_os_, static_cast<float>(kernel_width_) );
+    E_->preprocess(traj.get());
+
+    // Expand the average image to the number of frames
+    //
+
+    {
+      cuNDArray<float_complext> tmp(*j->reg_host_);
+      *reg_image_ = *expand( &tmp, frames );
+    }
+
+    // Define preconditioning weights
+    //
+
+    boost::shared_ptr< cuNDArray<float> > _precon_weights = sum(abs_square(csm.get()).get(), 2);
+    reciprocal_sqrt_inplace(_precon_weights.get());
+    boost::shared_ptr< cuNDArray<float_complext> > precon_weights = real_to_complex<float_complext>( _precon_weights.get() );
+    _precon_weights.reset();
+    D_->set_weights( precon_weights );
+    precon_weights.reset();
+
+    //Apply weights
+    *device_samples *= *dcw;
+
+    // Invoke solver
+    //
+
+    boost::shared_ptr< cuNDArray<float_complext> > sbresult;
+    {
+      GADGET_DEBUG1("Running split Bregman solver\n");
+      GPUTimer timer("Running split Bregman solver");
+
+      // Optionally, allow exclusive (per device) access to the solver
+      // This may not matter much in terms of speed, but it can in terms of memory consumption
+      //
+
+      if( exclusive_access_ ){
+        // Scoped lock: the mutex is released on scope exit, also if solve() throws
+        boost::mutex::scoped_lock lock(_mutex[device_number_]);
+        sbresult = sb_.solve(device_samples.get());
+      }
+      else
+        sbresult = sb_.solve(device_samples.get());
+    }
+
+    if (!sbresult.get()) {
+      GADGET_DEBUG1("\nSplit Bregman solver failed\n");
+      return GADGET_FAIL;
+    }
+
+    // Provide some info about the scaling between the regularization and reconstruction.
+    // If it is not close to one, PICCS does not work optimally...
+    // (Done only after the null check above, since it dereferences the result.)
+
+    if( alpha_ > 0.0 ){
+      cuNDArray<float_complext> gpureg(j->reg_host_.get());
+      boost::shared_ptr< cuNDArray<float_complext> > gpurec = sum(sbresult.get(),2);
+      *gpurec /= float(sbresult->get_size(2));
+      float scale = abs(dot(gpurec.get(), gpurec.get())/dot(gpurec.get(),&gpureg));
+      GADGET_DEBUG2("Scaling factor between regularization and reconstruction is %f.\n", scale);
+    }
+
+    /*
+      static int counter = 0;
+      char filename[256];
+      sprintf((char*)filename, "recon_sb_%d.cplx", counter);
+      write_nd_array<float_complext>( sbresult->to_host().get(), filename );
+      counter++; */
+
+    // If the recon matrix size exceeds the sequence matrix size then crop
+    if( matrix_size_seq_ != matrix_size_ )
+      sbresult = crop<float_complext,2>( (matrix_size_-matrix_size_seq_)>>1, matrix_size_seq_, sbresult.get() );
+
+    // Now pass on the reconstructed images
+    //
+
+    unsigned int frames_per_rotation = frames/rotations;
+
+    if( rotations == 1 ){ // this is the case for golden ratio
+      rotations = frames;
+      frames_per_rotation = 1;
+    }
+
+    for( unsigned int frame=0; frame<frames; frame++ ){
+
+      unsigned int rotation_idx = frame/frames_per_rotation;
+
+      // Check if we should discard this frame
+      if( rotation_idx < (rotations_to_discard_>>1) || rotation_idx >= rotations-(rotations_to_discard_>>1) )
+        continue;
+
+      GadgetContainerMessage< hoNDArray< std::complex<float> > > *cm =
+        new GadgetContainerMessage< hoNDArray< std::complex<float> > >();
+
+      GadgetContainerMessage<ISMRMRD::ImageHeader> *m =
+        new GadgetContainerMessage<ISMRMRD::ImageHeader>();
+
+      *m->getObjectPtr() = j->image_headers_[frame];
+      m->getObjectPtr()->matrix_size[0] = matrix_size_seq_[0];
+      m->getObjectPtr()->matrix_size[1] = matrix_size_seq_[1];
+      m->cont(cm);
+
+      std::vector<size_t> img_dims(2);
+      img_dims[0] = matrix_size_seq_[0];
+      img_dims[1] = matrix_size_seq_[1];
+
+      cm->getObjectPtr()->create(&img_dims);
+
+      size_t data_length = prod(matrix_size_seq_);
+
+      cudaMemcpy(cm->getObjectPtr()->get_data_ptr(),
+                 sbresult->get_data_ptr()+frame*data_length,
+                 data_length*sizeof(std::complex<float>),
+                 cudaMemcpyDeviceToHost);
+
+      cudaError_t err = cudaGetLastError();
+      if( err != cudaSuccess ){
+        GADGET_DEBUG2("\nUnable to copy result from device to host: %s", cudaGetErrorString(err));
+        m->release();
+        return GADGET_FAIL;
+      }
+
+      m->getObjectPtr()->matrix_size[0] = img_dims[0];
+      m->getObjectPtr()->matrix_size[1] = img_dims[1];
+      m->getObjectPtr()->matrix_size[2] = 1;
+      m->getObjectPtr()->channels       = 1;
+      m->getObjectPtr()->image_index    = frame_counter_ + frame;
+
+      if (this->next()->putq(m) < 0) {
+        GADGET_DEBUG1("\nFailed to result image on to Q\n");
+        m->release();
+        return GADGET_FAIL;
+      }
+    }
+
+    frame_counter_ += frames;
+    m1->release();
+    return GADGET_OK;
+  }
+
+  GADGET_FACTORY_DECLARE(gpuSbSenseGadget) // factory hook for this gadget; macro is not defined in this file (presumably from Gadget.h — TODO confirm)
+}
+
diff --git a/gadgets/pmri/gpuSbSenseGadget.h b/gadgets/pmri/gpuSbSenseGadget.h
new file mode 100644
index 0000000..9aca78c
--- /dev/null
+++ b/gadgets/pmri/gpuSbSenseGadget.h
@@ -0,0 +1,85 @@
+#ifndef gpuSbSenseGadget_H
+#define gpuSbSenseGadget_H
+#pragma once
+
+#include <ace/Synch.h>
+#include <ace/Mutex.h>
+
+#include "gadgetron_gpupmri_export.h"
+#include "Gadget.h"
+#include "GenericReconJob.h"
+#include "GadgetMRIHeaders.h"
+#include "cuSbcCgSolver.h"
+#include "cuNonCartesianSenseOperator.h"
+#include "cuCgPreconditioner.h"
+#include "cuPartialDerivativeOperator.h"
+#include "cuNFFT.h"
+#include "cuImageOperator.h"
+#include "ismrmrd/ismrmrd.h"
+
+#include <complex>
+
+namespace Gadgetron{
+
+  // Gadget2 over (ISMRMRD::ImageHeader, GenericReconJob) that owns a Split Bregman solver (sb_).
+  class EXPORTGADGETS_GPUPMRI gpuSbSenseGadget : public Gadget2< ISMRMRD::ImageHeader, GenericReconJob >
+  {
+  public:
+    GADGET_DECLARE(gpuSbSenseGadget);
+
+    gpuSbSenseGadget();
+    virtual ~gpuSbSenseGadget();
+
+  protected:
+    // Handler for a (header, job) message pair, and handler for the configuration block
+    virtual int process( GadgetContainerMessage< ISMRMRD::ImageHeader >* m1, GadgetContainerMessage< GenericReconJob > * m2 );
+    virtual int process_config( ACE_Message_Block* mb );
+
+    int channels_;
+    int device_number_;
+    int set_number_;
+    int slice_number_;
+    // Matrix sizes; matrix_size_seq_ gives the dimensions of the images sent downstream
+    uint64d2 matrix_size_;
+    uint64d2 matrix_size_os_;
+    uint64d2 matrix_size_seq_;
+    // Solver and gridding settings (presumably read from gadget properties -- confirm in the .cpp)
+    unsigned int number_of_cg_iterations_;
+    unsigned int number_of_sb_iterations_;
+    double cg_limit_;
+    double oversampling_factor_;
+    double kernel_width_;
+    double mu_;
+    double lambda_;
+    double alpha_;
+    unsigned int rotations_to_discard_; // (rotations_to_discard_>>1) rotations are dropped at each end
+
+    bool output_convergence_;
+    bool exclusive_access_;
+    bool is_configured_;
+    bool prepared_;
+
+    // Define constrained Split Bregman solver
+    cuSbcCgSolver<float_complext> sb_;
+
+    // Define non-Cartesian Sense Encoding operator
+    boost::shared_ptr< cuNonCartesianSenseOperator<float,2> > E_;
+
+    // Define preconditioner
+    boost::shared_ptr< cuCgPreconditioner<float_complext> > D_;
+
+    // Average image for regularization
+    boost::shared_ptr< cuNDArray<float_complext> > reg_image_;
+
+    // Define regularization operators
+    boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> > Rx1_;
+    boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> > Rx2_;
+    boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> > Ry1_;
+    boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> > Ry2_;
+    boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> > Rz1_;
+    boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> > Rz2_;
+    // Added to the frame number to form image_index; advanced by the frame count after each job
+    int frame_counter_;
+  };
+}
+#endif //gpuSbSenseGadget
diff --git a/gadgets/python/CMakeLists.txt b/gadgets/python/CMakeLists.txt
index 0273d22..0cceef5 100644
--- a/gadgets/python/CMakeLists.txt
+++ b/gadgets/python/CMakeLists.txt
@@ -9,21 +9,32 @@ ENDIF (WIN32)
 
 find_package(Ismrmrd REQUIRED)
 
+message(STATUS ${Boost_INCLUDE_DIR} ${Boost_LIBRARIES})
+
 include_directories(
+  ${CMAKE_SOURCE_DIR}/apps/gadgetron
+  ${CMAKE_BINARY_DIR}/apps/gadgetron
   ${CMAKE_SOURCE_DIR}/gadgets/mri_core
   ${PYTHON_INCLUDE_PATH}
   ${NUMPY_INCLUDE_DIRS}
+  ${Boost_INCLUDE_DIR}
   )
 
 add_library(GadgetronPythonMRI MODULE GadgetronPythonMRI.cpp GadgetReference.cpp)
 
+#We should probably not set soversion on the python module. Causes problems with clang
+#set_target_properties(GadgetronPythonMRI PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+
 add_library(gadgetron_python SHARED       		
 	PythonGadget.cpp
 	GadgetReference.cpp
 	GadgetronPythonMRI.cpp
 	PythonCommunicator.cpp)
 
+set_target_properties(gadgetron_python PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+
 target_link_libraries(gadgetron_python
+        gadgetron_gadgetbase
 	${ISMRMRD_LIBRARIES} 
 	optimized ${ACE_LIBRARIES}  
 	debug ${ACE_DEBUG_LIBRARY} 
@@ -32,6 +43,7 @@ target_link_libraries(gadgetron_python
     ${MKL_LIBRARIES})
 	
 target_link_libraries(GadgetronPythonMRI
+    ${ISMRMRD_LIBRARIES}
 	optimized ${ACE_LIBRARIES}  
 	debug ${ACE_DEBUG_LIBRARY} 
 	${PYTHON_LIBRARIES} 
@@ -40,12 +52,13 @@ target_link_libraries(GadgetronPythonMRI
 
 IF (WIN32)
     SET_TARGET_PROPERTIES(GadgetronPythonMRI PROPERTIES SUFFIX .pyd)
-	SET_TARGET_PROPERTIES(gadgetron_python PROPERTIES LINK_FLAGS "/LIBPATH:${PYTHON_INCLUDE_DIR}/../libs" )
+    SET_TARGET_PROPERTIES(gadgetron_python PROPERTIES LINK_FLAGS "/LIBPATH:${PYTHON_INCLUDE_DIR}/../libs" )
 ENDIF(WIN32)
 
 set_target_properties(GadgetronPythonMRI PROPERTIES PREFIX "")
-	
-install(TARGETS gadgetron_python GadgetronPythonMRI DESTINATION lib)
+
+install(TARGETS gadgetron_python DESTINATION lib COMPONENT main)
+install(TARGETS GadgetronPythonMRI DESTINATION ${GADGETRON_INSTALL_PYTHON_MODULE_PATH} COMPONENT main)
 
 install(FILES 
 	rms_coil_combine.py
@@ -54,8 +67,8 @@ install(FILES
 	accumulate_and_recon.py
 	GadgetronXML.py
 	image_viewer.py 
-DESTINATION lib)
+    DESTINATION ${GADGETRON_INSTALL_PYTHON_MODULE_PATH} COMPONENT main)
 
 install(FILES
 	python.xml python_short.xml
-DESTINATION config)
+    DESTINATION ${GADGETRON_INSTALL_CONFIG_PATH} COMPONENT main)
diff --git a/gadgets/python/GadgetReference.cpp b/gadgets/python/GadgetReference.cpp
index 217d152..2667275 100644
--- a/gadgets/python/GadgetReference.cpp
+++ b/gadgets/python/GadgetReference.cpp
@@ -3,7 +3,7 @@
 #include "GadgetReference.h"
 #include "GadgetContainerMessage.h"
 #include "hoNDArray.h"
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
 #include <boost/preprocessor/stringize.hpp>
 #include <boost/python.hpp>
 #include <numpy/numpyconfig.h>
diff --git a/gadgets/python/GadgetReference.h b/gadgets/python/GadgetReference.h
index 0584b63..e348559 100644
--- a/gadgets/python/GadgetReference.h
+++ b/gadgets/python/GadgetReference.h
@@ -4,7 +4,7 @@
 #include "GadgetMRIHeaders.h"
 #include "gadgetronpython_export.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <boost/python.hpp>
 #include <boost/python/tuple.hpp>
 
diff --git a/gadgets/python/GadgetronPythonMRI.cpp b/gadgets/python/GadgetronPythonMRI.cpp
index 40595df..3923e84 100644
--- a/gadgets/python/GadgetronPythonMRI.cpp
+++ b/gadgets/python/GadgetronPythonMRI.cpp
@@ -4,7 +4,7 @@
 
 #include "../mri_core/GadgetMRIHeaders.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 
 using namespace boost::python;
 
@@ -398,7 +398,7 @@ BOOST_PYTHON_MODULE(GadgetronPythonMRI)
 			.def_readwrite("average", &ISMRMRD::ImageHeader::average)
 			.def_readwrite("repetition", &ISMRMRD::ImageHeader::repetition)
 			.def_readwrite("acquisition_time_stamp", &ISMRMRD::ImageHeader::acquisition_time_stamp)
-			.def_readwrite("image_data_type", &ISMRMRD::ImageHeader::image_data_type)
+			.def_readwrite("data_type", &ISMRMRD::ImageHeader::data_type)
 			.def_readwrite("image_type", &ISMRMRD::ImageHeader::image_type)
 			.def_readwrite("image_index", &ISMRMRD::ImageHeader::image_index)
 			.def_readwrite("image_series_index", &ISMRMRD::ImageHeader::image_series_index)
@@ -410,18 +410,18 @@ BOOST_PYTHON_MODULE(GadgetronPythonMRI)
 
     		;
 
-	enum_<ISMRMRD::ImageDataType>("ImageDataType")
-    		   .value("DATA_COMPLEX_FLOAT", ISMRMRD::DATA_COMPLEX_FLOAT)
-    		   .value("DATA_FLOAT", ISMRMRD::DATA_FLOAT)
-    		   .value("DATA_UNSIGNED_SHORT", ISMRMRD::DATA_UNSIGNED_SHORT)
+	enum_<ISMRMRD::ISMRMRD_DataTypes>("ISMRMRD_DataTypes")
+    		   .value("ISMRMRD_CXFLOAT", ISMRMRD::ISMRMRD_CXFLOAT)
+	           .value("ISMRMRD_FLOAT", ISMRMRD::ISMRMRD_FLOAT)
+    		   .value("ISMRMRD_USHORT", ISMRMRD::ISMRMRD_USHORT)
     		   ;
 
 
-	enum_<ISMRMRD::ImageType>("ImageType")
-				  .value("TYPE_MAGNITUDE",ISMRMRD::TYPE_MAGNITUDE)
-				  .value("TYPE_PHASE", ISMRMRD::TYPE_PHASE)
-				  .value("TYPE_REAL",ISMRMRD::TYPE_REAL)
-				  .value("TYPE_IMAG",ISMRMRD::TYPE_IMAG)
+	enum_<ISMRMRD::ISMRMRD_ImageTypes>("ISMRMRD_ImageTypes")
+				  .value("TYPE_MAGNITUDE",ISMRMRD::ISMRMRD_IMTYPE_MAGNITUDE)
+				  .value("TYPE_PHASE", ISMRMRD::ISMRMRD_IMTYPE_PHASE)
+				  .value("TYPE_REAL",ISMRMRD::ISMRMRD_IMTYPE_REAL)
+				  .value("TYPE_IMAG",ISMRMRD::ISMRMRD_IMTYPE_IMAG)
 				  ;
 
 	enum_<Gadgetron::GadgetMessageID>("GadgetMessageID")
diff --git a/gadgets/python/PythonCommunicator.cpp b/gadgets/python/PythonCommunicator.cpp
index c5cb67e..1448f2f 100644
--- a/gadgets/python/PythonCommunicator.cpp
+++ b/gadgets/python/PythonCommunicator.cpp
@@ -1,11 +1,13 @@
 #include "PythonCommunicator.h"
 #include "../mri_core/GadgetMRIHeaders.h"
+#include "gadgetron_paths.h"
+#include "gadgetron_config.h"
 
 #include <numpy/numpyconfig.h>
 #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
 #include <numpy/arrayobject.h>
 #include <boost/algorithm/string.hpp>
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 
 namespace Gadgetron{
 PythonCommunicator::PythonCommunicator()
@@ -24,10 +26,10 @@ PythonCommunicator::PythonCommunicator()
 
 
 	//Let's first get the path set for the library folder
-	const char* gadgetron_home = ACE_OS::getenv("GADGETRON_HOME");
-	std::string path_name = std::string(gadgetron_home) + std::string("/lib");
+	std::string  gadgetron_home = get_gadgetron_home();
+	std::string path_name = gadgetron_home + std::string("/") + std::string(GADGETRON_PYTHON_PATH);
 
-	if (gadgetron_home != 0) {
+	if (gadgetron_home.size() != 0) {
 		if (addPath(path_name) == GADGET_FAIL) {
 			GADGET_DEBUG2("PythonCommunicator (constructor) failed to add path %s\n", path_name.c_str());
 		}
@@ -166,6 +168,7 @@ template<class T> int PythonCommunicator::process(Gadget* g,
 
 	it = process_fnc_.find(g);
 	if (it != process_fnc_.end()) {
+                mutex_.lock();
 		gstate = PyGILState_Ensure();
 		try {
 			std::vector<size_t> dims = (*(m2->getObjectPtr()->get_dimensions().get()));
@@ -191,10 +194,12 @@ template<class T> int PythonCommunicator::process(Gadget* g,
 		} catch(boost::python::error_already_set const &) {
 			GADGET_DEBUG1("Passing data on to python module failed\n");
 			PyErr_Print();
+                        mutex_.unlock();
 			PyGILState_Release(gstate);
 			return GADGET_FAIL;
 		}
 		PyGILState_Release(gstate);
+                mutex_.unlock();
 	} else {
 		GADGET_DEBUG2("No registered process function found for Gadget %s\n", g->module()->name());
 		return GADGET_FAIL;
diff --git a/gadgets/python/PythonCommunicator.h b/gadgets/python/PythonCommunicator.h
index c256b34..19fe2ea 100644
--- a/gadgets/python/PythonCommunicator.h
+++ b/gadgets/python/PythonCommunicator.h
@@ -10,6 +10,7 @@
 #include <ace/Singleton.h>
 #include <ace/Synch.h>
 
+#include <boost/thread/mutex.hpp>
 #include <boost/python.hpp>
 #include <boost/shared_ptr.hpp>
 
@@ -37,6 +38,8 @@ class EXPORTGADGETSPYTHON PythonCommunicator
 				GadgetContainerMessage<T>* m1,
 				GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2);
 
+    boost::mutex mutex_;
+
  private:
   std::map<Gadget*, boost::python::object> module_;
   std::map<Gadget*, boost::python::object> gadget_ref_fnc_;
diff --git a/gadgets/python/PythonGadget.h b/gadgets/python/PythonGadget.h
index 0731eff..37c4f97 100644
--- a/gadgets/python/PythonGadget.h
+++ b/gadgets/python/PythonGadget.h
@@ -7,7 +7,7 @@
 #include "PythonCommunicator.h"
 #include "gadgetronpython_export.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <boost/python.hpp>
 #include <boost/algorithm/string.hpp>
 #include <stdio.h>
@@ -83,17 +83,8 @@ namespace Gadgetron{
     };
   
   class EXPORTGADGETSPYTHON AcquisitionPythonGadget :
-  public PythonGadget<ISMRMRD::AcquisitionHeader>
-  {
-  public:
-    GADGET_DECLARE(AcquisitionPythonGadget);
-  
-  };
+  public PythonGadget<ISMRMRD::AcquisitionHeader> {};
 
   class EXPORTGADGETSPYTHON ImagePythonGadget :
-  public PythonGadget<ISMRMRD::ImageHeader>
-  {
-  public:
-    GADGET_DECLARE(ImagePythonGadget);    
-  };
+  public PythonGadget<ISMRMRD::ImageHeader> {};
 }
diff --git a/gadgets/radial/CMakeLists.txt b/gadgets/radial/CMakeLists.txt
index 7c4b6ce..42b5759 100644
--- a/gadgets/radial/CMakeLists.txt
+++ b/gadgets/radial/CMakeLists.txt
@@ -3,37 +3,58 @@ IF (WIN32)
 ENDIF (WIN32)
 
 find_package(Ismrmrd REQUIRED)
-find_package(XSD REQUIRED)
-find_package(XercesC REQUIRED)
 
 include_directories(
   ${CMAKE_SOURCE_DIR}/gadgets/mri_core
-  ${CMAKE_SOURCE_DIR}/gadgets/sense
+  ${CMAKE_SOURCE_DIR}/gadgets/pmri
   ${CMAKE_SOURCE_DIR}/toolboxes/nfft/gpu
   ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
   ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
+  ${CMAKE_SOURCE_DIR}/toolboxes/fft/gpu
   ${CMAKE_SOURCE_DIR}/toolboxes/mri/pmri/gpu
   ${CMAKE_SOURCE_DIR}/toolboxes/solvers
   ${CMAKE_SOURCE_DIR}/toolboxes/solvers/gpu
   ${CMAKE_SOURCE_DIR}/toolboxes/operators
   ${CMAKE_SOURCE_DIR}/toolboxes/operators/gpu
-  ${ISMRMRD_XSD_INCLUDE_DIR}
+  ${ARMADILLO_INCLUDE_DIRS}
   )
 
-if(CUDA_FOUND)
-  include_directories(${CUDA_INCLUDE_DIRS})
-  
-  add_library(gadgetron_gpuradial SHARED 
-    gpuRadialSensePrepGadget.cpp 
-    ${ISMRMRD_XSD_SOURCE})
-  
-  target_link_libraries(gadgetron_gpuradial
-    gpunfft gpusolvers gpuoperators gpuparallelmri cpucore gpucore
-    ${ISMRMRD_LIBRARIES} ${XERCESC_LIBRARIES} ${FFTW3_LIBRARIES} ${CUDA_LIBRARIES}
-    optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY}
-    )
-  
-  install (TARGETS gadgetron_gpuradial DESTINATION lib)
-endif(CUDA_FOUND)
+include_directories(${CUDA_INCLUDE_DIRS})
+
+if (ARMADILLO_FOUND)  # RadialPhaseCorrectionGadget.cpp includes hoArmadillo.h and uses arma::
+  list(APPEND PHASE_GADGET RadialPhaseCorrectionGadget.h RadialPhaseCorrectionGadget.cpp)
+  install (FILES  RadialPhaseCorrectionGadget.h DESTINATION include COMPONENT main)
+else ()  # a second test of ARMADILLO_FOUND here could never be true
+  MESSAGE("Armadillo not found, not compiling radial phase correction gadget")
+endif (ARMADILLO_FOUND)
+
+add_library(gadgetron_gpuradial SHARED 
+  gadgetron_radial_export.h
+  gpuRadialPrepGadget.h gpuRadialPrepGadget.cpp 
+  gpuRadialSensePrepGadget.h gpuRadialSensePrepGadget.cpp 
+  gpuRadialSpiritPrepGadget.h gpuRadialSpiritPrepGadget.cpp 
+  gpuRetroGatedSensePrepGadget.h gpuRetroGatedSensePrepGadget.cpp
+  ${PHASE_GADGET})
+
+set_target_properties(gadgetron_gpuradial PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+
+target_link_libraries(gadgetron_gpuradial
+  gadgetron_gadgetbase
+  gadgetron_toolbox_gpunfft gadgetron_toolbox_gpusolvers gadgetron_toolbox_gpuoperators gadgetron_toolbox_cpucore gadgetron_toolbox_cpucore_math gadgetron_toolbox_gpucore
+  ${ISMRMRD_LIBRARIES} ${FFTW3_LIBRARIES} ${CUDA_LIBRARIES}
+  optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY}
+  )
+
+target_link_libraries(gadgetron_gpuradial gadgetron_toolbox_gpuparallelmri )
+
+install (FILES  gadgetron_radial_export.h
+                gpuRadialPrepGadget.h 
+                gpuRadialSensePrepGadget.h 
+                gpuRadialSpiritPrepGadget.h 
+                gpuRetroGatedSensePrepGadget.h 
+                DESTINATION include COMPONENT main)
+
+install (TARGETS gadgetron_gpuradial DESTINATION lib COMPONENT main)
 
 add_subdirectory(config)
diff --git a/gadgets/radial/RadialPhaseCorrectionGadget.cpp b/gadgets/radial/RadialPhaseCorrectionGadget.cpp
new file mode 100644
index 0000000..734ab4d
--- /dev/null
+++ b/gadgets/radial/RadialPhaseCorrectionGadget.cpp
@@ -0,0 +1,314 @@
+#include "RadialPhaseCorrectionGadget.h"
+#include "Gadgetron.h"
+#include "hoNDArray_elemwise.h"
+#include "hoArmadillo.h"
+#include "hoNDArray_fileio.h"
+#include "ismrmrd/xml.h"
+
+#define _USE_MATH_DEFINES
+#include <math.h>
+#include <cmath>
+
+#ifdef USE_OMP
+#include <omp.h>
+#endif 
+
+namespace Gadgetron{
+  
+  // Marks the header-derived counts as unknown (-1) and registers default properties.
+  RadialPhaseCorrectionGadget::RadialPhaseCorrectionGadget()
+    : profiles_counter_(0)
+    , slices_(-1), sets_(-1), channels_(-1)
+  {
+    // Defaults are given as strings; process_config() reads them via get_int_value()
+    set_parameter("mode", "3");
+    set_parameter("order", "6");
+    set_parameter("profiles", "500");
+  }
+  
+  // Parses the ISMRMRD XML header and the gadget properties, then allocates the
+  // per-(set,slice) fit state. Returns GADGET_FAIL on unsupported or invalid input.
+  int RadialPhaseCorrectionGadget::
+  process_config( ACE_Message_Block *mb )
+  {
+    ISMRMRD::IsmrmrdHeader h;
+    ISMRMRD::deserialize(mb->rd_ptr(),h);
+    if (h.encoding.size() != 1) {
+      GADGET_DEBUG1("This Gadget only supports one encoding space\n");
+      return GADGET_FAIL;
+    }
+
+    // Slice and set counts come from the encoding limits (one each if absent)
+    ISMRMRD::EncodingLimits e_limits = h.encoding[0].encodingLimits;
+    slices_ = e_limits.slice ? e_limits.slice->maximum + 1 : 1;
+    sets_ = e_limits.set ? e_limits.set->maximum + 1 : 1;
+
+    // Use 128 channels whenever the header does not state the count; leaving
+    // channels_ at -1 would hand the polyfit_ allocation below a bogus size.
+    channels_ = 128;
+    if (h.acquisitionSystemInformation && h.acquisitionSystemInformation->receiverChannels) {
+      channels_ = *h.acquisitionSystemInformation->receiverChannels;
+    }
+
+    mode_ = get_int_value(std::string("mode").c_str());
+    order_ = get_int_value(std::string("order").c_str());
+    // profiles_ is unsigned: test the signed value so a negative property fails
+    const int profiles = get_int_value(std::string("profiles").c_str());
+    if( profiles < 1 ) {
+      GADGET_DEBUG1("The number of profiles to estimate polynomial fit is too low.\n");
+      return GADGET_FAIL;
+    }
+    profiles_ = profiles;
+
+    fit_calculated_ = boost::shared_array<bool>(new bool[sets_*slices_]);
+    polyfit_ = boost::shared_array<double>(new double[(order_+1)*channels_*sets_*slices_]);
+    profiles_queue_ = boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>[slices_*sets_]);
+
+    size_t bsize = sizeof(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>)*profiles_*10;
+    for( int i=0; i<slices_*sets_; i++ ){
+      fit_calculated_[i] = false;
+      profiles_queue_[i].high_water_mark(bsize);
+      profiles_queue_[i].low_water_mark(bsize);
+    }
+    return GADGET_OK;
+  }
+  
+  // Buffers the first 'profiles_' profiles of each (set,slice), fits a polynomial in
+  // the projection angle to each coil's centre-sample phase, then phase corrects and
+  // forwards profiles. Returns GADGET_FAIL on queue errors or a channel count mismatch.
+  int RadialPhaseCorrectionGadget
+  ::process( GadgetContainerMessage<ISMRMRD::AcquisitionHeader> *m1,
+             GadgetContainerMessage< hoNDArray< std::complex<float> > > *m2 )
+  {
+    // Pass any noise measurements down the chain
+    bool is_noise = m1->getObjectPtr()->isFlagSet(ISMRMRD::ISMRMRD_ACQ_IS_NOISE_MEASUREMENT);
+    if (is_noise) { 
+      if (this->next()->putq(m1) < 0) {
+        GADGET_DEBUG1("Failed to pass on noise samples.\n");
+        return GADGET_FAIL;
+      }
+      return GADGET_OK;
+    }
+
+    // For now we require that this gadget is inserted before any coil reduction gadgets
+    //
+
+    if( channels_ != m1->getObjectPtr()->active_channels ){
+      GADGET_DEBUG1("Unexpected number of coils encountered. Did you insert the phase correction gadget after a coil reduction gadget? In that case invert the order of these gadgets\n");
+      return GADGET_FAIL;
+    }
+
+    unsigned int slice = m1->getObjectPtr()->idx.slice;
+    unsigned int set = m1->getObjectPtr()->idx.set;
+    int idx = set*slices_+slice;
+
+    if( !fit_calculated_[idx] ){
+
+      // Enqueue the first 'profiles_' profiles...
+      //
+      
+      profiles_queue_[idx].enqueue_tail(m1);
+
+      // ...before estimating the polynomial fit of order 'order_'
+      //
+
+      if( profiles_queue_[idx].message_count() == profiles_ ){
+
+        // Perform polynomial fit,
+        // assemble system matrix A.
+        //
+        
+        arma::mat A( profiles_, order_+1 );
+        
+        for( int m=0; m<profiles_; m++ ){
+
+          double angle = get_projection_angle(m);          
+
+          for( int n=0; n<order_+1; n++ ){
+            A(m,n) = pow( angle, double(n) );
+          }
+        }
+
+        // Assemble right hand side
+        //
+        
+        arma::mat b( profiles_, channels_ );
+        //double prev_phase[channels_];
+        std::vector<double> prev_phase(channels_);
+        ACE_Message_Queue<ACE_MT_SYNCH>::ITERATOR iter(profiles_queue_[idx]);
+        
+        for( int m=0; m<profiles_; m++ ){                     
+          
+          ACE_Message_Block* mbq = 0x0;
+          iter.next( mbq );
+          iter.advance();
+          
+          if(!mbq) {
+            GADGET_DEBUG1("Unable to interpret data on message queue (1)\n");
+            return GADGET_FAIL;
+          }
+          
+          GadgetContainerMessage< hoNDArray< std::complex<float> > > *_profile = 
+            AsContainerMessage< hoNDArray< std::complex<float> > >(mbq->cont());
+        
+          if(!_profile) {
+            GADGET_DEBUG1("Unable to interpret data on message queue (2)\n");
+            return GADGET_FAIL;
+          }
+          
+          hoNDArray< std::complex<float> > *profile = _profile->getObjectPtr();
+
+          // A unique fit for each coil
+          //
+
+          for( unsigned int coil=0; coil<channels_; coil++ ){
+            
+            // 'arg' returns angles in the interval (-pi;pi)
+            // Make sure that no discontinuities arise on the graph as they cannot be fitted
+            //
+            
+            std::complex<float> sample = profile->get_data_ptr()[coil*profile->get_size(0)+(profile->get_size(0)>>1)];
+            double phase = double(std::arg(sample));
+
+            if( m>0 && std::abs(phase-prev_phase[coil])>M_PI ){
+
+              // It appears as if phase wrapping has occurred, make correction...
+              //
+
+              if( phase<prev_phase[coil] )
+                phase += 2.0*M_PI;
+              else
+                phase -= 2.0*M_PI;                
+            }
+
+            b(m,coil) = phase;
+            prev_phase[coil] = phase;
+          }
+        }
+        
+        // Linear least squares fit, i.e. solve "A^T A x = A^T b"
+        //
+        
+        std::vector<size_t> dims; dims.push_back(order_+1); dims.push_back(channels_);
+        hoNDArray<double> vec( &dims, &polyfit_[set*(order_+1)*channels_*slices_+slice*(order_+1)*channels_] );
+
+        arma::mat x = as_arma_matrix(&vec);          
+        x = arma::solve(A.t()*A,A.t()*b);
+
+        /*
+        static int counter = 0;
+        char filename[256];
+        sprintf((char*)filename, "_polyfit_%d.real", counter);
+        write_nd_array<double>( &vec, filename );
+        */
+        
+        // Phase correct buffered profiles
+        //
+
+        for( int m=0; m<profiles_; m++ ){          
+
+          ACE_Message_Block *mbq;
+          if( profiles_queue_[idx].dequeue_head(mbq) < 0 ){
+            GADGET_DEBUG1("Message dequeue failed\n");
+            return GADGET_FAIL; // mbq is not valid after a failed dequeue
+          }
+
+          GadgetContainerMessage<ISMRMRD::AcquisitionHeader> *header = 
+            AsContainerMessage<ISMRMRD::AcquisitionHeader>(mbq);
+          
+          if(!header) {
+            GADGET_DEBUG1("Unable to interpret data on message queue (3)\n");
+            return GADGET_FAIL;
+          }
+
+          phase_correct(header);
+
+          if (this->next()->putq(header) < 0) {
+            GADGET_DEBUG1("Failed to put data on queue\n");
+            return GADGET_FAIL;
+          }          
+        }
+        fit_calculated_[idx] = true;
+      }
+    }
+    else{
+      
+      // Phase correct profile
+      //
+      
+      phase_correct(m1);
+      
+      if (this->next()->putq(m1) < 0) {
+        GADGET_DEBUG1("Failed to put data on queue\n");
+        return GADGET_FAIL;
+      }          
+    }
+
+    return GADGET_OK;
+  }  
+  
+
+  // Angle in [0, 2*pi) of the idx'th golden ratio projection (modes 2 and 3 only);
+  // throws std::runtime_error for any other trajectory mode.
+  double RadialPhaseCorrectionGadget
+  ::get_projection_angle( unsigned int idx )
+  {
+    const bool golden_original = (mode_ == 2);
+    if( !golden_original && mode_ != 3 ){
+      throw std::runtime_error("RadialPhaseCorrectionGadget: currently only trajectory modes 2 and 3 are supported (golden ratio)");
+    }
+
+    const double angle_step = golden_original
+      ? M_PI/((std::sqrt(5.0)+1.0)*0.5)   // GR_ORIGINAL
+      : M_PI*(3.0-std::sqrt(5.0))*0.5;    // GR_SMALLEST
+    return fmod(idx*angle_step, 2.0*M_PI);
+  }
+
+  // Subtracts the fitted phase (this profile's set/slice/coil polynomial evaluated
+  // at the current projection angle) from every sample of the array attached to m1.
+  // NOTE(review): profiles_counter_ is shared by all sets and slices -- confirm intent.
+  void RadialPhaseCorrectionGadget
+  ::phase_correct( GadgetContainerMessage<ISMRMRD::AcquisitionHeader> *m1 )
+  {
+    unsigned int slice = m1->getObjectPtr()->idx.slice;
+    unsigned int set = m1->getObjectPtr()->idx.set;
+    double angle = get_projection_angle(profiles_counter_);
+
+    // The attached sample array is the same for every coil, so resolve it once
+    GadgetContainerMessage< hoNDArray< std::complex<float> > > *_profile =
+      AsContainerMessage<hoNDArray< std::complex<float> > >(m1->cont());
+    if(!_profile) {
+      GADGET_DEBUG1("Unable to phase correct profile\n");
+      return;
+    }
+    hoNDArray< std::complex<float> > *profile = _profile->getObjectPtr();
+    const size_t samples = profile->get_size(0);
+
+    for( unsigned int coil=0; coil<channels_; coil++ ){
+      // Evaluate this coil's fitted polynomial at 'angle'
+      double estimated_phase = 0.0;
+      for( unsigned int i=0; i<order_+1; i++ ){
+        double weight = polyfit_[set*(order_+1)*channels_*slices_ +
+                                 slice*(order_+1)*channels_ +
+                                 coil*(order_+1) +
+                                 i ];
+        estimated_phase += (weight*std::pow(angle, double(i)));
+      }
+
+      // Keep the OpenMP loop counter an int; cast the bound so the comparison is not mixed-sign
+#ifdef USE_OMP
+#pragma omp parallel for
+#endif
+      for( int i=0; i<static_cast<int>(samples); i++ ){
+        std::complex<float> sample = profile->get_data_ptr()[coil*samples+i];
+        float phase = std::arg(sample);
+        float mag = std::abs(sample);
+        profile->get_data_ptr()[coil*samples+i] = std::polar( mag, phase-float(estimated_phase) );
+      }
+    }
+    profiles_counter_++;
+  }
+
+  GADGET_FACTORY_DECLARE(RadialPhaseCorrectionGadget)
+
+} // namespace Gadgetron
diff --git a/gadgets/radial/RadialPhaseCorrectionGadget.h b/gadgets/radial/RadialPhaseCorrectionGadget.h
new file mode 100644
index 0000000..6c65337
--- /dev/null
+++ b/gadgets/radial/RadialPhaseCorrectionGadget.h
@@ -0,0 +1,44 @@
+#pragma once
+
+#include "Gadget.h"
+#include "hoNDArray.h"
+#include "gadgetron_radial_export.h"
+
+#include <boost/shared_ptr.hpp>
+#include <boost/shared_array.hpp>
+#include <ismrmrd/ismrmrd.h>
+#include <complex>
+
+namespace Gadgetron {
+
+  // Gadget that fits, per coil, a polynomial in the projection angle to the phase of the
+  // centre sample of the first profiles, and subtracts that fitted phase from each profile.
+  class EXPORTGADGETS_RADIAL RadialPhaseCorrectionGadget :
+    public Gadget2<ISMRMRD::AcquisitionHeader, hoNDArray< std::complex<float> > >
+  {
+  public:
+    GADGET_DECLARE(RadialPhaseCorrectionGadget);
+    RadialPhaseCorrectionGadget();
+    ~RadialPhaseCorrectionGadget() {};
+  protected:
+    // Reads the header and the mode/order/profiles properties, allocates the fit buffers
+    virtual int process_config( ACE_Message_Block *mb );
+    // Queues profiles until the fit exists; afterwards phase corrects and forwards them
+    virtual int process( GadgetContainerMessage<ISMRMRD::AcquisitionHeader> *m1,
+                         GadgetContainerMessage< hoNDArray< std::complex<float> > > *m2);
+    // Property values and per-(set,slice) fit state
+    unsigned int mode_;             // trajectory mode; only 2 and 3 (golden ratio) are supported
+    unsigned int order_;            // polynomial order of the phase fit
+    unsigned int profiles_;         // profiles buffered per (set,slice) before fitting
+    unsigned int profiles_counter_; // projection index used by phase_correct(), bumped per call
+    int slices_;
+    int sets_;
+    int channels_;
+    boost::shared_array<bool> fit_calculated_;   // indexed by set*slices_+slice
+    boost::shared_array<double> polyfit_;        // coefficients indexed [set][slice][coil][order_+1]
+    boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> > profiles_queue_;
+  private:
+    double get_projection_angle( unsigned int profile_idx );
+    void phase_correct( GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* );
+  };
+}
diff --git a/gadgets/radial/config/CMakeLists.txt b/gadgets/radial/config/CMakeLists.txt
index 3b4c966..d61f829 100644
--- a/gadgets/radial/config/CMakeLists.txt
+++ b/gadgets/radial/config/CMakeLists.txt
@@ -6,14 +6,17 @@ if (ARMADILLO_FOUND)
     fixed_radial_mode0_gpusense_cg.xml 
     fixed_radial_mode1_gpusense_cg.xml 
     golden_radial_mode2_gpusense_cg.xml 
+    golden_radial_mode3_gpusense_cg.xml 
     fixed_radial_mode0_gpusense_sb.xml 
     fixed_radial_mode1_gpusense_sb.xml 
-    golden_radial_mode2_gpusense_sb.xml 
+    golden_radial_mode2_gpusense_sb.xml
+    golden_radial_mode2_gpusense_nlcg.xml  
     golden_radial_mode3_gpusense_sb.xml 
     fixed_radial_mode0_gpu_ktsense.xml 
     fixed_radial_mode1_gpu_ktsense.xml 
     golden_radial_mode2_gpu_ktsense.xml 
-    DESTINATION config)
+    spirit.xml
+    DESTINATION ${GADGETRON_INSTALL_CONFIG_PATH} COMPONENT main)
 elseif (ARMADILLO_FOUND)
   MESSAGE("Armadillo not found, only unoptimized radial config files will be available")
 endif (ARMADILLO_FOUND)
@@ -22,7 +25,8 @@ install (FILES
   fixed_radial_mode0_gpusense_cg_unoptimized.xml 
   fixed_radial_mode1_gpusense_cg_unoptimized.xml 
   golden_radial_mode2_gpusense_cg_unoptimized.xml 
+  golden_radial_mode2_gpusense_nlcg_unoptimized.xml
   fixed_radial_mode0_gpusense_sb_unoptimized.xml 
   fixed_radial_mode1_gpusense_sb_unoptimized.xml 
   golden_radial_mode2_gpusense_sb_unoptimized.xml 
-  DESTINATION config)
+  DESTINATION ${GADGETRON_INSTALL_CONFIG_PATH} COMPONENT main)
diff --git a/gadgets/radial/config/fixed_radial_mode0_gpu_ktsense.xml b/gadgets/radial/config/fixed_radial_mode0_gpu_ktsense.xml
index 971d86a..cf3a0ab 100644
--- a/gadgets/radial/config/fixed_radial_mode0_gpu_ktsense.xml
+++ b/gadgets/radial/config/fixed_radial_mode0_gpu_ktsense.xml
@@ -60,7 +60,7 @@
     
     <gadget>
       <name>gpuCgKtSenseGadget_slice0</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgKtSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -76,7 +76,7 @@
 
     <gadget>
       <name>gpuCgKtSenseGadget_slice1</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgKtSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -92,7 +92,7 @@
 
     <gadget>
       <name>gpuCgKtSenseGadget_slice2</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgKtSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/radial/config/fixed_radial_mode0_gpusense_cg.xml b/gadgets/radial/config/fixed_radial_mode0_gpusense_cg.xml
index 202506c..74f292f 100644
--- a/gadgets/radial/config/fixed_radial_mode0_gpusense_cg.xml
+++ b/gadgets/radial/config/fixed_radial_mode0_gpusense_cg.xml
@@ -59,7 +59,7 @@
     
     <gadget>
       <name>gpuCgSenseGadget_slice0</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -74,7 +74,7 @@
 
     <gadget>
       <name>gpuCgSenseGadget_slice1</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -89,7 +89,7 @@
 
     <gadget>
       <name>gpuCgSenseGadget_slice2</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/radial/config/fixed_radial_mode0_gpusense_cg_unoptimized.xml b/gadgets/radial/config/fixed_radial_mode0_gpusense_cg_unoptimized.xml
index 9dc0987..2d5c7a8 100644
--- a/gadgets/radial/config/fixed_radial_mode0_gpusense_cg_unoptimized.xml
+++ b/gadgets/radial/config/fixed_radial_mode0_gpusense_cg_unoptimized.xml
@@ -46,7 +46,7 @@
     
     <gadget>
       <name>gpuCgSenseGadget_slice0</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -61,7 +61,7 @@
 
     <gadget>
       <name>gpuCgSenseGadget_slice1</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -76,7 +76,7 @@
 
     <gadget>
       <name>gpuCgSenseGadget_slice2</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/radial/config/fixed_radial_mode0_gpusense_sb.xml b/gadgets/radial/config/fixed_radial_mode0_gpusense_sb.xml
index 1df2f3e..2d3fa4d 100644
--- a/gadgets/radial/config/fixed_radial_mode0_gpusense_sb.xml
+++ b/gadgets/radial/config/fixed_radial_mode0_gpusense_sb.xml
@@ -60,7 +60,7 @@
     
     <gadget>
       <name>gpuSbSenseGadget_slice0</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuSbSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -78,7 +78,7 @@
 
     <gadget>
       <name>gpuSbSenseGadget_slice1</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuSbSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -96,7 +96,7 @@
 
     <gadget>
       <name>gpuSbSenseGadget_slice2</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuSbSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/radial/config/fixed_radial_mode0_gpusense_sb_unoptimized.xml b/gadgets/radial/config/fixed_radial_mode0_gpusense_sb_unoptimized.xml
index 68bc150..3467409 100644
--- a/gadgets/radial/config/fixed_radial_mode0_gpusense_sb_unoptimized.xml
+++ b/gadgets/radial/config/fixed_radial_mode0_gpusense_sb_unoptimized.xml
@@ -47,7 +47,7 @@
     
     <gadget>
       <name>gpuSbSenseGadget_slice0</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuSbSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -65,7 +65,7 @@
 
     <gadget>
       <name>gpuSbSenseGadget_slice1</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuSbSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -83,7 +83,7 @@
 
     <gadget>
       <name>gpuSbSenseGadget_slice2</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuSbSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/radial/config/fixed_radial_mode0_realtime.xml b/gadgets/radial/config/fixed_radial_mode0_realtime.xml
index 9299321..ddc7661 100644
--- a/gadgets/radial/config/fixed_radial_mode0_realtime.xml
+++ b/gadgets/radial/config/fixed_radial_mode0_realtime.xml
@@ -58,7 +58,7 @@
     
     <gadget>
       <name>gpuCgSenseGadget_slice0</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -72,7 +72,7 @@
 
     <gadget>
       <name>gpuCgSenseGadget_slice1</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -86,7 +86,7 @@
 
     <gadget>
       <name>gpuCgSenseGadget_slice2</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/radial/config/fixed_radial_mode1_gpu_ktsense.xml b/gadgets/radial/config/fixed_radial_mode1_gpu_ktsense.xml
index 17cbdd4..563be86 100644
--- a/gadgets/radial/config/fixed_radial_mode1_gpu_ktsense.xml
+++ b/gadgets/radial/config/fixed_radial_mode1_gpu_ktsense.xml
@@ -60,7 +60,7 @@
     
     <gadget>
       <name>gpuCgKtSenseGadget_slice0</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgKtSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -76,7 +76,7 @@
 
     <gadget>
       <name>gpuCgKtSenseGadget_slice1</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgKtSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -92,7 +92,7 @@
 
     <gadget>
       <name>gpuCgKtSenseGadget_slice2</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgKtSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/radial/config/fixed_radial_mode1_gpusense_cg.xml b/gadgets/radial/config/fixed_radial_mode1_gpusense_cg.xml
index a4aae87..6c1d0b0 100644
--- a/gadgets/radial/config/fixed_radial_mode1_gpusense_cg.xml
+++ b/gadgets/radial/config/fixed_radial_mode1_gpusense_cg.xml
@@ -59,7 +59,7 @@
     
     <gadget>
       <name>gpuCgSenseGadget_slice0</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -74,7 +74,7 @@
 
     <gadget>
       <name>gpuCgSenseGadget_slice1</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -89,7 +89,7 @@
 
     <gadget>
       <name>gpuCgSenseGadget_slice2</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/radial/config/fixed_radial_mode1_gpusense_cg_unoptimized.xml b/gadgets/radial/config/fixed_radial_mode1_gpusense_cg_unoptimized.xml
index 12bc114..6db3f90 100644
--- a/gadgets/radial/config/fixed_radial_mode1_gpusense_cg_unoptimized.xml
+++ b/gadgets/radial/config/fixed_radial_mode1_gpusense_cg_unoptimized.xml
@@ -46,7 +46,7 @@
     
     <gadget>
       <name>gpuCgSenseGadget_slice0</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -61,7 +61,7 @@
 
     <gadget>
       <name>gpuCgSenseGadget_slice1</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -76,7 +76,7 @@
 
     <gadget>
       <name>gpuCgSenseGadget_slice2</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/radial/config/fixed_radial_mode1_gpusense_sb.xml b/gadgets/radial/config/fixed_radial_mode1_gpusense_sb.xml
index ba6e2e1..997156f 100644
--- a/gadgets/radial/config/fixed_radial_mode1_gpusense_sb.xml
+++ b/gadgets/radial/config/fixed_radial_mode1_gpusense_sb.xml
@@ -60,7 +60,7 @@
     
     <gadget>
       <name>gpuSbSenseGadget_slice0</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuSbSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -78,7 +78,7 @@
 
     <gadget>
       <name>gpuSbSenseGadget_slice1</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuSbSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -96,7 +96,7 @@
 
     <gadget>
       <name>gpuSbSenseGadget_slice2</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuSbSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/radial/config/fixed_radial_mode1_gpusense_sb_unoptimized.xml b/gadgets/radial/config/fixed_radial_mode1_gpusense_sb_unoptimized.xml
index 4c1a251..bcafd2c 100644
--- a/gadgets/radial/config/fixed_radial_mode1_gpusense_sb_unoptimized.xml
+++ b/gadgets/radial/config/fixed_radial_mode1_gpusense_sb_unoptimized.xml
@@ -47,7 +47,7 @@
     
     <gadget>
       <name>gpuSbSenseGadget_slice0</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuSbSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -65,7 +65,7 @@
 
     <gadget>
       <name>gpuSbSenseGadget_slice1</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuSbSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -83,7 +83,7 @@
 
     <gadget>
       <name>gpuSbSenseGadget_slice2</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuSbSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/radial/config/fixed_radial_mode1_realtime.xml b/gadgets/radial/config/fixed_radial_mode1_realtime.xml
index 3a4a0ff..216876e 100644
--- a/gadgets/radial/config/fixed_radial_mode1_realtime.xml
+++ b/gadgets/radial/config/fixed_radial_mode1_realtime.xml
@@ -58,7 +58,7 @@
     
     <gadget>
       <name>gpuCgSenseGadget_slice0</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -72,7 +72,7 @@
 
     <gadget>
       <name>gpuCgSenseGadget_slice1</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -86,7 +86,7 @@
 
     <gadget>
       <name>gpuCgSenseGadget_slice2</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/radial/config/golden_radial_mode2_gpu_ktsense.xml b/gadgets/radial/config/golden_radial_mode2_gpu_ktsense.xml
index 8d49c1b..54ac75f 100644
--- a/gadgets/radial/config/golden_radial_mode2_gpu_ktsense.xml
+++ b/gadgets/radial/config/golden_radial_mode2_gpu_ktsense.xml
@@ -62,7 +62,7 @@
     
     <gadget>
       <name>gpuCgKtSenseGadget_slice0</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgKtSenseGadget</classname>
       <property><name>pass_on_undesired_data</name><value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -78,7 +78,7 @@
 
     <gadget>
       <name>gpuCgKtSenseGadget_slice1</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgKtSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -94,7 +94,7 @@
 
     <gadget>
       <name>gpuCgKtSenseGadget_slice2</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgKtSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/radial/config/golden_radial_mode2_gpusense_cg.xml b/gadgets/radial/config/golden_radial_mode2_gpusense_cg.xml
index 683203c..5ef16f5 100644
--- a/gadgets/radial/config/golden_radial_mode2_gpusense_cg.xml
+++ b/gadgets/radial/config/golden_radial_mode2_gpusense_cg.xml
@@ -61,7 +61,7 @@
     
     <gadget>
       <name>gpuCgSenseGadget_slice0</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name><value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -76,7 +76,7 @@
 
     <gadget>
       <name>gpuCgSenseGadget_slice1</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -91,7 +91,7 @@
 
     <gadget>
       <name>gpuCgSenseGadget_slice2</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/radial/config/golden_radial_mode2_gpusense_cg_unoptimized.xml b/gadgets/radial/config/golden_radial_mode2_gpusense_cg_unoptimized.xml
index 29bb68c..0d1b913 100644
--- a/gadgets/radial/config/golden_radial_mode2_gpusense_cg_unoptimized.xml
+++ b/gadgets/radial/config/golden_radial_mode2_gpusense_cg_unoptimized.xml
@@ -48,7 +48,7 @@
     
     <gadget>
       <name>gpuCgSenseGadget_slice0</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name><value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -63,7 +63,7 @@
 
     <gadget>
       <name>gpuCgSenseGadget_slice1</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -78,7 +78,7 @@
 
     <gadget>
       <name>gpuCgSenseGadget_slice2</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/radial/config/golden_radial_mode2_gpusense_nlcg.xml b/gadgets/radial/config/golden_radial_mode2_gpusense_nlcg.xml
new file mode 100644
index 0000000..4631488
--- /dev/null
+++ b/gadgets/radial/config/golden_radial_mode2_gpusense_nlcg.xml
@@ -0,0 +1,158 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+         
+    <reader>
+      <slot>1008</slot>
+      <dll>gadgetron_mricore</dll>
+      <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+    <writer>
+      <slot>1004</slot>
+      <dll>gadgetron_mricore</dll>
+      <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+      <slot>1005</slot>
+      <dll>gadgetron_mricore</dll>
+      <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+      <slot>1006</slot>
+      <dll>gadgetron_mricore</dll>
+      <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <gadget>
+      <name>NoiseAdjust</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <gadget>
+      <name>PCA</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>PCACoilGadget</classname>
+    </gadget>
+
+    <gadget>
+      <name>CoilReduction</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>CoilReductionGadget</classname>
+      <property><name>coils_out</name><value>16</value></property>
+    </gadget>
+    
+    <gadget>
+      <name>gpuRadialSensePrepGadget</name>
+      <dll>gadgetron_gpuradial</dll>
+      <classname>gpuRadialSensePrepGadget</classname>
+      <property><name>deviceno</name><value>0</value></property>
+      <property><name>mode</name><value>2</value></property>
+      <property><name>profiles_per_frame</name><value>16</value></property>
+      <property><name>rotations_per_reconstruction</name><value>32</value></property>
+      <property><name>buffer_using_solver</name><value>true</value></property>
+      <property><name>buffer_frames_per_rotation</name><value>32</value></property>
+      <property><name>buffer_length_in_rotations</name><value>1</value></property>
+      <property><name>buffer_convolution_kernel_width</name><value>5.5</value></property>
+      <property><name>buffer_convolution_oversampling_factor</name><value>1.25</value></property>
+      <property><name>reconstruction_os_factor_x</name><value>1.5</value></property>
+      <property><name>reconstruction_os_factor_y</name><value>1.5</value></property>
+    </gadget>
+    
+      <gadget>
+      <name>gpuNlcgSenseGadget_slice0</name>
+      <dll>gadgetron_gpuparallelmri</dll>
+      <classname>gpuNlcgSenseGadget</classname>
+      <property><name>pass_on_undesired_data</name>  <value>true</value></property>
+      <property><name>deviceno</name>                <value>0</value></property>
+      <property><name>sliceno</name>                 <value>0</value></property>
+      <property><name>number_of_cg_iterations</name> <value>50</value></property>
+      <property><name>cg_limit</name>                <value>1e-6</value></property>
+      <property><name>oversampling_factor</name>     <value>1.25</value></property>
+      <property><name>kernel_width</name>            <value>5.5</value></property>
+      <property><name>lambda</name>                  <value>1e-6</value></property>
+      <property><name>alpha</name>                   <value>0.5</value></property>
+      <property><name>output_convergence</name><value>true</value></property>
+    </gadget>
+      <gadget>
+      <name>gpuNlcgSenseGadget_slice1</name>
+      <dll>gadgetron_gpuparallelmri</dll>
+      <classname>gpuNlcgSenseGadget</classname>
+      <property><name>pass_on_undesired_data</name>  <value>true</value></property>
+      <property><name>deviceno</name>                <value>0</value></property>
+      <property><name>sliceno</name>                 <value>0</value></property>
+      <property><name>number_of_cg_iterations</name> <value>50</value></property>
+      <property><name>cg_limit</name>                <value>1e-6</value></property>
+      <property><name>oversampling_factor</name>     <value>1.25</value></property>
+      <property><name>kernel_width</name>            <value>5.5</value></property>
+      <property><name>lambda</name>                  <value>1e-6</value></property>
+      <property><name>alpha</name>                   <value>0.5</value></property>
+      <property><name>output_convergence</name><value>true</value></property>
+    </gadget>
+
+       <gadget>
+      <name>gpuNlcgSenseGadget_slice2</name>
+      <dll>gadgetron_gpuparallelmri</dll>
+      <classname>gpuNlcgSenseGadget</classname>
+      <property><name>pass_on_undesired_data</name>  <value>true</value></property>
+      <property><name>deviceno</name>                <value>0</value></property>
+      <property><name>sliceno</name>                 <value>0</value></property>
+      <property><name>number_of_cg_iterations</name> <value>50</value></property>
+      <property><name>cg_limit</name>                <value>1e-6</value></property>
+      <property><name>oversampling_factor</name>     <value>1.25</value></property>
+      <property><name>kernel_width</name>            <value>5.5</value></property>
+      <property><name>lambda</name>                  <value>1e-6</value></property>
+      <property><name>alpha</name>                   <value>0.5</value></property>
+      <property><name>output_convergence</name><value>true</value></property>
+    </gadget>
+
+     <gadget>
+      <name>Extract</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ExtractGadget</classname>
+    </gadget>
+
+    <!--
+	<gadget>
+	<name>ImageWrite</name>
+	<dll>gadgetron_mricore</dll>
+	<classname>ImageWriterGadgetFLOAT</classname>
+	</gadget>
+    -->
+    
+    <gadget>
+      <name>AutoScale</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>AutoScaleGadget</classname>
+    </gadget>
+        
+    <gadget>
+      <name>FloatToShort</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>FloatToUShortGadget</classname>
+    </gadget>
+    
+    <!--
+	<gadget>
+	<name>ImageFinishCPLX</name>
+	<dll>gadgetron_mricore</dll>
+	<classname>ImageFinishGadgetCPLX</classname>
+	</gadget>
+    -->
+    
+    <!--
+	<gadget>
+	<name>ImageFinishFLOAT</name>
+	<dll>gadgetron_mricore</dll>
+	<classname>ImageFinishGadgetFLOAT</classname>
+	</gadget>
+    -->
+    
+    <gadget>
+      <name>ImageFinishUSHORT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishGadgetUSHORT</classname>
+    </gadget>
+    
+</gadgetronStreamConfiguration>
diff --git a/gadgets/radial/config/golden_radial_mode2_gpusense_nlcg_unoptimized.xml b/gadgets/radial/config/golden_radial_mode2_gpusense_nlcg_unoptimized.xml
new file mode 100644
index 0000000..f0bb427
--- /dev/null
+++ b/gadgets/radial/config/golden_radial_mode2_gpusense_nlcg_unoptimized.xml
@@ -0,0 +1,146 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+         
+    <reader>
+      <slot>1008</slot>
+      <dll>gadgetron_mricore</dll>
+      <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+    <writer>
+      <slot>1004</slot>
+      <dll>gadgetron_mricore</dll>
+      <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+      <slot>1005</slot>
+      <dll>gadgetron_mricore</dll>
+      <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+      <slot>1006</slot>
+      <dll>gadgetron_mricore</dll>
+      <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <gadget>
+      <name>NoiseAdjust</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>NoiseAdjustGadget_unoptimized</classname>
+    </gadget>
+    
+    <gadget>
+      <name>gpuRadialSensePrepGadget</name>
+      <dll>gadgetron_gpuradial</dll>
+      <classname>gpuRadialSensePrepGadget</classname>
+      <property><name>deviceno</name><value>0</value></property>
+      <property><name>mode</name><value>2</value></property>
+      <property><name>profiles_per_frame</name><value>16</value></property>
+      <property><name>rotations_per_reconstruction</name><value>32</value></property>
+      <property><name>buffer_using_solver</name><value>true</value></property>
+      <property><name>buffer_frames_per_rotation</name><value>32</value></property>
+      <property><name>buffer_length_in_rotations</name><value>1</value></property>
+      <property><name>buffer_convolution_kernel_width</name><value>5.5</value></property>
+      <property><name>buffer_convolution_oversampling_factor</name><value>1.25</value></property>
+      <property><name>reconstruction_os_factor_x</name><value>1.5</value></property>
+      <property><name>reconstruction_os_factor_y</name><value>1.5</value></property>
+      <property><name>output_convergence</name><value>true</value></property>
+    </gadget>
+    
+    <gadget>
+      <name>gpuNlcgSenseGadget_slice0</name>
+      <dll>gadgetron_gpuparallelmri</dll>
+      <classname>gpuNlcgSenseGadget</classname>
+      <property><name>pass_on_undesired_data</name>  <value>true</value></property>
+      <property><name>deviceno</name>                <value>0</value></property>
+      <property><name>sliceno</name>                 <value>0</value></property>
+      <property><name>number_of_cg_iterations</name> <value>30</value></property>
+      <property><name>cg_limit</name>                <value>1e-6</value></property>
+      <property><name>oversampling_factor</name>     <value>1.25</value></property>
+      <property><name>kernel_width</name>            <value>5.5</value></property>
+      <property><name>lambda</name>                  <value>0.01</value></property>
+      <property><name>alpha</name>                   <value>0.5</value></property>
+      <property><name>output_convergence</name><value>true</value></property>
+    </gadget>
+
+    <gadget>
+      <name>gpuNlcgSenseGadget_slice1</name>
+      <dll>gadgetron_gpuparallelmri</dll>
+      <classname>gpuNlcgSenseGadget</classname>
+      <property><name>pass_on_undesired_data</name>  <value>true</value></property>
+      <property><name>deviceno</name>                <value>0</value></property>
+      <property><name>sliceno</name>                 <value>0</value></property>
+      <property><name>number_of_cg_iterations</name> <value>30</value></property>
+      <property><name>cg_limit</name>                <value>1e-6</value></property>
+      <property><name>oversampling_factor</name>     <value>1.25</value></property>
+      <property><name>kernel_width</name>            <value>5.5</value></property>
+      <property><name>lambda</name>                  <value>0.01</value></property>
+      <property><name>alpha</name>                   <value>0.5</value></property>
+      <property><name>output_convergence</name><value>true</value></property>
+    </gadget>
+    
+    <gadget>
+      <name>gpuNlcgSenseGadget_slice2</name>
+      <dll>gadgetron_gpuparallelmri</dll>
+      <classname>gpuNlcgSenseGadget</classname>
+      <property><name>pass_on_undesired_data</name>  <value>true</value></property>
+      <property><name>deviceno</name>                <value>0</value></property>
+      <property><name>sliceno</name>                 <value>0</value></property>
+      <property><name>number_of_cg_iterations</name> <value>30</value></property>
+      <property><name>cg_limit</name>                <value>1e-6</value></property>
+      <property><name>oversampling_factor</name>     <value>1.25</value></property>
+      <property><name>kernel_width</name>            <value>5.5</value></property>
+      <property><name>lambda</name>                  <value>0.01</value></property>
+      <property><name>alpha</name>                   <value>0.5</value></property>
+    </gadget>
+    
+     <gadget>
+      <name>Extract</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ExtractGadget</classname>
+    </gadget>
+
+    <!--
+	<gadget>
+	<name>ImageWrite</name>
+	<dll>gadgetron_mricore</dll>
+	<classname>ImageWriterGadgetFLOAT</classname>
+	</gadget>
+    -->
+    
+    <gadget>
+      <name>AutoScale</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>AutoScaleGadget</classname>
+    </gadget>
+        
+    <gadget>
+      <name>FloatToShort</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>FloatToUShortGadget</classname>
+    </gadget>
+    
+    <!--
+	<gadget>
+	<name>ImageFinishCPLX</name>
+	<dll>gadgetron_mricore</dll>
+	<classname>ImageFinishGadgetCPLX</classname>
+	</gadget>
+    -->
+    
+    <!--
+	<gadget>
+	<name>ImageFinishFLOAT</name>
+	<dll>gadgetron_mricore</dll>
+	<classname>ImageFinishGadgetFLOAT</classname>
+	</gadget>
+    -->
+    
+    <gadget>
+      <name>ImageFinishUSHORT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishGadgetUSHORT</classname>
+    </gadget>
+    
+</gadgetronStreamConfiguration>
diff --git a/gadgets/radial/config/golden_radial_mode2_gpusense_sb.xml b/gadgets/radial/config/golden_radial_mode2_gpusense_sb.xml
index dec345b..9933698 100644
--- a/gadgets/radial/config/golden_radial_mode2_gpusense_sb.xml
+++ b/gadgets/radial/config/golden_radial_mode2_gpusense_sb.xml
@@ -62,7 +62,7 @@
     
     <gadget>
       <name>gpuSbSenseGadget_slice0</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuSbSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -80,7 +80,7 @@
 
     <gadget>
       <name>gpuSbSenseGadget_slice1</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuSbSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -98,7 +98,7 @@
 
     <gadget>
       <name>gpuSbSenseGadget_slice2</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuSbSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/radial/config/golden_radial_mode2_gpusense_sb_unoptimized.xml b/gadgets/radial/config/golden_radial_mode2_gpusense_sb_unoptimized.xml
index 34289ca..c87f945 100644
--- a/gadgets/radial/config/golden_radial_mode2_gpusense_sb_unoptimized.xml
+++ b/gadgets/radial/config/golden_radial_mode2_gpusense_sb_unoptimized.xml
@@ -49,7 +49,7 @@
     
     <gadget>
       <name>gpuSbSenseGadget_slice0</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuSbSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -67,7 +67,7 @@
 
     <gadget>
       <name>gpuSbSenseGadget_slice1</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuSbSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -85,7 +85,7 @@
 
     <gadget>
       <name>gpuSbSenseGadget_slice2</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuSbSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/radial/config/golden_radial_mode2_realtime.xml b/gadgets/radial/config/golden_radial_mode2_realtime.xml
index b2fc23e..8bffb52 100644
--- a/gadgets/radial/config/golden_radial_mode2_realtime.xml
+++ b/gadgets/radial/config/golden_radial_mode2_realtime.xml
@@ -59,7 +59,7 @@
     
     <gadget>
       <name>gpuCgSenseGadget_slice0</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name><value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -73,7 +73,7 @@
 
     <gadget>
       <name>gpuCgSenseGadget_slice1</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name><value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -87,7 +87,7 @@
 
     <gadget>
       <name>gpuCgSenseGadget_slice2</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuCgSenseGadget</classname>
       <property><name>pass_on_undesired_data</name><value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/radial/config/golden_radial_mode3_gpusense_cg.xml b/gadgets/radial/config/golden_radial_mode3_gpusense_cg.xml
new file mode 100644
index 0000000..e05404f
--- /dev/null
+++ b/gadgets/radial/config/golden_radial_mode3_gpusense_cg.xml
@@ -0,0 +1,155 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+        xmlns="http://gadgetron.sf.net/gadgetron"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+    <!-- Radial stream (prep gadget in mode 3) reconstructed by one gpuCgSenseGadget per slice, delivered as USHORT images. -->
+    <reader>
+      <slot>1008</slot>
+      <dll>gadgetron_mricore</dll>
+      <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+    </reader>
+    <writer>
+      <slot>1004</slot>
+      <dll>gadgetron_mricore</dll>
+      <classname>MRIImageWriterCPLX</classname>
+    </writer>
+    <writer>
+      <slot>1005</slot>
+      <dll>gadgetron_mricore</dll>
+      <classname>MRIImageWriterFLOAT</classname>
+    </writer>
+    <writer>
+      <slot>1006</slot>
+      <dll>gadgetron_mricore</dll>
+      <classname>MRIImageWriterUSHORT</classname>
+    </writer>
+
+    <gadget>
+      <name>NoiseAdjust</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>NoiseAdjustGadget</classname>
+    </gadget>
+
+    <gadget>
+      <name>PCA</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>PCACoilGadget</classname>
+    </gadget>
+
+    <gadget>
+      <name>CoilReduction</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>CoilReductionGadget</classname>
+      <property><name>coils_out</name><value>16</value></property>
+    </gadget>
+    
+    <gadget>
+      <name>gpuRadialSensePrepGadget</name>
+      <dll>gadgetron_gpuradial</dll>
+      <classname>gpuRadialSensePrepGadget</classname>
+      <property><name>deviceno</name><value>0</value></property>
+      <property><name>mode</name><value>3</value></property>
+      <property><name>profiles_per_frame</name><value>32</value></property>
+      <property><name>rotations_per_reconstruction</name><value>8</value></property>
+      <property><name>buffer_frames_per_rotation</name><value>8</value></property>
+      <property><name>buffer_length_in_rotations</name><value>4</value></property>
+      <property><name>buffer_convolution_kernel_width</name><value>5.5</value></property>
+      <property><name>buffer_convolution_oversampling_factor</name><value>1.25</value></property>
+      <property><name>reconstruction_os_factor_x</name><value>1.5</value></property>
+      <property><name>reconstruction_os_factor_y</name><value>1.5</value></property>
+    </gadget>
+    <!-- Three solver gadgets, one per sliceno (0, 1, 2); all use device 0 and otherwise identical property values. -->
+    <gadget>
+      <name>gpuCgSenseGadget_slice0</name>
+      <dll>gadgetron_gpuparallelmri</dll>
+      <classname>gpuCgSenseGadget</classname>
+      <property><name>pass_on_undesired_data</name><value>true</value></property>
+      <property><name>deviceno</name>                <value>0</value></property>
+      <property><name>sliceno</name>                 <value>0</value></property>
+      <property><name>number_of_iterations</name>    <value>40</value></property>
+      <property><name>cg_limit</name>                <value>1e-6</value></property>
+      <property><name>oversampling_factor</name>     <value>1.25</value></property>
+      <property><name>kernel_width</name>            <value>5.5</value></property>
+      <property><name>kappa</name>                   <value>0.3</value></property>
+      <property><name>output_convergence</name><value>true</value></property>
+    </gadget>
+
+    <gadget>
+      <name>gpuCgSenseGadget_slice1</name>
+      <dll>gadgetron_gpuparallelmri</dll>
+      <classname>gpuCgSenseGadget</classname>
+      <property><name>pass_on_undesired_data</name>  <value>true</value></property>
+      <property><name>deviceno</name>                <value>0</value></property>
+      <property><name>sliceno</name>                 <value>1</value></property>
+      <property><name>number_of_iterations</name>    <value>40</value></property>
+      <property><name>cg_limit</name>                <value>1e-6</value></property>
+      <property><name>oversampling_factor</name>     <value>1.25</value></property>
+      <property><name>kernel_width</name>            <value>5.5</value></property>
+      <property><name>kappa</name>                   <value>0.3</value></property>
+      <property><name>output_convergence</name><value>true</value></property>
+    </gadget>
+
+    <gadget>
+      <name>gpuCgSenseGadget_slice2</name>
+      <dll>gadgetron_gpuparallelmri</dll>
+      <classname>gpuCgSenseGadget</classname>
+      <property><name>pass_on_undesired_data</name>  <value>true</value></property>
+      <property><name>deviceno</name>                <value>0</value></property>
+      <property><name>sliceno</name>                 <value>2</value></property>
+      <property><name>number_of_iterations</name>    <value>40</value></property>
+      <property><name>cg_limit</name>                <value>1e-6</value></property>
+      <property><name>oversampling_factor</name>     <value>1.25</value></property>
+      <property><name>kernel_width</name>            <value>5.5</value></property>
+      <property><name>kappa</name>                   <value>0.3</value></property>
+      <property><name>output_convergence</name><value>true</value></property>
+    </gadget>
+
+     <gadget>
+      <name>Extract</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ExtractGadget</classname>
+    </gadget>
+
+    <!--
+	<gadget>
+	<name>ImageWrite</name>
+	<dll>gadgetron_mricore</dll>
+	<classname>ImageWriterGadgetFLOAT</classname>
+	</gadget>
+    -->
+    <!-- Active output path: AutoScale, FloatToShort, then ImageFinishUSHORT; the CPLX and FLOAT finish gadgets stay disabled. -->
+    <gadget>
+      <name>AutoScale</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>AutoScaleGadget</classname>
+    </gadget>
+        
+    <gadget>
+      <name>FloatToShort</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>FloatToUShortGadget</classname>
+    </gadget>
+    
+    <!--
+	<gadget>
+	<name>ImageFinishCPLX</name>
+	<dll>gadgetron_mricore</dll>
+	<classname>ImageFinishGadgetCPLX</classname>
+	</gadget>
+    -->
+    
+    <!--
+	<gadget>
+	<name>ImageFinishFLOAT</name>
+	<dll>gadgetron_mricore</dll>
+	<classname>ImageFinishGadgetFLOAT</classname>
+	</gadget>
+    -->
+    
+    <gadget>
+      <name>ImageFinishUSHORT</name>
+      <dll>gadgetron_mricore</dll>
+      <classname>ImageFinishGadgetUSHORT</classname>
+    </gadget>
+    
+</gadgetronStreamConfiguration>
diff --git a/gadgets/radial/config/golden_radial_mode3_gpusense_sb.xml b/gadgets/radial/config/golden_radial_mode3_gpusense_sb.xml
index bfbdeb0..e3a6203 100644
--- a/gadgets/radial/config/golden_radial_mode3_gpusense_sb.xml
+++ b/gadgets/radial/config/golden_radial_mode3_gpusense_sb.xml
@@ -62,7 +62,7 @@
     
     <gadget>
       <name>gpuSbSenseGadget_slice0</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuSbSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -80,7 +80,7 @@
 
     <gadget>
       <name>gpuSbSenseGadget_slice1</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuSbSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
@@ -98,7 +98,7 @@
 
     <gadget>
       <name>gpuSbSenseGadget_slice2</name>
-      <dll>gadgetron_gpusense</dll>
+      <dll>gadgetron_gpuparallelmri</dll>
       <classname>gpuSbSenseGadget</classname>
       <property><name>pass_on_undesired_data</name>  <value>true</value></property>
       <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/radial/config/spirit.xml b/gadgets/radial/config/spirit.xml
new file mode 100644
index 0000000..f6e0da8
--- /dev/null
+++ b/gadgets/radial/config/spirit.xml
@@ -0,0 +1,106 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
+                              xmlns="http://gadgetron.sf.net/gadgetron"
+                              xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+  <!-- Radial stream: gpuRadialSpiritPrepGadget (mode 3) feeding a single gpuCgSpiritGadget (sliceno 0), delivered as FLOAT images. -->
+  <reader>
+    <slot>1008</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
+  </reader>
+  <writer>
+    <slot>1004</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>MRIImageWriterCPLX</classname>
+  </writer>
+  <writer>
+    <slot>1005</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>MRIImageWriterFLOAT</classname>
+  </writer>
+  <writer>
+    <slot>1006</slot>
+    <dll>gadgetron_mricore</dll>
+    <classname>MRIImageWriterUSHORT</classname>
+  </writer>
+
+  <gadget>
+    <name>NoiseAdjust</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>NoiseAdjustGadget</classname>
+  </gadget>
+
+  <gadget>
+    <name>PCA</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>PCACoilGadget</classname>
+  </gadget>
+
+  <gadget>
+    <name>CoilReduction</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>CoilReductionGadget</classname>
+    <property><name>coils_out</name><value>16</value></property>
+  </gadget>
+<!--  
+  <gadget>
+    <name>gpuRadialSpiritPrepGadget</name>
+    <dll>gadgetron_gpuradial</dll>
+    <classname>gpuRadialSpiritPrepGadget</classname>
+    <property><name>deviceno</name><value>0</value></property>
+    <property><name>mode</name><value>3</value></property>
+    <property><name>profiles_per_frame</name><value>16</value></property>
+    <property><name>rotations_per_reconstruction</name><value>16</value></property>
+    <property><name>buffer_frames_per_rotation</name><value>16</value></property>
+    <property><name>buffer_length_in_rotations</name><value>2</value></property>
+    <property><name>buffer_convolution_kernel_width</name><value>5.5</value></property>
+    <property><name>buffer_convolution_oversampling_factor</name><value>1.25</value></property>
+    <property><name>reconstruction_os_factor_x</name><value>1.5</value></property>
+    <property><name>reconstruction_os_factor_y</name><value>1.5</value></property>
+  </gadget>
+-->
+  <!-- Active prep gadget: unlike the disabled variant above it sets no rotations_per_reconstruction, uses other buffer sizes and adds number_of_iterations. -->
+    <gadget>
+      <name>gpuRadialSpiritPrepGadget</name>
+      <dll>gadgetron_gpuradial</dll>
+      <classname>gpuRadialSpiritPrepGadget</classname>
+      <property><name>deviceno</name><value>0</value></property>
+      <property><name>mode</name><value>3</value></property>
+      <property><name>profiles_per_frame</name><value>16</value></property>
+      <property><name>buffer_frames_per_rotation</name><value>4</value></property>
+      <property><name>buffer_length_in_rotations</name><value>8</value></property>
+      <property><name>buffer_convolution_kernel_width</name><value>5.5</value></property>
+      <property><name>buffer_convolution_oversampling_factor</name><value>1.25</value></property>
+      <property><name>reconstruction_os_factor_x</name><value>1.5</value></property>
+      <property><name>reconstruction_os_factor_y</name><value>1.5</value></property>
+      <property><name>number_of_iterations</name><value>25</value></property>
+    </gadget>
+
+  <gadget>
+    <name>gpuCgSpiritGadget</name>
+    <dll>gadgetron_gpuparallelmri</dll>
+    <classname>gpuCgSpiritGadget</classname>
+    <property><name>pass_on_undesired_data</name><value>true</value></property>
+    <property><name>deviceno</name>                <value>0</value></property>
+    <property><name>sliceno</name>                 <value>0</value></property>
+    <property><name>number_of_iterations</name>    <value>40</value></property>
+    <property><name>cg_limit</name>                <value>1e-9</value></property>
+    <property><name>oversampling_factor</name>     <value>1.25</value></property>
+    <property><name>kernel_width</name>            <value>5.5</value></property>
+    <property><name>kappa</name>                   <value>0.0</value></property>
+    <property><name>output_convergence</name>      <value>true</value></property>
+  </gadget>
+
+  <gadget>
+    <name>Extract</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>ExtractGadget</classname>
+  </gadget>
+  <!-- No AutoScale or FloatToUShort stage in this chain: the Extract output goes straight to ImageFinishFLOAT. -->
+  <gadget>
+    <name>ImageFinishFLOAT</name>
+    <dll>gadgetron_mricore</dll>
+    <classname>ImageFinishGadgetFLOAT</classname>
+  </gadget>
+  
+</gadgetronStreamConfiguration>
diff --git a/gadgets/radial/gpuRadialPrepGadget.cpp b/gadgets/radial/gpuRadialPrepGadget.cpp
new file mode 100644
index 0000000..5b783b7
--- /dev/null
+++ b/gadgets/radial/gpuRadialPrepGadget.cpp
@@ -0,0 +1,952 @@
+#include "gpuRadialPrepGadget.h"
+#include "Gadgetron.h"
+#include "cuNonCartesianSenseOperator.h"
+#include "GenericReconJob.h"
+#include "cuNDArray_elemwise.h"
+#include "cuNDArray_utils.h"
+#include "vector_td_operators.h"
+#include "GPUTimer.h"
+#include "check_CUDA.h"
+#include "radial_utilities.h"
+#include "hoNDArray_elemwise.h"
+#include "hoNDArray_fileio.h"
+#include "ismrmrd/xml.h"
+
+#include <algorithm>
+#include <vector>
+#include <cmath>
+
+namespace Gadgetron{
+
+  gpuRadialPrepGadget::gpuRadialPrepGadget()
+    : slices_(-1)
+    , sets_(-1)
+    , device_number_(-1)
+    , mode_(-1)
+    , samples_per_profile_(-1)
+  {
+    // Fallback property values, applied in the listed order, for anything the
+    // configuration leaves unspecified (pairs of property name and value text).
+    static const char* const fallback_properties[][2] = {
+      { "mode", "0" }, { "deviceno", "0" },
+      { "buffer_length_in_rotations", "1" },
+      { "buffer_using_solver", "false" },
+      { "buffer_convolution_kernel_width", "5.5" },
+      { "buffer_convolution_oversampling_factor", "1.25" },
+      { "rotations_per_reconstruction", "0" },
+      { "reconstruction_os_factor_x", "1.0" }, { "reconstruction_os_factor_y", "1.0" } };
+    for( unsigned int p=0; p<sizeof(fallback_properties)/sizeof(fallback_properties[0]); p++ )
+      set_parameter(fallback_properties[p][0], fallback_properties[p][1]);
+  }
+  
+  gpuRadialPrepGadget::~gpuRadialPrepGadget() {} // Empty body: no explicit cleanup is performed here
+  
+  // Configures the gadget from its properties and the ISMRMRD header held in 'mb':
+  // checks the sliding window setup, selects the CUDA device, derives the matrix sizes
+  // and allocates the per set/slice queues, counters and host arrays.
+  // Returns GADGET_OK on success, GADGET_FAIL when a check or CUDA call fails.
+  int gpuRadialPrepGadget::process_config(ACE_Message_Block* mb)
+  {
+    // Get configuration values from config file
+
+    mode_ = get_int_value("mode");
+    device_number_ = get_int_value("deviceno");
+    rotations_per_reconstruction_ = get_int_value("rotations_per_reconstruction");
+    buffer_length_in_rotations_ = get_int_value("buffer_length_in_rotations");
+    buffer_using_solver_ = get_bool_value("buffer_using_solver");
+    output_timing_ = get_bool_value("output_timing");
+
+    // Currently there are some restrictions on the allowed sliding window configurations
+
+    sliding_window_profiles_ = get_int_value("sliding_window_profiles");
+    sliding_window_rotations_ = get_int_value("sliding_window_rotations");
+
+    if( sliding_window_profiles_>0 && sliding_window_rotations_>0 ){
+      GADGET_DEBUG1( "Error: Sliding window reconstruction is not yet supported for both profiles and frames simultaneously.\n" );
+      return GADGET_FAIL;
+    }
+
+    if( sliding_window_profiles_>0 && rotations_per_reconstruction_>0 ){
+      GADGET_DEBUG1( "Error: Sliding window reconstruction over profiles is not yet supported for multiframe reconstructions.\n" );
+      return GADGET_FAIL;
+    }
+
+    if( sliding_window_rotations_ > 0 && sliding_window_rotations_ >= rotations_per_reconstruction_ ){
+      GADGET_DEBUG1( "Error: Illegal sliding window configuration.\n" );
+      return GADGET_FAIL;
+    }
+
+    // Setup and validate device configuration
+
+    int number_of_devices;
+    if (cudaGetDeviceCount(&number_of_devices)!= cudaSuccess) {
+      GADGET_DEBUG1( "Error: unable to query number of CUDA devices.\n" );
+      return GADGET_FAIL;
+    }
+
+    if (number_of_devices == 0) {
+      GADGET_DEBUG1( "Error: No available CUDA devices.\n" );
+      return GADGET_FAIL;
+    }
+
+    if (device_number_ >= number_of_devices) {
+      GADGET_DEBUG2("Adjusting device number from %d to %d\n", device_number_,  (device_number_%number_of_devices));
+      device_number_ = (device_number_%number_of_devices);
+    }
+
+    if (cudaSetDevice(device_number_)!= cudaSuccess) {
+      GADGET_DEBUG1( "Error: unable to set CUDA device.\n" );
+      return GADGET_FAIL;
+    }
+
+    cudaDeviceProp deviceProp;
+    if( cudaGetDeviceProperties( &deviceProp, device_number_ ) != cudaSuccess) {
+      GADGET_DEBUG1( "Error: unable to query device properties.\n" );
+      return GADGET_FAIL;
+    }
+
+    unsigned int warp_size = deviceProp.warpSize;
+
+    // Convolution kernel width and oversampling ratio (for the buffer)
+
+    kernel_width_ = get_double_value("buffer_convolution_kernel_width");
+    oversampling_factor_ = get_double_value("buffer_convolution_oversampling_factor");
+
+    // Get the Ismrmrd header
+    ISMRMRD::IsmrmrdHeader h;
+    ISMRMRD::deserialize(mb->rd_ptr(),h);
+
+    if (h.encoding.size() != 1) {
+      GADGET_DEBUG1("This Gadget only supports one encoding space\n");
+      return GADGET_FAIL;
+    }
+
+    // Get the encoding space and limits
+    ISMRMRD::EncodingSpace e_space = h.encoding[0].encodedSpace;
+    ISMRMRD::EncodingSpace r_space = h.encoding[0].reconSpace;
+    ISMRMRD::EncodingLimits e_limits = h.encoding[0].encodingLimits;
+
+    // An empty encoded matrix would give zero-sized images and a zero divisor further down
+    if( e_space.matrixSize.x == 0 || e_space.matrixSize.y == 0 ){
+      GADGET_DEBUG1( "Error: Encoded matrix size must be non-zero.\n" );
+      return GADGET_FAIL;
+    }
+
+    // Matrix sizes (as a multiple of the GPU's warp size)
+
+    image_dimensions_.push_back(((e_space.matrixSize.x+warp_size-1)/warp_size)*warp_size);
+    image_dimensions_.push_back(((e_space.matrixSize.y+warp_size-1)/warp_size)*warp_size);
+
+    image_dimensions_recon_.push_back(((static_cast<unsigned int>(std::ceil(e_space.matrixSize.x*get_double_value("reconstruction_os_factor_x")))+warp_size-1)/warp_size)*warp_size);
+    image_dimensions_recon_.push_back(((static_cast<unsigned int>(std::ceil(e_space.matrixSize.y*get_double_value("reconstruction_os_factor_y")))+warp_size-1)/warp_size)*warp_size);
+
+    image_dimensions_recon_os_ = uint64d2
+      (((static_cast<unsigned int>(std::ceil(image_dimensions_recon_[0]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size,
+       ((static_cast<unsigned int>(std::ceil(image_dimensions_recon_[1]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size);
+
+    // In case the warp_size constraint kicked in
+    oversampling_factor_ = float(image_dimensions_recon_os_[0])/float(image_dimensions_recon_[0]);
+
+    // image_dimensions_recon_os_ holds 64-bit values; cast so every argument matches its %d conversion
+    GADGET_DEBUG2("matrix_size_x : %d, recon: %d, recon_os: %d\n",
+                  static_cast<int>(image_dimensions_[0]), static_cast<int>(image_dimensions_recon_[0]),
+                  static_cast<int>(image_dimensions_recon_os_[0]));
+
+    GADGET_DEBUG2("matrix_size_y : %d, recon: %d, recon_os: %d\n",
+                  static_cast<int>(image_dimensions_[1]), static_cast<int>(image_dimensions_recon_[1]),
+                  static_cast<int>(image_dimensions_recon_os_[1]));
+
+    fov_.push_back(r_space.fieldOfView_mm.x);
+    fov_.push_back(r_space.fieldOfView_mm.y);
+    fov_.push_back(r_space.fieldOfView_mm.z);
+
+    slices_ = e_limits.slice ? e_limits.slice->maximum + 1 : 1;
+    sets_ = e_limits.set ? e_limits.set->maximum + 1 : 1;
+
+    // Allocate profile queues
+    // - one queue for the currently incoming frame
+    // - one queue for the next reconstruction
+
+    frame_profiles_queue_ = boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>[slices_*sets_]);
+    recon_profiles_queue_ = boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>[slices_*sets_]);
+    image_headers_queue_ = boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>[slices_*sets_]);
+
+    size_t bsize = sizeof(GadgetContainerMessage< hoNDArray< std::complex<float> > >)*image_dimensions_[0]*10;
+
+    for( unsigned int i=0; i<slices_*sets_; i++ ){
+      frame_profiles_queue_[i].high_water_mark(bsize);
+      frame_profiles_queue_[i].low_water_mark(bsize);
+    }
+
+    bsize *= (rotations_per_reconstruction_+1);
+
+    for( unsigned int i=0; i<slices_*sets_; i++ ){
+      recon_profiles_queue_[i].high_water_mark(bsize);
+      recon_profiles_queue_[i].low_water_mark(bsize);
+    }
+
+    // Define some profile counters for book-keeping
+
+    previous_profile_ = boost::shared_array<long>(new long[slices_*sets_]);
+    image_counter_ = boost::shared_array<long>(new long[slices_*sets_]);
+    profiles_counter_frame_= boost::shared_array<long>(new long[slices_*sets_]);
+    profiles_counter_global_= boost::shared_array<long>(new long[slices_*sets_]);
+    profiles_per_frame_= boost::shared_array<long>(new long[slices_*sets_]);
+    frames_per_rotation_= boost::shared_array<long>(new long[slices_*sets_]);
+    buffer_frames_per_rotation_= boost::shared_array<long>(new long[slices_*sets_]);
+    buffer_update_needed_ = boost::shared_array<bool>(new bool[slices_*sets_]);
+    reconfigure_ = boost::shared_array<bool>(new bool[slices_*sets_]);
+    num_coils_ = boost::shared_array<unsigned int>(new unsigned int[slices_*sets_]);
+
+    if( !previous_profile_.get() ||
+        !image_counter_.get() ||
+        !profiles_counter_frame_.get() ||
+        !profiles_counter_global_.get() ||
+        !profiles_per_frame_.get() ||
+        !frames_per_rotation_.get() ||
+        !buffer_frames_per_rotation_.get() ||
+        !buffer_update_needed_.get() ||
+        !num_coils_.get() ||
+        !reconfigure_.get() ){
+      GADGET_DEBUG1("Failed to allocate host memory (1)\n");
+      return GADGET_FAIL;
+    }
+
+    for( unsigned int i=0; i<slices_*sets_; i++ ){
+
+      previous_profile_[i] = -1;
+      image_counter_[i] = 0;
+      profiles_counter_frame_[i] = 0;
+      profiles_counter_global_[i] = 0;
+      profiles_per_frame_[i] = get_int_value("profiles_per_frame");
+      frames_per_rotation_[i] = get_int_value("frames_per_rotation");
+      buffer_frames_per_rotation_[i] = get_int_value("buffer_frames_per_rotation");
+      num_coils_[i] = 0;
+      buffer_update_needed_[i] = true;
+      reconfigure_[i] = true;
+
+      // Assign some default values ("upper bound estimates") of the (possibly) unknown entities
+
+      if( profiles_per_frame_[i] == 0 ){
+        profiles_per_frame_[i] = image_dimensions_[0];
+      }
+
+      if( frames_per_rotation_[i] == 0 ){
+        if( mode_ == 2 || mode_ == 3 ) // golden ratio
+          frames_per_rotation_[i] = 1;
+        else
+          frames_per_rotation_[i] = image_dimensions_[0]/profiles_per_frame_[i];
+      }
+
+      bsize = sizeof(GadgetContainerMessage<ISMRMRD::ImageHeader>)*100*
+        std::max(1L, frames_per_rotation_[i]*rotations_per_reconstruction_);
+
+      image_headers_queue_[i].high_water_mark(bsize);
+      image_headers_queue_[i].low_water_mark(bsize);
+    }
+
+    position_ = boost::shared_array<float[3]>(new float[slices_*sets_][3]);
+    read_dir_ = boost::shared_array<float[3]>(new float[slices_*sets_][3]);
+    phase_dir_ = boost::shared_array<float[3]>(new float[slices_*sets_][3]);
+    slice_dir_ = boost::shared_array<float[3]>(new float[slices_*sets_][3]);
+
+    if( !position_.get() || !read_dir_.get() || !phase_dir_.get() || !slice_dir_.get() ){
+      GADGET_DEBUG1("Failed to allocate host memory (2)\n");
+      return GADGET_FAIL;
+    }
+
+    for( unsigned int i=0; i<slices_*sets_; i++ ){
+      (position_[i])[0] = (position_[i])[1] = (position_[i])[2] = 0.0f;
+      (read_dir_[i])[0] = (read_dir_[i])[1] = (read_dir_[i])[2] = 0.0f;
+      (phase_dir_[i])[0] = (phase_dir_[i])[1] = (phase_dir_[i])[2] = 0.0f;
+      (slice_dir_[i])[0] = (slice_dir_[i])[1] = (slice_dir_[i])[2] = 0.0f;
+    }
+
+    // Allocate accumulation buffer
+
+    allocate_accumulation_buffer( slices_*sets_ );
+
+    // Allocate remaining shared_arrays
+
+    csm_host_ = boost::shared_array< hoNDArray<float_complext> >(new hoNDArray<float_complext>[slices_*sets_]);
+    reg_host_ = boost::shared_array< hoNDArray<float_complext> >(new hoNDArray<float_complext>[slices_*sets_]);
+
+    host_traj_recon_ = boost::shared_array< hoNDArray<floatd2> >(new hoNDArray<floatd2>[slices_*sets_]);
+    host_weights_recon_ = boost::shared_array< hoNDArray<float> >(new hoNDArray<float>[slices_*sets_]);
+
+    if( !csm_host_.get() || !reg_host_.get() || !host_traj_recon_.get() || !host_weights_recon_.get() ){
+      GADGET_DEBUG1("Failed to allocate host memory (3)\n");
+      return GADGET_FAIL;
+    }
+
+    return GADGET_OK;
+  }
+
+  int gpuRadialPrepGadget::
+  process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader> *m1,
+          GadgetContainerMessage< hoNDArray< std::complex<float> > > *m2)
+  {
+    // Noise should have been consumed by the noise adjust (if in the gadget chain)
+    //
+    
+    bool is_noise = m1->getObjectPtr()->isFlagSet(ISMRMRD::ISMRMRD_ACQ_IS_NOISE_MEASUREMENT);
+    if (is_noise) { 
+      m1->release();
+      return GADGET_OK;
+    }
+
+    unsigned int profile = m1->getObjectPtr()->idx.kspace_encode_step_1;
+    unsigned int slice = m1->getObjectPtr()->idx.slice;
+    unsigned int set = m1->getObjectPtr()->idx.set;
+
+    // Only when the first profile arrives, do we know the #samples/profile
+    //
+
+    if( samples_per_profile_ == -1 )      
+      samples_per_profile_ = m1->getObjectPtr()->number_of_samples;
+    
+    if( samples_per_profile_ != m1->getObjectPtr()->number_of_samples ){
+      GADGET_DEBUG1("Unexpected change in the incoming profiles' lengths\n");
+      return GADGET_FAIL;
+    }
+    
+    //GADGET_DEBUG1("gpuRadialPrepGadget::process\n");
+
+    boost::shared_ptr<GPUTimer> process_timer;
+    if( output_timing_ )
+      process_timer = boost::shared_ptr<GPUTimer>( new GPUTimer("gpuRadialPrepGadget::process()") );
+
+    // Reconfigure at first pass
+    // - or if the number of coil changes
+    // - or if the reconfigure_ flag is set
+
+    if( num_coils_[set*slices_+slice] != m1->getObjectPtr()->active_channels ){
+      GADGET_DEBUG1("Reconfiguring due to change in the number of coils\n");
+      num_coils_[set*slices_+slice] = m1->getObjectPtr()->active_channels;
+      reconfigure(set, slice);
+    }
+
+    if( reconfigure_[set*slices_+slice] ){
+      GADGET_DEBUG1("Reconfiguring due to boolean indicator\n");
+      reconfigure(set, slice);
+    }
+
+    // Get a pointer to the accumulation buffer. 
+    //
+    
+    cuBuffer<float,2> *acc_buffer = get_buffer_ptr(set*slices_+slice);
+
+    // Have the imaging plane changed?
+    //
+
+    if( !vec_equal(position_[set*slices_+slice], m1->getObjectPtr()->position) ||
+        !vec_equal(read_dir_[set*slices_+slice], m1->getObjectPtr()->read_dir) || 
+        !vec_equal(phase_dir_[set*slices_+slice], m1->getObjectPtr()->phase_dir) ||
+        !vec_equal(slice_dir_[set*slices_+slice], m1->getObjectPtr()->slice_dir) ){
+      
+      // Yes indeed, clear the accumulation buffer
+      acc_buffer->clear();
+      buffer_update_needed_[set*slices_+slice] = true;
+      
+      memcpy(position_[set*slices_+slice],m1->getObjectPtr()->position,3*sizeof(float));
+      memcpy(read_dir_[set*slices_+slice],m1->getObjectPtr()->read_dir,3*sizeof(float));
+      memcpy(phase_dir_[set*slices_+slice],m1->getObjectPtr()->phase_dir,3*sizeof(float));
+      memcpy(slice_dir_[set*slices_+slice],m1->getObjectPtr()->slice_dir,3*sizeof(float));
+    }
+        
+    bool new_frame_detected = false;
+
+    // Keep track of the incoming profile ids (mode dependent)
+    // - to determine the number of profiles per frame
+    // - to determine the number of frames per rotation
+    //
+
+    if (previous_profile_[set*slices_+slice] >= 0) {
+
+      if ( profile > previous_profile_[set*slices_+slice]) { // this is not the last profile in the frame
+        if( mode_ == 0 && get_int_value(std::string("frames_per_rotation").c_str()) == 0 ){
+          unsigned int acceleration_factor = profile - previous_profile_[set*slices_+slice];
+          if( acceleration_factor != frames_per_rotation_[set*slices_+slice] ){
+            GADGET_DEBUG1("Reconfiguring due to change in acceleration factor\n");
+            frames_per_rotation_[set*slices_+slice] = acceleration_factor;
+            reconfigure(set, slice);
+          }
+        }
+      }
+      else{ // This is the first profile in a new frame
+        if( get_int_value(std::string("profiles_per_frame").c_str()) == 0 && // make sure the user did not specify a desired value for this variable
+            profiles_counter_frame_[set*slices_+slice] > 0 &&
+            profiles_counter_frame_[set*slices_+slice] != profiles_per_frame_[set*slices_+slice] ){ // a new acceleration factor is detected
+          GADGET_DEBUG1("Reconfiguring due to new slice detection\n");
+          new_frame_detected = true;
+          profiles_per_frame_[set*slices_+slice] = profiles_counter_frame_[set*slices_+slice];
+          if( mode_ == 1 && get_int_value(std::string("frames_per_rotation").c_str()) == 0 )
+            frames_per_rotation_[set*slices_+slice] = image_dimensions_[0]/profiles_per_frame_[set*slices_+slice];
+          reconfigure(set, slice);
+        }
+      }
+    }
+    previous_profile_[set*slices_+slice] = profile;
+
+    // Enqueue profile
+    // - if 'new_frame_detected' the current profile does not belong to the current frame and we delay enqueuing
+
+    if( !new_frame_detected ) {
+      
+      // Memory handling is easier if we make copies for our internal queues
+      frame_profiles_queue_[set*slices_+slice].enqueue_tail(duplicate_profile(m2));
+      recon_profiles_queue_[set*slices_+slice].enqueue_tail(duplicate_profile(m2));
+    }
+
+    // If the profile is the last of a "true frame" (ignoring any sliding window profiles)
+    // - then update the accumulation buffer
+
+    bool is_last_profile_in_frame = (profiles_counter_frame_[set*slices_+slice] == profiles_per_frame_[set*slices_+slice]-1);
+    is_last_profile_in_frame |= new_frame_detected;
+
+    if( is_last_profile_in_frame ){
+
+      // Extract this frame's samples to update the csm/regularization buffer
+      //
+
+      boost::shared_ptr< hoNDArray<float_complext> > host_samples = 
+        extract_samples_from_queue( &frame_profiles_queue_[set*slices_+slice], false, set, slice );
+
+      if( host_samples.get() == 0x0 ){
+        GADGET_DEBUG1("Failed to extract frame data from queue\n");
+        return GADGET_FAIL;
+      }
+      
+      cuNDArray<float_complext> samples( host_samples.get() );
+      
+      long profile_offset = profiles_counter_global_[set*slices_+slice] - ((new_frame_detected) ? 1 : 0);
+      boost::shared_ptr< cuNDArray<floatd2> > traj = calculate_trajectory_for_frame(profile_offset, set, slice);
+
+      buffer_update_needed_[set*slices_+slice] |= acc_buffer->add_frame_data( &samples, traj.get() );
+    }
+    
+    // Are we ready to reconstruct (downstream)?
+    //
+    
+    long profiles_per_reconstruction = profiles_per_frame_[set*slices_+slice];
+    
+    if( rotations_per_reconstruction_ > 0 )
+      profiles_per_reconstruction *= (frames_per_rotation_[set*slices_+slice]*rotations_per_reconstruction_);
+    
+    bool is_last_profile_in_reconstruction = ( recon_profiles_queue_[set*slices_+slice].message_count() == profiles_per_reconstruction );
+        
+    // Prepare the image header for this frame
+    // - if this is indeed the last profile of a new frame
+    // - or if we are about to reconstruct due to 'sliding_window_profiles_' > 0
+
+    if( is_last_profile_in_frame || 
+        (is_last_profile_in_reconstruction && image_headers_queue_[set*slices_+slice].message_count() == 0) ){
+      
+      GadgetContainerMessage<ISMRMRD::ImageHeader> *header = new GadgetContainerMessage<ISMRMRD::ImageHeader>();
+      ISMRMRD::AcquisitionHeader *base_head = m1->getObjectPtr();
+
+      {
+        // Initialize header to all zeroes (there are a few fields we do not set yet)
+        ISMRMRD::ImageHeader tmp;
+        *(header->getObjectPtr()) = tmp;
+      }
+
+      header->getObjectPtr()->version = base_head->version;
+
+      header->getObjectPtr()->matrix_size[0] = image_dimensions_recon_[0];
+      header->getObjectPtr()->matrix_size[1] = image_dimensions_recon_[1];
+      header->getObjectPtr()->matrix_size[2] = std::max(1L,frames_per_rotation_[set*slices_+slice]*rotations_per_reconstruction_);
+
+      header->getObjectPtr()->field_of_view[0] = fov_[0];
+      header->getObjectPtr()->field_of_view[1] = fov_[1];
+      header->getObjectPtr()->field_of_view[2] = fov_[2];
+
+      header->getObjectPtr()->channels = num_coils_[set*slices_+slice];
+      header->getObjectPtr()->slice = base_head->idx.slice;
+      header->getObjectPtr()->set = base_head->idx.set;
+
+      header->getObjectPtr()->acquisition_time_stamp = base_head->acquisition_time_stamp;
+      memcpy(header->getObjectPtr()->physiology_time_stamp, base_head->physiology_time_stamp, sizeof(uint32_t)*ISMRMRD::ISMRMRD_PHYS_STAMPS);
+
+      memcpy(header->getObjectPtr()->position, base_head->position, sizeof(float)*3);
+      memcpy(header->getObjectPtr()->read_dir, base_head->read_dir, sizeof(float)*3);
+      memcpy(header->getObjectPtr()->phase_dir, base_head->phase_dir, sizeof(float)*3);
+      memcpy(header->getObjectPtr()->slice_dir, base_head->slice_dir, sizeof(float)*3);
+      memcpy(header->getObjectPtr()->patient_table_position, base_head->patient_table_position, sizeof(float)*3);
+
+      header->getObjectPtr()->data_type = ISMRMRD::ISMRMRD_CXFLOAT;
+      header->getObjectPtr()->image_index = image_counter_[set*slices_+slice]++; 
+      header->getObjectPtr()->image_series_index = set*slices_+slice;
+
+      image_headers_queue_[set*slices_+slice].enqueue_tail(header);
+    }
+    
+    // If it is time to reconstruct (downstream) then prepare the Sense job
+    // 
+
+    if( is_last_profile_in_reconstruction ){
+      
+      // Update csm and regularization images if the buffer has changed (completed a cycle) 
+      // - and at the first pass
+      
+      if( buffer_update_needed_[set*slices_+slice] || 
+          csm_host_[set*slices_+slice].get_number_of_elements() == 0 || 
+          reg_host_[set*slices_+slice].get_number_of_elements() == 0 ){
+
+        // Compute and set CSM (in derived Sense/Spirit/... class)
+        //
+
+        csm_host_[set*slices_+slice] = *compute_csm( set*slices_+slice );
+	                
+        // Compute regularization image
+        //
+        
+        reg_host_[set*slices_+slice] = *compute_reg( set, slice, new_frame_detected );
+		
+        /*
+          static int counter = 0;
+          char filename[256];
+          sprintf((char*)filename, "_reg_%d.real", counter);
+          write_nd_array<float>( abs(&reg_host_[set*slices_+slice]).get(), filename );
+          counter++; */
+
+        buffer_update_needed_[set*slices_+slice] = false;
+      }
+
+      // Prepare data array of the profiles for the downstream reconstruction
+      //
+      
+      boost::shared_ptr< hoNDArray<float_complext> > samples_host = 
+        extract_samples_from_queue( &recon_profiles_queue_[set*slices_+slice], true, set, slice );
+      
+      if( samples_host.get() == 0x0 ){
+        GADGET_DEBUG1("Failed to extract frame data from queue\n");
+        return GADGET_FAIL;
+      }
+           
+      // The trajectory needs to be updated on the fly:
+      // - for golden ratio based acquisitions
+      // - when we are reconstructing frame-by-frame
+      
+      if( mode_ == 2 || mode_ == 3 || rotations_per_reconstruction_ == 0 ){
+        calculate_trajectory_for_reconstruction
+          ( profiles_counter_global_[set*slices_+slice] - ((new_frame_detected) ? 1 : 0), set, slice );
+      }
+      
+      // Set up Sense job
+      //
+
+      GadgetContainerMessage< GenericReconJob >* m4 = new GadgetContainerMessage< GenericReconJob >();
+	
+      m4->getObjectPtr()->dat_host_ = samples_host;
+      m4->getObjectPtr()->tra_host_ = boost::shared_ptr< hoNDArray<floatd2> >(new hoNDArray<floatd2>(host_traj_recon_[set*slices_+slice]));
+      m4->getObjectPtr()->dcw_host_ = boost::shared_ptr< hoNDArray<float> >(new hoNDArray<float>(host_weights_recon_[set*slices_+slice]));
+      m4->getObjectPtr()->csm_host_ = boost::shared_ptr< hoNDArray<float_complext> >( new hoNDArray<float_complext>(csm_host_[set*slices_+slice]));
+      m4->getObjectPtr()->reg_host_ = boost::shared_ptr< hoNDArray<float_complext> >( new hoNDArray<float_complext>(reg_host_[set*slices_+slice]));
+
+      // Pull the image headers out of the queue
+      //
+
+      long frames_per_reconstruction = 
+        std::max( 1L, frames_per_rotation_[set*slices_+slice]*rotations_per_reconstruction_ );
+      
+      if( image_headers_queue_[set*slices_+slice].message_count() != frames_per_reconstruction ){
+        m4->release();
+        GADGET_DEBUG2("Unexpected size of image header queue: %d, %d\n", 
+                      image_headers_queue_[set*slices_+slice].message_count(), frames_per_reconstruction);
+        return GADGET_FAIL;
+      }
+
+      m4->getObjectPtr()->image_headers_ =
+        boost::shared_array<ISMRMRD::ImageHeader>( new ISMRMRD::ImageHeader[frames_per_reconstruction] );
+      
+      for( unsigned int i=0; i<frames_per_reconstruction; i++ ){	
+
+        ACE_Message_Block *mbq;
+
+        if( image_headers_queue_[set*slices_+slice].dequeue_head(mbq) < 0 ) {
+          m4->release();
+          GADGET_DEBUG1("Image header dequeue failed\n");
+          return GADGET_FAIL;
+        }
+	
+        GadgetContainerMessage<ISMRMRD::ImageHeader> *m = AsContainerMessage<ISMRMRD::ImageHeader>(mbq);
+        m4->getObjectPtr()->image_headers_[i] = *m->getObjectPtr();
+
+        // In sliding window mode the header might need to go back at the end of the queue for reuse
+        // 
+	
+        if( i >= frames_per_reconstruction-sliding_window_rotations_*frames_per_rotation_[set*slices_+slice] ){
+          image_headers_queue_[set*slices_+slice].enqueue_tail(m);
+        }
+        else {
+          m->release();
+        }
+      }      
+      
+      // The Sense Job needs an image header as well. 
+      // Let us just copy the initial one...
+
+      GadgetContainerMessage<ISMRMRD::ImageHeader> *m3 = new GadgetContainerMessage<ISMRMRD::ImageHeader>;
+      *m3->getObjectPtr() = m4->getObjectPtr()->image_headers_[0];
+      m3->cont(m4);
+      
+      //GADGET_DEBUG1("Putting job on queue\n");
+      
+      if (this->next()->putq(m3) < 0) {
+        GADGET_DEBUG1("Failed to put job on queue.\n");
+        m3->release();
+        return GADGET_FAIL;
+      }
+    }
+    
+    if( is_last_profile_in_frame )
+      profiles_counter_frame_[set*slices_+slice] = 0;
+    else{
+      profiles_counter_frame_[set*slices_+slice]++;
+    }
+
+    if( new_frame_detected ){
+
+      // This is the first profile of the next frame, enqueue.
+      // We have encountered deadlocks if the same profile is enqueued twice in different queues. Hence the copy.
+      
+      frame_profiles_queue_[set*slices_+slice].enqueue_tail(duplicate_profile(m2));
+      recon_profiles_queue_[set*slices_+slice].enqueue_tail(duplicate_profile(m2)); 
+
+      profiles_counter_frame_[set*slices_+slice]++;
+    }
+
+    profiles_counter_global_[set*slices_+slice]++;
+
+    if( output_timing_ )
+      process_timer.reset();
+    
+    m1->release(); // the internal queues hold copies
+    return GADGET_OK;
+  }
+  
+  int 
+  gpuRadialPrepGadget::calculate_trajectory_for_reconstruction(long profile_offset, unsigned int set, unsigned int slice)
+  {   
+    //GADGET_DEBUG1("Calculating trajectory for reconstruction\n");
+    // Computes the trajectory of one reconstruction for the given set/slice into host_traj_recon_; returns GADGET_FAIL if mode_ is not 0-3.
+    switch(mode_){
+      // Modes 0/1: fixed angle trajectories
+    case 0:
+    case 1:
+      {
+        if( rotations_per_reconstruction_ == 0 ){
+          // Frame-by-frame reconstruction: a single frame, angularly offset according to its index within the rotation
+          long local_frame = (profile_offset/profiles_per_frame_[set*slices_+slice])%frames_per_rotation_[set*slices_+slice];
+          float angular_offset = M_PI/float(profiles_per_frame_[set*slices_+slice])*float(local_frame)/float(frames_per_rotation_[set*slices_+slice]);	  
+
+          host_traj_recon_[set*slices_+slice] = *compute_radial_trajectory_fixed_angle_2d<float>
+            ( samples_per_profile_, profiles_per_frame_[set*slices_+slice], 1, angular_offset )->to_host();	
+        }
+        else{
+          host_traj_recon_[set*slices_+slice] = *compute_radial_trajectory_fixed_angle_2d<float>
+            ( samples_per_profile_, profiles_per_frame_[set*slices_+slice], frames_per_rotation_[set*slices_+slice] )->to_host();
+        }
+      }
+      break;
+      // Modes 2/3: golden ratio trajectories, starting at the (non-negative) index of the reconstruction's first profile
+    case 2:
+    case 3:
+      {
+        if( rotations_per_reconstruction_ == 0 ){	  
+          unsigned int first_profile_in_reconstruction = std::max(0L, profile_offset-profiles_per_frame_[set*slices_+slice]+1);
+          host_traj_recon_[set*slices_+slice] = *compute_radial_trajectory_golden_ratio_2d<float>
+            ( samples_per_profile_, profiles_per_frame_[set*slices_+slice], 1, first_profile_in_reconstruction,
+              (mode_==2) ? GR_ORIGINAL : GR_SMALLEST )->to_host();	
+        }
+        else{
+          unsigned int first_profile_in_reconstruction = 
+            std::max(0L, profile_offset-profiles_per_frame_[set*slices_+slice]*frames_per_rotation_[set*slices_+slice]*rotations_per_reconstruction_+1);
+          host_traj_recon_[set*slices_+slice] = *compute_radial_trajectory_golden_ratio_2d<float>
+            ( samples_per_profile_, profiles_per_frame_[set*slices_+slice], 
+              frames_per_rotation_[set*slices_+slice]*rotations_per_reconstruction_, first_profile_in_reconstruction,
+              (mode_==2) ? GR_ORIGINAL : GR_SMALLEST )->to_host();
+        }	  
+      }
+      break;
+	
+    default:
+      GADGET_DEBUG1("Illegal trajectory mode\n");
+      return GADGET_FAIL;
+      break;
+    }
+    return GADGET_OK;
+  }  
+
+  int
+  gpuRadialPrepGadget::calculate_density_compensation_for_reconstruction( unsigned int set, unsigned int slice)
+  {
+    //GADGET_DEBUG1("Calculating dcw for reconstruction\n");
+    // Computes the density compensation weights (dcw) for the given set/slice and stores a
+    // host copy in host_weights_recon_. Returns GADGET_FAIL if mode_ is not one of 0-3.
+
+    const unsigned int idx = set*slices_+slice;
+    const bool fixed_angle = (mode_ == 0 || mode_ == 1);
+    const bool golden_ratio = (mode_ == 2 || mode_ == 3);
+
+    if( !fixed_angle && !golden_ratio ){
+      GADGET_DEBUG1("Illegal dcw mode\n");
+      return GADGET_FAIL;
+    }
+
+    if( fixed_angle ){
+      host_weights_recon_[idx] = *compute_radial_dcw_fixed_angle_2d<float>
+        ( samples_per_profile_, profiles_per_frame_[idx], oversampling_factor_, 
+          1.0f/(float(samples_per_profile_)/float(image_dimensions_recon_[0])) )->to_host();
+    }
+    else{
+      host_weights_recon_[idx] = *compute_radial_dcw_golden_ratio_2d<float>
+        ( samples_per_profile_, profiles_per_frame_[idx], oversampling_factor_, 
+          1.0f/(float(samples_per_profile_)/float(image_dimensions_recon_[0])),0,
+          (mode_==2) ? GR_ORIGINAL : GR_SMALLEST )->to_host();
+    }
+    return GADGET_OK;
+  }
+  
+  boost::shared_ptr< cuNDArray<floatd2> > 
+  gpuRadialPrepGadget::calculate_trajectory_for_frame(long profile_offset, unsigned int set, unsigned int slice)
+  {
+    //GADGET_DEBUG1("Calculating trajectory for buffer frame\n");
+    // Computes the trajectory (as a cuNDArray) of the single frame identified by 'profile_offset' for the given set/slice.
+    boost::shared_ptr< cuNDArray<floatd2> > result;
+    // 'result' stays empty (null) if mode_ is not one of 0-3
+    switch(mode_){
+      // Modes 0/1: the frame's angular offset follows from its index within the rotation
+    case 0:
+    case 1:
+      {
+        long local_frame = (profile_offset/profiles_per_frame_[set*slices_+slice])%frames_per_rotation_[set*slices_+slice];
+        float angular_offset = M_PI/float(profiles_per_frame_[set*slices_+slice])*float(local_frame)/float(frames_per_rotation_[set*slices_+slice]);	  
+
+        result = compute_radial_trajectory_fixed_angle_2d<float>
+          ( samples_per_profile_, profiles_per_frame_[set*slices_+slice], 1, angular_offset );  
+      }
+      break;
+      // Modes 2/3: the trajectory starts at the frame's first profile (index clamped at 0)
+    case 2:
+    case 3:
+      { 
+        unsigned int first_profile_in_buffer = std::max(0L, profile_offset-profiles_per_frame_[set*slices_+slice]+1);
+        result = compute_radial_trajectory_golden_ratio_2d<float>
+          ( samples_per_profile_, profiles_per_frame_[set*slices_+slice], 1, first_profile_in_buffer,
+            (mode_==2) ? GR_ORIGINAL : GR_SMALLEST );
+      }
+      break;	
+	
+    default:
+      GADGET_DEBUG1("Illegal trajectory mode\n");
+      break;
+    }
+    
+    return result;
+  }
+
+  boost::shared_ptr< cuNDArray<float> >
+  gpuRadialPrepGadget::calculate_density_compensation_for_frame(unsigned int set, unsigned int slice)
+  {    
+    //GADGET_DEBUG1("Calculating dcw for buffer frame\n");
+    // Computes the density compensation weights (dcw) matching a single frame of
+    // profiles_per_frame_ profiles for the given set/slice.
+    // An empty pointer is returned if mode_ is not one of 0-3.
+
+    const unsigned int idx = set*slices_+slice;
+
+    if( mode_ == 0 || mode_ == 1 ){
+      // Fixed angle variant
+      return compute_radial_dcw_fixed_angle_2d<float>
+        ( samples_per_profile_, profiles_per_frame_[idx], oversampling_factor_, 1.0f/(float(samples_per_profile_)/float(image_dimensions_recon_[0])) );
+    }
+
+    if( mode_ == 2 || mode_ == 3 ){
+      // Golden ratio variant
+      return compute_radial_dcw_golden_ratio_2d<float>
+        ( samples_per_profile_, profiles_per_frame_[idx], oversampling_factor_, 
+          1.0f/(float(samples_per_profile_)/float(image_dimensions_recon_[0])),0,
+          (mode_==2) ? GR_ORIGINAL : GR_SMALLEST );
+    }
+
+    GADGET_DEBUG1("Illegal dcw mode\n");
+    return boost::shared_ptr< cuNDArray<float> >();
+  }
+
+
+  boost::shared_ptr< cuNDArray<floatd2> > 
+  gpuRadialPrepGadget::calculate_trajectory_for_rhs(long profile_offset, unsigned int set, unsigned int slice)
+  {
+    //GADGET_DEBUG1("Calculating trajectory for rhs\n");
+    // Computes the trajectory covering the profiles of the accumulation buffer for the given set/slice.
+    switch(mode_){
+      // Modes 0/1: profiles_per_frame_*buffer_frames_per_rotation_ profiles in one frame ('profile_offset' is unused)
+    case 0:
+    case 1:
+      return compute_radial_trajectory_fixed_angle_2d<float>
+        ( samples_per_profile_, profiles_per_frame_[set*slices_+slice]*buffer_frames_per_rotation_[set*slices_+slice], 1 );
+      break;
+      // Modes 2/3: additionally scaled by buffer_length_in_rotations_, starting at the (non-negative) index of the first profile
+    case 2:
+    case 3:
+      { 
+        unsigned int first_profile = 
+          std::max(0L, profile_offset-profiles_per_frame_[set*slices_+slice]*
+                   buffer_frames_per_rotation_[set*slices_+slice]*
+                   buffer_length_in_rotations_+1);
+
+        return compute_radial_trajectory_golden_ratio_2d<float>
+          ( samples_per_profile_, 
+            profiles_per_frame_[set*slices_+slice]*
+            buffer_frames_per_rotation_[set*slices_+slice]*buffer_length_in_rotations_, 
+            1, first_profile,
+            (mode_==2) ? GR_ORIGINAL : GR_SMALLEST );
+      }
+      break;	
+      // Any other mode is illegal and yields an empty (null) pointer
+    default:
+      GADGET_DEBUG1("Illegal trajectory mode\n");
+      return boost::shared_ptr< cuNDArray<floatd2> >();
+      break;
+    }
+  }
+  
+  boost::shared_ptr< cuNDArray<float> >
+  gpuRadialPrepGadget::calculate_density_compensation_for_rhs(unsigned int set, unsigned int slice)
+  {
+    //GADGET_DEBUG1("Calculating dcw for rhs\n");
+    
+    // Computes the density compensation weights (dcw) for the profiles of the accumulation
+    // buffer of the given set/slice:
+    // - modes 0/1 (fixed angle):  profiles_per_frame_ * buffer_frames_per_rotation_ profiles
+    // - modes 2/3 (golden ratio): the above times buffer_length_in_rotations_
+    // An empty pointer is returned if mode_ is not one of 0-3.
+
+    const unsigned int idx = set*slices_+slice;
+
+    if( mode_ == 0 || mode_ == 1 ){
+
+      unsigned int profile_count = 
+        profiles_per_frame_[idx]*buffer_frames_per_rotation_[idx];
+
+      return compute_radial_dcw_fixed_angle_2d<float>
+        ( samples_per_profile_, profile_count, oversampling_factor_, 
+          1.0f/(float(samples_per_profile_)/float(image_dimensions_recon_[0])) );
+    }
+
+    if( mode_ == 2 || mode_ == 3 ){
+
+      unsigned int profile_count = 
+        profiles_per_frame_[idx]*buffer_frames_per_rotation_[idx]*buffer_length_in_rotations_;
+
+      return compute_radial_dcw_golden_ratio_2d<float>
+        ( samples_per_profile_, profile_count, oversampling_factor_, 
+          1.0f/(float(samples_per_profile_)/float(image_dimensions_recon_[0])),0,
+          (mode_==2) ? GR_ORIGINAL : GR_SMALLEST );
+    }
+
+    // Any other mode is illegal
+    GADGET_DEBUG1("Illegal dcw mode\n");
+    return boost::shared_ptr< cuNDArray<float> >();
+  }
+
+  boost::shared_ptr< hoNDArray<float_complext> > gpuRadialPrepGadget::
+  extract_samples_from_queue( ACE_Message_Queue<ACE_MT_SYNCH> *queue, bool sliding_window,
+                              unsigned int set, unsigned int slice )
+  {    
+    //GADGET_DEBUG1("Emptying queue...\n");
+    // Dequeues every profile currently in 'queue' and copies its samples into a single host array.
+    unsigned int profiles_buffered = queue->message_count();
+    // Array layout: (samples_per_profile_*profiles_buffered) x num_coils. An empty (null) pointer is returned on failure.
+    std::vector<size_t> dims;
+    dims.push_back(samples_per_profile_*profiles_buffered);
+    dims.push_back(num_coils_[set*slices_+slice]);
+    
+    boost::shared_ptr< hoNDArray<float_complext> > host_samples(new hoNDArray<float_complext>(&dims));
+    
+    for (unsigned int p=0; p<profiles_buffered; p++) {
+
+      ACE_Message_Block* mbq;
+      if (queue->dequeue_head(mbq) < 0) {
+        GADGET_DEBUG1("Message dequeue failed\n");
+        return boost::shared_ptr< hoNDArray<float_complext> >();
+      }
+      
+      GadgetContainerMessage< hoNDArray< std::complex<float> > > *daq = AsContainerMessage<hoNDArray< std::complex<float> > >(mbq);
+      if (!daq) {
+        GADGET_DEBUG1("Unable to interpret data on message queue\n");
+        mbq->release(); // already removed from the queue: release it here so it is not leaked
+        return boost::shared_ptr< hoNDArray<float_complext> >();
+      }
+      // Copy coil by coil: profile 'p' of coil 'c' goes to offset (c*profiles_buffered+p)*samples_per_profile_
+      for (unsigned int c = 0; c < num_coils_[set*slices_+slice]; c++) {
+	
+        float_complext *data_ptr = host_samples->get_data_ptr();
+        data_ptr += c*samples_per_profile_*profiles_buffered+p*samples_per_profile_;
+	    
+        std::complex<float> *r_ptr = daq->getObjectPtr()->get_data_ptr();
+        r_ptr += c*daq->getObjectPtr()->get_size(0);
+	  
+        memcpy(data_ptr,r_ptr,samples_per_profile_*sizeof(float_complext));
+      }
+
+      // In sliding window mode the last 'profiles_in_sliding_window' profiles are put back
+      // at the end of the queue; all other profiles are released
+      
+      long profiles_in_sliding_window = sliding_window_profiles_ + 
+        profiles_per_frame_[set*slices_+slice]*frames_per_rotation_[set*slices_+slice]*sliding_window_rotations_;
+
+      if( sliding_window && p >= (profiles_buffered-profiles_in_sliding_window) )
+        queue->enqueue_tail(mbq);
+      else
+        mbq->release();
+    } 
+    
+    return host_samples;
+  }
+  
+  GadgetContainerMessage< hoNDArray< std::complex<float> > >*
+  gpuRadialPrepGadget::duplicate_profile( GadgetContainerMessage< hoNDArray< std::complex<float> > > *profile )
+  {
+    // Allocates a new container message and copy-assigns the array held by 'profile' to it
+    typedef GadgetContainerMessage< hoNDArray< std::complex<float> > > ProfileMessage;
+    ProfileMessage *duplicate = new ProfileMessage();
+    
+    *duplicate->getObjectPtr() = *profile->getObjectPtr();
+    return duplicate;
+  }
+
+  void gpuRadialPrepGadget::reconfigure(unsigned int set, unsigned int slice, bool use_dcw)
+  {    
+    // Recomputes the reconstruction trajectory/dcw and sets up the accumulation buffer of the given set/slice.
+    // The counters are of type 'long' but are printed with %d, so they are cast to 'int' to match the format.
+    GADGET_DEBUG2("\nReconfiguring:\n#profiles/frame:%d\n#frames/rotation: %d\n#rotations/reconstruction:%d\n", 
+                  static_cast<int>(profiles_per_frame_[set*slices_+slice]), static_cast<int>(frames_per_rotation_[set*slices_+slice]),
+                  static_cast<int>(rotations_per_reconstruction_));
+    calculate_trajectory_for_reconstruction(0, set, slice);
+    calculate_density_compensation_for_reconstruction(set, slice);
+    
+    // A configured "buffer_frames_per_rotation" of 0 means: derive the value from the current settings
+    buffer_frames_per_rotation_[set*slices_+slice] = get_int_value(std::string("buffer_frames_per_rotation").c_str());
+    if( buffer_frames_per_rotation_[set*slices_+slice] == 0 ){
+      if( mode_ == 2 || mode_ == 3 )
+        buffer_frames_per_rotation_[set*slices_+slice] = 
+          image_dimensions_recon_os_[0]/profiles_per_frame_[set*slices_+slice];
+      else
+        buffer_frames_per_rotation_[set*slices_+slice] = frames_per_rotation_[set*slices_+slice];
+    }
+    
+    cuBuffer<float,2> *acc_buffer = get_buffer_ptr(set*slices_+slice);
+    acc_buffer->setup( from_std_vector<size_t,2>(image_dimensions_recon_), image_dimensions_recon_os_, 
+                       kernel_width_, num_coils_[set*slices_+slice], 
+                       buffer_length_in_rotations_, buffer_frames_per_rotation_[set*slices_+slice] );
+    
+    if(use_dcw){
+      boost::shared_ptr< cuNDArray<float> > device_weights_frame = calculate_density_compensation_for_frame(set, slice);
+      acc_buffer->set_dcw(device_weights_frame);
+    }
+    reconfigure_[set*slices_+slice] = false; // clear the reconfigure flag of this set/slice
+  }
+}
diff --git a/gadgets/radial/gpuRadialPrepGadget.h b/gadgets/radial/gpuRadialPrepGadget.h
new file mode 100644
index 0000000..4a6f03c
--- /dev/null
+++ b/gadgets/radial/gpuRadialPrepGadget.h
@@ -0,0 +1,207 @@
+#pragma once
+
+#include "gadgetron_radial_export.h"
+#include "Gadget.h"
+#include "GadgetMRIHeaders.h"
+#include "hoNDArray.h"
+#include "vector_td.h"
+#include "cuNFFT.h"
+#include "cuCgPreconditioner.h"
+#include "cuBuffer.h"
+#include "cuSenseBufferCg.h"
+#include "cuSpiritBuffer.h"
+
+#include <ismrmrd/ismrmrd.h>
+#include <complex>
+#include <boost/shared_ptr.hpp>
+#include <boost/shared_array.hpp>
+
+/*
+  ------------------------------------------
+  Trajectory modes for radial reconstruction
+  ------------------------------------------
+  
+  Mode 0 and Mode 1 are variants of 'fixed' radial trajectories with interframe rotation.
+  Mode 2 and Mode 3 denote radial trajectories with golden ratio based angular profile spacings.
+  
+  Let 
+  'i' denote the number of profiles per (undersampled) frame
+  'j' denote the number of frames per trajectory rotation (to obtain a fully sampled acquisition)
+  'h' denote a variable of type ISMRMRD::AcquisitionHeader
+
+  It is possible to explicitly set 'i' and 'j' in the Gadgetron configuration file.
+  For some modes this is (partly) required, 
+  for others they will be automatically determined from the incoming profile headers.
+  
+  Mode 0:
+  -------
+  For each rotation cycle profiles are numbered using the scheme
+
+    0+0*j,0+1*j,0+2*j,...,0+(i-1)*j, (1st frame)
+    1+0*j,1+1*j,1+2*j,...,1+(i-1)*j, (2nd frame)
+    2+0*j,2+1*j,2+2*j,...,2+(i-1)*j, (3rd frame)
+    ...,
+    (j-1)+0*j,(j-1)+1*j,(j-1)+2*j,...,(j-1)+(i-1)*j
+
+  as given in h.idx.kspace_encode_step_1.
+  Both 'i' and 'j' are automatically derived and thus need not be explicitly specified in a configuration file.
+  For mode 0 both 'i' and 'j' can be changed dynamically as desired e.g. for real-time imaging.
+
+  Mode 1:
+  -------
+  Profiles are numbered 0,1,2,...,i-1, 0,1,2,...,i-1, ... as given in h.idx.kspace_encode_step_1.
+  'j' is estimated as 'matrix_size'/'i' and should be explicitly set in the configuration file if this is not the case, e.g.:
+  <property><name>frames_per_rotation</name><value>8</value></property>
+      
+
+  Mode 2 and Mode 3:
+  -------
+  Profiles are numbered 
+  0,1,2,...,i-1, 0,1,2,...,i-1, 0,1,2,...,i-1, ...
+  or
+  0,1,2,...,i-1, i,i+1,i+2,...,2*i-1, 2*i,2*i+1,2*i+2,...,3*i-1, ...
+  as given in h.idx.kspace_encode_step_1.
+  'i' should be explicitly specified in the Gadgetron configuration file, e.g.:
+  <property><name>profiles_per_frame</name><value>32</value></property>
+  If not it defaults to i=32.
+  'j' is explicitly set to '1' even if specified in the configuration file.
+*/
+
+namespace Gadgetron{
+  // Abstract base of the radial "prep" gadgets: queues incoming radial profiles and prepares reconstruction jobs for the next gadget.
+  class EXPORTGADGETS_RADIAL gpuRadialPrepGadget :
+    public Gadget2< ISMRMRD::AcquisitionHeader, hoNDArray< std::complex<float> > >
+  {
+    // Derived classes must implement compute_csm(), compute_reg(), allocate_accumulation_buffer() and get_buffer_ptr().
+  public:
+
+    gpuRadialPrepGadget();
+    virtual ~gpuRadialPrepGadget();
+
+  protected:
+    
+    virtual int process_config(ACE_Message_Block *mb);
+    // Handles a single incoming profile: its acquisition header (m1) and its samples (m2)
+    virtual int process(GadgetContainerMessage< ISMRMRD::AcquisitionHeader > *m1,
+			GadgetContainerMessage< hoNDArray< std::complex<float> > > *m2);
+    // Recomputes the reconstruction trajectory/dcw and sets up the accumulation buffer for the given set/slice
+    virtual void reconfigure(unsigned int set, unsigned int slice, bool use_dcw = true);
+    // Returns the csm for buffer 'buffer_idx' (process() passes set*slices_+slice)
+    virtual boost::shared_ptr< hoNDArray<float_complext> > compute_csm( unsigned int buffer_idx ) = 0;
+    // Returns the regularization image for the given set/slice
+    virtual boost::shared_ptr< hoNDArray<float_complext> > compute_reg
+      ( unsigned int set, unsigned int slice, bool new_frame ) = 0;
+    // NOTE(review): presumably allocates one accumulation buffer per set/slice -- confirm in process_config()
+    virtual void allocate_accumulation_buffer( unsigned int num_buffers ) = 0;
+    // Per set/slice flag; cleared at the end of reconfigure()
+    boost::shared_array<bool> reconfigure_;
+    // Returns a newly allocated copy of 'profile'
+    GadgetContainerMessage< hoNDArray< std::complex<float> > >*
+      duplicate_profile( GadgetContainerMessage< hoNDArray< std::complex<float> > > *profile );
+    // Copies the samples of all profiles in 'queue' into one host array; returns an empty pointer on failure
+    boost::shared_ptr< hoNDArray<float_complext> > 
+      extract_samples_from_queue( ACE_Message_Queue<ACE_MT_SYNCH> *queue,
+				  bool acknowledge_sliding_window,
+				  unsigned int set, unsigned int slice );
+
+    // Compute trajectory/dcw for a reconstruction (to store internally)
+    //
+
+    int calculate_trajectory_for_reconstruction(long profile_offset, unsigned int set, unsigned int slice);
+    int calculate_density_compensation_for_reconstruction(unsigned int set, unsigned int slice);
+
+    // Compute trajectory/dcw for adding (usually undersampled) frames to the accumulation buffer
+    //
+
+    boost::shared_ptr< cuNDArray<floatd2> > 
+      calculate_trajectory_for_frame(long profile_offset, unsigned int set, unsigned int slice);
+
+    boost::shared_ptr< cuNDArray<float> >
+      calculate_density_compensation_for_frame(unsigned int set, unsigned int slice);
+
+    // Compute trajectory/dcw for the fully sampled accumulation buffer (iterative buffer mode only)
+    //
+
+    boost::shared_ptr< cuNDArray<floatd2> > 
+      calculate_trajectory_for_rhs(long profile_offset, unsigned int set, unsigned int slice);
+
+    boost::shared_ptr< cuNDArray<float> > 
+      calculate_density_compensation_for_rhs(unsigned int set, unsigned int slice);
+    // Indexing convention: the per set/slice arrays below are indexed by set*slices_+slice
+    int slices_;
+    int sets_;
+    int device_number_;
+    int mode_; // See note above
+    long samples_per_profile_;
+
+    boost::shared_array<long> image_counter_;
+    boost::shared_array<long> profiles_per_frame_;  // for an undersampled frame
+    boost::shared_array<long> frames_per_rotation_; // representing a fully sampled frame
+
+    // The number of rotations to batch per reconstruction. 
+    // Set to '0' to reconstruct frames individually.
+    long rotations_per_reconstruction_; 
+
+    // The number of buffer cycles
+    long buffer_length_in_rotations_; 
+
+    boost::shared_array<long> buffer_frames_per_rotation_; // the number of buffer subcycles
+
+    // Internal book-keeping
+    boost::shared_array<long> previous_profile_;
+    boost::shared_array<long> profiles_counter_frame_;
+    boost::shared_array<long> profiles_counter_global_;
+    // Number of trailing profiles/rotations that are put back on the queues after a reconstruction (sliding window)
+    long sliding_window_profiles_;
+    long sliding_window_rotations_;
+    // kernel_width_ is passed to the accumulation buffer setup, oversampling_factor_ to the dcw computations
+    float kernel_width_;
+    float oversampling_factor_;
+
+    boost::shared_array<unsigned int> num_coils_;
+    // Geometry per set/slice, copied from the acquisition header in process()
+    boost::shared_array<float[3]> position_;
+    boost::shared_array<float[3]> read_dir_;
+    boost::shared_array<float[3]> phase_dir_;
+    boost::shared_array<float[3]> slice_dir_;
+
+    bool output_timing_;
+    bool buffer_using_solver_;
+    // When set, csm_host_/reg_host_ are recomputed before the next reconstruction
+    boost::shared_array<bool> buffer_update_needed_;
+    // Host copies of the trajectory and dcw used for a reconstruction
+    boost::shared_array< hoNDArray<floatd2> > host_traj_recon_;
+    boost::shared_array< hoNDArray<float> > host_weights_recon_;
+    // Host copies of the results of compute_csm() and compute_reg()
+    boost::shared_array< hoNDArray<float_complext> > csm_host_;
+    boost::shared_array< hoNDArray<float_complext> > reg_host_;
+    
+    // We would like to make a single array of the buffer base class
+    // but encounter yet unexplainable heap corruptions if we do.
+    // Hence this workaround:
+    //boost::shared_array< cuBuffer<float,2> > acc_buffer_;
+    boost::shared_array< cuSenseBuffer<float,2> > acc_buffer_sense_;
+    boost::shared_array< cuSenseBufferCg<float,2> > acc_buffer_sense_cg_;
+    boost::shared_array< cuSpiritBuffer<float,2> > acc_buffer_spirit_;
+    virtual cuBuffer<float,2>* get_buffer_ptr(int idx) = 0;
+    // <-- end of workaround
+
+    std::vector<size_t> fov_;
+    std::vector<size_t> image_dimensions_;
+    std::vector<size_t> image_dimensions_recon_;
+    uint64d2 image_dimensions_recon_os_;
+    // Per set/slice queues: profiles of the current frame, profiles of the pending reconstruction, and its image headers
+    boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> > frame_profiles_queue_;
+    boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> > recon_profiles_queue_;
+    boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> > image_headers_queue_;
+
+  private:
+    // Exact element-wise comparison of two 3-element float vectors
+    inline bool vec_equal(float *in1, float *in2) {
+      for (unsigned int i = 0; i < 3; i++) {
+        if (in1[i] != in2[i]) return false;
+      }
+      return true;
+    }   
+  };
+}
diff --git a/gadgets/radial/gpuRadialSensePrepGadget.cpp b/gadgets/radial/gpuRadialSensePrepGadget.cpp
index 0b1a911..28aa7d0 100644
--- a/gadgets/radial/gpuRadialSensePrepGadget.cpp
+++ b/gadgets/radial/gpuRadialSensePrepGadget.cpp
@@ -1,997 +1,88 @@
 #include "gpuRadialSensePrepGadget.h"
-#include "Gadgetron.h"
-#include "GadgetIsmrmrdReadWrite.h"
-#include "cuNonCartesianSenseOperator.h"
-#include "SenseJob.h"
-#include "cuNDArray_elemwise.h"
-#include "cuNDArray_utils.h"
-#include "vector_td_operators.h"
 #include "b1_map.h"
-#include "GPUTimer.h"
-#include "check_CUDA.h"
-#include "radial_utilities.h"
-#include "hoNDArray_fileio.h"
-
-#include <algorithm>
-#include <vector>
-#include <cmath>
+#include "cuSenseBufferCg.h"
 
 namespace Gadgetron{
 
-  gpuRadialSensePrepGadget::gpuRadialSensePrepGadget()
-    : slices_(-1)
-    , sets_(-1)
-    , device_number_(-1)
-    , mode_(-1)
-    , samples_per_profile_(-1)
-  {
-    // Set some default values in case the config does not contain a specification
+  boost::shared_ptr< hoNDArray<float_complext> > 
+  gpuRadialSensePrepGadget::compute_csm( unsigned int idx )
+  {    
+    // Estimate the csm (via estimate_b1_map) from the accumulated coil images of buffer idx, set it on that buffer and return a host copy; an empty pointer is returned on failure
     //
-
-    set_parameter(std::string("mode").c_str(), "0");
-    set_parameter(std::string("deviceno").c_str(), "0");
-    set_parameter(std::string("buffer_length_in_rotations").c_str(), "1");
-    set_parameter(std::string("buffer_using_solver").c_str(), "false");
-    set_parameter(std::string("buffer_convolution_kernel_width").c_str(), "5.5");
-    set_parameter(std::string("buffer_convolution_oversampling_factor").c_str(), "1.25");
-    set_parameter(std::string("rotations_per_reconstruction").c_str(), "0");
-    set_parameter(std::string("reconstruction_os_factor_x").c_str(), "1.0");
-    set_parameter(std::string("reconstruction_os_factor_y").c_str(), "1.0");
-  }
   
-  gpuRadialSensePrepGadget::~gpuRadialSensePrepGadget() {}
+    cuSenseBuffer<float,2> *acc_buffer = 
+      (this->buffer_using_solver_) ? &this->acc_buffer_sense_cg_[idx] : &this->acc_buffer_sense_[idx];
   
-  int gpuRadialSensePrepGadget::process_config(ACE_Message_Block* mb)
-  {
-    //GADGET_DEBUG1("gpuRadialSensePrepGadget::process_config\n");
-
-    // Get configuration values from config file
-    //
-
-    mode_ = get_int_value(std::string("mode").c_str());
-    device_number_ = get_int_value(std::string("deviceno").c_str());
-    rotations_per_reconstruction_ = get_int_value(std::string("rotations_per_reconstruction").c_str());
-    buffer_length_in_rotations_ = get_int_value(std::string("buffer_length_in_rotations").c_str());
-    buffer_using_solver_ = get_bool_value(std::string("buffer_using_solver").c_str());
-    output_timing_ = get_bool_value(std::string("output_timing").c_str());
-
-    // Currently there are some restrictions on the allowed sliding window configurations
-    //
-    
-    sliding_window_profiles_ = get_int_value(std::string("sliding_window_profiles").c_str());
-    sliding_window_rotations_ = get_int_value(std::string("sliding_window_rotations").c_str());
-
-    if( sliding_window_profiles_>0 && sliding_window_rotations_>0 ){
-      GADGET_DEBUG1( "Error: Sliding window reconstruction is not yet supported for both profiles and frames simultaneously.\n" );
-      return GADGET_FAIL;
-    }
-
-    if( sliding_window_profiles_>0 && rotations_per_reconstruction_>0 ){
-      GADGET_DEBUG1( "Error: Sliding window reconstruction over profiles is not yet supported for multiframe reconstructions.\n" );
-      return GADGET_FAIL;
-    }
-    
-    if( sliding_window_rotations_ > 0 && sliding_window_rotations_ >= rotations_per_reconstruction_ ){
-      GADGET_DEBUG1( "Error: Illegal sliding window configuration.\n" );
-      return GADGET_FAIL;
-    }
-
-    // Setup and validate device configuration
-    //
-
-    int number_of_devices;
-    if (cudaGetDeviceCount(&number_of_devices)!= cudaSuccess) {
-      GADGET_DEBUG1( "Error: unable to query number of CUDA devices.\n" );
-      return GADGET_FAIL;
-    }
-
-    if (number_of_devices == 0) {
-      GADGET_DEBUG1( "Error: No available CUDA devices.\n" );
-      return GADGET_FAIL;
-    }
-
-    if (device_number_ >= number_of_devices) {
-      GADGET_DEBUG2("Adjusting device number from %d to %d\n", device_number_,  (device_number_%number_of_devices));
-      device_number_ = (device_number_%number_of_devices);
-    }
-
-    if (cudaSetDevice(device_number_)!= cudaSuccess) {
-      GADGET_DEBUG1( "Error: unable to set CUDA device.\n" );
-      return GADGET_FAIL;
-    }
-
-    cudaDeviceProp deviceProp;
-    if( cudaGetDeviceProperties( &deviceProp, device_number_ ) != cudaSuccess) {
-      GADGET_DEBUG1( "Error: unable to query device properties.\n" );
-      return GADGET_FAIL;
-    }
+    boost::shared_ptr< cuNDArray<float_complext> > csm_data = 
+      acc_buffer->get_accumulated_coil_images();
     
-    unsigned int warp_size = deviceProp.warpSize;
-
-    // Convolution kernel width and oversampling ratio (for the buffer)
-    //
-
-    kernel_width_ = get_double_value(std::string("buffer_convolution_kernel_width").c_str());
-    oversampling_factor_ = get_double_value(std::string("buffer_convolution_oversampling_factor").c_str());
-
-    // Get the Ismrmrd header
-    //
-
-    boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg = parseIsmrmrdXMLHeader(std::string(mb->rd_ptr()));
-    
-    if( cfg.get() == 0x0 ){
-      GADGET_DEBUG1("Unable to parse Ismrmrd header\n");
-      return GADGET_FAIL;
-    }
-
-    ISMRMRD::ismrmrdHeader::encoding_sequence e_seq = cfg->encoding();
-
-    if (e_seq.size() != 1) {
-      GADGET_DEBUG2("Number of encoding spaces: %d\n", e_seq.size());
-      GADGET_DEBUG1("This Gadget only supports one encoding space\n");
-      return GADGET_FAIL;
-    }
-    
-    ISMRMRD::encodingSpaceType e_space = (*e_seq.begin()).encodedSpace();
-    ISMRMRD::encodingSpaceType r_space = (*e_seq.begin()).reconSpace();
-    ISMRMRD::encodingLimitsType e_limits = (*e_seq.begin()).encodingLimits();
-
-    // Matrix sizes (as a multiple of the GPU's warp size)
-    //
-    
-    image_dimensions_.push_back(((e_space.matrixSize().x()+warp_size-1)/warp_size)*warp_size);
-    image_dimensions_.push_back(((e_space.matrixSize().y()+warp_size-1)/warp_size)*warp_size);
-
-    image_dimensions_recon_.push_back(((static_cast<unsigned int>(std::ceil(e_space.matrixSize().x()*get_double_value(std::string("reconstruction_os_factor_x").c_str())))+warp_size-1)/warp_size)*warp_size);  
-    image_dimensions_recon_.push_back(((static_cast<unsigned int>(std::ceil(e_space.matrixSize().y()*get_double_value(std::string("reconstruction_os_factor_y").c_str())))+warp_size-1)/warp_size)*warp_size);
-    
-    image_dimensions_recon_os_ = uint64d2
-      (((static_cast<unsigned int>(std::ceil(image_dimensions_recon_[0]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size,
-       ((static_cast<unsigned int>(std::ceil(image_dimensions_recon_[1]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size);
-    
-    // In case the warp_size constraint kicked in
-    oversampling_factor_ = float(image_dimensions_recon_os_[0])/float(image_dimensions_recon_[0]); 
-    
-    GADGET_DEBUG2("matrix_size_x : %d, recon: %d, recon_os: %d\n", 
-                  image_dimensions_[0], image_dimensions_recon_[0], image_dimensions_recon_os_[0]);
-
-    GADGET_DEBUG2("matrix_size_y : %d, recon: %d, recon_os: %d\n", 
-                  image_dimensions_[1], image_dimensions_recon_[1], image_dimensions_recon_os_[1]);
-    
-    fov_.push_back(r_space.fieldOfView_mm().x());
-    fov_.push_back(r_space.fieldOfView_mm().y());
-    fov_.push_back(r_space.fieldOfView_mm().z());
-
-    slices_ = e_limits.slice().present() ? e_limits.slice().get().maximum() + 1 : 1;
-    sets_ = e_limits.set().present() ? e_limits.set().get().maximum() + 1 : 1;
-    
-    // Allocate profile queues
-    // - one queue for the currently incoming frame
-    // - one queue for the next reconstruction
-
-    frame_profiles_queue_ = boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>[slices_*sets_]);
-    recon_profiles_queue_ = boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>[slices_*sets_]);
-    image_headers_queue_ = boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>[slices_*sets_]);
-
-    size_t bsize = sizeof(GadgetContainerMessage< hoNDArray< std::complex<float> > >)*image_dimensions_[0]*10;
-
-    for( unsigned int i=0; i<slices_*sets_; i++ ){
-      frame_profiles_queue_[i].high_water_mark(bsize);
-      frame_profiles_queue_[i].low_water_mark(bsize);
+    if( !csm_data.get() ){
+      GADGET_DEBUG1("Error during accumulation buffer computation\n");
+      return boost::shared_ptr< hoNDArray<float_complext> >();
     }
     
-    bsize *= (rotations_per_reconstruction_+1);
-    
-    for( unsigned int i=0; i<slices_*sets_; i++ ){
-      recon_profiles_queue_[i].high_water_mark(bsize);
-      recon_profiles_queue_[i].low_water_mark(bsize);
-    }
-
-    // Define some profile counters for book-keeping
-    //
-
-    previous_profile_ = boost::shared_array<long>(new long[slices_*sets_]);
-    image_counter_ = boost::shared_array<long>(new long[slices_*sets_]);
-    profiles_counter_frame_= boost::shared_array<long>(new long[slices_*sets_]);
-    profiles_counter_global_= boost::shared_array<long>(new long[slices_*sets_]);
-    profiles_per_frame_= boost::shared_array<long>(new long[slices_*sets_]);
-    frames_per_rotation_= boost::shared_array<long>(new long[slices_*sets_]);
-    buffer_frames_per_rotation_= boost::shared_array<long>(new long[slices_*sets_]);
-    buffer_update_needed_ = boost::shared_array<bool>(new bool[slices_*sets_]);
-    reconfigure_ = boost::shared_array<bool>(new bool[slices_*sets_]);
-    num_coils_ = boost::shared_array<unsigned int>(new unsigned int[slices_*sets_]);
-    
-    if( !previous_profile_.get() ||
-        !image_counter_.get() || 
-        !profiles_counter_frame_.get() ||
-        !profiles_counter_global_.get() ||
-        !profiles_per_frame_.get() || 
-        !frames_per_rotation_.get() ||
-        !buffer_frames_per_rotation_.get() ||
-        !buffer_update_needed_.get() ||
-        !num_coils_.get() ||
-        !reconfigure_ ){
-      GADGET_DEBUG1("Failed to allocate host memory (1)\n");
-      return GADGET_FAIL;
-    }
-
-    for( unsigned int i=0; i<slices_*sets_; i++ ){
-
-      previous_profile_[i] = -1;
-      image_counter_[i] = 0;
-      profiles_counter_frame_[i] = 0;
-      profiles_counter_global_[i] = 0;
-      profiles_per_frame_[i] = get_int_value(std::string("profiles_per_frame").c_str());
-      frames_per_rotation_[i] = get_int_value(std::string("frames_per_rotation").c_str());
-      buffer_frames_per_rotation_[i] = get_int_value(std::string("buffer_frames_per_rotation").c_str());
-      num_coils_[i] = 0;
-      buffer_update_needed_[i] = true;
-      reconfigure_[i] = true;
-
-      // Assign some default values ("upper bound estimates") of the (possibly) unknown entities
-      //
-      
-      if( profiles_per_frame_[i] == 0 ){
-        profiles_per_frame_[i] = image_dimensions_[0];
-      }
-      
-      if( frames_per_rotation_[i] == 0 ){
-        if( mode_ == 2 || mode_ == 3 ) // golden ratio
-          frames_per_rotation_[i] = 1;
-        else
-          frames_per_rotation_[i] = image_dimensions_[0]/profiles_per_frame_[i];
-      }
-
-      bsize = sizeof(GadgetContainerMessage<ISMRMRD::ImageHeader>)*100*
-        std::max(1L, frames_per_rotation_[i]*rotations_per_reconstruction_);
-    
-      image_headers_queue_[i].high_water_mark(bsize);
-      image_headers_queue_[i].low_water_mark(bsize);
-    }
-        
-    position_ = boost::shared_array<float[3]>(new float[slices_*sets_][3]);
-    read_dir_ = boost::shared_array<float[3]>(new float[slices_*sets_][3]);
-    phase_dir_ = boost::shared_array<float[3]>(new float[slices_*sets_][3]);
-    slice_dir_ = boost::shared_array<float[3]>(new float[slices_*sets_][3]);
-
-    if( !position_.get() || !read_dir_.get() || !phase_dir_.get() || !slice_dir_.get() ){
-      GADGET_DEBUG1("Failed to allocate host memory (2)\n");
-      return GADGET_FAIL;
-    }
-
-    for( unsigned int i=0; i<slices_*sets_; i++ ){
-      (position_[i])[0] = (position_[i])[1] = (position_[i])[2] = 0.0f;
-      (read_dir_[i])[0] = (read_dir_[i])[1] = (read_dir_[i])[2] = 0.0f;
-      (phase_dir_[i])[0] = (phase_dir_[i])[1] = (phase_dir_[i])[2] = 0.0f;
-      (slice_dir_[i])[0] = (slice_dir_[i])[1] = (slice_dir_[i])[2] = 0.0f;
-    }
-
-    // Allocate accumulation buffer
-    //
-
-    if( buffer_using_solver_ )
-      acc_buffer_cg_ = boost::shared_array< cuSenseBufferCg<float,2> >(new cuSenseBufferCg<float,2>[slices_*sets_]);
-    else
-      acc_buffer_ = boost::shared_array< cuSenseBuffer<float,2> >(new cuSenseBuffer<float,2>[slices_*sets_]);
-    
-    // Allocate remaining shared_arrays
-    //
+    boost::shared_ptr< cuNDArray<float_complext> > csm = 
+      estimate_b1_map<float,2>( csm_data.get() );
+  
+    if( !csm.get() ){
+      GADGET_DEBUG1("Error during coil estimation\n");
+      return boost::shared_ptr< hoNDArray<float_complext> >();
+    }            
     
-    csm_host_ = boost::shared_array< hoNDArray<float_complext> >(new hoNDArray<float_complext>[slices_*sets_]);
-    reg_host_ = boost::shared_array< hoNDArray<float_complext> >(new hoNDArray<float_complext>[slices_*sets_]);
-
-    host_traj_recon_ = boost::shared_array< hoNDArray<floatd2> >(new hoNDArray<floatd2>[slices_*sets_]);
-    host_weights_recon_ = boost::shared_array< hoNDArray<float> >(new hoNDArray<float>[slices_*sets_]);
-
-    if( !csm_host_.get() || !reg_host_.get() || !host_traj_recon_.get() || !host_weights_recon_ ){
-      GADGET_DEBUG1("Failed to allocate host memory (3)\n");
-      return GADGET_FAIL;
-    }
-
-    return GADGET_OK;
+    acc_buffer->set_csm(csm);
+    return csm->to_host(); 
   }
-
-  int gpuRadialSensePrepGadget::
-  process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader> *m1,
-          GadgetContainerMessage< hoNDArray< std::complex<float> > > *m2)
-  {
-    // Noise should have been consumed by the noise adjust (if in the gadget chain)
-    //
-    
-    bool is_noise = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_NOISE_MEASUREMENT).isSet(m1->getObjectPtr()->flags);
-    if (is_noise) { 
-      m1->release();
-      return GADGET_OK;
-    }
-
-    unsigned int profile = m1->getObjectPtr()->idx.kspace_encode_step_1;
-    unsigned int slice = m1->getObjectPtr()->idx.slice;
-    unsigned int set = m1->getObjectPtr()->idx.set;
-
-    // Get a pointer to the accumulation buffer. 
-    //
-
-    cuSenseBuffer<float,2> *acc_buffer = (buffer_using_solver_) ? &acc_buffer_cg_[set*slices_+slice] : &acc_buffer_[set*slices_+slice];
-
-    //GADGET_DEBUG1("gpuRadialSensePrepGadget::process\n");
-
-    boost::shared_ptr<GPUTimer> process_timer;
-    if( output_timing_ )
-      process_timer = boost::shared_ptr<GPUTimer>( new GPUTimer("gpuRadialSensePrepGadget::process()") );
-
-    // Have the imaging plane changed?
-    //
-
-    if( !vec_equal(position_[set*slices_+slice], m1->getObjectPtr()->position) ||
-        !vec_equal(read_dir_[set*slices_+slice], m1->getObjectPtr()->read_dir) || 
-        !vec_equal(phase_dir_[set*slices_+slice], m1->getObjectPtr()->phase_dir) ||
-        !vec_equal(slice_dir_[set*slices_+slice], m1->getObjectPtr()->slice_dir) ){
-      
-      // Yes indeed, clear the accumulation buffer
-      acc_buffer->clear();
-      buffer_update_needed_[set*slices_+slice] = true;
-      
-      memcpy(position_[set*slices_+slice],m1->getObjectPtr()->position,3*sizeof(float));
-      memcpy(read_dir_[set*slices_+slice],m1->getObjectPtr()->read_dir,3*sizeof(float));
-      memcpy(phase_dir_[set*slices_+slice],m1->getObjectPtr()->phase_dir,3*sizeof(float));
-      memcpy(slice_dir_[set*slices_+slice],m1->getObjectPtr()->slice_dir,3*sizeof(float));
-    }
-    
-    // Only when the first profile arrives, do we know the #samples/profile
-    //
-
-    if( samples_per_profile_ == -1 )      
-      samples_per_profile_ = m1->getObjectPtr()->number_of_samples;
-    
-    if( samples_per_profile_ != m1->getObjectPtr()->number_of_samples ){
-      GADGET_DEBUG1("Unexpected change in the incoming profiles' lengths\n");
-      return GADGET_FAIL;
-    }
-    
-    bool new_frame_detected = false;
-
-    // Reconfigure at first pass
-    // - or if the number of coil changes
-    // - or if the reconfigure_ flag is set
-
-    if( num_coils_[set*slices_+slice] != m1->getObjectPtr()->active_channels ){
-      GADGET_DEBUG1("Reconfiguring due to change in the number of coils\n");
-      num_coils_[set*slices_+slice] = m1->getObjectPtr()->active_channels;
-      reconfigure(set, slice);
-    }
-
-    if( reconfigure_[set*slices_+slice] ){
-      GADGET_DEBUG1("Reconfiguring due to boolean indicator\n");
-      reconfigure(set, slice);
-    }
-
-    // Keep track of the incoming profile ids (mode dependent)
-    // - to determine the number of profiles per frame
-    // - to determine the number of frames per rotation
-    //
-
-    if (previous_profile_[set*slices_+slice] >= 0) {
-
-      if ( profile > previous_profile_[set*slices_+slice]) { // this is not the last profile in the frame
-        if( mode_ == 0 && get_int_value(std::string("frames_per_rotation").c_str()) == 0 ){
-          unsigned int acceleration_factor = profile - previous_profile_[set*slices_+slice];
-          if( acceleration_factor != frames_per_rotation_[set*slices_+slice] ){
-            GADGET_DEBUG1("Reconfiguring due to change in acceleration factor\n");
-            frames_per_rotation_[set*slices_+slice] = acceleration_factor;
-            reconfigure(set, slice);
-          }
-        }
-      }
-      else{ // This is the first profile in a new frame
-        if( get_int_value(std::string("profiles_per_frame").c_str()) == 0 && // make sure the user did not specify a desired value for this variable
-            profiles_counter_frame_[set*slices_+slice] > 0 &&
-            profiles_counter_frame_[set*slices_+slice] != profiles_per_frame_[set*slices_+slice] ){ // a new acceleration factor is detected
-          GADGET_DEBUG1("Reconfiguring due to new slice detection\n");
-          new_frame_detected = true;
-          profiles_per_frame_[set*slices_+slice] = profiles_counter_frame_[set*slices_+slice];
-          if( mode_ == 1 && get_int_value(std::string("frames_per_rotation").c_str()) == 0 )
-            frames_per_rotation_[set*slices_+slice] = image_dimensions_[0]/profiles_per_frame_[set*slices_+slice];
-          reconfigure(set, slice);
-        }
-      }
-    }
-    previous_profile_[set*slices_+slice] = profile;
-
-    // Enqueue profile
-    // - if 'new_frame_detected' the current profile does not belong to the current frame and we delay enqueing
-
-    if( !new_frame_detected ) {
-      
-      // Memory handling is easier if we make copies for our internal queues
-      frame_profiles_queue_[set*slices_+slice].enqueue_tail(duplicate_profile(m2));
-      recon_profiles_queue_[set*slices_+slice].enqueue_tail(duplicate_profile(m2));
-    }
-
-    // If the profile is the last of a "true frame" (ignoring any sliding window profiles)
-    // - then update the accumulation buffer
-
-    bool is_last_profile_in_frame = (profiles_counter_frame_[set*slices_+slice] == profiles_per_frame_[set*slices_+slice]-1);
-    is_last_profile_in_frame |= new_frame_detected;
-
-    if( is_last_profile_in_frame ){
-
-      // Extract this frame's samples to update the csm/regularization buffer
-      //
-
-      boost::shared_ptr< hoNDArray<float_complext> > host_samples = 
-        extract_samples_from_queue( &frame_profiles_queue_[set*slices_+slice], false, set, slice );
-
-      if( host_samples.get() == 0x0 ){
-        GADGET_DEBUG1("Failed to extract frame data from queue\n");
-        return GADGET_FAIL;
-      }
-      
-      cuNDArray<float_complext> samples( host_samples.get() );
-      
-      long profile_offset = profiles_counter_global_[set*slices_+slice] - ((new_frame_detected) ? 1 : 0);
-      boost::shared_ptr< cuNDArray<floatd2> > traj = calculate_trajectory_for_frame(profile_offset, set, slice);
-
-      buffer_update_needed_[set*slices_+slice] |= acc_buffer->add_frame_data( &samples, traj.get() );
-    }
-    
-    // Are we ready to reconstruct (downstream)?
+  
+  boost::shared_ptr< hoNDArray<float_complext> > 
+  gpuRadialSensePrepGadget::compute_reg( unsigned int set, unsigned int slice, bool new_frame )
+  {    
+    // Compute the regularization image (the buffer's combined coil image) for index set*slices_+slice and return a host copy; an empty pointer is returned on failure
     //
     
-    long profiles_per_reconstruction = profiles_per_frame_[set*slices_+slice];
-    
-    if( rotations_per_reconstruction_ > 0 )
-      profiles_per_reconstruction *= (frames_per_rotation_[set*slices_+slice]*rotations_per_reconstruction_);
-    
-    bool is_last_profile_in_reconstruction = ( recon_profiles_queue_[set*slices_+slice].message_count() == profiles_per_reconstruction );
-        
-    // Prepare the image header for this frame
-    // - if this is indeed the last profile of a new frame
-    // - or if we are about to reconstruct due to 'sliding_window_profiles_' > 0
-
-    if( is_last_profile_in_frame || 
-        (is_last_profile_in_reconstruction && image_headers_queue_[set*slices_+slice].message_count() == 0) ){
-      
-      GadgetContainerMessage<ISMRMRD::ImageHeader> *header = new GadgetContainerMessage<ISMRMRD::ImageHeader>();
-      ISMRMRD::AcquisitionHeader *base_head = m1->getObjectPtr();
-
-      {
-        // Initialize header to all zeroes (there is a few fields we do not set yet)
-        ISMRMRD::ImageHeader tmp = {0};
-        *(header->getObjectPtr()) = tmp;
-      }
-
-      header->getObjectPtr()->version = base_head->version;
-
-      header->getObjectPtr()->matrix_size[0] = image_dimensions_recon_[0];
-      header->getObjectPtr()->matrix_size[1] = image_dimensions_recon_[1];
-      header->getObjectPtr()->matrix_size[2] = std::max(1L,frames_per_rotation_[set*slices_+slice]*rotations_per_reconstruction_);
+    cuSenseBuffer<float,2> *acc_buffer = (this->buffer_using_solver_) ? 
+      &this->acc_buffer_sense_cg_[set*this->slices_+slice] : &this->acc_buffer_sense_[set*this->slices_+slice];
 
-      header->getObjectPtr()->field_of_view[0] = fov_[0];
-      header->getObjectPtr()->field_of_view[1] = fov_[1];
-      header->getObjectPtr()->field_of_view[2] = fov_[2];
-
-      header->getObjectPtr()->channels = num_coils_[set*slices_+slice];
-      header->getObjectPtr()->slice = base_head->idx.slice;
-      header->getObjectPtr()->set = base_head->idx.set;
-
-      header->getObjectPtr()->acquisition_time_stamp = base_head->acquisition_time_stamp;
-      memcpy(header->getObjectPtr()->physiology_time_stamp, base_head->physiology_time_stamp, sizeof(uint32_t)*ISMRMRD_PHYS_STAMPS);
-
-      memcpy(header->getObjectPtr()->position, base_head->position, sizeof(float)*3);
-      memcpy(header->getObjectPtr()->read_dir, base_head->read_dir, sizeof(float)*3);
-      memcpy(header->getObjectPtr()->phase_dir, base_head->phase_dir, sizeof(float)*3);
-      memcpy(header->getObjectPtr()->slice_dir, base_head->slice_dir, sizeof(float)*3);
-      memcpy(header->getObjectPtr()->patient_table_position, base_head->patient_table_position, sizeof(float)*3);
-
-      header->getObjectPtr()->image_data_type = ISMRMRD::DATA_COMPLEX_FLOAT;
-      header->getObjectPtr()->image_index = image_counter_[set*slices_+slice]++; 
-      header->getObjectPtr()->image_series_index = set*slices_+slice;
-
-      image_headers_queue_[set*slices_+slice].enqueue_tail(header);
+    if( buffer_using_solver_ && ( mode_ == 2 || mode_ == 3 ) ){
+      static_cast<cuSenseBufferCg<float,2>*>( acc_buffer )->preprocess
+        ( calculate_trajectory_for_rhs( this->profiles_counter_global_[set*this->slices_+slice] - ((new_frame) ? 1 : 0), set, slice).get());
     }
     
-    // If it is time to reconstruct (downstream) then prepare the Sense job
-    // 
-
-    if( is_last_profile_in_reconstruction ){
-      
-      // Update csm and regularization images if the buffer has changed (completed a cycle) 
-      // - and at the first pass
-      
-      if( buffer_update_needed_[set*slices_+slice] || 
-          csm_host_[set*slices_+slice].get_number_of_elements() == 0 || 
-          reg_host_[set*slices_+slice].get_number_of_elements() == 0 ){
-
-        // Get the accumulated coil images
-        //
-
-        boost::shared_ptr< cuNDArray<float_complext> > csm_data = acc_buffer->get_accumulated_coil_images();
-
-        if( !csm_data.get() ){
-          GADGET_DEBUG1("Error during accumulation buffer computation\n");
-          return GADGET_FAIL;
-        }            
-	
-        // Estimate CSM
-        //
-
-        boost::shared_ptr< cuNDArray<float_complext> > csm = estimate_b1_map<float,2>( csm_data.get() );
-
-        if( !csm.get() ){
-          GADGET_DEBUG1("Error during coil estimation\n");
-          return GADGET_FAIL;
-        }            
-
-        acc_buffer->set_csm(csm);
-        csm_host_[set*slices_+slice] = *(csm->to_host());
-	
-        // Compute regularization image
-        //
-
-        boost::shared_ptr< cuNDArray<float_complext> > reg_image;
-	
-        if( buffer_using_solver_ && ( mode_ == 2 || mode_ == 3 ) ){
-          ((cuSenseBufferCg<float,2>*)acc_buffer)->preprocess
-            ( calculate_trajectory_for_rhs( profiles_counter_global_[set*slices_+slice] - ((new_frame_detected) ? 1 : 0), set, slice).get());
-        }
-
-        reg_image = acc_buffer->get_combined_coil_image();
-	
-        if( !reg_image.get() ){
-          GADGET_DEBUG1("Error computing regularization image\n");
-          return GADGET_FAIL;
-        }            
-	
-        reg_host_[set*slices_+slice] = *(reg_image->to_host());
-		
-        /*
-          static int counter = 0;
-          char filename[256];
-          sprintf((char*)filename, "reg_%d.cplx", counter);
-          write_nd_array<float_complext>( reg_host_[set*slices_+slice].get(), filename );
-          counter++; */
-
-        buffer_update_needed_[set*slices_+slice] = false;
-      }
-
-      // Prepare data array of the profiles for the downstream reconstruction
-      //
-      
-      boost::shared_ptr< hoNDArray<float_complext> > samples_host = 
-        extract_samples_from_queue( &recon_profiles_queue_[set*slices_+slice], true, set, slice );
-      
-      if( samples_host.get() == 0x0 ){
-        GADGET_DEBUG1("Failed to extract frame data from queue\n");
-        return GADGET_FAIL;
-      }
-           
-      // The trajectory needs to be updated on the fly:
-      // - for golden ratio based acquisitions
-      // - when we are reconstructing frame-by-frame
-      
-      if( mode_ == 2 || mode_ == 3 || rotations_per_reconstruction_ == 0 ){
-        calculate_trajectory_for_reconstruction
-          ( profiles_counter_global_[set*slices_+slice] - ((new_frame_detected) ? 1 : 0), set, slice );
-      }
-      
-      // Set up Sense job
-      //
-
-      GadgetContainerMessage< SenseJob >* m4 = new GadgetContainerMessage< SenseJob >();
-	
-      m4->getObjectPtr()->dat_host_ = samples_host;
-      m4->getObjectPtr()->tra_host_ = boost::shared_ptr< hoNDArray<floatd2> >(new hoNDArray<floatd2>(host_traj_recon_[set*slices_+slice]));
-      m4->getObjectPtr()->dcw_host_ = boost::shared_ptr< hoNDArray<float> >(new hoNDArray<float>(host_weights_recon_[set*slices_+slice]));
-      m4->getObjectPtr()->csm_host_ = boost::shared_ptr< hoNDArray<float_complext> >( new hoNDArray<float_complext>(csm_host_[set*slices_+slice]));
-      m4->getObjectPtr()->reg_host_ = boost::shared_ptr< hoNDArray<float_complext> >( new hoNDArray<float_complext>(reg_host_[set*slices_+slice]));
-
-      // Pull the image headers out of the queue
-      //
-
-      long frames_per_reconstruction = 
-        std::max( 1L, frames_per_rotation_[set*slices_+slice]*rotations_per_reconstruction_ );
-      
-      if( image_headers_queue_[set*slices_+slice].message_count() != frames_per_reconstruction ){
-        m4->release();
-        GADGET_DEBUG2("Unexpected size of image header queue: %d, %d\n", 
-                      image_headers_queue_[set*slices_+slice].message_count(), frames_per_reconstruction);
-        return GADGET_FAIL;
-      }
-
-      m4->getObjectPtr()->image_headers_ =
-        boost::shared_array<ISMRMRD::ImageHeader>( new ISMRMRD::ImageHeader[frames_per_reconstruction] );
-      
-      for( unsigned int i=0; i<frames_per_reconstruction; i++ ){	
-
-        ACE_Message_Block *mbq;
-
-        if( image_headers_queue_[set*slices_+slice].dequeue_head(mbq) < 0 ) {
-          m4->release();
-          GADGET_DEBUG1("Image header dequeue failed\n");
-          return GADGET_FAIL;
-        }
-	
-        GadgetContainerMessage<ISMRMRD::ImageHeader> *m = AsContainerMessage<ISMRMRD::ImageHeader>(mbq);
-        m4->getObjectPtr()->image_headers_[i] = *m->getObjectPtr();
-
-        // In sliding window mode the header might need to go back at the end of the queue for reuse
-        // 
-	
-        if( i >= frames_per_reconstruction-sliding_window_rotations_*frames_per_rotation_[set*slices_+slice] ){
-          image_headers_queue_[set*slices_+slice].enqueue_tail(m);
-        }
-        else {
-          m->release();
-        }
-      }      
-      
-      // The Sense Job needs an image header as well. 
-      // Let us just copy the initial one...
-
-      GadgetContainerMessage<ISMRMRD::ImageHeader> *m3 = new GadgetContainerMessage<ISMRMRD::ImageHeader>;
-      *m3->getObjectPtr() = m4->getObjectPtr()->image_headers_[0];
-      m3->cont(m4);
-      
-      //GADGET_DEBUG1("Putting job on queue\n");
-      
-      if (this->next()->putq(m3) < 0) {
-        GADGET_DEBUG1("Failed to put job on queue.\n");
-        m3->release();
-        return GADGET_FAIL;
-      }
-    }
+    boost::shared_ptr< cuNDArray<float_complext> > reg_image = 
+      acc_buffer->get_combined_coil_image();
     
-    if( is_last_profile_in_frame )
-      profiles_counter_frame_[set*slices_+slice] = 0;
-    else{
-      profiles_counter_frame_[set*slices_+slice]++;
-    }
-
-    if( new_frame_detected ){
-
-      // This is the first profile of the next frame, enqueue.
-      // We have encountered deadlocks if the same profile is enqueued twice in different queues. Hence the copy.
-      
-      frame_profiles_queue_[set*slices_+slice].enqueue_tail(duplicate_profile(m2));
-      recon_profiles_queue_[set*slices_+slice].enqueue_tail(duplicate_profile(m2)); 
-
-      profiles_counter_frame_[set*slices_+slice]++;
-    }
-
-    profiles_counter_global_[set*slices_+slice]++;
-
-    if( output_timing_ )
-      process_timer.reset();
+    if( !reg_image.get() ){
+      GADGET_DEBUG1("Error computing regularization image\n");
+      return boost::shared_ptr< hoNDArray<float_complext> >();
+    }            
     
-    m1->release(); // the internal queues hold copies
-    return GADGET_OK;
+    return reg_image->to_host();
   }
-  
-  int 
-  gpuRadialSensePrepGadget::calculate_trajectory_for_reconstruction(long profile_offset, unsigned int set, unsigned int slice)
-  {   
-    //GADGET_DEBUG1("Calculating trajectory for reconstruction\n");
-
-    switch(mode_){
-      
-    case 0:
-    case 1:
-      {
-        if( rotations_per_reconstruction_ == 0 ){
 
-          long local_frame = (profile_offset/profiles_per_frame_[set*slices_+slice])%frames_per_rotation_[set*slices_+slice];
-          float angular_offset = M_PI/float(profiles_per_frame_[set*slices_+slice])*float(local_frame)/float(frames_per_rotation_[set*slices_+slice]);	  
-
-          host_traj_recon_[set*slices_+slice] = *compute_radial_trajectory_fixed_angle_2d<float>
-            ( samples_per_profile_, profiles_per_frame_[set*slices_+slice], 1, angular_offset )->to_host();	
-        }
-        else{
-          host_traj_recon_[set*slices_+slice] = *compute_radial_trajectory_fixed_angle_2d<float>
-            ( samples_per_profile_, profiles_per_frame_[set*slices_+slice], frames_per_rotation_[set*slices_+slice] )->to_host();
-        }
-      }
-      break;
-      
-    case 2:
-    case 3:
-      {
-        if( rotations_per_reconstruction_ == 0 ){	  
-          unsigned int first_profile_in_reconstruction = std::max(0L, profile_offset-profiles_per_frame_[set*slices_+slice]+1);
-          host_traj_recon_[set*slices_+slice] = *compute_radial_trajectory_golden_ratio_2d<float>
-            ( samples_per_profile_, profiles_per_frame_[set*slices_+slice], 1, first_profile_in_reconstruction,
-              (mode_==2) ? GR_ORIGINAL : GR_SMALLEST )->to_host();	
-        }
-        else{
-          unsigned int first_profile_in_reconstruction = 
-            std::max(0L, profile_offset-profiles_per_frame_[set*slices_+slice]*frames_per_rotation_[set*slices_+slice]*rotations_per_reconstruction_+1);
-          host_traj_recon_[set*slices_+slice] = *compute_radial_trajectory_golden_ratio_2d<float>
-            ( samples_per_profile_, profiles_per_frame_[set*slices_+slice], 
-              frames_per_rotation_[set*slices_+slice]*rotations_per_reconstruction_, first_profile_in_reconstruction,
-              (mode_==2) ? GR_ORIGINAL : GR_SMALLEST )->to_host();
-        }	  
-      }
-      break;
-	
-    default:
-      GADGET_DEBUG1("Illegal trajectory mode\n");
-      return GADGET_FAIL;
-      break;
-    }
-    return GADGET_OK;
-  }  
-
-  int
-  gpuRadialSensePrepGadget::calculate_density_compensation_for_reconstruction( unsigned int set, unsigned int slice)
-  {
-    //GADGET_DEBUG1("Calculating dcw for reconstruction\n");
-    
-    switch(mode_){
-      
-    case 0:
-    case 1:
-      host_weights_recon_[set*slices_+slice] = *compute_radial_dcw_fixed_angle_2d<float>
-        ( samples_per_profile_, profiles_per_frame_[set*slices_+slice], oversampling_factor_, 
-          1.0f/(float(samples_per_profile_)/float(image_dimensions_recon_[0])) )->to_host();
-      break;
-      
-    case 2:
-    case 3:
-      host_weights_recon_[set*slices_+slice] = *compute_radial_dcw_golden_ratio_2d<float>
-        ( samples_per_profile_, profiles_per_frame_[set*slices_+slice], oversampling_factor_, 
-          1.0f/(float(samples_per_profile_)/float(image_dimensions_recon_[0])),0,
-          (mode_==2) ? GR_ORIGINAL : GR_SMALLEST )->to_host();
-      break;
-      
-    default:
-      GADGET_DEBUG1("Illegal dcw mode\n");
-      return GADGET_FAIL;
-      break;
-    }
-    return GADGET_OK;
-  }
-  
-  boost::shared_ptr< cuNDArray<floatd2> > 
-  gpuRadialSensePrepGadget::calculate_trajectory_for_frame(long profile_offset, unsigned int set, unsigned int slice)
-  {
-    //GADGET_DEBUG1("Calculating trajectory for buffer frame\n");
-
-    boost::shared_ptr< cuNDArray<floatd2> > result;
-
-    switch(mode_){
-
-    case 0:
-    case 1:
-      {
-        long local_frame = (profile_offset/profiles_per_frame_[set*slices_+slice])%frames_per_rotation_[set*slices_+slice];
-        float angular_offset = M_PI/float(profiles_per_frame_[set*slices_+slice])*float(local_frame)/float(frames_per_rotation_[set*slices_+slice]);	  
-
-        result = compute_radial_trajectory_fixed_angle_2d<float>
-          ( samples_per_profile_, profiles_per_frame_[set*slices_+slice], 1, angular_offset );  
-      }
-      break;
-	
-    case 2:
-    case 3:
-      { 
-        unsigned int first_profile_in_buffer = std::max(0L, profile_offset-profiles_per_frame_[set*slices_+slice]+1);
-        result = compute_radial_trajectory_golden_ratio_2d<float>
-          ( samples_per_profile_, profiles_per_frame_[set*slices_+slice], 1, first_profile_in_buffer,
-            (mode_==2) ? GR_ORIGINAL : GR_SMALLEST );
-      }
-      break;	
-	
-    default:
-      GADGET_DEBUG1("Illegal trajectory mode\n");
-      break;
-    }
-    
-    return result;
-  }
-
-  boost::shared_ptr< cuNDArray<float> >
-  gpuRadialSensePrepGadget::calculate_density_compensation_for_frame(unsigned int set, unsigned int slice)
+  void 
+  gpuRadialSensePrepGadget::allocate_accumulation_buffer( unsigned int size )
   {    
-    //GADGET_DEBUG1("Calculating dcw for buffer frame\n");
-
-    switch(mode_){
-      
-    case 0:
-    case 1:
-      return compute_radial_dcw_fixed_angle_2d<float>
-        ( samples_per_profile_, profiles_per_frame_[set*slices_+slice], oversampling_factor_, 1.0f/(float(samples_per_profile_)/float(image_dimensions_recon_[0])) );
-      break;
-      
-    case 2:
-    case 3:
-      return compute_radial_dcw_golden_ratio_2d<float>
-        ( samples_per_profile_, profiles_per_frame_[set*slices_+slice], oversampling_factor_, 
-          1.0f/(float(samples_per_profile_)/float(image_dimensions_recon_[0])),0,
-          (mode_==2) ? GR_ORIGINAL : GR_SMALLEST );
-      break;
-      
-    default:
-      GADGET_DEBUG1("Illegal dcw mode\n");
-      return boost::shared_ptr< cuNDArray<float> >();
-      break;
-    }   
-  }
-
-
-  boost::shared_ptr< cuNDArray<floatd2> > 
-  gpuRadialSensePrepGadget::calculate_trajectory_for_rhs(long profile_offset, unsigned int set, unsigned int slice)
-  {
-    //GADGET_DEBUG1("Calculating trajectory for rhs\n");
-
-    switch(mode_){
-
-    case 0:
-    case 1:
-      return compute_radial_trajectory_fixed_angle_2d<float>
-        ( samples_per_profile_, profiles_per_frame_[set*slices_+slice]*buffer_frames_per_rotation_[set*slices_+slice], 1 );
-      break;
-	
-    case 2:
-    case 3:
-      { 
-        unsigned int first_profile = 
-          std::max(0L, profile_offset-profiles_per_frame_[set*slices_+slice]*
-                   buffer_frames_per_rotation_[set*slices_+slice]*
-                   buffer_length_in_rotations_+1);
-
-        return compute_radial_trajectory_golden_ratio_2d<float>
-          ( samples_per_profile_, 
-            profiles_per_frame_[set*slices_+slice]*
-            buffer_frames_per_rotation_[set*slices_+slice]*buffer_length_in_rotations_, 
-            1, first_profile,
-            (mode_==2) ? GR_ORIGINAL : GR_SMALLEST );
-      }
-      break;	
-	
-    default:
-      GADGET_DEBUG1("Illegal trajectory mode\n");
-      return boost::shared_ptr< cuNDArray<floatd2> >();
-      break;
-    }
-  }
+    // Allocate accumulation buffer
+    //
   
-  boost::shared_ptr< cuNDArray<float> >
-  gpuRadialSensePrepGadget::calculate_density_compensation_for_rhs(unsigned int set, unsigned int slice)
-  {
-    //GADGET_DEBUG1("Calculating dcw for rhs\n");
-    
-    switch(mode_){
-      
-    case 0:
-    case 1:
-      {
-        unsigned int num_profiles = 
-          profiles_per_frame_[set*slices_+slice]*buffer_frames_per_rotation_[set*slices_+slice];
-
-        return compute_radial_dcw_fixed_angle_2d<float>
-          ( samples_per_profile_, num_profiles, oversampling_factor_, 
-            1.0f/(float(samples_per_profile_)/float(image_dimensions_recon_[0])) );
-      }
-      break;
-      
-    case 2:
-    case 3:
-      {
-        unsigned int num_profiles = 
-          profiles_per_frame_[set*slices_+slice]*buffer_frames_per_rotation_[set*slices_+slice]*buffer_length_in_rotations_;
-
-        return compute_radial_dcw_golden_ratio_2d<float>
-          ( samples_per_profile_, num_profiles, oversampling_factor_, 
-            1.0f/(float(samples_per_profile_)/float(image_dimensions_recon_[0])),0,
-            (mode_==2) ? GR_ORIGINAL : GR_SMALLEST );
-      }
-      break;
-      
-    default:
-      GADGET_DEBUG1("Illegal dcw mode\n");
-      return boost::shared_ptr< cuNDArray<float> >();
-      break;
+    if( this->buffer_using_solver_ ){
+      this->acc_buffer_sense_cg_ = boost::shared_array< cuSenseBufferCg<float,2> >(new cuSenseBufferCg<float,2>[size]);
+    }
+    else{
+      this->acc_buffer_sense_ = boost::shared_array< cuSenseBuffer<float,2> >(new cuSenseBuffer<float,2>[size]);
     }
   }
 
-  boost::shared_ptr< hoNDArray<float_complext> > gpuRadialSensePrepGadget::
-  extract_samples_from_queue( ACE_Message_Queue<ACE_MT_SYNCH> *queue, bool sliding_window,
-                              unsigned int set, unsigned int slice )
-  {    
-    //GADGET_DEBUG1("Emptying queue...\n");
-
-    unsigned int profiles_buffered = queue->message_count();
-    
-    std::vector<size_t> dims;
-    dims.push_back(samples_per_profile_*profiles_buffered);
-    dims.push_back(num_coils_[set*slices_+slice]);
-    
-    boost::shared_ptr< hoNDArray<float_complext> > host_samples(new hoNDArray<float_complext>(&dims));
-    
-    for (unsigned int p=0; p<profiles_buffered; p++) {
-
-      ACE_Message_Block* mbq;
-      if (queue->dequeue_head(mbq) < 0) {
-        GADGET_DEBUG1("Message dequeue failed\n");
-        return boost::shared_ptr< hoNDArray<float_complext> >();
-      }
-      
-      GadgetContainerMessage< hoNDArray< std::complex<float> > > *daq = AsContainerMessage<hoNDArray< std::complex<float> > >(mbq);
-	
-      if (!daq) {
-        GADGET_DEBUG1("Unable to interpret data on message queue\n");
-        return boost::shared_ptr< hoNDArray<float_complext> >();
-      }
-	
-      for (unsigned int c = 0; c < num_coils_[set*slices_+slice]; c++) {
-	
-        float_complext *data_ptr = host_samples->get_data_ptr();
-        data_ptr += c*samples_per_profile_*profiles_buffered+p*samples_per_profile_;
-	    
-        std::complex<float> *r_ptr = daq->getObjectPtr()->get_data_ptr();
-        r_ptr += c*daq->getObjectPtr()->get_size(0);
-	  
-        memcpy(data_ptr,r_ptr,samples_per_profile_*sizeof(float_complext));
-      }
-
-      // In sliding window mode the profile might need to go back at the end of the queue
-      // 
-      
-      long profiles_in_sliding_window = sliding_window_profiles_ + 
-        profiles_per_frame_[set*slices_+slice]*frames_per_rotation_[set*slices_+slice]*sliding_window_rotations_;
-
-      if( sliding_window && p >= (profiles_buffered-profiles_in_sliding_window) )
-        queue->enqueue_tail(mbq);
-      else
-        mbq->release();
-    } 
-    
-    return host_samples;
-  }
-  
-  GadgetContainerMessage< hoNDArray< std::complex<float> > >*
-  gpuRadialSensePrepGadget::duplicate_profile( GadgetContainerMessage< hoNDArray< std::complex<float> > > *profile )
-  {
-    GadgetContainerMessage< hoNDArray< std::complex<float> > > *copy = 
-      new GadgetContainerMessage< hoNDArray< std::complex<float> > >();
-    
-    *copy->getObjectPtr() = *profile->getObjectPtr();
-    
-    return copy;
-  }
-
-  void gpuRadialSensePrepGadget::reconfigure(unsigned int set, unsigned int slice)
+  void gpuRadialSensePrepGadget::reconfigure(unsigned int set, unsigned int slice, bool use_dcw)
   {    
-    GADGET_DEBUG2("\nReconfiguring:\n#profiles/frame:%d\n#frames/rotation: %d\n#rotations/reconstruction:%d\n", 
-                  profiles_per_frame_[set*slices_+slice], frames_per_rotation_[set*slices_+slice], rotations_per_reconstruction_);
-
-    calculate_trajectory_for_reconstruction(0, set, slice);
-    calculate_density_compensation_for_reconstruction(set, slice);
+    gpuRadialPrepGadget::reconfigure(set, slice, use_dcw);
     
-    buffer_frames_per_rotation_[set*slices_+slice] = get_int_value(std::string("buffer_frames_per_rotation").c_str());
-
-    if( buffer_frames_per_rotation_[set*slices_+slice] == 0 ){
-      if( mode_ == 2 || mode_ == 3 )
-        buffer_frames_per_rotation_[set*slices_+slice] = 
-          image_dimensions_recon_os_[0]/profiles_per_frame_[set*slices_+slice];
-      else
-        buffer_frames_per_rotation_[set*slices_+slice] = frames_per_rotation_[set*slices_+slice];
-    }
-    
-    cuSenseBuffer<float,2> *acc_buffer = (buffer_using_solver_) ? &acc_buffer_cg_[set*slices_+slice] : &acc_buffer_[set*slices_+slice];
+    if( buffer_using_solver_ ){
 
-    acc_buffer->setup( from_std_vector<size_t,2>(image_dimensions_recon_), image_dimensions_recon_os_, 
-                       kernel_width_, num_coils_[set*slices_+slice], 
-                       buffer_length_in_rotations_, buffer_frames_per_rotation_[set*slices_+slice] );
-    
-    boost::shared_ptr< cuNDArray<float> > device_weights_frame = calculate_density_compensation_for_frame(set, slice);
-    acc_buffer->set_dcw(device_weights_frame);
+      if(use_dcw) 
+        this->acc_buffer_sense_cg_[set*this->slices_+slice].set_dcw_for_rhs(calculate_density_compensation_for_rhs(set, slice));
 
-    if( buffer_using_solver_ ){
-      ((cuSenseBufferCg<float,2>*) acc_buffer)->set_dcw_for_rhs(calculate_density_compensation_for_rhs(set, slice));
-      ((cuSenseBufferCg<float,2>*) acc_buffer)->preprocess(calculate_trajectory_for_rhs(0, set, slice).get());
-    }
-    
-    reconfigure_[set*slices_+slice] = false;
+      this->acc_buffer_sense_cg_[set*this->slices_+slice].preprocess(calculate_trajectory_for_rhs(0, set, slice).get());
+    }    
   }
 
   GADGET_FACTORY_DECLARE(gpuRadialSensePrepGadget)
diff --git a/gadgets/radial/gpuRadialSensePrepGadget.h b/gadgets/radial/gpuRadialSensePrepGadget.h
index 7f67b69..9ca0519 100644
--- a/gadgets/radial/gpuRadialSensePrepGadget.h
+++ b/gadgets/radial/gpuRadialSensePrepGadget.h
@@ -1,191 +1,31 @@
 #pragma once
 
-#include "gadgetron_radial_export.h"
-#include "Gadget.h"
-#include "GadgetMRIHeaders.h"
-#include "hoNDArray.h"
-#include "vector_td.h"
-#include "cuNFFT.h"
-#include "cuCgPreconditioner.h"
-#include "cuSenseBufferCg.h"
-
-#include <ismrmrd.h>
-#include <complex>
-#include <boost/shared_ptr.hpp>
-#include <boost/shared_array.hpp>
-
-/*
-  ------------------------------------------
-  Trajectory modes for radial reconstruction
-  ------------------------------------------
-  
-  Mode 0 and Mode 1 are variants of 'fixed' radial trajectories with interframe rotation.
-  Mode 2 and Mode 3 denote radial trajectories with golden ratio based angular profile spacings.
-  
-  Let 
-  'i' denote the number of profiles per (undersampled) frame
-  'j' denote the number of frames per trajectory rotation (to obtain a fully sampled acquisition)
-  'h' denote a variable of type ISMRMRD::AcquisitionHeader
-
-  It is possible to explicitly set 'i' and 'j' in the Gadgetron configuration file.
-  For some modes this is (partly) required, 
-  for others they will be automatically determined from the incoming profile headers.
-  
-  Mode 0:
-  -------
-  For each rotation cycle profiles are numbered using the scheme
-
-    0+0*j,0+1*j,0+2*j,...,0+(i-1)*j, (1st frame)
-    1+0*j,1+1*j,1+2*j,...,1+(i-1)*j, (2nd frame)
-    2+0*j,2+1*j,2+2*j,...,2+(i-1)*j, (3rd frame)
-    ...,
-    (j-1)+0*j,(j-1)+1*j,(j-1)+2*j,...,(j-1)+(i-1)*j
-
-  as given in h.idx.kspace_encode_step_1.
-  Both 'i' and 'j' are automatically derived and thus need not be explicitly specified in a configuration file.
-  For mode 0 both 'i' and 'j' can be changed dynamically as desired e.g. for real-time imaging.
-
-  Mode 1:
-  -------
-  Profiles are numbered 0,1,2,...,i-1, 0,1,2,...,i-1, ... as given in h.idx.kspace_encode_step_1.
-  'j' is estimated as 'matrix_size'/'i' and should be explicitly set in the configuration file if this is not the case, e.g.:
-  <property><name>frames_per_rotation</name><value>8</value></property>
-      
-
-  Mode 2 and Mode 3:
-  -------
-  Profiles are numbered 
-  0,1,2,...,i-1, 0,1,2,...,i-1, 0,1,2,...,i-1, ...
-  or
-  0,1,2,...,i-1, i,i+1,i+2,...,2*i-1, 2*i,2*i+1,2*i+2,3*i-1, ...
-  as given in h.idx.kspace_encode_step_1.
-  'i' should be explicitly specified in the Gadgetron configuration file, e.g.:
-  <property><name>profiles_per_frame</name><value>32</value></property>
-  If not it defaults to i=32.
-  'j' is explicitly set to '1' even if specified in the configuration file.
-*/
+#include "gpuRadialPrepGadget.h"
 
 namespace Gadgetron{
 
-  class EXPORTGADGETS_RADIAL gpuRadialSensePrepGadget :
-    public Gadget2< ISMRMRD::AcquisitionHeader, hoNDArray< std::complex<float> > >
+  class EXPORTGADGETS_RADIAL gpuRadialSensePrepGadget : public gpuRadialPrepGadget
   {
-
+    
   public:
     GADGET_DECLARE(gpuRadialSensePrepGadget);
-
-    gpuRadialSensePrepGadget();
-    virtual ~gpuRadialSensePrepGadget();
-
+    gpuRadialSensePrepGadget() : gpuRadialPrepGadget() {}
+    virtual ~gpuRadialSensePrepGadget() {}
+    
   protected:
     
-    virtual int process_config(ACE_Message_Block *mb);
-
-    virtual int process(GadgetContainerMessage< ISMRMRD::AcquisitionHeader > *m1,
-			GadgetContainerMessage< hoNDArray< std::complex<float> > > *m2);
+    virtual void reconfigure(unsigned int set, unsigned int slice, bool use_dcw = true);
 
-  private:
+    virtual boost::shared_ptr< hoNDArray<float_complext> > compute_csm( unsigned int buffer_idx );
 
-    inline bool vec_equal(float *in1, float *in2) {
-      for (unsigned int i = 0; i < 3; i++) {
-	if (in1[i] != in2[i]) return false;
-      }
-      return true;
-    }
+    virtual boost::shared_ptr< hoNDArray<float_complext> > compute_reg( unsigned int set, 
+                                                                        unsigned int slice, 
+                                                                        bool new_frame );
     
-    boost::shared_array<bool> reconfigure_;
-    virtual void reconfigure(unsigned int set, unsigned int slice);
-
-    GadgetContainerMessage< hoNDArray< std::complex<float> > >*
-      duplicate_profile( GadgetContainerMessage< hoNDArray< std::complex<float> > > *profile );
-
-    boost::shared_ptr< hoNDArray<float_complext> > 
-      extract_samples_from_queue( ACE_Message_Queue<ACE_MT_SYNCH> *queue,
-				  bool acknowledge_sliding_window,
-				  unsigned int set, unsigned int slice );
-
-    // Compute trajectory/dcw for a reconstruction (to store internally)
-    //
-
-    int calculate_trajectory_for_reconstruction(long profile_offset, unsigned int set, unsigned int slice);
-    int calculate_density_compensation_for_reconstruction(unsigned int set, unsigned int slice);
-
-    // Compute trajectory/dcw for adding (usually undersampled) frames to the accumulation buffer
-    //
-
-    boost::shared_ptr< cuNDArray<floatd2> > 
-      calculate_trajectory_for_frame(long profile_offset, unsigned int set, unsigned int slice);
-
-    boost::shared_ptr< cuNDArray<float> >
-      calculate_density_compensation_for_frame(unsigned int set, unsigned int slice);
-
-    // Compute trajectory/dcw for the fully sampled accumulation buffer (iterative buffer mode only)
-    //
-
-    boost::shared_ptr< cuNDArray<floatd2> > 
-      calculate_trajectory_for_rhs(long profile_offset, unsigned int set, unsigned int slice);
-
-    boost::shared_ptr< cuNDArray<float> > 
-      calculate_density_compensation_for_rhs(unsigned int set, unsigned int slice);
-
-    int slices_;
-    int sets_;
-    int device_number_;
-    int mode_; // See note above
-    long samples_per_profile_;
-
-    boost::shared_array<long> image_counter_;
-    boost::shared_array<long> profiles_per_frame_;  // for an undersampled frame
-    boost::shared_array<long> frames_per_rotation_; // representing a fully sampled frame
-
-    // The number of rotations to batch per reconstruction. 
-    // Set to '0' to reconstruct frames individually.
-    long rotations_per_reconstruction_; 
-
-    // The number of buffer cycles
-    long buffer_length_in_rotations_; 
-
-    boost::shared_array<long> buffer_frames_per_rotation_; // the number of buffer subcycles
-
-    // Internal book-keping
-    boost::shared_array<long> previous_profile_;
-    boost::shared_array<long> profiles_counter_frame_;
-    boost::shared_array<long> profiles_counter_global_;
-
-    long sliding_window_profiles_;
-    long sliding_window_rotations_;
-
-    float kernel_width_;
-    float oversampling_factor_;
-
-    boost::shared_array<unsigned int> num_coils_;
-
-    boost::shared_array<float[3]> position_;
-    boost::shared_array<float[3]> read_dir_;
-    boost::shared_array<float[3]> phase_dir_;
-    boost::shared_array<float[3]> slice_dir_;
-
-    bool output_timing_;
-    bool buffer_using_solver_;
-
-    boost::shared_array<bool> buffer_update_needed_;
-
-    boost::shared_array< hoNDArray<floatd2> > host_traj_recon_;
-    boost::shared_array< hoNDArray<float> > host_weights_recon_;
-    
-    boost::shared_array< hoNDArray<float_complext> > csm_host_;
-    boost::shared_array< hoNDArray<float_complext> > reg_host_;
+    virtual void allocate_accumulation_buffer( unsigned int num_buffers );
     
-    boost::shared_array< cuSenseBuffer<float,2> > acc_buffer_;
-    boost::shared_array< cuSenseBufferCg<float,2> > acc_buffer_cg_;
-
-    std::vector<size_t> fov_;
-    std::vector<size_t> image_dimensions_;
-    std::vector<size_t> image_dimensions_recon_;
-    uint64d2 image_dimensions_recon_os_;
-
-    boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> > frame_profiles_queue_;
-    boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> > recon_profiles_queue_;
-    boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> > image_headers_queue_;
+    virtual cuBuffer<float,2>* get_buffer_ptr(int idx){
+      return (this->buffer_using_solver_) ? &this->acc_buffer_sense_cg_[idx] : &this->acc_buffer_sense_[idx];
+    }
   };
 }
diff --git a/gadgets/radial/gpuRadialSpiritPrepGadget.cpp b/gadgets/radial/gpuRadialSpiritPrepGadget.cpp
new file mode 100644
index 0000000..829c175
--- /dev/null
+++ b/gadgets/radial/gpuRadialSpiritPrepGadget.cpp
@@ -0,0 +1,98 @@
+#include "gpuRadialSpiritPrepGadget.h"
+#include "spirit_calibration.h"
+#include "cuSpiritBuffer.h"
+#include "cuNDFFT.h"
+#include "cuSpiritOperator.h"
+#include "hoNDArray_fileio.h"
+
+namespace Gadgetron{
+
+  gpuRadialSpiritPrepGadget::gpuRadialSpiritPrepGadget() : gpuRadialPrepGadget() {} // empty body; only the base class constructor is invoked
+
+  int 
+  gpuRadialSpiritPrepGadget::process_config(ACE_Message_Block* mb)
+  {
+    return gpuRadialPrepGadget::process_config(mb); // nothing Spirit specific is read here; the base class result is returned unchanged
+  }
+  
+  boost::shared_ptr< hoNDArray<float_complext> > 
+  gpuRadialSpiritPrepGadget::compute_csm( unsigned int idx )
+  {    
+    // Estimate the Spirit calibration kernels for accumulation buffer 'idx'.
+    //
+    // Returns the kernels as a host array, or an empty pointer if one of
+    // the steps below fails (the same convention compute_reg() uses, so a
+    // failure is reported to the caller instead of dereferencing null).
+    //
+
+    cuSpiritBuffer<float,2> *acc_buffer = &this->acc_buffer_spirit_[idx];
+
+    boost::shared_ptr< cuNDArray<float_complext> > csm_data = 
+      acc_buffer->get_accumulated_coil_images();
+
+    // Guard against a missing buffer result before it is used below
+    //
+
+    if( !csm_data.get() ){
+      GADGET_DEBUG1("Error during accumulation buffer computation\n");
+      return boost::shared_ptr< hoNDArray<float_complext> >();
+    }
+
+    // Fourier transform the accumulated coil images along dimensions 0
+    // and 1; the transformed data is the input to the kernel estimation
+    //
+
+    std::vector<size_t> dims_to_xform;
+    dims_to_xform.push_back(0); dims_to_xform.push_back(1);    
+    cuNDFFT<float>::instance()->fft( csm_data.get(), &dims_to_xform );
+
+    boost::shared_ptr< cuNDArray<float_complext> > csm =       
+      estimate_spirit_kernels( csm_data.get(), 7 ); // TODO: let the kernel size be user defined
+
+    if( !csm.get() ){
+      GADGET_DEBUG1("Error during Spirit kernel estimation\n");
+      return boost::shared_ptr< hoNDArray<float_complext> >();
+    }
+
+    // Hand the kernels back as a host array
+    return csm->to_host(); 
+  }
+  
+  boost::shared_ptr< hoNDArray<float_complext> > 
+  gpuRadialSpiritPrepGadget::compute_reg( unsigned int set, unsigned int slice, bool new_frame )
+  {    
+    // The regularization image is the combined coil image of the buffer
+    // belonging to this (set, slice) pair; new_frame is not consulted here.
+    //
+    cuSpiritBuffer<float,2> &buffer = this->acc_buffer_spirit_[set*this->slices_+slice];
+    boost::shared_ptr< cuNDArray<float_complext> > combined = buffer.get_combined_coil_image();
+
+    if( combined.get() ){
+      return combined->to_host(); // success: return it as a host array
+    }
+
+    GADGET_DEBUG1("Error computing regularization image\n");
+    return boost::shared_ptr< hoNDArray<float_complext> >();
+  }
+
+  // Replace acc_buffer_spirit_ with a new array of 'size' default-constructed buffers.
+  void gpuRadialSpiritPrepGadget::allocate_accumulation_buffer( unsigned int size )
+  {    
+    this->acc_buffer_spirit_.reset(new cuSpiritBuffer<float,2>[size]);
+  }
+
+  void gpuRadialSpiritPrepGadget::reconfigure(unsigned int set, unsigned int slice, bool use_dcw)
+  {    
+    // Base class reconfiguration first, then the rhs weights (optional) and rhs trajectory go to this (set, slice) buffer.
+    gpuRadialPrepGadget::reconfigure(set, slice, use_dcw);
+
+    cuSpiritBuffer<float,2> &buffer = this->acc_buffer_spirit_[set*this->slices_+slice];
+
+    if( use_dcw ){
+      buffer.set_dcw_for_rhs(calculate_density_compensation_for_rhs(set, slice));
+    }
+    buffer.preprocess(calculate_trajectory_for_rhs(0, set, slice).get());
+  }
+
+  GADGET_FACTORY_DECLARE(gpuRadialSpiritPrepGadget)
+}
diff --git a/gadgets/radial/gpuRadialSpiritPrepGadget.h b/gadgets/radial/gpuRadialSpiritPrepGadget.h
new file mode 100644
index 0000000..8d9824a
--- /dev/null
+++ b/gadgets/radial/gpuRadialSpiritPrepGadget.h
@@ -0,0 +1,33 @@
+#pragma once
+
+#include "gpuRadialPrepGadget.h"
+
+namespace Gadgetron{
+
+  class EXPORTGADGETS_RADIAL gpuRadialSpiritPrepGadget : public gpuRadialPrepGadget
+  {
+    // Spirit flavour of the radial prep gadget; its overrides operate on the acc_buffer_spirit_ buffers.
+  public:
+    GADGET_DECLARE(gpuRadialSpiritPrepGadget);
+    gpuRadialSpiritPrepGadget();
+    virtual ~gpuRadialSpiritPrepGadget() {}
+    
+  protected:
+    // Forwards to gpuRadialPrepGadget::process_config().
+    virtual int process_config(ACE_Message_Block *mb);
+    // Runs the base class reconfigure, then passes the rhs dcw (only if use_dcw) and rhs trajectory to the (set, slice) buffer.
+    virtual void reconfigure(unsigned int set, unsigned int slice, bool use_dcw = true );
+    // Spirit kernels estimated from the accumulated coil images of buffer 'buffer_idx', returned as a host array.
+    virtual boost::shared_ptr< hoNDArray<float_complext> > compute_csm( unsigned int buffer_idx );
+    // Combined coil image of the (set, slice) buffer as a host array; an empty pointer signals an error.
+    virtual boost::shared_ptr< hoNDArray<float_complext> > compute_reg( unsigned int set, 
+                                                                        unsigned int slice, 
+                                                                        bool new_frame );
+    // Replaces acc_buffer_spirit_ with a newly allocated array of 'num_buffers' buffers.
+    virtual void allocate_accumulation_buffer( unsigned int num_buffers );
+    // Address of Spirit buffer 'idx', returned as a cuBuffer<float,2> pointer.
+    virtual cuBuffer<float,2>* get_buffer_ptr(int idx){
+      return &this->acc_buffer_spirit_[idx];
+    }
+  };
+}
diff --git a/gadgets/radial/gpuRetroGatedSensePrepGadget.cpp b/gadgets/radial/gpuRetroGatedSensePrepGadget.cpp
new file mode 100644
index 0000000..d55dd8f
--- /dev/null
+++ b/gadgets/radial/gpuRetroGatedSensePrepGadget.cpp
@@ -0,0 +1,890 @@
+#include "gpuRetroGatedSensePrepGadget.h"
+#include "Gadgetron.h"
+#include "cuNonCartesianSenseOperator.h"
+#include "GenericReconJob.h"
+#include "cuNDArray_elemwise.h"
+#include "hoNDArray_elemwise.h"
+#include "cuNDArray_utils.h"
+#include "vector_td_operators.h"
+#include "b1_map.h"
+#include "GPUTimer.h"
+#include "check_CUDA.h"
+#include "radial_utilities.h"
+#include "hoNDArray_fileio.h"
+#include "ismrmrd/xml.h"
+
+#include <algorithm>
+#include <vector>
+#include <cmath>
+
+namespace Gadgetron{
+
+  gpuRetroGatedSensePrepGadget::gpuRetroGatedSensePrepGadget()
+    : slices_(-1), sets_(-1), samples_per_profile_(-1), phys_time_index_(0)
+  {
+    // Fallback (name, value) parameter pairs, registered in this order, for
+    // anything the configuration leaves unspecified.
+    const char* const fallbacks[][2] = {
+      { "mode", "-1" },
+      { "deviceno", "0" },
+      { "profiles_per_frame", "16" },
+      { "frames_per_cardiac_cycle", "30" },
+      { "profiles_per_buffer_frame", "32" },
+      { "number_of_buffer_frames_inner", "8" },
+      { "number_of_buffer_frames_outer", "1" },
+      { "buffer_using_solver", "false" },
+      { "buffer_convolution_kernel_width", "5.5" },
+      { "buffer_convolution_oversampling_factor", "1.25" },
+      { "reconstruction_os_factor_x", "1.0" },
+      { "reconstruction_os_factor_y", "1.0" }
+    };
+    for( unsigned int k=0; k<sizeof(fallbacks)/sizeof(fallbacks[0]); k++ )
+      set_parameter(fallbacks[k][0], fallbacks[k][1]);
+  }
+  
+  gpuRetroGatedSensePrepGadget::~gpuRetroGatedSensePrepGadget() {} // Empty body: no explicit cleanup is performed here.
+  
+  // Configures the gadget: reads its parameters, selects and checks the CUDA
+  // device, parses the ISMRMRD header held by 'mb' and allocates the queues,
+  // counters and buffers kept per (set, slice). Returns GADGET_OK, or
+  // GADGET_FAIL on an invalid setting, a CUDA error or an unsupported header.
+  int gpuRetroGatedSensePrepGadget::process_config(ACE_Message_Block* mb)
+  {
+    // Get configuration values from config file
+    mode_ = get_int_value(std::string("mode").c_str());
+    device_number_ = get_int_value(std::string("deviceno").c_str());
+    profiles_per_frame_ = get_int_value(std::string("profiles_per_frame").c_str());
+    frames_per_cardiac_cycle_ = get_int_value(std::string("frames_per_cardiac_cycle").c_str());
+    profiles_per_buffer_frame_ = get_int_value(std::string("profiles_per_buffer_frame").c_str());
+    num_buffer_frames_inner_ = get_int_value(std::string("number_of_buffer_frames_inner").c_str());
+    num_buffer_frames_outer_ = get_int_value(std::string("number_of_buffer_frames_outer").c_str());
+    buffer_using_solver_ = get_bool_value(std::string("buffer_using_solver").c_str());
+    output_timing_ = get_bool_value(std::string("output_timing").c_str());
+
+    // physiology_time_index selects an entry of the headers' fixed-size physiology_time_stamp
+    // arrays: reject out-of-range values here instead of indexing out of bounds in process().
+    ISMRMRD::AcquisitionHeader probe;
+    const int num_phys_stamps = int(sizeof(probe.physiology_time_stamp)/sizeof(probe.physiology_time_stamp[0]));
+    const int phys_time_index = get_int_value("physiology_time_index");
+    if( phys_time_index < 0 || phys_time_index >= num_phys_stamps ){
+      GADGET_DEBUG2("physiology_time_index %d is outside [0,%d)\n", phys_time_index, num_phys_stamps);
+      return GADGET_FAIL;
+    }
+    phys_time_index_ = phys_time_index;
+
+    // The frame/profile counts are used as sizes and divisors later on
+    if( profiles_per_frame_ < 1 || frames_per_cardiac_cycle_ < 1 || profiles_per_buffer_frame_ < 1 ){
+      GADGET_DEBUG1("profiles_per_frame, frames_per_cardiac_cycle and profiles_per_buffer_frame must be positive.\n");
+      return GADGET_FAIL;
+    }
+
+    // Check that a golden ratio based reconstruction mode was specified
+    if( mode_ == -1 ){
+      GADGET_DEBUG1( "Radial reconstruction mode not specified.\n" );
+      return GADGET_FAIL;
+    }
+
+    if( !(mode_ == 2 || mode_ == 3) ){
+      GADGET_DEBUG1( "Only radial reconstruction modes {2,3} (golden ratio based) are supported.\n" );
+      return GADGET_FAIL;
+    }
+    
+    // Setup and validate device configuration
+    int number_of_devices;
+    if (cudaGetDeviceCount(&number_of_devices)!= cudaSuccess) {
+      GADGET_DEBUG1( "Error: unable to query number of CUDA devices.\n" );
+      return GADGET_FAIL;
+    }
+
+    if (number_of_devices == 0) {
+      GADGET_DEBUG1( "Error: No available CUDA devices.\n" );
+      return GADGET_FAIL;
+    }
+
+    if (device_number_ >= number_of_devices) {
+      GADGET_DEBUG2("Adjusting device number from %d to %d\n", device_number_,  (device_number_%number_of_devices));
+      device_number_ = (device_number_%number_of_devices);
+    }
+
+    if (cudaSetDevice(device_number_)!= cudaSuccess) {
+      GADGET_DEBUG1( "Error: unable to set CUDA device.\n" );
+      return GADGET_FAIL;
+    }
+
+    cudaDeviceProp deviceProp;
+    if( cudaGetDeviceProperties( &deviceProp, device_number_ ) != cudaSuccess) {
+      GADGET_DEBUG1( "Error: unable to query device properties.\n" );
+      return GADGET_FAIL;
+    }
+    
+    // Convolution kernel width and oversampling ratio (for the buffer)
+    kernel_width_ = get_double_value(std::string("buffer_convolution_kernel_width").c_str());
+    oversampling_factor_ = get_double_value(std::string("buffer_convolution_oversampling_factor").c_str());
+
+    // Get the Ismrmrd header
+    ISMRMRD::IsmrmrdHeader h;
+    ISMRMRD::deserialize(mb->rd_ptr(),h);
+    
+    if (h.encoding.size() != 1) {
+      GADGET_DEBUG1("This Gadget only supports one encoding space\n");
+      return GADGET_FAIL;
+    }
+    
+    // Get the encoding space and trajectory description
+    ISMRMRD::EncodingSpace e_space = h.encoding[0].encodedSpace;
+    ISMRMRD::EncodingSpace r_space = h.encoding[0].reconSpace;
+    ISMRMRD::EncodingLimits e_limits = h.encoding[0].encodingLimits;
+
+    // Matrix sizes (as a multiple of the GPU's warp size)
+    unsigned int warp_size = deviceProp.warpSize;
+
+    image_dimensions_.push_back(((e_space.matrixSize.x+warp_size-1)/warp_size)*warp_size);
+    image_dimensions_.push_back(((e_space.matrixSize.y+warp_size-1)/warp_size)*warp_size);
+
+    image_dimensions_recon_.push_back(((static_cast<unsigned int>(std::ceil(e_space.matrixSize.x*get_double_value(std::string("reconstruction_os_factor_x").c_str())))+warp_size-1)/warp_size)*warp_size);  
+    image_dimensions_recon_.push_back(((static_cast<unsigned int>(std::ceil(e_space.matrixSize.y*get_double_value(std::string("reconstruction_os_factor_y").c_str())))+warp_size-1)/warp_size)*warp_size);
+    
+    image_dimensions_recon_os_ = uint64d2
+      (((static_cast<unsigned int>(std::ceil(image_dimensions_recon_[0]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size,
+       ((static_cast<unsigned int>(std::ceil(image_dimensions_recon_[1]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size);
+    
+    // In case the warp_size constraint kicked in
+    oversampling_factor_ = float(image_dimensions_recon_os_[0])/float(image_dimensions_recon_[0]); 
+    
+    GADGET_DEBUG2("matrix_size_x : %d, recon: %d, recon_os: %d\n", 
+                  image_dimensions_[0], image_dimensions_recon_[0], image_dimensions_recon_os_[0]);
+
+    GADGET_DEBUG2("matrix_size_y : %d, recon: %d, recon_os: %d\n", 
+                  image_dimensions_[1], image_dimensions_recon_[1], image_dimensions_recon_os_[1]);
+    
+    fov_.push_back(r_space.fieldOfView_mm.x);
+    fov_.push_back(r_space.fieldOfView_mm.y);
+    fov_.push_back(r_space.fieldOfView_mm.z);
+
+    slices_ = e_limits.slice ? e_limits.slice->maximum + 1 : 1;
+    sets_ = e_limits.set ? e_limits.set->maximum + 1 : 1;
+    
+    // Allocate profile queues
+    // - one queue for the currently incoming frame (for the accumulation buffer)
+    // - one queue for the next reconstruction
+    
+    buffer_profiles_queue_ = boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>[slices_*sets_]);
+    recon_profiles_queue_ = boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>[slices_*sets_]);
+
+    size_t bsize = sizeof(GadgetContainerMessage< hoNDArray< std::complex<float> > >)*profiles_per_buffer_frame_*10;
+
+    for( unsigned int i=0; i<slices_*sets_; i++ ){
+      buffer_profiles_queue_[i].high_water_mark(bsize);
+      buffer_profiles_queue_[i].low_water_mark(bsize);
+    }
+
+    bsize = sizeof(GadgetContainerMessage< hoNDArray< std::complex<float> > >)*profiles_per_frame_*frames_per_cardiac_cycle_*10;
+    
+    for( unsigned int i=0; i<slices_*sets_; i++ ){
+      recon_profiles_queue_[i].high_water_mark(bsize);
+      recon_profiles_queue_[i].low_water_mark(bsize);
+    }
+    
+    // Define some profile counters for book-keeping
+    image_counter_ = boost::shared_array<long>(new long[slices_*sets_]);
+    num_coils_ = boost::shared_array<unsigned int>(new unsigned int[slices_*sets_]);
+    first_profile_acq_time_ = boost::shared_array<unsigned int>(new unsigned int[slices_*sets_]);
+    first_profile_phys_time_ = boost::shared_array<unsigned int>(new unsigned int[slices_*sets_]);
+    previous_timestamp_ = boost::shared_array<unsigned int>(new unsigned int[slices_*sets_]);
+    profiles_counter_global_ = boost::shared_array<long>(new long[slices_*sets_]);
+    Rw_reached_ = boost::shared_array<bool>(new bool[slices_*sets_]);
+    Rw_offset_ = boost::shared_array<unsigned int>(new unsigned int[slices_*sets_]);
+    buffer_update_needed_ = boost::shared_array<bool>(new bool[slices_*sets_]);
+    reconfigure_ = boost::shared_array<bool>(new bool[slices_*sets_]);
+    
+    if( !image_counter_.get() || 
+        !num_coils_.get() || 
+        !first_profile_acq_time_.get() ||
+        !first_profile_phys_time_.get() ||
+        !previous_timestamp_.get() ||
+        !profiles_counter_global_.get() ||
+        !Rw_reached_.get() ||
+        !Rw_offset_.get() ||
+        !buffer_update_needed_.get() ||
+        !reconfigure_.get() ){
+      GADGET_DEBUG1("Failed to allocate host memory (1)\n");
+      return GADGET_FAIL;
+    }
+
+    for( unsigned int i=0; i<slices_*sets_; i++ ){
+      image_counter_[i] = 0;
+      num_coils_[i] = 0;
+      previous_timestamp_[i] = 0;
+      profiles_counter_global_[i] = 0;
+      Rw_reached_[i] = false;
+      Rw_offset_[i] = 0;
+      buffer_update_needed_[i] = true;
+      reconfigure_[i] = true;
+    }
+        
+    position_ = boost::shared_array<float[3]>(new float[slices_*sets_][3]);
+    read_dir_ = boost::shared_array<float[3]>(new float[slices_*sets_][3]);
+    phase_dir_ = boost::shared_array<float[3]>(new float[slices_*sets_][3]);
+    slice_dir_ = boost::shared_array<float[3]>(new float[slices_*sets_][3]);
+
+    if( !position_.get() || !read_dir_.get() || !phase_dir_.get() || !slice_dir_.get() ){
+      GADGET_DEBUG1("Failed to allocate host memory (2)\n");
+      return GADGET_FAIL;
+    }
+
+    for( unsigned int i=0; i<slices_*sets_; i++ ){
+      (position_[i])[0] = (position_[i])[1] = (position_[i])[2] = 0.0f;
+      (read_dir_[i])[0] = (read_dir_[i])[1] = (read_dir_[i])[2] = 0.0f;
+      (phase_dir_[i])[0] = (phase_dir_[i])[1] = (phase_dir_[i])[2] = 0.0f;
+      (slice_dir_[i])[0] = (slice_dir_[i])[1] = (slice_dir_[i])[2] = 0.0f;
+    }
+
+    // Allocate accumulation buffer
+    if( buffer_using_solver_ )
+      acc_buffer_cg_ = boost::shared_array< cuSenseBufferCg<float,2> >(new cuSenseBufferCg<float,2>[slices_*sets_]);
+    else
+      acc_buffer_ = boost::shared_array< cuSenseBuffer<float,2> >(new cuSenseBuffer<float,2>[slices_*sets_]);
+    
+    // Allocate remaining shared_arrays
+    csm_host_ = boost::shared_array< hoNDArray<float_complext> >(new hoNDArray<float_complext>[slices_*sets_]);
+    reg_host_ = boost::shared_array< hoNDArray<float_complext> >(new hoNDArray<float_complext>[slices_*sets_]);
+
+    host_weights_recon_ = boost::shared_array< hoNDArray<float> >(new hoNDArray<float>[slices_*sets_]);
+
+    if( !csm_host_.get() || !reg_host_.get() || !host_weights_recon_.get() ){
+      GADGET_DEBUG1("Failed to allocate host memory (3)\n");
+      return GADGET_FAIL;
+    }
+
+    return GADGET_OK;
+  }
+
+  // Handles one readout (header m1, data m2). Profiles arriving before the first
+  // detected drop in the physiology time stamp are only counted and dropped.
+  // Afterwards a copy of each profile goes to the buffer queue and to the recon
+  // queue of its (set, slice); a full buffer frame updates the accumulation
+  // buffer, and a drop in the physiology time stamp (new cardiac cycle) emits a
+  // GenericReconJob with the queued profiles. Returns GADGET_OK or GADGET_FAIL.
+  int gpuRetroGatedSensePrepGadget::
+  process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader> *m1,
+          GadgetContainerMessage< hoNDArray< std::complex<float> > > *m2)
+  {
+    // Noise should have been consumed by the noise adjust (if in the gadget chain)
+    // - any noise readout that still arrives is released and ignored
+    
+    bool is_noise = m1->getObjectPtr()->isFlagSet(ISMRMRD::ISMRMRD_ACQ_IS_NOISE_MEASUREMENT);
+    if (is_noise) { 
+      m1->release();
+      return GADGET_OK;
+    }
+
+    unsigned int slice = m1->getObjectPtr()->idx.slice;
+    unsigned int set = m1->getObjectPtr()->idx.set;
+
+    unsigned int profile = m1->getObjectPtr()->idx.kspace_encode_step_1;
+    // NOTE(review): 'profile' is not referenced again in this function.
+    unsigned int current_timestamp = m1->getObjectPtr()->physiology_time_stamp[phys_time_index_];
+    unsigned int previous_timestamp = previous_timestamp_[set*slices_+slice];
+    
+    bool new_cardiac_cycle_detected = (current_timestamp < previous_timestamp);
+
+    previous_timestamp_[set*slices_+slice] = current_timestamp;
+    // Until the first time stamp drop is seen: count the profile in Rw_offset_ and drop it
+    if( !Rw_reached_[set*slices_+slice] && !new_cardiac_cycle_detected ){ 
+      Rw_offset_[set*slices_+slice]++;
+      m1->release();
+      return GADGET_OK;
+    }
+    // First drop seen: the global profile counter starts at the number of dropped profiles
+    if( !Rw_reached_[set*slices_+slice] && new_cardiac_cycle_detected ){ 
+      Rw_reached_[set*slices_+slice] = true;
+      profiles_counter_global_[set*slices_+slice] = Rw_offset_[set*slices_+slice];
+      new_cardiac_cycle_detected = false;
+    }
+
+    boost::shared_ptr<GPUTimer> process_timer;
+    if( output_timing_ )
+      process_timer = boost::shared_ptr<GPUTimer>( new GPUTimer("gpuRetroGatedSensePrepGadget::process()") );
+
+    // Get a pointer to the accumulation buffer. 
+
+    cuSenseBuffer<float,2> *acc_buffer = (buffer_using_solver_) ? &acc_buffer_cg_[set*slices_+slice] : &acc_buffer_[set*slices_+slice];
+
+    // Has the imaging plane changed?
+
+    if( !vec_equal(position_[set*slices_+slice], m1->getObjectPtr()->position) ||
+        !vec_equal(read_dir_[set*slices_+slice], m1->getObjectPtr()->read_dir) || 
+        !vec_equal(phase_dir_[set*slices_+slice], m1->getObjectPtr()->phase_dir) ||
+        !vec_equal(slice_dir_[set*slices_+slice], m1->getObjectPtr()->slice_dir) ){
+      
+      // Yes indeed, clear the accumulation buffer
+      acc_buffer->clear();
+      buffer_update_needed_[set*slices_+slice] = true;
+      
+      memcpy(position_[set*slices_+slice],m1->getObjectPtr()->position,3*sizeof(float));
+      memcpy(read_dir_[set*slices_+slice],m1->getObjectPtr()->read_dir,3*sizeof(float));
+      memcpy(phase_dir_[set*slices_+slice],m1->getObjectPtr()->phase_dir,3*sizeof(float));
+      memcpy(slice_dir_[set*slices_+slice],m1->getObjectPtr()->slice_dir,3*sizeof(float));
+    }
+    
+    // Only when the first profile arrives, do we know the #samples/profile
+
+    if( samples_per_profile_ == -1 )      
+      samples_per_profile_ = m1->getObjectPtr()->number_of_samples;
+    
+    if( samples_per_profile_ != m1->getObjectPtr()->number_of_samples ){
+      GADGET_DEBUG1("Unexpected change in the incoming profiles' lengths\n");
+      return GADGET_FAIL;
+    }
+    
+    // Reconfigure at first pass
+    // - or if the number of coil changes
+    // - or if the reconfigure_ flag is set
+
+    if( num_coils_[set*slices_+slice] != m1->getObjectPtr()->active_channels ){
+      GADGET_DEBUG1("Reconfiguring due to change in the number of coils\n");
+      num_coils_[set*slices_+slice] = m1->getObjectPtr()->active_channels;
+      reconfigure(set, slice);
+    }
+
+    if( reconfigure_[set*slices_+slice] ){
+      GADGET_DEBUG1("Reconfiguring due to boolean indicator\n");
+      reconfigure(set, slice);
+    }
+
+    // Enqueue profile
+    // - the buffer queue always receives a copy
+    // - if 'new_cardiac_cycle_detected' the current profile does not belong to the
+    //   current cardiac cycle and we delay enqueueing it on the recon queue
+
+    buffer_profiles_queue_[set*slices_+slice].enqueue_tail(duplicate_profile(m2));
+    
+    if( !new_cardiac_cycle_detected ) {
+      if( recon_profiles_queue_[set*slices_+slice].message_count() == 0 ){
+        first_profile_acq_time_[set*slices_+slice] = m1->getObjectPtr()->acquisition_time_stamp;
+        first_profile_phys_time_[set*slices_+slice] = m1->getObjectPtr()->physiology_time_stamp[phys_time_index_];
+      }
+      recon_profiles_queue_[set*slices_+slice].enqueue_tail(duplicate_profile(m2));
+    }
+    
+    // If the profile is the last of a "buffer frame" 
+    // - then update the accumulation buffer
+    
+    bool is_last_profile_in_buffer_frame = 
+      ( buffer_profiles_queue_[set*slices_+slice].message_count() == profiles_per_buffer_frame_ );
+    
+    if( is_last_profile_in_buffer_frame ){
+      
+      // Extract this frame's samples to update the csm/regularization buffer
+      
+      boost::shared_ptr< hoNDArray<float_complext> > host_samples = 
+        extract_samples_from_buffer_queue( set, slice );
+      
+      if( host_samples.get() == 0x0 ){
+        GADGET_DEBUG1("Failed to extract buffer samples from queue\n");
+        return GADGET_FAIL;
+      }
+      
+      cuNDArray<float_complext> samples( host_samples.get() );
+      
+      long profile_offset = profiles_counter_global_[set*slices_+slice];
+      boost::shared_ptr< cuNDArray<floatd2> > traj = calculate_trajectory_for_buffer(profile_offset, set, slice);
+      
+      buffer_update_needed_[set*slices_+slice] |= acc_buffer->add_frame_data( &samples, traj.get() );
+    }
+    
+    // Perform reconstruction if it is time...
+      
+    if( new_cardiac_cycle_detected ){
+      
+      // Prepare the image headers for the reconstruction
+      //
+      
+      boost::shared_array<ISMRMRD::ImageHeader> headers( new ISMRMRD::ImageHeader[frames_per_cardiac_cycle_] );
+      
+      for( unsigned int i=0; i<frames_per_cardiac_cycle_; i++ ){
+        
+        ISMRMRD::AcquisitionHeader *base_head = m1->getObjectPtr();
+        ISMRMRD::ImageHeader *header = &headers[i];
+        
+        {
+          // Initialize header to all zeroes (there are a few fields we do not set yet)
+          ISMRMRD::ImageHeader tmp;
+          *header = tmp;
+        }
+        
+        header->version = base_head->version;
+        
+        header->matrix_size[0] = image_dimensions_recon_[0];
+        header->matrix_size[1] = image_dimensions_recon_[1];
+        header->matrix_size[2] = 1;
+        
+        header->field_of_view[0] = fov_[0];
+        header->field_of_view[1] = fov_[1];
+        header->field_of_view[2] = fov_[2];
+        
+        header->channels = num_coils_[set*slices_+slice];
+        header->slice = base_head->idx.slice;
+        header->set = base_head->idx.set;
+        
+        header->acquisition_time_stamp = 
+          first_profile_acq_time_[set*slices_+slice] + 
+          i*(base_head->acquisition_time_stamp-first_profile_acq_time_[set*slices_+slice])/frames_per_cardiac_cycle_;
+        // NOTE(review): if these stamps are unsigned, the difference below wraps when the current stamp is below the first one - confirm
+        header->physiology_time_stamp[phys_time_index_] = 
+          first_profile_phys_time_[set*slices_+slice] + 
+          i*(base_head->physiology_time_stamp[phys_time_index_]-first_profile_phys_time_[set*slices_+slice])/frames_per_cardiac_cycle_;
+
+        memcpy(header->position, base_head->position, sizeof(float)*3);
+        memcpy(header->read_dir, base_head->read_dir, sizeof(float)*3);
+        memcpy(header->phase_dir, base_head->phase_dir, sizeof(float)*3);
+        memcpy(header->slice_dir, base_head->slice_dir, sizeof(float)*3);
+        memcpy(header->patient_table_position, base_head->patient_table_position, sizeof(float)*3);
+        
+        header->data_type = ISMRMRD::ISMRMRD_CXFLOAT;
+        header->image_index = image_counter_[set*slices_+slice]++; 
+        header->image_series_index = set*slices_+slice;        
+      }
+      
+      // Update csm and regularization images
+      // - when the buffer changed, or when no csm/regularization image exists yet
+
+      if( buffer_update_needed_[set*slices_+slice] || 
+          csm_host_[set*slices_+slice].get_number_of_elements() == 0 || 
+          reg_host_[set*slices_+slice].get_number_of_elements() == 0 ) {
+
+        // Get the accumulated coil images
+        //
+        
+        boost::shared_ptr< cuNDArray<float_complext> > csm_data = acc_buffer->get_accumulated_coil_images();
+        
+        if( !csm_data.get() ){
+          GADGET_DEBUG1("Error during accumulation buffer computation\n");
+          return GADGET_FAIL;
+        }            
+	
+        // Estimate CSM
+        //
+
+        boost::shared_ptr< cuNDArray<float_complext> > csm = estimate_b1_map<float,2>( csm_data.get() );
+
+        if( !csm.get() ){
+          GADGET_DEBUG1("Error during coil estimation\n");
+          return GADGET_FAIL;
+        }            
+      
+        acc_buffer->set_csm(csm);
+        csm_host_[set*slices_+slice] = *(csm->to_host());
+      
+        // Compute regularization image
+        //
+
+        boost::shared_ptr< cuNDArray<float_complext> > reg_image;
+	
+        if( buffer_using_solver_ ){
+          ((cuSenseBufferCg<float,2>*)acc_buffer)->preprocess( calculate_trajectory_for_rhs( profiles_counter_global_[set*slices_+slice], set, slice).get() );
+        }
+      
+        reg_image = acc_buffer->get_combined_coil_image();
+        
+        if( !reg_image.get() ){
+          GADGET_DEBUG1("Error computing regularization image\n");
+          return GADGET_FAIL;
+        }            
+	
+        reg_host_[set*slices_+slice] = *(reg_image->to_host());
+        
+        /*
+          static int counter = 0;
+          char filename[256];
+          sprintf((char*)filename, "reg_%d.real", counter);
+          write_nd_array<float>( abs(&reg_host_[set*slices_+slice]).get(), filename );
+          counter++;  */
+
+        buffer_update_needed_[set*slices_+slice] = false;        
+      }
+
+      // Prepare data array of the profiles for the downstream reconstruction
+      //
+      
+      boost::shared_ptr< hoNDArray<float_complext> > samples_host( new hoNDArray<float_complext>() );
+      boost::shared_ptr< hoNDArray<floatd2> > traj_host( new hoNDArray<floatd2> );
+
+      if( extract_samples_and_trajectory_from_recon_queue( set, slice, samples_host, traj_host ) != GADGET_OK ){
+        GADGET_DEBUG1("Failed to extract samples and/or trajectories.\n");
+        return GADGET_FAIL;
+      }        
+      
+      // Set up Sense job
+      // - the job receives its own copies of the dcw, csm and regularization arrays
+
+      GadgetContainerMessage< GenericReconJob >* m4 = new GadgetContainerMessage< GenericReconJob >();
+	
+      m4->getObjectPtr()->dat_host_ = samples_host;
+      m4->getObjectPtr()->tra_host_ = traj_host;
+      m4->getObjectPtr()->dcw_host_ = boost::shared_ptr< hoNDArray<float> >(new hoNDArray<float>(host_weights_recon_[set*slices_+slice]));
+      m4->getObjectPtr()->csm_host_ = boost::shared_ptr< hoNDArray<float_complext> >( new hoNDArray<float_complext>(csm_host_[set*slices_+slice]));
+      m4->getObjectPtr()->reg_host_ = boost::shared_ptr< hoNDArray<float_complext> >( new hoNDArray<float_complext>(reg_host_[set*slices_+slice]));
+      m4->getObjectPtr()->image_headers_ = headers;
+      
+      // The Sense Job needs an image header as well. 
+      // Let us just copy the initial one...
+      //
+
+      GadgetContainerMessage<ISMRMRD::ImageHeader> *m3 = new GadgetContainerMessage<ISMRMRD::ImageHeader>;
+      *m3->getObjectPtr() = m4->getObjectPtr()->image_headers_[0];
+      m3->cont(m4);
+      
+      if (this->next()->putq(m3) < 0) {
+        GADGET_DEBUG1("Failed to put job on queue.\n");
+        m3->release();
+        return GADGET_FAIL;
+      }
+    }
+    
+    // This was the first profile of a new cardiac cycle, enqueue (since this was postponed above).
+    //
+
+    if( new_cardiac_cycle_detected ){      
+      if( recon_profiles_queue_[set*slices_+slice].message_count() == 0 ){
+        first_profile_acq_time_[set*slices_+slice] = m1->getObjectPtr()->acquisition_time_stamp;
+        first_profile_phys_time_[set*slices_+slice] = m1->getObjectPtr()->physiology_time_stamp[phys_time_index_];
+      }
+      recon_profiles_queue_[set*slices_+slice].enqueue_tail(duplicate_profile(m2)); 
+    }
+    
+    profiles_counter_global_[set*slices_+slice]++;
+
+    if( output_timing_ )
+      process_timer.reset();
+    
+    m1->release(); // the internal queues hold copies
+    return GADGET_OK;
+  }
+  
+  // Computes the golden ratio density compensation weights of one reconstruction frame
+  // and stores a host copy for the given (set, slice). GADGET_FAIL unless mode_ is 2 or 3.
+  int
+  gpuRetroGatedSensePrepGadget::calculate_density_compensation_for_reconstruction( unsigned int set, unsigned int slice )
+  {
+    if( mode_ != 2 && mode_ != 3 ){
+      GADGET_DEBUG1("Illegal dcw mode\n");
+      return GADGET_FAIL;
+    }
+
+    const float recon_to_sample_ratio = 1.0f/(float(samples_per_profile_)/float(image_dimensions_recon_[0]));
+
+    boost::shared_ptr< hoNDArray<float> > weights_on_host = compute_radial_dcw_golden_ratio_2d<float>
+      ( samples_per_profile_, profiles_per_frame_, oversampling_factor_, recon_to_sample_ratio, 0,
+        (mode_==2) ? GR_ORIGINAL : GR_SMALLEST )->to_host();
+
+    host_weights_recon_[set*slices_+slice] = *weights_on_host;
+
+    return GADGET_OK;
+  }
+  
+  // Builds the golden ratio trajectory of the buffer frame whose last profile
+  // has the global index 'profile_offset'.
+  //
+  // profile_offset: global profile counter of the frame's final profile.
+  // set, slice:     not used by the computation itself.
+  //
+  // Returns the trajectory, or an empty pointer (after logging) when mode_ is
+  // neither 2 nor 3.
+  boost::shared_ptr< cuNDArray<floatd2> > 
+  gpuRetroGatedSensePrepGadget::calculate_trajectory_for_buffer( long profile_offset, unsigned int set, unsigned int slice )
+  {
+    const bool golden_ratio_mode = (mode_ == 2 || mode_ == 3);
+
+    if( !golden_ratio_mode ){
+      GADGET_DEBUG1("Illegal trajectory mode\n");
+      return boost::shared_ptr< cuNDArray<floatd2> >();
+    }
+
+    // A buffer frame spans profiles_per_buffer_frame_ profiles; step back from
+    // its last profile to find where it starts.
+    long buffer_start = profile_offset + 1 - profiles_per_buffer_frame_;
+
+    return compute_radial_trajectory_golden_ratio_2d<float>
+      ( samples_per_profile_, profiles_per_buffer_frame_, 1, buffer_start,
+        (mode_==2) ? GR_ORIGINAL : GR_SMALLEST );
+  }
+
+  // Density compensation weights matching one buffer frame
+  // (profiles_per_buffer_frame_ golden ratio profiles). Returns an empty
+  // pointer (after logging) when mode_ is neither 2 nor 3.
+  boost::shared_ptr< cuNDArray<float> >
+  gpuRetroGatedSensePrepGadget::calculate_density_compensation_for_buffer( unsigned int set, unsigned int slice )
+  {    
+    if( mode_ != 2 && mode_ != 3 ){
+      GADGET_DEBUG1("Illegal dcw mode\n");
+      return boost::shared_ptr< cuNDArray<float> >();
+    }
+
+    // Reconstruction matrix size relative to the number of samples per profile
+    const float recon_to_sample_ratio = 
+      1.0f/(float(samples_per_profile_)/float(image_dimensions_recon_[0]));
+
+    return compute_radial_dcw_golden_ratio_2d<float>
+      ( samples_per_profile_, profiles_per_buffer_frame_, oversampling_factor_, 
+        recon_to_sample_ratio, 0, (mode_==2) ? GR_ORIGINAL : GR_SMALLEST );
+  }
+
+
+  // Golden ratio trajectory for the right hand side: it spans all inner buffer
+  // frames (profiles_per_buffer_frame_*num_buffer_frames_inner_ profiles) ending
+  // at profile 'profile_offset', with the start index clamped at zero.
+  //
+  // Returns the trajectory, or an empty pointer (after logging) when mode_ is
+  // neither 2 nor 3.
+  boost::shared_ptr< cuNDArray<floatd2> > 
+  gpuRetroGatedSensePrepGadget::calculate_trajectory_for_rhs( long profile_offset, unsigned int set, unsigned int slice )
+  {
+    if( mode_ != 2 && mode_ != 3 ){
+      GADGET_DEBUG1("Illegal trajectory mode\n");
+      return boost::shared_ptr< cuNDArray<floatd2> >();
+    }
+
+    // Step back over all inner buffer frames, but never before profile 0
+    long rhs_start = profile_offset + 1 - profiles_per_buffer_frame_*num_buffer_frames_inner_;
+    if( rhs_start < 0L )
+      rhs_start = 0L;
+
+    return compute_radial_trajectory_golden_ratio_2d<float>
+      ( samples_per_profile_, profiles_per_buffer_frame_*num_buffer_frames_inner_, 1, rhs_start,
+        (mode_==2) ? GR_ORIGINAL : GR_SMALLEST );
+  }
+  
+  // Density compensation weights for the right hand side, spanning all inner
+  // buffer frames (profiles_per_buffer_frame_*num_buffer_frames_inner_ profiles).
+  //
+  // set, slice: not used by the computation itself.
+  //
+  // Returns the weights, or an empty pointer (after logging) when mode_ is
+  // neither 2 nor 3.
+  boost::shared_ptr< cuNDArray<float> >
+  gpuRetroGatedSensePrepGadget::calculate_density_compensation_for_rhs( unsigned int set, unsigned int slice )
+  {
+    if( mode_ != 2 && mode_ != 3 ){
+      GADGET_DEBUG1("Illegal dcw mode\n");
+      return boost::shared_ptr< cuNDArray<float> >();
+    }
+
+    long rhs_profiles = profiles_per_buffer_frame_*num_buffer_frames_inner_;
+
+    // Reconstruction matrix size relative to the number of samples per profile
+    const float recon_to_sample_ratio = 
+      1.0f/(float(samples_per_profile_)/float(image_dimensions_recon_[0]));
+
+    return compute_radial_dcw_golden_ratio_2d<float>
+      ( samples_per_profile_, rhs_profiles, oversampling_factor_, 
+        recon_to_sample_ratio, 0, (mode_==2) ? GR_ORIGINAL : GR_SMALLEST );
+  }
+
+  // Drains the buffer profile queue of the given (set, slice) into one host
+  // array of dimensions [samples_per_profile_*#profiles, #coils], with the
+  // profiles stored consecutively per coil. Returns an empty pointer on
+  // failure; a message that was dequeued is released on every path.
+  boost::shared_ptr< hoNDArray<float_complext> > 
+  gpuRetroGatedSensePrepGadget::extract_samples_from_buffer_queue( unsigned int set, unsigned int slice )
+  {    
+    ACE_Message_Queue<ACE_MT_SYNCH> *queue = &buffer_profiles_queue_[set*slices_+slice];
+    const unsigned int profiles_buffered = queue->message_count();
+    const unsigned int num_coils = num_coils_[set*slices_+slice];
+    
+    std::vector<size_t> dims;
+    dims.push_back(samples_per_profile_*profiles_buffered);
+    dims.push_back(num_coils);
+    
+    boost::shared_ptr< hoNDArray<float_complext> > host_samples(new hoNDArray<float_complext>(&dims));
+    
+    for (unsigned int p=0; p<profiles_buffered; p++) {
+      ACE_Message_Block* mbq;
+      if (queue->dequeue_head(mbq) < 0) {
+        GADGET_DEBUG1("Message dequeue failed\n");
+        return boost::shared_ptr< hoNDArray<float_complext> >();
+      }
+      
+      GadgetContainerMessage< hoNDArray< std::complex<float> > > *daq = AsContainerMessage<hoNDArray< std::complex<float> > >(mbq);
+
+      if (!daq) {
+        GADGET_DEBUG1("Unable to interpret data on message queue\n");
+        mbq->release(); // already dequeued: release it here or it leaks
+        return boost::shared_ptr< hoNDArray<float_complext> >();
+      }
+
+      for (unsigned int c = 0; c < num_coils; c++) {
+        float_complext *data_ptr = host_samples->get_data_ptr();
+        data_ptr += c*samples_per_profile_*profiles_buffered+p*samples_per_profile_;
+        std::complex<float> *r_ptr = daq->getObjectPtr()->get_data_ptr();
+        r_ptr += c*daq->getObjectPtr()->get_size(0);
+        memcpy(data_ptr,r_ptr,samples_per_profile_*sizeof(float_complext));
+      }
+      mbq->release();
+    } 
+    
+    return host_samples;
+  }
+  
+  // Drains the recon profile queue of the given (set, slice) and regroups the
+  // buffered cardiac cycle into frames_per_cardiac_cycle_ frames of
+  // profiles_per_frame_ profiles each; the frames' start profiles are spread
+  // evenly over the cycle.
+  // samples:    created as [samples_per_profile_*profiles_per_frame_*frames_per_cardiac_cycle_, #coils]
+  // trajectory: created as [samples_per_profile_*profiles_per_frame_, frames_per_cardiac_cycle_]
+  // Returns GADGET_OK, or GADGET_FAIL if a message cannot be dequeued or
+  // interpreted, or if fewer than profiles_per_frame_ profiles were buffered.
+  int gpuRetroGatedSensePrepGadget::extract_samples_and_trajectory_from_recon_queue
+  ( unsigned int set, unsigned int slice, boost::shared_ptr< hoNDArray<float_complext> > samples, boost::shared_ptr< hoNDArray<floatd2> > trajectory )
+  {    
+    // Extract samples from queue and put into buffer 
+    ACE_Message_Queue<ACE_MT_SYNCH> *queue = &recon_profiles_queue_[set*slices_+slice];
+    long profiles_buffered = queue->message_count();
+    
+    std::vector<size_t> dims_per_readout;
+    dims_per_readout.push_back(samples_per_profile_);
+    dims_per_readout.push_back(num_coils_[set*slices_+slice]);
+    
+    std::vector<size_t> dims_for_buffer = dims_per_readout;
+    dims_for_buffer.push_back(profiles_buffered);
+    
+    hoNDArray< std::complex<float> > host_buffer(&dims_for_buffer);
+
+    for (long p=0; p<profiles_buffered; p++) {
+      
+      ACE_Message_Block* mbq;
+      if (queue->dequeue_head(mbq) < 0) {
+        GADGET_DEBUG1("Message dequeue failed\n");
+        return GADGET_FAIL;
+      }
+      
+      GadgetContainerMessage< hoNDArray< std::complex<float> > > *daq = 
+        AsContainerMessage<hoNDArray< std::complex<float> > >(mbq);
+
+      if (!daq) {
+        GADGET_DEBUG1("Unable to interpret data on message queue\n");
+        mbq->release(); // already dequeued: release it here or it leaks
+        return GADGET_FAIL;
+      }
+
+      {
+        // Copy daq into host_buffer array
+        hoNDArray< std::complex<float> > tmp( &dims_per_readout, host_buffer.get_data_ptr() + p*dims_per_readout[0]*dims_per_readout[1] );
+        if( !tmp.dimensions_equal( daq->getObjectPtr()->get_dimensions().get() )){
+          GADGET_DEBUG1("Unexpected dimensionality of array on message queue\n");
+          mbq->release(); // same as above: do not leak the dequeued message
+          return GADGET_FAIL;
+        }
+        tmp = *daq->getObjectPtr();
+      }
+      mbq->release();
+    } 
+
+    // Every frame needs profiles_per_frame_ consecutive profiles; with fewer, the
+    // clamping below would yield a negative start index and the copies would read
+    // before the start of the buffers.
+    if( profiles_buffered < profiles_per_frame_ ){
+      GADGET_DEBUG1("Too few profiles buffered to form a frame\n");
+      return GADGET_FAIL;
+    }
+
+    // Create trajectory array according to the samples buffer
+    long first_profile_in_buffer = 
+      profiles_counter_global_[set*slices_+slice] - profiles_buffered;
+    
+    boost::shared_ptr< hoNDArray<floatd2> > host_traj = compute_radial_trajectory_golden_ratio_2d<float>
+      ( samples_per_profile_, profiles_buffered, 1, first_profile_in_buffer, (mode_==2) ? GR_ORIGINAL : GR_SMALLEST )->to_host();
+
+    host_traj->squeeze();
+
+    // Prepare samples and trajectory arrays according to the current 
+    // 'profiles_per_frame_' and 'frames_per_cardiac_cycle_' settings
+    std::vector<size_t> recon_dims;
+    recon_dims.push_back(samples_per_profile_*profiles_per_frame_*frames_per_cardiac_cycle_);
+    recon_dims.push_back(num_coils_[set*slices_+slice]);
+    
+    std::vector<size_t> traj_dims_frame;
+    traj_dims_frame.push_back( samples_per_profile_*profiles_per_frame_ );
+
+    std::vector<size_t> traj_dims = traj_dims_frame;
+    traj_dims.push_back( frames_per_cardiac_cycle_ );
+
+    samples->create( recon_dims );
+    trajectory->create( traj_dims );
+    
+    for( long frame=0; frame<frames_per_cardiac_cycle_; frame++ ){
+      // Spread the frames' start profiles evenly over the cycle. A single frame
+      // starts at profile 0 (the general expression would divide zero by zero).
+      long first_profile = (frames_per_cardiac_cycle_ > 1) ?
+        (long)(float(frame)*float(profiles_buffered-profiles_per_frame_)/float(frames_per_cardiac_cycle_-1)) : 0L;
+
+      // Just to be sure we do not get out-of-bounds due to rounding errors in the float<->int math
+      if( first_profile < 0 ){
+        GADGET_DEBUG1("\nWARNING: first profile is negative. Corrected.");
+        first_profile = 0;
+      }
+
+      if (first_profile + profiles_per_frame_ - 1  > profiles_buffered -1 ){
+        GADGET_DEBUG1("\nWARNING: first profile is out of bounds for the last profile. Corrected.");
+        first_profile = profiles_buffered - profiles_per_frame_;
+      }
+
+      for( long coil=0; coil<num_coils_[set*slices_+slice]; coil++ ){
+        for( long profile = 0; profile<profiles_per_frame_; profile++ ){
+          // Copy samples for profile
+          memcpy( samples->get_data_ptr() + 
+                  coil*samples_per_profile_*profiles_per_frame_*frames_per_cardiac_cycle_ +
+                  frame*samples_per_profile_*profiles_per_frame_ + 
+                  profile*samples_per_profile_,
+                  host_buffer.get_data_ptr() + 
+                  (first_profile + profile) * samples_per_profile_*num_coils_[set*slices_+slice]+
+                  coil*samples_per_profile_,
+                  sizeof(std::complex<float>)*samples_per_profile_);
+
+          // Copy trajectory for profile
+          memcpy( trajectory->get_data_ptr() + 
+                  frame*samples_per_profile_*profiles_per_frame_ + 
+                  profile*samples_per_profile_,
+                  host_traj->get_data_ptr() + 
+                  (first_profile + profile) * samples_per_profile_,
+                  sizeof(floatd2)*samples_per_profile_);
+        }
+      }
+    }
+    return GADGET_OK;
+  }
+  
+  // Returns a newly allocated message holding a copy of the array in 'profile'.
+  GadgetContainerMessage< hoNDArray< std::complex<float> > >*
+  gpuRetroGatedSensePrepGadget::duplicate_profile( GadgetContainerMessage< hoNDArray< std::complex<float> > > *profile )
+  {
+    typedef GadgetContainerMessage< hoNDArray< std::complex<float> > > ProfileMessage;
+    ProfileMessage *duplicate = new ProfileMessage();
+    hoNDArray< std::complex<float> > &destination = *duplicate->getObjectPtr();
+    destination = *profile->getObjectPtr();
+    return duplicate;
+  }
+
+  // (Re)initializes the accumulation buffer of one (set, slice): recomputes the
+  // density compensation weights, sets the buffer up again and clears reconfigure_.
+  void gpuRetroGatedSensePrepGadget::reconfigure(unsigned int set, unsigned int slice)
+  {    
+    const long buffer_idx = set*slices_+slice;
+    calculate_density_compensation_for_reconstruction(set, slice);
+    // Only the buffer array selected by buffer_using_solver_ is indexed
+    cuSenseBufferCg<float,2> *cg_buffer = buffer_using_solver_ ? &acc_buffer_cg_[buffer_idx] : 0x0;
+    cuSenseBuffer<float,2> *acc_buffer = buffer_using_solver_ ? cg_buffer : &acc_buffer_[buffer_idx];
+
+    acc_buffer->setup( from_std_vector<size_t,2>(image_dimensions_recon_), image_dimensions_recon_os_, 
+                       kernel_width_, num_coils_[buffer_idx], num_buffer_frames_outer_, num_buffer_frames_inner_ );
+    boost::shared_ptr< cuNDArray<float> > buffer_dcw = calculate_density_compensation_for_buffer(set, slice);
+    acc_buffer->set_dcw(buffer_dcw);
+    if( cg_buffer ){
+      cg_buffer->set_dcw_for_rhs(calculate_density_compensation_for_rhs(set, slice));
+      cg_buffer->preprocess(calculate_trajectory_for_rhs(0, set, slice).get());
+    }
+    reconfigure_[buffer_idx] = false;
+  }
+
+  GADGET_FACTORY_DECLARE(gpuRetroGatedSensePrepGadget) // NOTE(review): presumably emits the factory entry point for this gadget; the macro is defined outside this chunk - confirm
+}
diff --git a/gadgets/radial/gpuRetroGatedSensePrepGadget.h b/gadgets/radial/gpuRetroGatedSensePrepGadget.h
new file mode 100644
index 0000000..4cda67c
--- /dev/null
+++ b/gadgets/radial/gpuRetroGatedSensePrepGadget.h
@@ -0,0 +1,133 @@
+#pragma once
+
+#include "gadgetron_radial_export.h"
+#include "Gadget.h"
+#include "GadgetMRIHeaders.h"
+#include "hoNDArray.h"
+#include "vector_td.h"
+#include "cuNFFT.h"
+#include "cuCgPreconditioner.h"
+#include "cuSenseBufferCg.h"
+
+#include <ismrmrd/ismrmrd.h>
+#include <complex>
+#include <boost/shared_ptr.hpp>
+#include <boost/shared_array.hpp>
+
+/*
+  Prep gadget for retrospectively gated Sense based on golden ratio sampling.
+  Thus only radial modes 2-3 are supported.  
+*/
+
+namespace Gadgetron{
+
+  class EXPORTGADGETS_RADIAL gpuRetroGatedSensePrepGadget :
+    public Gadget2< ISMRMRD::AcquisitionHeader, hoNDArray< std::complex<float> > >
+  {
+    // Derives from Gadget2 parameterized on an acquisition header and a complex<float> sample array.
+  public:
+    GADGET_DECLARE(gpuRetroGatedSensePrepGadget);
+    gpuRetroGatedSensePrepGadget();
+    virtual ~gpuRetroGatedSensePrepGadget();
+
+  protected:
+    // Virtual entry points: one takes the configuration block, the other a header (m1) plus a sample array (m2).
+    virtual int process_config(ACE_Message_Block *mb);
+
+    virtual int process(GadgetContainerMessage< ISMRMRD::AcquisitionHeader > *m1,
+			GadgetContainerMessage< hoNDArray< std::complex<float> > > *m2);
+
+  private:
+    // Returns true only when the first three elements of in1 and in2 compare equal (exact float comparison).
+    inline bool vec_equal(float *in1, float *in2) {
+      for (unsigned int i = 0; i < 3; i++) {
+        if (in1[i] != in2[i]) return false;
+      }
+      return true;
+    }
+    // Flags indexed by set*slices_+slice; reconfigure() sets up that entry's buffer and resets its flag to false.
+    boost::shared_array<bool> reconfigure_;
+    virtual void reconfigure(unsigned int set, unsigned int slice);
+    // Returns a newly allocated message whose array is a copy of the one held by 'profile'.
+    GadgetContainerMessage< hoNDArray< std::complex<float> > >*
+      duplicate_profile( GadgetContainerMessage< hoNDArray< std::complex<float> > > *profile );
+
+    boost::shared_ptr< hoNDArray<float_complext> > extract_samples_from_buffer_queue( unsigned int set, unsigned int slice );
+
+    int extract_samples_and_trajectory_from_recon_queue
+      ( unsigned int set, unsigned int slice, boost::shared_ptr< hoNDArray<float_complext> > samples, boost::shared_ptr< hoNDArray<floatd2> > trajectory );
+
+    int calculate_density_compensation_for_reconstruction(unsigned int set, unsigned int slice);
+
+    boost::shared_ptr< cuNDArray<floatd2> > 
+      calculate_trajectory_for_buffer(long profile_offset, unsigned int set, unsigned int slice);
+
+    boost::shared_ptr< cuNDArray<float> >
+      calculate_density_compensation_for_buffer(unsigned int set, unsigned int slice);
+
+    boost::shared_ptr< cuNDArray<floatd2> > 
+      calculate_trajectory_for_rhs(long profile_offset, unsigned int set, unsigned int slice);
+
+    boost::shared_ptr< cuNDArray<float> > 
+      calculate_density_compensation_for_rhs(unsigned int set, unsigned int slice);
+
+    int slices_;
+    int sets_;
+    int device_number_;
+    int mode_;
+
+    unsigned short phys_time_index_;
+
+    long samples_per_profile_;
+    long profiles_per_frame_;
+    long frames_per_cardiac_cycle_;
+
+    // The number of buffer cycles
+    long profiles_per_buffer_frame_;
+    long num_buffer_frames_inner_; 
+    long num_buffer_frames_outer_;
+
+    // Internal book-keeping
+    boost::shared_array<unsigned int> first_profile_acq_time_;
+    boost::shared_array<unsigned int> first_profile_phys_time_;
+    boost::shared_array<unsigned int> previous_timestamp_;
+    boost::shared_array<long> profiles_counter_global_;
+
+    // We will discard profiles until the first R-wave is encountered
+    boost::shared_array<bool> Rw_reached_;
+    boost::shared_array<unsigned int> Rw_offset_;
+
+    // For the buffer
+    float kernel_width_;
+    float oversampling_factor_;
+
+    boost::shared_array<long> image_counter_;
+    boost::shared_array<unsigned int> num_coils_;
+
+    boost::shared_array<float[3]> position_;
+    boost::shared_array<float[3]> read_dir_;
+    boost::shared_array<float[3]> phase_dir_;
+    boost::shared_array<float[3]> slice_dir_;
+
+    bool output_timing_;
+    bool buffer_using_solver_;
+
+    boost::shared_array<bool> buffer_update_needed_;
+
+    boost::shared_array< hoNDArray<float> > host_weights_recon_;
+    
+    boost::shared_array< hoNDArray<float_complext> > csm_host_;
+    boost::shared_array< hoNDArray<float_complext> > reg_host_;
+    // reconfigure() works on acc_buffer_cg_ when buffer_using_solver_ is set and on acc_buffer_ otherwise.
+    boost::shared_array< cuSenseBuffer<float,2> > acc_buffer_;
+    boost::shared_array< cuSenseBufferCg<float,2> > acc_buffer_cg_;
+    // image_dimensions_recon_ and image_dimensions_recon_os_ are passed to the buffer setup in reconfigure().
+    std::vector<size_t> fov_;
+    std::vector<size_t> image_dimensions_;
+    std::vector<size_t> image_dimensions_recon_;
+    uint64d2 image_dimensions_recon_os_;
+
+    boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> > buffer_profiles_queue_;
+    boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> > recon_profiles_queue_;
+  };
+}
diff --git a/gadgets/sense/CMakeLists.txt b/gadgets/sense/CMakeLists.txt
deleted file mode 100644
index 3728dad..0000000
--- a/gadgets/sense/CMakeLists.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-if (WIN32)
-  ADD_DEFINITIONS(-D__BUILD_GADGETRON_GPUSENSE__)
-endif (WIN32)
-
-find_package(Ismrmrd REQUIRED)
-
-include_directories(   
-  ${CMAKE_SOURCE_DIR}/gadgets/mri_core
-  ${CMAKE_SOURCE_DIR}/gadgets/sense
-  ${CMAKE_SOURCE_DIR}/toolboxes/mri/pmri/gpu
-  ${CMAKE_SOURCE_DIR}/toolboxes/nfft/gpu
-  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
-  ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
-  ${CMAKE_SOURCE_DIR}/toolboxes/operators
-  ${CMAKE_SOURCE_DIR}/toolboxes/operators/gpu
-  ${CMAKE_SOURCE_DIR}/toolboxes/solvers
-  ${CMAKE_SOURCE_DIR}/toolboxes/solvers/gpu
-  ${HDF5_INCLUDE_DIR}
-  ${HDF5_INCLUDE_DIR}/cpp
-  ${ISMRMRD_XSD_INCLUDE_DIR}
-)
-
-add_library(gadgetron_gpusense SHARED 
-  gpuCgSenseGadget.cpp 
-  gpuCgKtSenseGadget.cpp 
-  gpuSbSenseGadget.cpp 
-  gpuGenericSensePrepGadget.cpp
-  ${ISMRMRD_XSD_SOURCE}
-  )
-
-target_link_libraries(gadgetron_gpusense 
-  cpucore gpucore gpusolvers gpuoperators gpuparallelmri 
-  ${Boost_LIBRARIES} ${ISMRMRD_LIBRARIES} ${XERCESC_LIBRARIES} ${FFTW3_LIBRARIES} ${CUDA_LIBRARIES} 
-  optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY}
-  )
-
-install (TARGETS gadgetron_gpusense DESTINATION lib)
-
-install (FILES 
-  SenseJob.h
-  DESTINATION include)
-
-add_subdirectory(config)
diff --git a/gadgets/sense/SenseJob.h b/gadgets/sense/SenseJob.h
deleted file mode 100644
index 169a6ec..0000000
--- a/gadgets/sense/SenseJob.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#pragma once
-
-#include "hoNDArray.h"
-#include "vector_td.h"
-
-#include <ismrmrd.h>
-#include <boost/shared_ptr.hpp>
-#include <boost/shared_array.hpp>
-
-namespace Gadgetron{
-  
-  class SenseJob
-  {
-  public:
-    
-    SenseJob() {}
-    ~SenseJob() {}
-
-    boost::shared_array<ISMRMRD::ImageHeader> image_headers_;
-
-    boost::shared_ptr< hoNDArray<float_complext> >  dat_host_;
-    boost::shared_ptr< hoNDArray<floatd2>        >  tra_host_;
-    boost::shared_ptr< hoNDArray<float>          >  dcw_host_;
-    boost::shared_ptr< hoNDArray<float_complext> >  csm_host_;
-    boost::shared_ptr< hoNDArray<float_complext> >  reg_host_;
-  };
-}
diff --git a/gadgets/sense/config/CMakeLists.txt b/gadgets/sense/config/CMakeLists.txt
deleted file mode 100644
index fec681a..0000000
--- a/gadgets/sense/config/CMakeLists.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-if (ARMADILLO_FOUND)
-  install (FILES 
-    generic_gpusense_cg.xml 
-    generic_gpusense_cg_singleshot.xml 
-    generic_gpusense_sb_singleshot.xml 
-    generic_gpu_ktsense_singleshot.xml 
-    DESTINATION config)
-elseif (ARMADILLO_FOUND)
-  MESSAGE("Armadillo not found, only unoptimized generic trajectory config files will be available")
-endif (ARMADILLO_FOUND)
-
-#install (FILES 
-#  generic_gpusense_cg_unoptimized.xml 
-#  generic_gpusense_sb_unoptimized.xml 
-#  DESTINATION config)
diff --git a/gadgets/sense/config/generic_gpu_ktsense_singleshot.xml b/gadgets/sense/config/generic_gpu_ktsense_singleshot.xml
deleted file mode 100644
index f3fe738..0000000
--- a/gadgets/sense/config/generic_gpu_ktsense_singleshot.xml
+++ /dev/null
@@ -1,115 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-			      xmlns="http://gadgetron.sf.net/gadgetron"
-			      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-  
-  <reader>
-    <slot>1008</slot>
-    <dll>gadgetron_mricore</dll>
-    <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-  </reader>
-  
-  <writer>
-    <slot>1004</slot>
-    <dll>gadgetron_mricore</dll>
-    <classname>MRIImageWriterCPLX</classname>
-  </writer>
-  <writer>
-    <slot>1005</slot>
-    <dll>gadgetron_mricore</dll>
-    <classname>MRIImageWriterFLOAT</classname>
-  </writer>
-  <writer>
-    <slot>1006</slot>
-    <dll>gadgetron_mricore</dll>
-    <classname>MRIImageWriterUSHORT</classname>
-  </writer>
-
-  <gadget>
-    <name>NoiseAdjust</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>NoiseAdjustGadget</classname>
-  </gadget>
-
-  <gadget>
-    <name>PCA</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>PCACoilGadget</classname>
-  </gadget>
-  
-  <gadget>
-    <name>CoilReduction</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>CoilReductionGadget</classname>
-    <property><name>coils_out</name><value>16</value></property>
-  </gadget>
-
-  <gadget>
-    <name>gpuGenericSensePrepGadget</name>
-    <dll>gadgetron_gpusense</dll>
-    <classname>gpuGenericSensePrepGadget</classname>
-      <property><name>deviceno</name><value>0</value></property>
-      <property><name>readouts_per_frame</name><value>1</value></property>
-      <property><name>frames_per_rotation</name><value>1</value></property>
-      <property><name>rotations_per_reconstruction</name><value>50</value></property>
-      <property><name>buffer_convolution_kernel_width</name><value>5.5</value></property>
-      <property><name>buffer_convolution_oversampling_factor</name><value>1.25</value></property>
-      <property><name>reconstruction_os_factor_x</name><value>1.5</value></property>
-      <property><name>reconstruction_os_factor_y</name><value>1.5</value></property>
-  </gadget>
-  
-  <gadget>
-    <name>gpuCgKtSenseGadget_slice0</name>
-    <dll>gadgetron_gpusense</dll>
-    <classname>gpuCgKtSenseGadget</classname>
-    <property><name>pass_on_undesired_data</name><value>true</value></property>
-    <property><name>deviceno</name>                <value>0</value></property>
-    <property><name>number_of_iterations</name>    <value>50</value></property>
-    <property><name>cg_limit</name>                <value>1e-6</value></property>
-    <property><name>oversampling_factor</name>     <value>1.25</value></property>
-    <property><name>kernel_width</name>            <value>5.5</value></property>
-    <property><name>kappa</name>                   <value>0.1</value></property>
-    <property><name>output_convergence</name>      <value>true</value></property>
-  </gadget>
-  
-  <gadget>
-    <name>Extract</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>ExtractGadget</classname>
-  </gadget>
-  
-  <!--
-      <gadget>
-      <name>ImageWrite</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageWriterGadgetFLOAT</classname>
-      </gadget>
-  -->    
-  
-  <gadget>
-    <name>AutoScaleGadget</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>AutoScaleGadget</classname>
-  </gadget> 
-
-  <gadget>
-    <name>FloatToShort</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>FloatToUShortGadget</classname>
-  </gadget>
-  
-  <!--
-      <gadget>
-      <name>ImageFinishCPLX</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetCPLX</classname>
-      </gadget>
-  -->
-  
-  <gadget>
-    <name>ImageFinishShort</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>ImageFinishGadgetUSHORT</classname>
-  </gadget>    
-
-</gadgetronStreamConfiguration>
diff --git a/gadgets/sense/config/generic_gpusense_cg.xml b/gadgets/sense/config/generic_gpusense_cg.xml
deleted file mode 100644
index f2e2975..0000000
--- a/gadgets/sense/config/generic_gpusense_cg.xml
+++ /dev/null
@@ -1,113 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-			      xmlns="http://gadgetron.sf.net/gadgetron"
-			      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-  
-  <reader>
-    <slot>1008</slot>
-    <dll>gadgetron_mricore</dll>
-    <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-  </reader>
-  
-  <writer>
-    <slot>1004</slot>
-    <dll>gadgetron_mricore</dll>
-    <classname>MRIImageWriterCPLX</classname>
-  </writer>
-  <writer>
-    <slot>1005</slot>
-    <dll>gadgetron_mricore</dll>
-    <classname>MRIImageWriterFLOAT</classname>
-  </writer>
-  <writer>
-    <slot>1006</slot>
-    <dll>gadgetron_mricore</dll>
-    <classname>MRIImageWriterUSHORT</classname>
-  </writer>
-
-  <gadget>
-    <name>NoiseAdjust</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>NoiseAdjustGadget</classname>
-  </gadget>
-  
-  <gadget>
-    <name>PCA</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>PCACoilGadget</classname>
-  </gadget>
-  
-  <gadget>
-    <name>CoilReduction</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>CoilReductionGadget</classname>
-    <property><name>coils_out</name><value>16</value></property>
-  </gadget>
-
-  <gadget>
-    <name>gpuGenericSensePrepGadget</name>
-    <dll>gadgetron_gpusense</dll>
-    <classname>gpuGenericSensePrepGadget</classname>
-      <property><name>deviceno</name><value>0</value></property>
-      <property><name>rotations_per_reconstruction</name><value>4</value></property>
-      <property><name>buffer_convolution_kernel_width</name><value>5.5</value></property>
-      <property><name>buffer_convolution_oversampling_factor</name><value>1.25</value></property>
-      <property><name>reconstruction_os_factor_x</name><value>1.5</value></property>
-      <property><name>reconstruction_os_factor_y</name><value>1.5</value></property>
-  </gadget>
-  
-  <gadget>
-    <name>gpuCgSenseGadget</name>
-    <dll>gadgetron_gpusense</dll>
-    <classname>gpuCgSenseGadget</classname>
-    <property><name>pass_on_undesired_data</name>  <value>true</value></property>
-    <property><name>deviceno</name>                <value>0</value></property>
-    <property><name>number_of_iterations</name>    <value>30</value></property>
-    <property><name>cg_limit</name>                <value>1e-6</value></property>
-    <property><name>oversampling_factor</name>     <value>1.25</value></property>
-    <property><name>kernel_width</name>            <value>5.5</value></property>
-    <property><name>kappa</name>                   <value>0.1</value></property>
-    <property><name>output_convergence</name><value>true</value></property>
-  </gadget>
-
-  <gadget>
-    <name>Extract</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>ExtractGadget</classname>
-  </gadget>
-  
-  <!--
-      <gadget>
-      <name>ImageWrite</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageWriterGadgetFLOAT</classname>
-      </gadget>
-  -->    
-  
-  <gadget>
-    <name>AutoScaleGadget</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>AutoScaleGadget</classname>
-  </gadget> 
-
-  <gadget>
-    <name>FloatToShort</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>FloatToUShortGadget</classname>
-  </gadget>
-  
-  <!--
-      <gadget>
-      <name>ImageFinishCPLX</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetCPLX</classname>
-      </gadget>
-  -->
-  
-  <gadget>
-    <name>ImageFinishShort</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>ImageFinishGadgetUSHORT</classname>
-  </gadget>    
-
-</gadgetronStreamConfiguration>
diff --git a/gadgets/sense/config/generic_gpusense_cg_singleshot.xml b/gadgets/sense/config/generic_gpusense_cg_singleshot.xml
deleted file mode 100644
index acd38e2..0000000
--- a/gadgets/sense/config/generic_gpusense_cg_singleshot.xml
+++ /dev/null
@@ -1,115 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-			      xmlns="http://gadgetron.sf.net/gadgetron"
-			      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-  
-  <reader>
-    <slot>1008</slot>
-    <dll>gadgetron_mricore</dll>
-    <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-  </reader>
-  
-  <writer>
-    <slot>1004</slot>
-    <dll>gadgetron_mricore</dll>
-    <classname>MRIImageWriterCPLX</classname>
-  </writer>
-  <writer>
-    <slot>1005</slot>
-    <dll>gadgetron_mricore</dll>
-    <classname>MRIImageWriterFLOAT</classname>
-  </writer>
-  <writer>
-    <slot>1006</slot>
-    <dll>gadgetron_mricore</dll>
-    <classname>MRIImageWriterUSHORT</classname>
-  </writer>
-
-  <gadget>
-    <name>NoiseAdjust</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>NoiseAdjustGadget</classname>
-  </gadget>
-
-  <gadget>
-    <name>PCA</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>PCACoilGadget</classname>
-  </gadget>
-  
-  <gadget>
-    <name>CoilReduction</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>CoilReductionGadget</classname>
-    <property><name>coils_out</name><value>16</value></property>
-  </gadget>
-
-  <gadget>
-    <name>gpuGenericSensePrepGadget</name>
-    <dll>gadgetron_gpusense</dll>
-    <classname>gpuGenericSensePrepGadget</classname>
-      <property><name>deviceno</name><value>0</value></property>
-      <property><name>readouts_per_frame</name><value>1</value></property>
-      <property><name>frames_per_rotation</name><value>1</value></property>
-      <property><name>rotations_per_reconstruction</name><value>50</value></property>
-      <property><name>buffer_convolution_kernel_width</name><value>5.5</value></property>
-      <property><name>buffer_convolution_oversampling_factor</name><value>1.25</value></property>
-      <property><name>reconstruction_os_factor_x</name><value>1.5</value></property>
-      <property><name>reconstruction_os_factor_y</name><value>1.5</value></property>
-  </gadget>
-  
-  <gadget>
-    <name>gpuCgSenseGadget</name>
-    <dll>gadgetron_gpusense</dll>
-    <classname>gpuCgSenseGadget</classname>
-    <property><name>pass_on_undesired_data</name>  <value>true</value></property>
-    <property><name>deviceno</name>                <value>0</value></property>
-    <property><name>number_of_iterations</name>    <value>30</value></property>
-    <property><name>cg_limit</name>                <value>1e-6</value></property>
-    <property><name>oversampling_factor</name>     <value>1.25</value></property>
-    <property><name>kernel_width</name>            <value>5.5</value></property>
-    <property><name>kappa</name>                   <value>0.1</value></property>
-    <property><name>output_convergence</name><value>true</value></property>
-  </gadget>
-  
-  <gadget>
-    <name>Extract</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>ExtractGadget</classname>
-  </gadget>
-  
-  <!--
-      <gadget>
-      <name>ImageWrite</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageWriterGadgetFLOAT</classname>
-      </gadget>
-  -->    
-  
-  <gadget>
-    <name>AutoScaleGadget</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>AutoScaleGadget</classname>
-  </gadget> 
-
-  <gadget>
-    <name>FloatToShort</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>FloatToUShortGadget</classname>
-  </gadget>
-  
-  <!--
-      <gadget>
-      <name>ImageFinishCPLX</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetCPLX</classname>
-      </gadget>
-  -->
-  
-  <gadget>
-    <name>ImageFinishShort</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>ImageFinishGadgetUSHORT</classname>
-  </gadget>    
-
-</gadgetronStreamConfiguration>
diff --git a/gadgets/sense/config/generic_gpusense_sb_singleshot.xml b/gadgets/sense/config/generic_gpusense_sb_singleshot.xml
deleted file mode 100644
index 5dbae87..0000000
--- a/gadgets/sense/config/generic_gpusense_sb_singleshot.xml
+++ /dev/null
@@ -1,119 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-			      xmlns="http://gadgetron.sf.net/gadgetron"
-			      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-  
-  <reader>
-    <slot>1008</slot>
-    <dll>gadgetron_mricore</dll>
-    <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-  </reader>
-  
-  <writer>
-    <slot>1004</slot>
-    <dll>gadgetron_mricore</dll>
-    <classname>MRIImageWriterCPLX</classname>
-  </writer>
-  <writer>
-    <slot>1005</slot>
-    <dll>gadgetron_mricore</dll>
-    <classname>MRIImageWriterFLOAT</classname>
-  </writer>
-  <writer>
-    <slot>1006</slot>
-    <dll>gadgetron_mricore</dll>
-    <classname>MRIImageWriterUSHORT</classname>
-  </writer>
-
-  <gadget>
-    <name>NoiseAdjust</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>NoiseAdjustGadget</classname>
-  </gadget>
-
-  <gadget>
-    <name>PCA</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>PCACoilGadget</classname>
-  </gadget>
-  
-  <gadget>
-    <name>CoilReduction</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>CoilReductionGadget</classname>
-    <property><name>coils_out</name><value>16</value></property>
-  </gadget>
-
-  <gadget>
-    <name>gpuGenericSensePrepGadget</name>
-    <dll>gadgetron_gpusense</dll>
-    <classname>gpuGenericSensePrepGadget</classname>
-      <property><name>deviceno</name><value>0</value></property>
-      <property><name>readouts_per_frame</name><value>1</value></property>
-      <property><name>frames_per_rotation</name><value>1</value></property>
-      <property><name>rotations_per_reconstruction</name><value>50</value></property>
-      <property><name>buffer_using_solver</name><value>true</value></property>
-      <property><name>buffer_convolution_kernel_width</name><value>5.5</value></property>
-      <property><name>buffer_convolution_oversampling_factor</name><value>1.25</value></property>
-      <property><name>reconstruction_os_factor_x</name><value>1.5</value></property>
-      <property><name>reconstruction_os_factor_y</name><value>1.5</value></property>
-  </gadget>
-  
-    <gadget>
-      <name>gpuSbSenseGadget_slice0</name>
-      <dll>gadgetron_gpusense</dll>
-      <classname>gpuSbSenseGadget</classname>
-      <property><name>pass_on_undesired_data</name>  <value>true</value></property>
-      <property><name>deviceno</name>                <value>0</value></property>
-      <property><name>number_of_sb_iterations</name> <value>20</value></property>
-      <property><name>number_of_cg_iterations</name> <value>10</value></property>
-      <property><name>cg_limit</name>                <value>1e-6</value></property>
-      <property><name>oversampling_factor</name>     <value>1.25</value></property>
-      <property><name>kernel_width</name>            <value>5.5</value></property>
-      <property><name>mu</name>                      <value>0.1</value></property>
-      <property><name>lambda</name>                  <value>0.2</value></property>
-      <property><name>alpha</name>                   <value>0.5</value></property>
-      <property><name>output_convergence</name><value>true</value></property>
-    </gadget>
-  
-  <gadget>
-    <name>Extract</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>ExtractGadget</classname>
-  </gadget>
-  
-  <!--
-      <gadget>
-      <name>ImageWrite</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageWriterGadgetFLOAT</classname>
-      </gadget>
-  -->    
-  
-  <gadget>
-    <name>AutoScaleGadget</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>AutoScaleGadget</classname>
-  </gadget> 
-
-  <gadget>
-    <name>FloatToShort</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>FloatToUShortGadget</classname>
-  </gadget>
-  
-  <!--
-      <gadget>
-      <name>ImageFinishCPLX</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetCPLX</classname>
-      </gadget>
-  -->
-  
-  <gadget>
-    <name>ImageFinishShort</name>
-    <dll>gadgetron_mricore</dll>
-    <classname>ImageFinishGadgetUSHORT</classname>
-  </gadget>    
-
-</gadgetronStreamConfiguration>
diff --git a/gadgets/sense/gadgetron_gpusense_export.h b/gadgets/sense/gadgetron_gpusense_export.h
deleted file mode 100644
index 7957ab5..0000000
--- a/gadgets/sense/gadgetron_gpusense_export.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef GADGETRON_GPUSENSE_EXPORT_H_
-#define GADGETRON_GPUSENSE_EXPORT_H_
-
-#if defined (WIN32)
-#if defined (__BUILD_GADGETRON_GPUSENSE__)
-#define EXPORTGADGETS_GPUSENSE __declspec(dllexport)
-#else
-#define EXPORTGADGETS_GPUSENSE __declspec(dllimport)
-#endif
-#else
-#define EXPORTGADGETS_GPUSENSE
-#endif
-
-#endif /* GADGETRON_GPUSENSE_EXPORT_H_ */
diff --git a/gadgets/sense/gpuCgKtSenseGadget.cpp b/gadgets/sense/gpuCgKtSenseGadget.cpp
deleted file mode 100644
index 78c7d58..0000000
--- a/gadgets/sense/gpuCgKtSenseGadget.cpp
+++ /dev/null
@@ -1,370 +0,0 @@
-#include "gpuCgKtSenseGadget.h"
-#include "cuNDArray_operators.h"
-#include "cuNDArray_elemwise.h"
-#include "cuNDArray_blas.h"
-#include "cuNDArray_utils.h"
-#include "cuNDArray_reductions.h"
-#include "cuNDFFT.h"
-#include "Gadgetron.h"
-#include "GadgetMRIHeaders.h"
-#include "b1_map.h"
-#include "GPUTimer.h"
-#include "GadgetIsmrmrdReadWrite.h"
-#include "vector_td_utilities.h"
-
-//#include "hoNDArray_fileio.h"
-
-namespace Gadgetron{
-
-  gpuCgKtSenseGadget::gpuCgKtSenseGadget()
-    : is_configured_(false)
-    , channels_(0)
-    , frame_counter_(0)
-  {
-    set_parameter(std::string("deviceno").c_str(), "0");
-    set_parameter(std::string("setno").c_str(), "0");
-    set_parameter(std::string("sliceno").c_str(), "0");
-    set_parameter(std::string("number_of_iterations").c_str(), "5");
-    set_parameter(std::string("cg_limit").c_str(), "1e-6");
-    set_parameter(std::string("oversampling_factor").c_str(), "1.25");
-    set_parameter(std::string("kernel_width").c_str(), "5.5");
-    set_parameter(std::string("kappa").c_str(), "0.3");
-    
-    matrix_size_ = uint64d2(0,0);
-    matrix_size_os_ = uint64d2(0,0);
-    matrix_size_seq_ = uint64d2(0,0);
-  }
-
-  gpuCgKtSenseGadget::~gpuCgKtSenseGadget() {}
-
-  int gpuCgKtSenseGadget::process_config( ACE_Message_Block* mb )
-  {
-    //GADGET_DEBUG1("gpuCgKtSenseGadget::process_config\n");
-
-    device_number_ = get_int_value(std::string("deviceno").c_str());
-
-    int number_of_devices = 0;
-    if (cudaGetDeviceCount(&number_of_devices)!= cudaSuccess) {
-      GADGET_DEBUG1( "Error: unable to query number of CUDA devices.\n" );
-      return GADGET_FAIL;
-    }
-
-    if (number_of_devices == 0) {
-      GADGET_DEBUG1( "Error: No available CUDA devices.\n" );
-      return GADGET_FAIL;
-    }
-
-    if (device_number_ >= number_of_devices) {
-      GADGET_DEBUG2("Adjusting device number from %d to %d\n", device_number_,  (device_number_%number_of_devices));
-      device_number_ = (device_number_%number_of_devices);
-    }
-
-    if (cudaSetDevice(device_number_)!= cudaSuccess) {
-      GADGET_DEBUG1( "Error: unable to set CUDA device.\n" );
-      return GADGET_FAIL;
-    }
-
-    pass_on_undesired_data_ = get_bool_value(std::string("pass_on_undesired_data").c_str());
-    set_number_ = get_int_value(std::string("setno").c_str());
-    slice_number_ = get_int_value(std::string("sliceno").c_str());
-    number_of_iterations_ = get_int_value(std::string("number_of_iterations").c_str());
-    cg_limit_ = get_double_value(std::string("cg_limit").c_str());
-    oversampling_factor_ = get_double_value(std::string("oversampling_factor").c_str());
-    kernel_width_ = get_double_value(std::string("kernel_width").c_str());
-    kappa_ = get_double_value(std::string("kappa").c_str());
-    shutter_radius_ = get_double_value(std::string("training_data_shutter_radius").c_str());
-    rotations_to_discard_ = get_int_value(std::string("rotations_to_discard").c_str());
-    output_convergence_ = get_bool_value(std::string("output_convergence").c_str());
-
-    if( (rotations_to_discard_%2) == 1 ){
-      GADGET_DEBUG1("#rotations to discard must be even.\n");
-      return GADGET_FAIL;
-    }
-
-    boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg = parseIsmrmrdXMLHeader(std::string(mb->rd_ptr()));
-
-    std::vector<long> dims;
-    ISMRMRD::ismrmrdHeader::encoding_sequence e_seq = cfg->encoding();
-    if (e_seq.size() != 1) {
-      GADGET_DEBUG2("Number of encoding spaces: %d\n", e_seq.size());
-      GADGET_DEBUG1("This Gadget only supports one encoding space\n");
-      return GADGET_FAIL;
-    }
-
-    //ISMRMRD::encodingSpaceType e_space = (*e_seq.begin()).encodedSpace();
-    ISMRMRD::encodingSpaceType r_space = (*e_seq.begin()).reconSpace();
-    //ISMRMRD::encodingLimitsType e_limits = (*e_seq.begin()).encodingLimits();
-
-    matrix_size_seq_ = uint64d2( r_space.matrixSize().x(), r_space.matrixSize().y() );
-
-    if (!is_configured_) {
-
-      channels_ = cfg->acquisitionSystemInformation().present() ?
-	(cfg->acquisitionSystemInformation().get().receiverChannels().present() ? cfg->acquisitionSystemInformation().get().receiverChannels().get() : 1) : 1;
-
-      // Allocate encoding operator for non-Cartesian Sense
-      E_ = boost::shared_ptr< cuNonCartesianKtSenseOperator<float,2> >( new cuNonCartesianKtSenseOperator<float,2>() );
-
-      // Allocate preconditioner
-      D_ = boost::shared_ptr< cuCgPreconditioner<float_complext> >( new cuCgPreconditioner<float_complext>() );
-
-      // Allocate regularization image operator
-      R_ = boost::shared_ptr< cuImageOperator<float_complext> >( new cuImageOperator<float_complext>() );
-      R_->set_weight( kappa_ );
-
-      // Setup solver
-      cg_.set_encoding_operator( E_ );        // encoding matrix
-      cg_.add_regularization_operator( R_ );  // regularization matrix
-      cg_.set_preconditioner( D_ );           // preconditioning matrix
-      cg_.set_max_iterations( number_of_iterations_ );
-      cg_.set_tc_tolerance( cg_limit_ );
-      cg_.set_output_mode( (output_convergence_) ? cuCgSolver<float_complext>::OUTPUT_VERBOSE : cuCgSolver<float_complext>::OUTPUT_SILENT );
-
-      is_configured_ = true;
-    }
-
-    return GADGET_OK;
-  }
-
-  int gpuCgKtSenseGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader> *m1, GadgetContainerMessage<SenseJob> *m2)
-  {
-    // Is this data for this gadget's set/slice?
-    //
-    
-    if( m1->getObjectPtr()->set != set_number_ || m1->getObjectPtr()->slice != slice_number_ ) {      
-      // No, pass it downstream...
-      return this->next()->putq(m1);
-    }
-    
-    //GADGET_DEBUG1("gpuCgKtSenseGadget::process\n");
-    //GPUTimer timer("gpuCgKtSenseGadget::process");
-
-    if (!is_configured_) {
-      GADGET_DEBUG1("Data received before configuration was completed\n");
-      return GADGET_FAIL;
-    }
-
-    SenseJob* j = m2->getObjectPtr();
-
-    // Some basic validation of the incoming Sense job
-    if (!j->csm_host_.get() || !j->dat_host_.get() || !j->tra_host_.get() || !j->dcw_host_.get()) {
-      GADGET_DEBUG1("Received an incomplete Sense job\n");
-      return GADGET_FAIL;
-    }
-
-    unsigned int samples = j->dat_host_->get_size(0);
-    unsigned int channels = j->dat_host_->get_size(1);
-    unsigned int rotations = samples / j->tra_host_->get_number_of_elements();
-    unsigned int frames = j->tra_host_->get_size(1)*rotations;
-
-    if( samples%j->tra_host_->get_number_of_elements() ) {
-      GADGET_DEBUG2("Mismatch between number of samples (%d) and number of k-space coordinates (%d).\nThe first should be a multiplum of the latter.\n", 
-		    samples, j->tra_host_->get_number_of_elements());
-      return GADGET_FAIL;
-    }
-
-    boost::shared_ptr< cuNDArray<floatd2> > traj(new cuNDArray<floatd2> (j->tra_host_.get()));
-    boost::shared_ptr< cuNDArray<float> > dcw(new cuNDArray<float> (j->dcw_host_.get()));
-    boost::shared_ptr< cuNDArray<float_complext> > csm(new cuNDArray<float_complext> (j->csm_host_.get()));
-    boost::shared_ptr< cuNDArray<float_complext> > device_samples(new cuNDArray<float_complext> (j->dat_host_.get()));
-
-    cudaDeviceProp deviceProp;
-    if( cudaGetDeviceProperties( &deviceProp, device_number_ ) != cudaSuccess) {
-      GADGET_DEBUG1( "Error: unable to query device properties.\n" );
-      return GADGET_FAIL;
-    }
-    
-    unsigned int warp_size = deviceProp.warpSize;
-    
-    matrix_size_ = uint64d2( j->reg_host_->get_size(0), j->reg_host_->get_size(1) );    
-
-    matrix_size_os_ =
-      uint64d2(((static_cast<unsigned int>(std::ceil(matrix_size_[0]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size,
-	     ((static_cast<unsigned int>(std::ceil(matrix_size_[1]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size);
-    
-    GADGET_DEBUG2("Matrix size    : [%d,%d] \n", matrix_size_[0], matrix_size_[1]);    
-    GADGET_DEBUG2("Matrix size OS : [%d,%d] \n", matrix_size_os_[0], matrix_size_os_[1]);
-
-    std::vector<size_t> image_dims = to_std_vector(matrix_size_);
-    image_dims.push_back(frames);
-    
-    E_->set_domain_dimensions(&image_dims);
-    E_->set_codomain_dimensions(device_samples->get_dimensions().get());
-    E_->set_dcw(dcw);
-    E_->set_csm(csm);
-
-    E_->setup( matrix_size_, matrix_size_os_, static_cast<float>(kernel_width_) );
-    E_->preprocess(traj.get());
-        
-    R_->compute(compute_regularization_image(j).get());
-
-    // Define preconditioning weights
-    boost::shared_ptr< cuNDArray<float> > __precon_weights = sum(abs_square(csm.get()).get(), 2);
-    boost::shared_ptr< cuNDArray<float> > _precon_weights = expand<float>( __precon_weights.get(), frames );
-    boost::shared_ptr<cuNDArray<float> > R_diag = R_->get();
-    *R_diag *= float(kappa_);
-    *_precon_weights += *R_diag;
-    R_diag.reset();
-    reciprocal_sqrt_inplace(_precon_weights.get());	
-    boost::shared_ptr< cuNDArray<float_complext> > precon_weights = real_to_complex<float_complext>( _precon_weights.get() );
-    __precon_weights.reset(); _precon_weights.reset();
-    D_->set_weights( precon_weights );
-	
-    // Invoke solver
-    // 
-
-    boost::shared_ptr< cuNDArray<float_complext> > cgresult;
-    
-    {
-      GPUTimer timer("gpuCgKtSenseGadget::solve()");
-      cgresult = cg_.solve(device_samples.get());
-    }
-
-    if (!cgresult.get()) {
-      GADGET_DEBUG1("Iterative_sense_compute failed\n");
-      return GADGET_FAIL;
-    }
-
-    // Goto from x-f to x-t space
-    cuNDFFT<float>::instance()->fft( cgresult.get(), 2 );
-
-    /*
-    static int counter = 0;
-    char filename[256];
-    sprintf((char*)filename, "recon_%d.real", counter);
-    write_nd_array<float>( abs(cgresult.get())->to_host().get(), filename );
-    counter++; */
-
-    // If the recon matrix size exceeds the sequence matrix size then crop
-    if( matrix_size_seq_ != matrix_size_ )
-      cgresult = crop<float_complext,2>( (matrix_size_-matrix_size_seq_)>>1, matrix_size_seq_, cgresult.get() );    
-    
-    // Now pass on the reconstructed images
-    //
-
-    unsigned int frames_per_rotation = frames/rotations;
-
-    if( rotations == 1 ){ // this is the case for golden ratio
-      rotations = frames;
-      frames_per_rotation = 1;
-    }
-
-    for( unsigned int frame=0; frame<frames; frame++ ){
-
-      unsigned int rotation_idx = frame/frames_per_rotation;
-
-      // Check if we should discard this frame
-      if( rotation_idx < (rotations_to_discard_>>1) || rotation_idx >= rotations-(rotations_to_discard_>>1) )
-	continue;
-            
-      GadgetContainerMessage<ISMRMRD::ImageHeader> *m = 
-	new GadgetContainerMessage<ISMRMRD::ImageHeader>();
-
-      GadgetContainerMessage< hoNDArray< std::complex<float> > > *cm = 
-	new GadgetContainerMessage< hoNDArray< std::complex<float> > >();      
-
-      *m->getObjectPtr() = j->image_headers_[frame];
-      m->cont(cm);
-      
-      std::vector<size_t> img_dims(2);
-      img_dims[0] = matrix_size_seq_[0];
-      img_dims[1] = matrix_size_seq_[1];
-
-      cm->getObjectPtr()->create(&img_dims);
-
-      size_t data_length = prod(matrix_size_seq_);
-
-      cudaMemcpy(cm->getObjectPtr()->get_data_ptr(),
-		 cgresult->get_data_ptr()+frame*data_length,
-		 data_length*sizeof(std::complex<float>),
-		 cudaMemcpyDeviceToHost);
-
-      cudaError_t err = cudaGetLastError();
-      if( err != cudaSuccess ){
-	GADGET_DEBUG2("Unable to copy result from device to host: %s\n", cudaGetErrorString(err));
-	m->release();
-	return GADGET_FAIL;
-      }
-
-      m->getObjectPtr()->matrix_size[0] = matrix_size_seq_[0];
-      m->getObjectPtr()->matrix_size[1] = matrix_size_seq_[1];
-      m->getObjectPtr()->matrix_size[2] = 1;
-      m->getObjectPtr()->channels       = 1;
-      m->getObjectPtr()->image_index    = frame_counter_ + frame;
-      
-      if (this->next()->putq(m) < 0) {
-	GADGET_DEBUG1("Failed to put result image on to queue\n");
-	m->release();
-	return GADGET_FAIL;
-      }
-    }
-    
-    frame_counter_ += frames;
-
-    m1->release();
-    return GADGET_OK;
-  }
-
-  boost::shared_ptr< cuNDArray<float_complext> > gpuCgKtSenseGadget::
-  compute_regularization_image( SenseJob *job )
-  {
-    // 
-    // Estimate training data
-    // 
-
-    unsigned int num_samples = job->dat_host_->get_size(0);
-    unsigned int num_coils = job->dat_host_->get_size(1);
-    unsigned int num_rotations = num_samples / job->tra_host_->get_number_of_elements();
-    unsigned int frames_per_reconstruction = job->tra_host_->get_size(1)*num_rotations;
-
-    std::vector<size_t> dims = to_std_vector(matrix_size_os_);
-    dims.push_back(frames_per_reconstruction); 
-    dims.push_back(num_coils); 
-
-    cuNDArray<float_complext> image_os(&dims);    
-    cuNDArray<float_complext> data((job->dat_host_).get());
-    cuNDArray<float> dcw((job->dcw_host_).get());
-  
-    // Convolve to Cartesian k-space
-    //
-
-    E_->get_plan()->convolve( &data, &image_os, &dcw, cuNFFT_plan<float,2>::NFFT_CONV_NC2C );
-
-    // Apply shutter
-    //
-
-    if( shutter_radius_ < 0.0001 ){ // If not specified in the configuration then try to make an estimation
-
-      // #profiles/frame : this is just an estimate (we dont have the exact value at this stage)
-      unsigned int profiles_per_frame = num_samples / (frames_per_reconstruction*matrix_size_os_[0]);
-      shutter_radius_ = ((float)matrix_size_os_[0]/(float)matrix_size_[0])*(float)profiles_per_frame/(float)M_PI;
-      GADGET_DEBUG2("Estimated training data shutter radius: %f\n", shutter_radius_);
-    }
-
-    fill_border<float_complext,2>( shutter_radius_, &image_os );
-    E_->get_plan()->fft( &image_os, cuNFFT_plan<float,2>::NFFT_BACKWARDS );
-    E_->get_plan()->deapodize( &image_os );
-
-    // Remove oversampling
-    //
-
-    dims = to_std_vector(matrix_size_);
-    dims.push_back(frames_per_reconstruction); 
-    dims.push_back(num_coils);
-    cuNDArray<float_complext> image(&dims);
-    crop<float_complext,2>( (matrix_size_os_-matrix_size_)>>1, &image_os, &image );
-
-    // Compute regularization image
-    //
-
-    dims.pop_back();
-    boost::shared_ptr< cuNDArray<float_complext> > reg_image( new cuNDArray<float_complext>(&dims) );
-
-    E_->mult_csm_conj_sum( &image, reg_image.get() );
-    cuNDFFT<float>::instance()->ifft( reg_image.get(), 2, true );
-
-    return reg_image;
-  }
-
-  GADGET_FACTORY_DECLARE(gpuCgKtSenseGadget)
-}
diff --git a/gadgets/sense/gpuCgKtSenseGadget.h b/gadgets/sense/gpuCgKtSenseGadget.h
deleted file mode 100644
index f2e06b6..0000000
--- a/gadgets/sense/gpuCgKtSenseGadget.h
+++ /dev/null
@@ -1,71 +0,0 @@
-#ifndef gpuCgKtSenseGadget_H
-#define gpuCgKtSenseGadget_H
-#pragma once
-
-#include "gadgetron_gpusense_export.h"
-#include "Gadget.h"
-#include "SenseJob.h"
-#include "GadgetMRIHeaders.h"
-#include "cuCgSolver.h"
-#include "cuNonCartesianKtSenseOperator.h"
-#include "cuCgPreconditioner.h"
-#include "cuNFFT.h"
-#include "cuImageOperator.h"
-
-#include <ismrmrd.h>
-#include <complex>
-
-namespace Gadgetron{
-
-  class EXPORTGADGETS_GPUSENSE gpuCgKtSenseGadget : public Gadget2<ISMRMRD::ImageHeader, SenseJob>
-  {
-
-  public:
-    GADGET_DECLARE(gpuCgKtSenseGadget);
-
-    gpuCgKtSenseGadget();
-    virtual ~gpuCgKtSenseGadget();
-
-  protected:
-
-    virtual int process( GadgetContainerMessage< ISMRMRD::ImageHeader > *m1, GadgetContainerMessage< SenseJob > *m2 );
-    virtual int process_config( ACE_Message_Block* mb );
-
-    boost::shared_ptr< cuNDArray<float_complext> > compute_regularization_image( SenseJob *job );
-
-    int channels_;
-    int device_number_;
-    int set_number_;
-    int slice_number_;
-
-    uint64d2 matrix_size_;
-    uint64d2 matrix_size_os_;
-    uint64d2 matrix_size_seq_;
-
-    unsigned int number_of_iterations_;
-    double cg_limit_;
-    double oversampling_factor_;
-    double kernel_width_;
-    double kappa_;
-    double shutter_radius_;
-    unsigned int rotations_to_discard_;
-
-    bool output_convergence_;
-    bool is_configured_;
-
-    // Define conjugate gradient solver
-    cuCgSolver<float_complext> cg_;
-
-    // Define non-Cartesian Sense Encofing operator
-    boost::shared_ptr< cuNonCartesianKtSenseOperator<float,2> > E_;
-
-    // Define preconditioner
-    boost::shared_ptr< cuCgPreconditioner<float_complext> > D_;
-
-    // Define regularization image operator
-    boost::shared_ptr< cuImageOperator<float_complext> > R_;
-
-    int frame_counter_;
-  };
-}
-#endif //gpuCgKtSenseGadget
diff --git a/gadgets/sense/gpuCgSenseGadget.cpp b/gadgets/sense/gpuCgSenseGadget.cpp
deleted file mode 100644
index b795ddc..0000000
--- a/gadgets/sense/gpuCgSenseGadget.cpp
+++ /dev/null
@@ -1,321 +0,0 @@
-#include "gpuCgSenseGadget.h"
-#include "cuNDArray_operators.h"
-#include "cuNDArray_elemwise.h"
-#include "cuNDArray_blas.h"
-#include "cuNDArray_utils.h"
-#include "cuNDArray_reductions.h"
-#include "Gadgetron.h"
-#include "GadgetMRIHeaders.h"
-#include "b1_map.h"
-#include "GPUTimer.h"
-#include "GadgetIsmrmrdReadWrite.h"
-#include "vector_td_utilities.h"
-#include "hoNDArray_fileio.h"
-
-namespace Gadgetron{
-
-  gpuCgSenseGadget::gpuCgSenseGadget()
-    : is_configured_(false)
-    , channels_(0)
-    , frame_counter_(0)
-    , matrix_size_reported_(0)
-  {
-    set_parameter(std::string("deviceno").c_str(), "0");
-    set_parameter(std::string("setno").c_str(), "0");
-    set_parameter(std::string("sliceno").c_str(), "0");
-    set_parameter(std::string("number_of_iterations").c_str(), "5");
-    set_parameter(std::string("cg_limit").c_str(), "1e-6");
-    set_parameter(std::string("oversampling_factor").c_str(), "1.25");
-    set_parameter(std::string("kernel_width").c_str(), "5.5");
-    set_parameter(std::string("kappa").c_str(), "0.3");
-    
-    matrix_size_ = uint64d2(0,0);
-    matrix_size_os_ = uint64d2(0,0);
-    matrix_size_seq_ = uint64d2(0,0);
-  }
-
-  gpuCgSenseGadget::~gpuCgSenseGadget() {}
-
-  int gpuCgSenseGadget::process_config( ACE_Message_Block* mb )
-  {
-    //GADGET_DEBUG1("gpuCgSenseGadget::process_config\n");
-
-    device_number_ = get_int_value(std::string("deviceno").c_str());
-
-    int number_of_devices = 0;
-    if (cudaGetDeviceCount(&number_of_devices)!= cudaSuccess) {
-      GADGET_DEBUG1( "Error: unable to query number of CUDA devices.\n" );
-      return GADGET_FAIL;
-    }
-
-    if (number_of_devices == 0) {
-      GADGET_DEBUG1( "Error: No available CUDA devices.\n" );
-      return GADGET_FAIL;
-    }
-
-    if (device_number_ >= number_of_devices) {
-      GADGET_DEBUG2("Adjusting device number from %d to %d\n", device_number_,  (device_number_%number_of_devices));
-      device_number_ = (device_number_%number_of_devices);
-    }
-
-    if (cudaSetDevice(device_number_)!= cudaSuccess) {
-      GADGET_DEBUG1( "Error: unable to set CUDA device.\n" );
-      return GADGET_FAIL;
-    }
-
-    pass_on_undesired_data_ = get_bool_value(std::string("pass_on_undesired_data").c_str());
-    set_number_ = get_int_value(std::string("setno").c_str());
-    slice_number_ = get_int_value(std::string("sliceno").c_str());
-    number_of_iterations_ = get_int_value(std::string("number_of_iterations").c_str());
-    cg_limit_ = get_double_value(std::string("cg_limit").c_str());
-    oversampling_factor_ = get_double_value(std::string("oversampling_factor").c_str());
-    kernel_width_ = get_double_value(std::string("kernel_width").c_str());
-    kappa_ = get_double_value(std::string("kappa").c_str());
-    output_convergence_ = get_bool_value(std::string("output_convergence").c_str());
-    output_timing_ = get_bool_value(std::string("output_timing").c_str());
-    rotations_to_discard_ = get_int_value(std::string("rotations_to_discard").c_str());
-
-    if( (rotations_to_discard_%2) == 1 ){
-      GADGET_DEBUG1("#rotations to discard must be even.\n");
-      return GADGET_FAIL;
-    }
-
-    boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg = parseIsmrmrdXMLHeader(std::string(mb->rd_ptr()));
-
-    std::vector<long> dims;
-    ISMRMRD::ismrmrdHeader::encoding_sequence e_seq = cfg->encoding();
-    if (e_seq.size() != 1) {
-      GADGET_DEBUG2("Number of encoding spaces: %d\n", e_seq.size());
-      GADGET_DEBUG1("This Gadget only supports one encoding space\n");
-      return GADGET_FAIL;
-    }
-
-    //ISMRMRD::encodingSpaceType e_space = (*e_seq.begin()).encodedSpace();
-    ISMRMRD::encodingSpaceType r_space = (*e_seq.begin()).reconSpace();
-    //ISMRMRD::encodingLimitsType e_limits = (*e_seq.begin()).encodingLimits();
-
-    matrix_size_seq_ = uint64d2( r_space.matrixSize().x(), r_space.matrixSize().y() );
-
-    if (!is_configured_) {
-
-      channels_ = cfg->acquisitionSystemInformation().present() ?
-	(cfg->acquisitionSystemInformation().get().receiverChannels().present() ? cfg->acquisitionSystemInformation().get().receiverChannels().get() : 1) : 1;
-
-      // Allocate encoding operator for non-Cartesian Sense
-      E_ = boost::shared_ptr< cuNonCartesianSenseOperator<float,2> >( new cuNonCartesianSenseOperator<float,2>() );
-
-      // Allocate preconditioner
-      D_ = boost::shared_ptr< cuCgPreconditioner<float_complext> >( new cuCgPreconditioner<float_complext>() );
-
-      // Allocate regularization image operator
-      R_ = boost::shared_ptr< cuImageOperator<float_complext> >( new cuImageOperator<float_complext>() );
-      R_->set_weight( kappa_ );
-
-      // Setup solver
-      cg_.set_encoding_operator( E_ );        // encoding matrix
-      cg_.add_regularization_operator( R_ );  // regularization matrix
-      cg_.set_preconditioner( D_ );           // preconditioning matrix
-      cg_.set_max_iterations( number_of_iterations_ );
-      cg_.set_tc_tolerance( cg_limit_ );
-      cg_.set_output_mode( (output_convergence_) ? cuCgSolver<float_complext>::OUTPUT_VERBOSE : cuCgSolver<float_complext>::OUTPUT_SILENT);
-
-      is_configured_ = true;
-    }
-
-    return GADGET_OK;
-  }
-
-  int gpuCgSenseGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader> *m1, GadgetContainerMessage<SenseJob> *m2)
-  {
-    // Is this data for this gadget's set/slice?
-    //
-    
-    if( m1->getObjectPtr()->set != set_number_ || m1->getObjectPtr()->slice != slice_number_ ) {      
-      // No, pass it downstream...
-      return this->next()->putq(m1);
-    }
-    
-    //GADGET_DEBUG1("gpuCgSenseGadget::process\n");
-
-    boost::shared_ptr<GPUTimer> process_timer;
-    if( output_timing_ )
-      process_timer = boost::shared_ptr<GPUTimer>( new GPUTimer("gpuCgSenseGadget::process()") );
-    
-    if (!is_configured_) {
-      GADGET_DEBUG1("Data received before configuration was completed\n");
-      return GADGET_FAIL;
-    }
-
-    SenseJob* j = m2->getObjectPtr();
-
-    // Some basic validation of the incoming Sense job
-    if (!j->csm_host_.get() || !j->dat_host_.get() || !j->tra_host_.get() || !j->dcw_host_.get() || !j->reg_host_.get()) {
-      GADGET_DEBUG1("Received an incomplete Sense job\n");
-      return GADGET_FAIL;
-    }
-
-    unsigned int samples = j->dat_host_->get_size(0);
-    unsigned int channels = j->dat_host_->get_size(1);
-    unsigned int rotations = samples / j->tra_host_->get_number_of_elements();
-    unsigned int frames = j->tra_host_->get_size(1)*rotations;
-
-    if( samples%j->tra_host_->get_number_of_elements() ) {
-      GADGET_DEBUG2("Mismatch between number of samples (%d) and number of k-space coordinates (%d).\nThe first should be a multiplum of the latter.\n", 
-		    samples, j->tra_host_->get_number_of_elements());
-      return GADGET_FAIL;
-    }
-
-    boost::shared_ptr< cuNDArray<floatd2> > traj(new cuNDArray<floatd2> (j->tra_host_.get()));
-    boost::shared_ptr< cuNDArray<float> > dcw(new cuNDArray<float> (j->dcw_host_.get()));
-    boost::shared_ptr< cuNDArray<float_complext> > csm(new cuNDArray<float_complext> (j->csm_host_.get()));
-    boost::shared_ptr< cuNDArray<float_complext> > device_samples(new cuNDArray<float_complext> (j->dat_host_.get()));
-
-    cudaDeviceProp deviceProp;
-    if( cudaGetDeviceProperties( &deviceProp, device_number_ ) != cudaSuccess) {
-      GADGET_DEBUG1( "Error: unable to query device properties.\n" );
-      return GADGET_FAIL;
-    }
-    
-    unsigned int warp_size = deviceProp.warpSize;
-    
-    matrix_size_ = uint64d2( j->reg_host_->get_size(0), j->reg_host_->get_size(1) );    
-
-    matrix_size_os_ =
-      uint64d2(((static_cast<unsigned int>(std::ceil(matrix_size_[0]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size,
-	     ((static_cast<unsigned int>(std::ceil(matrix_size_[1]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size);
-
-    if( !matrix_size_reported_ ) {
-      GADGET_DEBUG2("Matrix size    : [%d,%d] \n", matrix_size_[0], matrix_size_[1]);    
-      GADGET_DEBUG2("Matrix size OS : [%d,%d] \n", matrix_size_os_[0], matrix_size_os_[1]);
-      matrix_size_reported_ = true;
-    }
-
-    std::vector<size_t> image_dims = to_std_vector(matrix_size_);
-    image_dims.push_back(frames);
-    
-    E_->set_domain_dimensions(&image_dims);
-    E_->set_codomain_dimensions(device_samples->get_dimensions().get());
-    E_->set_dcw(dcw);
-    E_->set_csm(csm);
-
-    E_->setup( matrix_size_, matrix_size_os_, static_cast<float>(kernel_width_) );
-    E_->preprocess(traj.get());
-
-    boost::shared_ptr< cuNDArray<float_complext> > reg_image(new cuNDArray<float_complext> (j->reg_host_.get()));
-    R_->compute(reg_image.get());
-
-    // Define preconditioning weights
-    boost::shared_ptr< cuNDArray<float> > _precon_weights = sum(abs_square(csm.get()).get(), 2);
-    boost::shared_ptr<cuNDArray<float> > R_diag = R_->get();
-    *R_diag *= float(kappa_);
-    *_precon_weights += *R_diag;
-    R_diag.reset();
-    reciprocal_sqrt_inplace(_precon_weights.get());	
-    boost::shared_ptr< cuNDArray<float_complext> > precon_weights = real_to_complex<float_complext>( _precon_weights.get() );
-    _precon_weights.reset();
-    D_->set_weights( precon_weights );
-	
-    // Invoke solver
-    // 
-
-    boost::shared_ptr< cuNDArray<float_complext> > cgresult;
-    
-    {
-      boost::shared_ptr<GPUTimer> solve_timer;
-      if( output_timing_ )
-        solve_timer = boost::shared_ptr<GPUTimer>( new GPUTimer("gpuCgSenseGadget::solve()") );
-      
-      cgresult = cg_.solve(device_samples.get());
-      
-      if( output_timing_ )
-        solve_timer.reset();
-    }
-    
-    if (!cgresult.get()) {
-      GADGET_DEBUG1("Iterative_sense_compute failed\n");
-      return GADGET_FAIL;
-    }
-
-    /*
-      static int counter = 0;
-      char filename[256];
-      sprintf((char*)filename, "recon_%d.real", counter);
-      write_nd_array<float>( abs(cgresult.get())->to_host().get(), filename );
-      counter++; 
-    */
-
-    // If the recon matrix size exceeds the sequence matrix size then crop
-    if( matrix_size_seq_ != matrix_size_ )
-      cgresult = crop<float_complext,2>( (matrix_size_-matrix_size_seq_)>>1, matrix_size_seq_, cgresult.get() );    
-    
-    // Now pass on the reconstructed images
-    //
-
-    unsigned int frames_per_rotation = frames/rotations;
-
-    if( rotations == 1 ){ // this is the case for golden ratio
-      rotations = frames;
-      frames_per_rotation = 1;
-    }
-
-    for( unsigned int frame=0; frame<frames; frame++ ){
-      
-      unsigned int rotation_idx = frame/frames_per_rotation;
-
-      // Check if we should discard this frame
-      if( rotation_idx < (rotations_to_discard_>>1) || rotation_idx >= rotations-(rotations_to_discard_>>1) )
-        continue;
-
-      GadgetContainerMessage<ISMRMRD::ImageHeader> *m = 
-        new GadgetContainerMessage<ISMRMRD::ImageHeader>();
-
-      GadgetContainerMessage< hoNDArray< std::complex<float> > > *cm = 
-        new GadgetContainerMessage< hoNDArray< std::complex<float> > >();      
-      
-      *m->getObjectPtr() = j->image_headers_[frame];
-      m->cont(cm);
-      
-      std::vector<size_t> img_dims(2);
-      img_dims[0] = matrix_size_seq_[0];
-      img_dims[1] = matrix_size_seq_[1];
-
-      cm->getObjectPtr()->create(&img_dims);
-
-      size_t data_length = prod(matrix_size_seq_);
-
-      cudaMemcpy(cm->getObjectPtr()->get_data_ptr(),
-                 cgresult->get_data_ptr()+frame*data_length,
-                 data_length*sizeof(std::complex<float>),
-                 cudaMemcpyDeviceToHost);
-      
-      cudaError_t err = cudaGetLastError();
-      if( err != cudaSuccess ){
-        GADGET_DEBUG2("Unable to copy result from device to host: %s\n", cudaGetErrorString(err));
-        m->release();
-        return GADGET_FAIL;
-      }
-
-      m->getObjectPtr()->matrix_size[0] = matrix_size_seq_[0];
-      m->getObjectPtr()->matrix_size[1] = matrix_size_seq_[1];
-      m->getObjectPtr()->matrix_size[2] = 1;
-      m->getObjectPtr()->channels       = 1;
-      m->getObjectPtr()->image_index    = frame_counter_ + frame;
-            
-      if (this->next()->putq(m) < 0) {
-        GADGET_DEBUG1("Failed to put result image on to queue\n");
-        m->release();
-        return GADGET_FAIL;
-      }
-    }
-    
-    frame_counter_ += frames;
-
-    if( output_timing_ )
-      process_timer.reset();
-
-    m1->release();
-    return GADGET_OK;
-  }
-
-  GADGET_FACTORY_DECLARE(gpuCgSenseGadget)
-}
diff --git a/gadgets/sense/gpuCgSenseGadget.h b/gadgets/sense/gpuCgSenseGadget.h
deleted file mode 100644
index fd954b7..0000000
--- a/gadgets/sense/gpuCgSenseGadget.h
+++ /dev/null
@@ -1,71 +0,0 @@
-#ifndef gpuCgSenseGadget_H
-#define gpuCgSenseGadget_H
-#pragma once
-
-#include "gadgetron_gpusense_export.h"
-#include "Gadget.h"
-#include "SenseJob.h"
-#include "GadgetMRIHeaders.h"
-#include "cuCgSolver.h"
-#include "cuNonCartesianSenseOperator.h"
-#include "cuCgPreconditioner.h"
-#include "cuNFFT.h"
-#include "cuImageOperator.h"
-
-#include <ismrmrd.h>
-#include <complex>
-
-namespace Gadgetron{
-
-  class EXPORTGADGETS_GPUSENSE gpuCgSenseGadget : public Gadget2<ISMRMRD::ImageHeader, SenseJob>
-  {
-
-  public:
-
-    GADGET_DECLARE(gpuCgSenseGadget);
-
-    gpuCgSenseGadget();
-    virtual ~gpuCgSenseGadget();
-
-  protected:
-
-    virtual int process( GadgetContainerMessage< ISMRMRD::ImageHeader > *m1, GadgetContainerMessage< SenseJob > *m2 );
-    virtual int process_config( ACE_Message_Block* mb );
-
-    int channels_;
-    int device_number_;
-    int set_number_;
-    int slice_number_;
-
-    uint64d2 matrix_size_;
-    uint64d2 matrix_size_os_;
-    uint64d2 matrix_size_seq_;
-
-    unsigned int number_of_iterations_;
-    double cg_limit_;
-    double oversampling_factor_;
-    double kernel_width_;
-    double kappa_;
-    unsigned int rotations_to_discard_;
-
-    bool output_convergence_;
-    bool output_timing_;
-    bool matrix_size_reported_;
-    bool is_configured_;
-
-    // Define conjugate gradient solver
-    cuCgSolver<float_complext> cg_;
-
-    // Define non-Cartesian Sense Encofing operator
-    boost::shared_ptr< cuNonCartesianSenseOperator<float,2> > E_;
-
-    // Define preconditioner
-    boost::shared_ptr< cuCgPreconditioner<float_complext> > D_;
-
-    // Define regularization image operator
-    boost::shared_ptr< cuImageOperator<float_complext> > R_;
-
-    unsigned int frame_counter_;
-  };
-}
-#endif //gpuCgSenseGadget
diff --git a/gadgets/sense/gpuGenericSensePrepGadget.cpp b/gadgets/sense/gpuGenericSensePrepGadget.cpp
deleted file mode 100644
index 3815f23..0000000
--- a/gadgets/sense/gpuGenericSensePrepGadget.cpp
+++ /dev/null
@@ -1,948 +0,0 @@
-#include "gpuGenericSensePrepGadget.h"
-#include "Gadgetron.h"
-#include "GadgetIsmrmrdReadWrite.h"
-#include "cuNonCartesianSenseOperator.h"
-#include "SenseJob.h"
-#include "cuNDArray_elemwise.h"
-#include "cuNDArray_utils.h"
-#include "hoNDArray_utils.h"
-#include "vector_td_operators.h"
-#include "b1_map.h"
-#include "GPUTimer.h"
-#include "check_CUDA.h"
-#include "hoNDArray_fileio.h"
-
-#include <algorithm>
-#include <vector>
-#include <cmath>
-#include <stdexcept>
-
-namespace Gadgetron{
-
-  gpuGenericSensePrepGadget::gpuGenericSensePrepGadget()
-    : slices_(-1)
-    , sets_(-1)
-    , device_number_(-1)
-    , samples_per_readout_(-1)
-  {
-    // Set some default values in case the config does not contain a specification
-    //
-
-    set_parameter(std::string("deviceno").c_str(), "0");
-    set_parameter(std::string("rotations_per_reconstruction").c_str(), "0");
-    set_parameter(std::string("propagate_csm_from_set").c_str(), "-1");
-    set_parameter(std::string("buffer_length_in_rotations").c_str(), "0");
-    set_parameter(std::string("buffer_using_solver").c_str(), "false");
-    set_parameter(std::string("buffer_convolution_kernel_width").c_str(), "5.5");
-    set_parameter(std::string("buffer_convolution_oversampling_factor").c_str(), "1.25");
-    set_parameter(std::string("reconstruction_os_factor_x").c_str(), "1.0");
-    set_parameter(std::string("reconstruction_os_factor_y").c_str(), "1.0");
-  }
-  
-  gpuGenericSensePrepGadget::~gpuGenericSensePrepGadget() {}
-  
-  int gpuGenericSensePrepGadget::process_config(ACE_Message_Block* mb)
-  {
-    // Get configuration values from config file
-    //
-
-    device_number_ = get_int_value(std::string("deviceno").c_str());
-    rotations_per_reconstruction_ = get_int_value(std::string("rotations_per_reconstruction").c_str());
-    buffer_length_in_rotations_ = get_int_value(std::string("buffer_length_in_rotations").c_str());
-    buffer_using_solver_ = get_bool_value(std::string("buffer_using_solver").c_str());
-    output_timing_ = get_bool_value(std::string("output_timing").c_str());
-
-    // Currently there are some restrictions on the allowed sliding window configurations
-    //
-    
-    sliding_window_readouts_ = get_int_value(std::string("sliding_window_readouts").c_str());
-    sliding_window_rotations_ = get_int_value(std::string("sliding_window_rotations").c_str());
-
-    if( sliding_window_readouts_>0 && sliding_window_rotations_>0 ){
-      GADGET_DEBUG1( "Error: Sliding window reconstruction is not yet supported for both readouts and frames simultaneously.\n" );
-      return GADGET_FAIL;
-    }
-
-    if( sliding_window_readouts_>0 && rotations_per_reconstruction_>0 ){
-      GADGET_DEBUG1( "Error: Sliding window reconstruction over readouts is not yet supported for multiframe reconstructions.\n" );
-      return GADGET_FAIL;
-    }
-    
-    if( sliding_window_rotations_ > 0 && sliding_window_rotations_ >= rotations_per_reconstruction_ ){
-      GADGET_DEBUG1( "Error: Illegal sliding window configuration.\n" );
-      return GADGET_FAIL;
-    }
-
-    // Setup and validate device configuration
-    //
-
-    int number_of_devices;
-    if (cudaGetDeviceCount(&number_of_devices)!= cudaSuccess) {
-      GADGET_DEBUG1( "Error: unable to query number of CUDA devices.\n" );
-      return GADGET_FAIL;
-    }
-
-    if (number_of_devices == 0) {
-      GADGET_DEBUG1( "Error: No available CUDA devices.\n" );
-      return GADGET_FAIL;
-    }
-
-    if (device_number_ >= number_of_devices) {
-      GADGET_DEBUG2("Adjusting device number from %d to %d\n", device_number_,  (device_number_%number_of_devices));
-      device_number_ = (device_number_%number_of_devices);
-    }
-
-    if (cudaSetDevice(device_number_)!= cudaSuccess) {
-      GADGET_DEBUG1( "Error: unable to set CUDA device.\n" );
-      return GADGET_FAIL;
-    }
-
-    cudaDeviceProp deviceProp;
-    if( cudaGetDeviceProperties( &deviceProp, device_number_ ) != cudaSuccess) {
-      GADGET_DEBUG1( "Error: unable to query device properties.\n" );
-      return GADGET_FAIL;
-    }
-    
-    unsigned int warp_size = deviceProp.warpSize;
-
-    // It is possible to specify one set to use for csm propagation, and then propagate this to all sets
-    //
-
-    propagate_csm_from_set_ = get_int_value(std::string("propagate_csm_from_set").c_str());
-
-    if( propagate_csm_from_set_ > 0 ){
-      GADGET_DEBUG2("Currently, only set 0 can propagate coil sensitivity maps. Set %d was specified.\n", propagate_csm_from_set_ );
-      return GADGET_FAIL;
-    }
-
-    if( propagate_csm_from_set_ >= 0 ){
-      GADGET_DEBUG2("Propagating csm from set %d to all sets\n", propagate_csm_from_set_ );
-    }
-
-    // Convolution kernel width and oversampling ratio (for the buffer)
-    //
-
-    kernel_width_ = get_double_value(std::string("buffer_convolution_kernel_width").c_str());
-    oversampling_factor_ = get_double_value(std::string("buffer_convolution_oversampling_factor").c_str());
-
-    // Get the Ismrmrd header
-    //
-
-    boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg = parseIsmrmrdXMLHeader(std::string(mb->rd_ptr()));
-    
-    if( cfg.get() == 0x0 ){
-      GADGET_DEBUG1("Unable to parse Ismrmrd header\n");
-      return GADGET_FAIL;
-    }
-
-    ISMRMRD::ismrmrdHeader::encoding_sequence e_seq = cfg->encoding();
-
-    if (e_seq.size() != 1) {
-      GADGET_DEBUG2("Number of encoding spaces: %d\n", e_seq.size());
-      GADGET_DEBUG1("This Gadget only supports one encoding space\n");
-      return GADGET_FAIL;
-    }
-    
-    ISMRMRD::encodingSpaceType e_space = (*e_seq.begin()).encodedSpace();
-    ISMRMRD::encodingSpaceType r_space = (*e_seq.begin()).reconSpace();
-    ISMRMRD::encodingLimitsType e_limits = (*e_seq.begin()).encodingLimits();
-
-    // Matrix sizes (as a multiple of the GPU's warp size)
-    //
-    
-    image_dimensions_.push_back(e_space.matrixSize().x());
-    image_dimensions_.push_back(e_space.matrixSize().y());
-
-    image_dimensions_recon_.push_back(((static_cast<unsigned int>(std::ceil(e_space.matrixSize().x()*get_double_value(std::string("reconstruction_os_factor_x").c_str())))+warp_size-1)/warp_size)*warp_size);  
-
-    image_dimensions_recon_.push_back(((static_cast<unsigned int>(std::ceil(e_space.matrixSize().y()*get_double_value(std::string("reconstruction_os_factor_y").c_str())))+warp_size-1)/warp_size)*warp_size);
-    
-    image_dimensions_recon_os_ = uint64d2
-      (((static_cast<unsigned int>(std::ceil(image_dimensions_recon_[0]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size,
-       ((static_cast<unsigned int>(std::ceil(image_dimensions_recon_[1]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size);
-    
-    // In case the warp_size constraint kicked in
-    oversampling_factor_ = float(image_dimensions_recon_os_[0])/float(image_dimensions_recon_[0]); 
-    
-    GADGET_DEBUG2("matrix_size_x : %d, recon: %d, recon_os: %d\n", 
-		  image_dimensions_[0], image_dimensions_recon_[0], image_dimensions_recon_os_[0]);
-
-    GADGET_DEBUG2("matrix_size_y : %d, recon: %d, recon_os: %d\n", 
-		  image_dimensions_[1], image_dimensions_recon_[1], image_dimensions_recon_os_[1]);
-    
-    fov_.push_back(r_space.fieldOfView_mm().x());
-    fov_.push_back(r_space.fieldOfView_mm().y());
-    fov_.push_back(r_space.fieldOfView_mm().z());
-
-    slices_ = e_limits.slice().present() ? e_limits.slice().get().maximum() + 1 : 1;
-    sets_ = e_limits.set().present() ? e_limits.set().get().maximum() + 1 : 1;
-    
-    // Allocate readout and trajectory queues
-    // - one queue for the currently incoming frame
-    // - one queue for the upcoming reconstruction
-
-    frame_readout_queue_ = boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>[slices_*sets_]);
-    recon_readout_queue_ = boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>[slices_*sets_]);
-    frame_traj_queue_ = boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>[slices_*sets_]);
-    recon_traj_queue_ = boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>[slices_*sets_]);
-    image_headers_queue_ = boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>[slices_*sets_]);
-    
-    size_t bsize = sizeof(GadgetContainerMessage< hoNDArray< std::complex<float> > >)*image_dimensions_[0]*10;
-    
-    for( unsigned int i=0; i<slices_*sets_; i++ ){
-      frame_readout_queue_[i].high_water_mark(bsize);
-      frame_readout_queue_[i].low_water_mark(bsize);
-      frame_traj_queue_[i].high_water_mark(bsize);
-      frame_traj_queue_[i].low_water_mark(bsize);
-    }
-    
-    bsize *= (rotations_per_reconstruction_+1);
-    
-    for( unsigned int i=0; i<slices_*sets_; i++ ){
-      recon_readout_queue_[i].high_water_mark(bsize);
-      recon_readout_queue_[i].low_water_mark(bsize);
-      recon_traj_queue_[i].high_water_mark(bsize);
-      recon_traj_queue_[i].low_water_mark(bsize);
-    }
-    
-    // Define various per slice/set variables
-    //
-
-    previous_readout_no_ = boost::shared_array<long>(new long[slices_*sets_]);
-    acceleration_factor_ = boost::shared_array<long>(new long[slices_*sets_]);
-    image_counter_ = boost::shared_array<long>(new long[slices_*sets_]);
-    readout_counter_frame_= boost::shared_array<long>(new long[slices_*sets_]);
-    readout_counter_global_= boost::shared_array<long>(new long[slices_*sets_]);
-    readouts_per_frame_= boost::shared_array<long>(new long[slices_*sets_]);
-    frames_per_rotation_= boost::shared_array<long>(new long[slices_*sets_]);
-    buffer_frames_per_rotation_= boost::shared_array<long>(new long[slices_*sets_]);
-    buffer_update_needed_ = boost::shared_array<bool>(new bool[slices_*sets_]);
-    reconfigure_ = boost::shared_array<bool>(new bool[slices_*sets_]);
-    num_coils_ = boost::shared_array<unsigned int>(new unsigned int[slices_*sets_]);
-    
-    for( unsigned int i=0; i<slices_*sets_; i++ ){
-
-      previous_readout_no_[i] = -1;
-      acceleration_factor_[i] = -1;
-      image_counter_[i] = 0;
-      readout_counter_frame_[i] = 0;
-      readout_counter_global_[i] = 0;
-      readouts_per_frame_[i] = get_int_value(std::string("readouts_per_frame").c_str());
-      frames_per_rotation_[i] = get_int_value(std::string("frames_per_rotation").c_str());
-      buffer_frames_per_rotation_[i] = get_int_value(std::string("buffer_frames_per_rotation").c_str());
-      num_coils_[i] = 0;
-      buffer_update_needed_[i] = true;
-      reconfigure_[i] = true;
-
-      // Assign some default values ("upper bound estimates") of the (possibly) unknown entities
-      //
-      
-      if( readouts_per_frame_[i] == 0 ){
-	readouts_per_frame_[i] = image_dimensions_[0];
-      }
-      
-      if( frames_per_rotation_[i] == 0 ){
-	frames_per_rotation_[i] = image_dimensions_[0]/readouts_per_frame_[i];
-      }
-
-      // Also remember to set the high/low water marks of the ISMRMRD image header queue
-      //
-
-      bsize = sizeof(GadgetContainerMessage<ISMRMRD::ImageHeader>)*100*
-	std::max(1L, frames_per_rotation_[i]*rotations_per_reconstruction_);
-    
-      image_headers_queue_[i].high_water_mark(bsize);
-      image_headers_queue_[i].low_water_mark(bsize);
-    }
-
-    // If need be the following limitation can be lifted, but it would be a little tedious... 
-    //
-
-    if( buffer_using_solver_ && rotations_per_reconstruction_ < 1 ) {
-      GADGET_DEBUG1("Error: when buffering using a cg solver, 'rotations_per_reconstruction' must be specified (and strictly positive).");
-    }
-
-    if( buffer_using_solver_ && ( buffer_frames_per_rotation_[0] > 0 || buffer_length_in_rotations_ > 0 ) ){
-      GADGET_DEBUG1("Error: when buffering using a cg solver, we currently do not support specification of 'buffer_frames_per_rotation' or 'buffer_length_in_rotations'. These values are instead automatically set to match the reconstruction settings.\n");
-      return GADGET_FAIL;
-    }
-            
-    position_ = boost::shared_array<float[3]>(new float[slices_*sets_][3]);
-    read_dir_ = boost::shared_array<float[3]>(new float[slices_*sets_][3]);
-    phase_dir_ = boost::shared_array<float[3]>(new float[slices_*sets_][3]);
-    slice_dir_ = boost::shared_array<float[3]>(new float[slices_*sets_][3]);
-
-    for( unsigned int i=0; i<slices_*sets_; i++ ){
-      (position_[i])[0] = (position_[i])[1] = (position_[i])[2] = 0.0f;
-      (read_dir_[i])[0] = (read_dir_[i])[1] = (read_dir_[i])[2] = 0.0f;
-      (phase_dir_[i])[0] = (phase_dir_[i])[1] = (phase_dir_[i])[2] = 0.0f;
-      (slice_dir_[i])[0] = (slice_dir_[i])[1] = (slice_dir_[i])[2] = 0.0f;
-    }
-
-    // Allocate accumulation buffer
-    //
-
-    if( buffer_using_solver_ )
-      acc_buffer_cg_ = boost::shared_array< cuSenseBufferCg<float,2> >(new cuSenseBufferCg<float,2>[slices_*sets_]);
-    else
-      acc_buffer_ = boost::shared_array< cuSenseBuffer<float,2> >(new cuSenseBuffer<float,2>[slices_*sets_]);
-    
-    // Allocate remaining shared_arrays
-    //
-    
-    csm_host_ = boost::shared_array< hoNDArray<float_complext> >(new hoNDArray<float_complext>[slices_*sets_]);
-    reg_host_ = boost::shared_array< hoNDArray<float_complext> >(new hoNDArray<float_complext>[slices_*sets_]);
-
-    return GADGET_OK;
-  }
-
-  int gpuGenericSensePrepGadget::
-  process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader> *m1,           // header
-	  GadgetContainerMessage< hoNDArray< std::complex<float> > > *m2,   // data
-	  GadgetContainerMessage< hoNDArray<float> > *m3)                   // traj/dcw
-  {
-    // Noise should have been consumed by the noise adjust (if in the gadget chain)
-    //
-    
-    bool is_noise = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_NOISE_MEASUREMENT).isSet(m1->getObjectPtr()->flags);
-    if (is_noise) { 
-      m1->release();
-      return GADGET_OK;
-    }
-
-    // Setup timer if asked for
-    //
-
-    boost::shared_ptr<GPUTimer> process_timer;
-    if( output_timing_ )
-      process_timer = boost::shared_ptr<GPUTimer>( new GPUTimer("gpuGenericSensePrepGadget::process()") );
-
-    // Some convienient utility variables
-    //
-
-    unsigned int set = m1->getObjectPtr()->idx.set;
-    unsigned int slice = m1->getObjectPtr()->idx.slice;
-    unsigned int readout = m1->getObjectPtr()->idx.kspace_encode_step_1;
-    unsigned int idx = set*slices_+slice;
-
-    // Get a pointer to the accumulation buffer. 
-    //
-
-    cuSenseBuffer<float,2> *acc_buffer = 
-      (buffer_using_solver_) ? &acc_buffer_cg_[idx] : &acc_buffer_[idx];
-
-    // Have the imaging plane changed?
-    //
-
-    if( !vec_equal(position_[idx], m1->getObjectPtr()->position) ||
-	!vec_equal(read_dir_[idx], m1->getObjectPtr()->read_dir) || 
-	!vec_equal(phase_dir_[idx], m1->getObjectPtr()->phase_dir) ||
-	!vec_equal(slice_dir_[idx], m1->getObjectPtr()->slice_dir) ){
-      
-      // Yes indeed, clear the accumulation buffer and update structs
-      //
-
-      acc_buffer->clear();
-      buffer_update_needed_[idx] = true;
-      
-      memcpy(position_[idx],m1->getObjectPtr()->position,3*sizeof(float));
-      memcpy(read_dir_[idx],m1->getObjectPtr()->read_dir,3*sizeof(float));
-      memcpy(phase_dir_[idx],m1->getObjectPtr()->phase_dir,3*sizeof(float));
-      memcpy(slice_dir_[idx],m1->getObjectPtr()->slice_dir,3*sizeof(float));
-    }
-    
-    // Only when the first readout arrives, do we know the #samples/readout
-    //
-
-    if( samples_per_readout_ == -1 )      
-      samples_per_readout_ = m1->getObjectPtr()->number_of_samples;
-    
-    if( samples_per_readout_ != m1->getObjectPtr()->number_of_samples ){
-      GADGET_DEBUG1("Unexpected change in the readout length\n");
-      return GADGET_FAIL;
-    }
-    
-    bool new_frame_detected = false;
-
-    // Reconfigure at first pass
-    // - or if the number of coil changes
-    // - or if the reconfigure_ flag is set
-
-    if( num_coils_[idx] != m1->getObjectPtr()->active_channels ){
-      GADGET_DEBUG1("Reconfiguring (the number of coils changed)\n");
-      num_coils_[idx] = m1->getObjectPtr()->active_channels;
-      reconfigure(set, slice);
-    }
-
-    if( reconfigure_[idx] ){
-      GADGET_DEBUG1("Reconfiguring (due to boolean indicator)\n");
-      reconfigure(set, slice);
-    }
-
-    // Keep track of the incoming readout ids
-    // - to determine the number of readouts per frame
-    // - to determine the number of frames per rotation
-
-    if (previous_readout_no_[idx] >= 0) {
-
-      if ( readout > previous_readout_no_[idx]) { 
-	// This is not the last readout in the frame.
-	// Make an estimate of the acceleration factor
-	//
-	
-	long tmp_accel = readout - previous_readout_no_[idx];
-
-	if( acceleration_factor_[idx] != tmp_accel )
-	  GADGET_DEBUG2("Detected an acceleration factor of %d\n", tmp_accel);
-	
-	acceleration_factor_[idx] = tmp_accel;
-      }
-      else{ 
-
-	// This is the first readout in a new frame
-	//
-
-	if( get_int_value(std::string("readouts_per_frame").c_str()) == 0 &&
-	    readout_counter_frame_[idx] > 0 &&
-	    readout_counter_frame_[idx] != readouts_per_frame_[idx] ){ 
-
-	  // A new acceleration factor is detected
-	  //
-
-	  GADGET_DEBUG1("Reconfiguring (acceleration factor changed)\n");
-
-	  new_frame_detected = true;
-	  readouts_per_frame_[idx] = readout_counter_frame_[idx];
-
-	    // Assume that #frames/rotation equals the acceleration factor
-	    // If not, or if we cannot deduce the acceleration factor from the difference
-	    // of two subsequent readout ids, then 'frames_per_rotation' have to be specified in the config...
-	    //
-	    
-	  if( get_int_value(std::string("frames_per_rotation").c_str()) == 0 ) {
-	    frames_per_rotation_[idx] = acceleration_factor_[idx];
-	  }
-	  reconfigure(set, slice);
-	}
-      }
-    }
-    previous_readout_no_[idx] = readout;
-
-    // Enqueue readout
-    // - unless 'new_frame_detected', then the current readout does not belong to the current frame and we delay enqueing
-
-    if( !new_frame_detected ) {
-      
-      // Memory handling is easier if we make copies for our internal queues
-      frame_readout_queue_[idx].enqueue_tail(duplicate_array(m2));
-      recon_readout_queue_[idx].enqueue_tail(duplicate_array(m2));
-      frame_traj_queue_[idx].enqueue_tail(duplicate_array(m3));
-      recon_traj_queue_[idx].enqueue_tail(duplicate_array(m3));
-    }
-
-    // If the readout is the last of a "true frame" (ignoring any sliding window readouts)
-    // - then update the accumulation buffer
-
-    bool is_last_readout_in_frame = (readout_counter_frame_[idx] == readouts_per_frame_[idx]-1);
-    is_last_readout_in_frame |= new_frame_detected;
-
-    cuNDArray<floatd2> traj;
-    cuNDArray<float> dcw;
-    
-    if( is_last_readout_in_frame ){
-
-      // Get ready to update the csm/regularization buffer
-      //
-
-      // Extract this frame's samples 
-      //
-
-      boost::shared_ptr< hoNDArray<float_complext> > host_samples = 
-	extract_samples_from_queue( &frame_readout_queue_[idx], false, set, slice );
-            
-      cuNDArray<float_complext> samples( host_samples.get() );
-
-      // Extract this frame's trajectory and dcw.
-      //
-
-      extract_trajectory_and_dcw_from_queue( &frame_traj_queue_[idx], false, set, slice, 
-					     samples_per_readout_*readouts_per_frame_[idx], 1,
-					     &traj, &dcw );
-
-      // Scale dcw weights to the are of the oversampled recon matrix size
-      float scale_factor = float(prod(image_dimensions_recon_os_))/asum(&dcw);
-      dcw *= scale_factor;
-      
-      // Add this frame to the buffer
-      //
-
-      acc_buffer->set_dcw(boost::shared_ptr< cuNDArray<float> >(new cuNDArray<float>(&dcw)));
-      buffer_update_needed_[idx] |= acc_buffer->add_frame_data( &samples, &traj );
-    }
-
-    // Are we ready to reconstruct (downstream)?
-    //
-
-    long readouts_per_reconstruction = readouts_per_frame_[idx];
-
-    if( rotations_per_reconstruction_ > 0 )
-      readouts_per_reconstruction *= (frames_per_rotation_[idx]*rotations_per_reconstruction_);
-    
-    bool is_last_readout_in_reconstruction = ( recon_readout_queue_[idx].message_count() == readouts_per_reconstruction );
-
-    // Prepare the image header for this frame
-    // - if this is indeed the last profile of a new frame
-    // - or if we are about to reconstruct due to 'sliding_window_profiles_' > 0
-    
-    if( is_last_readout_in_frame || 
-	(is_last_readout_in_reconstruction && image_headers_queue_[idx].message_count() == 0) ){
-      
-      GadgetContainerMessage<ISMRMRD::ImageHeader> *header = new GadgetContainerMessage<ISMRMRD::ImageHeader>();
-      ISMRMRD::AcquisitionHeader *base_head = m1->getObjectPtr();
-
-      {
-	// Initialize header to all zeroes (there is a few fields we do not set yet)
-	ISMRMRD::ImageHeader tmp = {0};
-	*(header->getObjectPtr()) = tmp;
-      }
-
-      header->getObjectPtr()->version = base_head->version;
-
-      header->getObjectPtr()->matrix_size[0] = image_dimensions_recon_[0];
-      header->getObjectPtr()->matrix_size[1] = image_dimensions_recon_[1];
-      header->getObjectPtr()->matrix_size[2] = std::max(1L,frames_per_rotation_[idx]*rotations_per_reconstruction_);
-
-      header->getObjectPtr()->field_of_view[0] = fov_[0];
-      header->getObjectPtr()->field_of_view[1] = fov_[1];
-      header->getObjectPtr()->field_of_view[2] = fov_[2];
-
-      header->getObjectPtr()->channels = num_coils_[idx];
-      header->getObjectPtr()->slice = base_head->idx.slice;
-      header->getObjectPtr()->set = base_head->idx.set;
-
-      header->getObjectPtr()->acquisition_time_stamp = base_head->acquisition_time_stamp;
-      memcpy(header->getObjectPtr()->physiology_time_stamp, base_head->physiology_time_stamp, sizeof(uint32_t)*ISMRMRD_PHYS_STAMPS);
-
-      memcpy(header->getObjectPtr()->position, base_head->position, sizeof(float)*3);
-      memcpy(header->getObjectPtr()->read_dir, base_head->read_dir, sizeof(float)*3);
-      memcpy(header->getObjectPtr()->phase_dir, base_head->phase_dir, sizeof(float)*3);
-      memcpy(header->getObjectPtr()->slice_dir, base_head->slice_dir, sizeof(float)*3);
-      memcpy(header->getObjectPtr()->patient_table_position, base_head->patient_table_position, sizeof(float)*3);
-
-      header->getObjectPtr()->image_data_type = ISMRMRD::DATA_COMPLEX_FLOAT;
-      header->getObjectPtr()->image_index = image_counter_[idx]++; 
-      header->getObjectPtr()->image_series_index = idx;
-
-      image_headers_queue_[idx].enqueue_tail(header);
-    }
-    
-    // If it is time to reconstruct (downstream) then prepare the Sense job
-    // 
-
-    if( is_last_readout_in_reconstruction ){
-      
-      // Update csm and regularization images if the buffer has changed (completed a cycle) 
-      // - and at the first pass
-
-      if( buffer_update_needed_[idx] || 
-	  csm_host_[idx].get_number_of_elements() == 0 || 
-	  reg_host_[idx].get_number_of_elements() == 0 ){
-
-	// Get the accumulated coil images
-	//
-
-	boost::shared_ptr< cuNDArray<float_complext> > csm_data = acc_buffer->get_accumulated_coil_images();
-
- 	// Estimate CSM
-	//
-
-	if( propagate_csm_from_set_ < 0 || propagate_csm_from_set_ == set ){	  	  
-	  csm_ = estimate_b1_map<float,2>( csm_data.get() );
-	}
-	else{
-	  GADGET_DEBUG2("Set %d is reusing the csm from set %d\n", set, propagate_csm_from_set_);
-	  if( csm_.get() == 0x0 ){
-	    GADGET_DEBUG1("Error: csm has not been computed, cannot propagate\n");
-	    return GADGET_FAIL;
-	  }	  
-	}
-
-	acc_buffer->set_csm(csm_);
-	csm_host_[idx] = *(csm_->to_host());
-	
-	// Compute regularization image
-	//
-
-	boost::shared_ptr< cuNDArray<float_complext> > reg_image;
-	std::vector<size_t> dims;
-    	
-	if( buffer_using_solver_ ){
-
-	  //GPUTimer timer("\n\n AVOIDABLE PREPROCESSING. HOW EXPENSIVE?\n\n");
-
-	  extract_trajectory_and_dcw_from_queue( &recon_traj_queue_[idx], true, set, slice, 
-						 samples_per_readout_*readouts_per_frame_[idx],
-						 std::max(1L, frames_per_rotation_[idx]*rotations_per_reconstruction_),
-						 &traj, &dcw );
-
-	  // Scale dcw weights to the are of the oversampled recon matrix size
-	  float scale_factor = float(prod(image_dimensions_recon_os_))/asum(&dcw);
-	  dcw *= scale_factor;
-
-	  dims = *traj.get_dimensions();
-
- 	  std::vector<size_t> tmp_dims;
-	  tmp_dims.push_back(dims[0]*dims[1]);
-	  tmp_dims.push_back(1);
-	  
-	  traj.reshape(&tmp_dims);
-	  dcw.reshape(&tmp_dims);
-	  
-	  ((cuSenseBufferCg<float,2>*)acc_buffer)->preprocess(&traj);
-	  ((cuSenseBufferCg<float,2>*)acc_buffer)->set_dcw_for_rhs(boost::shared_ptr< cuNDArray<float> >(new cuNDArray<float>(&dcw)));
-	}
-
-	reg_image = acc_buffer->get_combined_coil_image();	
-	reg_host_[idx] = *(reg_image->to_host());
-	
-	if( buffer_using_solver_ ){
-	  traj.reshape(&dims);
-	  dcw.reshape(&dims);
-	}
-	
-	/*
-	static int counter = 0;
-	char filename[256];
-	sprintf((char*)filename, "reg_%d.cplx", counter);
-	write_nd_array<float_complext>( &reg_host_[idx], filename );
-	counter++; */
-
-	buffer_update_needed_[idx] = false;
-      }
-
-      // Prepare data array for the downstream reconstruction
-      //
-      
-      boost::shared_ptr< hoNDArray<float_complext> > samples_host = 
-	extract_samples_from_queue( &recon_readout_queue_[idx], true, set, slice );
-      
-      // Preapre the trajectory and dcw arrays.
-      // They have already been computed above 
-      // - if 'rotations_per_reconstruction_' is 0
-      // - if 'buffer_using_solver_' is true
-      
-      if( !(/*rotations_per_reconstruction_ == 0 ||*/ buffer_using_solver_) ){
-      	extract_trajectory_and_dcw_from_queue( &recon_traj_queue_[idx], true, set, slice, 
-					       samples_per_readout_*readouts_per_frame_[idx],
-					       std::max(1L, frames_per_rotation_[idx]*rotations_per_reconstruction_),
-					       &traj, &dcw );
-      }
-
-      // Set up the Sense job
-      //
-
-      GadgetContainerMessage< SenseJob > *sj = new GadgetContainerMessage<SenseJob>();
-      	
-      sj->getObjectPtr()->dat_host_ = samples_host;      
-      sj->getObjectPtr()->tra_host_ = traj.to_host();
-      sj->getObjectPtr()->dcw_host_ = dcw.to_host();
-      sj->getObjectPtr()->csm_host_ = boost::shared_ptr< hoNDArray<float_complext> >( new hoNDArray<float_complext>(csm_host_[idx]));
-      sj->getObjectPtr()->reg_host_ = boost::shared_ptr< hoNDArray<float_complext> >( new hoNDArray<float_complext>(reg_host_[idx]));
-      
-      // Pull the image headers out of the queue
-      //
-
-      long frames_per_reconstruction = 
-	std::max( 1L, frames_per_rotation_[idx]*rotations_per_reconstruction_ );
-      
-      if( image_headers_queue_[idx].message_count() != frames_per_reconstruction ){
-	sj->release();
-	GADGET_DEBUG2("Unexpected size of image header queue: %d, %d\n", 
-		      image_headers_queue_[idx].message_count(), frames_per_reconstruction);
-	return GADGET_FAIL;
-      }
-      
-      sj->getObjectPtr()->image_headers_ =
-      boost::shared_array<ISMRMRD::ImageHeader>( new ISMRMRD::ImageHeader[frames_per_reconstruction] );
-      
-      for( unsigned int i=0; i<frames_per_reconstruction; i++ ){	
-
-	ACE_Message_Block *mbq;
-
-	if( image_headers_queue_[idx].dequeue_head(mbq) < 0 ) {
-	  sj->release();
-	  GADGET_DEBUG1("Image header dequeue failed\n");
-	  return GADGET_FAIL;
-	}
-	
-	GadgetContainerMessage<ISMRMRD::ImageHeader> *m = AsContainerMessage<ISMRMRD::ImageHeader>(mbq);
-	sj->getObjectPtr()->image_headers_[i] = *m->getObjectPtr();
-
-	// In sliding window mode the header might need to go back at the end of the queue for reuse
-	// 
-	
-	if( i >= frames_per_reconstruction-sliding_window_rotations_*frames_per_rotation_[idx] ){
-	  image_headers_queue_[idx].enqueue_tail(m);
-	}
-	else {
-	  m->release();
-	}
-      }
-      
-      // The Sense Job needs an image header as well. 
-      // Let us just copy the initial one...
-
-      GadgetContainerMessage<ISMRMRD::ImageHeader> *m4 = new GadgetContainerMessage<ISMRMRD::ImageHeader>;
-
-      *m4->getObjectPtr() = sj->getObjectPtr()->image_headers_[0];
-      m4->cont(sj);
-
-      // Pass the Sense job downstream
-      //
-      
-      if (this->next()->putq(m4) < 0) {
-	GADGET_DEBUG1("Failed to put job on queue.\n");
-	m4->release();
-	return GADGET_FAIL;
-      }
-    }
-    
-    if( is_last_readout_in_frame )
-      readout_counter_frame_[idx] = 0;
-    else{
-      readout_counter_frame_[idx]++;
-    }
-
-    if( new_frame_detected ){
-
-      // The incoming profile was actually the first readout of the next frame, enqueue.
-      //
-
-      frame_readout_queue_[idx].enqueue_tail(duplicate_array(m2));
-      recon_readout_queue_[idx].enqueue_tail(duplicate_array(m2)); 
-      frame_traj_queue_[idx].enqueue_tail(duplicate_array(m3));
-      recon_traj_queue_[idx].enqueue_tail(duplicate_array(m3)); 
-
-      readout_counter_frame_[idx]++;
-    }
-
-    readout_counter_global_[idx]++;
-
-    if( output_timing_ )
-      process_timer.reset();
-    
-    m1->release(); // this is safe, the internal queues hold copies
-    return GADGET_OK;
-  }
-  
-  boost::shared_ptr< hoNDArray<float_complext> > 
-  gpuGenericSensePrepGadget::extract_samples_from_queue ( ACE_Message_Queue<ACE_MT_SYNCH> *queue, 
-							  bool sliding_window, unsigned int set, unsigned int slice )
-  {    
-    unsigned int readouts_buffered = queue->message_count();
-    
-    std::vector<size_t> dims;
-    dims.push_back(samples_per_readout_*readouts_buffered);
-    dims.push_back(num_coils_[set*slices_+slice]);
-    
-    boost::shared_ptr< hoNDArray<float_complext> > host_samples(new hoNDArray<float_complext>(&dims));
-    
-    for (unsigned int p=0; p<readouts_buffered; p++) {
-      
-      ACE_Message_Block* mbq;
-      if (queue->dequeue_head(mbq) < 0) {
-	GADGET_DEBUG1("Message dequeue failed\n");
-	throw std::runtime_error("gpuGenericSensePrepGadget::extract_samples_from_queue: dequeing failed");	
-      }
-      
-      GadgetContainerMessage< hoNDArray< std::complex<float> > > *daq = AsContainerMessage<hoNDArray< std::complex<float> > >(mbq);
-	
-      if (!daq) {
-	GADGET_DEBUG1("Unable to interpret data on message queue\n");
-	throw std::runtime_error("gpuGenericSensePrepGadget::extract_samples_from_queue: failed to interpret data");	
-      }
-	
-      for (unsigned int c = 0; c < num_coils_[set*slices_+slice]; c++) {
-	
-	float_complext *data_ptr = host_samples->get_data_ptr();
-	data_ptr += c*samples_per_readout_*readouts_buffered+p*samples_per_readout_;
-	    
-	std::complex<float> *r_ptr = daq->getObjectPtr()->get_data_ptr();
-	r_ptr += c*daq->getObjectPtr()->get_size(0);
-	  
-	memcpy(data_ptr, r_ptr, samples_per_readout_*sizeof(float_complext));
-      }
-
-      // In sliding window mode the readout might need to go back at the end of the queue
-      // 
-      
-      long readouts_in_sliding_window = sliding_window_readouts_ + 
-	readouts_per_frame_[set*slices_+slice]*frames_per_rotation_[set*slices_+slice]*sliding_window_rotations_;
-
-      if( sliding_window && p >= (readouts_buffered-readouts_in_sliding_window) )
-	queue->enqueue_tail(mbq);
-      else
-	mbq->release();
-    } 
-    
-    return host_samples;
-  }
-  
-  boost::shared_ptr< hoNDArray<float> > 
-  gpuGenericSensePrepGadget::extract_trajectory_from_queue ( ACE_Message_Queue<ACE_MT_SYNCH> *queue, 
-							     bool sliding_window, unsigned int set, unsigned int slice )
-  {    
-    if(!queue) {
-      GADGET_DEBUG1("Illegal queue pointer, cannot extract trajectory\n");
-      throw std::runtime_error("gpuGenericSensePrepGadget::extract_trajectory_from_queue: illegal queue pointer");	
-    }
-
-    if(queue->message_count()==0) {
-      GADGET_DEBUG1("Empty queue, cannot extract trajectory\n");
-      throw std::runtime_error("gpuGenericSensePrepGadget::extract_trajectory_from_queue: empty queue");	
-    }
-
-    if(samples_per_readout_ < 1) {
-      GADGET_DEBUG2("Empty queue (%d), cannot extract trajectory\n", samples_per_readout_);
-      throw std::runtime_error("gpuGenericSensePrepGadget::extract_trajectory_from_queue: empty queue");	
-    }
-    
-    unsigned int readouts_buffered = queue->message_count();
-    
-    std::vector<size_t> dims;
-    dims.push_back(3);
-    dims.push_back(samples_per_readout_);
-    dims.push_back(readouts_buffered);
-    
-    boost::shared_ptr< hoNDArray<float> > host_samples(new hoNDArray<float>(&dims));
-    
-    for (unsigned int p=0; p<readouts_buffered; p++) {      
-      ACE_Message_Block* mbq;
-      if (queue->dequeue_head(mbq) < 0) {
-	GADGET_DEBUG1("Message dequeue failed\n");
-	throw std::runtime_error("gpuGenericSensePrepGadget::extract_trajectory_from_queue: dequeing failed");	
-      }
-      
-      GadgetContainerMessage< hoNDArray<float> > *daq = AsContainerMessage<hoNDArray<float> >(mbq);
-	
-      if (!daq) {
-	GADGET_DEBUG1("Unable to interpret data on message queue\n");
-	throw std::runtime_error("gpuGenericSensePrepGadget::extract_trajectory_from_queue: failed to interpret data");	
-      }
-
-      float *data_ptr = host_samples->get_data_ptr();
-      data_ptr += 3*samples_per_readout_*p;
-      
-      float *r_ptr = daq->getObjectPtr()->get_data_ptr();
-      
-      memcpy(data_ptr, r_ptr, 3*samples_per_readout_*sizeof(float));
-      
-      // In sliding window mode the readout might need to go back at the end of the queue
-      // 
-      
-      long readouts_in_sliding_window = sliding_window_readouts_ + 
-	readouts_per_frame_[set*slices_+slice]*frames_per_rotation_[set*slices_+slice]*sliding_window_rotations_;
-
-      if( sliding_window && p >= (readouts_buffered-readouts_in_sliding_window) )
-	queue->enqueue_tail(mbq);
-      else
-	mbq->release();
-    } 
-    
-    return host_samples;
-  }
-  
-  void gpuGenericSensePrepGadget::extract_trajectory_and_dcw_from_queue
-  ( ACE_Message_Queue<ACE_MT_SYNCH> *queue, bool sliding_window, unsigned int set, unsigned int slice, 
-    unsigned int samples_per_frame, unsigned int num_frames,
-    cuNDArray<floatd2> *traj, cuNDArray<float> *dcw )
-  {
-    // Extract trajectory and dcw.
-    // They are stored as a float array of dimensions: 3 x #samples_per_readout x #readouts.
-    // We need
-    // - a floatd2 trajectory array 
-    // - a float dcw array 
-    //
-    
-    boost::shared_ptr< hoNDArray<float> > host_traj_dcw =
-      extract_trajectory_from_queue( queue, sliding_window, set, slice );
-    
-    std::vector<size_t> order;
-    order.push_back(1); order.push_back(2); order.push_back(0);
-    
-    boost::shared_ptr< hoNDArray<float> > host_traj_dcw_shifted =
-      permute( host_traj_dcw.get(), &order );
-    
-    std::vector<size_t> dims_1d;
-    dims_1d.push_back(host_traj_dcw_shifted->get_size(0)*host_traj_dcw_shifted->get_size(1));
-    
-    {
-      hoNDArray<float> tmp(&dims_1d, host_traj_dcw_shifted->get_data_ptr()+2*dims_1d[0]);
-      *dcw = tmp;
-    }
-    
-    std::vector<size_t> dims_2d = dims_1d;
-    dims_2d.push_back(2);
-    
-    order.clear();
-    order.push_back(1); order.push_back(0);
-
-    hoNDArray<float> tmp(&dims_2d, host_traj_dcw_shifted->get_data_ptr());
-    cuNDArray<float> __traj(&tmp);
-    boost::shared_ptr< cuNDArray<float> > _traj = permute( &__traj, &order );
-    
-    cuNDArray<floatd2> tmp2(&dims_1d, (floatd2*)_traj->get_data_ptr());
-    
-    *traj = tmp2;
-    
-    unsigned int idx = set*slices_+slice;
-    dims_2d.clear();
-
-    dims_2d.push_back(samples_per_frame);
-    dims_2d.push_back(num_frames);
-
-    dcw->reshape(&dims_2d);
-    traj->reshape(&dims_2d);
-  }
-
-  template<class T> GadgetContainerMessage< hoNDArray<T> >*
-  gpuGenericSensePrepGadget::duplicate_array( GadgetContainerMessage< hoNDArray<T> > *array )
-  {
-    GadgetContainerMessage< hoNDArray<T> > *copy = new GadgetContainerMessage< hoNDArray<T> >();   
-    *(copy->getObjectPtr()) = *(array->getObjectPtr());
-    return copy;
-  }
-
-  void gpuGenericSensePrepGadget::reconfigure(unsigned int set, unsigned int slice)
-  {    
-    unsigned int idx = set*slices_+slice;
-    
-    GADGET_DEBUG2("\nReconfiguring:\n#readouts/frame:%d\n#frames/rotation: %d\n#rotations/reconstruction:%d\n", 
-		  readouts_per_frame_[idx], frames_per_rotation_[idx], rotations_per_reconstruction_);
-    
-    buffer_frames_per_rotation_[idx] = get_int_value(std::string("buffer_frames_per_rotation").c_str());
-    
-    if( buffer_frames_per_rotation_[idx] == 0 ){
-      buffer_frames_per_rotation_[idx] = frames_per_rotation_[idx];
-    }
-    
-    if( get_int_value(std::string("buffer_length_in_rotations").c_str()) == 0 ){
-      buffer_length_in_rotations_ = std::max(1L, rotations_per_reconstruction_);
-    }
-
-    cuSenseBuffer<float,2> *acc_buffer = 
-      (buffer_using_solver_) ? &acc_buffer_cg_[idx] : &acc_buffer_[idx];
-    
-    if( buffer_frames_per_rotation_[idx] == 1 ){ // Is this general enough to detect golden ratio type trajectories?
-
-      acc_buffer->setup( from_std_vector<size_t,2>(image_dimensions_recon_), image_dimensions_recon_os_, 
-			 kernel_width_, num_coils_[idx], 1, buffer_length_in_rotations_ );
-    }else{
-      acc_buffer->setup( from_std_vector<size_t,2>(image_dimensions_recon_), image_dimensions_recon_os_, 
-			 kernel_width_, num_coils_[idx], buffer_length_in_rotations_, buffer_frames_per_rotation_[idx] );
-    }
-    reconfigure_[idx] = false;
-  }
-
-  GADGET_FACTORY_DECLARE(gpuGenericSensePrepGadget)
-}
diff --git a/gadgets/sense/gpuGenericSensePrepGadget.h b/gadgets/sense/gpuGenericSensePrepGadget.h
deleted file mode 100644
index 28a8401..0000000
--- a/gadgets/sense/gpuGenericSensePrepGadget.h
+++ /dev/null
@@ -1,127 +0,0 @@
-#pragma once
-
-#include "gadgetron_gpusense_export.h"
-#include "Gadget.h"
-#include "GadgetMRIHeaders.h"
-#include "hoNDArray.h"
-#include "vector_td.h"
-#include "cuNFFT.h"
-#include "cuCgPreconditioner.h"
-#include "cuSenseBufferCg.h"
-
-#include <ismrmrd.h>
-#include <complex>
-#include <boost/shared_ptr.hpp>
-#include <boost/shared_array.hpp>
-
-namespace Gadgetron{
-
-  class EXPORTGADGETS_GPUSENSE gpuGenericSensePrepGadget :
-    public Gadget3< ISMRMRD::AcquisitionHeader, hoNDArray< std::complex<float> >, hoNDArray<float> >
-  {
-    
-  public:
-    GADGET_DECLARE(gpuGenericSensePrepGadget);
-
-    gpuGenericSensePrepGadget();
-    virtual ~gpuGenericSensePrepGadget();
-
-  protected:
-    
-    virtual int process_config(ACE_Message_Block *mb);
-
-    virtual int process(GadgetContainerMessage< ISMRMRD::AcquisitionHeader > *m1,        // header
-			GadgetContainerMessage< hoNDArray< std::complex<float> > > *m2,  // data
-			GadgetContainerMessage< hoNDArray<float> > *m3 );                // traj/dcw
-
-  private:
-
-    inline bool vec_equal(float *in1, float *in2) {
-      for (unsigned int i = 0; i < 3; i++) {
-	if (in1[i] != in2[i]) return false;
-      }
-      return true;
-    }
-    
-    boost::shared_array<bool> reconfigure_;
-    virtual void reconfigure(unsigned int set, unsigned int slice);
-
-    template<class T> GadgetContainerMessage< hoNDArray<T> >* 
-      duplicate_array( GadgetContainerMessage< hoNDArray<T> > *array );
-    
-    boost::shared_ptr< hoNDArray<float_complext> > 
-      extract_samples_from_queue ( ACE_Message_Queue<ACE_MT_SYNCH> *queue, 
-				   bool sliding_window, unsigned int set, unsigned int slice );
-    
-    boost::shared_ptr< hoNDArray<float> > 
-      extract_trajectory_from_queue ( ACE_Message_Queue<ACE_MT_SYNCH> *queue, 
-				      bool sliding_window, unsigned int set, unsigned int slice );
-      
-    void extract_trajectory_and_dcw_from_queue
-      ( ACE_Message_Queue<ACE_MT_SYNCH> *queue, bool sliding_window, unsigned int set, unsigned int slice, 
-	unsigned int samples_per_frame, unsigned int num_frames,
-	cuNDArray<floatd2> *traj, cuNDArray<float> *dcw );
-    
-    int slices_;
-    int sets_;
-    int device_number_;
-    long samples_per_readout_;
-
-    boost::shared_array<long> image_counter_;
-    boost::shared_array<long> readouts_per_frame_;  // for an undersampled frame
-    boost::shared_array<long> frames_per_rotation_; // representing a fully sampled frame
-
-    // The number of rotations to batch per reconstruction. 
-    // Set to '0' to reconstruct frames individually.
-    long rotations_per_reconstruction_; 
-
-    // The number of buffer cycles
-    long buffer_length_in_rotations_; 
-
-    boost::shared_array<long> buffer_frames_per_rotation_; // the number of buffer subcycles
-
-    // Internal book-keping
-    boost::shared_array<long> previous_readout_no_;
-    boost::shared_array<long> acceleration_factor_;
-    boost::shared_array<long> readout_counter_frame_;
-    boost::shared_array<long> readout_counter_global_;
-
-    long sliding_window_readouts_;
-    long sliding_window_rotations_;
-
-    float kernel_width_;
-    float oversampling_factor_;
-
-    boost::shared_array<unsigned int> num_coils_;
-
-    boost::shared_array<float[3]> position_;
-    boost::shared_array<float[3]> read_dir_;
-    boost::shared_array<float[3]> phase_dir_;
-    boost::shared_array<float[3]> slice_dir_;
-
-    bool output_timing_;
-    bool buffer_using_solver_;
-
-    int propagate_csm_from_set_;
-    boost::shared_ptr< cuNDArray<float_complext> > csm_;
-
-    boost::shared_array<bool> buffer_update_needed_;
-
-    boost::shared_array< hoNDArray<float_complext> > csm_host_;
-    boost::shared_array< hoNDArray<float_complext> > reg_host_;
-    
-    boost::shared_array< cuSenseBuffer<float,2> > acc_buffer_;
-    boost::shared_array< cuSenseBufferCg<float,2> > acc_buffer_cg_;
-
-    std::vector<size_t> fov_;
-    std::vector<size_t> image_dimensions_;
-    std::vector<size_t> image_dimensions_recon_;
-    uint64d2 image_dimensions_recon_os_;
-
-    boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> > frame_readout_queue_;
-    boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> > recon_readout_queue_;
-    boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> > frame_traj_queue_;
-    boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> > recon_traj_queue_;
-    boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> > image_headers_queue_;
-  };
-}
diff --git a/gadgets/sense/gpuSbSenseGadget.cpp b/gadgets/sense/gpuSbSenseGadget.cpp
deleted file mode 100644
index 39e6d3e..0000000
--- a/gadgets/sense/gpuSbSenseGadget.cpp
+++ /dev/null
@@ -1,426 +0,0 @@
-#include "gpuSbSenseGadget.h"
-#include "cuNDArray_operators.h"
-#include "cuNDArray_elemwise.h"
-#include "cuNDArray_blas.h"
-#include "cuNDArray_utils.h"
-#include "cuNDArray_reductions.h"
-#include "Gadgetron.h"
-#include "GadgetMRIHeaders.h"
-#include "b1_map.h"
-#include "GPUTimer.h"
-#include "GadgetIsmrmrdReadWrite.h"
-#include "vector_td_utilities.h"
-#include "hoNDArray_fileio.h"
-
-#include <boost/thread/mutex.hpp>
-
-namespace Gadgetron{
-
-#define max_number_of_gpus 10
-  static boost::mutex _mutex[max_number_of_gpus];
-  
-  gpuSbSenseGadget::gpuSbSenseGadget()
-    : is_configured_(false)
-    , prepared_(false)
-    , channels_(0)
-    , frame_counter_(0)
-  {
-    set_parameter(std::string("deviceno").c_str(), "0");
-    set_parameter(std::string("setno").c_str(), "0");
-    set_parameter(std::string("sliceno").c_str(), "0");
-    set_parameter(std::string("number_of_sb_iterations").c_str(), "20");
-    set_parameter(std::string("number_of_cg_iterations").c_str(), "10");
-    set_parameter(std::string("cg_limit").c_str(), "1e-6");
-    set_parameter(std::string("oversampling_factor").c_str(), "1.5");
-    set_parameter(std::string("kernel_width").c_str(), "5.5");
-    set_parameter(std::string("mu").c_str(), "1.0");
-    set_parameter(std::string("lambda").c_str(), "2.0");
-    set_parameter(std::string("alpha").c_str(), "0.5");
-    set_parameter(std::string("exclusive_access").c_str(), "false");
-
-    matrix_size_ = uint64d2(0,0);
-    matrix_size_os_ = uint64d2(0,0);
-    matrix_size_seq_ = uint64d2(0,0);
-  }
-
-  gpuSbSenseGadget::~gpuSbSenseGadget() {}
-
-  int gpuSbSenseGadget::process_config( ACE_Message_Block* mb )
-  {
-    GADGET_DEBUG1("gpuSbSenseGadget::process_config\n");
-
-    device_number_ = get_int_value(std::string("deviceno").c_str());
-
-    int number_of_devices = 0;
-    if (cudaGetDeviceCount(&number_of_devices)!= cudaSuccess) {
-      GADGET_DEBUG1( "Error: unable to query number of CUDA devices.\n" );
-      return GADGET_FAIL;
-    }
-
-    if (number_of_devices == 0) {
-      GADGET_DEBUG1( "Error: No available CUDA devices.\n" );
-      return GADGET_FAIL;
-    }
-
-    if (device_number_ >= number_of_devices) {
-      GADGET_DEBUG2("Adjusting device number from %d to %d\n", device_number_,  (device_number_%number_of_devices));
-      device_number_ = (device_number_%number_of_devices);
-    }
-
-    if (cudaSetDevice(device_number_)!= cudaSuccess) {
-      GADGET_DEBUG1( "Error: unable to set CUDA device.\n" );
-      return GADGET_FAIL;
-    }
-
-    pass_on_undesired_data_ = get_bool_value(std::string("pass_on_undesired_data").c_str());
-    set_number_ = get_int_value(std::string("setno").c_str());
-    slice_number_ = get_int_value(std::string("sliceno").c_str());
-    number_of_sb_iterations_ = get_int_value(std::string("number_of_sb_iterations").c_str());
-    number_of_cg_iterations_ = get_int_value(std::string("number_of_cg_iterations").c_str());
-    cg_limit_ = get_double_value(std::string("cg_limit").c_str());
-    oversampling_factor_ = get_double_value(std::string("oversampling_factor").c_str());
-    kernel_width_ = get_double_value(std::string("kernel_width").c_str());
-    mu_ = get_double_value(std::string("mu").c_str());
-    lambda_ = get_double_value(std::string("lambda").c_str());
-    alpha_ = get_double_value(std::string("alpha").c_str());
-    rotations_to_discard_ = get_int_value(std::string("rotations_to_discard").c_str());
-    output_convergence_ = get_bool_value(std::string("output_convergence").c_str());
-    exclusive_access_ = get_bool_value(std::string("exclusive_access").c_str());
-
-    if( (rotations_to_discard_%2) == 1 ){
-      GADGET_DEBUG1("#rotations to discard must be even.\n");
-      return GADGET_FAIL;
-    }
-
-    boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg = parseIsmrmrdXMLHeader(std::string(mb->rd_ptr()));
-
-    std::vector<long> dims;
-    ISMRMRD::ismrmrdHeader::encoding_sequence e_seq = cfg->encoding();
-    if (e_seq.size() != 1) {
-      GADGET_DEBUG2("Number of encoding spaces: %d\n", e_seq.size());
-      GADGET_DEBUG1("This Gadget only supports one encoding space\n");
-      return GADGET_FAIL;
-    }
-    
-    //ISMRMRD::encodingSpaceType e_space = (*e_seq.begin()).encodedSpace();
-    ISMRMRD::encodingSpaceType r_space = (*e_seq.begin()).reconSpace();
-    //ISMRMRD::encodingLimitsType e_limits = (*e_seq.begin()).encodingLimits();
-
-    matrix_size_seq_ = uint64d2( r_space.matrixSize().x(), r_space.matrixSize().y() );
-
-    if (!is_configured_) {
-
-      channels_ = cfg->acquisitionSystemInformation().present() ?
-	(cfg->acquisitionSystemInformation().get().receiverChannels().present() ? cfg->acquisitionSystemInformation().get().receiverChannels().get() : 1) : 1;
-     
-      // Allocate encoding operator for non-Cartesian Sense
-      E_ = boost::shared_ptr< cuNonCartesianSenseOperator<float,2> >( new cuNonCartesianSenseOperator<float,2>() );
-      E_->set_weight(mu_);
-
-      // Allocate preconditioner
-      D_ = boost::shared_ptr< cuCgPreconditioner<float_complext> >( new cuCgPreconditioner<float_complext>() );
-
-      Rx1_ = boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> >
-	( new cuPartialDerivativeOperator<float_complext,3>(0) );
-      Rx1_->set_weight( (1.0-alpha_)*lambda_ );
-
-      Ry1_ = boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> >
-	( new cuPartialDerivativeOperator<float_complext,3>(1) );
-      Ry1_->set_weight( (1.0-alpha_)*lambda_ );
-
-      Rz1_ = boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> >
-	( new cuPartialDerivativeOperator<float_complext,3>(2) );
-      Rz1_->set_weight( (1.0-alpha_)*lambda_ );
-
-      Rx2_ = boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> >
-	( new cuPartialDerivativeOperator<float_complext,3>(0) );
-      Rx2_->set_weight( alpha_*lambda_ );
-
-      Ry2_ = boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> >
-	( new cuPartialDerivativeOperator<float_complext,3>(1) );
-      Ry2_->set_weight( alpha_*lambda_ );
-
-      Rz2_ = boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> >
-	( new cuPartialDerivativeOperator<float_complext,3>(2) );
-      Rz2_->set_weight( alpha_*lambda_ );
-
-      // Setup split-Bregman solver
-      sb_.set_encoding_operator( E_ );
-            
-      sb_.set_max_outer_iterations(number_of_sb_iterations_);
-      sb_.set_max_inner_iterations(1);
-      sb_.set_output_mode( (output_convergence_) ? cuSbcCgSolver<float_complext>::OUTPUT_VERBOSE : cuSbcCgSolver<float_complext>::OUTPUT_SILENT );
-      
-      sb_.get_inner_solver()->set_max_iterations( number_of_cg_iterations_ );
-      sb_.get_inner_solver()->set_tc_tolerance( cg_limit_ );
-      sb_.get_inner_solver()->set_output_mode( (output_convergence_) ? cuCgSolver<float_complext>::OUTPUT_VERBOSE : cuCgSolver<float_complext>::OUTPUT_SILENT );
-      sb_.get_inner_solver()->set_preconditioner( D_ );
-
-      is_configured_ = true;
-    }
-
-    GADGET_DEBUG1("gpuSbSenseGadget::end of process_config\n");
-
-    return GADGET_OK;
-  }
-
-  int gpuSbSenseGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader> *m1, GadgetContainerMessage<SenseJob> *m2)
-  {
-    // Is this data for this gadget's set/slice?
-    //
-    
-    if( m1->getObjectPtr()->set != set_number_ || m1->getObjectPtr()->slice != slice_number_ ) {      
-      // No, pass it downstream...
-      return this->next()->putq(m1);
-    }
-
-    //GADGET_DEBUG1("gpuSbSenseGadget::process\n");
-    //GPUTimer timer("gpuSbSenseGadget::process");
-
-    if (!is_configured_) {
-      GADGET_DEBUG1("\nData received before configuration complete\n");
-      return GADGET_FAIL;
-    }
-
-    SenseJob* j = m2->getObjectPtr();
-
-    // Let's first check that this job has the required data...
-    if (!j->csm_host_.get() || !j->dat_host_.get() || !j->tra_host_.get() || !j->dcw_host_.get()) {
-      GADGET_DEBUG1("Received an incomplete Sense job\n");
-      return GADGET_FAIL;
-    }
-
-    unsigned int samples = j->dat_host_->get_size(0);
-    unsigned int channels = j->dat_host_->get_size(1);
-    unsigned int rotations = samples / j->tra_host_->get_number_of_elements();
-    unsigned int frames = j->tra_host_->get_size(1)*rotations;
-
-    if( samples%j->tra_host_->get_number_of_elements() ) {
-      GADGET_DEBUG2("Mismatch between number of samples (%d) and number of k-space coordinates (%d).\nThe first should be a multiplum of the latter.\n", 
-		    samples, j->tra_host_->get_number_of_elements());
-      return GADGET_FAIL;
-    }
-
-    boost::shared_ptr< cuNDArray<floatd2> > traj(new cuNDArray<floatd2> (j->tra_host_.get()));
-    boost::shared_ptr< cuNDArray<float> > dcw(new cuNDArray<float> (j->dcw_host_.get()));
-    boost::shared_ptr< cuNDArray<float_complext> > csm(new cuNDArray<float_complext> (j->csm_host_.get()));
-    boost::shared_ptr< cuNDArray<float_complext> > device_samples(new cuNDArray<float_complext> (j->dat_host_.get()));
-    
-    if( !prepared_){
-
-      // Take the reconstruction matrix size from the regulariaztion image. 
-      // It could be oversampled from the sequence specified size...
-      
-      matrix_size_ = uint64d2( j->reg_host_->get_size(0), j->reg_host_->get_size(1) );
-      
-      cudaDeviceProp deviceProp;
-      if( cudaGetDeviceProperties( &deviceProp, device_number_ ) != cudaSuccess) {
-	GADGET_DEBUG1( "\nError: unable to query device properties.\n" );
-	return GADGET_FAIL;
-      }
-
-      unsigned int warp_size = deviceProp.warpSize;
-
-      matrix_size_os_ =
-	uint64d2(((static_cast<unsigned int>(std::ceil(matrix_size_[0]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size,
-	       ((static_cast<unsigned int>(std::ceil(matrix_size_[1]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size);
-      
-      GADGET_DEBUG2("Matrix size    : [%d,%d] \n", matrix_size_[0], matrix_size_[1]);
-      GADGET_DEBUG2("Matrix size OS : [%d,%d] \n", matrix_size_os_[0], matrix_size_os_[1]);
-
-      std::vector<size_t> image_dims = to_std_vector(matrix_size_);
-      image_dims.push_back(frames);
-      
-      E_->set_domain_dimensions(&image_dims);
-      E_->set_codomain_dimensions(device_samples->get_dimensions().get());
-            
-      reg_image_ = boost::shared_ptr< cuNDArray<float_complext> >(new cuNDArray<float_complext>(&image_dims));
-      
-      // These operators need their domain/codomain set before being added to the solver
-      //
-
-      Rx1_->set_domain_dimensions(&image_dims);
-      Rx1_->set_codomain_dimensions(&image_dims);
-      
-      Ry1_->set_domain_dimensions(&image_dims);
-      Ry1_->set_codomain_dimensions(&image_dims);
-      
-      Rz1_->set_domain_dimensions(&image_dims);
-      Rz1_->set_codomain_dimensions(&image_dims);
-      
-      Rx2_->set_domain_dimensions(&image_dims);
-      Rx2_->set_codomain_dimensions(&image_dims);
-      
-      Ry2_->set_domain_dimensions(&image_dims);
-      Ry2_->set_codomain_dimensions(&image_dims);
-      
-      Rz2_->set_domain_dimensions(&image_dims);
-      Rz2_->set_codomain_dimensions(&image_dims);
-      
-      // Add "TV" regularization
-      // 
-      
-      if( alpha_<1.0 ){
-	sb_.add_regularization_group_operator( Rx1_ ); 
-	sb_.add_regularization_group_operator( Ry1_ ); 
-	if(frames>1)
-	  sb_.add_regularization_group_operator( Rz1_ ); 
-	sb_.add_group();
-      }
-      
-      // Add "PICCS" regularization
-      //
-
-      if( alpha_ > 0.0 ){
-	sb_.add_regularization_group_operator( Rx2_ ); 
-	sb_.add_regularization_group_operator( Ry2_ ); 
-	if(frames>1)
-	  sb_.add_regularization_group_operator( Rz2_ ); 
-	sb_.add_group(reg_image_);
-      }
-      
-      prepared_ = true;
-    }
-    
-    E_->set_dcw(dcw);
-    E_->set_csm(csm);    
-    E_->setup( matrix_size_, matrix_size_os_, static_cast<float>(kernel_width_) );
-    E_->preprocess(traj.get());
-
-    // Expand the average image to the number of frames
-    //
-
-    {
-      cuNDArray<float_complext> tmp(*j->reg_host_);
-      *reg_image_ = *expand( &tmp, frames );
-    }
-
-    // Define preconditioning weights
-    //
-
-    boost::shared_ptr< cuNDArray<float> > _precon_weights = sum(abs_square(csm.get()).get(), 2);
-    reciprocal_sqrt_inplace(_precon_weights.get());	
-    boost::shared_ptr< cuNDArray<float_complext> > precon_weights = real_to_complex<float_complext>( _precon_weights.get() );
-    _precon_weights.reset();
-    D_->set_weights( precon_weights );
-    precon_weights.reset();
-    
-    // Invoke solver
-    //
-
-    boost::shared_ptr< cuNDArray<float_complext> > sbresult;
-    {
-      GADGET_DEBUG1("Running split Bregman solver\n");
-      GPUTimer timer("Running split Bregman solver");
-
-      // Optionally, allow exclusive (per device) access to the solver
-      // This may not matter much in terms of speed, but it can in terms of memory consumption
-      //
-
-      if( exclusive_access_ )
-	_mutex[device_number_].lock();
-
-      sbresult = sb_.solve(device_samples.get());
-
-      if( exclusive_access_ )
-	_mutex[device_number_].unlock();
-    }
-
-    // Provide some info about the scaling between the regularization and reconstruction.
-    // If it is not close to one, PICCS does not work optimally...
-    // 
-
-    if( alpha_ > 0.0 ){
-      cuNDArray<float_complext> gpureg(j->reg_host_.get());
-      boost::shared_ptr< cuNDArray<float_complext> > gpurec = sum(sbresult.get(),2);
-      *gpurec /= float(sbresult->get_size(2));
-      float scale = abs(dot(gpurec.get(), gpurec.get())/dot(gpurec.get(),&gpureg));
-      GADGET_DEBUG2("Scaling factor between regularization and reconstruction is %f.\n", scale);
-    }
-    
-    if (!sbresult.get()) {
-      GADGET_DEBUG1("\nSplit Bregman solver failed\n");
-      return GADGET_FAIL;
-    }
-    
-    /*
-    static int counter = 0;
-    char filename[256];
-    sprintf((char*)filename, "recon_sb_%d.cplx", counter);
-    write_nd_array<float_complext>( sbresult->to_host().get(), filename );
-    counter++; */
-
-    // If the recon matrix size exceeds the sequence matrix size then crop
-    if( matrix_size_seq_ != matrix_size_ )
-      sbresult = crop<float_complext,2>( (matrix_size_-matrix_size_seq_)>>1, matrix_size_seq_, sbresult.get() );
-        
-    // Now pass on the reconstructed images
-    //
-
-    unsigned int frames_per_rotation = frames/rotations;
-
-    if( rotations == 1 ){ // this is the case for golden ratio
-      rotations = frames;
-      frames_per_rotation = 1;
-    }
-
-    for( unsigned int frame=0; frame<frames; frame++ ){
-      
-      unsigned int rotation_idx = frame/frames_per_rotation;
-
-      // Check if we should discard this frame
-      if( rotation_idx < (rotations_to_discard_>>1) || rotation_idx >= rotations-(rotations_to_discard_>>1) )
-	continue;
-
-      GadgetContainerMessage< hoNDArray< std::complex<float> > > *cm = 
-	new GadgetContainerMessage< hoNDArray< std::complex<float> > >();     
-
-      GadgetContainerMessage<ISMRMRD::ImageHeader> *m = 
-	new GadgetContainerMessage<ISMRMRD::ImageHeader>();
-
-      *m->getObjectPtr() = j->image_headers_[frame];
-      m->getObjectPtr()->matrix_size[0] = matrix_size_seq_[0];
-      m->getObjectPtr()->matrix_size[1] = matrix_size_seq_[1];      
-      m->cont(cm);
-      
-      std::vector<size_t> img_dims(2);
-      img_dims[0] = matrix_size_seq_[0];
-      img_dims[1] = matrix_size_seq_[1];
-
-      cm->getObjectPtr()->create(&img_dims);
-
-      size_t data_length = prod(matrix_size_seq_);
-
-      cudaMemcpy(cm->getObjectPtr()->get_data_ptr(),
-		 sbresult->get_data_ptr()+frame*data_length,
-		 data_length*sizeof(std::complex<float>),
-		 cudaMemcpyDeviceToHost);
-
-      cudaError_t err = cudaGetLastError();
-      if( err != cudaSuccess ){
-	GADGET_DEBUG2("\nUnable to copy result from device to host: %s", cudaGetErrorString(err));
-	m->release();
-	return GADGET_FAIL;
-      }
-
-      m->getObjectPtr()->matrix_size[0] = img_dims[0];
-      m->getObjectPtr()->matrix_size[1] = img_dims[1];
-      m->getObjectPtr()->matrix_size[2] = 1;
-      m->getObjectPtr()->channels       = 1;
-      m->getObjectPtr()->image_index    = frame_counter_ + frame;
-
-      if (this->next()->putq(m) < 0) {
-	GADGET_DEBUG1("\nFailed to result image on to Q\n");
-	m->release();
-	return GADGET_FAIL;
-      }
-    }
-
-    frame_counter_ += frames;
-    m1->release();
-    return GADGET_OK;
-  }
-
-  GADGET_FACTORY_DECLARE(gpuSbSenseGadget)
-}
-
diff --git a/gadgets/sense/gpuSbSenseGadget.h b/gadgets/sense/gpuSbSenseGadget.h
deleted file mode 100644
index 9355692..0000000
--- a/gadgets/sense/gpuSbSenseGadget.h
+++ /dev/null
@@ -1,85 +0,0 @@
-#ifndef gpuSbSenseGadget_H
-#define gpuSbSenseGadget_H
-#pragma once
-
-#include <ace/Synch.h>
-#include <ace/Mutex.h>
-
-#include "gadgetron_gpusense_export.h"
-#include "Gadget.h"
-#include "SenseJob.h"
-#include "GadgetMRIHeaders.h"
-#include "cuSbcCgSolver.h"
-#include "cuNonCartesianSenseOperator.h"
-#include "cuCgPreconditioner.h"
-#include "cuPartialDerivativeOperator.h"
-#include "cuNFFT.h"
-#include "cuImageOperator.h"
-#include "ismrmrd.h"
-
-#include <complex>
-
-namespace Gadgetron{
-
-  class EXPORTGADGETS_GPUSENSE gpuSbSenseGadget : public Gadget2< ISMRMRD::ImageHeader, SenseJob >
-  {
-
-  public:
-    GADGET_DECLARE(gpuSbSenseGadget);
-
-    gpuSbSenseGadget();
-    virtual ~gpuSbSenseGadget();
-
-  protected:
-
-    virtual int process( GadgetContainerMessage< ISMRMRD::ImageHeader >* m1, GadgetContainerMessage< SenseJob > * m2 );
-    virtual int process_config( ACE_Message_Block* mb );
-
-    int channels_;
-    int device_number_;
-    int set_number_;
-    int slice_number_;
-
-    uint64d2 matrix_size_;
-    uint64d2 matrix_size_os_;
-    uint64d2 matrix_size_seq_;
-
-    unsigned int number_of_cg_iterations_;
-    unsigned int number_of_sb_iterations_;
-    double cg_limit_;
-    double oversampling_factor_;
-    double kernel_width_;
-    double mu_;
-    double lambda_;
-    double alpha_;
-    unsigned int rotations_to_discard_;
-
-    bool output_convergence_;
-    bool exclusive_access_;
-    bool is_configured_;
-    bool prepared_;
-
-    // Define constraint Split Bregman solver
-    cuSbcCgSolver<float_complext> sb_;
-
-    // Define non-Cartesian Sense Encofing operator
-    boost::shared_ptr< cuNonCartesianSenseOperator<float,2> > E_;
-
-    // Define preconditioner
-    boost::shared_ptr< cuCgPreconditioner<float_complext> > D_;
-
-    // Average image for regularization
-    boost::shared_ptr< cuNDArray<float_complext> > reg_image_;
-
-    // Define regularization operators
-    boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> > Rx1_;
-    boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> > Rx2_;
-    boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> > Ry1_;
-    boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> > Ry2_;
-    boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> > Rz1_;
-    boost::shared_ptr< cuPartialDerivativeOperator<float_complext,3> > Rz2_;
-	
-    int frame_counter_;
-  };
-}
-#endif //gpuSbSenseGadget
diff --git a/gadgets/spiral/CMakeLists.txt b/gadgets/spiral/CMakeLists.txt
index 32bd768..71e85be 100644
--- a/gadgets/spiral/CMakeLists.txt
+++ b/gadgets/spiral/CMakeLists.txt
@@ -3,12 +3,10 @@ IF (WIN32)
 ENDIF (WIN32)
 
 find_package(Ismrmrd REQUIRED)
-find_package(XSD REQUIRED)
-find_package(XercesC REQUIRED)
 
 include_directories(
   ${CMAKE_SOURCE_DIR}/gadgets/mri_core
-  ${CMAKE_SOURCE_DIR}/gadgets/sense
+  ${CMAKE_SOURCE_DIR}/gadgets/pmri
   ${CMAKE_SOURCE_DIR}/toolboxes/nfft/gpu
   ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
   ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
@@ -17,23 +15,30 @@ include_directories(
   ${CMAKE_SOURCE_DIR}/toolboxes/solvers/gpu
   ${CMAKE_SOURCE_DIR}/toolboxes/operators
   ${CMAKE_SOURCE_DIR}/toolboxes/operators/gpu
-  ${ISMRMRD_XSD_INCLUDE_DIR}
   ${CUDA_INCLUDE_DIRS}
   )
 
 add_library(gadgetron_spiral SHARED 
+  gadgetron_spiral_export.h 
   vds.cpp 
-  gpuSpiralSensePrepGadget.cpp 
-  SpiralToGenericGadget.cpp
-  ${ISMRMRD_XSD_SOURCE})
+  gpuSpiralSensePrepGadget.h gpuSpiralSensePrepGadget.cpp 
+  SpiralToGenericGadget.h SpiralToGenericGadget.cpp)
+
+set_target_properties(gadgetron_spiral PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
 
 target_link_libraries(gadgetron_spiral
-  cpucore gpucore gpunfft gpusolvers gpuoperators gpuparallelmri
-  ${ISMRMRD_LIBRARIES} ${XERCESC_LIBRARIES} ${FFTW3_LIBRARIES} ${CUDA_LIBRARIES}
+  gadgetron_gadgetbase
+  gadgetron_toolbox_cpucore gadgetron_toolbox_gpucore gadgetron_toolbox_gpunfft gadgetron_toolbox_gpusolvers gadgetron_toolbox_gpuoperators
+  ${ISMRMRD_LIBRARIES} ${FFTW3_LIBRARIES} ${CUDA_LIBRARIES}
   optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY}
   )
 
-install (TARGETS gadgetron_spiral DESTINATION lib)
-install (FILES vds.h DESTINATION include)
+target_link_libraries(gadgetron_spiral gadgetron_toolbox_gpuparallelmri)
+
+install (TARGETS gadgetron_spiral DESTINATION lib COMPONENT main)
+install (FILES vds.h gadgetron_spiral_export.h 
+                     gpuSpiralSensePrepGadget.h 
+                     SpiralToGenericGadget.h 
+                     DESTINATION include COMPONENT main)
 
 add_subdirectory(config)
diff --git a/gadgets/spiral/SpiralToGenericGadget.cpp b/gadgets/spiral/SpiralToGenericGadget.cpp
index 831f127..8b15652 100644
--- a/gadgets/spiral/SpiralToGenericGadget.cpp
+++ b/gadgets/spiral/SpiralToGenericGadget.cpp
@@ -1,5 +1,5 @@
 #include "SpiralToGenericGadget.h"
-#include "GadgetIsmrmrdReadWrite.h"
+#include "ismrmrd/xml.h"
 #include "vds.h"
 
 #include <algorithm>
@@ -21,105 +21,98 @@ namespace Gadgetron{
   {
     // Start parsing the ISMRMRD XML header
     //
+  ISMRMRD::IsmrmrdHeader h;
+  ISMRMRD::deserialize(mb->rd_ptr(),h);
+  
+  
+  if (h.encoding.size() != 1) {
+    GADGET_DEBUG1("This Gadget only supports one encoding space\n");
+    return GADGET_FAIL;
+  }
 
-    boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg = parseIsmrmrdXMLHeader(std::string(mb->rd_ptr()));
-
-    if( cfg.get() == 0x0 ){
-      GADGET_DEBUG1("Unable to parse Ismrmrd header\n");
-      return GADGET_FAIL;
-    }
-
-    ISMRMRD::ismrmrdHeader::encoding_sequence e_seq = cfg->encoding();
-
-    if (e_seq.size() != 1) {
-      GADGET_DEBUG2("Number of encoding spaces: %d\n", e_seq.size());
-      GADGET_DEBUG1("This Gadget only supports one encoding space\n");
-      return GADGET_FAIL;
-    }
-
-    //ISMRMRD::encodingSpaceType e_space = (*e_seq.begin()).encodedSpace();
-    ISMRMRD::encodingSpaceType r_space = (*e_seq.begin()).reconSpace();
-    ISMRMRD::encodingLimitsType e_limits = (*e_seq.begin()).encodingLimits();
-
-    //
-    // Setup the spiral trajectory
-    //
+  // Get the encoding space and trajectory description
+  ISMRMRD::EncodingSpace e_space = h.encoding[0].encodedSpace;
+  ISMRMRD::EncodingSpace r_space = h.encoding[0].reconSpace;
+  ISMRMRD::EncodingLimits e_limits = h.encoding[0].encodingLimits;
+  ISMRMRD::TrajectoryDescription traj_desc;
+
+  if (h.encoding[0].trajectoryDescription) {
+    traj_desc = *h.encoding[0].trajectoryDescription;
+  } else {
+    GADGET_DEBUG1("Trajectory description missing");
+    return GADGET_FAIL;
+  }
 
-    if (!(*e_seq.begin()).trajectoryDescription().present()) {
-      GADGET_DEBUG1("Trajectory description needed to calculate trajectory");
-      return GADGET_FAIL;
-    }
+  if (std::strcmp(traj_desc.identifier.c_str(), "HargreavesVDS2000")) {
+    GADGET_DEBUG1("Expected trajectory description identifier 'HargreavesVDS2000', not found.");
+    return GADGET_FAIL;
+  }
 
-    ISMRMRD::trajectoryDescriptionType traj_desc = (*e_seq.begin()).trajectoryDescription().get();
 
-    if (std::strcmp(traj_desc.identifier().c_str(), "HargreavesVDS2000")) {
-      GADGET_DEBUG1("Expected trajectory description identifier 'HargreavesVDS2000', not found.");
-      return GADGET_FAIL;
-    }
+  long interleaves = -1;
+  long fov_coefficients = -1;
+  long sampling_time_ns = -1;
+  double max_grad = -1.0;
+  double max_slew = -1.0;
+  double fov_coeff = -1.0;
+  double kr_max = -1.0;
 
-    long interleaves = -1;
-    long fov_coefficients = -1;
-    long sampling_time_ns = -1;
-    double max_grad = -1.0;
-    double max_slew = -1.0;
-    double fov_coeff = -1.0;
-    double kr_max = -1.0;
-
-    for (ISMRMRD::trajectoryDescriptionType::userParameterLong_sequence::iterator i (traj_desc.userParameterLong().begin ()); i != traj_desc.userParameterLong().end(); ++i) {
-      if (std::strcmp(i->name().c_str(),"interleaves") == 0) {
-	interleaves = i->value();
-      } else if (std::strcmp(i->name().c_str(),"fov_coefficients") == 0) {
-	fov_coefficients = i->value();
-      } else if (std::strcmp(i->name().c_str(),"SamplingTime_ns") == 0) {
-	sampling_time_ns = i->value();
-      } else {
-	GADGET_DEBUG2("WARNING: unused trajectory parameter %s found\n", i->name().c_str());
-      }
+  
+  for (std::vector<ISMRMRD::UserParameterLong>::iterator i (traj_desc.userParameterLong.begin()); i != traj_desc.userParameterLong.end(); ++i) {
+    if (std::strcmp(i->name.c_str(),"interleaves") == 0) {
+      interleaves = i->value;
+    } else if (std::strcmp(i->name.c_str(),"fov_coefficients") == 0) {
+	fov_coefficients = i->value;
+      } else if (std::strcmp(i->name.c_str(),"SamplingTime_ns") == 0) {
+	sampling_time_ns = i->value;
+    } else {
+      GADGET_DEBUG2("WARNING: unused trajectory parameter %s found\n", i->name.c_str());
     }
+  }
 
-    for (ISMRMRD::trajectoryDescriptionType::userParameterDouble_sequence::iterator i (traj_desc.userParameterDouble().begin ()); i != traj_desc.userParameterDouble().end(); ++i) {
-      if (std::strcmp(i->name().c_str(),"MaxGradient_G_per_cm") == 0) {
-	max_grad = i->value();
-      } else if (std::strcmp(i->name().c_str(),"MaxSlewRate_G_per_cm_per_s") == 0) {
-	max_slew = i->value();
-      } else if (std::strcmp(i->name().c_str(),"FOVCoeff_1_cm") == 0) {
-	fov_coeff = i->value();
-      } else if (std::strcmp(i->name().c_str(),"krmax_per_cm") == 0) {
-	kr_max= i->value();
+  for (std::vector<ISMRMRD::UserParameterDouble>::iterator i (traj_desc.userParameterDouble.begin()); i != traj_desc.userParameterDouble.end(); ++i) {
+    if (std::strcmp(i->name.c_str(),"MaxGradient_G_per_cm") == 0) {
+	max_grad = i->value;
+      } else if (std::strcmp(i->name.c_str(),"MaxSlewRate_G_per_cm_per_s") == 0) {
+	max_slew = i->value;
+      } else if (std::strcmp(i->name.c_str(),"FOVCoeff_1_cm") == 0) {
+	fov_coeff = i->value;
+      } else if (std::strcmp(i->name.c_str(),"krmax_per_cm") == 0) {
+	kr_max= i->value;
       } else {
-	GADGET_DEBUG2("WARNING: unused trajectory parameter %s found\n", i->name().c_str());
+	GADGET_DEBUG2("WARNING: unused trajectory parameter %s found\n", i->name.c_str());
       }
-    }
-
-    if ((interleaves < 0) || (fov_coefficients < 0) || (sampling_time_ns < 0) || (max_grad < 0) || (max_slew < 0) || (fov_coeff < 0) || (kr_max < 0)) {
-      GADGET_DEBUG1("Appropriate parameters for calculating spiral trajectory not found in XML configuration\n");
-      return GADGET_FAIL;
-    }
-
-    Tsamp_ns_ = sampling_time_ns;
-    Nints_ = interleaves;
-    interleaves_ = static_cast<int>(Nints_);
-
-    gmax_ = max_grad;
-    smax_ = max_slew;
-    krmax_ = kr_max;
-    fov_ = fov_coeff;
-
-    samples_to_skip_start_  =  0; //n.get<int>(std::string("samplestoskipstart.value"))[0];
-    samples_to_skip_end_    = -1; //n.get<int>(std::string("samplestoskipend.value"))[0];
-
-    GADGET_DEBUG2("smax:                    %f\n", smax_);
-    GADGET_DEBUG2("gmax:                    %f\n", gmax_);
-    GADGET_DEBUG2("Tsamp_ns:                %d\n", Tsamp_ns_);
-    GADGET_DEBUG2("Nints:                   %d\n", Nints_);
-    GADGET_DEBUG2("fov:                     %f\n", fov_);
-    GADGET_DEBUG2("krmax:                   %f\n", krmax_);
-    GADGET_DEBUG2("samples_to_skip_start_ : %d\n", samples_to_skip_start_);
-    GADGET_DEBUG2("samples_to_skip_end_   : %d\n", samples_to_skip_end_);
-
-    return GADGET_OK;
+  }
+  
+  if ((interleaves < 0) || (fov_coefficients < 0) || (sampling_time_ns < 0) || (max_grad < 0) || (max_slew < 0) || (fov_coeff < 0) || (kr_max < 0)) {
+    GADGET_DEBUG1("Appropriate parameters for calculating spiral trajectory not found in XML configuration\n");
+    return GADGET_FAIL;
   }
 
+  Tsamp_ns_ = sampling_time_ns;
+  Nints_ = interleaves;
+  interleaves_ = static_cast<int>(Nints_);
+  
+  gmax_ = max_grad;
+  smax_ = max_slew;
+  krmax_ = kr_max;
+  fov_ = fov_coeff;
+  
+  samples_to_skip_start_  =  0; //n.get<int>(std::string("samplestoskipstart.value"))[0];
+  samples_to_skip_end_    = -1; //n.get<int>(std::string("samplestoskipend.value"))[0];
+  
+  GADGET_DEBUG2("smax:                    %f\n", smax_);
+  GADGET_DEBUG2("gmax:                    %f\n", gmax_);
+  GADGET_DEBUG2("Tsamp_ns:                %d\n", Tsamp_ns_);
+  GADGET_DEBUG2("Nints:                   %d\n", Nints_);
+  GADGET_DEBUG2("fov:                     %f\n", fov_);
+  GADGET_DEBUG2("krmax:                   %f\n", krmax_);
+  GADGET_DEBUG2("samples_to_skip_start_ : %d\n", samples_to_skip_start_);
+  GADGET_DEBUG2("samples_to_skip_end_   : %d\n", samples_to_skip_end_);
+  
+  return GADGET_OK;
+  }
+  
   int SpiralToGenericGadget::
   process(GadgetContainerMessage<ISMRMRD::AcquisitionHeader> *m1,
 	  GadgetContainerMessage< hoNDArray< std::complex<float> > > *m2)
@@ -127,7 +120,7 @@ namespace Gadgetron{
     // Noise should have been consumed by the noise adjust, but just in case...
     //
 
-    bool is_noise = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_NOISE_MEASUREMENT).isSet(m1->getObjectPtr()->flags);
+    bool is_noise = m1->getObjectPtr()->isFlagSet(ISMRMRD::ISMRMRD_ACQ_IS_NOISE_MEASUREMENT);
     if (is_noise) {
       m1->release();
       return GADGET_OK;
diff --git a/gadgets/spiral/SpiralToGenericGadget.h b/gadgets/spiral/SpiralToGenericGadget.h
index 27764ab..d28bd30 100644
--- a/gadgets/spiral/SpiralToGenericGadget.h
+++ b/gadgets/spiral/SpiralToGenericGadget.h
@@ -7,7 +7,7 @@
 #include "GadgetMRIHeaders.h"
 #include "hoNDArray.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <complex>
 #include <boost/shared_ptr.hpp>
 
diff --git a/gadgets/spiral/config/CMakeLists.txt b/gadgets/spiral/config/CMakeLists.txt
index 5c3861f..ed1c006 100644
--- a/gadgets/spiral/config/CMakeLists.txt
+++ b/gadgets/spiral/config/CMakeLists.txt
@@ -5,7 +5,7 @@ if (ARMADILLO_FOUND)
     spiral_flow_generic_gpusense_cg.xml 
     spiral_flow_generic_gpusense_sb.xml 
     spiral_interactive.xml 
-    DESTINATION config)
+    DESTINATION ${GADGETRON_INSTALL_CONFIG_PATH} COMPONENT main)
 elseif (ARMADILLO_FOUND)
   MESSAGE("Armadillo not found, only unoptimized spiral config files will be available")
 endif (ARMADILLO_FOUND)
@@ -13,4 +13,4 @@ endif (ARMADILLO_FOUND)
 install (FILES 
   spiral_flow_gpusense_cg_unoptimized.xml 
   spiral_flow_gpusense_sb_unoptimized.xml 
-  DESTINATION config)
+  DESTINATION ${GADGETRON_INSTALL_CONFIG_PATH} COMPONENT main)
diff --git a/gadgets/spiral/config/spiral_flow_generic_gpusense_cg.xml b/gadgets/spiral/config/spiral_flow_generic_gpusense_cg.xml
index 7a5ae1b..93357ff 100644
--- a/gadgets/spiral/config/spiral_flow_generic_gpusense_cg.xml
+++ b/gadgets/spiral/config/spiral_flow_generic_gpusense_cg.xml
@@ -52,7 +52,7 @@
 
   <gadget>
     <name>gpuGenericSensePrepGadget</name>
-    <dll>gadgetron_gpusense</dll>
+    <dll>gadgetron_gpuparallelmri</dll>
     <classname>gpuGenericSensePrepGadget</classname>
     <property><name>deviceno</name><value>0</value></property>
     <property><name>rotations_per_reconstruction</name><value>16</value></property>
@@ -65,7 +65,7 @@
   
   <gadget>
     <name>gpuCgSenseGadget</name>
-    <dll>gadgetron_gpusense</dll>
+    <dll>gadgetron_gpuparallelmri</dll>
     <classname>gpuCgSenseGadget</classname>
     <property><name>pass_on_undesired_data</name>  <value>true</value></property>
     <property><name>deviceno</name>                <value>0</value></property>
@@ -80,7 +80,7 @@
 
   <gadget>
     <name>gpuCgSenseGadget</name>
-    <dll>gadgetron_gpusense</dll>
+    <dll>gadgetron_gpuparallelmri</dll>
     <classname>gpuCgSenseGadget</classname>
     <property><name>pass_on_undesired_data</name>  <value>true</value></property>
     <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/spiral/config/spiral_flow_generic_gpusense_sb.xml b/gadgets/spiral/config/spiral_flow_generic_gpusense_sb.xml
index 2313eef..3d5cf87 100644
--- a/gadgets/spiral/config/spiral_flow_generic_gpusense_sb.xml
+++ b/gadgets/spiral/config/spiral_flow_generic_gpusense_sb.xml
@@ -52,7 +52,7 @@
   
   <gadget>
     <name>gpuGenericSensePrepGadget</name>
-    <dll>gadgetron_gpusense</dll>
+    <dll>gadgetron_gpuparallelmri</dll>
     <classname>gpuGenericSensePrepGadget</classname>
     <property><name>deviceno</name><value>0</value></property>
     <property><name>rotations_per_reconstruction</name><value>16</value></property>
@@ -66,7 +66,7 @@
 
   <gadget>
     <name>gpuSbSenseGadget</name>
-    <dll>gadgetron_gpusense</dll>
+    <dll>gadgetron_gpuparallelmri</dll>
     <classname>gpuSbSenseGadget</classname>
     <property><name>pass_on_undesired_data</name>  <value>true</value></property>
     <property><name>deviceno</name>                <value>0</value></property>
@@ -85,7 +85,7 @@
 
   <gadget>
     <name>gpuSbSenseGadget</name>
-    <dll>gadgetron_gpusense</dll>
+    <dll>gadgetron_gpuparallelmri</dll>
     <classname>gpuSbSenseGadget</classname>
     <property><name>pass_on_undesired_data</name>  <value>true</value></property>
     <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/spiral/config/spiral_flow_gpusense_cg.xml b/gadgets/spiral/config/spiral_flow_gpusense_cg.xml
index 18bbb9e..1001825 100644
--- a/gadgets/spiral/config/spiral_flow_gpusense_cg.xml
+++ b/gadgets/spiral/config/spiral_flow_gpusense_cg.xml
@@ -59,7 +59,7 @@
   
   <gadget>
     <name>gpuCgSenseGadget</name>
-    <dll>gadgetron_gpusense</dll>
+    <dll>gadgetron_gpuparallelmri</dll>
     <classname>gpuCgSenseGadget</classname>
     <property><name>pass_on_undesired_data</name>  <value>true</value></property>
     <property><name>deviceno</name>                <value>0</value></property>
@@ -73,7 +73,7 @@
 
   <gadget>
     <name>gpuCgSenseGadget</name>
-    <dll>gadgetron_gpusense</dll>
+    <dll>gadgetron_gpuparallelmri</dll>
     <classname>gpuCgSenseGadget</classname>
     <property><name>pass_on_undesired_data</name>  <value>true</value></property>
     <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/spiral/config/spiral_flow_gpusense_cg_ecg.xml b/gadgets/spiral/config/spiral_flow_gpusense_cg_ecg.xml
index 09dd515..2138ce4 100644
--- a/gadgets/spiral/config/spiral_flow_gpusense_cg_ecg.xml
+++ b/gadgets/spiral/config/spiral_flow_gpusense_cg_ecg.xml
@@ -59,7 +59,7 @@
   
   <gadget>
     <name>gpuCgSenseGadget</name>
-    <dll>gadgetron_gpusense</dll>
+    <dll>gadgetron_gpuparallelmri</dll>
     <classname>gpuCgSenseGadget</classname>
     <property><name>pass_on_undesired_data</name>  <value>true</value></property>
     <property><name>deviceno</name>                <value>0</value></property>
@@ -73,7 +73,7 @@
 
   <gadget>
     <name>gpuCgSenseGadget</name>
-    <dll>gadgetron_gpusense</dll>
+    <dll>gadgetron_gpuparallelmri</dll>
     <classname>gpuCgSenseGadget</classname>
     <property><name>pass_on_undesired_data</name>  <value>true</value></property>
     <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/spiral/config/spiral_flow_gpusense_cg_unoptimized.xml b/gadgets/spiral/config/spiral_flow_gpusense_cg_unoptimized.xml
index b174cc2..edaa26e 100644
--- a/gadgets/spiral/config/spiral_flow_gpusense_cg_unoptimized.xml
+++ b/gadgets/spiral/config/spiral_flow_gpusense_cg_unoptimized.xml
@@ -41,7 +41,7 @@
   
     <gadget>
     <name>gpuCgSenseGadget</name>
-    <dll>gadgetron_gpusense</dll>
+    <dll>gadgetron_gpuparallelmri</dll>
     <classname>gpuCgSenseGadget</classname>
     <property><name>pass_on_undesired_data</name>  <value>true</value></property>
     <property><name>deviceno</name>                <value>0</value></property>
@@ -55,7 +55,7 @@
 
   <gadget>
     <name>gpuCgSenseGadget</name>
-    <dll>gadgetron_gpusense</dll>
+    <dll>gadgetron_gpuparallelmri</dll>
     <classname>gpuCgSenseGadget</classname>
     <property><name>pass_on_undesired_data</name>  <value>true</value></property>
     <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/spiral/config/spiral_flow_gpusense_sb.xml b/gadgets/spiral/config/spiral_flow_gpusense_sb.xml
index 61e63bf..c1a3fa7 100644
--- a/gadgets/spiral/config/spiral_flow_gpusense_sb.xml
+++ b/gadgets/spiral/config/spiral_flow_gpusense_sb.xml
@@ -60,7 +60,7 @@
   
   <gadget>
     <name>gpuSbSenseGadget</name>
-    <dll>gadgetron_gpusense</dll>
+    <dll>gadgetron_gpuparallelmri</dll>
     <classname>gpuSbSenseGadget</classname>
     <property><name>pass_on_undesired_data</name>  <value>true</value></property>
     <property><name>deviceno</name>                <value>0</value></property>
@@ -78,7 +78,7 @@
 
   <gadget>
     <name>gpuSbSenseGadget</name>
-    <dll>gadgetron_gpusense</dll>
+    <dll>gadgetron_gpuparallelmri</dll>
     <classname>gpuSbSenseGadget</classname>
     <property><name>pass_on_undesired_data</name>  <value>true</value></property>
     <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/spiral/config/spiral_flow_gpusense_sb_unoptimized.xml b/gadgets/spiral/config/spiral_flow_gpusense_sb_unoptimized.xml
index cbcad54..6fbc308 100644
--- a/gadgets/spiral/config/spiral_flow_gpusense_sb_unoptimized.xml
+++ b/gadgets/spiral/config/spiral_flow_gpusense_sb_unoptimized.xml
@@ -41,7 +41,7 @@
   
   <gadget>
     <name>gpuSbSenseGadget</name>
-    <dll>gadgetron_gpusense</dll>
+    <dll>gadgetron_gpuparallelmri</dll>
     <classname>gpuSbSenseGadget</classname>
     <property><name>pass_on_undesired_data</name>  <value>true</value></property>
     <property><name>deviceno</name>                <value>0</value></property>
@@ -58,7 +58,7 @@
 
   <gadget>
     <name>gpuSbSenseGadget</name>
-    <dll>gadgetron_gpusense</dll>
+    <dll>gadgetron_gpuparallelmri</dll>
     <classname>gpuSbSenseGadget</classname>
     <property><name>pass_on_undesired_data</name>  <value>true</value></property>
     <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/spiral/config/spiral_interactive.xml b/gadgets/spiral/config/spiral_interactive.xml
index 2c138fd..8eb9c4f 100644
--- a/gadgets/spiral/config/spiral_interactive.xml
+++ b/gadgets/spiral/config/spiral_interactive.xml
@@ -55,7 +55,7 @@
   
   <gadget>
     <name>gpuCgSenseGadget</name>
-    <dll>gadgetron_gpusense</dll>
+    <dll>gadgetron_gpuparallelmri</dll>
     <classname>gpuCgSenseGadget</classname>
     <property><name>pass_on_undesired_data</name>  <value>true</value></property>
     <property><name>deviceno</name>                <value>0</value></property>
@@ -69,7 +69,7 @@
 
   <gadget>
     <name>gpuCgSenseGadget</name>
-    <dll>gadgetron_gpusense</dll>
+    <dll>gadgetron_gpuparallelmri</dll>
     <classname>gpuCgSenseGadget</classname>
     <property><name>pass_on_undesired_data</name>  <value>true</value></property>
     <property><name>deviceno</name>                <value>0</value></property>
diff --git a/gadgets/spiral/gpuSpiralSensePrepGadget.cpp b/gadgets/spiral/gpuSpiralSensePrepGadget.cpp
index 852670d..e78dd02 100644
--- a/gadgets/spiral/gpuSpiralSensePrepGadget.cpp
+++ b/gadgets/spiral/gpuSpiralSensePrepGadget.cpp
@@ -1,5 +1,5 @@
 #include "gpuSpiralSensePrepGadget.h"
-#include "SenseJob.h"
+#include "GenericReconJob.h"
 #include "Gadgetron.h"
 #include "cuNDArray_utils.h"
 #include "cuNDArray_reductions.h"
@@ -10,8 +10,8 @@
 #include "check_CUDA.h"
 #include "b1_map.h"
 #include "GPUTimer.h"
-#include "GadgetIsmrmrdReadWrite.h"
 #include "vds.h"
+#include "ismrmrd/xml.h"
 
 #include <algorithm>
 #include <vector>
@@ -93,24 +93,20 @@ namespace Gadgetron{
     // Start parsing the ISMRMRD XML header
     //
 
-    boost::shared_ptr<ISMRMRD::ismrmrdHeader> cfg = parseIsmrmrdXMLHeader(std::string(mb->rd_ptr()));
-
-    if( cfg.get() == 0x0 ){
-      GADGET_DEBUG1("Unable to parse Ismrmrd header\n");
-      return GADGET_FAIL;
-    }
-
-    ISMRMRD::ismrmrdHeader::encoding_sequence e_seq = cfg->encoding();
-
-    if (e_seq.size() != 1) {
-      GADGET_DEBUG2("Number of encoding spaces: %d\n", e_seq.size());
+    ISMRMRD::IsmrmrdHeader h;
+    ISMRMRD::deserialize(mb->rd_ptr(),h);
+    
+    
+    if (h.encoding.size() != 1) {
       GADGET_DEBUG1("This Gadget only supports one encoding space\n");
       return GADGET_FAIL;
     }
-
-    ISMRMRD::encodingSpaceType e_space = (*e_seq.begin()).encodedSpace();
-    ISMRMRD::encodingSpaceType r_space = (*e_seq.begin()).reconSpace();
-    ISMRMRD::encodingLimitsType e_limits = (*e_seq.begin()).encodingLimits();
+    
+    // Get the encoding space and trajectory description
+    ISMRMRD::EncodingSpace e_space = h.encoding[0].encodedSpace;
+    ISMRMRD::EncodingSpace r_space = h.encoding[0].reconSpace;
+    ISMRMRD::EncodingLimits e_limits = h.encoding[0].encodingLimits;
+    ISMRMRD::TrajectoryDescription traj_desc;
 
     // Determine reconstruction matrix sizes
     //
@@ -118,8 +114,8 @@ namespace Gadgetron{
     kernel_width_ = get_double_value(std::string("buffer_convolution_kernel_width").c_str());
     oversampling_factor_ = get_double_value(std::string("buffer_convolution_oversampling_factor").c_str());
     
-    image_dimensions_recon_.push_back(((static_cast<unsigned int>(std::ceil(e_space.matrixSize().x()*get_double_value(std::string("reconstruction_os_factor_x").c_str())))+warp_size-1)/warp_size)*warp_size);  
-    image_dimensions_recon_.push_back(((static_cast<unsigned int>(std::ceil(e_space.matrixSize().y()*get_double_value(std::string("reconstruction_os_factor_y").c_str())))+warp_size-1)/warp_size)*warp_size);
+    image_dimensions_recon_.push_back(((static_cast<unsigned int>(std::ceil(e_space.matrixSize.x*get_double_value(std::string("reconstruction_os_factor_x").c_str())))+warp_size-1)/warp_size)*warp_size);  
+    image_dimensions_recon_.push_back(((static_cast<unsigned int>(std::ceil(e_space.matrixSize.y*get_double_value(std::string("reconstruction_os_factor_y").c_str())))+warp_size-1)/warp_size)*warp_size);
       
     image_dimensions_recon_os_ = uint64d2
       (((static_cast<unsigned int>(std::ceil(image_dimensions_recon_[0]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size,
@@ -127,23 +123,21 @@ namespace Gadgetron{
     
     // In case the warp_size constraint kicked in
     oversampling_factor_ = float(image_dimensions_recon_os_[0])/float(image_dimensions_recon_[0]);
-    
-    //
-    // Setup the spiral trajectory
-    //
 
-    if (!(*e_seq.begin()).trajectoryDescription().present()) {
-      GADGET_DEBUG1("Trajectory description needed to calculate trajectory");
+
+    if (h.encoding[0].trajectoryDescription) {
+      traj_desc = *h.encoding[0].trajectoryDescription;
+    } else {
+      GADGET_DEBUG1("Trajectory description missing");
       return GADGET_FAIL;
     }
-
-    ISMRMRD::trajectoryDescriptionType traj_desc = (*e_seq.begin()).trajectoryDescription().get();
-
-    if (std::strcmp(traj_desc.identifier().c_str(), "HargreavesVDS2000")) {
+    
+    if (std::strcmp(traj_desc.identifier.c_str(), "HargreavesVDS2000")) {
       GADGET_DEBUG1("Expected trajectory description identifier 'HargreavesVDS2000', not found.");
       return GADGET_FAIL;
     }
-
+    
+    
     long interleaves = -1;
     long fov_coefficients = -1;
     long sampling_time_ns = -1;
@@ -151,38 +145,40 @@ namespace Gadgetron{
     double max_slew = -1.0;
     double fov_coeff = -1.0;
     double kr_max = -1.0;
-
-    for (ISMRMRD::trajectoryDescriptionType::userParameterLong_sequence::iterator i (traj_desc.userParameterLong().begin ()); i != traj_desc.userParameterLong().end(); ++i) {
-      if (std::strcmp(i->name().c_str(),"interleaves") == 0) {
-	interleaves = i->value();
-      } else if (std::strcmp(i->name().c_str(),"fov_coefficients") == 0) {
-	fov_coefficients = i->value();
-      } else if (std::strcmp(i->name().c_str(),"SamplingTime_ns") == 0) {
-	sampling_time_ns = i->value();
+    
+    
+    for (std::vector<ISMRMRD::UserParameterLong>::iterator i (traj_desc.userParameterLong.begin()); i != traj_desc.userParameterLong.end(); ++i) {
+      if (std::strcmp(i->name.c_str(),"interleaves") == 0) {
+	interleaves = i->value;
+      } else if (std::strcmp(i->name.c_str(),"fov_coefficients") == 0) {
+	fov_coefficients = i->value;
+      } else if (std::strcmp(i->name.c_str(),"SamplingTime_ns") == 0) {
+	sampling_time_ns = i->value;
       } else {
-	GADGET_DEBUG2("WARNING: unused trajectory parameter %s found\n", i->name().c_str());
+	GADGET_DEBUG2("WARNING: unused trajectory parameter %s found\n", i->name.c_str());
       }
     }
 
-    for (ISMRMRD::trajectoryDescriptionType::userParameterDouble_sequence::iterator i (traj_desc.userParameterDouble().begin ()); i != traj_desc.userParameterDouble().end(); ++i) {
-      if (std::strcmp(i->name().c_str(),"MaxGradient_G_per_cm") == 0) {
-	max_grad = i->value();
-      } else if (std::strcmp(i->name().c_str(),"MaxSlewRate_G_per_cm_per_s") == 0) {
-	max_slew = i->value();
-      } else if (std::strcmp(i->name().c_str(),"FOVCoeff_1_cm") == 0) {
-	fov_coeff = i->value();
-      } else if (std::strcmp(i->name().c_str(),"krmax_per_cm") == 0) {
-	kr_max= i->value();
+    for (std::vector<ISMRMRD::UserParameterDouble>::iterator i (traj_desc.userParameterDouble.begin()); i != traj_desc.userParameterDouble.end(); ++i) {
+      if (std::strcmp(i->name.c_str(),"MaxGradient_G_per_cm") == 0) {
+	max_grad = i->value;
+      } else if (std::strcmp(i->name.c_str(),"MaxSlewRate_G_per_cm_per_s") == 0) {
+	max_slew = i->value;
+      } else if (std::strcmp(i->name.c_str(),"FOVCoeff_1_cm") == 0) {
+	fov_coeff = i->value;
+      } else if (std::strcmp(i->name.c_str(),"krmax_per_cm") == 0) {
+	kr_max= i->value;
       } else {
-	GADGET_DEBUG2("WARNING: unused trajectory parameter %s found\n", i->name().c_str());
+	GADGET_DEBUG2("WARNING: unused trajectory parameter %s found\n", i->name.c_str());
       }
     }
-
+    
     if ((interleaves < 0) || (fov_coefficients < 0) || (sampling_time_ns < 0) || (max_grad < 0) || (max_slew < 0) || (fov_coeff < 0) || (kr_max < 0)) {
       GADGET_DEBUG1("Appropriate parameters for calculating spiral trajectory not found in XML configuration\n");
       return GADGET_FAIL;
     }
-
+    
+    
     Tsamp_ns_ = sampling_time_ns;
     Nints_ = interleaves;
     interleaves_ = static_cast<int>(Nints_);
@@ -195,12 +191,12 @@ namespace Gadgetron{
     samples_to_skip_start_  = 0; //n.get<int>(std::string("samplestoskipstart.value"))[0];
     samples_to_skip_end_    = -1; //n.get<int>(std::string("samplestoskipend.value"))[0];
 
-    fov_vec_.push_back(r_space.fieldOfView_mm().x());
-    fov_vec_.push_back(r_space.fieldOfView_mm().y());
-    fov_vec_.push_back(r_space.fieldOfView_mm().z());
+    fov_vec_.push_back(r_space.fieldOfView_mm.x);
+    fov_vec_.push_back(r_space.fieldOfView_mm.y);
+    fov_vec_.push_back(r_space.fieldOfView_mm.z);
 
-    slices_ = e_limits.slice().present() ? e_limits.slice().get().maximum() + 1 : 1;
-    sets_ = e_limits.set().present() ? e_limits.set().get().maximum() + 1 : 1;
+    slices_ = e_limits.slice ? e_limits.slice->maximum + 1 : 1;
+    sets_ = e_limits.set ? e_limits.set->maximum + 1 : 1;
 
     buffer_ = boost::shared_array< ACE_Message_Queue<ACE_MT_SYNCH> >(new ACE_Message_Queue<ACE_MT_SYNCH>[slices_*sets_]);
 
@@ -235,7 +231,7 @@ namespace Gadgetron{
     // Noise should have been consumed by the noise adjust, but just in case...
     //
 
-    bool is_noise = ISMRMRD::FlagBit(ISMRMRD::ACQ_IS_NOISE_MEASUREMENT).isSet(m1->getObjectPtr()->flags);
+    bool is_noise = m1->getObjectPtr()->isFlagSet(ISMRMRD::ISMRMRD_ACQ_IS_NOISE_MEASUREMENT);
     if (is_noise) {
       m1->release();
       return GADGET_OK;
@@ -309,7 +305,7 @@ namespace Gadgetron{
 
       if( buffer_using_solver_ ){
 
-	E_->set_dcw(dcw_buffer_);
+	E_->set_dcw(sqrt(dcw_buffer_.get()));
 
 	D_ = boost::shared_ptr< cuCgPreconditioner<float_complext> >( new cuCgPreconditioner<float_complext>() );
 	cg_.set_encoding_operator( E_ );
@@ -410,8 +406,7 @@ namespace Gadgetron{
     // Have we received sufficient data for a new frame?
     //
 
-    bool is_last_scan_in_slice = 
-      ISMRMRD::FlagBit(ISMRMRD::ACQ_LAST_IN_SLICE).isSet(m1->getObjectPtr()->flags);
+    bool is_last_scan_in_slice = m1->getObjectPtr()->isFlagSet(ISMRMRD::ISMRMRD_ACQ_LAST_IN_SLICE);
 
     if (is_last_scan_in_slice) {
 
@@ -457,7 +452,7 @@ namespace Gadgetron{
 
       {
 	// Initialize header to all zeroes (there is a few fields we do not set yet)
-	ISMRMRD::ImageHeader tmp = {0};
+	ISMRMRD::ImageHeader tmp;
 	*(header->getObjectPtr()) = tmp;
       }
 
@@ -476,7 +471,7 @@ namespace Gadgetron{
       header->getObjectPtr()->set = base_head->idx.set;
 
       header->getObjectPtr()->acquisition_time_stamp = base_head->acquisition_time_stamp;
-      memcpy(header->getObjectPtr()->physiology_time_stamp, base_head->physiology_time_stamp, sizeof(uint32_t)*ISMRMRD_PHYS_STAMPS);
+      memcpy(header->getObjectPtr()->physiology_time_stamp, base_head->physiology_time_stamp, sizeof(uint32_t)*ISMRMRD::ISMRMRD_PHYS_STAMPS);
 
       memcpy(header->getObjectPtr()->position, base_head->position, sizeof(float)*3);
       memcpy(header->getObjectPtr()->read_dir, base_head->read_dir, sizeof(float)*3);
@@ -484,7 +479,7 @@ namespace Gadgetron{
       memcpy(header->getObjectPtr()->slice_dir, base_head->slice_dir, sizeof(float)*3);
       memcpy(header->getObjectPtr()->patient_table_position, base_head->patient_table_position, sizeof(float)*3);
 
-      header->getObjectPtr()->image_data_type = ISMRMRD::DATA_COMPLEX_FLOAT;
+      header->getObjectPtr()->data_type = ISMRMRD::ISMRMRD_CXFLOAT;
       header->getObjectPtr()->image_index = image_counter_[set*slices_+slice]++; 
       header->getObjectPtr()->image_series_index = set*slices_+slice;
 
@@ -622,7 +617,7 @@ namespace Gadgetron{
 	  mbq->release();
 	}
 
-	GadgetContainerMessage< SenseJob >* m4 = new GadgetContainerMessage< SenseJob >();
+	GadgetContainerMessage< GenericReconJob >* m4 = new GadgetContainerMessage< GenericReconJob >();
 
 	m4->getObjectPtr()->dat_host_ = data_host;
 	m4->getObjectPtr()->csm_host_ = csm_host;
diff --git a/gadgets/spiral/gpuSpiralSensePrepGadget.h b/gadgets/spiral/gpuSpiralSensePrepGadget.h
index 277927a..f592b61 100644
--- a/gadgets/spiral/gpuSpiralSensePrepGadget.h
+++ b/gadgets/spiral/gpuSpiralSensePrepGadget.h
@@ -13,7 +13,7 @@
 #include "vector_td.h"
 #include "cuNFFT.h"
 
-#include <ismrmrd.h>
+#include <ismrmrd/ismrmrd.h>
 #include <complex>
 #include <boost/shared_ptr.hpp>
 #include <boost/shared_array.hpp>
diff --git a/gadgets/util/CMakeLists.txt b/gadgets/util/CMakeLists.txt
new file mode 100644
index 0000000..82ddfc9
--- /dev/null
+++ b/gadgets/util/CMakeLists.txt
@@ -0,0 +1,27 @@
+IF (WIN32) # on WIN32, define the macro that gadgetron_util_gadgets_export.h tests to pick dllexport
+    ADD_DEFINITIONS(-D__BUILD_GADGETRON_UTIL_GADGETS__)
+ENDIF (WIN32)
+# Extra header search path used when compiling this library.
+include_directories(
+    ${CMAKE_SOURCE_DIR}/toolboxes/core
+)
+# Shared library target: two headers listed next to the single source file.
+add_library(gadgetron_util SHARED 
+    gadgetron_util_gadgets_export.h 
+    ParameterRelayGadget.h
+    ParameterRelayGadget.cpp
+)
+
+set_target_properties(gadgetron_util PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})                                                                                                                                                                                                      
+
+target_link_libraries(gadgetron_util
+    gadgetron_gadgetbase
+    ${ACE_LIBRARIES}
+)
+# Install the public headers under include/ as part of the "main" component.
+install(FILES 
+    gadgetron_util_gadgets_export.h
+    ParameterRelayGadget.h
+    DESTINATION include COMPONENT main)
+# Install the built shared library under lib/ as part of the "main" component.
+install(TARGETS gadgetron_util DESTINATION lib COMPONENT main)
diff --git a/gadgets/util/ParameterRelayGadget.cpp b/gadgets/util/ParameterRelayGadget.cpp
new file mode 100644
index 0000000..0a2df62
--- /dev/null
+++ b/gadgets/util/ParameterRelayGadget.cpp
@@ -0,0 +1,20 @@
+#include "ParameterRelayGadget.h"
+#include "Gadgetron.h"
+namespace Gadgetron{
+int ParameterRelayGadget // process(): hands the incoming message block, untouched, to the next gadget's queue
+::process(ACE_Message_Block* m)
+{
+  if (this->next()->putq(m) == -1) { // putq() returned -1: the message was not queued downstream
+    m->release(); // release the message here because it was not passed on
+    ACE_ERROR_RETURN( (LM_ERROR,
+		       ACE_TEXT("%p\n"),
+		       ACE_TEXT("ParameterRelayGadget::process, passing data on to next gadget")),
+		      -1); // ACE_ERROR_RETURN logs the message above and returns -1 from process()
+  }
+  // Reached only when the message was queued on the next gadget.
+  return GADGET_OK;
+}
+GADGET_FACTORY_DECLARE(ParameterRelayGadget) // NOTE(review): macro is defined outside this file; presumably emits the factory hook for this class - confirm
+}
+
+
diff --git a/gadgets/util/ParameterRelayGadget.h b/gadgets/util/ParameterRelayGadget.h
new file mode 100644
index 0000000..29eb4b5
--- /dev/null
+++ b/gadgets/util/ParameterRelayGadget.h
@@ -0,0 +1,18 @@
+#ifndef PARAMETERRELAYGADGET_H
+#define PARAMETERRELAYGADGET_H
+
+#include "Gadget.h"
+#include "gadgetron_util_gadgets_export.h"
+
+namespace Gadgetron{
+  // Gadget subclass that takes each message as a plain ACE_Message_Block (process() is implemented in ParameterRelayGadget.cpp).
+  class EXPORTUTILGADGETS ParameterRelayGadget : public Gadget
+    {
+    public:
+      GADGET_DECLARE(ParameterRelayGadget);
+      // Per-message handler; returns an int status code to the caller.
+    protected:
+      virtual int process(ACE_Message_Block* m);
+    };
+}
+#endif //PARAMETERRELAYGADGET_H
diff --git a/gadgets/util/gadgetron_util_gadgets_export.h b/gadgets/util/gadgetron_util_gadgets_export.h
new file mode 100644
index 0000000..7f75c2e
--- /dev/null
+++ b/gadgets/util/gadgetron_util_gadgets_export.h
@@ -0,0 +1,14 @@
+#ifndef GADGETRON_UTIL_GADGETS_EXPORT_H_
+#define GADGETRON_UTIL_GADGETS_EXPORT_H_
+
+#if defined (WIN32) // on WIN32, EXPORTUTILGADGETS selects DLL export or import
+#if defined (__BUILD_GADGETRON_UTIL_GADGETS__) // presumably defined only while building this library - confirm in its CMakeLists.txt
+#define EXPORTUTILGADGETS __declspec(dllexport)
+#else // __BUILD_GADGETRON_UTIL_GADGETS__ not defined: import the symbols
+#define EXPORTUTILGADGETS __declspec(dllimport)
+#endif
+#else // not WIN32: the macro expands to nothing
+#define EXPORTUTILGADGETS
+#endif
+
+#endif // GADGETRON_UTIL_GADGETS_EXPORT_H_
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 556bca1..52b3e8f 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,3 +1,5 @@
+if (GTEST_FOUND AND ARMADILLO_FOUND)
+
 ENABLE_TESTING()
 
 if(WIN32)
@@ -7,29 +9,35 @@ endif(WIN32)
 include_directories(
   ${CMAKE_SOURCE_DIR}/toolboxes/core
   ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
-  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/arma_math
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
   ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
   ${Boost_INCLUDE_DIR}
   ${ARMADILLO_INCLUDE_DIRS}
   ${GTEST_INCLUDE_DIRS}
+  ${ACE_INCLUDE_DIR}
+  ${ISMRMRD_INCLUDE_DIR}
   )
 
 if (CUDA_FOUND)
     include_directories(${CUDA_INCLUDE_DIRS})
 endif (CUDA_FOUND)
 
-link_libraries(cpucore 
-    cpucore_math
+link_libraries(
+    gadgetron_toolbox_cpucore 
+    gadgetron_toolbox_cpucore_math
     ${BOOST_LIBRARIES}
     ${GTEST_LIBRARIES} 
     ${ARMADILLO_LIBRARIES}
     )
-    
+
 if ( CUDA_FOUND )
-    cuda_add_executable(test_all 
+
+    include_directories( ${CUDA_INCLUDE_DIRS} )
+
+  cuda_add_executable(test_all 
       tests.cpp 
       hoNDArray_elemwise_test.cpp 
-      hoNDArray_operators_test.cpp 
       hoNDArray_blas_test.cpp 
       hoNDArray_utils_test.cpp
       vector_td_test.cpp
@@ -45,7 +53,6 @@ else ( CUDA_FOUND )
     add_executable(test_all 
       tests.cpp 
       hoNDArray_elemwise_test.cpp 
-      hoNDArray_operators_test.cpp 
       hoNDArray_blas_test.cpp 
       hoNDArray_utils_test.cpp
       )
@@ -53,10 +60,12 @@ endif ( CUDA_FOUND )
 
 if ( CUDA_FOUND )
   target_link_libraries(test_all 
-    gpucore
+    gadgetron_toolbox_gpucore
     )
-elseif ( CUDA_FOUND )  
-
-endif ( CUDA_FOUND )
+endif( CUDA_FOUND )
 
 add_test(test_all test_all)
+
+endif (GTEST_FOUND AND ARMADILLO_FOUND)
+
+add_subdirectory(integration)
diff --git a/test/hoNDArray_blas_test.cpp b/test/hoNDArray_blas_test.cpp
index ae99f14..2b2f28e 100644
--- a/test/hoNDArray_blas_test.cpp
+++ b/test/hoNDArray_blas_test.cpp
@@ -1,4 +1,4 @@
-#include "hoNDArray_blas.h"
+#include "hoNDArray_math.h"
 #include "hoNDArray_elemwise.h"
 #include <gtest/gtest.h>
 #include <vector>
diff --git a/test/hoNDArray_elemwise_test.cpp b/test/hoNDArray_elemwise_test.cpp
index e904e54..bda1f47 100644
--- a/test/hoNDArray_elemwise_test.cpp
+++ b/test/hoNDArray_elemwise_test.cpp
@@ -377,3 +377,242 @@ TYPED_TEST(hoNDArray_elemwise_TestCplx3,realToCplxTest){
   EXPECT_FLOAT_EQ(3.4,real(real_to_complex<TypeParam>(real(&this->Array).get())->get_data_ptr()[33425]));
   EXPECT_FLOAT_EQ(0.0,imag(real_to_complex<TypeParam>(real(&this->Array).get())->get_data_ptr()[33425]));
 }
+
+template <typename T> class hoNDArray_operators_TestReal : public ::testing::Test { // fixture for the real-valued in-place operator tests
+protected:
+  virtual void SetUp() {
+    size_t vdims[] = {37, 49, 23, 19}; //Using odd, mostly prime sizes (49 = 7*7 is not prime) for setup because they are messy
+    size_t vdims2[] = {37, 49}; //Smaller dimensionality (the first two sizes of vdims) to test batch mode
+    dims = std::vector<size_t>(vdims,vdims+sizeof(vdims)/sizeof(size_t));
+    dims2 = std::vector<size_t>(vdims2,vdims2+sizeof(vdims2)/sizeof(size_t));
+    Array = hoNDArray<T>(&dims); // 4-D array built from dims
+    Array2 = hoNDArray<T>(&dims2); // 2-D array built from dims2
+  }
+  std::vector<size_t> dims;
+  std::vector<size_t> dims2;
+  hoNDArray<T> Array;
+  hoNDArray<T> Array2;
+};
+
+template <typename T> class hoNDArray_operators_TestCplx : public ::testing::Test { // fixture for the complex-valued in-place operator tests
+protected:
+  virtual void SetUp() {
+    size_t vdims[] = {37, 49, 23, 19}; //Using odd, mostly prime sizes (49 = 7*7 is not prime) for setup because they are messy
+    size_t vdims2[] = {37, 49}; //Smaller dimensionality (the first two sizes of vdims) to test batch mode
+    dims = std::vector<size_t>(vdims,vdims+sizeof(vdims)/sizeof(size_t));
+    dims2 = std::vector<size_t>(vdims2,vdims2+sizeof(vdims2)/sizeof(size_t));
+    Array = hoNDArray<T>(&dims); // 4-D array built from dims
+    Array2 = hoNDArray<T>(&dims2); // 2-D array built from dims2
+  }
+  std::vector<size_t> dims;
+  std::vector<size_t> dims2;
+  hoNDArray<T> Array;
+  hoNDArray<T> Array2;
+};
+
+typedef Types<float, double> realImplementations; // element types used for hoNDArray_operators_TestReal
+typedef Types<std::complex<float>, std::complex<double>, float_complext, double_complext> cplxImplementations; // element types used for hoNDArray_operators_TestCplx
+
+TYPED_TEST_CASE(hoNDArray_operators_TestReal, realImplementations); // run the fixture for every type in realImplementations
+
+TYPED_TEST(hoNDArray_operators_TestReal,equalsAddTest1){ // Array += Array2, where Array2 has fewer dimensions than Array
+  TypeParam v1 = TypeParam(46865.35435);
+  TypeParam v2 = TypeParam(13784.34);
+  unsigned int idx = 73243; // larger than 37*49 = 1813, the element count implied by dims2
+  fill(&this->Array,v1);
+  fill(&this->Array2,v2);
+  this->Array += this->Array2;
+  EXPECT_FLOAT_EQ(v1+v2,this->Array.get_data_ptr()[idx]); // NOTE(review): EXPECT_FLOAT_EQ compares as float, so the double instantiation is only checked to float precision
+}
+
+TYPED_TEST(hoNDArray_operators_TestReal,equalsAddTest2){ // Array += scalar
+  TypeParam v1 = TypeParam(98.4);
+  TypeParam v2 = TypeParam(2.2);
+  unsigned int idx = 12295;
+  fill(&this->Array,v1);
+  this->Array += v2;
+  EXPECT_FLOAT_EQ(v1+v2,this->Array.get_data_ptr()[idx]); // spot-check of a single element
+}
+
+TYPED_TEST(hoNDArray_operators_TestReal,equalsSubtractTest1){ // Array -= Array2, where Array2 has fewer dimensions than Array
+  TypeParam v1 = TypeParam(98475334.34);
+  TypeParam v2 = TypeParam(2452.234);
+  unsigned int idx = 124999; // larger than 37*49 = 1813, the element count implied by dims2
+  fill(&this->Array,v1);
+  fill(&this->Array2,v2);
+  this->Array -= this->Array2;
+  EXPECT_FLOAT_EQ(v1-v2,this->Array.get_data_ptr()[idx]); // spot-check of a single element
+}
+
+TYPED_TEST(hoNDArray_operators_TestReal,equalsSubtractTest2){ // Array -= scalar
+  TypeParam v1 = TypeParam(4.4);
+  TypeParam v2 = TypeParam(9212.21);
+  unsigned int idx = 122131;
+  fill(&this->Array,v1);
+  this->Array -= v2;
+  EXPECT_FLOAT_EQ(v1-v2,this->Array.get_data_ptr()[idx]); // spot-check of a single element
+}
+
+TYPED_TEST(hoNDArray_operators_TestReal,equalsMultiplyTest1){ // Array *= Array2, where Array2 has fewer dimensions than Array
+  TypeParam v1 = TypeParam(342.145);
+  TypeParam v2 = TypeParam(43545.43);
+  unsigned int idx = 12344; // larger than 37*49 = 1813, the element count implied by dims2
+  fill(&this->Array,v1);
+  fill(&this->Array2,v2);
+  this->Array *= this->Array2;
+  EXPECT_FLOAT_EQ(v1*v2,this->Array.get_data_ptr()[idx]); // spot-check of a single element
+}
+
+TYPED_TEST(hoNDArray_operators_TestReal,equalsMultiplyTest2){ // Array *= scalar
+  TypeParam v1 = TypeParam(43534.443);
+  TypeParam v2 = TypeParam(92.842);
+  unsigned int idx = 96735;
+  fill(&this->Array,v1);
+  this->Array *= v2;
+  EXPECT_FLOAT_EQ(v1*v2,this->Array.get_data_ptr()[idx]); // spot-check of a single element
+}
+
+TYPED_TEST(hoNDArray_operators_TestReal,equalsDivideTest1){ // Array /= Array2, where Array2 has fewer dimensions than Array
+  TypeParam v1 = TypeParam(644.24);
+  TypeParam v2 = TypeParam(38564.64);
+  unsigned int idx = 98322; // larger than 37*49 = 1813, the element count implied by dims2
+  fill(&this->Array,v1);
+  fill(&this->Array2,v2);
+  this->Array /= this->Array2;
+  EXPECT_FLOAT_EQ(v1/v2,this->Array.get_data_ptr()[idx]); // spot-check of a single element
+}
+
+TYPED_TEST(hoNDArray_operators_TestReal,equalsDivideTest2){ // Array /= scalar
+  TypeParam v1 = TypeParam(56342.24);
+  TypeParam v2 = TypeParam(23434.34);
+  unsigned int idx = 12591;
+  fill(&this->Array,v1);
+  this->Array /= v2;
+  EXPECT_FLOAT_EQ(v1/v2,this->Array.get_data_ptr()[idx]); // spot-check of a single element
+}
+
+TYPED_TEST_CASE(hoNDArray_operators_TestCplx, cplxImplementations); // run the fixture for every type in cplxImplementations
+
+TYPED_TEST(hoNDArray_operators_TestCplx,equalsAddTest1){ // Array += Array2 (complex), where Array2 has fewer dimensions than Array
+  TypeParam v1 = TypeParam(46865.35435, 534544.534523);
+  TypeParam v2 = TypeParam(13784.34, 54543543.1243);
+  unsigned int idx = 73243; // larger than 37*49 = 1813, the element count implied by dims2
+  fill(&this->Array,v1);
+  fill(&this->Array2,v2);
+  this->Array += this->Array2;
+  EXPECT_FLOAT_EQ(real(v1+v2),real(this->Array.get_data_ptr()[idx])); // real part of one element
+  EXPECT_FLOAT_EQ(imag(v1+v2),imag(this->Array.get_data_ptr()[idx])); // imaginary part of the same element
+}
+
+TYPED_TEST(hoNDArray_operators_TestCplx,equalsAddTest2){ // Array += complex scalar
+  TypeParam v1 = TypeParam(98.4, 45.34);
+  TypeParam v2 = TypeParam(2.2,3.23);
+  unsigned int idx = 12925;
+  fill(&this->Array,v1);
+  this->Array += v2;
+  EXPECT_FLOAT_EQ(real(v1+v2),real(this->Array.get_data_ptr()[idx])); // real part of one element
+  EXPECT_FLOAT_EQ(imag(v1+v2),imag(this->Array.get_data_ptr()[idx])); // imaginary part of the same element
+}
+
+TYPED_TEST(hoNDArray_operators_TestCplx,equalsAddTest3){ // Array (complex) += real-valued scalar
+  TypeParam v1 = TypeParam(98.4, 45.34);
+  TypeParam v2 = TypeParam(2.2,0.0); // zero imaginary part, so v1+v2 below is the expected result of adding real(v2)
+  unsigned int idx = 12295;
+  fill(&this->Array,v1);
+  this->Array += real(v2);
+  EXPECT_FLOAT_EQ(real(v1+v2),real(this->Array.get_data_ptr()[idx])); // real part of one element
+  EXPECT_FLOAT_EQ(imag(v1+v2),imag(this->Array.get_data_ptr()[idx])); // imaginary part of the same element
+}
+
+TYPED_TEST(hoNDArray_operators_TestCplx,equalsSubtractTest1){ // Array -= Array2 (complex), where Array2 has fewer dimensions than Array
+  TypeParam v1 = TypeParam(46865.35435, 534544.534523);
+  TypeParam v2 = TypeParam(13784.34, 54543543.1243);
+  unsigned int idx = 73243; // larger than 37*49 = 1813, the element count implied by dims2
+  fill(&this->Array,v1);
+  fill(&this->Array2,v2);
+  this->Array -= this->Array2;
+  EXPECT_FLOAT_EQ(real(v1-v2),real(this->Array.get_data_ptr()[idx])); // real part of one element
+  EXPECT_FLOAT_EQ(imag(v1-v2),imag(this->Array.get_data_ptr()[idx])); // imaginary part of the same element
+}
+
+TYPED_TEST(hoNDArray_operators_TestCplx,equalsSubtractTest2){ // Array -= complex scalar
+  TypeParam v1 = TypeParam(98.4, 45.34);
+  TypeParam v2 = TypeParam(2.2,3.23);
+  unsigned int idx = 12925;
+  fill(&this->Array,v1);
+  this->Array -= v2;
+  EXPECT_FLOAT_EQ(real(v1-v2),real(this->Array.get_data_ptr()[idx])); // real part of one element
+  EXPECT_FLOAT_EQ(imag(v1-v2),imag(this->Array.get_data_ptr()[idx])); // imaginary part of the same element
+}
+
+TYPED_TEST(hoNDArray_operators_TestCplx,equalsSubtractTest3){ // Array (complex) -= real-valued scalar
+  TypeParam v1 = TypeParam(98.4, 45.34);
+  TypeParam v2 = TypeParam(2.2,0.0); // zero imaginary part, so v1-v2 below is the expected result of subtracting real(v2)
+  unsigned int idx = 12925;
+  fill(&this->Array,v1);
+  this->Array -= real(v2);
+  EXPECT_FLOAT_EQ(real(v1-v2),real(this->Array.get_data_ptr()[idx])); // real part of one element
+  EXPECT_FLOAT_EQ(imag(v1-v2),imag(this->Array.get_data_ptr()[idx])); // imaginary part of the same element
+}
+
+TYPED_TEST(hoNDArray_operators_TestCplx,equalsMultiplyTest1){ // Array *= Array2 (complex), where Array2 has fewer dimensions than Array
+  TypeParam v1 = TypeParam(46865.35435, 534544.534523);
+  TypeParam v2 = TypeParam(13784.34, 54543543.1243);
+  unsigned int idx = 73243; // larger than 37*49 = 1813, the element count implied by dims2
+  fill(&this->Array,v1);
+  fill(&this->Array2,v2);
+  this->Array *= this->Array2;
+  EXPECT_FLOAT_EQ(real(v1*v2),real(this->Array.get_data_ptr()[idx])); // real part of one element
+  EXPECT_FLOAT_EQ(imag(v1*v2),imag(this->Array.get_data_ptr()[idx])); // imaginary part of the same element
+}
+
+TYPED_TEST(hoNDArray_operators_TestCplx,equalsMultiplyTest2){ // Array *= complex scalar
+  TypeParam v1 = TypeParam(98.4, 45.34);
+  TypeParam v2 = TypeParam(2.2,3.23);
+  unsigned int idx = 12925;
+  fill(&this->Array,v1);
+  this->Array *= v2;
+  EXPECT_FLOAT_EQ(real(v1*v2),real(this->Array.get_data_ptr()[idx])); // real part of one element
+  EXPECT_FLOAT_EQ(imag(v1*v2),imag(this->Array.get_data_ptr()[idx])); // imaginary part of the same element
+}
+
+TYPED_TEST(hoNDArray_operators_TestCplx,equalsMultiplyTest3){ // Array (complex) *= real-valued scalar
+  TypeParam v1 = TypeParam(98.4, 45.34);
+  TypeParam v2 = TypeParam(2.2,0.0); // zero imaginary part, so v1*v2 below is the expected result of scaling by real(v2)
+  unsigned int idx = 12295;
+  fill(&this->Array,v1);
+  this->Array *= real(v2);
+  EXPECT_FLOAT_EQ(real(v1*v2),real(this->Array.get_data_ptr()[idx])); // real part of one element
+  EXPECT_FLOAT_EQ(imag(v1*v2),imag(this->Array.get_data_ptr()[idx])); // imaginary part of the same element
+}
+
+TYPED_TEST(hoNDArray_operators_TestCplx,equalsDivideTest1){ // Array /= Array2 (complex), where Array2 has fewer dimensions than Array
+  TypeParam v1 = TypeParam(46865.35435, 534544.534523);
+  TypeParam v2 = TypeParam(13784.34, 54543543.1243);
+  unsigned int idx = 73243; // larger than 37*49 = 1813, the element count implied by dims2
+  fill(&this->Array,v1);
+  fill(&this->Array2,v2);
+  this->Array /= this->Array2;
+  EXPECT_FLOAT_EQ(real(v1/v2),real(this->Array.get_data_ptr()[idx])); // real part of one element
+  EXPECT_FLOAT_EQ(imag(v1/v2),imag(this->Array.get_data_ptr()[idx])); // imaginary part of the same element
+}
+
+TYPED_TEST(hoNDArray_operators_TestCplx,equalsDivideTest2){ // Array /= complex scalar
+  TypeParam v1 = TypeParam(98.4, 45.34);
+  TypeParam v2 = TypeParam(2.2,3.23);
+  unsigned int idx = 12295;
+  fill(&this->Array,v1);
+  this->Array /= v2;
+  EXPECT_FLOAT_EQ(real(v1/v2),real(this->Array.get_data_ptr()[idx])); // real part of one element
+  EXPECT_FLOAT_EQ(imag(v1/v2),imag(this->Array.get_data_ptr()[idx])); // imaginary part of the same element
+}
+
+TYPED_TEST(hoNDArray_operators_TestCplx,equalsDivideTest3){ // Array (complex) /= real-valued scalar
+  TypeParam v1 = TypeParam(98.4, 45.34);
+  TypeParam v2 = TypeParam(2.2,0.0); // zero imaginary part, so v1/v2 below is the expected result of dividing by real(v2)
+  unsigned int idx = 12295;
+  fill(&this->Array,v1);
+  this->Array /= real(v2);
+  EXPECT_FLOAT_EQ(real(v1/v2),real(this->Array.get_data_ptr()[idx])); // real part of one element
+  EXPECT_FLOAT_EQ(imag(v1/v2),imag(this->Array.get_data_ptr()[idx])); // imaginary part of the same element
+}
diff --git a/test/hoNDArray_operators_test.cpp b/test/hoNDArray_operators_test.cpp
index 02739b9..90ce1e9 100644
--- a/test/hoNDArray_operators_test.cpp
+++ b/test/hoNDArray_operators_test.cpp
@@ -1,4 +1,3 @@
-#include "hoNDArray_operators.h"
 #include "hoNDArray_elemwise.h"
 #include "complext.h"
 
diff --git a/test/integration/.gitignore b/test/integration/.gitignore
new file mode 100644
index 0000000..df420e0
--- /dev/null
+++ b/test/integration/.gitignore
@@ -0,0 +1,8 @@
+*#
+.#*
+*~
+data/
+test/
+*.log
+index.html
+test_cases.txt
diff --git a/test/integration/CMakeLists.txt b/test/integration/CMakeLists.txt
new file mode 100644
index 0000000..dc927a3
--- /dev/null
+++ b/test/integration/CMakeLists.txt
@@ -0,0 +1,47 @@
+
+# Select the integration test cases that match the build environment.
+# Variables consulted below: CUDA_FOUND, WIN32, Boost_PYTHON_FOUND, PYTHONLIBS_FOUND, NUMPY_FOUND
+
+# Collect the .cfg files of each category, as paths relative to test/integration/
+FILE(GLOB TEST_CASE_ALWAYS RELATIVE ${CMAKE_SOURCE_DIR}/test/integration/  ./cases/always/*.cfg  )
+FILE(GLOB TEST_CASE_WITH_MKL RELATIVE ${CMAKE_SOURCE_DIR}/test/integration/  ./cases/mkl/*.cfg  )
+FILE(GLOB TEST_CASE_WITH_GPU RELATIVE ${CMAKE_SOURCE_DIR}/test/integration/  ./cases/gpu/*.cfg  )
+FILE(GLOB TEST_CASE_WITH_PYTHON RELATIVE ${CMAKE_SOURCE_DIR}/test/integration/  ./cases/python/*.cfg  )
+# One GPU test is kept in its own category because it cannot run on the Windows buildbot (not enough memory)
+FILE(GLOB TEST_CASE_LINUX_MAC_ONLY RELATIVE ${CMAKE_SOURCE_DIR}/test/integration/  ./cases/linux_mac_only/*.cfg  )
+
+message("Test cases found : ${TEST_CASE_ALWAYS}")
+message("Test cases with MKL found : ${TEST_CASE_WITH_MKL}")
+message("Test cases with_GPU : ${TEST_CASE_WITH_GPU}")
+message("Test cases with python : ${TEST_CASE_WITH_PYTHON}")
+message("Test cases for linux and mac only : ${TEST_CASE_LINUX_MAC_ONLY}")
+
+
+# Build the list of test cases that apply to this build environment
+
+set(TEST_CASE ${TEST_CASE_ALWAYS})
+
+# TODO: once MKL is removed, this scheme will need to change.
+# For now the MKL cases are always added, even when MKL is not available.
+set(TEST_CASE ${TEST_CASE} ${TEST_CASE_WITH_MKL})
+
+if (CUDA_FOUND)
+    set(TEST_CASE ${TEST_CASE} ${TEST_CASE_WITH_GPU})
+    if (WIN32)
+        # intentionally empty: the linux_mac_only cases are not added on Windows
+    else (WIN32)
+        set(TEST_CASE ${TEST_CASE} ${TEST_CASE_LINUX_MAC_ONLY})
+    endif (WIN32)
+endif (CUDA_FOUND)
+
+if (Boost_PYTHON_FOUND AND PYTHONLIBS_FOUND AND NUMPY_FOUND)
+    set(TEST_CASE ${TEST_CASE} ${TEST_CASE_WITH_PYTHON})
+endif (Boost_PYTHON_FOUND AND PYTHONLIBS_FOUND AND NUMPY_FOUND)
+
+message("Test cases to be performed : ${TEST_CASE}")
+
+# Write the selected test cases to test_cases.txt, one per line; file(WRITE) without content first creates or empties the file
+file(WRITE ./test_cases.txt)
+foreach(test ${TEST_CASE})
+    file(APPEND ./test_cases.txt "${test}\n")
+endforeach()
diff --git a/test/integration/cases/always/simple_gre.cfg b/test/integration/cases/always/simple_gre.cfg
new file mode 100644
index 0000000..1aa9e41
--- /dev/null
+++ b/test/integration/cases/always/simple_gre.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/simple_gre/meas_MiniGadgetron_GRE.dat
+siemens_parameter_xml=IsmrmrdParameterMap.xml
+siemens_parameter_xsl=IsmrmrdParameterMap.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=0
+siemens_dependency_measurement3=0
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=0
+out_folder=test
+ismrmrd=simple_gre.h5
+result_h5=simple_gre_out.h5
+reference_h5= data/simple_gre/simple_gre_out.h5
+
+[TEST]
+gadgetron_configuration=default.xml
+reference_dataset=default.xml/image_0.img
+result_dataset=default.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=1e-5
+comparison_threshold_scales=1e-5
diff --git a/test/integration/cases/always/simple_gre_3d.cfg b/test/integration/cases/always/simple_gre_3d.cfg
new file mode 100644
index 0000000..6c51650
--- /dev/null
+++ b/test/integration/cases/always/simple_gre_3d.cfg
@@ -0,0 +1,26 @@
+[FILES]
+siemens_dat=data/gre_3d/meas_MID248_gre_FID30644.dat
+siemens_parameter_xml=IsmrmrdParameterMap.xml
+siemens_parameter_xsl=IsmrmrdParameterMap.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=0
+siemens_dependency_measurement3=0
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=0
+out_folder=test
+ismrmrd=simple_gre_3d.h5
+result_h5=simple_gre_out_3d.h5
+reference_h5= data/gre_3d/simple_gre_out_3d.h5
+
+[TEST]
+gadgetron_configuration=default_optimized.xml
+reference_dataset=default_optimized.xml/image_0.img
+result_dataset=default_optimized.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=1e-5
+comparison_threshold_scales=1e-5
+
+
diff --git a/test/integration/cases/gpu/gpu_fixed_radial_mode1_cg.cfg b/test/integration/cases/gpu/gpu_fixed_radial_mode1_cg.cfg
new file mode 100644
index 0000000..3ee0718
--- /dev/null
+++ b/test/integration/cases/gpu/gpu_fixed_radial_mode1_cg.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/radial_phantom/meas_MID00133_FID20080_CV_Radial_Fixed_Angle_128_x8_32phs.dat
+siemens_parameter_xml=IsmrmrdParameterMap.xml
+siemens_parameter_xsl=IsmrmrdParameterMap.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=0
+siemens_dependency_measurement3=0
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=0
+out_folder=test
+ismrmrd=fixed_radial.h5
+result_h5=fixed_radial_mode1_cg_out.h5
+reference_h5= data/radial_phantom/fixed_radial_mode1.h5
+
+[TEST]
+gadgetron_configuration=fixed_radial_mode1_gpusense_cg.xml
+reference_dataset=cg/image_0.img
+result_dataset=fixed_radial_mode1_gpusense_cg.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=1e-2
+comparison_threshold_scales=1e-2
diff --git a/test/integration/cases/gpu/gpu_fixed_radial_mode1_ktsense.cfg b/test/integration/cases/gpu/gpu_fixed_radial_mode1_ktsense.cfg
new file mode 100644
index 0000000..a0e5cb7
--- /dev/null
+++ b/test/integration/cases/gpu/gpu_fixed_radial_mode1_ktsense.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/radial_phantom/meas_MID00133_FID20080_CV_Radial_Fixed_Angle_128_x8_32phs.dat
+siemens_parameter_xml=IsmrmrdParameterMap.xml
+siemens_parameter_xsl=IsmrmrdParameterMap.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=0
+siemens_dependency_measurement3=0
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=0
+out_folder=test
+ismrmrd=fixed_radial.h5
+result_h5=fixed_radial_mode1_ktsense_out.h5
+reference_h5= data/radial_phantom/fixed_radial_mode1.h5
+
+[TEST]
+gadgetron_configuration=fixed_radial_mode1_gpu_ktsense.xml
+reference_dataset=kt/image_0.img
+result_dataset=fixed_radial_mode1_gpu_ktsense.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=1e-2
+comparison_threshold_scales=1e-2
diff --git a/test/integration/cases/gpu/gpu_fixed_radial_mode1_realtime.cfg b/test/integration/cases/gpu/gpu_fixed_radial_mode1_realtime.cfg
new file mode 100644
index 0000000..539708d
--- /dev/null
+++ b/test/integration/cases/gpu/gpu_fixed_radial_mode1_realtime.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/radial_phantom/meas_MID00133_FID20080_CV_Radial_Fixed_Angle_128_x8_32phs.dat
+siemens_parameter_xml=IsmrmrdParameterMap.xml
+siemens_parameter_xsl=IsmrmrdParameterMap.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=0
+siemens_dependency_measurement3=0
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=0
+out_folder=test
+ismrmrd=fixed_radial.h5
+result_h5=fixed_radial_mode1_realtime_out.h5
+reference_h5= data/radial_phantom/fixed_radial_mode1.h5
+
+[TEST]
+gadgetron_configuration=fixed_radial_mode1_realtime.xml
+reference_dataset=realtime/image_0.img
+result_dataset=fixed_radial_mode1_realtime.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=1e-2
+comparison_threshold_scales=1e-2
diff --git a/test/integration/cases/gpu/gpu_golden_radial_mode2_cg.cfg b/test/integration/cases/gpu/gpu_golden_radial_mode2_cg.cfg
new file mode 100644
index 0000000..0b4dcc0
--- /dev/null
+++ b/test/integration/cases/gpu/gpu_golden_radial_mode2_cg.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/radial_phantom/meas_MID00135_FID20082_CV_Radial_Golden_Angle_128_512_views.dat
+siemens_parameter_xml=IsmrmrdParameterMap.xml
+siemens_parameter_xsl=IsmrmrdParameterMap.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=0
+siemens_dependency_measurement3=0
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=0
+out_folder=test
+ismrmrd=golden_radial.h5
+result_h5=golden_radial_mode2_cg_out.h5
+reference_h5= data/radial_phantom/golden_radial_mode2.h5
+
+[TEST]
+gadgetron_configuration=golden_radial_mode2_gpusense_cg.xml
+reference_dataset=cg/image_0.img
+result_dataset=golden_radial_mode2_gpusense_cg.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=1e-2
+comparison_threshold_scales=1e-2
diff --git a/test/integration/cases/gpu/gpu_golden_radial_mode2_ktsense.cfg b/test/integration/cases/gpu/gpu_golden_radial_mode2_ktsense.cfg
new file mode 100644
index 0000000..216b9ab
--- /dev/null
+++ b/test/integration/cases/gpu/gpu_golden_radial_mode2_ktsense.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/radial_phantom/meas_MID00135_FID20082_CV_Radial_Golden_Angle_128_512_views.dat
+siemens_parameter_xml=IsmrmrdParameterMap.xml
+siemens_parameter_xsl=IsmrmrdParameterMap.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=0
+siemens_dependency_measurement3=0
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=0
+out_folder=test
+ismrmrd=golden_radial.h5
+result_h5=golden_radial_mode2_ktsense_out.h5
+reference_h5= data/radial_phantom/golden_radial_mode2.h5
+
+[TEST]
+gadgetron_configuration=golden_radial_mode2_gpu_ktsense.xml
+reference_dataset=kt/image_0.img
+result_dataset=golden_radial_mode2_gpu_ktsense.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=1e-2
+comparison_threshold_scales=1e-2
diff --git a/test/integration/cases/gpu/gpu_golden_radial_mode2_realtime.cfg b/test/integration/cases/gpu/gpu_golden_radial_mode2_realtime.cfg
new file mode 100644
index 0000000..4e006c1
--- /dev/null
+++ b/test/integration/cases/gpu/gpu_golden_radial_mode2_realtime.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/radial_phantom/meas_MID00135_FID20082_CV_Radial_Golden_Angle_128_512_views.dat
+siemens_parameter_xml=IsmrmrdParameterMap.xml
+siemens_parameter_xsl=IsmrmrdParameterMap.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=0
+siemens_dependency_measurement3=0
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=0
+out_folder=test
+ismrmrd=golden_radial.h5
+result_h5=golden_radial_mode2_realtime_out.h5
+reference_h5= data/radial_phantom/golden_radial_mode2.h5
+
+[TEST]
+gadgetron_configuration=golden_radial_mode2_realtime.xml
+reference_dataset=realtime/image_0.img
+result_dataset=golden_radial_mode2_realtime.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=1e-2
+comparison_threshold_scales=1e-2
diff --git a/test/integration/cases/gpu/gpu_grappa_simple.cfg b/test/integration/cases/gpu/gpu_grappa_simple.cfg
new file mode 100644
index 0000000..9b61809
--- /dev/null
+++ b/test/integration/cases/gpu/gpu_grappa_simple.cfg
@@ -0,0 +1,25 @@
+[FILES]
+siemens_dat=data/rtgrappa/acc_data_with_device_2.dat
+siemens_parameter_xml=IsmrmrdParameterMap.xml
+siemens_parameter_xsl=IsmrmrdParameterMap.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=0
+siemens_dependency_measurement3=0
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=0
+out_folder=test
+ismrmrd=grappa_rate2.h5
+result_h5=grappa_rate2_out.h5
+reference_h5= data/rtgrappa/grappa_rate2_out_20141024.h5
+
+[TEST]
+gadgetron_configuration=grappa_float.xml
+reference_dataset=grappa_float.xml/image_0/data
+result_dataset=grappa_float.xml/image_0/data
+compare_dimensions=1
+compare_values=0
+compare_scales=1
+# Note that this reconstruction does not yield the same result every time: the timing of the GPU weights update changes the noise behavior
+comparison_threshold_values=1e-1
+comparison_threshold_scales=1e-2
diff --git a/test/integration/cases/gpu/gpu_spiral.cfg b/test/integration/cases/gpu/gpu_spiral.cfg
new file mode 100644
index 0000000..b418ab5
--- /dev/null
+++ b/test/integration/cases/gpu/gpu_spiral.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/spiral/meas_MID1132_MiniIRT_spiral_16int_tr500_acc1_10reps_FID13142.dat
+siemens_parameter_xml=IsmrmrdParameterMap.xml
+siemens_parameter_xsl=IsmrmrdParameterMap.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=0
+siemens_dependency_measurement3=0
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=0
+out_folder=test
+ismrmrd=simple_spiral.h5
+result_h5=simple_spiral_out.h5
+reference_h5= data/spiral/simple_spiral_out.h5
+
+[TEST]
+gadgetron_configuration=spiral_flow_gpusense_cg.xml
+reference_dataset=spiral_cg/image_0.img
+result_dataset=spiral_flow_gpusense_cg.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=1e-2
+comparison_threshold_scales=1e-2
diff --git a/test/integration/cases/gpu/gpu_spiral_sb.cfg b/test/integration/cases/gpu/gpu_spiral_sb.cfg
new file mode 100644
index 0000000..4ef49ea
--- /dev/null
+++ b/test/integration/cases/gpu/gpu_spiral_sb.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/spiral/meas_MID1132_MiniIRT_spiral_16int_tr500_acc1_10reps_FID13142.dat
+siemens_parameter_xml=IsmrmrdParameterMap.xml
+siemens_parameter_xsl=IsmrmrdParameterMap.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=0
+siemens_dependency_measurement3=0
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=0
+out_folder=test
+ismrmrd=simple_spiral.h5
+result_h5=simple_spiral_out.h5
+reference_h5= data/spiral/simple_spiral_out.h5
+
+[TEST]
+gadgetron_configuration=spiral_flow_gpusense_sb.xml
+reference_dataset=spiral_sb/image_0.img
+result_dataset=spiral_flow_gpusense_sb.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=1e-2
+comparison_threshold_scales=1e-2
diff --git a/test/integration/cases/linux_mac_only/gpu_golden_radial_mode2_sb.cfg b/test/integration/cases/linux_mac_only/gpu_golden_radial_mode2_sb.cfg
new file mode 100644
index 0000000..1c0493b
--- /dev/null
+++ b/test/integration/cases/linux_mac_only/gpu_golden_radial_mode2_sb.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/radial_phantom/meas_MID00135_FID20082_CV_Radial_Golden_Angle_128_512_views.dat
+siemens_parameter_xml=IsmrmrdParameterMap.xml
+siemens_parameter_xsl=IsmrmrdParameterMap.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=0
+siemens_dependency_measurement3=0
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=0
+out_folder=test
+ismrmrd=golden_radial.h5
+result_h5=golden_radial_mode2_sb_out.h5
+reference_h5= data/radial_phantom/golden_radial_mode2.h5
+
+[TEST]
+gadgetron_configuration=golden_radial_mode2_gpusense_sb.xml
+reference_dataset=sb/image_0.img
+result_dataset=golden_radial_mode2_gpusense_sb.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=1e-2
+comparison_threshold_scales=1e-2
diff --git a/test/integration/cases/mkl/gtplus_3D_head.cfg b/test/integration/cases/mkl/gtplus_3D_head.cfg
new file mode 100644
index 0000000..c0bc5d8
--- /dev/null
+++ b/test/integration/cases/mkl/gtplus_3D_head.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/gtplus/3D_head/meas_MID00156_FID05944_GRE_128iso_p2x2.dat
+siemens_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=-1
+siemens_dependency_measurement3=-1
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=1
+out_folder=test
+ismrmrd=gtplus_3D_head.h5
+result_h5=gtplus_3D_head_out.h5
+reference_h5= data/gtplus/3D_head/gtplus_3D_ref_20140924.h5
+
+[TEST]
+gadgetron_configuration=GT_3DT_Cartesian.xml
+reference_dataset=gtplus_3D/image_0/data
+result_dataset=GT_3DT_Cartesian.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=0.1
+comparison_threshold_scales=0.1
diff --git a/test/integration/cases/mkl/gtplus_FatWater.cfg b/test/integration/cases/mkl/gtplus_FatWater.cfg
new file mode 100644
index 0000000..68e6e9e
--- /dev/null
+++ b/test/integration/cases/mkl/gtplus_FatWater.cfg
@@ -0,0 +1,25 @@
+[FILES]
+siemens_dat=data/gtplus/FatWater/meas_MID00342_3e2i_R4.dat
+siemens_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=-1
+siemens_dependency_measurement3=-1
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=1
+out_folder=test
+ismrmrd=gtplus_FatWater.h5
+result_h5=gtplus_FatWater_out.h5
+reference_h5= data/gtplus/FatWater/gtplus_FatWater_ref.h5
+
+[TEST]
+gadgetron_configuration=GT_2DT_FatWater.xml
+reference_dataset=gtplus_FatWater/image_0.img
+result_dataset=GT_2DT_FatWater.xml/image_0/data
+
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=0.75
+comparison_threshold_scales=0.1
diff --git a/test/integration/cases/mkl/gtplus_FetalHASTE.cfg b/test/integration/cases/mkl/gtplus_FetalHASTE.cfg
new file mode 100644
index 0000000..acf273e
--- /dev/null
+++ b/test/integration/cases/mkl/gtplus_FetalHASTE.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/gtplus/FetalHASTE/raw30488.dat
+siemens_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=-1
+siemens_dependency_measurement3=-1
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=1
+out_folder=test
+ismrmrd=gtplus_FetalHASTE.h5
+result_h5=gtplus_FetalHASTE_out.h5
+reference_h5= data/gtplus/FetalHASTE/gtplus_FetalHASTE_ref_20140826.h5
+
+[TEST]
+gadgetron_configuration=GT_2DT_HASTE.xml
+reference_dataset=gtplus_FetalHASTE/image_0.img
+result_dataset=GT_2DT_HASTE.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=0.75
+comparison_threshold_scales=0.75
diff --git a/test/integration/cases/mkl/gtplus_LGE.cfg b/test/integration/cases/mkl/gtplus_LGE.cfg
new file mode 100644
index 0000000..8256910
--- /dev/null
+++ b/test/integration/cases/mkl/gtplus_LGE.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/gtplus/LGE/meas_MID00083_9_slice.dat
+siemens_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=-1
+siemens_dependency_measurement3=-1
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=1
+out_folder=test
+ismrmrd=gtplus_LGE.h5
+result_h5=gtplus_LGE_out.h5
+reference_h5= data/gtplus/LGE/gtplus_LGE_ref_20140826.h5
+
+[TEST]
+gadgetron_configuration=GT_2DT_LGE.xml
+reference_dataset=gtplus_LGE/image_0.img
+result_dataset=GT_2DT_LGE.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=0.11
+comparison_threshold_scales=0.05
diff --git a/test/integration/cases/mkl/gtplus_Perfusion.cfg b/test/integration/cases/mkl/gtplus_Perfusion.cfg
new file mode 100644
index 0000000..88b2abd
--- /dev/null
+++ b/test/integration/cases/mkl/gtplus_Perfusion.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/gtplus/Perfusion/meas_MID00045_R3_AIF_ON.dat
+siemens_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=-1
+siemens_dependency_measurement3=-1
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=1
+out_folder=test
+ismrmrd=gtplus_Perfusion.h5
+result_h5=gtplus_Perfusion_out.h5
+reference_h5= data/gtplus/Perfusion/gtplus_Perfusion_ref_20140826.h5
+
+[TEST]
+gadgetron_configuration=GT_2DT_Perfusion.xml
+reference_dataset=gtplus_Perfusion/image_0.img
+result_dataset=GT_2DT_Perfusion.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=0.1
+comparison_threshold_scales=0.075
diff --git a/test/integration/cases/mkl/gtplus_T2W.cfg b/test/integration/cases/mkl/gtplus_T2W.cfg
new file mode 100644
index 0000000..e2c0899
--- /dev/null
+++ b/test/integration/cases/mkl/gtplus_T2W.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/gtplus/T2W/meas_MID00057_T2w.dat
+siemens_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=-1
+siemens_dependency_measurement3=-1
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=1
+out_folder=test
+ismrmrd=gtplus_T2W.h5
+result_h5=gtplus_T2W_out.h5
+reference_h5= data/gtplus/T2W/gtplus_T2W_ref.h5
+
+[TEST]
+gadgetron_configuration=GT_2DT_T2W.xml
+reference_dataset=gtplus_T2W/image_0.img
+result_dataset=GT_2DT_T2W.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=0.075
+comparison_threshold_scales=0.075
diff --git a/test/integration/cases/mkl/gtplus_localizer.cfg b/test/integration/cases/mkl/gtplus_localizer.cfg
new file mode 100644
index 0000000..7bd7380
--- /dev/null
+++ b/test/integration/cases/mkl/gtplus_localizer.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/gtplus/localizer/meas_MID00026.dat
+siemens_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=-1
+siemens_dependency_measurement3=-1
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=1
+out_folder=test
+ismrmrd=gtplus_localizer.h5
+result_h5=gtplus_localizer_out.h5
+reference_h5= data/gtplus/localizer/gtplus_localizer_ref.h5
+
+[TEST]
+gadgetron_configuration=GT_2DT_Cartesian.xml
+reference_dataset=gtplus_localizer/image_0.img
+result_dataset=GT_2DT_Cartesian.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=0.75
+comparison_threshold_scales=0.75
diff --git a/test/integration/cases/mkl/gtplus_molli.cfg b/test/integration/cases/mkl/gtplus_molli.cfg
new file mode 100644
index 0000000..2f40686
--- /dev/null
+++ b/test/integration/cases/mkl/gtplus_molli.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/gtplus/MOLLI/20100330_10h33m11s_5562.dat
+siemens_parameter_xml=IsmrmrdParameterMap_Siemens_VB17.xml
+siemens_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_dependency_measurement1=-1
+siemens_dependency_measurement2=-1
+siemens_dependency_measurement3=-1
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=0
+out_folder=test
+ismrmrd=gtplus_molli.h5
+result_h5=gtplus_molli_out.h5
+reference_h5= data/gtplus/MOLLI/gtplus_molli_ref_20141105.h5
+
+[TEST]
+gadgetron_configuration=GT_2DT_MOLLI.xml
+reference_dataset=GT_2DT_MOLLI.xml/image_0/data
+result_dataset=GT_2DT_MOLLI.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=0.06
+comparison_threshold_scales=0.05
diff --git a/test/integration/cases/mkl/gtplus_real_time_cine.cfg b/test/integration/cases/mkl/gtplus_real_time_cine.cfg
new file mode 100644
index 0000000..82a04f9
--- /dev/null
+++ b/test/integration/cases/mkl/gtplus_real_time_cine.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/gtplus/RealTimeCine/meas_MID21_CINE_R4.dat
+siemens_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=-1
+siemens_dependency_measurement3=-1
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=1
+out_folder=test
+ismrmrd=gtplus_real_time_cine.h5
+result_h5=gtplus_real_time_cine_out.h5
+reference_h5= data/gtplus/RealTimeCine/gtplus_real_time_cine_ref.h5
+
+[TEST]
+gadgetron_configuration=GT_2DT_RealTimeCine.xml
+reference_dataset=gtplus_real_time_cine/image_0.img
+result_dataset=GT_2DT_RealTimeCine.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=0.1
+comparison_threshold_scales=0.1
diff --git a/test/integration/cases/mkl/gtplus_real_time_cine_9slices.cfg b/test/integration/cases/mkl/gtplus_real_time_cine_9slices.cfg
new file mode 100644
index 0000000..6ca2376
--- /dev/null
+++ b/test/integration/cases/mkl/gtplus_real_time_cine_9slices.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/gtplus/RealTimeCine_9slices/meas_MID00832.dat
+siemens_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=-1
+siemens_dependency_measurement3=-1
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=1
+out_folder=test
+ismrmrd=gtplus_real_time_cine_9slices.h5
+result_h5=gtplus_real_time_cine_9slices_out.h5
+reference_h5= data/gtplus/RealTimeCine_9slices/gtplus_real_time_cine_ref.h5
+
+[TEST]
+gadgetron_configuration=GT_2DT_RealTimeCine.xml
+reference_dataset=gtplus_real_time_cine_R5/image_0.img
+result_dataset=GT_2DT_RealTimeCine.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=0.75
+comparison_threshold_scales=0.75
diff --git a/test/integration/cases/mkl/gtplus_sasha.cfg b/test/integration/cases/mkl/gtplus_sasha.cfg
new file mode 100644
index 0000000..7bdf853
--- /dev/null
+++ b/test/integration/cases/mkl/gtplus_sasha.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/gtplus/sasha/20140325_15h59m29s_7720.dat
+siemens_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=-1
+siemens_dependency_measurement3=-1
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=1
+out_folder=test
+ismrmrd=gtplus_sasha.h5
+result_h5=gtplus_sasha_out.h5
+reference_h5= data/gtplus/sasha/gtplus_sasha_ref.h5
+
+[TEST]
+gadgetron_configuration=GT_2DT_Cartesian_GFactor.xml
+reference_dataset=gtplus_sasha/image_0.img
+result_dataset=GT_2DT_Cartesian_GFactor.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=0.06
+comparison_threshold_scales=0.05
diff --git a/test/integration/cases/mkl/gtplus_snr_unit_recon_builtin_noise.cfg b/test/integration/cases/mkl/gtplus_snr_unit_recon_builtin_noise.cfg
new file mode 100644
index 0000000..7c7067b
--- /dev/null
+++ b/test/integration/cases/mkl/gtplus_snr_unit_recon_builtin_noise.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/gtplus/snr_unit_recon_builtin_noise/meas_MID00127_FID02864_GRE_reps=150__WIP724_sPAT=4.dat
+siemens_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_dependency_measurement1=-1
+siemens_dependency_measurement2=-1
+siemens_dependency_measurement3=-1
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=0
+out_folder=test
+ismrmrd=gtplus_snr_unit_recon_builtin_noise.h5
+result_h5=gtplus_snr_unit_recon_builtin_noise_out.h5
+reference_h5= data/gtplus/snr_unit_recon_builtin_noise/gtplus_snr_unit_recon_ref.h5
+
+[TEST]
+gadgetron_configuration=GT_2DT_Cartesian_GFactor.xml
+reference_dataset=gtplus_snr_unit_recon/image_0.img
+result_dataset=GT_2DT_Cartesian_GFactor.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=0.075
+comparison_threshold_scales=0.075
diff --git a/test/integration/cases/mkl/gtplus_snr_unit_recon_ipat4.cfg b/test/integration/cases/mkl/gtplus_snr_unit_recon_ipat4.cfg
new file mode 100644
index 0000000..599510b
--- /dev/null
+++ b/test/integration/cases/mkl/gtplus_snr_unit_recon_ipat4.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/gtplus/snr_unit_recon_ipat4/meas_MID00175_FID02912_GRE_reps=150_iPAT=4.dat
+siemens_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=-1
+siemens_dependency_measurement3=-1
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=1
+out_folder=test
+ismrmrd=gtplus_snr_unit_recon_ipat4.h5
+result_h5=gtplus_snr_unit_recon_ipat4_out.h5
+reference_h5= data/gtplus/snr_unit_recon_ipat4/gtplus_snr_unit_recon_ref.h5
+
+[TEST]
+gadgetron_configuration=GT_2DT_Cartesian_GFactor.xml
+reference_dataset=gtplus_snr_unit_recon/image_0.img
+result_dataset=GT_2DT_Cartesian_GFactor.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=0.075
+comparison_threshold_scales=0.075
diff --git a/test/integration/cases/mkl/gtplus_snr_unit_recon_prospective_cine.cfg b/test/integration/cases/mkl/gtplus_snr_unit_recon_prospective_cine.cfg
new file mode 100644
index 0000000..37d9ce4
--- /dev/null
+++ b/test/integration/cases/mkl/gtplus_snr_unit_recon_prospective_cine.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/gtplus/ProspectiveCine/meas_MID00209_FID07469_CV_gtPlus_2D_epat4.dat
+siemens_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=-1
+siemens_dependency_measurement3=-1
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=1
+out_folder=test
+ismrmrd=gtplus_snr_unit_recon_ProspectiveCine.h5
+result_h5=gtplus_snr_unit_recon_ProspectiveCine_out.h5
+reference_h5= data/gtplus/ProspectiveCine/gtplus_snr_unit_recon_ref_20140826.h5
+
+[TEST]
+gadgetron_configuration=GT_2DT_Cartesian_GFactor.xml
+reference_dataset=gtplus_snr_unit_recon/image_0.img
+result_dataset=GT_2DT_Cartesian_GFactor.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=0.05
+comparison_threshold_scales=0.05
diff --git a/test/integration/cases/mkl/gtplus_snr_unit_recon_spat2_asym_pf.cfg b/test/integration/cases/mkl/gtplus_snr_unit_recon_spat2_asym_pf.cfg
new file mode 100644
index 0000000..781108e
--- /dev/null
+++ b/test/integration/cases/mkl/gtplus_snr_unit_recon_spat2_asym_pf.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/gtplus/snr_unit_recon_spat2_asym_pf/meas_MID00156_FID03210_GRE_reps=50_sPAT2_xRes256_dummy1_FOV240_strong_asym_7_8pf.dat
+siemens_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=-1
+siemens_dependency_measurement3=-1
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=1
+out_folder=test
+ismrmrd=gtplus_snr_unit_recon_spat2_asym_pf.h5
+result_h5=gtplus_snr_unit_recon_spat2_asym_pf_out.h5
+reference_h5= data/gtplus/snr_unit_recon_spat2_asym_pf/gtplus_snr_unit_recon_ref.h5
+
+[TEST]
+gadgetron_configuration=GT_2DT_Cartesian_GFactor.xml
+reference_dataset=gtplus_snr_unit_recon/image_0.img
+result_dataset=GT_2DT_Cartesian_GFactor.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=0.075
+comparison_threshold_scales=0.075
diff --git a/test/integration/cases/mkl/gtplus_snr_unit_recon_spat3.cfg b/test/integration/cases/mkl/gtplus_snr_unit_recon_spat3.cfg
new file mode 100644
index 0000000..3be75ed
--- /dev/null
+++ b/test/integration/cases/mkl/gtplus_snr_unit_recon_spat3.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/gtplus/snr_unit_recon_spat3/meas_MID00152_FID03206_GRE_reps=20_sPAT3_xRes256_dummy1_FOV240.dat
+siemens_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=-1
+siemens_dependency_measurement3=-1
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=1
+out_folder=test
+ismrmrd=gtplus_snr_unit_recon_spat3.h5
+result_h5=gtplus_snr_unit_recon_spat3_out.h5
+reference_h5= data/gtplus/snr_unit_recon_spat3/gtplus_snr_unit_recon_ref.h5
+
+[TEST]
+gadgetron_configuration=GT_2DT_Cartesian_GFactor.xml
+reference_dataset=gtplus_snr_unit_recon/image_0.img
+result_dataset=GT_2DT_Cartesian_GFactor.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=0.075
+comparison_threshold_scales=0.075
diff --git a/test/integration/cases/mkl/gtplus_snr_unit_recon_tpat3.cfg b/test/integration/cases/mkl/gtplus_snr_unit_recon_tpat3.cfg
new file mode 100644
index 0000000..0e8c465
--- /dev/null
+++ b/test/integration/cases/mkl/gtplus_snr_unit_recon_tpat3.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/gtplus/snr_unit_recon_tpat3/meas_MID00171_FID02908_GRE_reps=150_TPAT=3.dat
+siemens_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=-1
+siemens_dependency_measurement3=-1
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=1
+out_folder=test
+ismrmrd=gtplus_snr_unit_recon_tpat3.h5
+result_h5=gtplus_snr_unit_recon_tpat3_out.h5
+reference_h5= data/gtplus/snr_unit_recon_tpat3/gtplus_snr_unit_recon_ref.h5
+
+[TEST]
+gadgetron_configuration=GT_2DT_Cartesian_GFactor.xml
+reference_dataset=gtplus_snr_unit_recon/image_0.img
+result_dataset=GT_2DT_Cartesian_GFactor.xml/image_0/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=0.075
+comparison_threshold_scales=0.075
diff --git a/test/integration/cases/python/simple_gre_python.cfg b/test/integration/cases/python/simple_gre_python.cfg
new file mode 100644
index 0000000..60c66df
--- /dev/null
+++ b/test/integration/cases/python/simple_gre_python.cfg
@@ -0,0 +1,24 @@
+[FILES]
+siemens_dat=data/simple_gre/meas_MiniGadgetron_GRE.dat
+siemens_parameter_xml=IsmrmrdParameterMap.xml
+siemens_parameter_xsl=IsmrmrdParameterMap.xsl
+siemens_dependency_measurement1=0
+siemens_dependency_measurement2=0
+siemens_dependency_measurement3=0
+siemens_dependency_parameter_xml=IsmrmrdParameterMap_Siemens.xml
+siemens_dependency_parameter_xsl=IsmrmrdParameterMap_Siemens.xsl
+siemens_data_measurement=0
+out_folder=test
+ismrmrd=simple_gre.h5
+result_h5=simple_gre_out.h5
+reference_h5= data/simple_gre/simple_gre_out.h5
+
+[TEST]
+gadgetron_configuration=python_short.xml
+reference_dataset=python_short.xml/image_1.img
+result_dataset=python_short.xml/image_1/data
+compare_dimensions=1
+compare_values=1
+compare_scales=1
+comparison_threshold_values=1e-5
+comparison_threshold_scales=1e-5
diff --git a/test/integration/data.txt b/test/integration/data.txt
new file mode 100644
index 0000000..d980d14
--- /dev/null
+++ b/test/integration/data.txt
@@ -0,0 +1,62 @@
+gre_3d/simple_gre_out_3d.h5: 60004b2677b9780af9c633e47e7acd1a
+gre_3d/meas_MID248_gre_FID30644.dat: 39ac16864627691cf7d84aa2ce13c1ae
+radial_phantom/fixed_radial_mode1.h5: 3a8e10388a3a11683c7e611537c1bd44
+radial_phantom/golden_radial_mode2.h5: 42c60da3121fa50b8c04e1711b8f4659
+radial_phantom/meas_MID00133_FID20080_CV_Radial_Fixed_Angle_128_x8_32phs.dat: 58f8de6b6e755c4d3dcd0c7dace3b8f6
+radial_phantom/meas_MID00135_FID20082_CV_Radial_Golden_Angle_128_512_views.dat: 0326afbb168982f4144704781a08b3ec
+rtgrappa/acc_data_with_device_2.dat: ac0b59c6c8989c94738e41e2c4b5ec13
+rtgrappa/grappa_rate2_out_20141024.h5: 8ca057808fcb15e795d7793674475e8a
+simple_gre/meas_MiniGadgetron_GRE.dat: 7c5c255522e42367546b4045560afcf8
+simple_gre/simple_gre_out.h5: 624ac3178e15e27e52489f330b3fffa5
+spiral/simple_spiral_out.h5: 44be83612c69f008ee71a47fecd3c7ed
+spiral/meas_MID1132_MiniIRT_spiral_16int_tr500_acc1_10reps_FID13142.dat: 763baf3d7d0acff185ec9a3c85d5a3f3
+gtplus/3D/gtplus_3D_l1spirit_ref.h5: b1df7a765522483c348c7a89550d3cac
+gtplus/3D/gtplus_3D_ref.h5: 4ba53157bf30e73041bda8f9b18bcb1b
+gtplus/3D/meas_MID00370_embedded_2by2.dat: bf92f5ac70492994a0816cafc3308854
+gtplus/3D_head/gtplus_3D_l1spirit_ref.h5: 996d4cf35cec9dcf0e0598cefed80d90
+gtplus/3D_head/gtplus_3D_ref.h5: 30e3e26657220f6d75640a6901d0bce9
+gtplus/3D_head/gtplus_3D_ref_20140924.h5: c2546f8fd01ae5bc7bf5713061507f78
+gtplus/3D_head/meas_MID00156_FID05944_GRE_128iso_p2x2.dat: 8077b0d4b957a0cffb7cdf0bdb0014fc
+gtplus/FatWater/gtplus_FatWater_ref.h5: 928d02fcffda37f7227a685b718af82e
+gtplus/FatWater/meas_MID00342_3e2i_R4.dat: 8d9e3aedae8b31f63551cd4e85cd6a53
+gtplus/FetalHASTE/gtplus_FetalHASTE_ref.h5: e0dcb71b3cedfad6178101c0f51ef7c0
+gtplus/FetalHASTE/gtplus_FetalHASTE_ref_20140826.h5: 6bac57c8df79cff1dbd4ccf0559ad444
+gtplus/FetalHASTE/raw30488.dat: 897973938d67a1094f7249520ff15e57
+gtplus/LGE/gtplus_LGE_ref.h5: d78d8ebe7393628648b7f8370f6debb6
+gtplus/LGE/gtplus_LGE_ref_20140826.h5: 80f0d661b08b21b602153b6f471d998f
+gtplus/LGE/meas_MID00083_9_slice.dat: 0912ac1bf7d2adb6a74515c563b91d2e
+gtplus/localizer/gtplus_localizer_ref.h5: 1b2dcf4a942f9616c957fbe444b625b5
+gtplus/localizer/meas_MID00026.dat: ef5954ad53996d5d1455a95985a743a6
+gtplus/MOLLI/20100330_10h33m11s_5562.dat: db50a52977aafd3c8d100fa9aa7d8dcf
+gtplus/MOLLI/gtplus_molli_ref.h5: e1c1e53a5cdbc78575a33d011cb9c65b
+gtplus/MOLLI/gtplus_molli_ref_20141105.h5: 719957e63625feccc50e5ac3615de38c
+gtplus/Perfusion/gtplus_Perfusion_ref.h5: 1845fd154dc204df36e11dfb4d1bba88
+gtplus/Perfusion/gtplus_Perfusion_ref_20140826.h5: da1a5e0ee6db499344f3fde4a44bf85b
+gtplus/Perfusion/meas_MID00045_R3_AIF_ON.dat: 7f82e4e95f83cf3e876abba8030f3b34
+gtplus/ProspectiveCine/gtplus_snr_unit_recon_ref.h5: cd8df1e755f731dfda828c46a503b90f
+gtplus/ProspectiveCine/gtplus_snr_unit_recon_ref_20140826.h5: 6b1c6042d2c8e98eb9983a14dbe91724
+gtplus/ProspectiveCine/meas_MID00209_FID07469_CV_gtPlus_2D_epat4.dat: c36f3c1a0fbb27fec13ea983aeceae13
+gtplus/RealTimeCine/gtplus_real_time_cine_l1spirit_ref.h5: 04a3063da8248b26cd01ad5475dffab9
+gtplus/RealTimeCine/gtplus_real_time_cine_ref.h5: 1e02fbbc9a3af892cec51dff1e12e55e
+gtplus/RealTimeCine/gtplus_real_time_cine_spirit_ref.h5: d9c774e7140145395d0a469c543ac3e9
+gtplus/RealTimeCine/meas_MID21_CINE_R4.dat: 3c985b16468580a056350a0fc5473934
+gtplus/RealTimeCine_9slices/gtplus_real_time_cine_l1spirit_ref.h5: d01c3a629565d18f34cf79ca0ed26133
+gtplus/RealTimeCine_9slices/gtplus_real_time_cine_ref.h5: 603a563ddd5d584226bfb3ede13a53cb
+gtplus/RealTimeCine_9slices/gtplus_real_time_cine_spirit_ref.h5: 1c00dec61da7dfdb3ff1dcba4c50d152
+gtplus/RealTimeCine_9slices/meas_MID00832.dat: a8ae3a7f00ffd33ca43b33ebbdf931b0
+gtplus/sasha/20140325_15h59m29s_7720.dat: 85df92b153e6199f4892a787d1f37e96
+gtplus/sasha/gtplus_sasha_ref.h5: 1ff0885e33829c51a89c90342fcf6221
+gtplus/snr_unit_recon_builtin_noise/gtplus_snr_unit_recon_ref.h5: 25748ce15e5c28573e21bed10c726c53
+gtplus/snr_unit_recon_builtin_noise/meas_MID00127_FID02864_GRE_reps=150__WIP724_sPAT=4.dat: 76e580e1cff6091048499cfcf944e755
+gtplus/snr_unit_recon_ipat4/gtplus_snr_unit_recon_ref.h5: 7c19ad053bcc05dfb4ff777d07bf4d35
+gtplus/snr_unit_recon_ipat4/meas_MID00175_FID02912_GRE_reps=150_iPAT=4.dat: c73c9b25dab6e401f022aeef2549c911
+gtplus/snr_unit_recon_no_pat/gtplus_snr_unit_recon_ref.h5: c83d76aa5c2247506a10043bb92649e0
+gtplus/snr_unit_recon_no_pat/meas_MID00032_FID22409_oil_gre_128_150reps_pause_alpha_10.dat: ed7fb5ba56ae466f59bbe94e94ec9ca0
+gtplus/snr_unit_recon_spat2_asym_pf/gtplus_snr_unit_recon_ref.h5: ce584033fea1d6b85b40aac89f3a466a
+gtplus/snr_unit_recon_spat2_asym_pf/meas_MID00156_FID03210_GRE_reps=50_sPAT2_xRes256_dummy1_FOV240_strong_asym_7_8pf.dat: 93013c76eb771d62dc2eb3b9a5ab24a9
+gtplus/snr_unit_recon_spat3/gtplus_snr_unit_recon_ref.h5: 32111c294adc19aee28a5d0e34b47bf2
+gtplus/snr_unit_recon_spat3/meas_MID00152_FID03206_GRE_reps=20_sPAT3_xRes256_dummy1_FOV240.dat: 853ab53d494ee5f085888c7cfe57172f
+gtplus/snr_unit_recon_tpat3/gtplus_snr_unit_recon_ref.h5: 71a665565321165be3c0ce6509bf170a
+gtplus/snr_unit_recon_tpat3/meas_MID00171_FID02908_GRE_reps=150_TPAT=3.dat: 21e219f663a27c22fec4b56216b2f3ed
+gtplus/T2W/gtplus_T2W_ref.h5: 646496067fca502f86f2770f39a71be1
+gtplus/T2W/meas_MID00057_T2w.dat: 46aa75c471a41c793006328a224a4001
diff --git a/test/integration/get_data.py b/test/integration/get_data.py
new file mode 100644
index 0000000..a1e02f6
--- /dev/null
+++ b/test/integration/get_data.py
@@ -0,0 +1,68 @@
+import os
+import sys
+import urllib2
+import hashlib
+
+# Manifest file name; main() looks for it next to this script and passes it to load_checksums().
+DATAFILE = "data.txt"
+# Name of the directory, next to this script, that downloaded files are stored under.
+DATADIR = "data"
+# Base URL; main() appends each manifest path to it to form the download URL.
+HOST = 'http://gadgetrontestdata.s3-website-us-east-1.amazonaws.com'
+
+def md5sum(filename, blocksize=64*1024):
+    """Return the hexadecimal MD5 digest of the file at `filename`.
+
+    The file is read in chunks of `blocksize` bytes so that large data files
+    are hashed without being loaded into memory in one piece.
+    """
+    hsh = hashlib.md5()
+    # Open read-only ("rb", not "r+b"): hashing never writes, and update mode
+    # makes the open fail on files that are not writable.
+    with open(filename, "rb") as f:
+        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
+        for buf in iter(lambda: f.read(blocksize), b""):
+            hsh.update(buf)
+    return hsh.hexdigest()
+
+def load_checksums(datafile):
+    """Read the data manifest and return a dict mapping file path to MD5 digest.
+
+    Each non-blank line of `datafile` has the form "relative/path: md5hex".
+    Whitespace around both fields is stripped. A non-blank line without a
+    ':' raises ValueError; a failure to open the file propagates to the caller.
+    """
+    checksums = {}
+    with open(datafile) as f:
+        for line in f:
+            # Tolerate blank lines (e.g. an extra newline at the end of the file).
+            if not line.strip():
+                continue
+            # Split on the last ':' only, so a ':' inside the path is kept;
+            # a hex digest never contains one.
+            filepath, checksum = line.rsplit(':', 1)
+            checksums[filepath.strip()] = checksum.strip()
+    return checksums
+
+def download(url, dest, blocksize=64*1024):
+    """Fetch `url` and write the response body to the file `dest`.
+
+    `dest` is opened with mode 'wb', so an existing file is overwritten. The
+    body is copied in chunks of `blocksize` bytes rather than with a single
+    read(), so the whole payload is never held in memory at once. Errors
+    raised by urllib2.urlopen() propagate to the caller.
+    """
+    furl = urllib2.urlopen(url)
+    try:
+        with open(dest, 'wb') as fdest:
+            # An empty read marks the end of the response.
+            for buf in iter(lambda: furl.read(blocksize), b""):
+                fdest.write(buf)
+    finally:
+        # Release the connection even if writing the file fails.
+        furl.close()
+
+def main():
+    """Fetch every file listed in the manifest that is missing or fails its checksum.
+
+    The manifest (DATAFILE) and the data directory (DATADIR) are resolved
+    relative to the directory containing this script. Download errors are
+    printed and the remaining files are still processed.
+    """
+    # determine test dir from full path to this script
+    testdir = os.path.dirname(os.path.realpath(sys.argv[0]))
+    datadir = os.path.join(testdir, DATADIR)
+    datafile = os.path.join(testdir, DATAFILE)
+    if not os.path.isdir(datadir):
+        os.mkdir(datadir)
+
+    print("Reading list of data from %s" % datafile)
+    try:
+        checksums = load_checksums(datafile)
+    except IOError:
+        print("Failed to read %s" % datafile)
+        return
+
+    print("Storing test data in %s" % datadir)
+
+    for dataname, checksum in checksums.items():
+        datapath = os.path.join(datadir, dataname)
+        parent = os.path.dirname(datapath)
+        if not os.path.isdir(parent):
+            os.makedirs(parent)
+        url = '%s/%s' % (HOST, dataname)
+
+        print("Verifying: %s..." % dataname)
+        # if the file is present and its checksum matches, there is nothing to do
+        if os.path.isfile(datapath) and md5sum(datapath) == checksum:
+            continue
+
+        print("Downloading: %s..." % dataname)
+        try:
+            download(url, datapath)
+        except urllib2.HTTPError as e:
+            # HTTPError is a subclass of URLError, so it must be caught first.
+            print("HTTP Error: %d %s" % (e.code, url))
+        except urllib2.URLError as e:
+            print("URL Error: %s - %s" % (e.reason, url))
+        else:
+            # Re-check the fresh copy so a truncated or corrupted download
+            # is reported now instead of surfacing later as a test failure.
+            if md5sum(datapath) != checksum:
+                print("Checksum mismatch after download: %s" % dataname)
+
+# Run the download only when this file is executed as a script, not on import.
+if __name__ == '__main__':
+    main()
diff --git a/test/integration/run_all_tests.py b/test/integration/run_all_tests.py
new file mode 100644
index 0000000..cbeace2
--- /dev/null
+++ b/test/integration/run_all_tests.py
@@ -0,0 +1,71 @@
+import ConfigParser
+import os
+import sys
+import glob
+import subprocess
+
+def _append_log(master_log, test_case, log_filename):
+    """Append the log at `log_filename` to `master_log` under a banner naming `test_case`."""
+    master_log.write("==============================================\n")
+    master_log.write("   GADGETRON TEST CASE: " + test_case + "\n")
+    master_log.write("==============================================\n")
+    if os.path.isfile(log_filename):
+        with open(log_filename) as infile:
+            master_log.write(infile.read())
+    else:
+        # Keep going if this log was never written, so that the test's own
+        # exit status (not an IOError raised here) decides the outcome.
+        master_log.write("(log file not found: " + log_filename + ")\n")
+
+def main():
+    """Run each test case from the list file and merge its logs into master logs.
+
+    Command line: <ismrmrd_home> <gadgetron home> <test case list file>
+    [chroot path]. Every non-blank line of the list file names a test case
+    configuration, which is run through run_gadgetron_test.py. The
+    gadgetron.log and client.log found in that test's configured out_folder
+    are appended to gadgetron.log and client.log in the current directory.
+    Execution stops at the first test that exits with a non-zero status.
+
+    Returns 0 if all tests passed and -100 otherwise.
+    """
+    if len(sys.argv) < 4:
+        sys.stderr.write("Missing arguments\n")
+        prog = os.path.basename(sys.argv[0])
+        usage = "Usage: %s <ismrmrd_home> <gadgetron home> <test case list file> <optional: chroot path>\n" % prog
+        sys.stderr.write(usage)
+        sys.exit(1)
+    ismrmrd_home = sys.argv[1]
+    gadgetron_home = sys.argv[2]
+    test_case_list = sys.argv[3]
+    pwd = os.getcwd()
+
+    # The optional chroot path is forwarded unchanged to every test run;
+    # the slice is empty when the argument was not given.
+    chroot_args = sys.argv[4:5]
+
+    with open(test_case_list, 'r') as test_cases:
+        # Skip blank lines: config.read('') loads nothing, after which
+        # config.get() would raise NoSectionError.
+        content = [t for t in test_cases.read().splitlines() if t.strip()]
+
+    test_result = True
+
+    with open('gadgetron.log', 'w') as gadgetron_outfile:
+        with open('client.log', 'w') as client_outfile:
+            for t in content:
+                print("Grabbing test case: " + t)
+
+                # We need to figure out where this test dumps log files
+                config = ConfigParser.RawConfigParser()
+                config.read(t)
+                out_folder = config.get('FILES', 'out_folder')
+                gadgetron_log_filename = os.path.join(pwd, out_folder, "gadgetron.log")
+                client_log_filename = os.path.join(pwd, out_folder, "client.log")
+
+                # Now run the test
+                command = ["python", "run_gadgetron_test.py", ismrmrd_home, gadgetron_home, t]
+                r = subprocess.call(command + chroot_args)
+
+                # Grab the log files and append to master logs
+                _append_log(gadgetron_outfile, t, gadgetron_log_filename)
+                _append_log(client_outfile, t, client_log_filename)
+
+                if r != 0:
+                    test_result = False
+                    break
+
+    if test_result:
+        print("ALL TESTS: SUCCESS")
+        return 0
+    else:
+        print("ALL_TESTS:  FAILED")
+        return -100
+
+# When executed as a script, use main()'s return value as the process exit status.
+if __name__=="__main__":
+    sys.exit(main())
diff --git a/test/integration/run_gadgetron_test.py b/test/integration/run_gadgetron_test.py
new file mode 100644
index 0000000..c3d0a8a
--- /dev/null
+++ b/test/integration/run_gadgetron_test.py
@@ -0,0 +1,328 @@
+import subprocess
+import time
+import sys
+import h5py
+import numpy
+import ConfigParser
+import os
+import shutil
+import platform
+import time
+
+def run_test(environment, testcase_cfg_file, chroot_path, port):
+    """Run one integration test case and compare its output with the reference data.
+
+    environment       -- dict of environment variables for the child processes
+    testcase_cfg_file -- test case config file with a [FILES] and a [TEST] section
+    chroot_path       -- chroot location, or the string "Empty" to start gadgetron directly
+    port              -- Gadgetron port as a string (it is passed on command lines)
+
+    Returns False when an input file is missing, an external command exits with a
+    non-zero status, or a comparison enabled in the [TEST] section fails; True otherwise.
+    """
+    print("Running test case: " + testcase_cfg_file)
+
+    pwd = os.getcwd()
+    config = ConfigParser.RawConfigParser()
+    config.read(testcase_cfg_file)
+
+    out_folder = config.get('FILES', 'out_folder')
+    siemens_dat = os.path.join(pwd, config.get('FILES', 'siemens_dat'))
+    ismrmrd = os.path.join(pwd, out_folder, config.get('FILES', 'ismrmrd'))
+    result_h5 = os.path.join(pwd, out_folder, config.get('FILES', 'result_h5'))
+    reference_h5 = os.path.join(pwd, config.get('FILES', 'reference_h5'))
+    siemens_parameter_xml = config.get('FILES', 'siemens_parameter_xml')
+    siemens_parameter_xsl = config.get('FILES', 'siemens_parameter_xsl')
+    siemens_dependency_measurement1 = config.getint('FILES', 'siemens_dependency_measurement1')
+    siemens_dependency_measurement2 = config.getint('FILES', 'siemens_dependency_measurement2')
+    siemens_dependency_measurement3 = config.getint('FILES', 'siemens_dependency_measurement3')
+    siemens_dependency_parameter_xml = config.get('FILES', 'siemens_dependency_parameter_xml')
+    siemens_dependency_parameter_xsl = config.get('FILES', 'siemens_dependency_parameter_xsl')
+    siemens_data_measurement = config.getint('FILES', 'siemens_data_measurement')
+    gadgetron_log_filename = os.path.join(pwd, out_folder, "gadgetron.log")
+    client_log_filename = os.path.join(pwd, out_folder, "client.log")
+
+    gadgetron_configuration = config.get('TEST', 'gadgetron_configuration')
+    reference_dataset = config.get('TEST', 'reference_dataset')
+    result_dataset = config.get('TEST', 'result_dataset')
+    compare_dimensions = config.getboolean('TEST', 'compare_dimensions')
+    compare_values = config.getboolean('TEST', 'compare_values')
+    compare_scales = config.getboolean('TEST', 'compare_scales')
+    comparison_threshold_values = config.getfloat('TEST', 'comparison_threshold_values')
+    comparison_threshold_scales = config.getfloat('TEST', 'comparison_threshold_scales')
+
+    dependency_1 = os.path.join(pwd, out_folder, "dependency_1.h5")
+    dependency_2 = os.path.join(pwd, out_folder, "dependency_2.h5")
+    dependency_3 = os.path.join(pwd, out_folder, "dependency_3.h5")
+
+    if not os.path.isfile(siemens_dat):
+        print("Can't find Siemens file %s" % siemens_dat)
+        return False
+
+    if not os.path.isfile(reference_h5):
+        print("Can't find reference HDF5 file %s" % reference_h5)
+        return False
+
+    if os.path.exists(out_folder):
+        shutil.rmtree(out_folder)
+        time.sleep(2)
+
+    os.makedirs(out_folder)
+
+    success = True
+    # Only used in chroot mode (second Popen call below)
+    gadgetron_start = "sudo " + chroot_path + "../start.sh"
+
+    with open(gadgetron_log_filename, "w") as gf:
+        if chroot_path == "Empty":
+            p = subprocess.Popen(["gadgetron", "-p", port], env=environment, stdout=gf, stderr=gf)
+        else:
+            p = subprocess.Popen(gadgetron_start, shell=True, stdout=gf, stderr=gf)
+
+        # presumably gives the server time to come up before the first client connects
+        time.sleep(2)
+
+        with open(client_log_filename, "w") as cf:
+            # if there are dependencies
+            if siemens_data_measurement > 0:
+
+                # ------------------------------------------------------------
+                # first dependency
+                if siemens_dependency_measurement1 >= 0:
+                    print("Converting Siemens .dat file to ISMRMRD for the first dependency measurement.")
+                    r = subprocess.call(["siemens_to_ismrmrd", "-X","-f", siemens_dat, "-m",
+                                        siemens_dependency_parameter_xml, "-x", siemens_dependency_parameter_xsl, "-o",
+                                        dependency_1, "-z", str(siemens_dependency_measurement1+1)],
+                                        env=environment, stdout=cf, stderr=cf)
+                    if r != 0:
+                        print("Failed to run siemens_to_ismrmrd for the first dependency measurement!")
+                        success = False
+
+                    print("Running Gadgetron recon on the first dependency measurement")
+                    r = subprocess.call(["gadgetron_ismrmrd_client", "-p", port, "-f", dependency_1, "-c",
+                                            "default_measurement_dependencies.xml"],
+                                            env=environment, stdout=cf, stderr=cf)
+                    if r != 0:
+                        print("Failed to run gadgetron_ismrmrd_client on the first dependency measurement!")
+                        success = False
+
+                # ------------------------------------------------------------
+                # second dependency
+                if siemens_dependency_measurement2 >= 0:
+                    print("Converting Siemens .dat file to ISMRMRD for the second dependency measurement.")
+                    r = subprocess.call(["siemens_to_ismrmrd", "-X", "-f", siemens_dat, "-m",
+                                        siemens_dependency_parameter_xml, "-x", siemens_dependency_parameter_xsl, "-o",
+                                        dependency_2, "-z", str(siemens_dependency_measurement2+1)],
+                                        env=environment, stdout=cf, stderr=cf)
+                    if r != 0:
+                        print("Failed to run siemens_to_ismrmrd for the second dependency measurement!")
+                        success = False
+
+                    print("Running Gadgetron recon on the second dependency measurement")
+                    r = subprocess.call(["gadgetron_ismrmrd_client", "-p", port, "-f" , dependency_2, "-c",
+                                            "default_measurement_dependencies.xml"],
+                                            env=environment, stdout=cf, stderr=cf)
+
+                    if r != 0:
+                        print("Failed to run gadgetron_ismrmrd_client on the second dependency measurement!")
+                        success = False
+
+                # ------------------------------------------------------------
+                # third dependency
+                if siemens_dependency_measurement3 >= 0:
+                    print("Converting Siemens .dat file to ISMRMRD for the third dependency measurement.")
+                    r = subprocess.call(["siemens_to_ismrmrd", "-X", "-f", siemens_dat, "-m",
+                                        siemens_dependency_parameter_xml, "-x", siemens_dependency_parameter_xsl, "-o",
+                                        dependency_3, "-z", str(siemens_dependency_measurement3+1)],
+                                        env=environment, stdout=cf, stderr=cf)
+                    if r != 0:
+                        print("Failed to run siemens_to_ismrmrd for the third dependency measurement!")
+                        success = False
+
+                    print("Running Gadgetron recon on the third dependency measurement")
+                    r = subprocess.call(["gadgetron_ismrmrd_client", "-p", port, "-f", dependency_3, "-c",
+                                            "default_measurement_dependencies.xml"],
+                                            env=environment, stdout=cf, stderr=cf)
+
+                    if r != 0:
+                        print("Failed to run gadgetron_ismrmrd_client on the third dependency measurement!")
+                        success = False
+
+            # ---------------------------------------------------------------------------------------------
+            # now run the data measurement
+            print("Converting Siemens .dat file to ISMRMRD for data measurement.")
+            cmd = ["siemens_to_ismrmrd", "-X", "-f", siemens_dat, "-m",
+                    siemens_parameter_xml, "-x", siemens_parameter_xsl,
+                    "-o", ismrmrd, "-z", str(siemens_data_measurement+1)]
+
+            r = subprocess.call(cmd, env=environment, stdout=cf, stderr=cf)
+            if r != 0:
+                print("Failed to run siemens_to_ismrmrd!")
+                success = False
+
+            print("Running Gadgetron recon on data measurement")
+            start_time = time.time()
+            r = subprocess.call(["gadgetron_ismrmrd_client", "-p", port, "-f" , ismrmrd, "-c",
+                                    gadgetron_configuration, "-G", gadgetron_configuration, "-o", result_h5],
+                                    env=environment, stdout=cf, stderr=cf)
+            print("Elapsed time: " + str(time.time()-start_time))
+            if r != 0:
+                print("Failed to run gadgetron_ismrmrd_client!")
+                success = False
+
+        # stop the process started above (gadgetron itself, or the sudo shell in chroot mode)
+        p.terminate()
+
+        # make sure the gadgetron is stopped
+        if chroot_path != "Empty":
+            gadgetron_stop="sudo kill `pgrep -U root start.sh`"
+            subprocess.call(gadgetron_stop, shell=True)
+            time.sleep(1)
+
+    if not success:
+        return False
+
+    print("Comparing results")
+
+    # Open read-only so that the comparison can never create or modify a file
+    f1 = h5py.File(result_h5, 'r')
+    f2 = h5py.File(reference_h5, 'r')
+    d1 = f1[result_dataset]
+    d2 = f2[reference_dataset]
+
+    # The shape stored by the 1.0 API is always N x Nchan x Nz x Ny x Nx
+    # Prior to 1.0, if a dimension was a singleton, it could be missing
+    # h5py returns a fixed tuple for an array shape
+    # so singleton dimensions are stripped from both shapes before comparing
+    # TODO: fix the shapes in the reference data
+    a1 = [dim for dim in d1.shape if dim != 1]
+    a2 = [dim for dim in d2.shape if dim != 1]
+    shapes_match = (a1 == a2)
+
+    # If the types in the hdf5 are unsigned short numpy produces norms, dot products etc. in unsigned short. And that _will_ overflow...
+    v1 = d1[...].flatten().astype('float32')
+    v2 = d2[...].flatten().astype('float32')
+    # Both datasets are in memory now, so the files can be released
+    f1.close()
+    f2.close()
+
+    # Norm of the difference, relative to the norm of the reference
+    norm_diff = numpy.linalg.norm(v1 - v2) / numpy.linalg.norm(v2)
+    # Ratio of <d1,d1> to <d1,d2>
+    scale = float(numpy.dot(v1, v1)) / float(numpy.dot(v1, v2))
+
+    result = True
+
+    if compare_dimensions:
+        print("   --Comparing dimensions: " + str(shapes_match))
+        result = result and shapes_match
+
+    if compare_values:
+        print("   --Comparing values, norm diff : %s (threshold: %s)" %
+                (str(norm_diff), str(comparison_threshold_values)))
+        result = result and (norm_diff < comparison_threshold_values)
+
+    if compare_scales:
+        print("   --Comparing image scales, ratio : %s (%s) (threshold: %s)" %
+                (str(scale), str(abs(1-scale)), str(comparison_threshold_scales)))
+        result = result and (abs(1-scale) < comparison_threshold_scales)
+
+    return result
+
+def main():
+    """Build the child environment from the command line and run one test case (returns 0 on success, -100 on failure)."""
+    if len(sys.argv) < 4:
+        sys.stderr.write("Missing arguments\n")
+        prog = os.path.basename(sys.argv[0])
+        help = "Usage: %s <ismrmrd home> <gadgetron home> <test case config> <optional: chroot path>\n" % prog
+        sys.stderr.write(help)
+        sys.exit(1)
+
+    use_chroot = len(sys.argv) >= 5
+    if use_chroot and platform.system() != "Linux":
+        prog = os.path.basename(sys.argv[0])
+        help = "%s with chroot can only run in linux \n" % prog
+        sys.stderr.write(help)
+        sys.exit(1)
+
+    if use_chroot and os.getuid() != 0:
+        prog = os.path.basename(sys.argv[0])
+        help = "%s with chroot requires root previlige to run \n" % prog
+        sys.stderr.write(help)
+        sys.exit(1)
+
+    chroot_path = "Empty"
+    port = "9003"
+    if use_chroot:
+        chroot_path = sys.argv[4]
+        port = "9002"
+
+    myenv = dict()
+
+    if use_chroot:
+        # NOTE(review): realpath() is absolute, so os.path.join() drops chroot_path here -- confirm the intent
+        myenv["ISMRMRD_HOME"] = os.path.join(chroot_path, os.path.realpath(sys.argv[1]))
+        myenv["GADGETRON_HOME"] = os.path.join(chroot_path, os.path.realpath(sys.argv[2]))
+    else:
+        myenv["ISMRMRD_HOME"] = os.path.realpath(sys.argv[1])
+        myenv["GADGETRON_HOME"] = os.path.realpath(sys.argv[2])
+
+    myenv["PYTHONPATH"] = os.environ.get("PYTHONPATH", "")
+    test_case = sys.argv[3]
+
+    libpath = "LD_LIBRARY_PATH"
+    if platform.system() == "Darwin":
+        libpath = "DYLD_FALLBACK_LIBRARY_PATH"
+
+    if platform.system() == "Windows":
+        myenv["SystemRoot"] = os.environ.get('SystemRoot', "")
+        myenv["PATH"] = os.environ.get('Path', "")
+        # Path need not end with a separator; without one its last entry would fuse with ours
+        if myenv["PATH"] and not myenv["PATH"].endswith(";"):
+            myenv["PATH"] += ";"
+        myenv["PATH"] += myenv["ISMRMRD_HOME"] + "/lib;"
+        myenv["PATH"] += myenv["ISMRMRD_HOME"] + "/bin;"
+        myenv["PATH"] += myenv["GADGETRON_HOME"] + "/lib;"
+        myenv["PATH"] += myenv["GADGETRON_HOME"] + "/bin;"
+        myenv[libpath] = ""
+    else:
+        myenv[libpath] = myenv["ISMRMRD_HOME"] + "/lib:"
+        myenv[libpath] += myenv["GADGETRON_HOME"] + "/lib:"
+        myenv[libpath] += myenv["GADGETRON_HOME"] + "/../arma/lib:"
+        if use_chroot:
+            myenv[libpath] += chroot_path + "/usr/local/cuda/lib64:"
+            myenv[libpath] += chroot_path + "/opt/intel/mkl/lib/intel64:"
+            myenv[libpath] += chroot_path + "/opt/intel/lib/intel64:"
+        else:
+            myenv[libpath] += "/usr/local/cuda/lib64:"
+            myenv[libpath] += "/opt/intel/mkl/lib/intel64:"
+            myenv[libpath] += "/opt/intel/lib/intel64:"
+        if os.environ.get(libpath, None) is not None:
+            myenv[libpath] += os.environ[libpath]
+        myenv["PATH"] = myenv["ISMRMRD_HOME"] + "/bin" + ":" + myenv["GADGETRON_HOME"] + "/bin"
+
+    myenv["ACE_DEBUG"] = "1"
+
+    if platform.system() == "Windows":
+        os.putenv('PATH', myenv['PATH'])
+
+    print("Running Gadgetron test with: ")
+    print("  -- ISMRMRD_HOME  : " +  myenv["ISMRMRD_HOME"])
+    print("  -- GADGETRON_HOME  : " +  myenv["GADGETRON_HOME"])
+    print("  -- PATH            : " +  myenv["PATH"])
+    print("  -- " + libpath + " : " +  myenv[libpath])
+    if use_chroot:
+        print("  -- chroot          : " +  chroot_path)
+    print("  -- TEST CASE       : " + test_case)
+
+    test_result = run_test(myenv, test_case, chroot_path, port)
+
+    if test_result:
+        print("TEST: " + test_case + " SUCCESS")
+        return 0
+    else:
+        print("TEST: " + test_case + " FAILED")
+        return -100
+
+if __name__=="__main__":
+    sys.exit(main())
diff --git a/test/tests.cpp b/test/tests.cpp
index 326bea1..c938a37 100644
--- a/test/tests.cpp
+++ b/test/tests.cpp
@@ -5,6 +5,7 @@
  *      Author: Dae
  */
 
+
 #include <gtest/gtest.h>
 
 int main(int argc, char **argv) {
diff --git a/test/unit/run_unit_tests.py b/test/unit/run_unit_tests.py
new file mode 100644
index 0000000..9d36301
--- /dev/null
+++ b/test/unit/run_unit_tests.py
@@ -0,0 +1,62 @@
+import subprocess
+import sys
+import os
+import platform
+
+def main():
+    """Build the environment for the unit test binary and run test_all.exe (returns 0 on success, -100 on failure)."""
+    if len(sys.argv) < 4:
+        sys.stderr.write("Missing arguments\n")
+        prog = os.path.basename(sys.argv[0])
+        help = "Usage: %s <ismrmrd home> <gadgetron home> <location of test_all.exe>\n" % prog
+        sys.stderr.write(help)
+        sys.exit(1)
+
+    myenv = dict()
+    myenv["ISMRMRD_HOME"] = os.path.realpath(sys.argv[1])
+    myenv["GADGETRON_HOME"] = os.path.realpath(sys.argv[2])
+    myenv["UNITTEST_HOME"] = os.path.realpath(sys.argv[3])
+    myenv["PYTHONPATH"] = os.environ.get("PYTHONPATH", "")
+
+    libpath = "DYLD_FALLBACK_LIBRARY_PATH" if platform.system() == "Darwin" else "LD_LIBRARY_PATH"
+
+    if platform.system() == "Windows":
+        myenv["SystemRoot"] = os.environ.get('SystemRoot', "")
+        myenv["PATH"] = os.environ.get('Path', "")
+        # Path need not end with a separator; without one its last entry would fuse with ours
+        if myenv["PATH"] and not myenv["PATH"].endswith(";"):
+            myenv["PATH"] += ";"
+        myenv["PATH"] += myenv["ISMRMRD_HOME"] + "/lib;"
+        myenv["PATH"] += myenv["GADGETRON_HOME"] + "/lib;"
+        myenv["PATH"] += myenv["UNITTEST_HOME"]
+        myenv[libpath] = ""
+    else:
+        myenv[libpath] = myenv["ISMRMRD_HOME"] + "/lib:"
+        myenv[libpath] += myenv["GADGETRON_HOME"] + "/lib:"
+        myenv[libpath] += myenv["GADGETRON_HOME"] + "/../arma/lib:"
+        myenv[libpath] += "/usr/local/cuda/lib64:"
+        myenv[libpath] += "/opt/intel/mkl/lib/intel64:"
+        myenv[libpath] += "/opt/intel/lib/intel64:"
+        # Keep whatever library path the caller already had, after our own entries
+        myenv[libpath] += os.environ.get(libpath, "")
+        myenv["PATH"] = myenv["ISMRMRD_HOME"] + "/bin" + ":" + myenv["GADGETRON_HOME"] + "/bin" + ":" + myenv["UNITTEST_HOME"]
+
+    myenv["ACE_DEBUG"] = "1"
+
+    if platform.system() == "Windows":
+        os.putenv('PATH', myenv['PATH'])
+
+    print("Running unit tests with: ")
+    print("  -- ISMRMRD_HOME  : " +  myenv["ISMRMRD_HOME"])
+    print("  -- GADGETRON_HOME  : " +  myenv["GADGETRON_HOME"])
+    print("  -- PATH            : " +  myenv["PATH"])
+    print("  -- " + libpath + " : " +  myenv[libpath])
+
+    r = subprocess.call("test_all.exe", env=myenv)
+    if r != 0:
+        print("Failed to run unit tests!")
+        return -100
+
+    return 0
+
+if __name__=="__main__":
+    sys.exit(main())
diff --git a/toolboxes/CMakeLists.txt b/toolboxes/CMakeLists.txt
index c305e3d..782db4e 100644
--- a/toolboxes/CMakeLists.txt
+++ b/toolboxes/CMakeLists.txt
@@ -1,18 +1,42 @@
+if (WIN32)
+    ADD_DEFINITIONS(-DTIXML_USE_STL)
+endif (WIN32)
+
+if (MKL_FOUND)
+    INCLUDE_DIRECTORIES( ${MKL_INCLUDE_DIR} )
+    LINK_DIRECTORIES( ${MKL_LIB_DIR} ${MKL_COMPILER_LIB_DIR} )
+    link_libraries(${MKL_LIBRARIES})
+endif (MKL_FOUND)
+
 add_subdirectory(core)
+add_subdirectory(mri_core)
 
-IF(CUDA_FOUND OR ARMADILLO_FOUND)
-  add_subdirectory(operators)
-  add_subdirectory(solvers)
-ENDIF(CUDA_FOUND OR ARMADILLO_FOUND)
+add_subdirectory(operators)
+add_subdirectory(solvers)
 
-add_subdirectory(mri)
+add_subdirectory(fft)
 add_subdirectory(nfft)
+
+add_subdirectory(mri)
+add_subdirectory(ct)
+
 add_subdirectory(registration)
 
-IF (ACE_FOUND AND XSD_FOUND)
+IF (ACE_FOUND)
   add_subdirectory(gadgettools)
-ENDIF (ACE_FOUND AND XSD_FOUND)
+  add_subdirectory(cloudbus)
+ENDIF()
+
+IF (FFTW3_FOUND AND ISMRMRD_FOUND)
+  add_subdirectory(gtplus)
+ENDIF()
+
+find_package(BLAS)
+find_package(LAPACK)
+
+#if (NOT WIN32)
+  if (BLAS_FOUND AND LAPACK_FOUND AND MKL_FOUND)
+    add_subdirectory(linalg)
+  endif()
+#endif()
 
-IF (MKL_FOUND)
-    add_subdirectory(gtplus)
-ENDIF (MKL_FOUND)
\ No newline at end of file
diff --git a/toolboxes/cloudbus/CMakeLists.txt b/toolboxes/cloudbus/CMakeLists.txt
new file mode 100644
index 0000000..0b7f4e3
--- /dev/null
+++ b/toolboxes/cloudbus/CMakeLists.txt
@@ -0,0 +1,31 @@
+find_package(ACE REQUIRED)
+find_package(Boost REQUIRED)
+# Header search paths for ACE and Boost (no Boost library is linked below)
+include_directories(${ACE_INCLUDE_DIR}
+                    ${Boost_INCLUDE_DIR} 
+                    )
+# Shared library holding the CloudBus implementation
+add_library(gadgetron_toolbox_cloudbus SHARED
+  CloudBus.cpp
+  CloudBus.h
+  cloudbus_export.h
+)
+# Link the release or the debug ACE library depending on the build configuration
+target_link_libraries(gadgetron_toolbox_cloudbus 
+                     optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY})
+# __BUILD_GADGETRON_CLOUDBUS__ is tested by cloudbus_export.h to select dllexport on Windows
+set_target_properties(gadgetron_toolbox_cloudbus PROPERTIES COMPILE_DEFINITIONS "__BUILD_GADGETRON_CLOUDBUS__")
+set_target_properties(gadgetron_toolbox_cloudbus  PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+# Command line program built from cloudbus_main.cpp
+add_executable(gadgetron_cloudbus cloudbus_main.cpp)
+target_link_libraries(gadgetron_cloudbus 
+                     gadgetron_toolbox_cloudbus 
+                     optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY} )
+# Install the library and the executable
+install(TARGETS gadgetron_toolbox_cloudbus DESTINATION lib COMPONENT main)
+install(TARGETS gadgetron_cloudbus DESTINATION bin COMPONENT main)
+# Install the public headers
+install(FILES 
+  CloudBus.h
+  cloudbus_export.h
+  DESTINATION include COMPONENT main)
diff --git a/toolboxes/cloudbus/CloudBus.cpp b/toolboxes/cloudbus/CloudBus.cpp
new file mode 100644
index 0000000..1e9ac00
--- /dev/null
+++ b/toolboxes/cloudbus/CloudBus.cpp
@@ -0,0 +1,220 @@
+#include "CloudBus.h"
+#include <ace/Guard_T.h>
+namespace Gadgetron
+{
+  CloudBus* CloudBus::instance_ = 0; //Singleton, created by the first call to instance()
+  const char* CloudBus::mcast_inet_addr_ = GADGETRON_DEFAULT_MULTICAST_ADDR;
+  int CloudBus::mcast_port_ = GADGETRON_DEFAULT_MULTICAST_PORT;
+  bool CloudBus::query_mode_ = false; //Query (listen only) mode is disabled by default
+  int CloudBus::gadgetron_port_ = 9002; //Default Gadgetron port stored in the local node info
+
+  CloudBusTask::CloudBusTask(int port, const char* addr)
+    : inherited()
+    , mcast_dgram_(ACE_SOCK_Dgram_Mcast::OPT_BINDADDR_NO) //Initializers follow the declaration order in CloudBus.h (avoids -Wreorder)
+    , mcast_addr_(port, addr) //Multicast group the derived tasks join or send to
+  {
+  }
+
+  CloudBusTask::CloudBusTask()
+    : inherited()
+    , mcast_dgram_(ACE_SOCK_Dgram_Mcast::OPT_BINDADDR_NO) //Initializers follow the declaration order in CloudBus.h (avoids -Wreorder)
+    , mcast_addr_(GADGETRON_DEFAULT_MULTICAST_PORT, GADGETRON_DEFAULT_MULTICAST_ADDR) //Default multicast group
+  {
+  }
+    
+  int CloudBusTask::open(void*)
+  {
+    return this->activate( THR_NEW_LWP | THR_JOINABLE,1); //Start exactly one joinable thread for this task
+  }
+
+  CloudBusReceiverTask::CloudBusReceiverTask(int port, const char* addr)
+    : CloudBusTask(port, addr)
+  {
+    //Nothing else to set up here; the multicast group is joined in open()
+  }
+
+  //Joins the multicast group, then starts the receiver thread; returns -1 if the join fails.
+  int CloudBusReceiverTask::open(void*)
+  {
+    const bool joined = (mcast_dgram_.join(mcast_addr_) != -1);
+    if (joined) return CloudBusTask::open();
+    std::cout << "Error doing dgram join" << std::endl;
+    return -1;
+  }
+
+  int CloudBusReceiverTask::close(u_long flags)
+  {
+    mcast_dgram_.leave(mcast_addr_); //Undo the join() done in open(); the result is not checked
+    return CloudBusTask::close(flags);
+  }
+
+  int CloudBusReceiverTask::svc(void) //Receive loop: runs until recv() reports an error
+  {
+    char buffer[GADGETRON_NODE_INFO_MESSAGE_LENGTH]; //Size of message
+    GadgetronNodeInfo info;
+    ACE_INET_Addr peer_address;
+    ssize_t received = 0;
+    while ((received = mcast_dgram_.recv(buffer, GADGETRON_NODE_INFO_MESSAGE_LENGTH, peer_address)) != -1) {
+      if (received < static_cast<ssize_t>(GADGETRON_NODE_INFO_MESSAGE_LENGTH)) continue; //Too short to be a node announcement: skip it
+      info.uuid = boost::uuids::to_string(*((boost::uuids::uuid*)buffer)); //First 16 bytes: uuid
+      info.address = std::string(peer_address.get_host_addr());
+      memcpy(&info.port              , buffer + 16,                    sizeof(uint32_t));
+      memcpy(&info.compute_capability, buffer + 16 + sizeof(uint32_t), sizeof(uint32_t));
+      CloudBus::instance()->update_node(info.uuid.c_str(), info);
+    }
+    return 0;
+  }
+
+  CloudBusSenderTask::CloudBusSenderTask(int port, const char* addr)
+    : CloudBusTask(port, addr)
+  {
+    //Nothing else to set up here; the multicast socket is opened in open()
+  }
+
+  //Opens the multicast socket, then starts the sender thread; returns -1 if the socket cannot be opened.
+  int CloudBusSenderTask::open(void*)
+  {
+    const bool opened = (mcast_dgram_.open(mcast_addr_) != -1);
+    if (opened) return CloudBusTask::open();
+    std::cout << "Error doing dgram open" << std::endl;
+    return -1;
+  }
+
+  int CloudBusSenderTask::svc(void)
+  {
+    //Announce this node every 5 seconds (unless in query mode) and expire stale nodes on the same tick.
+    CloudBus* bus = CloudBus::instance();
+    char buffer[GADGETRON_NODE_INFO_MESSAGE_LENGTH]; //Size of message
+    if (bus->uuid_.size() != 16) {
+      std::cout << "Severe problem, UUID is != 16" << std::endl;
+      std::cout << "uuid: " << bus->uuid_ << "(" << bus->uuid_.size() << ")" << std::endl;
+    }
+    memcpy(buffer, bus->uuid_.begin(), 16); //The uuid is fixed at construction, so it is written once
+
+    while (true) {
+      //Port and compute capability are re-read on every tick: set_compute_capability()
+      //can only be called once instance() has already started this thread.
+      memcpy(buffer + 16,                    &bus->node_info_.port, sizeof(uint32_t));
+      memcpy(buffer + 16 + sizeof(uint32_t), &bus->node_info_.compute_capability, sizeof(uint32_t));
+      if (!bus->query_mode_ && mcast_dgram_.send(buffer, GADGETRON_NODE_INFO_MESSAGE_LENGTH) == -1) {
+        std::cout << "Failed to send dgram data" << std::endl;
+      }
+      bus->remove_stale_nodes();
+      ACE_OS::sleep(5);//Sleep for 5 seconds
+    }
+    return 0; //Not reached: the loop above has no exit
+  }
+
+  CloudBus* CloudBus::instance()
+  {
+    //Lazily create the singleton on first use and start its receiver and sender tasks.
+    if (instance_) return instance_;
+
+    instance_ = new CloudBus(mcast_port_, mcast_inet_addr_);
+    instance_->receiver_.open();
+    instance_->sender_.open();
+    return instance_;
+  }
+
+  
+  void CloudBus::set_mcast_address(const char* addr)
+  {
+    mcast_inet_addr_ = addr; //The pointer is stored, not copied: addr must stay valid at least until instance() is first called
+  }
+
+  void CloudBus::set_mcast_port(int port)
+  {
+    mcast_port_ = port; //Only read when instance() first constructs the bus
+  }
+
+  void CloudBus::set_query_only(bool m)
+  {
+    query_mode_ = m; //Checked by the sender loop before every announcement
+  }
+ 
+  void CloudBus::set_gadgetron_port(uint32_t port)
+  {
+    gadgetron_port_ = port; //Copied into node_info_.port by the CloudBus constructor
+  }
+
+  void CloudBus::wait()
+  {
+    sender_.wait(); //NOTE: the sender's svc() loop has no exit, so this is not expected to return
+    receiver_.wait();
+    receiver_.close(); //Leaves the multicast group
+  }
+
+  //Replaces the contents of 'nodes' with a snapshot of all remote nodes currently known.
+  void CloudBus::get_node_info(std::vector<GadgetronNodeInfo>& nodes)
+  {
+    ACE_Guard<ACE_Thread_Mutex> guard(mtx_); //Released on every exit path, even if a copy throws
+    nodes.clear();
+    nodes.reserve(nodes_.size());
+    for (map_type_::iterator it = nodes_.begin(); it != nodes_.end(); ++it) {
+      nodes.push_back(it->second.first);
+    }
+  }
+  
+  //Number of remote nodes currently known, read while holding the node map mutex.
+  size_t CloudBus::get_number_of_nodes()
+  {
+    mtx_.acquire();
+    const size_t count = nodes_.size();
+    mtx_.release();
+    return count;
+  }
+
+  CloudBus::CloudBus(int port, const char* addr)
+    : receiver_(port, addr), sender_(port, addr)
+    , mtx_("CLOUDBUSMTX"), uuid_(boost::uuids::random_generator()())
+  {
+    node_info_.port = gadgetron_port_;
+    set_compute_capability(1);
+    node_info_.uuid = boost::uuids::to_string(uuid_);
+    //A throw-away listening socket is only used to look up the local host name
+    ACE_SOCK_Acceptor listener (ACE_Addr::sap_any);
+    ACE_INET_Addr local_addr;
+    listener.get_local_addr (local_addr);
+    listener.close(); //ACE socket wrappers do not close their handle on destruction
+    node_info_.address = std::string(local_addr.get_host_name());
+  }
+
+  //Records a node announcement under key 'a' (the node's uuid string).
+  //A node seen for the first time is logged; a known node only has its
+  //"last seen" time refreshed. Announcements from this node itself are ignored.
+  void CloudBus::update_node(const char* a, GadgetronNodeInfo& info)
+  {
+    if (info.uuid == node_info_.uuid) return; //Reject stuff coming from myself
+
+    ACE_Guard<ACE_Thread_Mutex> guard(mtx_); //Released on every exit path
+    std::string key(a);
+    if (nodes_.find(key) == nodes_.end()) {
+      std::cout << "---->>>> New Cloud Node <<<<< ----- " << info.uuid << " (" << info.address << ":" << info.port << ", " << info.compute_capability << ")" << std::endl;
+    }
+
+    //Insert or overwrite, stamping the entry with the time of this announcement
+    nodes_[key] = std::pair<GadgetronNodeInfo,time_t>(info,time(NULL));
+  }
+
+  //Drops every node whose last announcement is more than 30 seconds old.
+  //Called from the sender loop on every tick.
+  void CloudBus::remove_stale_nodes()
+  {
+    ACE_Guard<ACE_Thread_Mutex> guard(mtx_); //Released on every exit path
+    const time_t now = time(NULL);
+
+    //Erase in place instead of rebuilding the whole map on every tick.
+    map_type_::iterator it = nodes_.begin();
+    while (it != nodes_.end()) {
+      if (fabs(difftime(it->second.second,now)) > 30) {
+        const GadgetronNodeInfo& n = it->second.first;
+        std::cout << "---->>>> DELETING STALE CLOUD NODE <<<<< ----- " << n.uuid << " (" << n.address << ":" << n.port  << ", " << n.compute_capability << ")" << std::endl;
+        //Post-increment hands erase() the current element while 'it' already
+        //points at the next one, so the iterator stays valid.
+        nodes_.erase(it++);
+      } else {
+        ++it;
+      }
+    }
+  }
+}
diff --git a/toolboxes/cloudbus/CloudBus.h b/toolboxes/cloudbus/CloudBus.h
new file mode 100644
index 0000000..bde427b
--- /dev/null
+++ b/toolboxes/cloudbus/CloudBus.h
@@ -0,0 +1,117 @@
+#ifndef GADGETRON_CLOUDBUS_H
+#define GADGETRON_CLOUDBUS_H
+
+#include "cloudbus_export.h"
+#include <ace/Task.h>
+#include <ace/INET_Addr.h>
+#include <ace/SOCK_Dgram_Mcast.h>
+#include <ace/OS_NS_unistd.h>
+#include <ace/SOCK_Acceptor.h>
+
+#include <boost/uuid/uuid.hpp>
+#include <boost/uuid/uuid_generators.hpp>
+#include <boost/uuid/uuid_io.hpp>
+
+#include <iostream>
+#include <map>
+#include <utility>
+#include <time.h>
+#include <vector>
+
+#define GADGETRON_DEFAULT_MULTICAST_ADDR "224.9.9.2"
+#define GADGETRON_DEFAULT_MULTICAST_PORT 4148
+#define GADGETRON_NODE_INFO_MESSAGE_LENGTH (16+sizeof(uint32_t)*2) //16 bytes for uuid + 2 ints; parenthesized so it expands safely inside larger expressions
+
+namespace Gadgetron
+{
+
+  struct GadgetronNodeInfo //Description of one node on the cloud bus
+  {
+    std::string uuid; //Textual form of the node's boost uuid
+    std::string address; //Sender address of the announcement (host name for the local node)
+    uint32_t port; //Gadgetron port carried in the announcement
+    uint32_t compute_capability; //Capability value carried in the announcement (the constructor sets 1 locally)
+  };
+  
+  class CloudBusTask : public ACE_Task<ACE_MT_SYNCH> //Common base of the sender and receiver tasks
+  {
+  public:
+    typedef ACE_Task<ACE_MT_SYNCH> inherited;    //Shorthand for the ACE base class
+    CloudBusTask(int port, const char* addr); //Multicast port and address to use
+    CloudBusTask(); //Uses GADGETRON_DEFAULT_MULTICAST_PORT and GADGETRON_DEFAULT_MULTICAST_ADDR
+    virtual int open(void* = 0); //Activates a single joinable thread
+
+  protected:
+    ACE_SOCK_Dgram_Mcast mcast_dgram_; //Multicast socket; declared (and therefore initialized) before mcast_addr_
+    ACE_INET_Addr mcast_addr_; //Multicast group address and port
+  };
+
+  class CloudBusReceiverTask : public CloudBusTask //Listens for node announcements
+  {
+  public:
+    CloudBusReceiverTask(int port, const char* addr);
+    virtual int open(void* = 0); //Joins the multicast group, then starts the thread
+    virtual int close(u_long flags = 0); //Leaves the multicast group
+    virtual int svc(void); //Thread body: decodes announcements and passes them to CloudBus::update_node()
+  };
+
+
+  class CloudBusSenderTask : public CloudBusTask //Periodically announces the local node
+  {
+  public:
+    CloudBusSenderTask(int port, const char* addr);
+    virtual int open(void* = 0); //Opens the multicast socket, then starts the thread
+    virtual int svc(void); //Thread body: sends an announcement and prunes stale nodes every 5 seconds
+  };
+
+  class EXPORTCLOUDBUS CloudBus //Singleton that tracks the nodes announced on the multicast group
+  {
+    friend class CloudBusReceiverTask;
+    friend class CloudBusSenderTask;
+    //Key: node uuid string. Value: (node info, time the node was last heard from)
+    typedef std::map<std::string, std::pair<GadgetronNodeInfo, time_t> > map_type_;
+
+  public:
+    static CloudBus* instance(); //Creates the singleton and starts the receiver and sender tasks on first use
+    static void set_mcast_address(const char* addr); //Read when instance() first constructs the bus
+    static void set_mcast_port(int port); //Read when instance() first constructs the bus
+    static void set_query_only(bool m = true); //In query-only mode the sender does not announce this node
+    static void set_gadgetron_port(uint32_t port); //Copied into the local node info by the constructor
+
+    void set_compute_capability(uint32_t c) //Updates node_info_.compute_capability
+    {
+      node_info_.compute_capability = c;
+    }
+
+    void wait(); //Waits for the sender and the receiver task
+    //Both accessors below lock mtx_ while they read nodes_
+    void get_node_info(std::vector<GadgetronNodeInfo>& nodes);
+    size_t get_number_of_nodes();
+
+  protected:
+    ///Protected constructor: instance() is the only place that creates a CloudBus.
+    CloudBus(int port, const char* addr);
+
+    void update_node(const char* a, GadgetronNodeInfo& info); //Called by the receiver task for every announcement
+    void remove_stale_nodes(); //Called by the sender task on every tick
+
+    static CloudBus* instance_;
+    static const char* mcast_inet_addr_;
+    static int mcast_port_;
+    static bool query_mode_; //Listen only
+    static int gadgetron_port_;
+
+    GadgetronNodeInfo node_info_; //Description of the local node
+    map_type_ nodes_; //Remote nodes that have announced themselves
+
+    CloudBusReceiverTask receiver_;
+    CloudBusSenderTask   sender_;
+    ACE_Thread_Mutex mtx_; //Protects nodes_
+
+    boost::uuids::uuid uuid_; //Identity of the local node
+  };
+
+
+}
+
+#endif
diff --git a/toolboxes/cloudbus/cloudbus_export.h b/toolboxes/cloudbus/cloudbus_export.h
new file mode 100644
index 0000000..b85fed2
--- /dev/null
+++ b/toolboxes/cloudbus/cloudbus_export.h
@@ -0,0 +1,14 @@
+#ifndef CLOUDBUS_EXPORT_H_
+#define CLOUDBUS_EXPORT_H_
+
+#if defined (WIN32)
+    #if defined (__BUILD_GADGETRON_CLOUDBUS__) || defined (gadgetron_toolbox_cloudbus_EXPORTS)
+        #define EXPORTCLOUDBUS __declspec(dllexport)
+    #else
+        #define EXPORTCLOUDBUS __declspec(dllimport)
+    #endif
+#else
+    #define EXPORTCLOUDBUS
+#endif
+
+#endif
diff --git a/toolboxes/cloudbus/cloudbus_main.cpp b/toolboxes/cloudbus/cloudbus_main.cpp
new file mode 100644
index 0000000..a212e4a
--- /dev/null
+++ b/toolboxes/cloudbus/cloudbus_main.cpp
@@ -0,0 +1,35 @@
+#include <iostream>
+
+#include "CloudBus.h"
+
+int main(int argc, char** argv)
+{
+  std::cout << "CloudBus Main Program" << std::endl;
+  //Arguments (all optional): <multicast address> <multicast port> <any third argument>
+  int port = GADGETRON_DEFAULT_MULTICAST_PORT;
+  const char* addr = GADGETRON_DEFAULT_MULTICAST_ADDR;
+  bool query_only_mode = true;
+
+  if (argc > 1) {
+    addr = argv[1];
+    std::cout << "Setting multicast address to: " << addr << std::endl;
+  }
+
+  if (argc > 2) {
+    port = std::atoi(argv[2]);
+    std::cout << "Setting multicast port to: " << port << std::endl;
+  }
+  //The mere presence of a third argument turns query-only mode off; its value is not inspected
+  if (argc > 3)
+  {
+    query_only_mode = false;
+  }
+
+  //Port and address must be set before grabbing the instance for the first time.
+  Gadgetron::CloudBus::set_mcast_address(addr);
+  Gadgetron::CloudBus::set_mcast_port(port);
+  Gadgetron::CloudBus::set_query_only(query_only_mode);
+  Gadgetron::CloudBus* cb = Gadgetron::CloudBus::instance();
+  cb->wait(); //Blocks while the CloudBus tasks are running
+  return 0;
+}
diff --git a/toolboxes/core/CMakeLists.txt b/toolboxes/core/CMakeLists.txt
index 67756a0..1d32b62 100644
--- a/toolboxes/core/CMakeLists.txt
+++ b/toolboxes/core/CMakeLists.txt
@@ -4,7 +4,7 @@ include_directories(
 
 configure_file(core_defines.h.in ${CMAKE_CURRENT_SOURCE_DIR}/core_defines.h)
 
-install(FILES 	
+install(FILES 
   core_defines.h
   NDArray.h
   complext.h
@@ -16,8 +16,8 @@ install(FILES
   GadgetronCommon.h
   GadgetronException.h
   GadgetronTimer.h
-  SerializableObject.h
-  DESTINATION include)
+  Gadgetron_enable_types.h
+  DESTINATION include COMPONENT main)
 
 add_subdirectory(cpu)
 
diff --git a/toolboxes/core/GadgetronCommon.h b/toolboxes/core/GadgetronCommon.h
index fb9a8f1..175bd5e 100644
--- a/toolboxes/core/GadgetronCommon.h
+++ b/toolboxes/core/GadgetronCommon.h
@@ -1,6 +1,8 @@
 #ifndef GADGETRONCOMMON_H
 #define GADGETRONCOMMON_H
 
+#include <iostream>
+
 #ifndef _WIN32
 
 #define GCC_VERSION (__GNUC__ * 10000           \
@@ -14,6 +16,15 @@
 
 #else
 
+    // disable warning 4251, needs to have dll-interface to be used by clients
+    #pragma warning( disable : 4251 )
+
+    // warning C4344: behavior change: use of explicit template arguments
+    #pragma warning( disable : 4344 )
+
+    // The POSIX name for this item is deprecated. Instead, use the ISO C++ conformant name
+    #pragma warning( disable : 4996 )
+
 #endif // _WIN32
 
 //MACROS FOR LOGGING
@@ -24,12 +35,17 @@
 #define GADGET_CONDITION_MSG(con, message) { if ( con ) GADGET_MSG(message) }
 #define GADGET_CONDITION_WARN_MSG(con, message) { if ( con ) GADGET_WARN_MSG(message) }
 
-#define GADGET_THROW(msg) { GADGET_ERROR_MSG(msg); BOOST_THROW_EXCEPTION( runtime_error(msg)); }
-#define GADGET_CHECK_THROW(con) { if ( !(con) ) { GADGET_ERROR_MSG(#con); BOOST_THROW_EXCEPTION( runtime_error(#con)); } }
+#define GADGET_THROW(msg) { GADGET_ERROR_MSG(msg); throw std::runtime_error(msg); }
+#define GADGET_CHECK_THROW(con) { if ( !(con) ) { GADGET_ERROR_MSG(#con); throw std::runtime_error(#con); } }
+
+#define GADGET_CATCH_THROW(con) { try { con; } catch(...) { GADGET_ERROR_MSG(#con); throw std::runtime_error(#con); } }
 
 #define GADGET_CHECK_RETURN(con, value) { if ( ! (con) ) { GADGET_ERROR_MSG("Returning '" << value << "' due to failed check: '" << #con << "'"); return (value); } }
 #define GADGET_CHECK_RETURN_FALSE(con) { if ( ! (con) ) { GADGET_ERROR_MSG("Returning false due to failed check: '" << #con << "'"); return false; } }
 
+#define GADGET_CHECK_EXCEPTION_RETURN(con, value) { try { con; } catch(...) { GADGET_ERROR_MSG("Returning '" << value << "' due to failed check: '" << #con << "'"); return (value); } }
+#define GADGET_CHECK_EXCEPTION_RETURN_FALSE(con) { try { con; } catch(...) { GADGET_ERROR_MSG("Returning false due to failed check: '" << #con << "'"); return false; } }
+
 #ifdef GADGET_DEBUG_MODE
 #define GADGET_DEBUG_CHECK_THROW(con) GADGET_CHECK_THROW(con)
 #define GADGET_DEBUG_CHECK_RETURN(con, value) GADGET_CHECK_RETURN(con, value)
@@ -55,6 +71,10 @@
 // MACROS for EXPORTING
 #define GADGET_EXPORT_ARRAY(debugFolder, exporter, a, filename) { if ( !debugFolder.empty() ) { exporter.exportArray(a, debugFolder+filename); } }
 #define GADGET_EXPORT_ARRAY_COMPLEX(debugFolder, exporter, a, filename) { if ( !debugFolder.empty() ) { exporter.exportArrayComplex(a, debugFolder+filename); } }
+#define GADGET_EXPORT_ARRAY_COMPLEX_REAL_IMAG(debugFolder, exporter, a, filename) { if ( !debugFolder.empty() ) { exporter.exportArrayComplexRealImag(a, debugFolder+filename); } }
+
+#define GADGET_EXPORT_IMAGE(debugFolder, exporter, a, filename) { if ( !debugFolder.empty() ) { exporter.exportImage(a, debugFolder+filename); } }
+#define GADGET_EXPORT_IMAGE_COMPLEX(debugFolder, exporter, a, filename) { if ( !debugFolder.empty() ) { exporter.exportImageComplex(a, debugFolder+filename); } }
 
 // MACROS FOR UTILITY
 #define GT_MIN(a,b)    (((a)<(b))?(a):(b))
@@ -63,5 +83,20 @@
 #define GT_SGN(a)      (((a)>=0)?(1):(-1))
 #define GT_PI          3.141592653589793238462
 #define GT_IMAGING_GEOMETRY_DELTA 0.001
+#define GT_MKINT(a)    (((a)>=0)?((int)((a)+0.5)):((int)((a)-0.5)))
+#define GT_SQR(a)      ((a)*(a))
+
+namespace Gadgetron
+{
+// array index type
+#ifdef gt_index_type
+    #undef gt_index_type
+#endif // gt_index_type
+#define gt_index_type long long
+
+struct rgb_type { unsigned char r,g,b; };
+struct rgba_type { unsigned char r,g,b,a; };
+
+}
 
 #endif  //GADGETRONCOMMON_H
diff --git a/toolboxes/core/GadgetronTimer.h b/toolboxes/core/GadgetronTimer.h
index 38942d9..8022da1 100644
--- a/toolboxes/core/GadgetronTimer.h
+++ b/toolboxes/core/GadgetronTimer.h
@@ -37,7 +37,7 @@ namespace Gadgetron{
         }
     }
 
-    GadgetronTimer(const char* name, bool timing=false) : name_(name), timing_in_destruction_(timing) 
+    GadgetronTimer(const char* name, bool timing=true) : name_(name), timing_in_destruction_(timing) 
     {
         if ( timing_in_destruction_ )
         {
@@ -74,7 +74,7 @@ namespace Gadgetron{
         start();
     }
 
-    virtual void stop()
+    virtual double stop()
     {
         double time_in_us = 0.0;
 #ifdef WIN32
@@ -85,6 +85,7 @@ namespace Gadgetron{
         time_in_us = ((end_.tv_sec * 1e6) + end_.tv_usec) - ((start_.tv_sec * 1e6) + start_.tv_usec);
 #endif
         std::cout << name_ << ": " << time_in_us/1000.0 << " ms" << std::endl; std::cout.flush();
+        return time_in_us;
     }
 
     void set_timing_in_destruction(bool timing) { timing_in_destruction_ = timing; }
diff --git a/toolboxes/core/Gadgetron_enable_types.h b/toolboxes/core/Gadgetron_enable_types.h
new file mode 100644
index 0000000..bc70a5e
--- /dev/null
+++ b/toolboxes/core/Gadgetron_enable_types.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#include <boost/type_traits.hpp>
+#include "complext.h"
+
+namespace Gadgetron {
+	template<class T> struct enable_operators : public boost::false_type{};
+	template<> struct enable_operators<float> : public boost::true_type{};
+	template<> struct enable_operators<Gadgetron::complext<float> > : public boost::true_type{};
+	template<> struct enable_operators<double> : public boost::true_type{};
+	template<> struct enable_operators<Gadgetron::complext<double> > : public boost::true_type{};
+}
diff --git a/toolboxes/core/NDArray.h b/toolboxes/core/NDArray.h
index 38452e6..38d2d93 100644
--- a/toolboxes/core/NDArray.h
+++ b/toolboxes/core/NDArray.h
@@ -26,24 +26,10 @@ namespace Gadgetron{
         typedef T element_type;
         typedef T value_type;
 
-        void* operator new (size_t bytes)
-        {
-            return ::new char[bytes];
-        }
-
-        void operator delete (void *ptr)
-        {
-            delete [] static_cast <char *> (ptr);
-        } 
-
-        void * operator new(size_t s, void * p)
-        {
-            return p;
-        }
-
         NDArray () : data_(0), elements_(0), delete_data_on_destruct_(true)
         {
             dimensions_ = boost::shared_ptr< std::vector<size_t> >( new std::vector<size_t> );
+            offsetFactors_ = boost::shared_ptr< std::vector<size_t> >( new std::vector<size_t> );
         }
 
         virtual ~NDArray() {}
@@ -65,6 +51,10 @@ namespace Gadgetron{
 
         template<class S> bool dimensions_equal(const NDArray<S> *a) const
         {
+            //boost::shared_ptr<std::vector<size_t > > adims = a->get_dimensions();
+            //return ((this->dimensions_->size() == adims->size()) &&
+            //    std::equal(this->dimensions_->begin(), this->dimensions_->end(), adims->begin()));
+
             std::vector<size_t>* dim;
             a->get_dimensions(dim);
 
@@ -85,6 +75,7 @@ namespace Gadgetron{
 
         boost::shared_ptr< std::vector<size_t> > get_dimensions() const;
         void get_dimensions(std::vector<size_t>*& dim) const;
+        void get_dimensions(std::vector<size_t>& dim) const;
 
         T* get_data_ptr() const;
 
@@ -96,6 +87,8 @@ namespace Gadgetron{
         void delete_data_on_destruct(bool d);
 
         size_t calculate_offset(const std::vector<size_t>& ind) const;
+        size_t calculate_offset(const std::vector<gt_index_type>& ind) const;
+        static size_t calculate_offset(const std::vector<size_t>& ind, const std::vector<size_t>& offsetFactors);
 
         size_t calculate_offset(size_t x, size_t y) const;
         size_t calculate_offset(size_t x, size_t y, size_t z) const;
@@ -107,20 +100,26 @@ namespace Gadgetron{
         size_t calculate_offset(size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q, size_t u) const;
 
         size_t get_offset_factor(size_t dim) const;
+        void get_offset_factor(std::vector<size_t>& offset) const;
         boost::shared_ptr< std::vector<size_t> > get_offset_factor() const;
 
         size_t get_offset_factor_lastdim() const;
 
         void calculate_offset_factors(const std::vector<size_t>& dimensions);
+        static void calculate_offset_factors(const std::vector<size_t>& dimensions, std::vector<size_t>& offsetFactors);
 
         std::vector<size_t> calculate_index( size_t offset ) const;
         void calculate_index( size_t offset, std::vector<size_t>& index ) const;
+        static void calculate_index( size_t offset, const std::vector<size_t>& offsetFactors, std::vector<size_t>& index );
 
         void clear();
 
         T& operator()( const std::vector<size_t>& ind );
         const T& operator()( const std::vector<size_t>& ind ) const;
 
+        T& operator()( const std::vector<gt_index_type>& ind );
+        const T& operator()( const std::vector<gt_index_type>& ind ) const;
+
         T& operator()( size_t x );
         const T& operator()( size_t x ) const;
 
@@ -148,6 +147,19 @@ namespace Gadgetron{
         T& operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q, size_t u );
         const T& operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q, size_t u ) const;
 
+        /// whether a point is within the array range
+        bool point_in_range(const std::vector<gt_index_type>& ind) const;
+        bool point_in_range(const std::vector<size_t>& ind) const;
+        bool point_in_range(gt_index_type x) const;
+        bool point_in_range(gt_index_type x, gt_index_type y) const;
+        bool point_in_range(gt_index_type x, gt_index_type y, gt_index_type z) const;
+        bool point_in_range(gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s) const;
+        bool point_in_range(gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p) const;
+        bool point_in_range(gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r) const;
+        bool point_in_range(gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a) const;
+        bool point_in_range(gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a, gt_index_type q) const;
+        bool point_in_range(gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a, gt_index_type q, gt_index_type u) const;
+
     protected:
 
         virtual void allocate_memory() = 0;
@@ -269,6 +281,9 @@ namespace Gadgetron{
     template <typename T> 
     inline bool NDArray<T>::dimensions_equal(std::vector<size_t> *d) const
     {
+        //return ((this->dimensions_->size() == d->size()) &&
+        //    std::equal(this->dimensions_->begin(), this->dimensions_->end(), d->begin()));
+
         if ( this->dimensions_->size() != d->size() ) return false;
 
         size_t NDim = this->dimensions_->size();
@@ -306,13 +321,19 @@ namespace Gadgetron{
         return boost::shared_ptr< std::vector<size_t> >(tmp); 
     }
 
-     template <typename T> 
+    template <typename T> 
     inline void NDArray<T>::get_dimensions(std::vector<size_t>*& dim) const
     {
         dim = dimensions_.get();
     }
 
     template <typename T> 
+    inline void NDArray<T>::get_dimensions(std::vector<size_t>& dim) const
+    {
+        dim = *dimensions_;
+    }
+
+    template <typename T> 
     inline T* NDArray<T>::get_data_ptr() const
     { 
         return data_;
@@ -343,6 +364,19 @@ namespace Gadgetron{
     }
 
     template <typename T> 
+    size_t NDArray<T>::calculate_offset(const std::vector<size_t>& ind, const std::vector<size_t>& offsetFactors)
+    {
+        size_t offset = ind[0];
+
+        for( size_t i = 1; i < ind.size(); i++ )
+        {
+            offset += ind[i] * offsetFactors[i];
+        }
+
+        return offset;
+    }
+
+    template <typename T> 
     inline size_t NDArray<T>::calculate_offset(const std::vector<size_t>& ind) const
     {
         size_t offset = ind[0];
@@ -352,6 +386,15 @@ namespace Gadgetron{
     }
 
     template <typename T> 
+    inline size_t NDArray<T>::calculate_offset(const std::vector<gt_index_type>& ind) const
+    {
+        size_t offset = (size_t)(ind[0]);
+        for( size_t i = 1; i < dimensions_->size(); i++ )
+            offset += (size_t)(ind[i]) * (*offsetFactors_)[i];
+        return offset;
+    }
+
+    template <typename T> 
     inline size_t NDArray<T>::calculate_offset(size_t x, size_t y) const
     {
         GADGET_DEBUG_CHECK_THROW(dimensions_->size()==2);
@@ -416,6 +459,12 @@ namespace Gadgetron{
     }
 
     template <typename T> 
+    inline void NDArray<T>::get_offset_factor(std::vector<size_t>& offset) const
+    {
+        offset=*offsetFactors_;
+    }
+
+    template <typename T> 
     inline size_t NDArray<T>::get_offset_factor_lastdim() const
     {
         if( dimensions_->size() == 0 )
@@ -433,6 +482,22 @@ namespace Gadgetron{
     }
 
     template <typename T> 
+    void NDArray<T>::calculate_offset_factors(const std::vector<size_t>& dimensions, std::vector<size_t>& offsetFactors)
+    {
+        offsetFactors.resize(dimensions.size());
+        for( size_t i = 0; i < dimensions.size(); i++ )
+        {
+            size_t k = 1;
+            for( size_t j = 0; j < i; j++ )
+            {
+                k *= dimensions[j];
+            }
+
+            offsetFactors[i] = k;
+        }
+    }
+
+    template <typename T> 
     inline void NDArray<T>::calculate_offset_factors(const std::vector<size_t>& dimensions)
     {
         if ( offsetFactors_.get() == NULL ){
@@ -476,6 +541,18 @@ namespace Gadgetron{
     }
 
     template <typename T> 
+    void NDArray<T>::calculate_index( size_t offset, const std::vector<size_t>& offsetFactors, std::vector<size_t>& index )
+    {
+        index.resize(offsetFactors.size(), 0);
+
+        for( long long i = offsetFactors.size()-1; i>=0; i-- )
+        {
+            index[i] = offset / offsetFactors[i];
+            offset %= offsetFactors[i];
+        }
+    }
+
+    template <typename T> 
     void NDArray<T>::clear()
     {
         if ( this->delete_data_on_destruct_ ){
@@ -500,7 +577,7 @@ namespace Gadgetron{
     {
         size_t idx = this->calculate_offset(ind);
         GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-        return this->get_data_ptr()[idx];
+        return this->data_[idx];
     }
 
     template <typename T> 
@@ -508,21 +585,37 @@ namespace Gadgetron{
     {
         size_t idx = this->calculate_offset(ind);
         GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-        return this->get_data_ptr()[idx];
+        return this->data_[idx];
+    }
+
+    template <typename T> 
+    inline T& NDArray<T>::operator()( const std::vector<gt_index_type>& ind )
+    {
+        size_t idx = this->calculate_offset(ind);
+        GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
+        return this->data_[idx];
+    }
+
+    template <typename T> 
+    inline const T& NDArray<T>::operator()( const std::vector<gt_index_type>& ind ) const
+    {
+        size_t idx = this->calculate_offset(ind);
+        GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
+        return this->data_[idx];
     }
 
     template <typename T> 
     inline T& NDArray<T>::operator()( size_t x )
     {
         GADGET_DEBUG_CHECK_THROW(x < this->get_number_of_elements());
-        return this->get_data_ptr()[x];
+        return this->data_[x];
     }
 
     template <typename T> 
     inline const T& NDArray<T>::operator()( size_t x ) const
     {
         GADGET_DEBUG_CHECK_THROW(x < this->get_number_of_elements());
-        return this->get_data_ptr()[x];
+        return this->data_[x];
     }
 
     template <typename T> 
@@ -530,7 +623,7 @@ namespace Gadgetron{
     {
         size_t idx = this->calculate_offset(x, y);
         GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-        return this->get_data_ptr()[idx];
+        return this->data_[idx];
     }
 
     template <typename T> 
@@ -538,7 +631,7 @@ namespace Gadgetron{
     {
         size_t idx = this->calculate_offset(x, y);
         GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-        return this->get_data_ptr()[idx];
+        return this->data_[idx];
     }
 
     template <typename T> 
@@ -546,7 +639,7 @@ namespace Gadgetron{
     {
         size_t idx = this->calculate_offset(x, y, z);
         GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-        return this->get_data_ptr()[idx];
+        return this->data_[idx];
     }
 
     template <typename T> 
@@ -554,7 +647,7 @@ namespace Gadgetron{
     {
         size_t idx = this->calculate_offset(x, y, z);
         GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-        return this->get_data_ptr()[idx];
+        return this->data_[idx];
     }
 
     template <typename T> 
@@ -562,7 +655,7 @@ namespace Gadgetron{
     {
         size_t idx = this->calculate_offset(x, y, z, s);
         GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-        return this->get_data_ptr()[idx];
+        return this->data_[idx];
     }
 
     template <typename T> 
@@ -570,7 +663,7 @@ namespace Gadgetron{
     {
         size_t idx = this->calculate_offset(x, y, z, s);
         GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-        return this->get_data_ptr()[idx];
+        return this->data_[idx];
     }
 
     template <typename T> 
@@ -578,7 +671,7 @@ namespace Gadgetron{
     {
         size_t idx = this->calculate_offset(x, y, z, s, p);
         GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-        return this->get_data_ptr()[idx];
+        return this->data_[idx];
     }
 
     template <typename T> 
@@ -586,7 +679,7 @@ namespace Gadgetron{
     {
         size_t idx = this->calculate_offset(x, y, z, s, p);
         GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-        return this->get_data_ptr()[idx];
+        return this->data_[idx];
     }
 
     template <typename T> 
@@ -594,7 +687,7 @@ namespace Gadgetron{
     {
         size_t idx = this->calculate_offset(x, y, z, s, p, r);
         GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-        return this->get_data_ptr()[idx];
+        return this->data_[idx];
     }
 
     template <typename T> 
@@ -602,7 +695,7 @@ namespace Gadgetron{
     {
         size_t idx = this->calculate_offset(x, y, z, s, p, r);
         GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-        return this->get_data_ptr()[idx];
+        return this->data_[idx];
     }
 
     template <typename T> 
@@ -610,7 +703,7 @@ namespace Gadgetron{
     {
         size_t idx = this->calculate_offset(x, y, z, s, p, r, a);
         GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-        return this->get_data_ptr()[idx];
+        return this->data_[idx];
     }
 
     template <typename T> 
@@ -618,7 +711,7 @@ namespace Gadgetron{
     {
         size_t idx = this->calculate_offset(x, y, z, s, p, r, a);
         GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-        return this->get_data_ptr()[idx];
+        return this->data_[idx];
     }
 
     template <typename T> 
@@ -626,7 +719,7 @@ namespace Gadgetron{
     {
         size_t idx = this->calculate_offset(x, y, z, s, p, r, a, q);
         GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-        return this->get_data_ptr()[idx];
+        return this->data_[idx];
     }
 
     template <typename T> 
@@ -634,7 +727,7 @@ namespace Gadgetron{
     {
         size_t idx = this->calculate_offset(x, y, z, s, p, r, a, q);
         GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-        return this->get_data_ptr()[idx];
+        return this->data_[idx];
     }
 
     template <typename T> 
@@ -642,7 +735,7 @@ namespace Gadgetron{
     {
         size_t idx = this->calculate_offset(x, y, z, s, p, r, a, q, u);
         GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-        return this->get_data_ptr()[idx];
+        return this->data_[idx];
     }
 
     template <typename T> 
@@ -650,7 +743,106 @@ namespace Gadgetron{
     {
         size_t idx = this->calculate_offset(x, y, z, s, p, r, a, q, u);
         GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-        return this->get_data_ptr()[idx];
+        return this->data_[idx];
+    }
+
+    template <typename T> 
+    inline bool NDArray<T>::point_in_range(const std::vector<gt_index_type>& ind) const
+    {
+        unsigned int D = (*dimensions_).size();
+        if ( ind.size() != D ) return false;
+
+        unsigned int ii;
+        for ( ii=0; ii<D; ii++ )
+        {
+            if ( (ind[ii]>= (gt_index_type)(*dimensions_)[ii]) || (ind[ii]<0) )
+            {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    template <typename T> 
+    inline bool NDArray<T>::point_in_range(const std::vector<size_t>& ind) const
+    {
+        unsigned int D = (*dimensions_).size();
+        if ( ind.size() != D ) return false;
+
+        unsigned int ii;
+        for ( ii=0; ii<D; ii++ )
+        {
+            if ( ind[ii]>=(*dimensions_)[ii] )
+            {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    template <typename T> 
+    inline bool NDArray<T>::point_in_range(gt_index_type x) const
+    {
+        GADGET_DEBUG_CHECK_THROW((*dimensions_).size()==1);
+        return (x<(*dimensions_)[0] && x>=0);
+    }
+
+    template <typename T> 
+    inline bool NDArray<T>::point_in_range(gt_index_type x, gt_index_type y) const
+    {
+        GADGET_DEBUG_CHECK_THROW((*dimensions_).size()==2);
+        return ((x<(*dimensions_)[0]) && (y<(*dimensions_)[1]) && x>=0 && y>=0);
+    }
+
+    template <typename T> 
+    inline bool NDArray<T>::point_in_range(gt_index_type x, gt_index_type y, gt_index_type z) const
+    {
+        GADGET_DEBUG_CHECK_THROW((*dimensions_).size()==3);
+        return ( (x<(*dimensions_)[0]) && (y<(*dimensions_)[1]) && (z<(*dimensions_)[2]) && x>=0 && y>=0 && z>=0);
+    }
+
+    template <typename T> 
+    inline bool NDArray<T>::point_in_range(gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s) const
+    {
+        GADGET_DEBUG_CHECK_THROW((*dimensions_).size()==4);
+        return ( (x<(*dimensions_)[0]) && (y<(*dimensions_)[1]) && (z<(*dimensions_)[2]) && (s<(*dimensions_)[3]) && x>=0 && y>=0 && z>=0 && s>=0);
+    }
+
+    template <typename T> 
+    inline bool NDArray<T>::point_in_range(gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p) const
+    {
+        GADGET_DEBUG_CHECK_THROW((*dimensions_).size()==5);
+        return ( (x<(*dimensions_)[0]) && (y<(*dimensions_)[1]) && (z<(*dimensions_)[2]) && (s<(*dimensions_)[3]) && (p<(*dimensions_)[4]) && x>=0 && y>=0 && z>=0 && s>=0 && p>=0);
+    }
+
+    template <typename T> 
+    inline bool NDArray<T>::point_in_range(gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r) const
+    {
+        GADGET_DEBUG_CHECK_THROW((*dimensions_).size()==6);
+        return ( (x<(*dimensions_)[0]) && (y<(*dimensions_)[1]) && (z<(*dimensions_)[2]) && (s<(*dimensions_)[3]) && (p<(*dimensions_)[4]) && (r<(*dimensions_)[5]) && x>=0 && y>=0 && z>=0 && s>=0 && p>=0 && r>=0);
+    }
+
+    template <typename T> 
+    inline bool NDArray<T>::point_in_range(gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a) const
+    {
+        GADGET_DEBUG_CHECK_THROW((*dimensions_).size()==7);
+        return ( (x<(*dimensions_)[0]) && (y<(*dimensions_)[1]) && (z<(*dimensions_)[2]) && (s<(*dimensions_)[3]) && (p<(*dimensions_)[4]) && (r<(*dimensions_)[5]) && (a<(*dimensions_)[6]) && x>=0 && y>=0 && z>=0 && s>=0 && p>=0 && r>=0 && a>=0);
+    }
+
+    template <typename T> 
+    inline bool NDArray<T>::point_in_range(gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a, gt_index_type q) const
+    {
+        GADGET_DEBUG_CHECK_THROW((*dimensions_).size()==8);
+        return ( (x<(*dimensions_)[0]) && (y<(*dimensions_)[1]) && (z<(*dimensions_)[2]) && (s<(*dimensions_)[3]) && (p<(*dimensions_)[4]) && (r<(*dimensions_)[5]) && (a<(*dimensions_)[6]) && (q<(*dimensions_)[7]) && x>=0 && y>=0 && z>=0 && s>=0 && p>=0 && r>=0 && a>=0 && q>=0);
+    }
+
+    template <typename T> 
+    inline bool NDArray<T>::point_in_range(gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a, gt_index_type q, gt_index_type u) const
+    {
+        GADGET_DEBUG_CHECK_THROW((*dimensions_).size()==9);
+        return ( (x<(*dimensions_)[0]) && (y<(*dimensions_)[1]) && (z<(*dimensions_)[2]) && (s<(*dimensions_)[3]) && (p<(*dimensions_)[4]) && (r<(*dimensions_)[5]) && (a<(*dimensions_)[6]) && (q<(*dimensions_)[7]) && (u<(*dimensions_)[8]) && x>=0 && y>=0 && z>=0 && s>=0 && p>=0 && r>=0 && a>=0 && q>=0 && u>=0);
     }
 }
 
diff --git a/toolboxes/core/SerializableObject.h b/toolboxes/core/SerializableObject.h
deleted file mode 100644
index 3e28e5e..0000000
--- a/toolboxes/core/SerializableObject.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/** 
-    SerializeObject is the base class for serializable objects
-*/
-
-#pragma once
-
-#include "GadgetronCommon.h"
-#include "GadgetronException.h"
-#include "cpucore_export.h"
-
-#include <complex>
-#include <iostream>
-
-namespace Gadgetron
-{
-  class SerializableObject
-  {
-  public:
-    
-    SerializableObject() {}
-    virtual ~SerializableObject() {}
-    
-    // serialize and deserialize to/from the buffer
-    virtual bool serialize(char*& buf, size_t& len) const = 0; // Should be a void function
-    virtual bool deserialize(char* buf, size_t& len) = 0; // Should be a void function
-  };  
-}
diff --git a/toolboxes/core/complext.h b/toolboxes/core/complext.h
index 943e116..60cecc5 100644
--- a/toolboxes/core/complext.h
+++ b/toolboxes/core/complext.h
@@ -137,21 +137,21 @@ namespace Gadgetron{
   template <typename T> 
   inline std::ostream & operator<< (std::ostream & os, const complext<T>& a )
   {
-    os << a.real() << a.imag() << "i";
+    os << a.real() <<' ' << a.imag() << "i";
     return os;
   }
 
   template <> 
   inline std::ostream & operator<< (std::ostream & os, const complext<float>& a )
   {
-    os << a.real() << a.imag() << "i";
+    os << a.real() <<' ' << a.imag() << "i";
     return os;
   }
 
   template <> 
   inline std::ostream & operator<< (std::ostream & os, const complext<double>& a )
   {
-    os << a.real() << a.imag() << "i";
+    os << a.real() <<' ' << a.imag() << "i";
     return os;
   }
 
@@ -159,6 +159,10 @@ namespace Gadgetron{
   typedef complext<double> double_complext;
 
   template <class T> struct realType {};
+  template<> struct realType<short> {typedef double Type; };
+  template<> struct realType<unsigned short> {typedef double Type; };
+  template<> struct realType<int> {typedef double Type; };
+  template<> struct realType<unsigned int> {typedef double Type; };
   template<> struct realType<float_complext> {typedef float Type; };
   template<> struct realType<double_complext> {typedef double Type; };
   template<> struct realType<float> {typedef float Type; };
diff --git a/toolboxes/core/cpu/CMakeLists.txt b/toolboxes/core/cpu/CMakeLists.txt
index cfc2f19..af8cf4c 100644
--- a/toolboxes/core/cpu/CMakeLists.txt
+++ b/toolboxes/core/cpu/CMakeLists.txt
@@ -1,84 +1,91 @@
 if (WIN32)
-ADD_DEFINITIONS(-D__BUILD_GADGETRON_CPUCORE__)
+    ADD_DEFINITIONS(-D__BUILD_GADGETRON_CPUCORE__)
 endif (WIN32)
 
 include_directories(
-  ${FFTW3_INCLUDE_DIR}
-  ${CMAKE_SOURCE_DIR}/toolboxes/core
-  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
-  )
+    ${FFTW3_INCLUDE_DIR}
+    ${ISMRMRD_INCLUDE_DIR}
+    ${CMAKE_SOURCE_DIR}/toolboxes/core
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/algorithm
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/hostutils
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
+    ${CMAKE_SOURCE_DIR}/apps/gadgetron
+)
 
 if(WIN32)
     link_directories(${Boost_LIBRARY_DIRS})
 endif(WIN32)
 
-if (MKL_FOUND)
-    INCLUDE_DIRECTORIES( ${MKL_INCLUDE_DIR} )
-    LINK_DIRECTORIES( ${MKL_LIB_DIR} ${MKL_COMPILER_LIB_DIR} )
-endif (MKL_FOUND)
+#if (MKL_FOUND)
+#    INCLUDE_DIRECTORIES( ${MKL_INCLUDE_DIR} )
+#    LINK_DIRECTORIES( ${MKL_LIB_DIR} ${MKL_COMPILER_LIB_DIR} )
+#endif (MKL_FOUND)
 
-add_library(cpucore ${LIBTYPE} 
-  ../NDArray.h
-  ../complext.h
-  ../GadgetronException.h
-  ../GadgetronCommon.h
-  ../GadgetronTimer.h
-  ../SerializableObject.h
-  cpucore_export.h 
-  hoNDArray.h
-  hoNDArray.hxx
-  hoNDArray_utils.h
-  hoNDArray_fileio.h
-  hoNDFFT.h
-  ho2DArray.h
-  ho2DArray.hxx
-  ho3DArray.h
-  ho3DArray.hxx
-  ho4DArray.h
-  ho4DArray.hxx
-  ho5DArray.h
-  ho5DArray.hxx
-  ho6DArray.h
-  ho6DArray.hxx
-  ho7DArray.h
-  ho7DArray.hxx 
-  hoMatrix.h
-  hoNDFFT.cpp
-  )
+set(header_files ../NDArray.h
+                ../complext.h
+                ../GadgetronException.h
+                ../GadgetronCommon.h
+                ../GadgetronTimer.h
+                cpucore_export.h 
+                hoNDArray.h
+                hoNDArray.hxx
+                hoNDObjectArray.h
+                hoNDArray_utils.h
+                hoNDArray_fileio.h
+                ho2DArray.h
+                ho2DArray.hxx
+                ho3DArray.h
+                ho3DArray.hxx
+                ho4DArray.h
+                ho4DArray.hxx
+                ho5DArray.h
+                ho5DArray.hxx
+                ho6DArray.h
+                ho6DArray.hxx
+                ho7DArray.h
+                ho7DArray.hxx 
+                hoMatrix.h
+                hoMatrix.hxx
+                hoNDPoint.h
+                hoNDBoundaryHandler.h
+                hoNDBoundaryHandler.hxx
+                hoNDInterpolator.h
+                hoNDInterpolatorNearestNeighbor.hxx
+                hoNDInterpolatorLinear.hxx
+                hoNDInterpolatorBSpline.hxx )
 
-target_link_libraries(cpucore 
-  ${FFTW3_LIBRARIES} 
-  ${Boost_LIBRARIES} 
-  ${MKL_LIBRARIES} 
-  )
+set(image_files image/hoNDImage.h 
+            image/hoNDImage.hxx 
+            image/hoNDImageContainer2D.h )
 
-install(TARGETS cpucore DESTINATION lib)
+set(algorithm_files algorithm/hoNDBSpline.h algorithm/hoNDBSpline.hxx)
 
-install(FILES
-  cpucore_export.h 
-  hoNDArray.h
-  hoNDArray.hxx
-  hoNDArray_utils.h
-  hoNDArray_fileio.h
-  hoNDFFT.h
-  ho2DArray.h
-  ho2DArray.hxx
-  ho3DArray.h
-  ho3DArray.hxx
-  ho4DArray.h
-  ho4DArray.hxx
-  ho5DArray.h
-  ho5DArray.hxx
-  ho6DArray.h
-  ho6DArray.hxx
-  ho7DArray.h
-  ho7DArray.hxx
-  hoMatrix.h
-  hoMatrix.cpp
-  DESTINATION include)
+source_group(algorithm FILES ${algorithm_files})
+source_group(image FILES ${image_files})
+
+add_library(gadgetron_toolbox_cpucore ${LIBTYPE}
+                    hoMatrix.cpp 
+                    ${header_files} 
+                    ${image_files}  
+                    ${algorithm_files} )
+
+set_target_properties(gadgetron_toolbox_cpucore  PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
 
-if (ARMADILLO_FOUND)
-    add_subdirectory(arma_math)
-endif (ARMADILLO_FOUND)
+target_link_libraries(gadgetron_toolbox_cpucore 
+                    ${Boost_LIBRARIES})
+
+install(TARGETS gadgetron_toolbox_cpucore DESTINATION lib COMPONENT main)
+
+install(FILES
+        ${header_files}
+        image/hoNDImage.h 
+        image/hoNDImage.hxx 
+        image/hoNDImageContainer2D.h 
+        algorithm/hoNDBSpline.h
+        algorithm/hoNDBSpline.hxx 
+        DESTINATION include COMPONENT main)
 
+add_subdirectory(math)
 add_subdirectory(hostutils)
diff --git a/toolboxes/core/cpu/algorithm/hoNDBSpline.h b/toolboxes/core/cpu/algorithm/hoNDBSpline.h
new file mode 100644
index 0000000..da0dbe7
--- /dev/null
+++ b/toolboxes/core/cpu/algorithm/hoNDBSpline.h
@@ -0,0 +1,191 @@
+/** \file       hoNDBSpline.h
+    \brief      N-dimensional interpolation BSpline implementation
+
+                The source code is partially from http://bigwww.epfl.ch/thevenaz/interpolation/
+                by Philippe Thévenaz
+
+                References:
+
+                [1] P. Thévenaz, T. Blu, M. Unser, "Interpolation Revisited," IEEE Trans on Medical Imaging, Vol 19, 7, 739-758, July 2000.
+                [2] M. Unser, A. Aldroubi and M. Eden, "B-Spline Signal Processing: Part I--Theory," IEEE Trans on Signal Processing, Vol 41, 2, 821-832, Feb 1993.
+                [3] M. Unser, A. Aldroubi and M. Eden, "B-Spline Signal Processing: Part II--Efficient Design and Applications," IEEE Trans on Signal Processing, Vol 41, 2, 834-848, Feb 1993.
+
+    \author     Hui Xue
+*/
+
+#pragma once
+
+#include "hoNDArray.h"
+#include "hoNDImage.h"
+
+namespace Gadgetron
+{
+    template <typename T, unsigned int D>
+    class hoNDBSpline
+    {
+    public:
+        // N-dimensional BSpline helper: T is the sample type, D the number of dimensions (the std::vector-based coefficient overload checks dimension.size() against D)
+        typedef hoNDBSpline<T, D> Self;
+
+        typedef T element_type;
+        typedef T value_type;
+        typedef float coord_type;
+
+        /// type for bspline computation, can be 'float' or 'double'
+        typedef typename realType<T>::Type bspline_float_type;
+
+        typedef hoNDArray<T> ArrayType;
+        typedef hoNDImage<T, D> ImageType;
+
+        hoNDBSpline() {}
+        ~hoNDBSpline() {}
+
+        /// compute the BSpline coefficients of 'data' for the given SplineDegree and store them in 'coeff'; returns false if the computation failed
+        bool computeBSplineCoefficients(const hoNDArray<T>& data, unsigned int SplineDegree, hoNDArray<T>& coeff);
+        bool computeBSplineCoefficients(const hoNDImage<T, D>& data, unsigned int SplineDegree, hoNDArray<T>& coeff);
+        /// raw-buffer variants: 'coeff' must have room for as many elements as 'data'; sx is the fastest-varying dimension (element (x, y) lives at x + y*sx)
+        bool computeBSplineCoefficients(const T* data, const std::vector<size_t>& dimension, unsigned int SplineDegree, T* coeff);
+        bool computeBSplineCoefficients(const T* data, size_t len, unsigned int SplineDegree, T* coeff);
+        bool computeBSplineCoefficients(const T* data, size_t sx, size_t sy, unsigned int SplineDegree, T* coeff);
+        bool computeBSplineCoefficients(const T* data, size_t sx, size_t sy, size_t sz, unsigned int SplineDegree, T* coeff);
+        bool computeBSplineCoefficients(const T* data, size_t sx, size_t sy, size_t sz, size_t st, unsigned int SplineDegree, T* coeff);
+        bool computeBSplineCoefficients(const T* data, size_t sx, size_t sy, size_t sz, size_t st, size_t sp, unsigned int SplineDegree, T* coeff);
+        bool computeBSplineCoefficients(const T* data, size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, unsigned int SplineDegree, T* coeff);
+        bool computeBSplineCoefficients(const T* data, size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, unsigned int SplineDegree, T* coeff);
+        bool computeBSplineCoefficients(const T* data, size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss, unsigned int SplineDegree, T* coeff);
+        bool computeBSplineCoefficients(const T* data, size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss, size_t su, unsigned int SplineDegree, T* coeff);
+
+        /// evaluate the BSpline (or one of its derivatives) at a position, given the coefficients 'coeff' and the array size along every dimension
+        /// derivative: can be 0/1/2, for 0-order, first-order and second-order derivative
+
+        T evaluateBSpline(const T* coeff, const std::vector<size_t>& dimension, unsigned int SplineDegree, 
+                        const std::vector<unsigned int>& derivative, 
+                        const coord_type* pos);
+
+        T evaluateBSpline(const T* coeff, const std::vector<size_t>& dimension, unsigned int SplineDegree, 
+                        const std::vector<unsigned int>& derivative, 
+                        const std::vector<coord_type>& pos);
+
+        T evaluateBSpline(const T* coeff, size_t len, unsigned int SplineDegree, 
+                        unsigned int dx, 
+                        coord_type x);
+
+        T evaluateBSpline(const T* coeff, size_t sx, size_t sy, unsigned int SplineDegree, 
+                        unsigned int dx, unsigned int dy, 
+                        coord_type x, coord_type y);
+
+        T evaluateBSpline(const T* coeff, size_t sx, size_t sy, size_t sz, unsigned int SplineDegree, 
+                        unsigned int dx, unsigned int dy, unsigned int dz, 
+                        coord_type x, coord_type y, coord_type z);
+
+        T evaluateBSpline(const T* coeff, size_t sx, size_t sy, size_t sz, size_t st, unsigned int SplineDegree, 
+                        unsigned int dx, unsigned int dy, unsigned int dz, unsigned int dt, 
+                        coord_type x, coord_type y, coord_type z, coord_type t);
+
+        T evaluateBSpline(const T* coeff, size_t sx, size_t sy, size_t sz, size_t st, size_t sp, unsigned int SplineDegree, 
+                        unsigned int dx, unsigned int dy, unsigned int dz, unsigned int dt, unsigned int dp, 
+                        coord_type x, coord_type y, coord_type z, coord_type t, coord_type p);
+
+        T evaluateBSpline(const T* coeff, size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, unsigned int SplineDegree, 
+                        unsigned int dx, unsigned int dy, unsigned int dz, unsigned int dt, unsigned int dp, unsigned int dq, 
+                        coord_type x, coord_type y, coord_type z, coord_type t, coord_type p, coord_type q);
+
+        T evaluateBSpline(const T* coeff, size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, unsigned int SplineDegree, 
+                        unsigned int dx, unsigned int dy, unsigned int dz, unsigned int dt, unsigned int dp, unsigned int dq, unsigned int dr, 
+                        coord_type x, coord_type y, coord_type z, coord_type t, coord_type p, coord_type q, coord_type r);
+
+        T evaluateBSpline(const T* coeff, size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss, unsigned int SplineDegree, 
+                        unsigned int dx, unsigned int dy, unsigned int dz, unsigned int dt, unsigned int dp, unsigned int dq, unsigned int dr, unsigned int ds, 
+                        coord_type x, coord_type y, coord_type z, coord_type t, coord_type p, coord_type q, coord_type r, coord_type s);
+
+        T evaluateBSpline(const T* coeff, size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss, size_t su, unsigned int SplineDegree, 
+                        unsigned int dx, unsigned int dy, unsigned int dz, unsigned int dt, unsigned int dp, unsigned int dq, unsigned int dr, unsigned int ds, unsigned int du, 
+                        coord_type x, coord_type y, coord_type z, coord_type t, coord_type p, coord_type q, coord_type r, coord_type s, coord_type u);
+
+
+        /// evaluate BSpline with pre-computed weights (one weight array per dimension, supplied by the caller)
+        T evaluateBSpline(const T* coeff, size_t sx, size_t sy, unsigned int SplineDegree, 
+                        bspline_float_type* xWeight, bspline_float_type* yWeight, 
+                        coord_type x, coord_type y);
+
+        T evaluateBSpline(const T* coeff, size_t sx, size_t sy, size_t sz, unsigned int SplineDegree, 
+                        bspline_float_type* xWeight, bspline_float_type* yWeight, bspline_float_type* zWeight, 
+                        coord_type x, coord_type y, coord_type z);
+
+        T evaluateBSpline(const T* coeff, size_t sx, size_t sy, size_t sz, size_t st, unsigned int SplineDegree, 
+                        bspline_float_type* xWeight, bspline_float_type* yWeight, bspline_float_type* zWeight, bspline_float_type* tWeight, 
+                        coord_type x, coord_type y, coord_type z, coord_type t);
+
+        T evaluateBSpline(const T* coeff, const std::vector<size_t>& dimension, unsigned int SplineDegree, 
+                        bspline_float_type** weight, const coord_type* pos);
+
+        T evaluateBSpline(const T* coeff, const std::vector<size_t>& dimension, unsigned int SplineDegree, 
+                        bspline_float_type** weight, const std::vector<coord_type>& pos);
+
+        /// compute the BSpline based derivative for an ND array
+        /// derivative indicates the order of derivatives for every dimension
+        bool computeBSplineDerivative(const hoNDArray<T>& data, const hoNDArray<T>& coeff, unsigned int SplineDegree, const std::vector<unsigned int>& derivative, hoNDArray<T>& deriv);
+        bool computeBSplineDerivative(const hoNDImage<T,D>& data, const hoNDArray<T>& coeff, unsigned int SplineDegree, const std::vector<unsigned int>& derivative, hoNDImage<T,D>& deriv);
+
+        /// print out information to 'os' (NOTE(review): this comment originally said "image information"; confirm what the implementation actually prints)
+        void print(std::ostream& os) const;
+
+    protected:
+
+        /// the following BSpline coefficient routines are adapted from http://bigwww.epfl.ch/thevenaz/interpolation/
+        static void ConvertToInterpolationCoefficients(
+                                                        T               c[],            /* input samples --> output coefficients */
+                                                        size_t          DataLength,     /* number of samples or coefficients */
+                                                        bspline_float_type          z[],            /* poles */
+                                                        long            NbPoles,        /* number of poles */
+                                                        bspline_float_type          Tolerance       /* admissible relative error */ 
+                                                      );
+
+        static T InitialCausalCoefficient(
+                                            T           c[],                /* coefficients */
+                                            size_t      DataLength,         /* number of coefficients */
+                                            bspline_float_type      z,                  /* actual pole */
+                                            bspline_float_type      Tolerance           /* admissible relative error */
+                                         );
+
+        static T InitialAntiCausalCoefficient(
+                                                T           c[],                /* coefficients */
+                                                size_t      DataLength,         /* number of samples or coefficients */
+                                                bspline_float_type      z                   /* actual pole */
+                                             );
+
+        /// fill 'pole' with the filter poles for SplineDegree and set NbPoles to their count (callers in this file pass a 4-element array)
+        static void Pole(bspline_float_type* pole, unsigned int SplineDegree, unsigned int& NbPoles);
+
+        /// BSpline function
+        /// this function implements the symmetrical BSpline function of SplineDegree n
+        /// Equation 2.6 of reference [2]
+        static bspline_float_type BSpline(bspline_float_type x, unsigned int SplineDegree);
+
+        /// compute the discrete BSpline value
+        /// this function is modified from the source code at http://bigwww.epfl.ch/thevenaz/interpolation/
+        static void BSplineDiscrete(bspline_float_type x, unsigned int SplineDegree, bspline_float_type* weight, long long* xIndex);
+
+        /// compute the discrete BSpline value with the first order derivative
+        static void BSplineDiscreteFirstOrderDerivative(bspline_float_type x, unsigned int SplineDegree, bspline_float_type* weight, long long* xIndex);
+        /// compute the discrete BSpline value with the second order derivative
+        static void BSplineDiscreteSecondOrderDerivative(bspline_float_type x, unsigned int SplineDegree, bspline_float_type* weight, long long* xIndex);
+
+        /// compute BSpline interpolation locations
+        /// xIndex has at least SplineDegree elements (NOTE(review): a degree-n spline normally touches n+1 samples -- confirm the required buffer size against the implementation)
+        static void BSplineInterpolationLocation(bspline_float_type x, unsigned int SplineDegree, long long* xIndex);
+
+        /// apply mirror boundary condition for interpolation locations
+        static void BSplineInterpolationMirrorBoundaryCondition(unsigned int SplineDegree, long long* xIndex, size_t Width);
+
+        /// compute the derivative of BSpline
+        /// first order derivative dBSpline(x, SplineDegree)/dx = BSpline(x+0.5, SplineDegree-1) - BSpline(x-0.5, SplineDegree-1)
+        static bspline_float_type BSplineFirstOrderDerivative(bspline_float_type x, unsigned int SplineDegree);
+        /// second order derivative d2BSpline(x, SplineDegree)/dx2 = BSpline(x+1, SplineDegree-2) + BSpline(x-1, SplineDegree-2) - 2*BSpline(x, SplineDegree-2)
+        static bspline_float_type BSplineSecondOrderDerivative(bspline_float_type x, unsigned int SplineDegree);
+
+        /// compute BSpline interpolation locations and weights
+        static void computeBSplineInterpolationLocationsAndWeights(size_t len, unsigned int SplineDegree, unsigned int dx, coord_type x, bspline_float_type* weight, long long* xIndex);
+    };
+}
+
+#include "hoNDBSpline.hxx"
diff --git a/toolboxes/core/cpu/algorithm/hoNDBSpline.hxx b/toolboxes/core/cpu/algorithm/hoNDBSpline.hxx
new file mode 100644
index 0000000..77dd1a0
--- /dev/null
+++ b/toolboxes/core/cpu/algorithm/hoNDBSpline.hxx
@@ -0,0 +1,2133 @@
+/** \file       hoNDBSpline.hxx
+    \brief      Implementation of N-dimensional BSpline class for gadgetron
+    \author     Hui Xue
+*/
+
+#include "hoNDBSpline.h"
+
+namespace Gadgetron
+{
+    template <typename T, unsigned int D> 
+    bool hoNDBSpline<T, D>::computeBSplineCoefficients(const hoNDArray<T>& data, unsigned int SplineDegree, hoNDArray<T>& coeff)
+    {
+        size_t NDim = data.get_number_of_dimensions();
+        // coeff is written through coeff.begin() below, so give it data's dimensions first (by assigning data to it when they differ)
+        if ( !coeff.dimensions_equal(&data) )
+        {
+            coeff = data;
+        }
+        // dispatch on the array rank: 1-9 use the dedicated fixed-rank overloads, every other rank goes through the std::vector<size_t> overload
+        bool res;
+        switch (NDim)
+        {
+            case 1:
+            res = this->computeBSplineCoefficients(data.begin(), data.get_size(0), SplineDegree, coeff.begin());
+                break;
+
+            case 2:
+            res = this->computeBSplineCoefficients(data.begin(), data.get_size(0), data.get_size(1), SplineDegree, coeff.begin());
+                break;
+
+            case 3:
+            res = this->computeBSplineCoefficients(data.begin(), data.get_size(0), data.get_size(1), data.get_size(2), SplineDegree, coeff.begin());
+                break;
+
+            case 4:
+            res = this->computeBSplineCoefficients(data.begin(), data.get_size(0), data.get_size(1), data.get_size(2), data.get_size(3), SplineDegree, coeff.begin());
+                break;
+
+            case 5:
+            res = this->computeBSplineCoefficients(data.begin(), data.get_size(0), data.get_size(1), data.get_size(2), data.get_size(3), data.get_size(4), SplineDegree, coeff.begin());
+                break;
+
+            case 6:
+            res = this->computeBSplineCoefficients(data.begin(), data.get_size(0), data.get_size(1), data.get_size(2), data.get_size(3), data.get_size(4), data.get_size(5), SplineDegree, coeff.begin());
+                break;
+
+            case 7:
+            res = this->computeBSplineCoefficients(data.begin(), data.get_size(0), data.get_size(1), data.get_size(2), data.get_size(3), data.get_size(4), data.get_size(5), data.get_size(6), SplineDegree, coeff.begin());
+                break;
+
+            case 8:
+            res = this->computeBSplineCoefficients(data.begin(), data.get_size(0), data.get_size(1), data.get_size(2), data.get_size(3), data.get_size(4), data.get_size(5), data.get_size(6), data.get_size(7), SplineDegree, coeff.begin());
+                break;
+
+            case 9:
+            res = this->computeBSplineCoefficients(data.begin(), data.get_size(0), data.get_size(1), data.get_size(2), data.get_size(3), data.get_size(4), data.get_size(5), data.get_size(6), data.get_size(7), data.get_size(8), SplineDegree, coeff.begin());
+                break;
+
+            default:
+                boost::shared_ptr< std::vector<size_t> > dim = data.get_dimensions();
+                res = this->computeBSplineCoefficients(data.begin(), *dim, SplineDegree, coeff.begin());
+        }
+
+        return res;
+    }
+    /// hoNDImage overload: wraps the image buffer in an hoNDArray of the same dimensions and forwards to the hoNDArray overload (the 'false' constructor argument presumably means "do not take ownership of the buffer" -- confirm)
+    template <typename T, unsigned int D> 
+    inline bool hoNDBSpline<T, D>::computeBSplineCoefficients(const hoNDImage<T, D>& data, unsigned int SplineDegree, hoNDArray<T>& coeff)
+    {
+        std::vector<size_t> dim;
+        data.get_dimensions(dim);
+        hoNDArray<T> dataTmp(dim, const_cast<T*>(data.begin()), false);
+        return this->computeBSplineCoefficients(dataTmp, SplineDegree, coeff);
+    }
+    /// generic N-D version: copies 'data' into 'coeff', then converts coeff in place along each of the D dimensions in turn; returns false if an exception was caught
+    template <typename T, unsigned int D> 
+    bool hoNDBSpline<T, D>::computeBSplineCoefficients(const T* data, const std::vector<size_t>& dimension, unsigned int SplineDegree, T* coeff)
+    {
+        try
+        {
+            unsigned int NbPoles;
+            bspline_float_type pole[4];
+            this->Pole(pole, SplineDegree, NbPoles);
+            // the template rank D must match the rank of 'dimension' (GADGET_CHECK_RETURN_FALSE presumably returns false otherwise -- macro defined elsewhere)
+            GADGET_CHECK_RETURN_FALSE(D==dimension.size());
+            // wrap coeff in an hoNDArray so elements can be addressed by N-D index, then seed it with the input samples
+            hoNDArray<T> coeffBuf( const_cast<std::vector<size_t>&>(dimension), coeff, false);
+            memcpy(coeff, data, sizeof(T)*coeffBuf.get_number_of_elements());
+
+            unsigned int d;
+            for ( d=0; d<D; d++ )
+            {
+                long long ii;
+                // len: number of samples in one line along dimension d; num: number of such lines in the array
+                size_t len = dimension[d];
+                size_t num = coeffBuf.get_number_of_elements()/len;
+                // dimUsed: the array dimensions with dimension d removed, used to enumerate the lines
+                size_t i;
+                std::vector<size_t> dimUsed(D-1);
+                for( i = 0; i<D; i++ )
+                {
+                    if ( i < d)
+                    {
+                        dimUsed[i] = dimension[i];
+                    }
+                    else if ( i > d )
+                    {
+                        dimUsed[i-1] = dimension[i];
+                    }
+                }
+
+                std::vector<size_t> offsetFactor(D-1, 1);
+                hoNDArray<T>::calculate_offset_factors(dimUsed, offsetFactor);
+
+                //for( i = 0; i<D-1; i++ )
+                //{
+                //    size_t k = 1;
+                //    for( j = 0; j < i; j++ )
+                //    {
+                //        k *= dimUsed[j];
+                //    }
+                //    offsetFactor[i] = k;
+                //}
+
+                #pragma omp parallel default(none) private(ii) shared(coeff, coeffBuf, pole, NbPoles, dimension, num, len, offsetFactor, d)
+                {
+                    T* buf = new T[ len ];
+                    // buf, ind and indUsed are declared inside the parallel region, so every thread works on its own copies
+                    std::vector<size_t> ind(D, 0);
+                    std::vector<size_t> indUsed(D-1, 0);
+
+                    #pragma omp for 
+                    for ( ii=0; ii<num; ii++ )
+                    {
+                        if ( d == 0 )
+                        {
+                            memcpy(buf, coeff+ii*len, sizeof(T)*len);
+
+                            this->ConvertToInterpolationCoefficients(buf, len, pole, NbPoles, DBL_EPSILON);
+
+                            memcpy(coeff+ii*len, buf, sizeof(T)*len);
+                        }
+                        else
+                        {
+                            hoNDArray<T>::calculate_index(ii, offsetFactor, indUsed);
+                            // calculate_index presumably turns line number ii into indUsed; ind is indUsed with slot d left free for the position along the line
+                            long long i;
+
+                            //size_t offset = ii;
+                            //for( i=D-2; i>=0; i-- )
+                            //{
+                            //    indUsed[i] = offset / offsetFactor[i];
+                            //    offset %= offsetFactor[i];
+                            //}
+
+                            for ( i=0; i<D; i++ )
+                            {
+                                if ( i < d )
+                                {
+                                    ind[i] = indUsed[i];
+                                }
+                                else if ( i > d )
+                                {
+                                    ind[i] = indUsed[i-1];
+                                }
+                            }
+                            // gather the line along dimension d into buf, convert it, and scatter it back into coeffBuf
+                            for ( i=0; i<len; i++ )
+                            {
+                                ind[d] = i;
+                                buf[i] = coeffBuf(ind);
+                            }
+
+                            this->ConvertToInterpolationCoefficients(buf, len, pole, NbPoles, DBL_EPSILON);
+
+                            for ( i=0; i<len; i++ )
+                            {
+                                ind[d] = i;
+                                coeffBuf(ind) = buf[i];
+                            }
+                        }
+                    }
+
+                    delete [] buf;
+                }
+            }
+
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in hoNDBSpline<T, D>::computeBSplineCoefficients(const T* data, const std::vector<size_t>& dimension, unsigned int SplineDegree, T* coeff) ... ");
+            return false;
+        }
+
+        return true;
+    }
+    /// 1D version: copies 'len' samples from data to coeff and converts them to BSpline coefficients in place; returns false if an exception was caught
+    template <typename T, unsigned int D> 
+    inline bool hoNDBSpline<T, D>::computeBSplineCoefficients(const T* data, size_t len, unsigned int SplineDegree, T* coeff)
+    {
+        try
+        {
+            unsigned int NbPoles;
+            bspline_float_type pole[4];
+            this->Pole(pole, SplineDegree, NbPoles);
+
+            memcpy(coeff, data, sizeof(T)*len);
+            this->ConvertToInterpolationCoefficients(coeff, len, pole, NbPoles, DBL_EPSILON);
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in hoNDBSpline<T, D>::computeBSplineCoefficients(const T* data, size_t len, unsigned int SplineDegree, T* coeff) ... ");
+            return false;
+        }
+
+        return true;
+    }
+    /// 2D version (element (x, y) at x + y*sx): converts every row of 'data' into 'coeff', then every column of 'coeff' in place; returns false if an exception was caught
+    template <typename T, unsigned int D> 
+    bool hoNDBSpline<T, D>::computeBSplineCoefficients(const T* data, size_t sx, size_t sy, unsigned int SplineDegree, T* coeff)
+    {
+        try
+        {
+            unsigned int NbPoles;
+            bspline_float_type pole[4];
+            this->Pole(pole, SplineDegree, NbPoles);
+
+            // x: convert each contiguous run of sx samples, reading from data and writing to coeff; rows are shared out over the OpenMP threads
+            long long y;
+            #pragma omp parallel default(none) private(y) shared(data, coeff, pole, NbPoles, sx, sy)
+            {
+                T* buf = new T[sx];
+
+                #pragma omp for 
+                for ( y=0; y<sy; y++ )
+                {
+                    memcpy(buf, data+y*sx, sizeof(T)*sx);
+
+                    this->ConvertToInterpolationCoefficients(buf, sx, pole, NbPoles, DBL_EPSILON);
+
+                    memcpy(coeff+y*sx, buf, sizeof(T)*sx);
+                }
+
+                delete [] buf;
+            }
+
+            // y: convert each column of coeff in place (stride sx), gathering it into buf and scattering it back
+            long long x;
+            #pragma omp parallel default(none) private(x) shared(data, coeff, pole, NbPoles, sx, sy)
+            {
+                T* buf = new T[sy];
+
+                #pragma omp for 
+                for ( x=0; x<sx; x++ )
+                {
+                    size_t y;
+
+                    for ( y=0; y<sy; y++ )
+                    {
+                        buf[y] = coeff[x + y*sx];
+                    }
+
+                    this->ConvertToInterpolationCoefficients(buf, sy, pole, NbPoles, DBL_EPSILON);
+
+                    for ( y=0; y<sy; y++ )
+                    {
+                        coeff[x + y*sx] = buf[y];
+                    }
+                }
+
+                delete [] buf;
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in hoNDBSpline<T, D>::computeBSplineCoefficients(const T* data, size_t sx, size_t sy, unsigned int SplineDegree, T* coeff) ... ");
+            return false;
+        }
+
+        return true;
+    }
+    /// 3D version (element (x, y, z) at x + y*sx + z*sx*sy): converts along x from 'data' into 'coeff', then along y and z in place on 'coeff'; returns false if an exception was caught
+    template <typename T, unsigned int D> 
+    bool hoNDBSpline<T, D>::computeBSplineCoefficients(const T* data, size_t sx, size_t sy, size_t sz, unsigned int SplineDegree, T* coeff)
+    {
+        try
+        {
+            unsigned int NbPoles;
+            bspline_float_type pole[4];
+            this->Pole(pole, SplineDegree, NbPoles);
+
+            // x: convert each contiguous run of sx samples, reading from data and writing to coeff; the z slices are shared out over the OpenMP threads
+            long long z;
+            #pragma omp parallel default(none) private(z) shared(data, coeff, pole, NbPoles, sx, sy, sz)
+            {
+                T* buf = new T[sx];
+
+                #pragma omp for 
+                for ( z=0; z<sz; z++ )
+                {
+                    for ( size_t y=0; y<sy; y++ )
+                    {
+                        memcpy(buf, data+z*sx*sy+y*sx, sizeof(T)*sx);
+
+                        this->ConvertToInterpolationCoefficients(buf, sx, pole, NbPoles, DBL_EPSILON);
+
+                        memcpy(coeff+z*sx*sy+y*sx, buf, sizeof(T)*sx);
+                    }
+                }
+
+                delete [] buf;
+            }
+
+            // y: convert each line along y (stride sx) of coeff in place, again parallel over z
+            #pragma omp parallel default(none) private(z) shared(data, coeff, pole, NbPoles, sx, sy, sz)
+            {
+                T* buf = new T[sy];
+
+                #pragma omp for 
+                for ( z=0; z<sz; z++ )
+                {
+                    for ( size_t x=0; x<sx; x++ )
+                    {
+                        size_t y;
+
+                        size_t offset = x + z*sx*sy;
+
+                        for ( y=0; y<sy; y++ )
+                        {
+                            buf[y] = coeff[offset + y*sx];
+                        }
+
+                        this->ConvertToInterpolationCoefficients(buf, sy, pole, NbPoles, DBL_EPSILON);
+
+                        for ( y=0; y<sy; y++ )
+                        {
+                            coeff[offset + y*sx] = buf[y];
+                        }
+                    }
+                }
+
+                delete [] buf;
+            }
+
+            // z: convert each line along z (stride sx*sy) of coeff in place, parallel over x
+            long long x;
+            #pragma omp parallel default(none) private(x) shared(data, coeff, pole, NbPoles, sx, sy, sz)
+            {
+                T* buf = new T[sz];
+
+                #pragma omp for 
+                for ( x=0; x<sx; x++ )
+                {
+                    for ( size_t y=0; y<sy; y++ )
+                    {
+                        size_t z;
+                        size_t offset = x + y*sx;
+
+                        for ( z=0; z<sz; z++ )
+                        {
+                            buf[z] = coeff[offset + z*sx*sy];
+                        }
+
+                        this->ConvertToInterpolationCoefficients(buf, sz, pole, NbPoles, DBL_EPSILON);
+
+                        for ( z=0; z<sz; z++ )
+                        {
+                            coeff[offset + z*sx*sy] = buf[z];
+                        }
+                    }
+                }
+
+                delete [] buf;
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in hoNDBSpline<T, D>::computeBSplineCoefficients(const T* data, size_t sx, size_t sy, size_t sz, unsigned int SplineDegree, T* coeff) ... ");
+            return false;
+        }
+
+        return true;
+    }
+    /// 4D version (element (x, y, z, t) at x + y*sx + z*sx*sy + t*sx*sy*sz): converts along x from 'data' into 'coeff', then along y, z and t in place on 'coeff'; returns false if an exception was caught
+    template <typename T, unsigned int D> 
+    bool hoNDBSpline<T, D>::computeBSplineCoefficients(const T* data, size_t sx, size_t sy, size_t sz, size_t st, unsigned int SplineDegree, T* coeff)
+    {
+        try
+        {
+            unsigned int NbPoles;
+            bspline_float_type pole[4];
+            this->Pole(pole, SplineDegree, NbPoles);
+
+            long long x, y, z, t;
+
+            // x: convert each contiguous run of sx samples, reading from data and writing to coeff; the t volumes are shared out over the OpenMP threads
+            #pragma omp parallel default(none) private(y, z, t) shared(data, coeff, pole, NbPoles, sx, sy, sz, st)
+            {
+                T* buf = new T[sx];
+
+                #pragma omp for 
+                for ( t=0; t<st; t++ )
+                {
+                    for ( z=0; z<sz; z++ )
+                    {
+                        for ( y=0; y<sy; y++ )
+                        {
+                            memcpy(buf, data+t*sx*sy*sz+z*sx*sy+y*sx, sizeof(T)*sx);
+
+                            this->ConvertToInterpolationCoefficients(buf, sx, pole, NbPoles, DBL_EPSILON);
+
+                            memcpy(coeff+t*sx*sy*sz+z*sx*sy+y*sx, buf, sizeof(T)*sx);
+                        }
+                    }
+                }
+
+                delete [] buf;
+            }
+
+            // y: convert each line along y (stride sx) of coeff in place, parallel over t
+            #pragma omp parallel default(none) private(x, y, z, t) shared(data, coeff, pole, NbPoles, sx, sy, sz, st)
+            {
+                T* buf = new T[sy];
+
+                #pragma omp for 
+                for ( t=0; t<st; t++ )
+                {
+                    for ( z=0; z<sz; z++ )
+                    {
+                        for ( x=0; x<sx; x++ )
+                        {
+                            size_t offset = x + z*sx*sy + t*sx*sy*sz;
+
+                            for ( y=0; y<sy; y++ )
+                            {
+                                buf[y] = coeff[offset + y*sx];
+                            }
+
+                            this->ConvertToInterpolationCoefficients(buf, sy, pole, NbPoles, DBL_EPSILON);
+
+                            for ( y=0; y<sy; y++ )
+                            {
+                                coeff[offset + y*sx] = buf[y];
+                            }
+                        }
+                    }
+                }
+
+                delete [] buf;
+            }
+
+            // z: convert each line along z (stride sx*sy) of coeff in place, parallel over t
+            #pragma omp parallel default(none) private(x, y, z, t) shared(data, coeff, pole, NbPoles, sx, sy, sz, st)
+            {
+                T* buf = new T[sz];
+
+                #pragma omp for 
+                for ( t=0; t<st; t++ )
+                {
+                    for ( x=0; x<sx; x++ )
+                    {
+                        for ( y=0; y<sy; y++ )
+                        {
+                            size_t offset = x + y*sx + t*sx*sy*sz;
+
+                            for ( z=0; z<sz; z++ )
+                            {
+                                buf[z] = coeff[offset + z*sx*sy];
+                            }
+
+                            this->ConvertToInterpolationCoefficients(buf, sz, pole, NbPoles, DBL_EPSILON);
+
+                            for ( z=0; z<sz; z++ )
+                            {
+                                coeff[offset + z*sx*sy] = buf[z];
+                            }
+                        }
+                    }
+                }
+
+                delete [] buf;
+            }
+
+            // t: convert each line along t (stride sx*sy*sz) of coeff in place, parallel over x
+            #pragma omp parallel default(none) private(x, y, z, t) shared(data, coeff, pole, NbPoles, sx, sy, sz, st)
+            {
+                T* buf = new T[st];
+
+                #pragma omp for 
+                for ( x=0; x<sx; x++ )
+                {
+                    for ( y=0; y<sy; y++ )
+                    {
+                        for ( z=0; z<sz; z++ )
+                        {
+                            size_t offset = x + y*sx + z*sx*sy;
+
+                            for ( t=0; t<st; t++ )
+                            {
+                                buf[t] = coeff[offset + t*sx*sy*sz];
+                            }
+
+                            this->ConvertToInterpolationCoefficients(buf, st, pole, NbPoles, DBL_EPSILON);
+
+                            for ( t=0; t<st; t++ )
+                            {
+                                coeff[offset + t*sx*sy*sz] = buf[t];
+                            }
+                        }
+                    }
+                }
+
+                delete [] buf;
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in hoNDBSpline<T, D>::computeBSplineCoefficients(const T* data, size_t sx, size_t sy, size_t sz, size_t st, unsigned int SplineDegree, T* coeff) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    // Computes the B-spline coefficients of a 5D array with extents sx, sy, sz, st, sp
+    // (x varies fastest; element (x,y,z,t,p) lives at x + y*sx + z*sx*sy + t*sx*sy*sz + p*sx*sy*sz*st).
+    //
+    // The x pass copies every x-line of data into a scratch buffer, filters it with
+    // ConvertToInterpolationCoefficients() and stores it into coeff. The y, z, t and p
+    // passes then gather each line of coeff along their own axis, filter it and scatter
+    // it back in place.
+    //
+    // data         : input samples (read only in the x pass)
+    // SplineDegree : handed to Pole() to obtain the filter poles
+    // coeff        : output coefficients, same layout as data
+    // Returns true on success, false if an exception is caught.
+    template <typename T, unsigned int D> 
+    bool hoNDBSpline<T, D>::computeBSplineCoefficients(const T* data, size_t sx, size_t sy, size_t sz, size_t st, size_t sp, unsigned int SplineDegree, T* coeff)
+    {
+        try
+        {
+            unsigned int NbPoles;
+            bspline_float_type pole[4];
+            this->Pole(pole, SplineDegree, NbPoles);
+
+            long long x, y, z, t, p;
+
+            // x : lines are contiguous, so they are moved with memcpy
+            #pragma omp parallel default(none) private(y, z, t, p) shared(data, coeff, pole, NbPoles, sx, sy, sz, st, sp)
+            {
+                // one scratch line per thread
+                T* buf = new T[sx];
+
+                #pragma omp for 
+                for ( p=0; p<sp; p++ )
+                {
+                    for ( t=0; t<st; t++ )
+                    {
+                        for ( z=0; z<sz; z++ )
+                        {
+                            for ( y=0; y<sy; y++ )
+                            {
+                                memcpy(buf, data+p*sx*sy*sz*st+t*sx*sy*sz+z*sx*sy+y*sx, sizeof(T)*sx);
+
+                                this->ConvertToInterpolationCoefficients(buf, sx, pole, NbPoles, DBL_EPSILON);
+
+                                memcpy(coeff+p*sx*sy*sz*st+t*sx*sy*sz+z*sx*sy+y*sx, buf, sizeof(T)*sx);
+                            }
+                        }
+                    }
+                }
+
+                delete [] buf;
+            }
+
+            // y : gather with stride sx, filter, scatter back
+            #pragma omp parallel default(none) private(x, y, z, t, p) shared(data, coeff, pole, NbPoles, sx, sy, sz, st, sp)
+            {
+                T* buf = new T[sy];
+
+                #pragma omp for 
+                for ( p=0; p<sp; p++ )
+                {
+                    for ( t=0; t<st; t++ )
+                    {
+                        for ( z=0; z<sz; z++ )
+                        {
+                            for ( x=0; x<sx; x++ )
+                            {
+                                size_t offset = x + z*sx*sy + t*sx*sy*sz + p*sx*sy*sz*st;
+
+                                for ( y=0; y<sy; y++ )
+                                {
+                                    buf[y] = coeff[offset + y*sx];
+                                }
+
+                                this->ConvertToInterpolationCoefficients(buf, sy, pole, NbPoles, DBL_EPSILON);
+
+                                for ( y=0; y<sy; y++ )
+                                {
+                                    coeff[offset + y*sx] = buf[y];
+                                }
+                            }
+                        }
+                    }
+                }
+
+                delete [] buf;
+            }
+
+            // z : gather with stride sx*sy, filter, scatter back
+            #pragma omp parallel default(none) private(x, y, z, t, p) shared(data, coeff, pole, NbPoles, sx, sy, sz, st, sp)
+            {
+                T* buf = new T[sz];
+
+                #pragma omp for 
+                for ( p=0; p<sp; p++ )
+                {
+                    for ( t=0; t<st; t++ )
+                    {
+                        for ( x=0; x<sx; x++ )
+                        {
+                            for ( y=0; y<sy; y++ )
+                            {
+                                size_t offset = x + y*sx + t*sx*sy*sz + p*sx*sy*sz*st;
+
+                                for ( z=0; z<sz; z++ )
+                                {
+                                    buf[z] = coeff[offset + z*sx*sy];
+                                }
+
+                                this->ConvertToInterpolationCoefficients(buf, sz, pole, NbPoles, DBL_EPSILON);
+
+                                for ( z=0; z<sz; z++ )
+                                {
+                                    coeff[offset + z*sx*sy] = buf[z];
+                                }
+                            }
+                        }
+                    }
+                }
+
+                delete [] buf;
+            }
+
+            // t : gather with stride sx*sy*sz, filter, scatter back
+            #pragma omp parallel default(none) private(x, y, z, t, p) shared(data, coeff, pole, NbPoles, sx, sy, sz, st, sp)
+            {
+                T* buf = new T[st];
+
+                #pragma omp for 
+                for ( p=0; p<sp; p++ )
+                {
+                    for ( x=0; x<sx; x++ )
+                    {
+                        for ( y=0; y<sy; y++ )
+                        {
+                            for ( z=0; z<sz; z++ )
+                            {
+                                size_t offset = x + y*sx + z*sx*sy + p*sx*sy*sz*st;
+
+                                for ( t=0; t<st; t++ )
+                                {
+                                    buf[t] = coeff[offset + t*sx*sy*sz];
+                                }
+
+                                this->ConvertToInterpolationCoefficients(buf, st, pole, NbPoles, DBL_EPSILON);
+
+                                for ( t=0; t<st; t++ )
+                                {
+                                    coeff[offset + t*sx*sy*sz] = buf[t];
+                                }
+                            }
+                        }
+                    }
+                }
+
+                delete [] buf;
+            }
+
+            // p : gather with stride sx*sy*sz*st, filter, scatter back.
+            // The work is split over x here because p is the axis being filtered.
+            #pragma omp parallel default(none) private(x, y, z, t, p) shared(data, coeff, pole, NbPoles, sx, sy, sz, st, sp)
+            {
+                T* buf = new T[sp];
+
+                #pragma omp for 
+                for ( x=0; x<sx; x++ )
+                {
+                    for ( y=0; y<sy; y++ )
+                    {
+                        for ( z=0; z<sz; z++ )
+                        {
+                            for ( t=0; t<st; t++ )
+                            {
+                                size_t offset = x + y*sx + z*sx*sy + t*sx*sy*sz;
+
+                                // buf has sp entries and must be indexed by p, not by t
+                                for ( p=0; p<sp; p++ )
+                                {
+                                    buf[p] = coeff[offset + p*sx*sy*sz*st];
+                                }
+
+                                this->ConvertToInterpolationCoefficients(buf, sp, pole, NbPoles, DBL_EPSILON);
+
+                                for ( p=0; p<sp; p++ )
+                                {
+                                    coeff[offset + p*sx*sy*sz*st] = buf[p];
+                                }
+                            }
+                        }
+                    }
+                }
+
+                delete [] buf;
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in hoNDBSpline<T, D>::computeBSplineCoefficients(const T* data, size_t sx, size_t sy, size_t sz, size_t st, size_t sp, unsigned int SplineDegree, T* coeff) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    // 6D convenience overload: packs the six extents (sx ... sq) into a dimension
+    // vector and forwards to the vector-based computeBSplineCoefficients().
+    template <typename T, unsigned int D> 
+    inline bool hoNDBSpline<T, D>::computeBSplineCoefficients(const T* data, size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, unsigned int SplineDegree, T* coeff)
+    {
+        const size_t extents[6] = { sx, sy, sz, st, sp, sq };
+        std::vector<size_t> dim(extents, extents + 6);
+
+        return this->computeBSplineCoefficients(data, dim, SplineDegree, coeff);
+    }
+
+    // 7D convenience overload: packs the seven extents (sx ... sr) into a dimension
+    // vector and forwards to the vector-based computeBSplineCoefficients().
+    template <typename T, unsigned int D> 
+    inline bool hoNDBSpline<T, D>::computeBSplineCoefficients(const T* data, size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, unsigned int SplineDegree, T* coeff)
+    {
+        const size_t extents[7] = { sx, sy, sz, st, sp, sq, sr };
+        std::vector<size_t> dim(extents, extents + 7);
+
+        return this->computeBSplineCoefficients(data, dim, SplineDegree, coeff);
+    }
+
+    // 8D convenience overload: packs the eight extents (sx ... ss) into a dimension
+    // vector and forwards to the vector-based computeBSplineCoefficients().
+    template <typename T, unsigned int D> 
+    inline bool hoNDBSpline<T, D>::computeBSplineCoefficients(const T* data, size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss, unsigned int SplineDegree, T* coeff)
+    {
+        const size_t extents[8] = { sx, sy, sz, st, sp, sq, sr, ss };
+        std::vector<size_t> dim(extents, extents + 8);
+
+        return this->computeBSplineCoefficients(data, dim, SplineDegree, coeff);
+    }
+
+    // 9D convenience overload: packs the nine extents (sx ... su) into a dimension
+    // vector and forwards to the vector-based computeBSplineCoefficients().
+    template <typename T, unsigned int D> 
+    inline bool hoNDBSpline<T, D>::computeBSplineCoefficients(const T* data, size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss, size_t su, unsigned int SplineDegree, T* coeff)
+    {
+        const size_t extents[9] = { sx, sy, sz, st, sp, sq, sr, ss, su };
+        std::vector<size_t> dim(extents, extents + 9);
+
+        return this->computeBSplineCoefficients(data, dim, SplineDegree, coeff);
+    }
+
+    // Evaluates a D-dimensional B-spline at position pos from its coefficient array.
+    //
+    // coeff        : coefficient array, laid out according to dimension
+    // dimension    : extent of every axis; must hold exactly D entries
+    // SplineDegree : forwarded to the per-axis weight/index helper
+    // derivative   : per-axis derivative order, forwarded to the helper (read for axes 0..D-1)
+    // pos          : D coordinates, one per axis
+    // Returns the weighted sum of the selected coefficients, or T(0) when
+    // dimension.size() != D.
+    template <typename T, unsigned int D> 
+    inline T hoNDBSpline<T, D>::evaluateBSpline(const T* coeff, const std::vector<size_t>& dimension, unsigned int SplineDegree, 
+                                                const std::vector<unsigned int>& derivative, const coord_type* pos)
+    {
+        if ( D!=dimension.size() )
+        {
+            GADGET_ERROR_MSG("D!=dimension.get_number_of_dimensions()");
+            return T(0);
+        }
+
+        // per-axis interpolation weights and coefficient indices (room for 10 taps per axis)
+        bspline_float_type weight[D][10];
+        long long index[D][10];
+
+        unsigned int ii, jj;
+        for ( ii=0; ii<D; ii++ )
+        {
+            computeBSplineInterpolationLocationsAndWeights(dimension[ii], SplineDegree, derivative[ii], pos[ii], weight[ii], index[ii]);
+        }
+
+        // The taps form a D-dimensional grid with SplineDegree entries per axis.
+        // NOTE(review): only the first SplineDegree taps of each axis are used; confirm this
+        // matches the number of entries the helper above fills.
+        std::vector<size_t> splineDimension(D, SplineDegree);
+        std::vector<size_t> splineInd(D, 0);
+        std::vector<size_t> coeffInd(D, 0);
+
+        std::vector<size_t> offsetFactors(D, 0);
+        hoNDArray<T>::calculate_offset_factors(splineDimension, offsetFactors);
+
+        std::vector<size_t> coeffOffsetFactors(D, 0);
+        hoNDArray<T>::calculate_offset_factors(dimension, coeffOffsetFactors);
+
+        // Total tap count SplineDegree^D, computed with integer arithmetic so the
+        // result cannot be truncated by a floating-point rounding error.
+        unsigned int num = 1;
+        for ( jj=0; jj<D; jj++ )
+        {
+            num *= SplineDegree;
+        }
+
+        T res = 0;
+
+        for ( ii=0; ii<num; ii++ )
+        {
+            // linear tap number -> per-axis tap indices
+            hoNDArray<T>::calculate_index(ii, offsetFactors, splineInd);
+
+            // per-axis tap indices -> per-axis coefficient indices
+            for ( jj=0; jj<D; jj++ )
+            {
+                coeffInd[jj] = index[jj][ splineInd[jj] ];
+            }
+
+            size_t offset = hoNDArray<T>::calculate_offset(coeffInd, coeffOffsetFactors);
+
+            T v = coeff[offset];
+
+            // separable weight: product of the per-axis weights
+            for ( jj=0; jj<D; jj++ )
+            {
+                v *= weight[jj][ splineInd[jj] ];
+            }
+
+            res += v;
+        }
+
+        return res;
+    }
+
+    // Vector-position overload: forwards to the pointer-based N-D evaluateBSpline()
+    // using the address of the first coordinate in pos.
+    template <typename T, unsigned int D> 
+    inline T hoNDBSpline<T, D>::evaluateBSpline(const T* coeff, const std::vector<size_t>& dimension, unsigned int SplineDegree, 
+                                                const std::vector<unsigned int>& derivative, const std::vector<coord_type>& pos)
+    {
+        const coord_type* firstCoord = &pos[0];
+        return this->evaluateBSpline(coeff, dimension, SplineDegree, derivative, firstCoord);
+    }
+
+    // 1D evaluation: asks computeBSplineInterpolationLocationsAndWeights() for the
+    // weights and coefficient indices at x (derivative order dx, array length len)
+    // and returns the weighted sum of the selected coefficients.
+    // NOTE(review): the sum runs over SplineDegree taps; confirm this matches the
+    // number of entries the helper fills.
+    template <typename T, unsigned int D> 
+    inline T hoNDBSpline<T, D>::evaluateBSpline(const T* coeff, size_t len, unsigned int SplineDegree, 
+                                                unsigned int dx, 
+                                                coord_type x)
+    {
+        bspline_float_type wx[10];
+        long long kx[10];
+        computeBSplineInterpolationLocationsAndWeights(len, SplineDegree, dx, x, wx, kx);
+
+        T sum = 0;
+
+        for ( unsigned int jx=0; jx<SplineDegree; jx++ )
+        {
+            const T& tap = coeff[ kx[jx] ];
+            sum += tap * wx[jx];
+        }
+
+        return sum;
+    }
+
+    // 2D evaluation: obtains per-axis weights and coefficient indices for (x, y)
+    // with derivative orders (dx, dy), then accumulates the separable weighted sum
+    // over a SplineDegree x SplineDegree block of coeff (row length sx).
+    template <typename T, unsigned int D> 
+    inline T hoNDBSpline<T, D>::evaluateBSpline(const T* coeff, size_t sx, size_t sy, unsigned int SplineDegree, 
+                                                unsigned int dx, unsigned int dy, 
+                                                coord_type x, coord_type y)
+    {
+        bspline_float_type wx[10], wy[10];
+        long long kx[10], ky[10];
+
+        computeBSplineInterpolationLocationsAndWeights(sx, SplineDegree, dx, x, wx, kx);
+        computeBSplineInterpolationLocationsAndWeights(sy, SplineDegree, dy, y, wy, ky);
+
+        T sum = 0;
+
+        for ( unsigned int jy=0; jy<SplineDegree; jy++ )
+        {
+            for ( unsigned int jx=0; jx<SplineDegree; jx++ )
+            {
+                const T& tap = coeff[ kx[jx] + sx*ky[jy] ];
+                sum += tap * wx[jx] * wy[jy];
+            }
+        }
+
+        return sum;
+    }
+
+
+    // 3D evaluation: obtains per-axis weights and coefficient indices for (x, y, z)
+    // with derivative orders (dx, dy, dz), then accumulates the separable weighted
+    // sum over the selected coefficients (x fastest, then y, then z).
+    template <typename T, unsigned int D> 
+    inline T hoNDBSpline<T, D>::evaluateBSpline(const T* coeff, size_t sx, size_t sy, size_t sz, unsigned int SplineDegree, 
+                                                unsigned int dx, unsigned int dy, unsigned int dz, 
+                                                coord_type x, coord_type y, coord_type z)
+    {
+        bspline_float_type wx[10], wy[10], wz[10];
+        long long kx[10], ky[10], kz[10];
+
+        computeBSplineInterpolationLocationsAndWeights(sx, SplineDegree, dx, x, wx, kx);
+        computeBSplineInterpolationLocationsAndWeights(sy, SplineDegree, dy, y, wy, ky);
+        computeBSplineInterpolationLocationsAndWeights(sz, SplineDegree, dz, z, wz, kz);
+
+        T sum = 0;
+
+        for ( unsigned int jz=0; jz<SplineDegree; jz++ )
+        {
+            for ( unsigned int jy=0; jy<SplineDegree; jy++ )
+            {
+                // start of the x-line selected by (jy, jz)
+                const long long base = ky[jy]*sx + kz[jz]*sx*sy;
+
+                for ( unsigned int jx=0; jx<SplineDegree; jx++ )
+                {
+                    const T& tap = coeff[ kx[jx] + base ];
+                    sum += tap * wx[jx] * wy[jy] * wz[jz];
+                }
+            }
+        }
+
+        return sum;
+    }
+
+    // 4D evaluation: obtains per-axis weights and coefficient indices for (x, y, z, t)
+    // with derivative orders (dx, dy, dz, dt), then accumulates the separable
+    // weighted sum over the selected coefficients (x fastest, t slowest).
+    template <typename T, unsigned int D> 
+    inline T hoNDBSpline<T, D>::evaluateBSpline(const T* coeff, size_t sx, size_t sy, size_t sz, size_t st, unsigned int SplineDegree, 
+                                                unsigned int dx, unsigned int dy, unsigned int dz, unsigned int dt, 
+                                                coord_type x, coord_type y, coord_type z, coord_type t)
+    {
+        bspline_float_type wx[10], wy[10], wz[10], wt[10];
+        long long kx[10], ky[10], kz[10], kt[10];
+
+        computeBSplineInterpolationLocationsAndWeights(sx, SplineDegree, dx, x, wx, kx);
+        computeBSplineInterpolationLocationsAndWeights(sy, SplineDegree, dy, y, wy, ky);
+        computeBSplineInterpolationLocationsAndWeights(sz, SplineDegree, dz, z, wz, kz);
+        computeBSplineInterpolationLocationsAndWeights(st, SplineDegree, dt, t, wt, kt);
+
+        T sum = 0;
+
+        for ( unsigned int jt=0; jt<SplineDegree; jt++ )
+        {
+            for ( unsigned int jz=0; jz<SplineDegree; jz++ )
+            {
+                for ( unsigned int jy=0; jy<SplineDegree; jy++ )
+                {
+                    // start of the x-line selected by (jy, jz, jt)
+                    const long long base = ky[jy]*sx + kz[jz]*sx*sy + kt[jt]*sx*sy*sz;
+
+                    for ( unsigned int jx=0; jx<SplineDegree; jx++ )
+                    {
+                        const T& tap = coeff[ kx[jx] + base ];
+                        sum += tap * wx[jx] * wy[jy] * wz[jz] * wt[jt];
+                    }
+                }
+            }
+        }
+
+        return sum;
+    }
+
+    // 5D evaluation: obtains per-axis weights and coefficient indices for
+    // (x, y, z, t, p) with derivative orders (dx, dy, dz, dt, dp), then accumulates
+    // the separable weighted sum over the selected coefficients (x fastest, p slowest).
+    template <typename T, unsigned int D> 
+    inline T hoNDBSpline<T, D>::evaluateBSpline(const T* coeff, size_t sx, size_t sy, size_t sz, size_t st, size_t sp, unsigned int SplineDegree, 
+                                                unsigned int dx, unsigned int dy, unsigned int dz, unsigned int dt, unsigned int dp, 
+                                                coord_type x, coord_type y, coord_type z, coord_type t, coord_type p)
+    {
+        bspline_float_type wx[10], wy[10], wz[10], wt[10], wp[10];
+        long long kx[10], ky[10], kz[10], kt[10], kp[10];
+
+        computeBSplineInterpolationLocationsAndWeights(sx, SplineDegree, dx, x, wx, kx);
+        computeBSplineInterpolationLocationsAndWeights(sy, SplineDegree, dy, y, wy, ky);
+        computeBSplineInterpolationLocationsAndWeights(sz, SplineDegree, dz, z, wz, kz);
+        computeBSplineInterpolationLocationsAndWeights(st, SplineDegree, dt, t, wt, kt);
+        computeBSplineInterpolationLocationsAndWeights(sp, SplineDegree, dp, p, wp, kp);
+
+        T sum = 0;
+
+        for ( unsigned int jp=0; jp<SplineDegree; jp++ )
+        {
+            for ( unsigned int jt=0; jt<SplineDegree; jt++ )
+            {
+                for ( unsigned int jz=0; jz<SplineDegree; jz++ )
+                {
+                    for ( unsigned int jy=0; jy<SplineDegree; jy++ )
+                    {
+                        // start of the x-line selected by (jy, jz, jt, jp)
+                        const long long base = ky[jy]*sx + kz[jz]*sx*sy + kt[jt]*sx*sy*sz + kp[jp]*sx*sy*sz*st;
+
+                        for ( unsigned int jx=0; jx<SplineDegree; jx++ )
+                        {
+                            const T& tap = coeff[ kx[jx] + base ];
+                            sum += tap * wx[jx] * wy[jy] * wz[jz] * wt[jt] * wp[jp];
+                        }
+                    }
+                }
+            }
+        }
+
+        return sum;
+    }
+
+    // 6D evaluation: obtains per-axis weights and coefficient indices for
+    // (x, y, z, t, p, q) with derivative orders (dx ... dq), then accumulates the
+    // separable weighted sum over the selected coefficients (x fastest, q slowest).
+    template <typename T, unsigned int D> 
+    inline T hoNDBSpline<T, D>::evaluateBSpline(const T* coeff, size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, unsigned int SplineDegree, 
+                                                unsigned int dx, unsigned int dy, unsigned int dz, unsigned int dt, unsigned int dp, unsigned int dq, 
+                                                coord_type x, coord_type y, coord_type z, coord_type t, coord_type p, coord_type q)
+    {
+        bspline_float_type wx[10], wy[10], wz[10], wt[10], wp[10], wq[10];
+        long long kx[10], ky[10], kz[10], kt[10], kp[10], kq[10];
+
+        computeBSplineInterpolationLocationsAndWeights(sx, SplineDegree, dx, x, wx, kx);
+        computeBSplineInterpolationLocationsAndWeights(sy, SplineDegree, dy, y, wy, ky);
+        computeBSplineInterpolationLocationsAndWeights(sz, SplineDegree, dz, z, wz, kz);
+        computeBSplineInterpolationLocationsAndWeights(st, SplineDegree, dt, t, wt, kt);
+        computeBSplineInterpolationLocationsAndWeights(sp, SplineDegree, dp, p, wp, kp);
+        computeBSplineInterpolationLocationsAndWeights(sq, SplineDegree, dq, q, wq, kq);
+
+        T sum = 0;
+
+        for ( unsigned int jq=0; jq<SplineDegree; jq++ )
+        {
+            for ( unsigned int jp=0; jp<SplineDegree; jp++ )
+            {
+                for ( unsigned int jt=0; jt<SplineDegree; jt++ )
+                {
+                    for ( unsigned int jz=0; jz<SplineDegree; jz++ )
+                    {
+                        for ( unsigned int jy=0; jy<SplineDegree; jy++ )
+                        {
+                            // start of the x-line selected by (jy, jz, jt, jp, jq)
+                            const long long base = ky[jy]*sx 
+                                                 + kz[jz]*sx*sy 
+                                                 + kt[jt]*sx*sy*sz 
+                                                 + kp[jp]*sx*sy*sz*st
+                                                 + kq[jq]*sx*sy*sz*st*sp;
+
+                            for ( unsigned int jx=0; jx<SplineDegree; jx++ )
+                            {
+                                const T& tap = coeff[ kx[jx] + base ];
+                                sum += tap * wx[jx] * wy[jy] * wz[jz] * wt[jt] * wp[jp] * wq[jq];
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        return sum;
+    }
+
+    // 7D evaluation: obtains per-axis weights and coefficient indices for
+    // (x, y, z, t, p, q, r) with derivative orders (dx ... dr), then accumulates the
+    // separable weighted sum over the selected coefficients (x fastest, r slowest).
+    template <typename T, unsigned int D> 
+    inline T hoNDBSpline<T, D>::evaluateBSpline(const T* coeff, size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, unsigned int SplineDegree, 
+                                                unsigned int dx, unsigned int dy, unsigned int dz, unsigned int dt, unsigned int dp, unsigned int dq, unsigned int dr, 
+                                                coord_type x, coord_type y, coord_type z, coord_type t, coord_type p, coord_type q, coord_type r)
+    {
+        bspline_float_type wx[10], wy[10], wz[10], wt[10], wp[10], wq[10], wr[10];
+        long long kx[10], ky[10], kz[10], kt[10], kp[10], kq[10], kr[10];
+
+        computeBSplineInterpolationLocationsAndWeights(sx, SplineDegree, dx, x, wx, kx);
+        computeBSplineInterpolationLocationsAndWeights(sy, SplineDegree, dy, y, wy, ky);
+        computeBSplineInterpolationLocationsAndWeights(sz, SplineDegree, dz, z, wz, kz);
+        computeBSplineInterpolationLocationsAndWeights(st, SplineDegree, dt, t, wt, kt);
+        computeBSplineInterpolationLocationsAndWeights(sp, SplineDegree, dp, p, wp, kp);
+        computeBSplineInterpolationLocationsAndWeights(sq, SplineDegree, dq, q, wq, kq);
+        computeBSplineInterpolationLocationsAndWeights(sr, SplineDegree, dr, r, wr, kr);
+
+        T sum = 0;
+
+        for ( unsigned int jr=0; jr<SplineDegree; jr++ )
+        {
+            for ( unsigned int jq=0; jq<SplineDegree; jq++ )
+            {
+                for ( unsigned int jp=0; jp<SplineDegree; jp++ )
+                {
+                    for ( unsigned int jt=0; jt<SplineDegree; jt++ )
+                    {
+                        for ( unsigned int jz=0; jz<SplineDegree; jz++ )
+                        {
+                            for ( unsigned int jy=0; jy<SplineDegree; jy++ )
+                            {
+                                // start of the x-line selected by (jy, jz, jt, jp, jq, jr)
+                                const long long base = ky[jy]*sx 
+                                                     + kz[jz]*sx*sy 
+                                                     + kt[jt]*sx*sy*sz 
+                                                     + kp[jp]*sx*sy*sz*st
+                                                     + kq[jq]*sx*sy*sz*st*sp
+                                                     + kr[jr]*sx*sy*sz*st*sp*sq;
+
+                                for ( unsigned int jx=0; jx<SplineDegree; jx++ )
+                                {
+                                    const T& tap = coeff[ kx[jx] + base ];
+                                    sum += tap * wx[jx] * wy[jy] * wz[jz] * wt[jt] * wp[jp] * wq[jq] * wr[jr];
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        return sum;
+    }
+
+    // 8D evaluation: obtains per-axis weights and coefficient indices for
+    // (x, y, z, t, p, q, r, s) with derivative orders (dx ... ds), then accumulates
+    // the separable weighted sum over the selected coefficients (x fastest, s slowest).
+    //
+    // coeff   : coefficient array with extents sx, sy, sz, st, sp, sq, sr, ss
+    // Returns the interpolated value (or derivative) at the given position.
+    template <typename T, unsigned int D> 
+    inline T hoNDBSpline<T, D>::evaluateBSpline(const T* coeff, size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss, unsigned int SplineDegree, 
+                                                unsigned int dx, unsigned int dy, unsigned int dz, unsigned int dt, unsigned int dp, unsigned int dq, unsigned int dr, unsigned int ds, 
+                                                coord_type x, coord_type y, coord_type z, coord_type t, coord_type p, coord_type q, coord_type r, coord_type s)
+    {
+        bspline_float_type xWeight[10];
+        long long xIndex[10];
+        computeBSplineInterpolationLocationsAndWeights(sx, SplineDegree, dx, x, xWeight, xIndex);
+
+        bspline_float_type yWeight[10];
+        long long yIndex[10];
+        computeBSplineInterpolationLocationsAndWeights(sy, SplineDegree, dy, y, yWeight, yIndex);
+
+        bspline_float_type zWeight[10];
+        long long zIndex[10];
+        computeBSplineInterpolationLocationsAndWeights(sz, SplineDegree, dz, z, zWeight, zIndex);
+
+        bspline_float_type tWeight[10];
+        long long tIndex[10];
+        computeBSplineInterpolationLocationsAndWeights(st, SplineDegree, dt, t, tWeight, tIndex);
+
+        bspline_float_type pWeight[10];
+        long long pIndex[10];
+        computeBSplineInterpolationLocationsAndWeights(sp, SplineDegree, dp, p, pWeight, pIndex);
+
+        bspline_float_type qWeight[10];
+        long long qIndex[10];
+        computeBSplineInterpolationLocationsAndWeights(sq, SplineDegree, dq, q, qWeight, qIndex);
+
+        bspline_float_type rWeight[10];
+        long long rIndex[10];
+        computeBSplineInterpolationLocationsAndWeights(sr, SplineDegree, dr, r, rWeight, rIndex);
+
+        bspline_float_type sWeight[10];
+        long long sIndex[10];
+        computeBSplineInterpolationLocationsAndWeights(ss, SplineDegree, ds, s, sWeight, sIndex);
+
+        T res=0;
+
+        unsigned int ix, iy, iz, it, ip, iq, ir, is;
+
+        for ( is=0; is<SplineDegree; is++ )
+        {
+            for ( ir=0; ir<SplineDegree; ir++ )
+            {
+                for ( iq=0; iq<SplineDegree; iq++ )
+                {
+                    for ( ip=0; ip<SplineDegree; ip++ )
+                    {
+                        for ( it=0; it<SplineDegree; it++ )
+                        {
+                            for ( iz=0; iz<SplineDegree; iz++ )
+                            {
+                                for ( iy=0; iy<SplineDegree; iy++ )
+                                {
+                                    // Start of the x-line selected by (iy ... is); every axis
+                                    // must be addressed with its own loop index.
+                                    long long offset = yIndex[iy]*sx 
+                                                     + zIndex[iz]*sx*sy 
+                                                     + tIndex[it]*sx*sy*sz 
+                                                     + pIndex[ip]*sx*sy*sz*st
+                                                     + qIndex[iq]*sx*sy*sz*st*sp
+                                                     + rIndex[ir]*sx*sy*sz*st*sp*sq
+                                                     + sIndex[is]*sx*sy*sz*st*sp*sq*sr;
+
+                                    for ( ix=0; ix<SplineDegree; ix++ )
+                                    {
+                                        res += coeff[ xIndex[ix] + offset ] 
+                                            * xWeight[ix] * yWeight[iy] * zWeight[iz] * tWeight[it] * pWeight[ip] * qWeight[iq] * rWeight[ir] * sWeight[is];
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        return res;
+    }
+
+    // 9D evaluation: obtains per-axis weights and coefficient indices for
+    // (x, y, z, t, p, q, r, s, u) with derivative orders (dx ... du), then accumulates
+    // the separable weighted sum over the selected coefficients (x fastest, u slowest).
+    //
+    // coeff   : coefficient array with extents sx, sy, sz, st, sp, sq, sr, ss, su
+    // Returns the interpolated value (or derivative) at the given position.
+    template <typename T, unsigned int D> 
+    inline T hoNDBSpline<T, D>::evaluateBSpline(const T* coeff, size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss, size_t su, unsigned int SplineDegree, 
+                                                unsigned int dx, unsigned int dy, unsigned int dz, unsigned int dt, unsigned int dp, unsigned int dq, unsigned int dr, unsigned int ds, unsigned int du, 
+                                                coord_type x, coord_type y, coord_type z, coord_type t, coord_type p, coord_type q, coord_type r, coord_type s, coord_type u)
+    {
+        bspline_float_type xWeight[10];
+        long long xIndex[10];
+        computeBSplineInterpolationLocationsAndWeights(sx, SplineDegree, dx, x, xWeight, xIndex);
+
+        bspline_float_type yWeight[10];
+        long long yIndex[10];
+        computeBSplineInterpolationLocationsAndWeights(sy, SplineDegree, dy, y, yWeight, yIndex);
+
+        bspline_float_type zWeight[10];
+        long long zIndex[10];
+        computeBSplineInterpolationLocationsAndWeights(sz, SplineDegree, dz, z, zWeight, zIndex);
+
+        bspline_float_type tWeight[10];
+        long long tIndex[10];
+        computeBSplineInterpolationLocationsAndWeights(st, SplineDegree, dt, t, tWeight, tIndex);
+
+        bspline_float_type pWeight[10];
+        long long pIndex[10];
+        computeBSplineInterpolationLocationsAndWeights(sp, SplineDegree, dp, p, pWeight, pIndex);
+
+        bspline_float_type qWeight[10];
+        long long qIndex[10];
+        computeBSplineInterpolationLocationsAndWeights(sq, SplineDegree, dq, q, qWeight, qIndex);
+
+        bspline_float_type rWeight[10];
+        long long rIndex[10];
+        computeBSplineInterpolationLocationsAndWeights(sr, SplineDegree, dr, r, rWeight, rIndex);
+
+        bspline_float_type sWeight[10];
+        long long sIndex[10];
+        computeBSplineInterpolationLocationsAndWeights(ss, SplineDegree, ds, s, sWeight, sIndex);
+
+        bspline_float_type uWeight[10];
+        long long uIndex[10];
+        computeBSplineInterpolationLocationsAndWeights(su, SplineDegree, du, u, uWeight, uIndex);
+
+        T res=0;
+
+        unsigned int ix, iy, iz, it, ip, iq, ir, is, iu;
+
+        for ( iu=0; iu<SplineDegree; iu++ )
+        {
+            for ( is=0; is<SplineDegree; is++ )
+            {
+                for ( ir=0; ir<SplineDegree; ir++ )
+                {
+                    for ( iq=0; iq<SplineDegree; iq++ )
+                    {
+                        for ( ip=0; ip<SplineDegree; ip++ )
+                        {
+                            for ( it=0; it<SplineDegree; it++ )
+                            {
+                                for ( iz=0; iz<SplineDegree; iz++ )
+                                {
+                                    for ( iy=0; iy<SplineDegree; iy++ )
+                                    {
+                                        // Start of the x-line selected by (iy ... iu); every axis
+                                        // must be addressed with its own loop index.
+                                        long long offset = yIndex[iy]*sx 
+                                                         + zIndex[iz]*sx*sy 
+                                                         + tIndex[it]*sx*sy*sz 
+                                                         + pIndex[ip]*sx*sy*sz*st
+                                                         + qIndex[iq]*sx*sy*sz*st*sp
+                                                         + rIndex[ir]*sx*sy*sz*st*sp*sq
+                                                         + sIndex[is]*sx*sy*sz*st*sp*sq*sr
+                                                         + uIndex[iu]*sx*sy*sz*st*sp*sq*sr*ss;
+
+                                        for ( ix=0; ix<SplineDegree; ix++ )
+                                        {
+                                            res += coeff[ xIndex[ix] + offset ] 
+                                                * xWeight[ix] * yWeight[iy] * zWeight[iz] * tWeight[it] * pWeight[ip] * qWeight[iq] * rWeight[ir] * sWeight[is] * uWeight[iu];
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        return res;
+    }
+
+    /// Evaluate a 2D B-spline expansion at the continuous position (x, y).
+    ///
+    /// coeff is addressed as coeff[ix + sx*iy]; xWeight / yWeight are the per-tap
+    /// weights supplied by the caller. The tap positions are derived from (x, y)
+    /// and folded back into the grid with mirror boundary conditions.
+    template <typename T, unsigned int D> 
+    T hoNDBSpline<T, D>::evaluateBSpline(const T* coeff, size_t sx, size_t sy, unsigned int SplineDegree, 
+                                        bspline_float_type* xWeight, bspline_float_type* yWeight, 
+                                        coord_type x, coord_type y)
+    {
+        long long colTap[10];
+        long long rowTap[10];
+
+        BSplineInterpolationLocation(x, SplineDegree, colTap);
+        BSplineInterpolationMirrorBoundaryCondition(SplineDegree, colTap, sx);
+
+        BSplineInterpolationLocation(y, SplineDegree, rowTap);
+        BSplineInterpolationMirrorBoundaryCondition(SplineDegree, rowTap, sy);
+
+        // NOTE(review): the loops visit taps 0..SplineDegree-1 only, although
+        // BSplineInterpolationLocation fills SplineDegree+1 indices - confirm
+        // that skipping the last tap is intended.
+        T acc = 0;
+
+        for ( unsigned int r=0; r<SplineDegree; r++ )
+        {
+            // start of the coefficient row selected by this y tap
+            const long long rowStart = sx*rowTap[r];
+
+            for ( unsigned int c=0; c<SplineDegree; c++ )
+            {
+                acc += coeff[ colTap[c] + rowStart ] * xWeight[c] * yWeight[r];
+            }
+        }
+
+        return acc;
+    }
+
+    /// Evaluate a 3D B-spline expansion at the continuous position (x, y, z).
+    ///
+    /// coeff is addressed as coeff[ix + sx*iy + sx*sy*iz]; the three weight arrays
+    /// hold the per-tap weights supplied by the caller. Tap positions are derived
+    /// from the coordinates and mirrored back into the grid.
+    template <typename T, unsigned int D> 
+    T hoNDBSpline<T, D>::evaluateBSpline(const T* coeff, size_t sx, size_t sy, size_t sz, unsigned int SplineDegree, 
+                                        bspline_float_type* xWeight, bspline_float_type* yWeight, bspline_float_type* zWeight, 
+                                        coord_type x, coord_type y, coord_type z)
+    {
+        long long tapX[10];
+        long long tapY[10];
+        long long tapZ[10];
+
+        BSplineInterpolationLocation(x, SplineDegree, tapX);
+        BSplineInterpolationMirrorBoundaryCondition(SplineDegree, tapX, sx);
+
+        BSplineInterpolationLocation(y, SplineDegree, tapY);
+        BSplineInterpolationMirrorBoundaryCondition(SplineDegree, tapY, sy);
+
+        BSplineInterpolationLocation(z, SplineDegree, tapZ);
+        BSplineInterpolationMirrorBoundaryCondition(SplineDegree, tapZ, sz);
+
+        // NOTE(review): the loops visit taps 0..SplineDegree-1 only, although
+        // BSplineInterpolationLocation fills SplineDegree+1 indices - confirm
+        // that skipping the last tap is intended.
+        T acc = 0;
+
+        for ( unsigned int kz=0; kz<SplineDegree; kz++ )
+        {
+            for ( unsigned int ky=0; ky<SplineDegree; ky++ )
+            {
+                // linear offset of the x-row selected by the (ky, kz) taps
+                const long long rowStart = tapY[ky]*sx + tapZ[kz]*sx*sy;
+
+                for ( unsigned int kx=0; kx<SplineDegree; kx++ )
+                {
+                    acc += coeff[ tapX[kx] + rowStart ] 
+                        * xWeight[kx] * yWeight[ky] * zWeight[kz];
+                }
+            }
+        }
+
+        return acc;
+    }
+
+    /// Evaluate a 4D B-spline expansion at the continuous position (x, y, z, t).
+    ///
+    /// coeff is addressed as coeff[ix + sx*iy + sx*sy*iz + sx*sy*sz*it]; the four
+    /// weight arrays hold the per-tap weights supplied by the caller. Tap positions
+    /// are derived from the coordinates and mirrored back into the grid.
+    template <typename T, unsigned int D> 
+    T hoNDBSpline<T, D>::evaluateBSpline(const T* coeff, size_t sx, size_t sy, size_t sz, size_t st, unsigned int SplineDegree, 
+                                        bspline_float_type* xWeight, bspline_float_type* yWeight, bspline_float_type* zWeight, bspline_float_type* tWeight, 
+                                        coord_type x, coord_type y, coord_type z, coord_type t)
+    {
+        long long tapX[10];
+        long long tapY[10];
+        long long tapZ[10];
+        long long tapT[10];
+
+        BSplineInterpolationLocation(x, SplineDegree, tapX);
+        BSplineInterpolationMirrorBoundaryCondition(SplineDegree, tapX, sx);
+
+        BSplineInterpolationLocation(y, SplineDegree, tapY);
+        BSplineInterpolationMirrorBoundaryCondition(SplineDegree, tapY, sy);
+
+        BSplineInterpolationLocation(z, SplineDegree, tapZ);
+        BSplineInterpolationMirrorBoundaryCondition(SplineDegree, tapZ, sz);
+
+        BSplineInterpolationLocation(t, SplineDegree, tapT);
+        BSplineInterpolationMirrorBoundaryCondition(SplineDegree, tapT, st);
+
+        // NOTE(review): the loops visit taps 0..SplineDegree-1 only, although
+        // BSplineInterpolationLocation fills SplineDegree+1 indices - confirm
+        // that skipping the last tap is intended.
+        T acc = 0;
+
+        for ( unsigned int kt=0; kt<SplineDegree; kt++ )
+        {
+            for ( unsigned int kz=0; kz<SplineDegree; kz++ )
+            {
+                for ( unsigned int ky=0; ky<SplineDegree; ky++ )
+                {
+                    // linear offset of the x-row selected by the (ky, kz, kt) taps
+                    const long long rowStart = tapY[ky]*sx + tapZ[kz]*sx*sy + tapT[kt]*sx*sy*sz;
+
+                    for ( unsigned int kx=0; kx<SplineDegree; kx++ )
+                    {
+                        acc += coeff[ tapX[kx] + rowStart ] 
+                            * xWeight[kx] * yWeight[ky] * zWeight[kz] * tWeight[kt];
+                    }
+                }
+            }
+        }
+
+        return acc;
+    }
+
+    /// Evaluate a D-dimensional B-spline expansion at the continuous position pos[0..D-1].
+    ///
+    /// \param coeff        B-spline coefficients, laid out according to \a dimension
+    /// \param dimension    size of the coefficient grid along each of the D axes
+    /// \param SplineDegree degree of the spline
+    /// \param weight       weight[d] points to the per-tap weights of axis d, supplied by the caller
+    /// \param pos          D coordinates of the evaluation point
+    /// \return weighted sum of the coefficients over the visited taps
+    template <typename T, unsigned int D> 
+    T hoNDBSpline<T, D>::evaluateBSpline(const T* coeff, const std::vector<size_t>& dimension, unsigned int SplineDegree, 
+                                        bspline_float_type** weight, const coord_type* pos)
+    {
+        // tap positions along every axis, mirrored back into the coefficient grid
+        long long index[D][10];
+
+        unsigned int jj;
+        for ( jj=0; jj<D; jj++ )
+        {
+            BSplineInterpolationLocation(pos[jj], SplineDegree, index[jj]);
+            BSplineInterpolationMirrorBoundaryCondition(SplineDegree, index[jj], dimension[jj]);
+        }
+
+        // NOTE(review): SplineDegree taps are visited per axis, although
+        // BSplineInterpolationLocation fills SplineDegree+1 indices - confirm
+        // that skipping the last tap is intended.
+        std::vector<size_t> splineDimension(D, SplineDegree);
+        std::vector<size_t> splineInd(D, 0);
+        std::vector<size_t> coeffInd(D, 0);
+
+        std::vector<size_t> offsetFactors(D, 0);
+        hoNDArray<T>::calculate_offset_factors(splineDimension, offsetFactors);
+
+        std::vector<size_t> coeffOffsetFactors(D, 0);
+        hoNDArray<T>::calculate_offset_factors(dimension, coeffOffsetFactors);
+
+        // total number of taps = SplineDegree^D, computed in integer arithmetic:
+        // a floating-point pow() truncated to an integer may come out one short
+        size_t num = 1;
+        for ( jj=0; jj<D; jj++ )
+        {
+            num *= SplineDegree;
+        }
+
+        T res = 0;
+
+        for ( size_t ii=0; ii<num; ii++ )
+        {
+            // unravel the flat tap counter into one tap number per axis
+            hoNDArray<T>::calculate_index(ii, offsetFactors, splineInd);
+
+            for ( jj=0; jj<D; jj++ )
+            {
+                coeffInd[jj] = index[jj][ splineInd[jj] ];
+            }
+
+            size_t offset = hoNDArray<T>::calculate_offset(coeffInd, coeffOffsetFactors);
+
+            // coefficient times the product of the per-axis weights
+            T v = coeff[offset];
+
+            for ( jj=0; jj<D; jj++ )
+            {
+                v *= weight[jj][ splineInd[jj] ];
+            }
+
+            res += v;
+        }
+
+        return res;
+    }
+
+    /// Convenience overload: same as the pointer-based N-D evaluateBSpline, with the
+    /// position given as a std::vector.
+    template <typename T, unsigned int D> 
+    inline T hoNDBSpline<T, D>::evaluateBSpline(const T* coeff, const std::vector<size_t>& dimension, unsigned int SplineDegree, 
+                                        bspline_float_type** weight, const std::vector<coord_type>& pos)
+    {
+        // hand the vector's element storage to the raw-pointer overload
+        const coord_type* rawPos = &pos[0];
+        return this->evaluateBSpline(coeff, dimension, SplineDegree, weight, rawPos);
+    }
+
+    /// Evaluate the spline given by \a coeff, differentiated derivative[d] times along
+    /// dimension d, at every integer grid position of \a data and store it in \a deriv.
+    ///
+    /// \param data         only its dimensions are read; deriv is re-created to match them if needed
+    /// \param coeff        B-spline coefficients, indexed with the dimensions of \a data
+    /// \param SplineDegree degree of the spline
+    /// \param derivative   derivative order (0, 1 or 2) for each of the D dimensions
+    /// \param deriv        output array
+    /// \return true on success; false when data is not D-dimensional, when derivative
+    ///         has fewer than D entries, or when an exception was caught
+    template <typename T, unsigned int D> 
+    bool hoNDBSpline<T, D>::computeBSplineDerivative(const hoNDArray<T>& data, const hoNDArray<T>& coeff, unsigned int SplineDegree, const std::vector<unsigned int>& derivative, hoNDArray<T>& deriv)
+    {
+        try
+        {
+            std::vector<size_t> dimension;
+            data.get_dimensions(dimension);
+
+            if ( D!=data.get_number_of_dimensions() )
+            {
+                GADGET_ERROR_MSG("computeBSplineDerivative(hoNDArray) : D!=dimension.get_number_of_dimensions() ... ");
+                return false;
+            }
+
+            // derivative[ii] is read for every ii < D below
+            if ( derivative.size() < D )
+            {
+                GADGET_ERROR_MSG("computeBSplineDerivative(hoNDArray) : derivative.size() < D ... ");
+                return false;
+            }
+
+            if ( !deriv.dimension_equal(&data) )
+            {
+                deriv.create(data.get_dimensions());
+            }
+
+            // only need to compute the weights once, since this is the integer point computation
+            // (zero-initialised so that a tap the weight routine does not fill never holds garbage)
+            bspline_float_type weight[D][10] = {};
+            long long index[D][10];
+
+            unsigned int ii;
+            for ( ii=0; ii<D; ii++ )
+            {
+                computeBSplineInterpolationLocationsAndWeights(dimension[ii], SplineDegree, derivative[ii], dimension[ii]/2, weight[ii], index[ii]);
+            }
+
+            if ( D == 2 )
+            {
+                size_t sx = data.get_size(0);
+                size_t sy = data.get_size(1);
+
+                long long y;
+
+                #pragma omp parallel for default(none) private(y) shared(sx, sy, deriv, coeff, SplineDegree, weight)
+                for ( y=0; y<sy; y++ )
+                {
+                    for ( size_t x=0; x<sx; x++ )
+                    {
+                        deriv(x, y) = evaluateBSpline(coeff.begin(), sx, sy, SplineDegree, weight[0], weight[1], x, y);
+                    }
+                }
+            }
+            else if ( D == 3 )
+            {
+                size_t sx = data.get_size(0);
+                size_t sy = data.get_size(1);
+                size_t sz = data.get_size(2);
+
+                long long z;
+
+                #pragma omp parallel for default(none) private(z) shared(sx, sy, sz, deriv, coeff, SplineDegree, weight)
+                for ( z=0; z<sz; z++ )
+                {
+                    for ( size_t y=0; y<sy; y++ )
+                    {
+                        for ( size_t x=0; x<sx; x++ )
+                        {
+                            deriv(x, y, z) = evaluateBSpline(coeff.begin(), sx, sy, sz, SplineDegree, weight[0], weight[1], weight[2], x, y, z);
+                        }
+                    }
+                }
+            }
+            else if ( D == 4 )
+            {
+                size_t sx = data.get_size(0);
+                size_t sy = data.get_size(1);
+                size_t sz = data.get_size(2);
+                size_t st = data.get_size(3);
+
+                long long t;
+
+                #pragma omp parallel for default(none) private(t) shared(sx, sy, sz, st, deriv, coeff, SplineDegree, weight)
+                for ( t=0; t<st; t++ )
+                {
+                    for ( size_t z=0; z<sz; z++ )
+                    {
+                        for ( size_t y=0; y<sy; y++ )
+                        {
+                            for ( size_t x=0; x<sx; x++ )
+                            {
+                                deriv(x, y, z, t) = evaluateBSpline(coeff.begin(), sx, sy, sz, st, SplineDegree, weight[0], weight[1], weight[2], weight[3], x, y, z, t);
+                            }
+                        }
+                    }
+                }
+            }
+            else
+            {
+                size_t num = data.get_number_of_elements();
+
+                // the N-D evaluateBSpline takes bspline_float_type**, which the 2D array
+                // 'weight' does not convert to; hand it a table of row pointers instead
+                bspline_float_type* weightRows[D];
+                for ( ii=0; ii<D; ii++ )
+                {
+                    weightRows[ii] = weight[ii];
+                }
+
+                long long n;
+                // deriv is written inside the region, so default(none) requires it in the shared list
+                #pragma omp parallel default(none) private(n) shared(num, data, deriv, coeff, dimension, SplineDegree, weightRows)
+                {
+                    // per-thread scratch buffers
+                    std::vector<size_t> ind(D);
+                    std::vector<coord_type> pos(D);
+
+                    #pragma omp for 
+                    for ( n=0; n<num; n++ )
+                    {
+                        data.calculate_index(n, ind);
+
+                        for ( unsigned int jj=0; jj<D; jj++ )
+                        {
+                            pos[jj] = ind[jj];
+                        }
+
+                        deriv(n) = evaluateBSpline(coeff.begin(), dimension, SplineDegree, weightRows, pos);
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDBSpline<T, D>::computeBSplineDerivative(const hoNDArray<T>& data, const hoNDArray<T>& coeff, const std::vector<unsigned int>& derivative, hoNDArray<T>& deriv) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// hoNDImage overload: wraps both images in hoNDArray objects built on their
+    /// existing buffers and forwards to the hoNDArray version.
+    template <typename T, unsigned int D> 
+    bool hoNDBSpline<T, D>::computeBSplineDerivative(const hoNDImage<T,D>& data, const hoNDArray<T>& coeff, unsigned int SplineDegree, const std::vector<unsigned int>& derivative, hoNDImage<T,D>& deriv)
+    {
+        // when the dimensions differ, start the output as a copy of the input image
+        if ( !deriv.dimension_equal(&data) )
+        {
+            deriv = data;
+        }
+
+        // NOTE(review): the trailing 'false' presumably tells hoNDArray not to free
+        // the wrapped buffer - confirm against the hoNDArray constructor.
+        hoNDArray<T> inputView(data.get_dimensions(), const_cast<T*>(data.begin()), false);
+        hoNDArray<T> outputView(deriv.get_dimensions(), deriv.begin(), false);
+
+        const bool ok = computeBSplineDerivative(inputView, coeff, SplineDegree, derivative, outputView);
+        return ok;
+    }
+
+    /// Write a short description of this object (dimension D and the typeid name
+    /// of T) to \a os.
+    template <typename T, unsigned int D> 
+    void hoNDBSpline<T, D>::print(std::ostream& os) const
+    {
+        const std::string elemTypeName( typeid(T).name() );
+
+        os << "--------------Gagdgetron ND BSpline -------------" << std::endl;
+        os << "Dimension is : " << D << std::endl;
+        os << "Data type is : " << elemTypeName << std::endl;
+    }
+
+    /// Convert the samples in c[0..DataLength-1] in place into B-spline interpolation
+    /// coefficients by running, for every pole, a causal and an anti-causal recursive
+    /// filter with mirror boundary initialisation.
+    ///
+    /// \param c          samples on input, coefficients on output
+    /// \param DataLength number of samples; signals shorter than 2 are left untouched
+    /// \param z          filter poles
+    /// \param NbPoles    number of entries used from \a z
+    /// \param Tolerance  forwarded to InitialCausalCoefficient
+    template <typename T, unsigned int D> 
+    void hoNDBSpline<T, D>::ConvertToInterpolationCoefficients(T c[], size_t DataLength, bspline_float_type z[], long NbPoles, bspline_float_type Tolerance)
+    { /* begin ConvertToInterpolationCoefficients */
+
+        /* special case required by mirror boundaries (one sample); an empty
+           signal is rejected here too, since the code below touches c[0] and
+           c[DataLength - 1] */
+        if (DataLength < 2)
+        {
+            return;
+        }
+
+        /* compute the overall gain */
+        double Lambda = 1.0;
+        for (long k = 0L; k < NbPoles; k++)
+        {
+            Lambda = Lambda * (1.0 - z[k]) * (1.0 - 1.0 / z[k]);
+        }
+
+        /* apply the gain */
+        for (size_t n = 0; n < DataLength; n++)
+        {
+            c[n] *= Lambda;
+        }
+
+        /* loop over all poles */
+        for (long k = 0L; k < NbPoles; k++)
+        {
+            /* causal initialization */
+            c[0] = InitialCausalCoefficient(c, DataLength, z[k], Tolerance);
+
+            /* causal recursion */
+            for (size_t n = 1; n < DataLength; n++)
+            {
+                c[n] += z[k] * c[n - 1];
+            }
+
+            /* anticausal initialization */
+            c[DataLength - 1] = InitialAntiCausalCoefficient(c, DataLength, z[k]);
+
+            /* anticausal recursion: n runs from DataLength - 2 down to 0
+               (unsigned counter, hence the decrement inside the condition) */
+            for (size_t n = DataLength - 1; n-- > 0; )
+            {
+                c[n] = z[k] * (c[n + 1] - c[n]);
+            }
+        }
+    } /* end ConvertToInterpolationCoefficients */
+
+    /// Initial value of the causal recursive filter for pole \a z under mirror
+    /// boundary conditions.
+    ///
+    /// \param c          input samples
+    /// \param DataLength number of samples in \a c
+    /// \param z          filter pole
+    /// \param Tolerance  when > 0, the sum is truncated after
+    ///                   ceil(log(Tolerance)/log(|z|)) terms if that is shorter than the signal
+    /// \return the initial causal coefficient; T(0) for an empty signal
+    template <typename T, unsigned int D> 
+    T hoNDBSpline<T, D>::InitialCausalCoefficient(T c[], size_t DataLength, bspline_float_type z, bspline_float_type Tolerance)
+    { /* begin InitialCausalCoefficient */
+
+        T Sum;
+        bspline_float_type zn, z2n, iz;
+        size_t n, Horizon;
+
+        /* nothing to read from an empty signal */
+        if (DataLength == 0)
+        {
+            return T(0);
+        }
+
+        /* this initialization corresponds to mirror boundaries */
+        Horizon = DataLength;
+        if (Tolerance > 0.0)
+        {
+            /* keep the estimate in floating point until it is known to be a valid
+               count: it is negative for Tolerance > 1 and not finite for |z| == 1,
+               and converting such values to size_t is undefined */
+            const double estimate = std::ceil(log(Tolerance) / log(fabs(z)));
+            if (estimate < 0.0)
+            {
+                Horizon = 0;
+            }
+            else if (estimate < (double)DataLength)
+            {
+                Horizon = (size_t)estimate;
+            }
+            /* otherwise Horizon stays at DataLength and the full loop is used */
+        }
+
+        if (Horizon < DataLength)
+        {
+            /* accelerated loop */
+            zn = z;
+            Sum = c[0];
+            for (n = 1; n < Horizon; n++) {
+                Sum += zn * c[n];
+                zn *= z;
+            }
+            return(Sum);
+        }
+        else
+        {
+            /* full loop */
+            zn = z;
+            iz = (bspline_float_type)(1.0) / z;
+            z2n = pow(z, (bspline_float_type)(DataLength - 1L));
+            Sum = c[0] + z2n * c[DataLength - 1L];
+            z2n *= z2n * iz;
+            /* n runs over 1 .. DataLength - 2; written as n + 1 < DataLength so the
+               unsigned bound cannot wrap around for a one-sample signal */
+            for (n = 1L; n + 1 < DataLength; n++)
+            {
+                Sum += (zn + z2n) * c[n];
+                zn *= z;
+                z2n *= iz;
+            }
+            return( Sum / (bspline_float_type)(1.0 - zn * zn) );
+        }
+    } /* end InitialCausalCoefficient */
+
+    /// Initial value of the anti-causal recursive filter for pole \a z, computed
+    /// from the last two entries of \a c.
+    template <typename T, unsigned int D> 
+    T hoNDBSpline<T, D>::InitialAntiCausalCoefficient(T c[], size_t DataLength, bspline_float_type z)
+    { /* begin InitialAntiCausalCoefficient */
+
+        /* this initialization corresponds to mirror boundaries */
+        const bspline_float_type gain = z / (z * z - (bspline_float_type)1.0);
+
+        return( gain * (z * c[DataLength - 2L] + c[DataLength - 1L]) );
+    } /* end InitialAntiCausalCoefficient */
+
+    /// Look up the recursive-filter poles for a spline of the given degree.
+    ///
+    /// \param Pole         output array; receives NbPoles values (up to 4)
+    /// \param SplineDegree spline degree, 2 to 9
+    /// \param NbPoles      output: number of poles written; set to 0 (and an error is
+    ///                     reported) for an unsupported degree, so that the caller never
+    ///                     reads an unset count
+    template <typename T, unsigned int D> 
+    inline void hoNDBSpline<T, D>::Pole(bspline_float_type* Pole, unsigned int SplineDegree, unsigned int& NbPoles)
+    {
+        switch (SplineDegree) 
+        {
+            case 2:
+                NbPoles = 1;
+                Pole[0] = (bspline_float_type)( std::sqrt(8.0) - 3.0 );
+                break;
+
+            case 3:
+                NbPoles = 1;
+                Pole[0] = (bspline_float_type)( std::sqrt(3.0) - 2.0 );
+                break;
+
+            case 4:
+                NbPoles = 2;
+                Pole[0] = (bspline_float_type)( std::sqrt(664.0 - std::sqrt(438976.0)) + std::sqrt(304.0) - 19.0 );
+                Pole[1] = (bspline_float_type)( std::sqrt(664.0 + std::sqrt(438976.0)) - std::sqrt(304.0) - 19.0 );
+                break;
+
+            case 5:
+                NbPoles = 2;
+                Pole[0] = (bspline_float_type)( std::sqrt(135.0 / 2.0 - std::sqrt(17745.0 / 4.0)) + std::sqrt(105.0 / 4.0)
+                    - 13.0 / 2.0 );
+                Pole[1] = (bspline_float_type)( std::sqrt(135.0 / 2.0 + std::sqrt(17745.0 / 4.0)) - std::sqrt(105.0 / 4.0)
+                    - 13.0 / 2.0 );
+                break;
+
+            case 6:
+                NbPoles = 3;
+                Pole[0] = (bspline_float_type)( -0.48829458930304475513011803888378906211227916123938 );
+                Pole[1] = (bspline_float_type)( -0.081679271076237512597937765737059080653379610398148 );
+                Pole[2] = (bspline_float_type)( -0.0014141518083258177510872439765585925278641690553467 );
+                break;
+
+            case 7:
+                NbPoles = 3;
+                Pole[0] = (bspline_float_type)( -0.53528043079643816554240378168164607183392315234269 );
+                Pole[1] = (bspline_float_type)( -0.12255461519232669051527226435935734360548654942730 );
+                Pole[2] = (bspline_float_type)( -0.0091486948096082769285930216516478534156925639545994 );
+                break;
+
+            case 8:
+                NbPoles = 4;
+                Pole[0] = (bspline_float_type)( -0.57468690924876543053013930412874542429066157804125 );
+                Pole[1] = (bspline_float_type)( -0.16303526929728093524055189686073705223476814550830 );
+                Pole[2] = (bspline_float_type)( -0.023632294694844850023403919296361320612665920854629 );
+                Pole[3] = (bspline_float_type)( -0.00015382131064169091173935253018402160762964054070043 );
+                break;
+
+            case 9:
+                NbPoles = 4;
+                Pole[0] = (bspline_float_type)( -0.60799738916862577900772082395428976943963471853991 );
+                Pole[1] = (bspline_float_type)( -0.20175052019315323879606468505597043468089886575747 );
+                Pole[2] = (bspline_float_type)( -0.043222608540481752133321142979429688265852380231497 );
+                Pole[3] = (bspline_float_type)( -0.0021213069031808184203048965578486234220548560988624 );
+                break;
+
+            default:
+                // no poles available: publish an explicit zero count instead of
+                // leaving the output parameter untouched
+                NbPoles = 0;
+                GADGET_ERROR_MSG("Only 2 - 9 order BSpline is supported ... ");
+                return;
+        }
+    }
+
+    /// Value at \a x of the B-spline basis function of degree \a SplineDegree,
+    /// computed as a signed sum of truncated powers:
+    ///   sum over j = 0..SplineDegree+1 of
+    ///   (-1)^j * (SplineDegree+1) / ( j! * (SplineDegree+1-j)! ) * (x - j + (SplineDegree+1)/2)^SplineDegree
+    /// where a term is dropped when its base is negative.
+    template <typename T, unsigned int D> 
+    inline typename hoNDBSpline<T, D>::bspline_float_type hoNDBSpline<T, D>::BSpline(bspline_float_type x, unsigned int SplineDegree)
+    {
+        // below this bound no term of the sum is active
+        if ( x < -( (bspline_float_type)SplineDegree+1)/2.0 )
+        {
+            return 0.0;
+        }
+
+        // follow the notation of origin paper
+
+        const unsigned int lastTerm = SplineDegree+1;
+
+        bspline_float_type total = 0.0;
+        for ( unsigned int j=0; j<=lastTerm; j++ )
+        {
+            // truncated power: skip the term unless its base is non-negative
+            if ( !( ( x-j+0.5*(SplineDegree+1) ) >= 0 ) )
+            {
+                continue;
+            }
+
+            // j!
+            bspline_float_type jFactorial = 1.0;
+            for ( unsigned int f=2; f<=j; f++ )
+            {
+                jFactorial *= f;
+            }
+
+            // (SplineDegree+1-j)!
+            bspline_float_type restFactorial = 1.0;
+            for ( unsigned int f=2; f<=lastTerm-j; f++ )
+            {
+                restFactorial *= f;
+            }
+
+            // (-1)^j
+            const double sign = ( j & 1u ) ? -1.0 : 1.0;
+
+            total += (bspline_float_type)( ( sign * (SplineDegree+1) / (restFactorial * jFactorial) ) * std::pow(x-j+0.5*(SplineDegree+1), double(SplineDegree) ) );
+        }
+
+        return total;
+    }
+
+    /// Fill xIndex[0..SplineDegree] with the SplineDegree+1 consecutive integer
+    /// positions used to interpolate at \a x (no boundary handling is applied here).
+    template <typename T, unsigned int D> 
+    inline void hoNDBSpline<T, D>::BSplineInterpolationLocation(bspline_float_type x, unsigned int SplineDegree, long long* xIndex)
+    {
+        const long long halfSpan = (long long)SplineDegree / 2L;
+
+        /* compute the interpolation indexes */
+        // odd degrees anchor on floor(x), even degrees on floor(x + 0.5)
+        long long tap;
+        if (SplineDegree & 1L)
+        {
+            tap = (long long)std::floor(x) - halfSpan;
+        }
+        else
+        {
+            tap = (long long)std::floor(x + 0.5) - halfSpan;
+        }
+
+        for (long long k = 0L; k <= SplineDegree; ++k, ++tap)
+        {
+            xIndex[k] = tap;
+        }
+    }
+
+    /// Fold the indices xIndex[0..SplineDegree] back into a signal of length
+    /// \a Width by mirroring (period 2*Width - 2). A one-sample signal maps
+    /// every index to 0.
+    template <typename T, unsigned int D> 
+    inline void hoNDBSpline<T, D>::BSplineInterpolationMirrorBoundaryCondition(unsigned int SplineDegree, long long* xIndex, size_t Width)
+    {
+        // single sample: every tap lands on it
+        if (Width == 1L)
+        {
+            for (unsigned int k = 0; k <= SplineDegree; k++)
+            {
+                xIndex[k] = 0L;
+            }
+            return;
+        }
+
+        const long long period = 2 * Width - 2;
+
+        /* apply the mirror boundary conditions */
+        for (unsigned int k = 0; k <= SplineDegree; k++)
+        {
+            // reflect negative positions about 0, then reduce modulo the period
+            long long folded = (xIndex[k] < 0L) ? -xIndex[k] : xIndex[k];
+            folded = folded % period;
+
+            // positions past the last sample are reflected back
+            if (Width <= folded)
+            {
+                folded = period - folded;
+            }
+
+            xIndex[k] = folded;
+        }
+    }
+
+    template <typename T, unsigned int D> 
+    inline void hoNDBSpline<T, D>::BSplineDiscrete(bspline_float_type x, unsigned int SplineDegree, bspline_float_type* xWeight, long long* xIndex)
+    { // Fills xWeight[0..SplineDegree] with the interpolation weights at x for degrees 2 to 9; xIndex is only read here.
+        bspline_float_type w, w2, w4, t, t0, t1; // w: offset of x from a reference tap; the others are scratch terms of the polynomials
+
+        ///* compute the interpolation indexes */
+        //if (SplineDegree & 1L)
+        //{
+        //    i = (long)std::floor(x) - SplineDegree / 2L;
+        //    for (k = 0L; k <= SplineDegree; k++)
+        //    {
+        //        xIndex[k] = i++;
+        //    }
+        //}
+        //else
+        //{
+        //    i = (long)std::floor(x + 0.5) - SplineDegree / 2L;
+        //    for (k = 0L; k <= SplineDegree; k++)
+        //    {
+        //        xIndex[k] = i++;
+        //    }
+        //}
+
+        // BSplineInterpolationLocation(x, SplineDegree, xIndex);  (disabled: xIndex must already be filled by the caller)
+
+        /* compute the interpolation weights; each case handles one spline degree */
+        switch (SplineDegree)
+        {
+            case 2L:
+                /* w = offset of x from tap xIndex[1] */
+                w = x - (bspline_float_type)xIndex[1];
+                xWeight[1] = 3.0 / 4.0 - w * w;
+                xWeight[2] = (1.0 / 2.0) * (w - xWeight[1] + 1.0);
+                xWeight[0] = 1.0 - xWeight[1] - xWeight[2]; // remaining weight: chosen so that all weights sum to 1
+                break;
+            case 3L:
+                /* w = offset of x from tap xIndex[1] */
+                w = x - (bspline_float_type)xIndex[1];
+                xWeight[3] = (1.0 / 6.0) * w * w * w;
+                xWeight[0] = (1.0 / 6.0) + (1.0 / 2.0) * w * (w - 1.0) - xWeight[3];
+                xWeight[2] = w + xWeight[0] - 2.0 * xWeight[3];
+                xWeight[1] = 1.0 - xWeight[0] - xWeight[2] - xWeight[3]; // remaining weight: chosen so that all weights sum to 1
+                break;
+            case 4L:
+                /* w = offset of x from tap xIndex[2] */
+                w = x - (bspline_float_type)xIndex[2];
+                w2 = w * w;
+                t = (1.0 / 6.0) * w2;
+                xWeight[0] = 1.0 / 2.0 - w;
+                xWeight[0] *= xWeight[0];
+                xWeight[0] *= (1.0 / 24.0) * xWeight[0]; // now (1/2 - w)^4 / 24
+                t0 = w * (t - 11.0 / 24.0);
+                t1 = 19.0 / 96.0 + w2 * (1.0 / 4.0 - t);
+                xWeight[1] = t1 + t0;
+                xWeight[3] = t1 - t0;
+                xWeight[4] = xWeight[0] + t0 + (1.0 / 2.0) * w;
+                xWeight[2] = 1.0 - xWeight[0] - xWeight[1] - xWeight[3] - xWeight[4]; // remaining weight: chosen so that all weights sum to 1
+                break;
+            case 5L:
+                /* w = offset of x from tap xIndex[2]; all six weights are computed explicitly */
+                w = x - (bspline_float_type)xIndex[2];
+                w2 = w * w;
+                xWeight[5] = (1.0 / 120.0) * w * w2 * w2;
+                w2 -= w; // from here on w2 holds w*w - w
+                w4 = w2 * w2;
+                w -= 1.0 / 2.0; // from here on w holds the offset minus 1/2
+                t = w2 * (w2 - 3.0);
+                xWeight[0] = (1.0 / 24.0) * (1.0 / 5.0 + w2 + w4) - xWeight[5];
+                t0 = (1.0 / 24.0) * (w2 * (w2 - 5.0) + 46.0 / 5.0);
+                t1 = (-1.0 / 12.0) * w * (t + 4.0);
+                xWeight[2] = t0 + t1;
+                xWeight[3] = t0 - t1;
+                t0 = (1.0 / 16.0) * (9.0 / 5.0 - t);
+                t1 = (1.0 / 24.0) * w * (w4 - w2 - 5.0);
+                xWeight[1] = t0 + t1;
+                xWeight[4] = t0 - t1;
+                break;
+            case 6L:
+                /* w = offset of x from tap xIndex[3] */
+                w = x - (bspline_float_type)xIndex[3];
+                xWeight[0] = 1.0 / 2.0 - w;
+                xWeight[0] *= xWeight[0] * xWeight[0];
+                xWeight[0] *= xWeight[0] / 720.0; // now (1/2 - w)^6 / 720
+                xWeight[1] = (361.0 / 192.0 - w * (59.0 / 8.0 + w
+                    * (-185.0 / 16.0 + w * (25.0 / 3.0 + w * (-5.0 / 2.0 + w)
+                    * (1.0 / 2.0 + w))))) / 120.0;
+                xWeight[2] = (10543.0 / 960.0 + w * (-289.0 / 16.0 + w
+                    * (79.0 / 16.0 + w * (43.0 / 6.0 + w * (-17.0 / 4.0 + w
+                    * (-1.0 + w)))))) / 48.0;
+                w2 = w * w;
+                xWeight[3] = (5887.0 / 320.0 - w2 * (231.0 / 16.0 - w2
+                    * (21.0 / 4.0 - w2))) / 36.0;
+                xWeight[4] = (10543.0 / 960.0 + w * (289.0 / 16.0 + w
+                    * (79.0 / 16.0 + w * (-43.0 / 6.0 + w * (-17.0 / 4.0 + w
+                    * (1.0 + w)))))) / 48.0;
+                xWeight[6] = 1.0 / 2.0 + w;
+                xWeight[6] *= xWeight[6] * xWeight[6];
+                xWeight[6] *= xWeight[6] / 720.0; // now (1/2 + w)^6 / 720
+                xWeight[5] = 1.0 - xWeight[0] - xWeight[1] - xWeight[2] - xWeight[3]
+                    - xWeight[4] - xWeight[6]; // remaining weight: chosen so that all weights sum to 1
+                break;
+            case 7L:
+                /* w = offset of x from tap xIndex[3] */
+                w = x - (bspline_float_type)xIndex[3];
+                xWeight[0] = 1.0 - w;
+                xWeight[0] *= xWeight[0];
+                xWeight[0] *= xWeight[0] * xWeight[0];
+                xWeight[0] *= (1.0 - w) / 5040.0; // now (1 - w)^7 / 5040
+                w2 = w * w;
+                xWeight[1] = (120.0 / 7.0 + w * (-56.0 + w * (72.0 + w
+                    * (-40.0 + w2 * (12.0 + w * (-6.0 + w)))))) / 720.0;
+                xWeight[2] = (397.0 / 7.0 - w * (245.0 / 3.0 + w * (-15.0 + w
+                    * (-95.0 / 3.0 + w * (15.0 + w * (5.0 + w
+                    * (-5.0 + w))))))) / 240.0;
+                xWeight[3] = (2416.0 / 35.0 + w2 * (-48.0 + w2 * (16.0 + w2
+                    * (-4.0 + w)))) / 144.0;
+                xWeight[4] = (1191.0 / 35.0 - w * (-49.0 + w * (-9.0 + w
+                    * (19.0 + w * (-3.0 + w) * (-3.0 + w2))))) / 144.0;
+                xWeight[5] = (40.0 / 7.0 + w * (56.0 / 3.0 + w * (24.0 + w
+                    * (40.0 / 3.0 + w2 * (-4.0 + w * (-2.0 + w)))))) / 240.0;
+                xWeight[7] = w2;
+                xWeight[7] *= xWeight[7] * xWeight[7];
+                xWeight[7] *= w / 5040.0; // now w^7 / 5040
+                xWeight[6] = 1.0 - xWeight[0] - xWeight[1] - xWeight[2] - xWeight[3]
+                    - xWeight[4] - xWeight[5] - xWeight[7]; // remaining weight: chosen so that all weights sum to 1
+                break;
+            case 8L:
+                /* w = offset of x from tap xIndex[4] */
+                w = x - (bspline_float_type)xIndex[4];
+                xWeight[0] = 1.0 / 2.0 - w;
+                xWeight[0] *= xWeight[0];
+                xWeight[0] *= xWeight[0];
+                xWeight[0] *= xWeight[0] / 40320.0; // now (1/2 - w)^8 / 40320
+                w2 = w * w;
+                xWeight[1] = (39.0 / 16.0 - w * (6.0 + w * (-9.0 / 2.0 + w2)))
+                    * (21.0 / 16.0 + w * (-15.0 / 4.0 + w * (9.0 / 2.0 + w
+                    * (-3.0 + w)))) / 5040.0;
+                xWeight[2] = (82903.0 / 1792.0 + w * (-4177.0 / 32.0 + w
+                    * (2275.0 / 16.0 + w * (-487.0 / 8.0 + w * (-85.0 / 8.0 + w
+                    * (41.0 / 2.0 + w * (-5.0 + w * (-2.0 + w)))))))) / 1440.0;
+                xWeight[3] = (310661.0 / 1792.0 - w * (14219.0 / 64.0 + w
+                    * (-199.0 / 8.0 + w * (-1327.0 / 16.0 + w * (245.0 / 8.0 + w
+                    * (53.0 / 4.0 + w * (-8.0 + w * (-1.0 + w)))))))) / 720.0;
+                xWeight[4] = (2337507.0 / 8960.0 + w2 * (-2601.0 / 16.0 + w2
+                    * (387.0 / 8.0 + w2 * (-9.0 + w2)))) / 576.0;
+                xWeight[5] = (310661.0 / 1792.0 - w * (-14219.0 / 64.0 + w
+                    * (-199.0 / 8.0 + w * (1327.0 / 16.0 + w * (245.0 / 8.0 + w
+                    * (-53.0 / 4.0 + w * (-8.0 + w * (1.0 + w)))))))) / 720.0;
+                xWeight[7] = (39.0 / 16.0 - w * (-6.0 + w * (-9.0 / 2.0 + w2)))
+                    * (21.0 / 16.0 + w * (15.0 / 4.0 + w * (9.0 / 2.0 + w
+                    * (3.0 + w)))) / 5040.0;
+                xWeight[8] = 1.0 / 2.0 + w;
+                xWeight[8] *= xWeight[8];
+                xWeight[8] *= xWeight[8];
+                xWeight[8] *= xWeight[8] / 40320.0; // now (1/2 + w)^8 / 40320
+                xWeight[6] = 1.0 - xWeight[0] - xWeight[1] - xWeight[2] - xWeight[3]
+                    - xWeight[4] - xWeight[5] - xWeight[7] - xWeight[8]; // remaining weight: chosen so that all weights sum to 1
+                break;
+            case 9L:
+                /* w = offset of x from tap xIndex[4] */
+                w = x - (bspline_float_type)xIndex[4];
+                xWeight[0] = 1.0 - w;
+                xWeight[0] *= xWeight[0];
+                xWeight[0] *= xWeight[0];
+                xWeight[0] *= xWeight[0] * (1.0 - w) / 362880.0; // now (1 - w)^9 / 362880
+                xWeight[1] = (502.0 / 9.0 + w * (-246.0 + w * (472.0 + w
+                    * (-504.0 + w * (308.0 + w * (-84.0 + w * (-56.0 / 3.0 + w
+                    * (24.0 + w * (-8.0 + w))))))))) / 40320.0;
+                xWeight[2] = (3652.0 / 9.0 - w * (2023.0 / 2.0 + w * (-952.0 + w
+                    * (938.0 / 3.0 + w * (112.0 + w * (-119.0 + w * (56.0 / 3.0 + w
+                    * (14.0 + w * (-7.0 + w))))))))) / 10080.0;
+                xWeight[3] = (44117.0 / 42.0 + w * (-2427.0 / 2.0 + w * (66.0 + w
+                    * (434.0 + w * (-129.0 + w * (-69.0 + w * (34.0 + w * (6.0 + w
+                    * (-6.0 + w))))))))) / 4320.0;
+                w2 = w * w;
+                xWeight[4] = (78095.0 / 63.0 - w2 * (700.0 + w2 * (-190.0 + w2
+                    * (100.0 / 3.0 + w2 * (-5.0 + w))))) / 2880.0;
+                xWeight[5] = (44117.0 / 63.0 + w * (809.0 + w * (44.0 + w
+                    * (-868.0 / 3.0 + w * (-86.0 + w * (46.0 + w * (68.0 / 3.0 + w
+                    * (-4.0 + w * (-4.0 + w))))))))) / 2880.0;
+                xWeight[6] = (3652.0 / 21.0 - w * (-867.0 / 2.0 + w * (-408.0 + w
+                    * (-134.0 + w * (48.0 + w * (51.0 + w * (-4.0 + w) * (-1.0 + w)
+                    * (2.0 + w))))))) / 4320.0;
+                xWeight[7] = (251.0 / 18.0 + w * (123.0 / 2.0 + w * (118.0 + w
+                    * (126.0 + w * (77.0 + w * (21.0 + w * (-14.0 / 3.0 + w
+                    * (-6.0 + w * (-2.0 + w))))))))) / 10080.0;
+                xWeight[9] = w2 * w2;
+                xWeight[9] *= xWeight[9] * w / 362880.0; // now w^9 / 362880
+                xWeight[8] = 1.0 - xWeight[0] - xWeight[1] - xWeight[2] - xWeight[3]
+                    - xWeight[4] - xWeight[5] - xWeight[6] - xWeight[7] - xWeight[9]; // remaining weight: chosen so that all weights sum to 1
+                break;
+            default:
+                GADGET_ERROR_MSG("Invalid spline degree " << SplineDegree); // unsupported degree: xWeight is left untouched
+        }
+    }
+
+    /// Fill weight[0..SplineDegree] with the first-derivative weights at \a x:
+    /// weight[k] is BSplineFirstOrderDerivative evaluated at the offset of x from
+    /// tap xIndex[k].
+    ///
+    /// \param x            evaluation position
+    /// \param SplineDegree degree of the spline
+    /// \param weight       output, SplineDegree+1 entries
+    /// \param xIndex       tap positions, SplineDegree+1 entries, filled by the caller
+    template <typename T, unsigned int D> 
+    inline void hoNDBSpline<T, D>::BSplineDiscreteFirstOrderDerivative(bspline_float_type x, unsigned int SplineDegree, bspline_float_type* weight, long long* xIndex)
+    {
+        // one weight per tap: BSplineInterpolationLocation produces SplineDegree+1
+        // taps, so the bound is inclusive and no weight slot is left unset
+        unsigned int k;
+        for ( k=0; k<=SplineDegree; k++ )
+        {
+            weight[k] = BSplineFirstOrderDerivative(x-xIndex[k], SplineDegree);
+        }
+    }
+
+    /// Fill weight[0..SplineDegree] with the second-derivative weights at \a x:
+    /// weight[k] is BSplineSecondOrderDerivative evaluated at the offset of x from
+    /// tap xIndex[k].
+    ///
+    /// \param x            evaluation position
+    /// \param SplineDegree degree of the spline
+    /// \param weight       output, SplineDegree+1 entries
+    /// \param xIndex       tap positions, SplineDegree+1 entries, filled by the caller
+    template <typename T, unsigned int D> 
+    inline void hoNDBSpline<T, D>::BSplineDiscreteSecondOrderDerivative(bspline_float_type x, unsigned int SplineDegree, bspline_float_type* weight, long long* xIndex)
+    {
+        // one weight per tap: BSplineInterpolationLocation produces SplineDegree+1
+        // taps, so the bound is inclusive and no weight slot is left unset
+        unsigned int k;
+        for ( k=0; k<=SplineDegree; k++ )
+        {
+            weight[k] = BSplineSecondOrderDerivative(x-xIndex[k], SplineDegree);
+        }
+    }
+
+    /// First derivative of the degree-SplineDegree basis function at \a x, formed
+    /// as the difference of two basis functions of degree SplineDegree-1 shifted
+    /// by +1/2 and -1/2.
+    template <typename T, unsigned int D> 
+    inline typename hoNDBSpline<T, D>::bspline_float_type hoNDBSpline<T, D>::BSplineFirstOrderDerivative(bspline_float_type x, unsigned int SplineDegree)
+    {
+        const unsigned int lowerDegree = SplineDegree-1;
+
+        const bspline_float_type ahead  = BSpline(x+0.5, lowerDegree);
+        const bspline_float_type behind = BSpline(x-0.5, lowerDegree);
+
+        return ( ahead - behind );
+    }
+
+    /// Second derivative of the degree-SplineDegree basis function at \a x, formed
+    /// as a second difference of basis functions of degree SplineDegree-2 taken at
+    /// x+1, x and x-1.
+    template <typename T, unsigned int D> 
+    inline typename hoNDBSpline<T, D>::bspline_float_type hoNDBSpline<T, D>::BSplineSecondOrderDerivative(bspline_float_type x, unsigned int SplineDegree)
+    {
+        const unsigned int lowerDegree = SplineDegree-2;
+
+        const bspline_float_type ahead  = BSpline(x+1, lowerDegree);
+        const bspline_float_type behind = BSpline(x-1, lowerDegree);
+        const bspline_float_type centre = BSpline(x, lowerDegree);
+
+        return ( ahead + behind - 2*centre );
+    }
+
+    /// Compute, for position \a x on a signal of length \a len, the tap indices and
+    /// the matching weights for the value (dx == 0), first derivative (dx == 1) or
+    /// second derivative (dx == 2) of the spline. The weights are computed from the
+    /// unmirrored indices; the indices are mirrored into the signal afterwards.
+    /// For any other dx an error is reported and the indices are left unmirrored.
+    template <typename T, unsigned int D> 
+    inline void hoNDBSpline<T, D>::computeBSplineInterpolationLocationsAndWeights(size_t len, unsigned int SplineDegree, unsigned int dx, coord_type x, bspline_float_type* weight, long long* xIndex)
+    {
+        BSplineInterpolationLocation(x, SplineDegree, xIndex);
+
+        switch ( dx )
+        {
+            case 0:
+                BSplineDiscrete(x, SplineDegree, weight, xIndex);
+                break;
+
+            case 1:
+                BSplineDiscreteFirstOrderDerivative(x, SplineDegree, weight, xIndex);
+                break;
+
+            case 2:
+                BSplineDiscreteSecondOrderDerivative(x, SplineDegree, weight, xIndex);
+                break;
+
+            default:
+                GADGET_ERROR_MSG("Derivative order must be 0/1/2 ... ");
+                return;
+        }
+
+        BSplineInterpolationMirrorBoundaryCondition(SplineDegree, xIndex, len);
+    }
+}
diff --git a/toolboxes/core/cpu/arma_math/CMakeLists.txt b/toolboxes/core/cpu/arma_math/CMakeLists.txt
deleted file mode 100644
index e5b2873..0000000
--- a/toolboxes/core/cpu/arma_math/CMakeLists.txt
+++ /dev/null
@@ -1,48 +0,0 @@
-if (WIN32)
-  ADD_DEFINITIONS(-D__BUILD_GADGETRON_CPUCORE_MATH__)
-endif (WIN32)
-
-include_directories(
-  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
-  ${ARMADILLO_INCLUDE_DIRS}
-  )
-
-if (MKL_FOUND)
-  INCLUDE_DIRECTORIES( ${MKL_INCLUDE_DIR} )
-  LINK_DIRECTORIES( ${MKL_LIB_DIR} ${MKL_COMPILER_LIB_DIR} )
-endif (MKL_FOUND)
-
-add_library(cpucore_math ${LIBTYPE} 
-  hoNDArray_operators.h
-  hoNDArray_operators.cpp
-  hoNDArray_elemwise.h
-  hoNDArray_elemwise.cpp
-  hoNDArray_blas.h
-  hoNDArray_blas.cpp
-  hoNDArray_reductions.cpp
-  )
-
-if (MKL_FOUND)
-  target_link_libraries(cpucore_math 
-    cpucore
-    ${ARMADILLO_LIBRARIES}
-    ${MKL_LIBRARIES}
-    )
-else (MKL_FOUND)
-  target_link_libraries(cpucore_math 
-    cpucore
-    ${ARMADILLO_LIBRARIES}
-    )
-endif (MKL_FOUND)
-
-install(TARGETS cpucore_math DESTINATION lib)
-
-install(FILES 	
-  cpucore_math_export.h
-  hoArmadillo.h
-  hoNDArray_operators.h
-  hoNDArray_elemwise.h
-  hoNDArray_blas.h
-  hoNDArray_reductions.h
-  hoNDArray_math.h
-  DESTINATION include)
diff --git a/toolboxes/core/cpu/arma_math/cpucore_math_export.h b/toolboxes/core/cpu/arma_math/cpucore_math_export.h
deleted file mode 100644
index c7e8dd4..0000000
--- a/toolboxes/core/cpu/arma_math/cpucore_math_export.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/** \file cpucore_math_export.h
-    \brief Required definitions for Windows, importing/exporting dll symbols 
-*/
-
-#ifndef CPUCORE_MATH_EXPORT_H_
-#define CPUCORE_MATH_EXPORT_H_
-
-#if defined (WIN32)
-    #ifdef BUILD_TOOLBOX_STATIC
-        #define EXPORTCPUCOREMATH
-    #else
-        #if defined (__BUILD_GADGETRON_CPUCORE_MATH__) || defined (cpucore_math_EXPORTS)
-            #define EXPORTCPUCOREMATH __declspec(dllexport)
-        #else
-            #define EXPORTCPUCOREMATH __declspec(dllimport)
-        #endif
-    #endif
-#else
-#define EXPORTCPUCOREMATH
-#endif
-
-#endif /* CPUCORE_MATH_EXPORT_H_ */
diff --git a/toolboxes/core/cpu/arma_math/hoArmadillo.h b/toolboxes/core/cpu/arma_math/hoArmadillo.h
deleted file mode 100644
index abb481c..0000000
--- a/toolboxes/core/cpu/arma_math/hoArmadillo.h
+++ /dev/null
@@ -1,84 +0,0 @@
-#pragma once
-#define ARMA_64BIT_WORD
-#include "hoNDArray.h"
-#include <armadillo>
-
-/** \file hoArmadillo.h
-\brief Utilities to create an Armadillo matrix or column vector from an hoNDArray.
-
-Utilities to create an Armadillo matrix or column vector from an hoNDArray.
-A helper function that creates an hoNDArray from an Armadillo matrix or vector is deliberatly omitted:
-The reccomended approach to using Armadillo's functionality and providing an hoNDArray of the result is 
-1) create an hoNDArray to hold the result, 
-2) convert this array to an Armadillo matrix or vector using the utilities provided in this header,
-3) assign the desired Armadillo computation to this array.
-This approach ensures that the Gadgetron -- and not Armadillo -- is responsible for subsequent memory handling.
-We refer to hoNDArray_math.h for some specific examples on how to use this Armadillo interface.
-*/
-
-namespace Gadgetron{
-
-  /**
-   * @brief Creates an Armadillo matrix from a two-dimensional hoNDArray.
-   * @param[in] x Input array.
-   * @return An Armadillo array mapped to the data pointer of the hoNDArray.
-   */
-  template<class T> arma::Mat<typename stdType<T>::Type> as_arma_matrix( hoNDArray<T> *x )
-  {
-    if( x->get_number_of_dimensions() != 2 )
-      throw std::runtime_error("Wrong number of dimensions. Cannot convert hoNDArray to matrix");
-    return arma::Mat<typename stdType<T>::Type>( (typename stdType<T>::Type*) x->get_data_ptr(), x->get_size(0), x->get_size(1), false, true );
-  }
-
-  /**
-   * @brief Creates an Armadillo matrix from a two-dimensional hoNDArray.
-   * @param[in] x Input array.
-   * @return An Armadillo array mapped to the data pointer of the hoNDArray.
-   */
-  template<class T> const arma::Mat<typename stdType<T>::Type> as_arma_matrix( const hoNDArray<T> *x )
-  {
-    if( x->get_number_of_dimensions() != 2 )
-      throw std::runtime_error("Wrong number of dimensions. Cannot convert hoNDArray to matrix");
-    return arma::Mat<typename stdType<T>::Type>( (typename stdType<T>::Type*) x->get_data_ptr(), x->get_size(0), x->get_size(1), false, true );
-  }
-  
-  /**
-   * @brief Creates an Armadillo column vector from an arbitrary-dimensional hoNDArray.
-   * @param[in] x Input array.
-   * @return An Armadillo array mapped to the data pointer of the hoNDArray.
-   */
-  template<class T> arma::Col<typename stdType<T>::Type > as_arma_col( hoNDArray<T> *x )
-  {
-    return arma::Col<typename stdType<T>::Type>( (typename stdType<T>::Type*) x->get_data_ptr(), x->get_number_of_elements(), false, true );
-  }
-
-  /**
-   * @brief Creates an Armadillo column vector from an arbitrary-dimensional hoNDArray.
-   * @param[in] x Input array.
-   * @return An Armadillo array mapped to the data pointer of the hoNDArray.
-   */
-  template<class T> const arma::Col<typename stdType<T>::Type > as_arma_col( const hoNDArray<T> *x )
-  {
-    return arma::Col<typename stdType<T>::Type>( (typename stdType<T>::Type*) x->get_data_ptr(), x->get_number_of_elements(), false, true );
-  }
-
-  /**
-     * @brief Creates an Armadillo row vector from an arbitrary-dimensional hoNDArray.
-     * @param[in] x Input array.
-     * @return An Armadillo array mapped to the data pointer of the hoNDArray.
-     */
-    template<class T> arma::Row<typename stdType<T>::Type > as_arma_row( hoNDArray<T> *x )
-    {
-      return arma::Row<typename stdType<T>::Type>( (typename stdType<T>::Type*) x->get_data_ptr(), x->get_number_of_elements(), false, true );
-    }
-
-    /**
-     * @brief Creates an Armadillo row vector from an arbitrary-dimensional hoNDArray.
-     * @param[in] x Input array.
-     * @return An Armadillo array mapped to the data pointer of the hoNDArray.
-     */
-    template<class T> const arma::Row<typename stdType<T>::Type > as_arma_row( const hoNDArray<T> *x )
-    {
-      return arma::Row<typename stdType<T>::Type>( (typename stdType<T>::Type*) x->get_data_ptr(), x->get_number_of_elements(), false, true );
-    }
-}
diff --git a/toolboxes/core/cpu/arma_math/hoNDArray_blas.cpp b/toolboxes/core/cpu/arma_math/hoNDArray_blas.cpp
deleted file mode 100644
index 47c793b..0000000
--- a/toolboxes/core/cpu/arma_math/hoNDArray_blas.cpp
+++ /dev/null
@@ -1,648 +0,0 @@
-#include "hoNDArray_blas.h"
-
-namespace Gadgetron{
-
-    template<class T> T dot( hoNDArray<T> *x, hoNDArray<T> *y, bool cc )
-    {
-        if( x == 0x0 || y == 0x0 )
-            throw std::runtime_error("Gadgetron::dot(): Invalid input array");
-
-        if( x->get_number_of_elements() != y->get_number_of_elements() )
-            throw std::runtime_error("Gadgetron::dot(): Array sizes mismatch");
-
-        arma::Col<typename stdType<T>::Type> xM = as_arma_col(x);
-        arma::Col<typename stdType<T>::Type> yM = as_arma_col(y);
-        typename stdType<T>::Type res = (cc) ? arma::cdot(xM,yM) : arma::dot(xM,yM);
-        return *((T*)(&res));
-    }
-
-    template<class T> typename realType<T>::Type asum( hoNDArray<T> *x )
-    {
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::asum(): Invalid input array");
-
-        typedef typename realType<T>::Type realT;
-        arma::Col<typename stdType<T>::Type> xM = as_arma_col(x);
-        return realT(arma::norm(xM,1));
-    }
-
-    template<class T> T asum( hoNDArray< std::complex<T> > *x )
-    {
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::asum(): Invalid input array");
-
-        return arma::norm(arma::abs(real(as_arma_col(x)))+arma::abs(imag(as_arma_col(x))),1);
-    }
-
-    template<class T> T asum( hoNDArray< complext<T> > *x )
-    {
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::asum(): Invalid input array");
-
-        return arma::norm(arma::abs(real(as_arma_col(x)))+arma::abs(imag(as_arma_col(x))),1);
-    }
-
-    template<class T> typename realType<T>::Type nrm2( hoNDArray<T> *x )
-    {
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::nrm2(): Invalid input array");
-
-        typedef typename realType<T>::Type realT;
-        arma::Col<typename stdType<T>::Type> xM = as_arma_col(x);
-        return realT(arma::norm(xM,2));
-    }
-
-    template<class T> typename realType<T>::Type nrm1( hoNDArray<T> *x )
-    {
-        if( x == 0x0 )
-            BOOST_THROW_EXCEPTION(runtime_error("Gadgetron::nrm1(): Invalid input array"));
-
-        typedef typename realType<T>::Type realT;
-        arma::Col<typename stdType<T>::Type> xM = as_arma_col(x);
-        return realT(arma::norm(xM,1));
-    }
-
-    template<class T> size_t amin( hoNDArray<T> *x )
-    {
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::amin(): Invalid input array");
-
-        typedef typename realType<T>::Type realT;
-        arma::Col<realT> xM = arma::abs(as_arma_col(x));
-	arma::uword idx;
-        realT min = xM.min(idx);
-        return idx;
-    }
-
-    template<class T> size_t amin( hoNDArray< std::complex<T> > *x )
-    {
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::amin(): Invalid input array");
-
-        arma::Col<T> xM = arma::abs(real(as_arma_col(x)))+arma::abs(imag(as_arma_col(x)));
-	arma::uword idx;
-        T min = xM.min(idx);
-        return idx;
-    }
-
-    template<class T> size_t amin( hoNDArray< complext<T> > *x )
-    {
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::amin(): Invalid input array");
-
-        arma::Col<T> xM = arma::abs(real(as_arma_col(x)))+arma::abs(imag(as_arma_col(x)));
-	arma::uword idx;
-        T min = xM.min(idx);
-        return idx;
-    }
-
-    template<class T> size_t amax( hoNDArray<T> *x )
-    {
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::amax(): Invalid input array");
-
-        typedef typename realType<T>::Type realT;
-        arma::Col<realT> xM = arma::abs(as_arma_col(x));
-	arma::uword idx;
-        realT max = xM.max(idx);
-        return idx;
-    }
-
-    template<class T> size_t amax( hoNDArray< std::complex<T> > *x )
-    {
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::amax(): Invalid input array");
-
-        arma::Col<T> xM = arma::abs(real(as_arma_col(x)))+arma::abs(imag(as_arma_col(x)));
-	arma::uword idx;
-        T max = xM.max(idx);
-        return idx;
-    }
-
-    template<class T> size_t amax( hoNDArray< complext<T> > *x )
-    {
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::amax(): Invalid input array");
-
-        arma::Col<T> xM = arma::abs(real(as_arma_col(x)))+arma::abs(imag(as_arma_col(x)));
-	arma::uword idx;
-        T max = xM.max(idx);
-        return idx;
-    }
-
-    template<class T> void axpy( T a, hoNDArray<T> *x, hoNDArray<T> *y )
-    {
-        if( x == 0x0 || y == 0x0 )
-            throw std::runtime_error("Gadgetron::axpy(): Invalid input array");
-
-        if( x->get_number_of_elements() != y->get_number_of_elements() )
-            throw std::runtime_error("Gadgetron::axpy(): Array sizes mismatch");
-
-        typedef typename stdType<T>::Type stdT;
-        arma::Col<stdT> xM = as_arma_col(x);
-        arma::Col<stdT> yM = as_arma_col(y);
-        stdT a2 = *((stdT*)(&a));
-        yM += (a2*xM);
-    }
-
-    #ifdef USE_MKL
-
-    template<> float nrm1( hoNDArray<float> *x )
-    {
-        if ( x == NULL ) return 0;
-        MKL_INT N = x->get_number_of_elements();
-        MKL_INT incx = 1;
-        return(sasum(&N, x->begin(), &incx));
-    }
-
-    template<> double nrm1( hoNDArray<double> *x )
-    {
-        if ( x == NULL ) return 0;
-        MKL_INT N = x->get_number_of_elements();
-        MKL_INT incx = 1;
-        return(dasum(&N, x->begin(), &incx));
-    }
-
-    // BLAS dotc and dotu
-    // res = conj(x) dot y
-    GT_Complex8 dotc(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y)
-    {
-        if ( x.get_number_of_elements() != y.get_number_of_elements() )
-        {
-            GADGET_ERROR_MSG("dotc(x, y), inputs have differnet length ...");
-            return 0.0;
-        }
-
-        MKL_INT N = x.get_number_of_elements();
-        MKL_INT incx(1), incy(1);
-        GT_Complex8 r;
-        cdotc(reinterpret_cast<MKL_Complex8*>(&r), &N, reinterpret_cast<const MKL_Complex8*>(x.begin()), &incx, reinterpret_cast<const MKL_Complex8*>(y.begin()), &incy);
-        return r;
-    }
-
-    GT_Complex16 dotc(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y)
-    {
-        if ( x.get_number_of_elements() != y.get_number_of_elements() )
-        {
-            GADGET_ERROR_MSG("dotc(x, y), inputs have differnet length ...");
-            return 0;
-        }
-
-        MKL_INT N = x.get_number_of_elements();
-        MKL_INT incx(1), incy(1);
-        GT_Complex16 r;
-        zdotc(reinterpret_cast<MKL_Complex16*>(&r), &N, reinterpret_cast<const MKL_Complex16*>(x.begin()), &incx, reinterpret_cast<const MKL_Complex16*>(y.begin()), &incy);
-        return r;
-    }
-
-    // res = x dot y
-    GT_Complex8 dotu(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y)
-    {
-        if ( x.get_number_of_elements() != y.get_number_of_elements() )
-        {
-            GADGET_ERROR_MSG("dotu(x, y), inputs have differnet length ...");
-            return 0;
-        }
-
-        MKL_INT N = x.get_number_of_elements();
-        MKL_INT incx(1), incy(1);
-        GT_Complex8 r;
-        cdotu(reinterpret_cast<MKL_Complex8*>(&r), &N, reinterpret_cast<const MKL_Complex8*>(x.begin()), &incx, reinterpret_cast<const MKL_Complex8*>(y.begin()), &incy);
-        return r;
-    }
-
-    GT_Complex16 dotu(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y)
-    {
-        if ( x.get_number_of_elements() != y.get_number_of_elements() )
-        {
-            GADGET_ERROR_MSG("dotu(x, y), inputs have differnet length ...");
-            return 0;
-        }
-
-        MKL_INT N = x.get_number_of_elements();
-        MKL_INT incx(1), incy(1);
-        GT_Complex16 r;
-        zdotu(reinterpret_cast<MKL_Complex16*>(&r), &N, reinterpret_cast<const MKL_Complex16*>(x.begin()), &incx, reinterpret_cast<const MKL_Complex16*>(y.begin()), &incy);
-        return r;
-    }
-
-    // other variants for axpy
-    // r = a*x+y
-    bool axpy(float a, const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r)
-    {
-        try
-        {
-            GADGET_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-
-            if ( r.get_number_of_elements() != x.get_number_of_elements() )
-            {
-                r = y;
-            }
-            else
-            {
-                if ( &r != &y )
-                {
-                    memcpy(r.begin(), y.begin(), r.get_number_of_bytes());
-                }
-            }
-
-            MKL_INT N = (MKL_INT)(x.get_number_of_elements());
-            const MKL_INT incX(1), incY(1);
-
-            cblas_saxpy (N, a, x.begin(), incX, r.begin(), incY);
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in axpy(float a, const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    bool axpy(double a, const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r)
-    {
-        try
-        {
-            GADGET_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-
-            if ( r.get_number_of_elements() != x.get_number_of_elements() )
-            {
-                r = y;
-            }
-            else
-            {
-                if ( &r != &y )
-                {
-                    memcpy(r.begin(), y.begin(), r.get_number_of_bytes());
-                }
-            }
-
-            MKL_INT N = (MKL_INT)(x.get_number_of_elements());
-            const MKL_INT incX(1), incY(1);
-
-            cblas_daxpy (N, a, x.begin(), incX, r.begin(), incY);
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in axpy(double a, const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    bool axpy(const GT_Complex8& a, const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, hoNDArray<GT_Complex8>& r)
-    {
-        try
-        {
-            GADGET_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-
-            if ( r.get_number_of_elements() != x.get_number_of_elements() )
-            {
-                r = y;
-            }
-            else
-            {
-                if ( &r != &y )
-                {
-                    memcpy(r.begin(), y.begin(), r.get_number_of_bytes());
-                }
-            }
-
-            MKL_INT N = (MKL_INT)(x.get_number_of_elements());
-            const MKL_INT incX(1), incY(1);
-
-            cblas_caxpy (N, &a, x.begin(), incX, r.begin(), incY);
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in axpy(const GT_Complex8& a, const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, hoNDArray<GT_Complex8>& r) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    bool axpy(const GT_Complex16& a, const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, hoNDArray<GT_Complex16>& r)
-    {
-        try
-        {
-            GADGET_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-
-            if ( r.get_number_of_elements() != x.get_number_of_elements() )
-            {
-                r = y;
-            }
-            else
-            {
-                if ( &r != &y )
-                {
-                    memcpy(r.begin(), y.begin(), r.get_number_of_bytes());
-                }
-            }
-
-            MKL_INT N = (MKL_INT)(x.get_number_of_elements());
-            const MKL_INT incX(1), incY(1);
-
-            cblas_zaxpy (N, &a, x.begin(), incX, r.begin(), incY);
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in axpy(const GT_Complex16& a, const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, hoNDArray<GT_Complex16>& r) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    // vector-scalar product
-    // r = a*x
-    bool scal(float a, hoNDArray<float>& x)
-    {
-        try
-        {
-            cblas_sscal ((MKL_INT)(x.get_number_of_elements()), a, x.begin(), 1);
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in scal(float a, hoNDArray<float>& x) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    bool scal(double a, hoNDArray<double>& x)
-    {
-        try
-        {
-            cblas_dscal ((MKL_INT)(x.get_number_of_elements()), a, x.begin(), 1);
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in scal(double a, hoNDArray<double>& x) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    bool scal(float a, hoNDArray<GT_Complex8>& x)
-    {
-        try
-        {
-            GT_Complex8 alpha = GT_Complex8(a);
-            cblas_cscal (x.get_number_of_elements(), &alpha, x.begin(), 1);
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in scal(float a, hoNDArray<GT_Complex8>& x) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    bool scal(double a, hoNDArray<GT_Complex16>& x)
-    {
-        try
-        {
-            GT_Complex16 alpha = GT_Complex16(a);
-            cblas_zscal (x.get_number_of_elements(), &alpha, x.begin(), 1);
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in scal(double a, hoNDArray<GT_Complex16>& x) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    bool scal(GT_Complex8 a, hoNDArray<GT_Complex8>& x)
-    {
-        try
-        {
-            cblas_cscal (x.get_number_of_elements(), &a, x.begin(), 1);
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in scal(GT_Complex8 a, hoNDArray<GT_Complex8>& x) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    bool scal(GT_Complex16 a, hoNDArray<GT_Complex16>& x)
-    {
-        try
-        {
-            cblas_zscal (x.get_number_of_elements(), &a, x.begin(), 1);
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in scal(GT_Complex16 a, hoNDArray<GT_Complex16>& x) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    // -----------------------
-
-    bool scal(float a, float*x, long long N)
-    {
-        try
-        {
-            cblas_sscal ((MKL_INT)(N), a, x, 1);
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in scal(float a, float*x, long long N) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    bool scal(double a, double*x, long long N)
-    {
-        try
-        {
-            cblas_dscal ((MKL_INT)(N), a, x, 1);
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in scal(double a, double*x, long long N) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    bool scal(float a, GT_Complex8*x, long long N)
-    {
-        try
-        {
-            GT_Complex8 alpha = GT_Complex8(a);
-            cblas_cscal (N, &alpha, x, 1);
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in scal(float a, GT_Complex8*x, long long N) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    bool scal(double a, GT_Complex16*x, long long N)
-    {
-        try
-        {
-            GT_Complex16 alpha = GT_Complex16(a);
-            cblas_zscal (N, &alpha, x, 1);
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in scal(double a, GT_Complex16*x, long long N) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    bool scal(GT_Complex8 a, GT_Complex8*x, long long N)
-    {
-        try
-        {
-            cblas_cscal (N, &a, x, 1);
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in scal(GT_Complex8 a, GT_Complex8*x, long long N) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    bool scal(GT_Complex16 a, GT_Complex16*x, long long N)
-    {
-        try
-        {
-            cblas_zscal (N, &a, x, 1);
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in scal(GT_Complex16 a, GT_Complex16*x, long long N) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    // sort the vector
-    // isascending: true for ascending and false for descending
-    bool sort(const hoNDArray<float>& x, hoNDArray<float>& r, bool isascending)
-    {
-        if ( &r != &x )
-        {
-            if ( r.get_number_of_elements()!=x.get_number_of_elements())
-            {
-                r = x;
-            }
-            else
-            {
-                memcpy(r.begin(), x.begin(), x.get_number_of_bytes());
-            }
-        }
-
-        if ( isascending )
-        {
-            GADGET_CHECK_RETURN_FALSE(LAPACKE_slasrt('I', r.get_number_of_elements(), r.begin())==0);
-        }
-        else
-        {
-            GADGET_CHECK_RETURN_FALSE(LAPACKE_slasrt('D', r.get_number_of_elements(), r.begin())==0);
-        }
-
-        return true;
-    }
-
-    bool sort(const hoNDArray<double>& x, hoNDArray<double>& r, bool isascending)
-    {
-        if ( &r != &x )
-        {
-            if ( r.get_number_of_elements()!=x.get_number_of_elements())
-            {
-                r = x;
-            }
-            else
-            {
-                memcpy(r.begin(), x.begin(), x.get_number_of_bytes());
-            }
-        }
-
-        if ( isascending )
-        {
-            GADGET_CHECK_RETURN_FALSE(LAPACKE_dlasrt('I', r.get_number_of_elements(), r.begin())==0);
-        }
-        else
-        {
-            GADGET_CHECK_RETURN_FALSE(LAPACKE_dlasrt('D', r.get_number_of_elements(), r.begin())==0);
-        }
-
-        return true;
-    }
-
-    #endif // USE_MKL
-
-    //
-    // Instantiation
-    //
-
-    template EXPORTCPUCOREMATH float dot<float>( hoNDArray<float>*, hoNDArray<float>*, bool );
-    template EXPORTCPUCOREMATH float asum<float>( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH float nrm2<float>( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH size_t amin<float>( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH size_t amax<float>( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH void axpy<float>( float, hoNDArray<float>*, hoNDArray<float>* );
-
-    template EXPORTCPUCOREMATH double dot<double>( hoNDArray<double>*, hoNDArray<double>*, bool );
-    template EXPORTCPUCOREMATH double asum<double>( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH double nrm2<double>( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH size_t amin<double>( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH size_t amax<double>( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH void axpy<double>( double, hoNDArray<double>*, hoNDArray<double>* );
-
-    template EXPORTCPUCOREMATH std::complex<float> dot< std::complex<float> >( hoNDArray< std::complex<float> >*, hoNDArray< std::complex<float> >*, bool );
-    template EXPORTCPUCOREMATH float asum<float>( hoNDArray< std::complex<float> >* );
-    template EXPORTCPUCOREMATH float nrm2< std::complex<float> >( hoNDArray< std::complex<float> >* );
-    template EXPORTCPUCOREMATH size_t amin<float>( hoNDArray< std::complex<float> >* );
-    template EXPORTCPUCOREMATH size_t amax<float>( hoNDArray< std::complex<float> >* );
-    template EXPORTCPUCOREMATH void axpy< std::complex<float> >( std::complex<float> , hoNDArray< std::complex<float> >*, hoNDArray< std::complex<float> >* );
-
-    template EXPORTCPUCOREMATH std::complex<double> dot< std::complex<double> >( hoNDArray< std::complex<double> >*, hoNDArray< std::complex<double> >*, bool );
-    template EXPORTCPUCOREMATH double asum<double>( hoNDArray< std::complex<double> >* );
-    template EXPORTCPUCOREMATH double nrm2< std::complex<double> >( hoNDArray< std::complex<double> >* );
-    template EXPORTCPUCOREMATH size_t amin<double>( hoNDArray< std::complex<double> >* );
-    template EXPORTCPUCOREMATH size_t amax<double>( hoNDArray< std::complex<double> >* );
-    template EXPORTCPUCOREMATH void axpy< std::complex<double> >( std::complex<double> , hoNDArray< std::complex<double> >*, hoNDArray< std::complex<double> >* );
-
-    template EXPORTCPUCOREMATH complext<float> dot< complext<float> >( hoNDArray< complext<float> >*, hoNDArray< complext<float> >*, bool );
-    template EXPORTCPUCOREMATH float asum<float>( hoNDArray< complext<float> >* );
-    template EXPORTCPUCOREMATH float nrm2< complext<float> >( hoNDArray< complext<float> >* );
-    template EXPORTCPUCOREMATH size_t amin<float>( hoNDArray< complext<float> >* );
-    template EXPORTCPUCOREMATH size_t amax<float>( hoNDArray< complext<float> >* );
-    template EXPORTCPUCOREMATH void axpy< complext<float> >( complext<float> , hoNDArray< complext<float> >*, hoNDArray< complext<float> >* );
-
-    template EXPORTCPUCOREMATH complext<double> dot< complext<double> >( hoNDArray< complext<double> >*, hoNDArray< complext<double> >*, bool );
-    template EXPORTCPUCOREMATH double asum<double>( hoNDArray< complext<double> >* );
-    template EXPORTCPUCOREMATH double nrm2< complext<double> >( hoNDArray< complext<double> >* );
-    template EXPORTCPUCOREMATH size_t amin<double>( hoNDArray< complext<double> >* );
-    template EXPORTCPUCOREMATH size_t amax<double>( hoNDArray< complext<double> >* );
-    template EXPORTCPUCOREMATH void axpy< complext<double> >( complext<double> , hoNDArray< complext<double> >*, hoNDArray< complext<double> >* );
-}
diff --git a/toolboxes/core/cpu/arma_math/hoNDArray_blas.h b/toolboxes/core/cpu/arma_math/hoNDArray_blas.h
deleted file mode 100644
index d82a12c..0000000
--- a/toolboxes/core/cpu/arma_math/hoNDArray_blas.h
+++ /dev/null
@@ -1,181 +0,0 @@
-/** \file hoNDArray_blas.h
-    \brief BLAS level-1 functions on the hoNDArray class.
-    
-    hoNDArray_blas.h provides BLAS level-1 functions on the hoNDArray class.
-    The hoNDArray is temporarily reshaped to a column vector for the respective operations.
-    The implementation is based on Armadillo.
-    This code is purposely split into a header and underlying implementation (.cpp) 
-    as this allows specific instantiation of the supported template types.     
-    The supported types are float, double, std::complex<float>, std::complex<double>, 
-    Gadgetron::complext<float>, and Gadgetron::complext<double>.
-    There are currently no amin and amax functions instantiated for complex types 
-    since Armadillo lacks an obvious method to compute the element-wise l1-norm.
-*/
-
-#pragma once
-
-#include "hoNDArray.h"
-#include "hoArmadillo.h"
-#include "complext.h"
-#include "cpucore_math_export.h"
-#include "GadgetronCommon.h"
-#include <complex>
-
-#ifdef USE_MKL
-#include "mkl.h"
-#endif // USE_MKL
-
-#ifdef GT_Complex8
-#undef GT_Complex8
-#endif // GT_Complex8
-typedef std::complex<float> GT_Complex8;
-
-#ifdef GT_Complex16
-#undef GT_Complex16
-#endif // GT_Complex16
-typedef std::complex<double> GT_Complex16;
-
-namespace Gadgetron{
-
-  /**
-   * @brief Calculates the dot product of two arrays (as vectors).
-   * @param[in] x Array 1. For complex arrays the complex conjugate of x is used.
-   * @param[in] y Array 2.
-   * @param[in] cc Specifies whether to use the complex conjugate of x (when applicable).
-   * @return The dot product of x and y
-   */
-  template<class T> EXPORTCPUCOREMATH T dot( hoNDArray<T> *x, hoNDArray<T> *y, bool cc = true );
-
-  /**
-   * @brief Calculates the sum of the l1-norms of the array entries
-   * @param[in] arr Input array
-   * @return The l1-norm of the array
-   */
-  template<class T> EXPORTCPUCOREMATH typename realType<T>::Type asum( hoNDArray<T> *x );
-
-  /**
-   * @brief Calculates the sum of the l1-norms of the array entries
-   * @param[in] arr Input array
-   * @return The l1-norm of the array
-   */
-  template<class T> EXPORTCPUCOREMATH T asum( hoNDArray< std::complex<T> > *x );
-
-  /**
-   * @brief Calculates the sum of the l1-norms of the array entries
-   * @param[in] arr Input array
-   * @return The l1-norm of the array
-   */
-  template<class T> EXPORTCPUCOREMATH T asum( hoNDArray< complext<T> > *x );
-
-  /**
-   * @brief Calculates the l2-norm of the array (as a vector)
-   * @param[in] arr Input array
-   * @return The l2-norm of the array
-   */
-  template<class T> EXPORTCPUCOREMATH typename realType<T>::Type nrm2( hoNDArray<T> *x );
-
-  /**
-   * @brief Calculates the l1-norm of the array (as a vector)
-   * @param[in] arr Input array
-   * @return The l1-norm of the array
-   */
-  template<class T> EXPORTCPUCOREMATH typename realType<T>::Type nrm1( hoNDArray<T> *x );
-
-  /**
-   * @brief Returns the index of the array element with the smallest absolute value (l1 norm)
-   * @param[in] x Input data
-   * @return The array index corresponding to the smallest element in the array (0-indexing)
-   */
-  template<class T> EXPORTCPUCOREMATH size_t amin( hoNDArray<T> *x );
- 
-  /**
-   * @brief Returns the index of the array element with the smallest absolute value (l1 norm)
-   * @param[in] x Input data
-   * @return The array index corresponding to the smallest element in the array (0-indexing)
-   */
-  template<class T> EXPORTCPUCOREMATH size_t amin( hoNDArray< std::complex<T> > *x );
-
-  /**
-   * @brief Returns the index of the array element with the smallest absolute value (l1 norm)
-   * @param[in] x Input data
-   * @return The array index corresponding to the smallest element in the array (0-indexing)
-   */
-  template<class T> EXPORTCPUCOREMATH size_t amin( hoNDArray< complext<T> > *x );
-
-  /**
-   * @brief Returns the index of the array element with the largest absolute value (l1-norm)
-   * @param[in] x Input data
-   * @return The array index corresponding to the largest element in the array (0-indexing)
-   */
-  template<class T> EXPORTCPUCOREMATH size_t amax( hoNDArray<T> *x );
-
-  /**
-   * @brief Returns the index of the array element with the largest absolute value (l1-norm)
-   * @param[in] x Input data
-   * @return The array index corresponding to the largest element in the array (0-indexing)
-   */
-  template<class T> EXPORTCPUCOREMATH size_t amax( hoNDArray< std::complex<T> > *x );
-
-  /**
-   * @brief Returns the index of the array element with the largest absolute value (l1-norm)
-   * @param[in] x Input data
-   * @return The array index corresponding to the largest element in the array (0-indexing)
-   */
-  template<class T> EXPORTCPUCOREMATH size_t amax( hoNDArray< complext<T> > *x );
-
-  /**
-   * @brief Calculates y = a*x+y in which x and y are considered as vectors
-   * @param[in] a Scalar value
-   * @param[in] x Array
-   * @param[in,out] y Array
-   */
-  template<class T> EXPORTCPUCOREMATH void axpy( T a, hoNDArray<T> *x, hoNDArray<T> *y );
-
-  /**
-   * Besides the functions calling the arma, there are some more functions directly calling the MKL routines
-   */
-
-#ifdef USE_MKL
-
-  template<> EXPORTCPUCOREMATH float nrm1( hoNDArray<float> *x );
-  template<> EXPORTCPUCOREMATH double nrm1( hoNDArray<double> *x );
-
-  // BLAS dotc and dotu
-  // res = conj(x) dot y
-  EXPORTCPUCOREMATH GT_Complex8 dotc(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y);
-  EXPORTCPUCOREMATH GT_Complex16 dotc(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y);
-
-  // res = x dot y
-  EXPORTCPUCOREMATH GT_Complex8 dotu(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y);
-  EXPORTCPUCOREMATH GT_Complex16 dotu(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y);
-
-  // other variants for axpy
-  // r = a*x+y
-  EXPORTCPUCOREMATH bool axpy(float a, const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r);
-  EXPORTCPUCOREMATH bool axpy(double a, const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
-  EXPORTCPUCOREMATH bool axpy(const GT_Complex8& a, const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, hoNDArray<GT_Complex8>& r);
-  EXPORTCPUCOREMATH bool axpy(const GT_Complex16& a, const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, hoNDArray<GT_Complex16>& r);
-
-  // vector-scalar product
-  // r = a*x
-  EXPORTCPUCOREMATH bool scal(float a, hoNDArray<float>& x);
-  EXPORTCPUCOREMATH bool scal(double a, hoNDArray<double>& x);
-  EXPORTCPUCOREMATH bool scal(float a, hoNDArray<GT_Complex8>& x);
-  EXPORTCPUCOREMATH bool scal(double a, hoNDArray<GT_Complex16>& x);
-  EXPORTCPUCOREMATH bool scal(GT_Complex8 a, hoNDArray<GT_Complex8>& x);
-  EXPORTCPUCOREMATH bool scal(GT_Complex16 a, hoNDArray<GT_Complex16>& x);
-
-  EXPORTCPUCOREMATH bool scal(float a, float*x, long long N);
-  EXPORTCPUCOREMATH bool scal(double a, double*x, long long N);
-  EXPORTCPUCOREMATH bool scal(float a, GT_Complex8*x, long long N);
-  EXPORTCPUCOREMATH bool scal(double a, GT_Complex16*x, long long N);
-  EXPORTCPUCOREMATH bool scal(GT_Complex8 a, GT_Complex8*x, long long N);
-  EXPORTCPUCOREMATH bool scal(GT_Complex16 a, GT_Complex16*x, long long N);
-
-  // sort the vector
-  // isascending: true for ascending and false for descending
-  EXPORTCPUCOREMATH bool sort(const hoNDArray<float>& x, hoNDArray<float>& r, bool isascending);
-  EXPORTCPUCOREMATH bool sort(const hoNDArray<double>& x, hoNDArray<double>& r, bool isascending);
-
-#endif // USE_MKL
-}
diff --git a/toolboxes/core/cpu/arma_math/hoNDArray_elemwise.cpp b/toolboxes/core/cpu/arma_math/hoNDArray_elemwise.cpp
deleted file mode 100644
index 6084f76..0000000
--- a/toolboxes/core/cpu/arma_math/hoNDArray_elemwise.cpp
+++ /dev/null
@@ -1,4810 +0,0 @@
-#include "hoNDArray_elemwise.h"
-#include "hoNDArray_operators.h"
-#include "hoNDArray_blas.h"
-#include "complext.h"
-#include "hoArmadillo.h"
-
-
-#ifdef USE_OMP
-#include <omp.h>
-#endif
-
-namespace Gadgetron{
-
-    template<class T> boost::shared_ptr< hoNDArray<typename realType<T>::Type> > abs( hoNDArray<T> *x )
-    { 
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::abs(): Invalid input array");
-
-        boost::shared_ptr< hoNDArray<typename realType<T>::Type> > result(new hoNDArray<typename realType<T>::Type>());
-        result->create(x->get_dimensions());
-        arma::Col<typename realType<T>::Type> aRes = as_arma_col(result.get());
-        aRes = arma::abs(as_arma_col(x));
-        return result;
-    }
-
-    template<class T> void abs_inplace( hoNDArray<T> *x )
-    { 
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::abs_inplace(): Invalid input array");
-
-        arma::Col<typename realType<T>::Type> aRes = as_arma_col(x);
-        aRes = arma::abs(aRes);
-    }  
-
-    template<class T> boost::shared_ptr< hoNDArray<typename realType<T>::Type> > abs_square( hoNDArray<T> *x )
-    { 
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::abs_square(): Invalid input array");
-
-        boost::shared_ptr< hoNDArray<typename realType<T>::Type> > result(new hoNDArray<typename realType<T>::Type>());
-        result->create(x->get_dimensions());
-        arma::Col<typename realType<T>::Type> aRes = as_arma_col(result.get());
-        aRes = arma::square(abs(as_arma_col(x)));
-        return result;
-    }
-
-    template<class T> boost::shared_ptr< hoNDArray<T> > sqrt( hoNDArray<T> *x )
-    { 
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::sqrt(): Invalid input array");
-
-        boost::shared_ptr< hoNDArray<T> > result(new hoNDArray<T>());
-        result->create(x->get_dimensions());
-        arma::Col<typename stdType<T>::Type> aRes = as_arma_col(result.get());
-        aRes = arma::sqrt(as_arma_col(x));
-        return result;
-    }
-
-    template<class T> void sqrt_inplace( hoNDArray<T> *x )
-    { 
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::sqrt_inplace(): Invalid input array");
-
-        arma::Col<typename stdType<T>::Type> aRes = as_arma_col(x);
-        aRes = arma::sqrt(aRes);
-    }
-
-    template<class T> boost::shared_ptr< hoNDArray<T> > square( hoNDArray<T> *x )
-    { 
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::square(): Invalid input array");
-
-        boost::shared_ptr< hoNDArray<T> > result(new hoNDArray<T>());
-        result->create(x->get_dimensions());
-        arma::Col<typename stdType<T>::Type> aRes = as_arma_col(result.get());
-        aRes = arma::square(as_arma_col(x));
-        return result;
-    }
-
-    template<class T> void square_inplace( hoNDArray<T> *x )
-    { 
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::square_inplace(): Invalid input array");
-
-        arma::Col<typename stdType<T>::Type> aRes = as_arma_col(x);
-        aRes = arma::square(aRes);
-    }  
-
-    template<class T> boost::shared_ptr< hoNDArray<T> > reciprocal( hoNDArray<T> *x )
-    { 
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::reciprocal(): Invalid input array");
-
-        arma::Col<typename stdType<T>::Type> ones(x->get_number_of_elements());
-        ones.ones();
-        boost::shared_ptr< hoNDArray<T> > result(new hoNDArray<T>());
-        result->create(x->get_dimensions());
-        arma::Col<typename stdType<T>::Type> aRes = as_arma_col(result.get());
-        aRes = ones/as_arma_col(x);
-        return result;
-    }
-
-    template<class T> void reciprocal_inplace( hoNDArray<T> *x )
-    { 
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::reciprocal_inplace(): Invalid input array");
-
-        arma::Col<typename stdType<T>::Type> aRes = as_arma_col(x);
-        arma::Col<typename stdType<T>::Type> ones(x->get_number_of_elements());
-        ones.ones();
-        aRes = ones/aRes;
-    }
-
-    template<class T> boost::shared_ptr< hoNDArray<T> > reciprocal_sqrt( hoNDArray<T> *x )
-    { 
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::reciprocal_sqrt(): Invalid input array");
-
-        arma::Col<typename stdType<T>::Type> ones(x->get_number_of_elements());
-        ones.ones();   
-        boost::shared_ptr< hoNDArray<T> > result(new hoNDArray<T>());
-        result->create(x->get_dimensions());
-        arma::Col<typename stdType<T>::Type> aRes = as_arma_col(result.get());
-        aRes = ones/arma::sqrt(as_arma_col(x));
-        return result;
-    }
-
-    template<class T> void reciprocal_sqrt_inplace( hoNDArray<T> *x )
-    { 
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::reciprocal_sqrt_inplace(): Invalid input array");
-
-        arma::Col<typename stdType<T>::Type> ones(x->get_number_of_elements());
-        ones.ones();
-        arma::Col<typename stdType<T>::Type> aRes = as_arma_col(x);
-        aRes = ones/arma::sqrt(aRes);
-    }
-
-    template<class T> boost::shared_ptr< hoNDArray<T> > sgn( hoNDArray<T> *x )
-    {
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::sgn(): Invalid input array");
-
-        boost::shared_ptr< hoNDArray<T> > res( new hoNDArray<T>() );
-        res->create(x->get_dimensions());   
-#ifdef USE_OMP
-#pragma omp parallel for
-#endif
-        for( long long i = 0; i < res->get_number_of_elements(); i++ ){
-            res->get_data_ptr()[i] = sgn(x->get_data_ptr()[i]);
-        }
-        return res;
-    }
-
-    template<class T> void sgn_inplace( hoNDArray<T> *x )
-    {
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::sgn_inplace(): Invalid input array");
-
-#ifdef USE_OMP
-#pragma omp parallel for
-#endif
-        for( long long i = 0; i < x->get_number_of_elements(); i++ ) 
-            x->get_data_ptr()[i] = sgn(x->get_data_ptr()[i]);
-    }
-
-    template<class T> boost::shared_ptr< hoNDArray<typename realType<T>::Type> > real( hoNDArray<T> *x )
-    {
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::real(): Invalid input array");
-
-        boost::shared_ptr< hoNDArray<typename realType<T>::Type> > result(new hoNDArray<typename realType<T>::Type>());
-        result->create(x->get_dimensions());
-        arma::Col<typename realType<T>::Type> aRes = as_arma_col(result.get());
-        aRes = arma::real(as_arma_col(x));
-        return result;
-    }
-
-    template<class T> boost::shared_ptr< hoNDArray<typename realType<T>::Type> > imag( hoNDArray<T> *x )
-    {
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::imag(): Invalid input array");
-
-        boost::shared_ptr< hoNDArray<typename realType<T>::Type> > result(new hoNDArray<typename realType<T>::Type>());
-        result->create(x->get_dimensions());
-        arma::Col<typename realType<T>::Type> aRes = as_arma_col(result.get());
-        aRes = arma::imag(as_arma_col(x));
-        return result;
-    }
-
-    template<class T> boost::shared_ptr< hoNDArray<T> > conj( hoNDArray<T> *x )
-    {
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::conj(): Invalid input array");
-
-        boost::shared_ptr< hoNDArray<T> > result(new hoNDArray<T>());
-        result->create(x->get_dimensions());
-        arma::Col<typename stdType<T>::Type> aRes = as_arma_col(result.get());
-        aRes = arma::conj(as_arma_col(x));
-        return result;
-    }
-
-    template<class T> boost::shared_ptr< hoNDArray<T> > real_to_complex( hoNDArray<typename realType<T>::Type> *x )
-    {
-        if( x == 0x0 )
-            BOOST_THROW_EXCEPTION(runtime_error("Gadgetron::real_to_complex(): Invalid input array"));
-
-        boost::shared_ptr< hoNDArray<T> > result(new hoNDArray<T>());
-        result->create(x->get_dimensions());
-        arma::Col<typename stdType<T>::Type> aRes = as_arma_col(result.get());
-        aRes = arma::Col<typename stdType<T>::Type>(as_arma_col(x), arma::Col<typename realType<T>::Type>(x->get_number_of_elements()).zeros());
-        return result;
-    }
-
-    template<class T> boost::shared_ptr< hoNDArray<T> > real_imag_to_complex( hoNDArray<typename realType<T>::Type>* real, hoNDArray<typename realType<T>::Type>* imag )
-    {
-        if( real==0x0 || imag==0x0 )
-            BOOST_THROW_EXCEPTION(runtime_error("Gadgetron::real_imag_to_complex(): Invalid input array"));
-
-        if( real->get_number_of_elements() != imag->get_number_of_elements() )
-            BOOST_THROW_EXCEPTION(runtime_error("Gadgetron::real_imag_to_complex(): Invalid input array"));
-
-        boost::shared_ptr< hoNDArray<T> > result(new hoNDArray<T>());
-        result->create(real->get_dimensions());
-
-        T* pRes = result->begin();
-
-        size_t N = real->get_number_of_elements();
-        for ( size_t n=0; n<N; n++ )
-        {
-            pRes[n] = T(real->at(n), imag->at(n));
-        }
-
-        return result;
-    }
-
-    template<class T> 
-    bool real_imag_to_complex(const hoNDArray<typename realType<T>::Type>& real, const hoNDArray<typename realType<T>::Type>& imag, hoNDArray<T>& cplx)
-    {
-        try
-        {
-            GADGET_CHECK_RETURN_FALSE(real.dimensions_equal(&imag));
-
-            if ( !cplx.dimensions_equal(&real) )
-            {
-                cplx.create(real.get_dimensions());
-            }
-
-            T* pRes = cplx.begin();
-            const typename realType<T>::Type* pReal = real.begin();
-            const typename realType<T>::Type* pImag = imag.begin();
-
-            size_t N = real.get_number_of_elements();
-
-            long long n;
-            #pragma omp parallel for private(n) shared(N, pRes, pReal, pImag)
-            for ( n=0; n<N; n++ )
-            {
-                pRes[n] = T(pReal[n], pImag[n]);
-            }
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in real_imag_to_complex(...) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    template<class T> 
-    bool complex_to_real_imag(const hoNDArray<T>& cplx, hoNDArray<typename realType<T>::Type>& real, hoNDArray<typename realType<T>::Type>& imag)
-    {
-        try
-        {
-            if ( !real.dimensions_equal(&cplx) )
-            {
-                real.create(cplx.get_dimensions());
-            }
-
-            if ( !imag.dimensions_equal(&cplx) )
-            {
-                imag.create(cplx.get_dimensions());
-            }
-
-            const T* pRes = cplx.begin();
-            typename realType<T>::Type* pReal = real.begin();
-            typename realType<T>::Type* pImag = imag.begin();
-
-            size_t N = real.get_number_of_elements();
-
-            long long n;
-            #pragma omp parallel for default(none) private(n) shared(N, pRes, pReal, pImag)
-            for ( n=0; n<N; n++ )
-            {
-                pReal[n] = pRes[n].real();
-                pImag[n] = pRes[n].imag();
-            }
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in complex_to_real_imag(...) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    template<> 
-    bool complex_to_real_imag(const hoNDArray<float>& cplx, hoNDArray<float>& real, hoNDArray<float>& imag)
-    {
-        try
-        {
-            if ( !real.dimensions_equal(&cplx) )
-            {
-                real.create(cplx.get_dimensions());
-            }
-
-            if ( !imag.dimensions_equal(&cplx) )
-            {
-                imag.create(cplx.get_dimensions());
-            }
-
-            const float* pRes = cplx.begin();
-            float* pReal = real.begin();
-            float* pImag = imag.begin();
-
-            size_t N = real.get_number_of_elements();
-
-            long long n;
-            #pragma omp parallel for default(none) private(n) shared(N, pRes, pReal, pImag)
-            for ( n=0; n<N; n++ )
-            {
-                pReal[n] = pRes[n];
-                pImag[n] = 0;
-            }
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in complex_to_real_imag(...) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    template<> 
-    bool complex_to_real_imag(const hoNDArray<double>& cplx, hoNDArray<double>& real, hoNDArray<double>& imag)
-    {
-        try
-        {
-            if ( !real.dimensions_equal(&cplx) )
-            {
-                real.create(cplx.get_dimensions());
-            }
-
-            if ( !imag.dimensions_equal(&cplx) )
-            {
-                imag.create(cplx.get_dimensions());
-            }
-
-            const double* pRes = cplx.begin();
-            double* pReal = real.begin();
-            double* pImag = imag.begin();
-
-            size_t N = real.get_number_of_elements();
-
-            long long n;
-            #pragma omp parallel for default(none) private(n) shared(N, pRes, pReal, pImag)
-            for ( n=0; n<N; n++ )
-            {
-                pReal[n] = pRes[n];
-                pImag[n] = 0;
-            }
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in complex_to_real_imag(...) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    template<class T> 
-    bool complex_to_real(const hoNDArray<T>& cplx, hoNDArray<typename realType<T>::Type>& real)
-    {
-        try
-        {
-            if ( !real.dimensions_equal(&cplx) )
-            {
-                real.create(cplx.get_dimensions());
-            }
-
-            const T* pRes = cplx.begin();
-            typename realType<T>::Type* pReal = real.begin();
-
-            size_t N = real.get_number_of_elements();
-
-            long long n;
-            #pragma omp parallel for default(none) private(n) shared(N, pRes, pReal)
-            for ( n=0; n<N; n++ )
-            {
-                pReal[n] = pRes[n].real();
-            }
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in complex_to_real(...) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    template<class T> 
-    bool complex_to_imag(const hoNDArray<T>& cplx, hoNDArray<typename realType<T>::Type>& imag)
-    {
-        try
-        {
-            if ( !imag.dimensions_equal(&cplx) )
-            {
-                imag.create(cplx.get_dimensions());
-            }
-
-            const T* pRes = cplx.begin();
-            typename realType<T>::Type* pImag = imag.begin();
-
-            size_t N = imag.get_number_of_elements();
-
-            long long n;
-            #pragma omp parallel for default(none) private(n) shared(N, pRes, pImag)
-            for ( n=0; n<(long long)N; n++ )
-            {
-                pImag[n] = pRes[n].imag();
-            }
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in complex_to_imag(...) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    template<class T> inline void clear( hoNDArray<T> *x )
-    {
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::clear(): Invalid input array");
-
-        if ( x->get_number_of_elements() > 0 )
-        {
-            memset( x->get_data_ptr(), 0, x->get_number_of_elements()*sizeof(T));
-        }
-    }
-
-    template<class T> inline void clear( hoNDArray<T>& x )
-    {
-        if ( x.get_number_of_elements() > 0 )
-        {
-            memset( x.get_data_ptr(), 0, x.get_number_of_elements()*sizeof(T));
-        }
-    }
-
-    template<class T> void fill( hoNDArray<T> *x, T val )
-    {
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::fill(): Invalid input array");
-
-        arma::Col<typename stdType<T>::Type> aRes = as_arma_col(x);
-        aRes.fill(*((typename stdType<T>::Type*)&val));
-    }
-
-    //
-    // TODO:
-    // The clamp functions could (probably) be implemented much like we use Thrust for the device versions
-    // - i.e. using Armadillo's transform on the array.
-    // However this requires a newer version of Armadillo as current Linux distributions provide...
-    //
-
-    template<typename T> struct hoNDA_clamp //: public thrust::unary_function<T,T>
-    {
-      hoNDA_clamp( T _min, T _max, T _min_val, T _max_val ) : min(_min), max(_max), min_val(_min_val), max_val(_max_val) {}
-        T operator()(const T &x) const 
-        {
-            if( x < min ) return min_val;
-            else if ( x >= max) return max_val;
-            else return x;
-        }
-      T min, max;
-      T min_val, max_val;
-    };
-
-    template<typename T> struct hoNDA_clamp< std::complex<T> > //: public thrust::unary_function< std::complex<T>, std::complex<T> >
-    {
-      hoNDA_clamp( T _min, T _max, std::complex<T> _min_val, std::complex<T> _max_val ) : min(_min), max(_max), min_val(_min_val), max_val(_max_val) {}
-        std::complex<T> operator()(const std::complex<T> &x) const 
-        {
-            if( real(x) < min ) return min_val;
-            else if ( real(x) >= max) return max_val;
-            else return std::complex<T>(real(x));
-        }
-      T min, max;
-      std::complex<T> min_val, max_val;
-    };
-
-    template<typename T> struct hoNDA_clamp< complext<T> > //: public thrust::unary_function< complext<T>, complext<T> >
-    {
-        hoNDA_clamp( T _min, T _max, complext<T> _min_val, complext<T> _max_val ) : min(_min), max(_max), min_val(_min_val), max_val(_max_val) {}
-        complext<T> operator()(const complext<T> &x) const 
-        {
-            if( real(x) < min ) return min_val;
-            else if ( real(x) >= max) return max_val;
-            else return complext<T>(real(x));
-        }
-        T min, max;
-        complext<T> min_val, max_val;
-    };
-
-    template<class T> void clamp( hoNDArray<T> *x, 
-                                  typename realType<T>::Type min, typename realType<T>::Type max, T min_val, T max_val )
-    { 
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::clamp(): Invalid input array");
-
-        hoNDA_clamp<T> functor(min, max, min_val, max_val);
-        std::transform(x->begin(),x->end(),x->begin(),functor);
-    }  
-
-    template<class T> void clamp( hoNDArray<T> *x, typename realType<T>::Type min, typename realType<T>::Type max )
-    {
-        clamp(x,min,max,T(min),T(max));
-    }
-
-    template<typename T> struct hoNDA_clamp_min //: public thrust::unary_function<T,T>
-    {
-        hoNDA_clamp_min( T _min ) : min(_min) {}
-        T operator()(const T &x) const 
-        {
-            if( x < min ) return min;
-            else return x;
-        }
-        T min;
-    };
-
-    template<typename T> struct hoNDA_clamp_min< std::complex<T> > //: public thrust::unary_function< std::complex<T>, std::complex<T> >
-    {
-        hoNDA_clamp_min( T _min ) : min(_min) {}
-        std::complex<T> operator()(const std::complex<T> &x) const 
-        {
-            if( real(x) < min ) return std::complex<T>(min);
-            else return std::complex<T>(real(x));
-        }
-        T min;
-    };
-
-    template<typename T> struct hoNDA_clamp_min< complext<T> > //: public thrust::unary_function< complext<T>, complext<T> >
-    {
-        hoNDA_clamp_min( T _min ) : min(_min) {}
-        complext<T> operator()(const complext<T> &x) const 
-        {
-            if( real(x) < min ) return complext<T>(min);
-            else return complext<T>(real(x));
-        }
-        T min;
-    };
-
-    template<class T> void clamp_min( hoNDArray<T> *x, typename realType<T>::Type min )
-    { 
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::clamp_min(): Invalid input array");
-
-        hoNDA_clamp_min<T> functor(min);
-        std::transform(x->begin(),x->end(),x->begin(),functor);
-    }  
-
-    template<typename T> struct hoNDA_clamp_max //: public thrust::unary_function<T,T>
-    {
-        hoNDA_clamp_max( T _max ) : max(_max) {}
-        T operator()(const T &x) const 
-        {
-            if( x > max ) return max;
-            else return x;
-        }
-        T max;
-    };
-
-    template<typename T> struct hoNDA_clamp_max< std::complex<T> > //: public thrust::unary_function< std::complex<T>, std::complex<T> >
-    {
-        hoNDA_clamp_max( T _max ) : max(_max) {}
-        std::complex<T> operator()(const std::complex<T> &x) const 
-        {
-            if( real(x) > max ) return std::complex<T>(max);
-            else return std::complex<T>(real(x));
-        }
-        T max;
-    };
-
-    template<typename T> struct hoNDA_clamp_max< complext<T> > //: public thrust::unary_function< complext<T>, complext<T> >
-    {
-        hoNDA_clamp_max( T _max ) : max(_max) {}
-        complext<T> operator()(const complext<T> &x) const 
-        {
-            if( real(x) > max ) return complext<T>(max);
-            else return complext<T>(real(x));
-        }
-        T max;
-    };
-
-    template<class T> void clamp_max( hoNDArray<T> *x, typename realType<T>::Type max )
-    { 
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::clamp_max(): Invalid input array");
-
-        hoNDA_clamp_max<T> functor(max);
-        std::transform(x->begin(),x->end(),x->begin(),functor);
-    }
-
-    template<class T> void normalize( hoNDArray<T> *x, typename realType<T>::Type val )
-    {
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::normalize(): Invalid input array");
-
-        size_t max_idx = amax(x);
-        T max_val_before = x->get_data_ptr()[max_idx];
-        typename realType<T>::Type scale = val/abs(max_val_before);
-        *x *= scale;
-    }
-
-    template<class T> void shrink1( hoNDArray<T> *x, typename realType<T>::Type gamma, hoNDArray<T> *out )
-    {
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::shrink1(): Invalid input array");
-
-        T *outPtr = (out==0x0) ? x->get_data_ptr() : out->get_data_ptr();
-
-#ifdef USE_OMP
-#pragma omp parallel for
-#endif
-        for( long long i = 0; i < x->get_number_of_elements(); i++ ) {
-            T prev = x->get_data_ptr()[i];
-            typename realType<T>::Type absPrev = abs(prev);
-            T sgnPrev = (absPrev <= typename realType<T>::Type(0)) ? T(0) : prev/absPrev;
-            outPtr[i] = sgnPrev*std::max(absPrev-gamma, typename realType<T>::Type(0));
-        } 
-    }
-
-    template<class T> void pshrink( hoNDArray<T> *x, typename realType<T>::Type gamma,typename realType<T>::Type p, hoNDArray<T> *out )
-    {
-        if( x == 0x0 )
-            throw std::runtime_error("Gadgetron::pshrink(): Invalid input array");
-
-        T *outPtr = (out==0x0) ? x->get_data_ptr() : out->get_data_ptr();
-
-#ifdef USE_OMP
-#pragma omp parallel for
-#endif
-        for( long long i = 0; i < x->get_number_of_elements(); i++ ) {
-            T prev = x->get_data_ptr()[i];
-            typename realType<T>::Type absPrev = abs(prev);
-            T sgnPrev = (absPrev <= typename realType<T>::Type(0)) ? T(0) : prev/absPrev;
-            outPtr[i] = sgnPrev*std::max(absPrev-gamma*std::pow(absPrev,p-1), typename realType<T>::Type(0));
-        }
-    }
-
-    template<class T> void shrinkd ( hoNDArray<T> *_x, hoNDArray<typename realType<T>::Type> *_s, typename realType<T>::Type gamma, hoNDArray<T> *out )
-    {
-        if( _x == 0x0  || _s == 0 )
-            throw std::runtime_error("Gadgetron::shrinkd(): Invalid input array");
-
-        T *outPtr = (out==0x0) ? _x->get_data_ptr() : out->get_data_ptr();
-
-#ifdef USE_OMP
-#pragma omp parallel for
-#endif
-        for( long long i = 0; i < _x->get_number_of_elements(); i++ ) {
-            T x = _x->get_data_ptr()[i];
-            typename realType<T>::Type s = _s->get_data_ptr()[i];
-            if (s > gamma)
-            	outPtr[i] = x/s*(s-gamma);
-            else
-            	outPtr[i] = 0;
-        } 
-    }
-
-    template<class T> void pshrinkd( hoNDArray<T> *_x, hoNDArray<typename realType<T>::Type> *_s, typename realType<T>::Type gamma,typename realType<T>::Type p, hoNDArray<T> *out )
-    {
-        if( _x == 0x0 )
-            throw std::runtime_error("Gadgetron::pshrinkd(): Invalid input array");
-
-        T *outPtr = (out==0x0) ? _x->get_data_ptr() : out->get_data_ptr();
-
-#ifdef USE_OMP
-#pragma omp parallel for
-#endif
-        for( long long i = 0; i < _x->get_number_of_elements(); i++ )
-        {
-            T x = _x->get_data_ptr()[i];
-            typename realType<T>::Type s = _s->get_data_ptr()[i];
-            outPtr[i] = x/s*std::max(s-gamma*std::pow(s,p-1),typename realType<T>::Type(0));
-        }
-    }
-
-    #ifdef USE_MKL
-
-    // ----------------------------------------------------------------------------------------
-    // float
-    // ----------------------------------------------------------------------------------------
-
-    bool add(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r)
-    {
-        GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        vsAdd(x.get_number_of_elements(), x.begin(), y.begin(), r.begin());
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool subtract(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r)
-    {
-        GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        vsSub(x.get_number_of_elements(), x.begin(), y.begin(), r.begin());
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool multiply(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r)
-    {
-        GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        vsMul(x.get_number_of_elements(), x.begin(), y.begin(), r.begin());
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool divide(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r)
-    {
-        GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        vsDiv(x.get_number_of_elements(), x.begin(), y.begin(), r.begin());
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool absolute(const hoNDArray<float>& x, hoNDArray<float>& r)
-    {
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        vsAbs(x.get_number_of_elements(), x.begin(), r.begin());
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool argument(const hoNDArray<float>& x, hoNDArray<float>& r)
-    {
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        memset(r.begin(), 0, r.get_number_of_bytes());
-
-        return true;
-    }
-
-    bool sqrt(const hoNDArray<float>& x, hoNDArray<float>& r)
-    {
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        vsSqrt(x.get_number_of_elements(), x.begin(), r.begin());
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool minAbsolute(const hoNDArray<float>& x, float& r, size_t& ind)
-    {
-        try
-        {
-            MKL_INT n = x.get_number_of_elements();
-            MKL_INT incx = 1;
-            ind = (size_t)(isamin(&n, x.begin(), &incx));
-            r = x.at(ind);
-        }
-        catch(...)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool maxAbsolute(const hoNDArray<float>& x, float& r, size_t& ind)
-    {
-        try
-        {
-            MKL_INT n = x.get_number_of_elements();
-            MKL_INT incx = 1;
-            ind = (size_t)(isamax(&n, x.begin(), &incx));
-            r = x.at(ind);
-        }
-        catch(...)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool addEpsilon(hoNDArray<float>& x)
-    {
-        try
-        {
-            size_t n = x.get_number_of_elements();
-            float* pX = x.begin();
-
-            long long i;
-
-            #pragma omp parallel for default(none) private(i) shared(n, pX)
-            for (i=0; i<(long long)n; i++ )
-            {
-                if ( GT_ABS(pX[i]) < FLT_EPSILON )
-                {
-                    pX[i] += GT_SGN(pX[i])*FLT_EPSILON;
-                }
-            }
-        }
-        catch(...)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool norm2(const hoNDArray<float>& x, float& r)
-    {
-        try
-        {
-            MKL_INT incx = 1;
-            MKL_INT n = x.get_number_of_elements();
-            r = snrm2(&n, x.begin(), &incx);
-        }
-        catch(...)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool norm1(const hoNDArray<float>& x, float& r)
-    {
-        try
-        {
-            MKL_INT incx = 1;
-            MKL_INT n = x.get_number_of_elements();
-            r = sasum(&n, x.begin(), &incx);
-        }
-        catch(...)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool conv2(const hoNDArray<float>& x, const hoNDArray<float>& ker, hoNDArray<float>& z)
-    {
-        try
-        {
-            if ( !z.dimensions_equal(&x) )
-            {
-                z = x;
-            }
-
-            size_t RO = x.get_size(0);
-            size_t E1 = x.get_size(1);
-
-            size_t kerRO = ker.get_size(0);
-            size_t kerE1 = ker.get_size(1);
-
-            size_t num = x.get_number_of_elements()/(RO*E1);
-
-            int status;
-            VSLConvTaskPtr task;
-
-            MKL_INT kerShape[2];
-            kerShape[0] = kerRO; kerShape[1] = kerE1;
-
-            MKL_INT xshape[2];
-            xshape[0] = RO; xshape[1] = E1;
-
-            MKL_INT start[2];
-            start[0] = kerRO/2;
-            start[1] = kerE1/2;
-
-            MKL_INT kerStride[2], xstride[2], zstride[2];
-            kerStride[0] = 1; kerStride[1] = kerRO;
-            xstride[0] = 1; xstride[1] = RO;
-            zstride[0] = 1; zstride[1] = RO;
-
-            const float* pX = x.begin();
-            const float* pKer = ker.begin();
-            float* pZ = z.begin();
-
-            if ( num == 1 )
-            {
-                status = vslsConvNewTask(&task, VSL_CONV_MODE_AUTO, 2, kerShape, xshape, xshape);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vslConvSetStart(task, start);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vslsConvExec(task, pKer, kerStride, pX, xstride, pZ, zstride);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                 vslConvDeleteTask(&task);
-            }
-            else
-            {
-                status = vslsConvNewTaskX(&task, VSL_CONV_MODE_AUTO, 2, kerShape, xshape, xshape, pKer, kerStride);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vslConvSetStart(task, start);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                long long n;
-
-                #pragma omp parallel for default(none) private(n) shared(num, task, pX, RO, E1, status, xstride, pZ, zstride)
-                for ( n=0; n<(long long)num; n++ )
-                {
-                    status = vslsConvExecX(task, pX+n*RO*E1, xstride, pZ+n*RO*E1, zstride);
-                }
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                vslConvDeleteTask(&task);
-            }
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors happened in conv2(const hoNDArray<float>& x, const hoNDArray<float>& ker, hoNDArray<float>& z) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    bool conv3(const hoNDArray<float>& x, const hoNDArray<float>& ker, hoNDArray<float>& z)
-    {
-        try
-        {
-            if ( !z.dimensions_equal(&x) )
-            {
-                z = x;
-            }
-
-            size_t RO = x.get_size(0);
-            size_t E1 = x.get_size(1);
-            size_t E2 = x.get_size(2);
-
-            size_t kerRO = ker.get_size(0);
-            size_t kerE1 = ker.get_size(1);
-            size_t kerE2 = ker.get_size(2);
-
-            size_t num = x.get_number_of_elements()/(RO*E1*E2);
-
-            int status;
-            VSLConvTaskPtr task;
-
-            MKL_INT kerShape[3];
-            kerShape[0] = kerRO; kerShape[1] = kerE1; kerShape[2] = kerE2;
-
-            MKL_INT xshape[3];
-            xshape[0] = RO; xshape[1] = E1; xshape[2] = E2;
-
-            MKL_INT start[3];
-            start[0] = kerRO/2;
-            start[1] = kerE1/2;
-            start[2] = kerE2/2;
-
-            MKL_INT kerStride[3], xstride[3], zstride[3];
-            kerStride[0] = 1; kerStride[1] = kerRO; kerStride[2] = kerRO*kerE1;
-            xstride[0] = 1; xstride[1] = RO; xstride[2] = RO*E1;
-            zstride[0] = 1; zstride[1] = RO; zstride[2] = RO*E1;
-
-            const float* pX = x.begin();
-            const float* pKer = ker.begin();
-            float* pZ = z.begin();
-
-            if ( num == 1 )
-            {
-                status = vslsConvNewTask(&task, VSL_CONV_MODE_AUTO, 3, kerShape, xshape, xshape);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vslConvSetStart(task, start);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vslsConvExec(task, pKer, kerStride, pX, xstride, pZ, zstride);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                 vslConvDeleteTask(&task);
-            }
-            else
-            {
-                status = vslsConvNewTaskX(&task, VSL_CONV_MODE_AUTO, 3, kerShape, xshape, xshape, pKer, kerStride);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vslConvSetStart(task, start);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                long long n;
-
-                #pragma omp parallel for default(none) private(n) shared(num, task, pX, RO, E1, E2, status, xstride, pZ, zstride)
-                for ( n=0; n<(long long)num; n++ )
-                {
-                    status = vslsConvExecX(task, pX+n*RO*E1*E2, xstride, pZ+n*RO*E1*E2, zstride);
-                }
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                vslConvDeleteTask(&task);
-            }
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors happened in conv3(const hoNDArray<float>& x, const hoNDArray<float>& ker, hoNDArray<float>& z) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    bool inv(const hoNDArray<float>& x, hoNDArray<float>& r)
-    {
-        try
-        {
-            if ( !r.dimensions_equal(&x) )
-            {
-                r = x;
-            }
-
-            long long n = x.get_number_of_elements();
-            vsInv(n, x.begin(), r.begin());
-            GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors happened in inv(const hoNDArray<float>& x, hoNDArray<float>& r) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    // ----------------------------------------------------------------------------------------
-    // double
-    // ----------------------------------------------------------------------------------------
-
-    bool add(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r)
-    {
-        GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        vdAdd(x.get_number_of_elements(), x.begin(), y.begin(), r.begin());
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool subtract(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r)
-    {
-        GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        vdSub(x.get_number_of_elements(), x.begin(), y.begin(), r.begin());
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool multiply(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r)
-    {
-        GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        vdMul(x.get_number_of_elements(), x.begin(), y.begin(), r.begin());
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool divide(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r)
-    {
-        GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        vdDiv(x.get_number_of_elements(), x.begin(), y.begin(), r.begin());
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool absolute(const hoNDArray<double>& x, hoNDArray<double>& r)
-    {
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        vdAbs(x.get_number_of_elements(), x.begin(), r.begin());
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool argument(const hoNDArray<double>& x, hoNDArray<double>& r)
-    {
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        memset(r.begin(), 0, r.get_number_of_bytes());
-
-        return true;
-    }
-
-    bool sqrt(const hoNDArray<double>& x, hoNDArray<double>& r)
-    {
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        vdSqrt(x.get_number_of_elements(), x.begin(), r.begin());
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool minAbsolute(const hoNDArray<double>& x, double& r, size_t& ind)
-    {
-        try
-        {
-            MKL_INT n = x.get_number_of_elements();
-            MKL_INT incx = 1;
-            ind = (size_t)(idamin(&n, x.begin(), &incx));
-            r = x.at(ind);
-        }
-        catch(...)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool maxAbsolute(const hoNDArray<double>& x, double& r, size_t& ind)
-    {
-        try
-        {
-            MKL_INT n = x.get_number_of_elements();
-            MKL_INT incx = 1;
-            ind = (size_t)(idamax(&n, x.begin(), &incx));
-            r = x.at(ind);
-        }
-        catch(...)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool addEpsilon(hoNDArray<double>& x)
-    {
-        try
-        {
-            size_t n = x.get_number_of_elements();
-            double* pX = x.begin();
-
-            long long i;
-
-            #pragma omp parallel for default(none) private(i) shared(n, pX)
-            for (i=0; i<(long long)n; i++ )
-            {
-                if ( GT_ABS(pX[i]) < DBL_EPSILON )
-                {
-                    pX[i] += GT_SGN(pX[i])*DBL_EPSILON;
-                }
-            }
-        }
-        catch(...)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool norm2(const hoNDArray<double>& x, double& r)
-    {
-        try
-        {
-            MKL_INT incx = 1;
-            MKL_INT n = x.get_number_of_elements();
-            r = dnrm2(&n, x.begin(), &incx);
-        }
-        catch(...)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool norm1(const hoNDArray<double>& x, double& r)
-    {
-        try
-        {
-            MKL_INT incx = 1;
-            MKL_INT n = x.get_number_of_elements();
-            r = dasum(&n, x.begin(), &incx);
-        }
-        catch(...)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool conv2(const hoNDArray<double>& x, const hoNDArray<double>& ker, hoNDArray<double>& z)
-    {
-        try
-        {
-            if ( !z.dimensions_equal(&x) )
-            {
-                z = x;
-            }
-
-            size_t RO = x.get_size(0);
-            size_t E1 = x.get_size(1);
-
-            size_t kerRO = ker.get_size(0);
-            size_t kerE1 = ker.get_size(1);
-
-            size_t num = x.get_number_of_elements()/(RO*E1);
-
-            int status;
-            VSLConvTaskPtr task;
-
-            MKL_INT kerShape[2];
-            kerShape[0] = kerRO; kerShape[1] = kerE1;
-
-            MKL_INT xshape[2];
-            xshape[0] = RO; xshape[1] = E1;
-
-            MKL_INT start[2];
-            start[0] = kerRO/2;
-            start[1] = kerE1/2;
-
-            MKL_INT kerStride[2], xstride[2], zstride[2];
-            kerStride[0] = 1; kerStride[1] = kerRO;
-            xstride[0] = 1; xstride[1] = RO;
-            zstride[0] = 1; zstride[1] = RO;
-
-            const double* pX = x.begin();
-            const double* pKer = ker.begin();
-            double* pZ = z.begin();
-
-            if ( num == 1 )
-            {
-                status = vsldConvNewTask(&task, VSL_CONV_MODE_AUTO, 2, kerShape, xshape, xshape);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vslConvSetStart(task, start);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vsldConvExec(task, pKer, kerStride, pX, xstride, pZ, zstride);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                 vslConvDeleteTask(&task);
-            }
-            else
-            {
-                status = vsldConvNewTaskX(&task, VSL_CONV_MODE_AUTO, 2, kerShape, xshape, xshape, pKer, kerStride);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vslConvSetStart(task, start);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                long long n;
-
-                #pragma omp parallel for default(none) private(n) shared(num, task, pX, RO, E1, status, xstride, pZ, zstride)
-                for ( n=0; n<(long long)num; n++ )
-                {
-                    status = vsldConvExecX(task, pX+n*RO*E1, xstride, pZ+n*RO*E1, zstride);
-                }
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                vslConvDeleteTask(&task);
-            }
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors happened in conv2(const hoNDArray<double>& x, const hoNDArray<double>& ker, hoNDArray<double>& z) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    bool conv3(const hoNDArray<double>& x, const hoNDArray<double>& ker, hoNDArray<double>& z)
-    {
-        try
-        {
-            if ( !z.dimensions_equal(&x) )
-            {
-                z = x;
-            }
-
-            size_t RO = x.get_size(0);
-            size_t E1 = x.get_size(1);
-            size_t E2 = x.get_size(2);
-
-            size_t kerRO = ker.get_size(0);
-            size_t kerE1 = ker.get_size(1);
-            size_t kerE2 = ker.get_size(2);
-
-            size_t num = x.get_number_of_elements()/(RO*E1*E2);
-
-            int status;
-            VSLConvTaskPtr task;
-
-            MKL_INT kerShape[3];
-            kerShape[0] = kerRO; kerShape[1] = kerE1; kerShape[2] = kerE2;
-
-            MKL_INT xshape[3];
-            xshape[0] = RO; xshape[1] = E1; xshape[2] = E2;
-
-            MKL_INT start[3];
-            start[0] = kerRO/2;
-            start[1] = kerE1/2;
-            start[2] = kerE2/2;
-
-            MKL_INT kerStride[3], xstride[3], zstride[3];
-            kerStride[0] = 1; kerStride[1] = kerRO; kerStride[2] = kerRO*kerE1;
-            xstride[0] = 1; xstride[1] = RO; xstride[2] = RO*E1;
-            zstride[0] = 1; zstride[1] = RO; zstride[2] = RO*E1;
-
-            const double* pX = x.begin();
-            const double* pKer = ker.begin();
-            double* pZ = z.begin();
-
-            if ( num == 1 )
-            {
-                status = vsldConvNewTask(&task, VSL_CONV_MODE_AUTO, 3, kerShape, xshape, xshape);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vslConvSetStart(task, start);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vsldConvExec(task, pKer, kerStride, pX, xstride, pZ, zstride);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                 vslConvDeleteTask(&task);
-            }
-            else
-            {
-                status = vsldConvNewTaskX(&task, VSL_CONV_MODE_AUTO, 3, kerShape, xshape, xshape, pKer, kerStride);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vslConvSetStart(task, start);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                long long n;
-
-                #pragma omp parallel for default(none) private(n) shared(num, task, pX, RO, E1, E2, status, xstride, pZ, zstride)
-                for ( n=0; n<(long long)num; n++ )
-                {
-                    status = vsldConvExecX(task, pX+n*RO*E1*E2, xstride, pZ+n*RO*E1*E2, zstride);
-                }
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                vslConvDeleteTask(&task);
-            }
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors happened in conv3(const hoNDArray<double>& x, const hoNDArray<double>& ker, hoNDArray<double>& z) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    bool inv(const hoNDArray<double>& x, hoNDArray<double>& r)
-    {
-        try
-        {
-            if ( !r.dimensions_equal(&x) )
-            {
-                r = x;
-            }
-
-            long long n = x.get_number_of_elements();
-            vdInv(n, x.begin(), r.begin());
-            GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors happened in inv(const hoNDArray<double>& x, hoNDArray<double>& r) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    // ----------------------------------------------------------------------------------------
-    // GT_Complex8
-    // ----------------------------------------------------------------------------------------
-
-    bool add(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, hoNDArray<GT_Complex8>& r)
-    {
-        GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        vcAdd(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex8*>(x.begin()), reinterpret_cast<const MKL_Complex8*>(y.begin()), reinterpret_cast<MKL_Complex8*>(r.begin()));
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool subtract(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, hoNDArray<GT_Complex8>& r)
-    {
-        GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        vcSub(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex8*>(x.begin()), reinterpret_cast<const MKL_Complex8*>(y.begin()), reinterpret_cast<MKL_Complex8*>(r.begin()));
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool multiply(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, hoNDArray<GT_Complex8>& r)
-    {
-        GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        vcMul(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex8*>(x.begin()), reinterpret_cast<const MKL_Complex8*>(y.begin()), reinterpret_cast<MKL_Complex8*>(r.begin()));
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool divide(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, hoNDArray<GT_Complex8>& r)
-    {
-        GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        vcDiv(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex8*>(x.begin()), reinterpret_cast<const MKL_Complex8*>(y.begin()), reinterpret_cast<MKL_Complex8*>(r.begin()));
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool absolute(const hoNDArray<GT_Complex8>& x, hoNDArray<float>& r)
-    {
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r.create(x.get_dimensions());
-        }
-
-        vcAbs(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex8*>(x.begin()), r.begin());
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool absolute(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r)
-    {
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r.create(x.get_dimensions());
-        }
-
-        hoNDArray<float> rTmp;
-        rTmp.create(x.get_dimensions());
-
-        vcAbs(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex8*>(x.begin()), rTmp.begin());
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        //GADGET_CHECK_RETURN_FALSE(r.copyFrom(rTmp));
-	r.copyFrom(rTmp);
-
-        return true;
-    }
-
-    bool sqrt(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r)
-    {
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r.create(x.get_dimensions());
-        }
-
-        vcSqrt(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex8*>(x.begin()), reinterpret_cast<MKL_Complex8*>(r.begin()));
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool minAbsolute(const hoNDArray<GT_Complex8>& x, GT_Complex8& r, size_t& ind)
-    {
-        try
-        {
-            MKL_INT n = x.get_number_of_elements();
-            MKL_INT incx = 1;
-            ind = (size_t)(icamin(&n, reinterpret_cast<const MKL_Complex8*>(x.begin()), &incx));
-            r = x.at(ind);
-        }
-        catch(...)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool maxAbsolute(const hoNDArray<GT_Complex8>& x, GT_Complex8& r, size_t& ind)
-    {
-        try
-        {
-            MKL_INT n = x.get_number_of_elements();
-            MKL_INT incx = 1;
-            ind = (size_t)(icamax(&n, reinterpret_cast<const MKL_Complex8*>(x.begin()), &incx));
-            r = x.at(ind);
-        }
-        catch(...)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool multiplyConj(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, hoNDArray<GT_Complex8>& r)
-    {
-        GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        vcMulByConj(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex8*>(x.begin()), reinterpret_cast<const MKL_Complex8*>(y.begin()), reinterpret_cast<MKL_Complex8*>(r.begin()));
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool argument(const hoNDArray<GT_Complex8>& x, hoNDArray<float>& r)
-    {
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r.create(x.get_dimensions());
-        }
-
-        vcArg(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex8*>(x.begin()), r.begin());
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool conjugate(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r)
-    {
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r.create(x.get_dimensions());
-        }
-
-        vcConj(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex8*>(x.begin()), reinterpret_cast<MKL_Complex8*>(r.begin()));
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool addEpsilon(hoNDArray<GT_Complex8>& x)
-    {
-        try
-        {
-            size_t n = x.get_number_of_elements();
-            GT_Complex8* pX = x.begin();
-
-            long long i;
-
-            #pragma omp parallel for default(none) private(i) shared(n, pX)
-            for (i=0; i<(long long)n; i++ )
-            {
-                if ( std::abs(pX[i]) < FLT_EPSILON )
-                {
-                    pX[i] += FLT_EPSILON;
-                }
-            }
-        }
-        catch(...)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool norm2(const hoNDArray<GT_Complex8>& x, float& r)
-    {
-        try
-        {
-            MKL_INT incx = 1;
-            MKL_INT n = x.get_number_of_elements();
-            r = scnrm2(&n, reinterpret_cast<const MKL_Complex8*>(x.begin()), &incx);
-        }
-        catch(...)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool norm1(const hoNDArray<GT_Complex8>& x, float& r)
-    {
-        try
-        {
-            hoNDArray<float> a;
-            GADGET_CHECK_RETURN_FALSE(absolute(x, a));
-            GADGET_CHECK_RETURN_FALSE(norm1(a, r));
-        }
-        catch(...)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool dotc(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, GT_Complex8& r)
-    {
-        try
-        {
-            GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-
-            MKL_INT N = x.get_number_of_elements();
-            MKL_INT incx(1), incy(1);
-            cdotc(reinterpret_cast<MKL_Complex8*>(&r), &N, reinterpret_cast<const MKL_Complex8*>(x.begin()), &incx, 
-                    reinterpret_cast<const MKL_Complex8*>(y.begin()), &incy);
-        }
-        catch(...)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool conv2(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& ker, hoNDArray<GT_Complex8>& z)
-    {
-        try
-        {
-            if ( !z.dimensions_equal(&x) )
-            {
-                z = x;
-            }
-
-            size_t RO = x.get_size(0);
-            size_t E1 = x.get_size(1);
-
-            size_t kerRO = ker.get_size(0);
-            size_t kerE1 = ker.get_size(1);
-
-            size_t num = x.get_number_of_elements()/(RO*E1);
-
-            int status;
-            VSLConvTaskPtr task;
-
-            MKL_INT kerShape[2];
-            kerShape[0] = kerRO; kerShape[1] = kerE1;
-
-            MKL_INT xshape[2];
-            xshape[0] = RO; xshape[1] = E1;
-
-            MKL_INT start[2];
-            start[0] = kerRO/2;
-            start[1] = kerE1/2;
-
-            MKL_INT kerStride[2], xstride[2], zstride[2];
-            kerStride[0] = 1; kerStride[1] = kerRO;
-            xstride[0] = 1; xstride[1] = RO;
-            zstride[0] = 1; zstride[1] = RO;
-
-            const MKL_Complex8* pX = reinterpret_cast<const MKL_Complex8*>(x.begin());
-            const MKL_Complex8* pKer = reinterpret_cast<const MKL_Complex8*>(ker.begin());
-            MKL_Complex8* pZ = reinterpret_cast<MKL_Complex8*>(z.begin());
-
-            if ( num == 1 )
-            {
-                status = vslcConvNewTask(&task, VSL_CONV_MODE_AUTO, 2, kerShape, xshape, xshape);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vslConvSetStart(task, start);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vslcConvExec(task, pKer, kerStride, pX, xstride, pZ, zstride);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                 vslConvDeleteTask(&task);
-            }
-            else
-            {
-                status = vslcConvNewTaskX(&task, VSL_CONV_MODE_AUTO, 2, kerShape, xshape, xshape, pKer, kerStride);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vslConvSetStart(task, start);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                long long n;
-
-                #pragma omp parallel for default(none) private(n) shared(num, task, pX, RO, E1, status, xstride, pZ, zstride)
-                for ( n=0; n<(long long)num; n++ )
-                {
-                    status = vslcConvExecX(task, pX+n*RO*E1, xstride, pZ+n*RO*E1, zstride);
-                }
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                vslConvDeleteTask(&task);
-            }
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors happened in conv2(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& ker, hoNDArray<GT_Complex8>& z) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    bool conv3(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& ker, hoNDArray<GT_Complex8>& z)
-    {
-        try
-        {
-            if ( !z.dimensions_equal(&x) )
-            {
-                z = x;
-            }
-
-            size_t RO = x.get_size(0);
-            size_t E1 = x.get_size(1);
-            size_t E2 = x.get_size(2);
-
-            size_t kerRO = ker.get_size(0);
-            size_t kerE1 = ker.get_size(1);
-            size_t kerE2 = ker.get_size(2);
-
-            size_t num = x.get_number_of_elements()/(RO*E1*E2);
-
-            int status;
-            VSLConvTaskPtr task;
-
-            MKL_INT kerShape[3];
-            kerShape[0] = kerRO; kerShape[1] = kerE1; kerShape[2] = kerE2;
-
-            MKL_INT xshape[3];
-            xshape[0] = RO; xshape[1] = E1; xshape[2] = E2;
-
-            MKL_INT start[3];
-            start[0] = kerRO/2;
-            start[1] = kerE1/2;
-            start[2] = kerE2/2;
-
-            MKL_INT kerStride[3], xstride[3], zstride[3];
-            kerStride[0] = 1; kerStride[1] = kerRO; kerStride[2] = kerRO*kerE1;
-            xstride[0] = 1; xstride[1] = RO; xstride[2] = RO*E1;
-            zstride[0] = 1; zstride[1] = RO; zstride[2] = RO*E1;
-
-            const MKL_Complex8* pX = reinterpret_cast<const MKL_Complex8*>(x.begin());
-            const MKL_Complex8* pKer = reinterpret_cast<const MKL_Complex8*>(ker.begin());
-            MKL_Complex8* pZ = reinterpret_cast<MKL_Complex8*>(z.begin());
-
-            if ( num == 1 )
-            {
-                status = vslcConvNewTask(&task, VSL_CONV_MODE_AUTO, 3, kerShape, xshape, xshape);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vslConvSetStart(task, start);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vslcConvExec(task, pKer, kerStride, pX, xstride, pZ, zstride);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                 vslConvDeleteTask(&task);
-            }
-            else
-            {
-                status = vslcConvNewTaskX(&task, VSL_CONV_MODE_AUTO, 3, kerShape, xshape, xshape, pKer, kerStride);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vslConvSetStart(task, start);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                long long n;
-
-                #pragma omp parallel for default(none) private(n) shared(num, task, pX, RO, E1, E2, status, xstride, pZ, zstride)
-                for ( n=0; n<(long long)num; n++ )
-                {
-                    status = vslcConvExecX(task, pX+n*RO*E1*E2, xstride, pZ+n*RO*E1*E2, zstride);
-                }
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                vslConvDeleteTask(&task);
-            }
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors happened in conv3(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& ker, hoNDArray<GT_Complex8>& z) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    bool inv(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r)
-    {
-        try
-        {
-            if ( !r.dimensions_equal(&x) )
-            {
-                r = x;
-            }
-
-            const GT_Complex8* pX = x.begin();
-            GT_Complex8* pR = r.begin();
-
-            GT_Complex8 v(1.0);
-            long long n = x.get_number_of_elements();
-            long long ii;
-
-            #pragma omp parallel for default(none) private(ii) shared(n, pX, pR, v)
-            for ( ii=0; ii<n; ii++ )
-            {
-                pR[ii] = v/pX[ii];
-            }
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors happened in inv(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    // ----------------------------------------------------------------------------------------
-    // GT_Complex16
-    // ----------------------------------------------------------------------------------------
-
-    bool add(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, hoNDArray<GT_Complex16>& r)
-    {
-        GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        vzAdd(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex16*>(x.begin()), reinterpret_cast<const MKL_Complex16*>(y.begin()), reinterpret_cast<MKL_Complex16*>(r.begin()));
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool subtract(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, hoNDArray<GT_Complex16>& r)
-    {
-        GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        vzSub(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex16*>(x.begin()), reinterpret_cast<const MKL_Complex16*>(y.begin()), reinterpret_cast<MKL_Complex16*>(r.begin()));
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool multiply(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, hoNDArray<GT_Complex16>& r)
-    {
-        GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        vzMul(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex16*>(x.begin()), reinterpret_cast<const MKL_Complex16*>(y.begin()), reinterpret_cast<MKL_Complex16*>(r.begin()));
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool divide(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, hoNDArray<GT_Complex16>& r)
-    {
-        GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        vzDiv(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex16*>(x.begin()), reinterpret_cast<const MKL_Complex16*>(y.begin()), reinterpret_cast<MKL_Complex16*>(r.begin()));
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool absolute(const hoNDArray<GT_Complex16>& x, hoNDArray<double>& r)
-    {
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r.create(x.get_dimensions());
-        }
-
-        vzAbs(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex16*>(x.begin()), r.begin());
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool absolute(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r)
-    {
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r.create(x.get_dimensions());
-        }
-
-        hoNDArray<double> rTmp;
-        rTmp.create(x.get_dimensions());
-
-        vzAbs(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex16*>(x.begin()), rTmp.begin());
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        //GADGET_CHECK_RETURN_FALSE(r.copyFrom(rTmp));
-	r.copyFrom(rTmp);
-
-        return true;
-    }
-
-    bool sqrt(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r)
-    {
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r.create(x.get_dimensions());
-        }
-
-        vzSqrt(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex16*>(x.begin()), reinterpret_cast<MKL_Complex16*>(r.begin()));
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool minAbsolute(const hoNDArray<GT_Complex16>& x, GT_Complex16& r, size_t& ind)
-    {
-        try
-        {
-            MKL_INT n = x.get_number_of_elements();
-            MKL_INT incx = 1;
-            ind = (size_t)(izamin(&n, reinterpret_cast<const MKL_Complex16*>(x.begin()), &incx));
-            r = x.at(ind);
-        }
-        catch(...)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool maxAbsolute(const hoNDArray<GT_Complex16>& x, GT_Complex16& r, size_t& ind)
-    {
-        try
-        {
-            MKL_INT n = x.get_number_of_elements();
-            MKL_INT incx = 1;
-            ind = (size_t)(izamax(&n, reinterpret_cast<const MKL_Complex16*>(x.begin()), &incx));
-            r = x.at(ind);
-        }
-        catch(...)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool multiplyConj(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, hoNDArray<GT_Complex16>& r)
-    {
-        GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r = x;
-        }
-
-        vzMulByConj(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex16*>(x.begin()), reinterpret_cast<const MKL_Complex16*>(y.begin()), reinterpret_cast<MKL_Complex16*>(r.begin()));
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool argument(const hoNDArray<GT_Complex16>& x, hoNDArray<double>& r)
-    {
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r.create(x.get_dimensions());
-        }
-
-        vzArg(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex16*>(x.begin()), r.begin());
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool conjugate(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r)
-    {
-        if ( r.get_number_of_elements()!=x.get_number_of_elements())
-        {
-            r.create(x.get_dimensions());
-        }
-
-        vzConj(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex16*>(x.begin()), reinterpret_cast<MKL_Complex16*>(r.begin()));
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    bool addEpsilon(hoNDArray<GT_Complex16>& x)
-    {
-        try
-        {
-            size_t n = x.get_number_of_elements();
-            GT_Complex16* pX = x.begin();
-
-            long long i;
-
-            #pragma omp parallel for default(none) private(i) shared(n, pX)
-            for (i=0; i<(long long)n; i++ )
-            {
-                if ( std::abs(pX[i]) < DBL_EPSILON )
-                {
-                    pX[i] += DBL_EPSILON;
-                }
-            }
-        }
-        catch(...)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool norm2(const hoNDArray<GT_Complex16>& x, double& r)
-    {
-        try
-        {
-            MKL_INT incx = 1;
-            MKL_INT n = x.get_number_of_elements();
-            r = dznrm2(&n, reinterpret_cast<const MKL_Complex16*>(x.begin()), &incx);
-        }
-        catch(...)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool norm1(const hoNDArray<GT_Complex16>& x, double& r)
-    {
-        try
-        {
-            hoNDArray<double> a;
-            GADGET_CHECK_RETURN_FALSE(absolute(x, a));
-            GADGET_CHECK_RETURN_FALSE(norm1(a, r));
-        }
-        catch(...)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool dotc(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, GT_Complex16& r)
-    {
-        try
-        {
-            GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
-
-            MKL_INT N = x.get_number_of_elements();
-            MKL_INT incx(1), incy(1);
-            zdotc(reinterpret_cast<MKL_Complex16*>(&r), &N, reinterpret_cast<const MKL_Complex16*>(x.begin()), &incx, 
-                    reinterpret_cast<const MKL_Complex16*>(y.begin()), &incy);
-        }
-        catch(...)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool conv2(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& ker, hoNDArray<GT_Complex16>& z)
-    {
-        try
-        {
-            if ( !z.dimensions_equal(&x) )
-            {
-                z = x;
-            }
-
-            size_t RO = x.get_size(0);
-            size_t E1 = x.get_size(1);
-
-            size_t kerRO = ker.get_size(0);
-            size_t kerE1 = ker.get_size(1);
-
-            size_t num = x.get_number_of_elements()/(RO*E1);
-
-            int status;
-            VSLConvTaskPtr task;
-
-            MKL_INT kerShape[2];
-            kerShape[0] = kerRO; kerShape[1] = kerE1;
-
-            MKL_INT xshape[2];
-            xshape[0] = RO; xshape[1] = E1;
-
-            MKL_INT start[2];
-            start[0] = kerRO/2;
-            start[1] = kerE1/2;
-
-            MKL_INT kerStride[2], xstride[2], zstride[2];
-            kerStride[0] = 1; kerStride[1] = kerRO;
-            xstride[0] = 1; xstride[1] = RO;
-            zstride[0] = 1; zstride[1] = RO;
-
-            const MKL_Complex16* pX = reinterpret_cast<const MKL_Complex16*>(x.begin());
-            const MKL_Complex16* pKer = reinterpret_cast<const MKL_Complex16*>(ker.begin());
-            MKL_Complex16* pZ = reinterpret_cast<MKL_Complex16*>(z.begin());
-
-            if ( num == 1 )
-            {
-                status = vslzConvNewTask(&task, VSL_CONV_MODE_AUTO, 2, kerShape, xshape, xshape);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vslConvSetStart(task, start);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vslzConvExec(task, pKer, kerStride, pX, xstride, pZ, zstride);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                 vslConvDeleteTask(&task);
-            }
-            else
-            {
-                status = vslzConvNewTaskX(&task, VSL_CONV_MODE_AUTO, 2, kerShape, xshape, xshape, pKer, kerStride);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vslConvSetStart(task, start);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                long long n;
-
-                #pragma omp parallel for default(none) private(n) shared(num, task, pX, RO, E1, status, xstride, pZ, zstride)
-                for ( n=0; n<(long long)num; n++ )
-                {
-                    status = vslzConvExecX(task, pX+n*RO*E1, xstride, pZ+n*RO*E1, zstride);
-                }
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                vslConvDeleteTask(&task);
-            }
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors happened in conv2(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& ker, hoNDArray<GT_Complex16>& z) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    bool conv3(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& ker, hoNDArray<GT_Complex16>& z)
-    {
-        try
-        {
-            if ( !z.dimensions_equal(&x) )
-            {
-                z = x;
-            }
-
-            size_t RO = x.get_size(0);
-            size_t E1 = x.get_size(1);
-            size_t E2 = x.get_size(2);
-
-            size_t kerRO = ker.get_size(0);
-            size_t kerE1 = ker.get_size(1);
-            size_t kerE2 = ker.get_size(2);
-
-            size_t num = x.get_number_of_elements()/(RO*E1*E2);
-
-            int status;
-            VSLConvTaskPtr task;
-
-            MKL_INT kerShape[3];
-            kerShape[0] = kerRO; kerShape[1] = kerE1; kerShape[2] = kerE2;
-
-            MKL_INT xshape[3];
-            xshape[0] = RO; xshape[1] = E1; xshape[2] = E2;
-
-            MKL_INT start[3];
-            start[0] = kerRO/2;
-            start[1] = kerE1/2;
-            start[2] = kerE2/2;
-
-            MKL_INT kerStride[3], xstride[3], zstride[3];
-            kerStride[0] = 1; kerStride[1] = kerRO; kerStride[2] = kerRO*kerE1;
-            xstride[0] = 1; xstride[1] = RO; xstride[2] = RO*E1;
-            zstride[0] = 1; zstride[1] = RO; zstride[2] = RO*E1;
-
-            const MKL_Complex16* pX = reinterpret_cast<const MKL_Complex16*>(x.begin());
-            const MKL_Complex16* pKer = reinterpret_cast<const MKL_Complex16*>(ker.begin());
-            MKL_Complex16* pZ = reinterpret_cast<MKL_Complex16*>(z.begin());
-
-            if ( num == 1 )
-            {
-                status = vslzConvNewTask(&task, VSL_CONV_MODE_AUTO, 3, kerShape, xshape, xshape);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vslConvSetStart(task, start);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vslzConvExec(task, pKer, kerStride, pX, xstride, pZ, zstride);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                 vslConvDeleteTask(&task);
-            }
-            else
-            {
-                status = vslzConvNewTaskX(&task, VSL_CONV_MODE_AUTO, 3, kerShape, xshape, xshape, pKer, kerStride);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                status = vslConvSetStart(task, start);
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                long long n;
-
-                #pragma omp parallel for default(none) private(n) shared(num, task, pX, RO, E1, E2, status, xstride, pZ, zstride)
-                for ( n=0; n<(long long)num; n++ )
-                {
-                    status = vslzConvExecX(task, pX+n*RO*E1*E2, xstride, pZ+n*RO*E1*E2, zstride);
-                }
-                GADGET_CHECK_RETURN_FALSE(status==VSL_STATUS_OK);
-
-                vslConvDeleteTask(&task);
-            }
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors happened in conv3(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& ker, hoNDArray<GT_Complex16>& z) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    bool inv(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r)
-    {
-        try
-        {
-            if ( !r.dimensions_equal(&x) )
-            {
-                r = x;
-            }
-
-            const GT_Complex16* pX = x.begin();
-            GT_Complex16* pR = r.begin();
-
-            GT_Complex16 v(1.0);
-            long long n = x.get_number_of_elements();
-            long long ii;
-
-            #pragma omp parallel for default(none) private(ii) shared(n, pX, pR, v)
-            for ( ii=0; ii<n; ii++ )
-            {
-                pR[ii] = v/pX[ii];
-            }
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors happened in inv(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    // ----------------------------------------------------------------------------------------
-    // templated functions
-    // ----------------------------------------------------------------------------------------
-
-    template<typename T> 
-    bool sumOverLastDimension(const hoNDArray<T>& x, hoNDArray<T>& r)
-    {
-        try
-        {
-            boost::shared_ptr< std::vector<size_t> > dim = x.get_dimensions();
-            size_t NDim = dim->size();
-
-            std::vector<size_t> dimR(NDim-1);
-
-            size_t d;
-            for ( d=0; d<NDim-1; d++ )
-            {
-                dimR[d] = (*dim)[d];
-            }
-
-            if ( !r.dimensions_equal(&dimR) )
-            {
-                r.create(&dimR);
-            }
-
-            // Gadgetron::clear(&r);
-
-            if ( x.get_size(NDim-1) <= 1 )
-            {
-                memcpy(r.begin(), x.begin(), x.get_number_of_bytes());
-                return true;
-            }
-
-            size_t lastDim = x.get_size(NDim-1);
-            size_t NR = r.get_number_of_elements();
-            T* pA = const_cast<T*>(x.begin());
-            T* pR = r.begin();
-
-            memcpy(pR, pA, sizeof(T)*NR);
-
-            // sum over the last dim
-            hoNDArray<T> tmp;
-            for ( d=1; d<lastDim; d++ )
-            {
-                tmp.create(&dimR, pA+d*NR);
-                add(tmp, r, r);
-            }
-        }
-        catch (...)
-        {
-            GADGET_ERROR_MSG("Errors in sumOverLastDimension(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
-            return false;
-        }
-        return true;
-    }
-
-    template<typename T> 
-    bool sumOverSecondLastDimension(const hoNDArray<T>& x, hoNDArray<T>& r)
-    {
-        try
-        {
-            boost::shared_ptr< std::vector<size_t> > dim = x.get_dimensions();
-            size_t NDim = dim->size();
-
-            if ( NDim < 2 ) return true;
-
-            std::vector<size_t> dimR(NDim-1);
-            std::vector<size_t> dimRInternal(NDim-2);
-
-            size_t d;
-            for ( d=0; d<NDim-2; d++ )
-            {
-                dimR[d] = (*dim)[d];
-                dimRInternal[d] = (*dim)[d];
-            }
-            dimR[NDim-2] = (*dim)[NDim-1];
-
-            if ( !r.dimensions_equal(&dimR) )
-            {
-                r.create(&dimR);
-            }
-
-            if ( x.get_size(NDim-2) <= 1 )
-            {
-                memcpy(r.begin(), x.begin(), x.get_number_of_bytes());
-                return true;
-            }
-
-            size_t lastDim = x.get_size(NDim-1);
-            size_t secondLastDim = x.get_size(NDim-2);
-            size_t NS = x.get_number_of_elements()/lastDim;
-            size_t NR = r.get_number_of_elements()/lastDim;
-            T* pA = const_cast<T*>(x.begin());
-            T* pR = r.begin();
-
-            //int l;
-            //#pragma omp parallel default(none) private(l) shared(lastDim, secondLastDim, NR, pA, pR, dimRInternal)
-            //{
-            //    hoNDArray<T> tmp(&dimRInternal);
-
-            //    #pragma omp for
-            //    for ( l=0; l<(int)lastDim; l++ )
-            //    {
-            //        memcpy(tmp.begin(), pA+l*NR*secondLastDim, sizeof(T)*NR);
-            //        for ( size_t s=1; s<secondLastDim; s++ )
-            //        {
-            //            hoNDArray<T> tmp2;
-            //            tmp2.create(&dimRInternal, pA+l*NR*secondLastDim+s*NR);
-            //            add(tmp, tmp2, tmp);
-            //        }
-
-            //        memcpy(pR+l*NR, tmp.begin(), sizeof(T)*NR);
-            //    }
-            //}
-
-            int l;
-            #pragma omp parallel default(none) private(l) shared(lastDim, secondLastDim, NS, NR, pA, pR, dimRInternal)
-            {
-                hoNDArray<T> tmp, tmp2;
-
-                #pragma omp for
-                for ( l=0; l<(int)lastDim; l++ )
-                {
-                    memcpy(pR+l*NR, pA+l*NS, sizeof(T)*NR);
-                    tmp.create(&dimRInternal, pR+l*NR);
-                    for ( size_t s=1; s<secondLastDim; s++ )
-                    {
-                        tmp2.create(&dimRInternal, pA+l*NS+s*NR);
-                        add(tmp, tmp2, tmp);
-                    }
-                }
-            }
-        }
-        catch (...)
-        {
-            GADGET_ERROR_MSG("Errors in sumOverSecondLastDimension(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
-            return false;
-        }
-        return true;
-    }
-
-    // e.g. x is 3D and y is 4D array, r(:,:,:,n) = y(:,:,:,n) .* x
-    template<typename T> 
-    bool multiplyOverLastDimension(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r)
-    {
-        try
-        {
-            boost::shared_ptr< std::vector<size_t> > dimX = x.get_dimensions();
-            boost::shared_ptr< std::vector<size_t> > dimY = y.get_dimensions();
-
-            size_t NDim = dimY->size();
-
-            GADGET_CHECK_RETURN_FALSE(dimX->size()==NDim-1);
-
-            if ( !r.dimensions_equal(dimY.get()) )
-            {
-                r.create(dimY);
-            }
-
-            if ( y.get_size(NDim-1) <= 1 )
-            {
-                GADGET_CHECK_RETURN_FALSE(multiply(x, y, r));
-                return true;
-            }
-
-            size_t lastDim = y.get_size(NDim-1);
-            size_t N = x.get_number_of_elements();
-            const T* pX = x.begin();
-            const T* pY = y.begin();
-            T* pR = r.begin();
-
-            int d;
-
-            #ifdef GCC_OLD_FLAG
-                #pragma omp parallel default(none) private(d) shared(dimX, lastDim, N, pY, pR)
-            #else
-                #pragma omp parallel default(none) private(d) shared(x, dimX, lastDim, N, pY, pR)
-            #endif
-            {
-                hoNDArray<T> tmpY, tmpR;
-
-                #pragma omp for
-                for ( d=0; d<(int)lastDim; d++ )
-                {
-                    tmpY.create(dimX.get(), const_cast<T*>(pY+d*N));
-                    tmpR.create(dimX.get(), pR+d*N);
-                    multiply(x, tmpY, tmpR);
-                }
-            }
-        }
-        catch (...)
-        {
-            GADGET_ERROR_MSG("Errors in multiplyOverLastDimension(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r) ... ");
-            return false;
-        }
-        return true;
-    }
-
-    // e.g. x is 3D and y is 4D array, r(:,:,:,n) = y(:,:,:,n) ./ x
-    template<typename T> 
-    bool divideOverLastDimension(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r)
-    {
-        try
-        {
-            boost::shared_ptr< std::vector<size_t> > dimX = x.get_dimensions();
-            boost::shared_ptr< std::vector<size_t> > dimY = y.get_dimensions();
-
-            size_t NDim = dimY->size();
-
-            GADGET_CHECK_RETURN_FALSE(dimX->size()==NDim-1);
-
-            if ( !r.dimensions_equal(dimY.get()) )
-            {
-                r.create(dimY);
-            }
-
-            if ( y.get_size(NDim-1) <= 1 )
-            {
-                GADGET_CHECK_RETURN_FALSE(divide(y, x, r));
-                return true;
-            }
-
-            size_t lastDim = y.get_size(NDim-1);
-            size_t N = x.get_number_of_elements();
-            T* pY = const_cast<T*>(y.begin());
-            T* pR = r.begin();
-
-            int d;
-
-            #ifdef GCC_OLD_FLAG
-                #pragma omp parallel default(none) private(d) shared(dimX, lastDim, N, pY, pR)
-            #else
-                #pragma omp parallel default(none) private(d) shared(x, dimX, lastDim, N, pY, pR)
-            #endif
-            {
-                hoNDArray<T> tmpY, tmpR;
-
-                #pragma omp for
-                for ( d=0; d<(int)lastDim; d++ )
-                {
-                    tmpY.create(dimX, pY+d*N);
-                    tmpR.create(dimX, pR+d*N);
-                    divide(tmpY, x, tmpR);
-                }
-            }
-        }
-        catch (...)
-        {
-            GADGET_ERROR_MSG("Errors in divideOverLastDimension(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r) ... ");
-            return false;
-        }
-        return true;
-    }
-
-    template<typename T> 
-    bool sumOver1stDimension(const hoNDArray<T>& x, hoNDArray<T>& r)
-    {
-        try
-        {
-            size_t RO = x.get_size(0);
-            size_t num = x.get_number_of_elements()/(RO);
-
-            boost::shared_ptr< std::vector<size_t> > dim = x.get_dimensions();
-
-            std::vector<size_t> dimAve(*dim);
-            dimAve[0] = 1;
-            r.create(&dimAve);
-
-            const T* pX = x.begin();
-            T* pR = r.begin();
-
-            int n;
-            #pragma omp parallel for default(none) private(n) shared(RO, num, pX, pR)
-            for ( n=0; n<(int)num; n++ )
-            {
-                T xsum = pX[n*RO];
-                for (size_t ro=1; ro<RO; ro++ )
-                {
-                    xsum += pX[n*RO+ro];
-                }
-
-                pR[n] = xsum;
-            }
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in sumOver1stDimension(...) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    template<typename T> 
-    bool sumOver2ndDimension(const hoNDArray<T>& x, hoNDArray<T>& r)
-    {
-        try
-        {
-            size_t NDim = x.get_number_of_dimensions();
-
-            if ( NDim < 2 )
-            {
-                r = x;
-                return true;
-            }
-
-            size_t RO = x.get_size(0);
-            size_t E1 = x.get_size(1);
-
-            size_t num = x.get_number_of_elements()/(RO*E1);
-
-            boost::shared_ptr< std::vector<size_t> > dim = x.get_dimensions();
-
-            std::vector<size_t> dimAve(*dim);
-            dimAve[1] = 1;
-            r.create(&dimAve);
-
-            int n;
-            #ifdef GCC_OLD_FLAG
-                #pragma omp parallel for default(none) private(n) shared(RO, E1, num)
-            #else
-                #pragma omp parallel for default(none) private(n) shared(RO, E1, num, x, r)
-            #endif
-            for ( n=0; n<(int)num; n++ )
-            {
-                hoNDArray<T> xsum(RO, const_cast<T*>(r.begin()+n*RO));
-                memcpy(xsum.begin(), x.begin()+n*RO*E1, xsum.get_number_of_bytes());
-
-                for (size_t e1=1; e1<E1; e1++ )
-                {
-                    hoNDArray<T> x1D(RO, const_cast<T*>(x.begin()+n*RO*E1+e1*RO));
-                    Gadgetron::add(x1D, xsum, xsum);
-                }
-            }
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in sumOver2ndDimension(...) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    template<typename T> 
-    bool sumOver3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& r)
-    {
-        try
-        {
-            size_t NDim = x.get_number_of_dimensions();
-
-            if ( NDim < 3 )
-            {
-                r = x;
-                return true;
-            }
-
-            size_t RO = x.get_size(0);
-            size_t E1 = x.get_size(1);
-            size_t CHA = x.get_size(2);
-
-            size_t num = x.get_number_of_elements()/(RO*E1*CHA);
-
-            boost::shared_ptr< std::vector<size_t> > dim = x.get_dimensions();
-
-            std::vector<size_t> dimAve(*dim);
-            dimAve[2] = 1;
-            r.create(&dimAve);
-
-            int n;
-            #ifdef GCC_OLD_FLAG
-                #pragma omp parallel for default(none) private(n) shared(RO, E1, CHA, num)
-            #else
-                #pragma omp parallel for default(none) private(n) shared(RO, E1, CHA, num, x, r)
-            #endif 
-            for ( n=0; n<(int)num; n++ )
-            {
-                hoNDArray<T> xsum(RO, E1, const_cast<T*>(r.begin()+n*RO*E1));
-                memcpy(xsum.begin(), x.begin()+n*RO*E1*CHA, xsum.get_number_of_bytes());
-
-                for (size_t cha=1; cha<CHA; cha++ )
-                {
-                    hoNDArray<T> x2D(RO, E1, const_cast<T*>(x.begin()+n*RO*E1*CHA+cha*RO*E1));
-                    Gadgetron::add(x2D, xsum, xsum);
-                }
-            }
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in sumOver3rdDimension(...) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    template<typename T> bool sumOver4thDimension(const hoNDArray<T>& x, hoNDArray<T>& r)
-    {
-        try
-        {
-            size_t NDim = x.get_number_of_dimensions();
-
-            if ( NDim < 4 )
-            {
-                r = x;
-                return true;
-            }
-
-            size_t RO = x.get_size(0);
-            size_t E1 = x.get_size(1);
-            size_t CHA = x.get_size(2);
-            size_t N = x.get_size(3);
-
-            size_t num = x.get_number_of_elements()/(RO*E1*CHA*N);
-
-            boost::shared_ptr< std::vector<size_t> > dim = x.get_dimensions();
-
-            std::vector<size_t> dimAve(*dim);
-            dimAve[3] = 1;
-            r.create(&dimAve);
-
-            int n;
-            #ifdef GCC_OLD_FLAG
-                #pragma omp parallel for default(none) private(n) shared(RO, E1, CHA, N, num)
-            #else
-                #pragma omp parallel for default(none) private(n) shared(RO, E1, CHA, N, num, x, r)
-            #endif
-            for ( n=0; n<(int)num; n++ )
-            {
-                hoNDArray<T> xsum(RO, E1, CHA, const_cast<T*>(r.begin()+n*RO*E1*CHA));
-                memcpy(xsum.begin(), x.begin()+n*RO*E1*CHA*N, xsum.get_number_of_bytes());
-
-                for (size_t nn=1; nn<N; nn++ )
-                {
-                    hoNDArray<T> x3D(RO, E1, CHA, const_cast<T*>(x.begin()+n*RO*E1*CHA*N+nn*RO*E1*CHA));
-                    Gadgetron::add(x3D, xsum, xsum);
-                }
-            }
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in sumOver4thDimension(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    template<typename T> bool sumOver5thDimension(const hoNDArray<T>& x, hoNDArray<T>& r)
-    {
-        try
-        {
-            size_t NDim = x.get_number_of_dimensions();
-
-            if ( NDim < 5 )
-            {
-                r = x;
-                return true;
-            }
-
-            size_t RO = x.get_size(0);
-            size_t E1 = x.get_size(1);
-            size_t CHA = x.get_size(2);
-            size_t N = x.get_size(3);
-            size_t S = x.get_size(4);
-
-            size_t num = x.get_number_of_elements()/(RO*E1*CHA*N*S);
-
-            boost::shared_ptr< std::vector<size_t> > dim = x.get_dimensions();
-
-            std::vector<size_t> dimAve(*dim);
-            dimAve[4] = 1;
-            r.create(&dimAve);
-
-            int n;
-            #ifdef GCC_OLD_FLAG
-                #pragma omp parallel for default(none) private(n) shared(RO, E1, CHA, N, S, num) if (num > 4)
-            #else
-                #pragma omp parallel for default(none) private(n) shared(RO, E1, CHA, N, S, num, x, r) if (num > 4)
-            #endif
-            for ( n=0; n<(int)num; n++ )
-            {
-                hoNDArray<T> xsum(RO, E1, CHA, N, const_cast<T*>(r.begin()+n*RO*E1*CHA*N));
-                memcpy(xsum.begin(), x.begin()+n*RO*E1*CHA*N*S, xsum.get_number_of_bytes());
-
-                for (size_t s=1; s<S; s++ )
-                {
-                    hoNDArray<T> x4D(RO, E1, CHA, N, const_cast<T*>(x.begin()+n*RO*E1*CHA*N*S+s*RO*E1*CHA*N));
-                    Gadgetron::add(x4D, xsum, xsum);
-                }
-            }
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in sumOver5thDimension(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    // e.g. x is 3D and y is 4D array, r(:,:,n,:) = y(:,:,n,:) .* x3D
-    template<typename T> 
-    bool multiplyOver3rdDimension(const hoNDArray<T>& x3D, const hoNDArray<T>& y4D, hoNDArray<T>& r)
-    {
-        try
-        {
-            boost::shared_ptr< std::vector<size_t> > dimX = x3D.get_dimensions();
-            boost::shared_ptr< std::vector<size_t> > dimY = y4D.get_dimensions();
-
-            size_t NDim = dimY->size();
-
-            GADGET_CHECK_RETURN_FALSE(dimX->size()>=3);
-            GADGET_CHECK_RETURN_FALSE(NDim>=4);
-            GADGET_CHECK_RETURN_FALSE((*dimX)[0]==(*dimY)[0]);
-            GADGET_CHECK_RETURN_FALSE((*dimX)[1]==(*dimY)[1]);
-
-            if ( !r.dimensions_equal(dimY.get()) )
-            {
-                r.create(dimY);
-            }
-
-            int t, N2D = x3D.get_size(0)*x3D.get_size(1);
-            int sz = y4D.get_size(2);
-            int st = y4D.get_number_of_elements()/(N2D*sz);
-
-            if ( sz == 1 )
-            {
-                GADGET_CHECK_RETURN_FALSE(multiply(x3D, y4D, r));
-                return true;
-            }
-
-            const T* pX = x3D.begin();
-            const T* pY = y4D.begin();
-            T* pR = r.begin();
-
-            std::vector<size_t> dim2D(2);
-            dim2D[0] = (*dimY)[0];
-            dim2D[1] = (*dimY)[1];
-
-            #pragma omp parallel for default(none) private(t) shared(N2D, sz, st, dim2D, pX, pY, pR)
-            for ( t=0; t<st; t++ )
-            {
-                hoNDArray<T> tmpX, tmpY, tmpR;
-                tmpX.create(&dim2D, const_cast<T*>(pX+t*N2D));
-
-                for ( int z=0; z<sz; z++ )
-                {
-                    tmpY.create(&dim2D, const_cast<T*>(pY+t*N2D*sz+z*N2D));
-                    tmpR.create(&dim2D, pR+t*N2D*sz+z*N2D);
-                    multiply(tmpX, tmpY, tmpR);
-                }
-            }
-        }
-        catch (...)
-        {
-            GADGET_ERROR_MSG("Errors in multiplyOver3rdDimension(const hoNDArray<float>& x3D, const hoNDArray<float>& y4D, hoNDArray<float>& r) ... ");
-            return false;
-        }
-        return true;
-    }
-
-    template<typename T> 
-    bool multiplyOver4thDimension(const hoNDArray<T>& x4D, const hoNDArray<T>& y5D, hoNDArray<T>& r)
-    {
-        try
-        {
-            boost::shared_ptr< std::vector<size_t> > dimX = x4D.get_dimensions();
-            boost::shared_ptr< std::vector<size_t> > dimY = y5D.get_dimensions();
-
-            size_t NDim = dimY->size();
-
-            GADGET_CHECK_RETURN_FALSE(dimX->size()>=4);
-            GADGET_CHECK_RETURN_FALSE((*dimX)[0]==(*dimY)[0]);
-            GADGET_CHECK_RETURN_FALSE((*dimX)[1]==(*dimY)[1]);
-            GADGET_CHECK_RETURN_FALSE((*dimX)[2]==(*dimY)[2]);
-
-            if ( !r.dimensions_equal(dimY.get()) )
-            {
-                r.create(dimY);
-            }
-
-            size_t RO = (*dimX)[0];
-            size_t E1 = (*dimX)[1];
-            size_t CHA = (*dimX)[2];
-
-            int t, N3D = RO*E1*CHA;
-
-            size_t N = (*dimY)[3];
-            size_t num = x4D.get_number_of_elements()/(RO*E1*CHA);
-
-            const T* pX = x4D.begin();
-            const T* pY = y5D.begin();
-            T* pR = r.begin();
-
-            std::vector<size_t> dim3D(3);
-            dim3D[0] = RO;
-            dim3D[1] = E1;
-            dim3D[2] = CHA;
-
-            #pragma omp parallel for default(none) private(t) shared(N3D, N, dim3D, pX, pY, pR, num)
-            for ( t=0; t<(int)num; t++ )
-            {
-                hoNDArray<T> tmpX, tmpY, tmpR;
-                tmpX.create(&dim3D, const_cast<T*>(pX+t*N3D));
-
-                for ( int n=0; n<N; n++ )
-                {
-                    tmpY.create(&dim3D, const_cast<T*>(pY+t*N3D*N+n*N3D));
-                    tmpR.create(&dim3D, pR+t*N3D*N+n*N3D);
-                    multiply(tmpX, tmpY, tmpR);
-                }
-            }
-        }
-        catch (...)
-        {
-            GADGET_ERROR_MSG("Errors in multiplyOver4thDimension(const hoNDArray<float>& x4D, const hoNDArray<float>& y5D, hoNDArray<float>& r) ... ");
-            return false;
-        }
-        return true;
-    }
-
-    template<typename T> 
-    bool multiplyOver4thDimensionExcept(const hoNDArray<T>& x4D, const hoNDArray<T>& y5D, size_t n, hoNDArray<T>& r, bool copyY2R)
-    {
-        try
-        {
-            boost::shared_ptr< std::vector<size_t> > dimX = x4D.get_dimensions();
-            boost::shared_ptr< std::vector<size_t> > dimY = y5D.get_dimensions();
-
-            size_t NDim = dimY->size();
-
-            GADGET_CHECK_RETURN_FALSE(dimX->size()>=4);
-            GADGET_CHECK_RETURN_FALSE((*dimX)[0]==(*dimY)[0]);
-            GADGET_CHECK_RETURN_FALSE((*dimX)[1]==(*dimY)[1]);
-            GADGET_CHECK_RETURN_FALSE((*dimX)[2]==(*dimY)[2]);
-
-            const T* pX = x4D.begin();
-            const T* pY = y5D.begin();
-            T* pR = r.begin();
-
-            if ( (pR!=pY) && (!r.dimensions_equal(dimY.get())) )
-            {
-                r.create(dimY);
-                pR = r.begin();
-            }
-
-            size_t RO = (*dimX)[0];
-            size_t E1 = (*dimX)[1];
-            size_t CHA = (*dimX)[2];
-
-            int t, N3D = RO*E1*CHA;
-
-            size_t N = (*dimY)[3];
-            size_t num = x4D.get_number_of_elements()/(RO*E1*CHA);
-
-            std::vector<size_t> dim3D(3);
-            dim3D[0] = RO;
-            dim3D[1] = E1;
-            dim3D[2] = CHA;
-
-            #pragma omp parallel for default(none) private(t) shared(N3D, N, dim3D, pX, pY, pR, num, n, copyY2R)
-            for ( t=0; t<(int)num; t++ )
-            {
-                hoNDArray<T> tmpX, tmpY, tmpR;
-                tmpX.create(&dim3D, const_cast<T*>(pX+t*N3D));
-
-                for ( int z=0; z<N; z++ )
-                {
-                    if ( z != n )
-                    {
-                        tmpY.create(&dim3D, const_cast<T*>(pY+t*N3D*N+z*N3D));
-                        tmpR.create(&dim3D, pR+t*N3D*N+z*N3D);
-                        multiply(tmpX, tmpY, tmpR);
-                    }
-                    else
-                    {
-                        if ( pR != pY )
-                        {
-                            if ( copyY2R )
-                            {
-                                memcpy(pR+t*N3D*N+z*N3D, const_cast<T*>(pY+t*N3D*N+z*N3D), sizeof(T)*N3D);
-                            }
-                        }
-                    }
-                }
-            }
-        }
-        catch (...)
-        {
-            GADGET_ERROR_MSG("Errors in multiplyOver4thDimensionExcept(const hoNDArray<float>& x4D, const hoNDArray<float>& y5D, size_t n, hoNDArray<float>& r, bool copyY2R) ... ");
-            return false;
-        }
-        return true;
-    }
-
-    template<typename T> 
-    bool multiplyOver5thDimension(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r)
-    {
-        try
-        {
-            boost::shared_ptr< std::vector<size_t> > dimX = x.get_dimensions();
-            boost::shared_ptr< std::vector<size_t> > dimY = y.get_dimensions();
-
-            size_t NDim = dimY->size();
-
-            GADGET_CHECK_RETURN_FALSE(dimX->size()>=5);
-            GADGET_CHECK_RETURN_FALSE((*dimX)[0]==(*dimY)[0]);
-            GADGET_CHECK_RETURN_FALSE((*dimX)[1]==(*dimY)[1]);
-            GADGET_CHECK_RETURN_FALSE((*dimX)[2]==(*dimY)[2]);
-            GADGET_CHECK_RETURN_FALSE((*dimX)[3]==(*dimY)[3]);
-
-            if ( !r.dimensions_equal(dimY.get()) )
-            {
-                r.create(dimY);
-            }
-
-            size_t RO = (*dimX)[0];
-            size_t E1 = (*dimX)[1];
-            size_t E2 = (*dimX)[2];
-            size_t CHA = (*dimX)[3];
-
-            int t, N4D = RO*E1*E2*CHA;
-
-            size_t N = (*dimY)[4];
-            size_t num = x.get_number_of_elements()/N4D;
-
-            const T* pX = x.begin();
-            const T* pY = y.begin();
-            T* pR = r.begin();
-
-            std::vector<size_t> dim4D(4);
-            dim4D[0] = RO;
-            dim4D[1] = E1;
-            dim4D[2] = E2;
-            dim4D[3] = CHA;
-
-            #pragma omp parallel for default(none) private(t) shared(N4D, N, dim4D, pX, pY, pR, num)
-            for ( t=0; t<(int)num; t++ )
-            {
-                hoNDArray<T> tmpX, tmpY, tmpR;
-                tmpX.create(&dim4D, const_cast<T*>(pX+t*N4D));
-
-                for ( int n=0; n<N; n++ )
-                {
-                    tmpY.create(&dim4D, const_cast<T*>(pY+t*N4D*N+n*N4D));
-                    tmpR.create(&dim4D, pR+t*N4D*N+n*N4D);
-                    multiply(tmpX, tmpY, tmpR);
-                }
-            }
-        }
-        catch (...)
-        {
-            GADGET_ERROR_MSG("Errors in multiplyOver5thDimension(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r) ... ");
-            return false;
-        }
-        return true;
-    }
-
-    template<typename T> 
-    bool multiplyOver5thDimensionExcept(const hoNDArray<T>& x, const hoNDArray<T>& y, size_t n, hoNDArray<T>& r, bool copyY2R)
-    {
-        try
-        {
-            boost::shared_ptr< std::vector<size_t> > dimX = x.get_dimensions();
-            boost::shared_ptr< std::vector<size_t> > dimY = y.get_dimensions();
-
-            size_t NDim = dimY->size();
-
-            GADGET_CHECK_RETURN_FALSE(dimX->size()>=5);
-            GADGET_CHECK_RETURN_FALSE((*dimX)[0]==(*dimY)[0]);
-            GADGET_CHECK_RETURN_FALSE((*dimX)[1]==(*dimY)[1]);
-            GADGET_CHECK_RETURN_FALSE((*dimX)[2]==(*dimY)[2]);
-            GADGET_CHECK_RETURN_FALSE((*dimX)[3]==(*dimY)[3]);
-
-            const T* pX = x.begin();
-            const T* pY = y.begin();
-            T* pR = r.begin();
-
-            if ( (pR!=pY) && (!r.dimensions_equal(dimY.get())) )
-            {
-                r.create(dimY);
-                pR = r.begin();
-            }
-
-            size_t RO = (*dimX)[0];
-            size_t E1 = (*dimX)[1];
-            size_t E2 = (*dimX)[2];
-            size_t CHA = (*dimX)[3];
-
-            int t, N4D = RO*E1*E2*CHA;
-
-            size_t N = (*dimY)[4];
-            size_t num = x.get_number_of_elements()/N4D;
-
-            std::vector<size_t> dim4D(4);
-            dim4D[0] = RO;
-            dim4D[1] = E1;
-            dim4D[2] = E2;
-            dim4D[3] = CHA;
-
-            #pragma omp parallel for default(none) private(t) shared(N4D, dim4D, pX, pY, pR, num, n, N, copyY2R)
-            for ( t=0; t<(int)num; t++ )
-            {
-                hoNDArray<T> tmpX, tmpY, tmpR;
-                tmpX.create(&dim4D, const_cast<T*>(pX+t*N4D));
-
-                for ( int z=0; z<N; z++ )
-                {
-                    if ( z != n )
-                    {
-                        tmpY.create(&dim4D, const_cast<T*>(pY+t*N4D*N+z*N4D));
-                        tmpR.create(&dim4D, pR+t*N4D*N+z*N4D);
-                        multiply(tmpX, tmpY, tmpR);
-                    }
-                    else
-                    {
-                        if ( pR != pY )
-                        {
-                            if ( copyY2R )
-                            {
-                                memcpy(pR+t*N4D*N+z*N4D, const_cast<T*>(pY+t*N4D*N+z*N4D), sizeof(T)*N4D);
-                            }
-                        }
-                    }
-                }
-            }
-        }
-        catch (...)
-        {
-            GADGET_ERROR_MSG("Errors in multiplyOver5thDimensionExcept(const hoNDArray<T>& x, const hoNDArray<T>& y, size_t n, hoNDArray<T>& r, bool copyY2R) ... ");
-            return false;
-        }
-        return true;
-    }
-
-    template <typename T> 
-    bool multipleAdd(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r)
-    {
-        GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()<=y.get_number_of_elements());
-        if ( r.get_number_of_elements()!=y.get_number_of_elements())
-        {
-            r = y;
-        }
-
-        int Nx = x.get_number_of_elements();
-        int N = y.get_number_of_elements() / Nx;
-
-        int n;
-
-        if ( typeid(T)==typeid(float) )
-        {
-            #ifdef GCC_OLD_FLAG
-                #pragma omp parallel for default(none) private(n) shared(Nx, N)
-            #else
-                #pragma omp parallel for default(none) private(n) shared(x, y, r, Nx, N)
-            #endif
-            for ( n=0; n<N; n++ )
-            {
-                vsAdd(x.get_number_of_elements(), reinterpret_cast<const float*>(x.begin()), reinterpret_cast<const float*>(y.begin()+n*Nx), reinterpret_cast<float*>(r.begin()+n*Nx));
-            }
-        }
-        else if ( typeid(T)==typeid(double) )
-        {
-            #ifdef GCC_OLD_FLAG
-                #pragma omp parallel for default(none) private(n) shared(Nx, N)
-            #else
-                #pragma omp parallel for default(none) private(n) shared(x, y, r, Nx, N)
-            #endif
-            for ( n=0; n<N; n++ )
-            {
-                vdAdd(x.get_number_of_elements(), reinterpret_cast<const double*>(x.begin()), reinterpret_cast<const double*>(y.begin()+n*Nx), reinterpret_cast<double*>(r.begin()+n*Nx));
-            }
-        }
-        else if ( typeid(T)==typeid(GT_Complex8) )
-        {
-            #ifdef GCC_OLD_FLAG
-                #pragma omp parallel for default(none) private(n) shared(Nx, N)
-            #else
-                #pragma omp parallel for default(none) private(n) shared(x, y, r, Nx, N)
-            #endif
-            for ( n=0; n<N; n++ )
-            {
-                vcAdd(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex8*>(x.begin()), reinterpret_cast<const MKL_Complex8*>(y.begin()+n*Nx), reinterpret_cast<MKL_Complex8*>(r.begin()+n*Nx));
-            }
-        }
-        else if ( typeid(T)==typeid(GT_Complex16) )
-        {
-            #ifdef GCC_OLD_FLAG
-                #pragma omp parallel for default(none) private(n) shared(Nx, N)
-            #else
-                #pragma omp parallel for default(none) private(n) shared(x, y, r, Nx, N)
-            #endif
-            for ( n=0; n<N; n++ )
-            {
-                vzAdd(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex16*>(x.begin()), reinterpret_cast<const MKL_Complex16*>(y.begin()+n*Nx), reinterpret_cast<MKL_Complex16*>(r.begin()+n*Nx));
-            }
-        }
-        else
-        {
-            GADGET_ERROR_MSG("multipleAdd : unsupported type " << typeid(T).name());
-            return false;
-        }
-
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    template <typename T> 
-    bool multipleMultiply(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r)
-    {
-        GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()<=y.get_number_of_elements());
-        if ( r.get_number_of_elements()!=y.get_number_of_elements())
-        {
-            r = y;
-        }
-
-        int Nx = x.get_number_of_elements();
-        int N = y.get_number_of_elements() / Nx;
-
-        int n;
-
-        if ( typeid(T)==typeid(float) )
-        {
-            #ifdef GCC_OLD_FLAG
-                #pragma omp parallel for default(none) private(n) shared(Nx, N)
-            #else
-                #pragma omp parallel for default(none) private(n) shared(x, y, r, Nx, N)
-            #endif
-            for ( n=0; n<N; n++ )
-            {
-                vsMul(x.get_number_of_elements(), reinterpret_cast<const float*>(x.begin()), reinterpret_cast<const float*>(y.begin()+n*Nx), reinterpret_cast<float*>(r.begin()+n*Nx));
-            }
-        }
-        else if ( typeid(T)==typeid(double) )
-        {
-            #ifdef GCC_OLD_FLAG
-                #pragma omp parallel for default(none) private(n) shared(Nx, N)
-            #else
-                #pragma omp parallel for default(none) private(n) shared(x, y, r, Nx, N)
-            #endif
-            for ( n=0; n<N; n++ )
-            {
-                vdMul(x.get_number_of_elements(), reinterpret_cast<const double*>(x.begin()), reinterpret_cast<const double*>(y.begin()+n*Nx), reinterpret_cast<double*>(r.begin()+n*Nx));
-            }
-        }
-        else if ( typeid(T)==typeid(GT_Complex8) )
-        {
-            #ifdef GCC_OLD_FLAG
-                #pragma omp parallel for default(none) private(n) shared(Nx, N)
-            #else
-                #pragma omp parallel for default(none) private(n) shared(x, y, r, Nx, N)
-            #endif
-            for ( n=0; n<N; n++ )
-            {
-                vcMul(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex8*>(x.begin()), reinterpret_cast<const MKL_Complex8*>(y.begin()+n*Nx), reinterpret_cast<MKL_Complex8*>(r.begin()+n*Nx));
-            }
-        }
-        else if ( typeid(T)==typeid(GT_Complex16) )
-        {
-            #ifdef GCC_OLD_FLAG
-                #pragma omp parallel for default(none) private(n) shared(Nx, N)
-            #else
-                #pragma omp parallel for default(none) private(n) shared(x, y, r, Nx, N)
-            #endif
-            for ( n=0; n<N; n++ )
-            {
-                vzMul(x.get_number_of_elements(), reinterpret_cast<const MKL_Complex16*>(x.begin()), reinterpret_cast<const MKL_Complex16*>(y.begin()+n*Nx), reinterpret_cast<MKL_Complex16*>(r.begin()+n*Nx));
-            }
-        }
-        else
-        {
-            GADGET_ERROR_MSG("multipleMultiply : unsupported type " << typeid(T).name());
-            return false;
-        }
-
-        GADGET_CHECK_RETURN_FALSE(vmlGetErrStatus()==0);
-
-        return true;
-    }
-
-    template <typename T> 
-    bool cropUpTo10DArray(const hoNDArray<T>& x, hoNDArray<T>& r, const std::vector<size_t>& startND, std::vector<size_t>& size)
-    {
-        GADGET_CHECK_RETURN_FALSE( startND.size() == size.size() );
-        GADGET_CHECK_RETURN_FALSE( startND.size() <= 10 );
-
-        r.create(&size);
-        if ( r.get_number_of_elements() == x.get_number_of_elements() )
-        {
-            r = x;
-            return true;
-        }
-
-        std::vector<size_t> start(10, 0);
-        std::vector<size_t> end(10, 0);
-
-        size_t ii;
-        for ( ii=0; ii<startND.size(); ii++ )
-        {
-            start[ii] = startND[ii];
-            end[ii] = start[ii] + size[ii] - 1;
-            GADGET_CHECK_RETURN_FALSE(end[ii] < x.get_size(ii));
-        }
-
-        // [Ro E1 Cha Slice E2 Con Phase Rep Set Seg]
-        size_t ro, e1, cha, n, s, con, phs, rep, set, seg;
-
-        std::vector<size_t> srcInd(10), dstInd(10);
-
-        for ( seg=start[9]; seg<=end[9]; seg++ )
-        {
-            srcInd[9] = seg; dstInd[9] = seg-start[9];
-
-            for ( set=start[8]; set<=end[8]; set++ )
-            {
-                srcInd[8] = set; dstInd[8] = set-start[8];
-
-                for ( rep=start[7]; rep<=end[7]; rep++ )
-                {
-                    srcInd[7] = rep; dstInd[7] = rep-start[7];
-
-                    for ( phs=start[6]; phs<=end[6]; phs++ )
-                    {
-                        srcInd[6] = phs; dstInd[6] = phs-start[6];
-
-                        for ( con=start[5]; con<=end[5]; con++ )
-                        {
-                            srcInd[5] = con; dstInd[5] = con-start[5];
-
-                            for ( s=start[4]; s<=end[4]; s++ )
-                            {
-                                srcInd[4] = s; dstInd[4] = s-start[4];
-
-                                for ( n=start[3]; n<=end[3]; n++ )
-                                {
-                                    srcInd[3] = n; dstInd[3] = n-start[3];
-
-                                    for ( cha=start[2]; cha<=end[2]; cha++ )
-                                    {
-                                        srcInd[2] = cha; dstInd[2] = cha-start[2];
-
-                                        for ( e1=start[1]; e1<=end[1]; e1++ )
-                                        {
-                                            srcInd[1] = e1; dstInd[1] = e1-start[1];
-
-                                            srcInd[0] = start[0];
-                                            dstInd[0] = 0;
-
-                                            int offsetSrc = x.calculate_offset(srcInd);
-                                            int offsetDst = r.calculate_offset(dstInd);
-
-                                            memcpy(r.begin()+offsetDst, x.begin()+offsetSrc, sizeof(T)*(end[0]-start[0]+1));
-
-                                            /*for ( ro=start[0]; ro<=end[0]; ro++ )
-                                            {
-                                                srcInd[0] = ro;
-                                                dstInd[0] = ro-start[0];
-
-                                                int offsetSrc = x.calculate_offset(srcInd);
-                                                int offsetDst = r.calculate_offset(dstInd);
-
-                                                r(offsetDst) = x(offsetSrc);
-                                            }*/
-                                        }
-                                    }
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-
-        return true;
-    }
-
-    template <typename T> 
-    bool setSubArrayUpTo10DArray(const hoNDArray<T>& x, hoNDArray<T>& r, const std::vector<size_t>& startND, std::vector<size_t>& size)
-    {
-        GADGET_CHECK_RETURN_FALSE( startND.size() == size.size() );
-        GADGET_CHECK_RETURN_FALSE( startND.size() <= 10 );
-
-        if ( r.get_number_of_elements() == x.get_number_of_elements() )
-        {
-            r = x;
-            return true;
-        }
-
-        std::vector<size_t> start(10, 0);
-        std::vector<size_t> end(10, 0);
-
-        size_t ii;
-        for ( ii=0; ii<startND.size(); ii++ )
-        {
-            start[ii] = startND[ii];
-            end[ii] = start[ii] + size[ii] - 1;
-            GADGET_CHECK_RETURN_FALSE(end[ii] < r.get_size(ii));
-        }
-
-        // [Ro E1 Cha Slice E2 Con Phase Rep Set Seg]
-        size_t ro, e1, cha, n, s, con, phs, rep, set, seg;
-
-        std::vector<size_t> srcInd(10), dstInd(10);
-
-        for ( seg=start[9]; seg<=end[9]; seg++ )
-        {
-            dstInd[9] = seg; srcInd[9] = seg-start[9];
-
-            for ( set=start[8]; set<=end[8]; set++ )
-            {
-                dstInd[8] = set; srcInd[8] = set-start[8];
-
-                for ( rep=start[7]; rep<=end[7]; rep++ )
-                {
-                    dstInd[7] = rep; srcInd[7] = rep-start[7];
-
-                    for ( phs=start[6]; phs<=end[6]; phs++ )
-                    {
-                        dstInd[6] = phs; srcInd[6] = phs-start[6];
-
-                        for ( con=start[5]; con<=end[5]; con++ )
-                        {
-                            dstInd[5] = con; srcInd[5] = con-start[5];
-
-                            for ( s=start[4]; s<=end[4]; s++ )
-                            {
-                                dstInd[4] = s; srcInd[4] = s-start[4];
-
-                                for ( n=start[3]; n<=end[3]; n++ )
-                                {
-                                    dstInd[3] = n; srcInd[3] = n-start[3];
-
-                                    for ( cha=start[2]; cha<=end[2]; cha++ )
-                                    {
-                                        dstInd[2] = cha; srcInd[2] = cha-start[2];
-
-                                        for ( e1=start[1]; e1<=end[1]; e1++ )
-                                        {
-                                            dstInd[1] = e1; srcInd[1] = e1-start[1];
-
-                                            dstInd[0] = start[0];
-                                            srcInd[0] = 0;
-
-                                            int offsetSrc = x.calculate_offset(srcInd);
-                                            int offsetDst = r.calculate_offset(dstInd);
-
-                                            memcpy(r.begin()+offsetDst, x.begin()+offsetSrc, sizeof(T)*(end[0]-start[0]+1));
-                                        }
-                                    }
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-
-        return true;
-    }
-
-    template<typename T> 
-    bool stdOver3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& std, bool NMinusOne)
-    {
-        try
-        {
-            GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_dimensions() >= 3);
-
-            size_t RO = x.get_size(0);
-            size_t E1 = x.get_size(1);
-            size_t CHA = x.get_size(2);
-
-            int num = (int)x.get_number_of_elements() / (RO*E1*CHA);
-
-            boost::shared_ptr< std::vector<size_t> > dim = x.get_dimensions();
-
-            std::vector<size_t> dimStd(*dim);
-            dimStd.erase(dimStd.begin()+2);
-            std.create(&dimStd);
-
-            std::vector<size_t> dim3D(3);
-            dim3D[0] = RO;
-            dim3D[1] = E1;
-            dim3D[2] = CHA;
-
-            T S(CHA);
-            if ( NMinusOne )
-            {
-                S = T(CHA-1);
-            }
-
-            T v(0), v1(0);
-            T S2 = T(1.0)/S;
-            T S3 = T(1.0)/T(CHA);
-
-            int n;
-
-            #ifdef GCC_OLD_FLAG
-                #pragma omp parallel for default(none) private(n) shared(num, RO, E1, CHA, S, S2, S3, v, v1)
-            #else
-                #pragma omp parallel for default(none) private(n) shared(num, RO, E1, CHA, x, std, S, S2, S3, v, v1)
-            #endif
-            for ( n=0; n<num; n++ )
-            {
-                hoNDArray<T> xTmp(RO, E1, CHA, const_cast<T*>(x.begin()+n*RO*E1*CHA));
-                hoNDArray<T> mean(RO, E1);
-
-                size_t ro, e1, cha;
-                for ( cha=0; cha<CHA; cha++ )
-                {
-                    for ( e1=0; e1<E1; e1++ )
-                    {
-                        for ( ro=0; ro<RO; ro++ )
-                        {
-                            mean(ro+e1*RO) += xTmp(cha*RO*E1+e1*RO+ro)*S3;
-                        }
-                    }
-                }
-
-                for ( e1=0; e1<E1; e1++ )
-                {
-                    for ( ro=0; ro<RO; ro++ )
-                    {
-                        int ind = e1*RO+ro;
-
-                        v = 0; v1 = 0;
-                        for ( cha=0; cha<CHA; cha++ )
-                        {
-                            v1 = std::abs(xTmp(cha*RO*E1+ind)-mean(ind));
-                            v += v1*v1;
-                        }
-
-                        v /= S;
-                        std(ind+n*RO*E1) = std::sqrt(v);
-                    }
-                }
-            }
-        }
-        catch(...)
-        {
-            GADGET_ERROR_MSG("Errors in stdOver3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& std, bool NMinusOne) ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    /*template<typename T> 
-    bool permuteLastTwoDimensions(const hoNDArray<T>& x, hoNDArray<T>& r)
-    {
-        try
-        {
-            boost::shared_ptr< std::vector<size_t> > dimX = x.get_dimensions();
-
-            size_t NDim = dimX->size();
-
-            if ( NDim <= 2 )
-            {
-                r = x;
-                return true;
-            }
-
-            size_t E1 = x.get_size(NDim-2);
-            size_t E2 = x.get_size(NDim-1);
-
-            std::vector<size_t> dimR(*dimX);
-            dimR[NDim-2] = E2;
-            dimR[NDim-1] = E1;
-
-            r.create(&dimR);
-
-            size_t N = x.get_number_of_elements()/E1/E2;
-
-            const T* pX = x.begin();
-            T* pR = r.begin();
-
-            int e2;
-
-            #pragma omp parallel for default(none) private(e2) shared(E2, E1, pR, pX, N)
-            for ( e2=0; e2<(int)E2; e2++ )
-            {
-                for ( size_t e1=0; e1<E1; e1++ )
-                {
-                    memcpy(pR+e1*N*E2+e2*N, pX+e2*N*E1+e1*N, sizeof(T)*N);
-                }
-            }
-        }
-        catch (...)
-        {
-            GADGET_ERROR_MSG("Errors in permuteLastTwoDimensions(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
-            return false;
-        }
-        return true;
-    }*/
-
-    /// Copy the planes start..end (inclusive) of the third dimension of x into r.
-    ///
-    /// @param x      source array; its first three dimensions are read as RO, E1 and E2
-    /// @param r      destination; recreated with the dimensions of x, except that the third becomes end-start+1
-    /// @param start  first plane to keep
-    /// @param end    last plane to keep (inclusive)
-    /// @return true on success; false when end lies outside the third dimension of x or an exception is caught
-    ///
-    /// r is simply assigned from x when x has at most two dimensions, or when the computed
-    /// range length end-start+1 is at least as large as the third dimension.
-    template<typename T> 
-    bool cropOver3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& r, size_t start, size_t end)
-    {
-        try
-        {
-            boost::shared_ptr< std::vector<size_t> > dimX = x.get_dimensions();
-
-            if ( dimX->size() <= 2 )
-            {
-                r = x;
-                return true;
-            }
-
-            size_t RO = x.get_size(0);
-            size_t E1 = x.get_size(1);
-            size_t E2 = x.get_size(2);
-
-            size_t E2_R = end-start+1;
-
-            if ( E2 <= E2_R )
-            {
-                r = x;
-                return true;
-            }
-
-            // A non-empty range has to end inside x; otherwise the memcpy below would read past the end of x.
-            if ( start <= end && end >= E2 )
-            {
-                GADGET_ERROR_MSG("cropOver3rdDimension : end exceeds the size of the 3rd dimension ... ");
-                return false;
-            }
-
-            std::vector<size_t> dimR(*dimX);
-            dimR[2] = E2_R;
-
-            r.create(&dimR);
-
-            size_t N2D = RO*E1;          // elements in one plane
-            size_t N3D = N2D*E2;         // elements in one source volume
-            size_t N3D_R = N2D*E2_R;     // elements in one cropped volume
-
-            size_t N = x.get_number_of_elements()/N3D;
-
-            const T* pX = x.begin();
-            T* pR = r.begin();
-
-            size_t n;
-            for ( n=0; n<N; n++ )
-            {
-                // signed 64-bit index: wide enough for a size_t bound and accepted by OpenMP as a loop variable
-                long long e2;
-                #pragma omp parallel for default(none) private(e2) shared(N2D, N3D, N3D_R, pX, pR, n, start, end)
-                for ( e2=(long long)start; e2<=(long long)end; e2++ )
-                {
-                    const size_t plane = (size_t)e2;
-                    memcpy(pR+n*N3D_R+(plane-start)*N2D, pX+n*N3D+plane*N2D, sizeof(T)*N2D);
-                }
-            }
-        }
-        catch (...)
-        {
-            GADGET_ERROR_MSG("Errors in cropOver3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& r, size_t start, size_t end) ... ");
-            return false;
-        }
-        return true;
-    }
-
-    /// Write x into the planes start..end (inclusive) of the third dimension of r.
-    ///
-    /// @param x      source array; its third dimension must equal end-start+1
-    /// @param r      destination array; its first three dimensions are read as RO, E1 and E2
-    /// @param start  first plane of r to overwrite
-    /// @param end    last plane of r to overwrite (inclusive)
-    /// @return true on success; false when the sizes do not fit or an exception is caught
-    ///
-    /// r is simply assigned from x when r has at most two dimensions, or when the range
-    /// covers at least the whole third dimension of r.
-    template<typename T> bool setSubArrayOver3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& r, size_t start, size_t end)
-    {
-        try
-        {
-            boost::shared_ptr< std::vector<size_t> > dimR = r.get_dimensions();
-
-            if ( dimR->size() <= 2 )
-            {
-                r = x;
-                return true;
-            }
-
-            size_t RO = r.get_size(0);
-            size_t E1 = r.get_size(1);
-            size_t E2 = r.get_size(2);
-
-            size_t E2_X = end-start+1;
-            GADGET_CHECK_RETURN_FALSE( E2_X == x.get_size(2) );
-
-            if ( E2_X >= E2 )
-            {
-                r = x;
-                return true;
-            }
-
-            size_t N2D = RO*E1;          // elements in one plane
-            size_t N3D = N2D*E2;         // elements in one volume of r
-            size_t N3D_X = N2D*E2_X;     // elements in one volume of x
-
-            size_t N = r.get_number_of_elements()/N3D;
-
-            // The memcpy below writes planes start..end of r and reads N*N3D_X elements of x:
-            // reject a range that leaves r and a source that is too short.
-            GADGET_CHECK_RETURN_FALSE( start > end || end < E2 );
-            GADGET_CHECK_RETURN_FALSE( x.get_number_of_elements() >= N*N3D_X );
-
-            const T* pX = x.begin();
-            T* pR = r.begin();
-
-            size_t n;
-            for ( n=0; n<N; n++ )
-            {
-                // signed 64-bit index: wide enough for a size_t bound and accepted by OpenMP as a loop variable
-                long long e2;
-                #pragma omp parallel for default(none) private(e2) shared(N2D, N3D, N3D_X, pX, pR, n, start, end)
-                for ( e2=(long long)start; e2<=(long long)end; e2++ )
-                {
-                    const size_t plane = (size_t)e2;
-                    memcpy(pR+n*N3D+plane*N2D, pX+n*N3D_X+(plane-start)*N2D, sizeof(T)*N2D);
-                }
-            }
-        }
-        catch (...)
-        {
-            GADGET_ERROR_MSG("Errors in setSubArrayOver3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& r, size_t start, size_t end) ... ");
-            return false;
-        }
-        return true;
-    }
-
-    /// Reorder the dimensions [RO E1 CHA SLC E2 ...] of x into [RO E1 E2 CHA SLC ...] in r.
-    ///
-    /// @param x  input array; assigned to r unchanged when it has fewer than five dimensions
-    /// @param r  output array, recreated with the permuted dimensions
-    /// @return true on success, false if an exception was caught
-    ///
-    /// Only dimensions 0..4 are used, so an array with exactly five dimensions is permuted as
-    /// well (the same threshold as permuteE2To5thDimension, which performs the inverse reordering).
-    template<typename T> 
-    bool permuteE2To3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& r)
-    {
-        try
-        {
-            boost::shared_ptr< std::vector<size_t> > dimX = x.get_dimensions();
-
-            size_t NDim = dimX->size();
-
-            if ( NDim < 5 )
-            {
-                r = x;
-                return true;
-            }
-
-            size_t RO = x.get_size(0);
-            size_t E1 = x.get_size(1);
-            size_t CHA = x.get_size(2);
-            size_t SLC = x.get_size(3);
-            size_t E2 = x.get_size(4);
-
-            std::vector<size_t> dimR(*dimX);
-            dimR[2] = E2;
-            dimR[3] = CHA;
-            dimR[4] = SLC;
-
-            r.create(&dimR);
-
-            size_t N2D = RO*E1;
-            size_t N5D = RO*E1*CHA*E2*SLC;
-
-            size_t N = x.get_number_of_elements()/N5D;
-
-            const T* pX = x.begin();
-            T* pR = r.begin();
-
-            size_t n;
-            for ( n=0; n<N; n++ )
-            {
-                // signed 64-bit index: wide enough for a size_t bound and accepted by OpenMP as a loop variable
-                long long e2;
-                #pragma omp parallel for default(none) private(e2) shared(N5D, N2D, pX, pR, CHA, SLC, E2, n)
-                for ( e2=0; e2<(long long)E2; e2++ )
-                {
-                    for ( size_t slc=0; slc<SLC; slc++ )
-                    {
-                        for ( size_t cha=0; cha<CHA; cha++ )
-                        {
-                            // one RO*E1 plane moves from (cha, slc, e2) in x to (e2, cha, slc) in r
-                            memcpy(pR+n*N5D+slc*CHA*E2*N2D+cha*E2*N2D+e2*N2D, pX+n*N5D+e2*SLC*CHA*N2D+slc*CHA*N2D+cha*N2D, sizeof(T)*N2D);
-                        }
-                    }
-                }
-            }
-        }
-        catch (...)
-        {
-            GADGET_ERROR_MSG("Errors in permuteE2To3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
-            return false;
-        }
-        return true;
-    }
-
-    /// Reorder the dimensions [RO E1 E2 CHA SLC ...] of x into [RO E1 CHA SLC E2 ...] in r.
-    ///
-    /// @param x  input array; assigned to r unchanged when it has fewer than five dimensions
-    /// @param r  output array, recreated with the permuted dimensions
-    /// @return true on success, false if an exception was caught
-    template<typename T> 
-    bool permuteE2To5thDimension(const hoNDArray<T>& x, hoNDArray<T>& r)
-    {
-        try
-        {
-            boost::shared_ptr< std::vector<size_t> > srcDims = x.get_dimensions();
-
-            // fewer than five dimensions: nothing to reorder
-            if ( srcDims->size() < 5 )
-            {
-                r = x;
-                return true;
-            }
-
-            size_t lenRO = x.get_size(0);
-            size_t lenE1 = x.get_size(1);
-            size_t lenE2 = x.get_size(2);
-            size_t lenCHA = x.get_size(3);
-            size_t lenSLC = x.get_size(4);
-
-            std::vector<size_t> dstDims(*srcDims);
-            dstDims[2] = lenCHA;
-            dstDims[3] = lenSLC;
-            dstDims[4] = lenE2;
-            r.create(&dstDims);
-
-            size_t planeLen = lenRO*lenE1;
-            size_t blockLen = lenRO*lenE1*lenCHA*lenE2*lenSLC;
-            size_t numBlocks = x.get_number_of_elements()/blockLen;
-
-            const T* src = x.begin();
-            T* dst = r.begin();
-
-            size_t blk;
-            for ( blk=0; blk<numBlocks; blk++ )
-            {
-                int e2;
-                #pragma omp parallel for default(none) private(e2) shared(blockLen, planeLen, src, dst, lenCHA, lenSLC, lenE2, blk)
-                for ( e2=0; e2<lenE2; e2++ )
-                {
-                    for ( size_t cha=0; cha<lenCHA; cha++ )
-                    {
-                        for ( size_t slc=0; slc<lenSLC; slc++ )
-                        {
-                            // one RO*E1 plane moves from (e2, cha, slc) in x to (cha, slc, e2) in r
-                            T* to = dst + blk*blockLen + ((e2*lenSLC + slc)*lenCHA + cha)*planeLen;
-                            const T* from = src + blk*blockLen + ((slc*lenCHA + cha)*lenE2 + e2)*planeLen;
-                            memcpy(to, from, sizeof(T)*planeLen);
-                        }
-                    }
-                }
-            }
-        }
-        catch (...)
-        {
-            GADGET_ERROR_MSG("Errors in permuteE2To5thDimension(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
-            return false;
-        }
-        return true;
-    }
-
-    /// Rotate the first three dimensions of x so that the leading one ends up third:
-    /// [RO E1 E2 ...] becomes [E1 E2 RO ...].
-    ///
-    /// @param x  input array; assigned to r unchanged when it has fewer than three dimensions
-    /// @param r  output array, recreated with the permuted dimensions
-    /// @return true on success, false if an exception was caught
-    template<typename T> 
-    bool permuteROTo3rdDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r)
-    {
-        try
-        {
-            boost::shared_ptr< std::vector<size_t> > srcDims = x.get_dimensions();
-
-            if ( srcDims->size() < 3 )
-            {
-                r = x;
-                return true;
-            }
-
-            size_t lenRO = x.get_size(0);
-            size_t lenE1 = x.get_size(1);
-            size_t lenE2 = x.get_size(2);
-
-            std::vector<size_t> dstDims(*srcDims);
-            dstDims[0] = lenE1;
-            dstDims[1] = lenE2;
-            dstDims[2] = lenRO;
-            r.create(&dstDims);
-
-            size_t volLen = lenRO*lenE1*lenE2;
-            size_t roStep = lenE1*lenE2;      // distance between consecutive RO samples in r
-            size_t numVol = x.get_number_of_elements()/volLen;
-
-            const T* src = x.begin();
-            T* dst = r.begin();
-
-            long long v;
-
-            #pragma omp parallel for default(none) private(v) shared(lenRO, lenE1, lenE2, roStep, numVol, dst, volLen, src)
-            for ( v=0; v<(long long)numVol; v++ )
-            {
-                // each source volume is read front to back; only the write position jumps
-                const T* in = src + v*volLen;
-                T* outVol = dst + v*volLen;
-
-                for ( size_t e2=0; e2<lenE2; e2++ )
-                {
-                    for ( size_t e1=0; e1<lenE1; e1++ )
-                    {
-                        T* out = outVol + e1 + e2*lenE1;
-                        for ( size_t ro=0; ro<lenRO; ro++ )
-                        {
-                            out[ro*roStep] = *in++;
-                        }
-                    }
-                }
-            }
-        }
-        catch (...)
-        {
-            GADGET_ERROR_MSG("Errors in permuteROTo3rdDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
-            return false;
-        }
-        return true;
-    }
-
-    /// Rotate the first four dimensions of x so that the leading one ends up fourth:
-    /// [RO E1 E2 CHA ...] becomes [E1 E2 CHA RO ...].
-    ///
-    /// @param x  input array; assigned to r unchanged when it has fewer than four dimensions
-    /// @param r  output array, recreated with the permuted dimensions
-    /// @return true on success, false if an exception was caught
-    template<typename T> 
-    bool permuteROTo4thDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r)
-    {
-        try
-        {
-            boost::shared_ptr< std::vector<size_t> > srcDims = x.get_dimensions();
-
-            if ( srcDims->size() < 4 )
-            {
-                r = x;
-                return true;
-            }
-
-            size_t lenRO = x.get_size(0);
-            size_t lenE1 = x.get_size(1);
-            size_t lenE2 = x.get_size(2);
-            size_t lenCHA = x.get_size(3);
-
-            std::vector<size_t> dstDims(*srcDims);
-            dstDims[0] = lenE1;
-            dstDims[1] = lenE2;
-            dstDims[2] = lenCHA;
-            dstDims[3] = lenRO;
-            r.create(&dstDims);
-
-            size_t blockLen = lenRO*lenE1*lenE2*lenCHA;
-            size_t numBlocks = x.get_number_of_elements()/blockLen;
-
-            const T* src = x.begin();
-            T* dst = r.begin();
-
-            for ( long long blk=0; blk<(long long)numBlocks; blk++ )
-            {
-                const T* srcBlk = src + blk*blockLen;
-                T* dstBlk = dst + blk*blockLen;
-
-                long long cha;
-
-                #pragma omp parallel for default(none) private(cha) shared(lenRO, lenE1, lenE2, lenCHA, srcBlk, dstBlk)
-                for ( cha=0; cha<(long long)lenCHA; cha++ )
-                {
-                    // the source channel is read front to back; only the write position jumps
-                    const T* in = srcBlk + cha*lenRO*lenE1*lenE2;
-                    T* outCha = dstBlk + cha*lenE1*lenE2;
-                    size_t roStep = lenE1*lenE2*lenCHA;
-
-                    for ( size_t e2=0; e2<lenE2; e2++ )
-                    {
-                        for ( size_t e1=0; e1<lenE1; e1++ )
-                        {
-                            T* out = outCha + e1 + e2*lenE1;
-                            for ( size_t ro=0; ro<lenRO; ro++ )
-                            {
-                                out[ro*roStep] = *in++;
-                            }
-                        }
-                    }
-                }
-            }
-        }
-        catch (...)
-        {
-            GADGET_ERROR_MSG("Errors in permuteROTo4thDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
-            return false;
-        }
-        return true;
-    }
-
-    /// Rotate the first four dimensions of x so that the fourth one becomes the leading one:
-    /// [E1 E2 CHA RO ...] becomes [RO E1 E2 CHA ...].
-    ///
-    /// @param x  input array; assigned to r unchanged when it has fewer than four dimensions
-    /// @param r  output array, recreated with the permuted dimensions
-    /// @return true on success, false if an exception was caught
-    template<typename T> 
-    bool permuteROTo1stDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r)
-    {
-        try
-        {
-            boost::shared_ptr< std::vector<size_t> > srcDims = x.get_dimensions();
-
-            if ( srcDims->size() < 4 )
-            {
-                r = x;
-                return true;
-            }
-
-            size_t lenE1 = x.get_size(0);
-            size_t lenE2 = x.get_size(1);
-            size_t lenCHA = x.get_size(2);
-            size_t lenRO = x.get_size(3);
-
-            std::vector<size_t> dstDims(*srcDims);
-            dstDims[0] = lenRO;
-            dstDims[1] = lenE1;
-            dstDims[2] = lenE2;
-            dstDims[3] = lenCHA;
-            r.create(&dstDims);
-
-            size_t blockLen = lenRO*lenE1*lenE2*lenCHA;
-            size_t numBlocks = x.get_number_of_elements()/blockLen;
-
-            const T* src = x.begin();
-            T* dst = r.begin();
-
-            for ( long long blk=0; blk<(long long)numBlocks; blk++ )
-            {
-                const T* srcBlk = src + blk*blockLen;
-                T* dstBlk = dst + blk*blockLen;
-
-                long long cha;
-
-                #pragma omp parallel for default(none) private(cha) shared(lenRO, lenE1, lenE2, lenCHA, srcBlk, dstBlk)
-                for ( cha=0; cha<(long long)lenCHA; cha++ )
-                {
-                    // the destination channel is written front to back; only the read position jumps
-                    T* out = dstBlk + cha*lenRO*lenE1*lenE2;
-                    const T* inCha = srcBlk + cha*lenE1*lenE2;
-                    size_t roStep = lenE1*lenE2*lenCHA;
-
-                    for ( size_t e2=0; e2<lenE2; e2++ )
-                    {
-                        for ( size_t e1=0; e1<lenE1; e1++ )
-                        {
-                            const T* in = inCha + e1 + e2*lenE1;
-                            for ( size_t ro=0; ro<lenRO; ro++ )
-                            {
-                                *out++ = in[ro*roStep];
-                            }
-                        }
-                    }
-                }
-            }
-        }
-        catch (...)
-        {
-            GADGET_ERROR_MSG("Errors in permuteROTo1stDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
-            return false;
-        }
-        return true;
-    }
-
-    /// Rotate the first three dimensions of x so that the third one becomes the leading one:
-    /// [RO E1 E2 ...] becomes [E2 RO E1 ...].
-    ///
-    /// @param x  input array; assigned to r unchanged when it has fewer than three dimensions
-    /// @param r  output array, recreated with the permuted dimensions
-    /// @return true on success, false if an exception was caught
-    template<typename T> 
-    bool permute3rdDimensionTo1stDimension(const hoNDArray<T>& x, hoNDArray<T>& r)
-    {
-        try
-        {
-            boost::shared_ptr< std::vector<size_t> > srcDims = x.get_dimensions();
-
-            if ( srcDims->size() < 3 )
-            {
-                r = x;
-                return true;
-            }
-
-            size_t lenRO = x.get_size(0);
-            size_t lenE1 = x.get_size(1);
-            size_t lenE2 = x.get_size(2);
-
-            std::vector<size_t> dstDims(*srcDims);
-            dstDims[0] = lenE2;
-            dstDims[1] = lenRO;
-            dstDims[2] = lenE1;
-            r.create(&dstDims);
-
-            size_t volLen = lenRO*lenE1*lenE2;
-            size_t numVol = x.get_number_of_elements()/volLen;
-
-            const T* src = x.begin();
-            T* dst = r.begin();
-
-            for ( long long v=0; v<(long long)numVol; v++ )
-            {
-                const T* srcVol = src + v*volLen;
-                T* dstVol = dst + v*volLen;
-
-                long long e2;
-
-                #pragma omp parallel for default(none) private(e2) shared(lenRO, lenE1, lenE2, srcVol, dstVol)
-                for ( e2=0; e2<(long long)lenE2; e2++ )
-                {
-                    // one RO*E1 plane of the source is read front to back and scattered with stride E2
-                    const T* in = srcVol + e2*lenRO*lenE1;
-
-                    for ( size_t e1=0; e1<lenE1; e1++ )
-                    {
-                        T* out = dstVol + e2 + e1*lenE2*lenRO;
-                        for ( size_t ro=0; ro<lenRO; ro++ )
-                        {
-                            out[ro*lenE2] = *in++;
-                        }
-                    }
-                }
-            }
-        }
-        catch (...)
-        {
-            GADGET_ERROR_MSG("Errors in permute3rdDimensionTo1stDimension(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
-            return false;
-        }
-        return true;
-    }
-
-    /// Rotate the first five dimensions of x so that the leading one ends up fifth:
-    /// [RO E1 E2 srcCHA dstCHA ...] becomes [E1 E2 srcCHA dstCHA RO ...].
-    ///
-    /// @param x  input array; assigned to r unchanged when it has fewer than five dimensions
-    /// @param r  output array, recreated with the permuted dimensions
-    /// @return true on success, false if an exception was caught
-    template<typename T> 
-    bool permuteROTo5thDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r)
-    {
-        try
-        {
-            boost::shared_ptr< std::vector<size_t> > dimX = x.get_dimensions();
-
-            size_t NDim = dimX->size();
-
-            if ( NDim < 5 )
-            {
-                r = x;
-                return true;
-            }
-
-            size_t RO = x.get_size(0);
-            size_t E1 = x.get_size(1);
-            size_t E2 = x.get_size(2);
-            size_t srcCHA = x.get_size(3);
-            size_t dstCHA = x.get_size(4);
-
-            std::vector<size_t> dimR(*dimX);
-            dimR[0] = E1;
-            dimR[1] = E2;
-            dimR[2] = srcCHA;
-            dimR[3] = dstCHA;
-            dimR[4] = RO;
-
-            r.create(&dimR);
-
-            size_t N5D = RO*E1*E2*srcCHA*dstCHA;
-
-            size_t N = x.get_number_of_elements()/N5D;
-
-            const T* pX = x.begin();
-            T* pR = r.begin();
-
-            long long n;
-            for ( n=0; n<(long long)N; n++ )
-            {
-                T* pRn = pR + n*N5D;
-                const T* pXn = pX + n*N5D;
-
-                long long dcha;
-
-                #pragma omp parallel for default(none) private(dcha) shared(RO, E1, E2, srcCHA, dstCHA, pXn, pRn)
-                for ( dcha=0; dcha<(long long)dstCHA; dcha++ )
-                {
-                    // srcCHA is a size_t: compare it as such instead of narrowing it to int
-                    for ( size_t scha=0; scha<srcCHA; scha++ )
-                    {
-                        for ( size_t e2=0; e2<E2; e2++ )
-                        {
-                            for ( size_t e1=0; e1<E1; e1++ )
-                            {
-                                size_t indRn = e1+e2*E1+scha*E1*E2+dcha*E1*E2*srcCHA;
-                                size_t indXn = e1*RO+e2*RO*E1+scha*RO*E1*E2+dcha*RO*E1*E2*srcCHA;
-                                for ( size_t ro=0; ro<RO; ro++ )
-                                {
-                                    pRn[indRn+ro*E1*E2*srcCHA*dstCHA] = pXn[ro+indXn];
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-        catch (...)
-        {
-            GADGET_ERROR_MSG("Errors in permuteROTo5thDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
-            return false;
-        }
-        return true;
-    }
-
-    /// Image-domain unwrapping of one 2D multi-channel data set.
-    ///
-    /// For every destination channel d, x is multiplied element-wise with the d-th block of
-    /// kernel (ro*e1*srcCHA elements) into buf, and the srcCHA planes of that product are
-    /// summed into the d-th ro*e1 plane of y.
-    ///
-    /// @param x       input; its first three dimensions are read as ro, e1 and srcCHA
-    /// @param kernel  unwrapping coefficients; its fourth dimension is read as dstCHA
-    /// @param buf     scratch array; recreated as [ro e1 srcCHA] when it holds fewer elements than that
-    /// @param y       output; it is not resized here and must already hold at least ro*e1*dstCHA elements
-    /// @return true on success; false when kernel or y is too small, when T is neither
-    ///         GT_Complex8 nor GT_Complex16, or when an exception is caught
-    template<typename T> 
-    bool imageDomainUnwrapping2D(const hoNDArray<T>& x, const hoNDArray<T>& kernel, hoNDArray<T>& buf, hoNDArray<T>& y)
-    {
-        try
-        {
-            bool isComplex8 = ( typeid(T)==typeid(GT_Complex8) );
-            bool isComplex16 = ( typeid(T)==typeid(GT_Complex16) );
-
-            // Only the two complex types have an MKL code path; anything else used to
-            // return true without ever writing y.
-            if ( !isComplex8 && !isComplex16 )
-            {
-                GADGET_ERROR_MSG("imageDomainUnwrapping2D : only GT_Complex8 and GT_Complex16 are supported ... ");
-                return false;
-            }
-
-            size_t ro = x.get_size(0);
-            size_t e1 = x.get_size(1);
-            size_t srcCHA = x.get_size(2);
-            size_t dstCHA = kernel.get_size(3);
-
-            size_t N2D = ro*e1;           // one channel plane
-            size_t N3D = N2D*srcCHA;      // all source channels
-
-            // The loops below address kernel and y through raw pointers; refuse sizes that would overrun them.
-            if ( kernel.get_number_of_elements() < N3D*dstCHA || y.get_number_of_elements() < N2D*dstCHA )
-            {
-                GADGET_ERROR_MSG("imageDomainUnwrapping2D : kernel or y is too small for the dimensions of x ... ");
-                return false;
-            }
-
-            if ( buf.get_number_of_elements() < N3D )
-            {
-                buf.create(ro, e1, srcCHA);
-            }
-
-            const T* pX = x.begin();
-            const T* ker = kernel.begin();
-            T* pY = y.begin();
-            T* pBuf = buf.begin();
-
-            for ( size_t dCha=0; dCha<dstCHA; dCha++ )
-            {
-                T* pYd = pY + dCha*N2D;
-
-                // buf = x .* kernel(:,:,:,dCha)
-                if ( isComplex8 )
-                {
-                    vcMul(N3D, reinterpret_cast<const MKL_Complex8*>(pX), 
-                        reinterpret_cast<const MKL_Complex8*>(ker+dCha*N3D), 
-                        reinterpret_cast<MKL_Complex8*>(pBuf));
-                }
-                else
-                {
-                    vzMul(N3D, reinterpret_cast<const MKL_Complex16*>(pX), 
-                        reinterpret_cast<const MKL_Complex16*>(ker+dCha*N3D), 
-                        reinterpret_cast<MKL_Complex16*>(pBuf));
-                }
-
-                // y(:,:,dCha) = sum of buf over the source channels
-                memcpy(pYd, pBuf, sizeof(T)*N2D);
-                for ( size_t sCha=1; sCha<srcCHA; sCha++ )
-                {
-                    if ( isComplex8 )
-                    {
-                        vcAdd(N2D, reinterpret_cast<MKL_Complex8*>(pYd), 
-                            reinterpret_cast<MKL_Complex8*>(pBuf+sCha*N2D), 
-                            reinterpret_cast<MKL_Complex8*>(pYd));
-                    }
-                    else
-                    {
-                        vzAdd(N2D, reinterpret_cast<MKL_Complex16*>(pYd), 
-                            reinterpret_cast<MKL_Complex16*>(pBuf+sCha*N2D), 
-                            reinterpret_cast<MKL_Complex16*>(pYd));
-                    }
-                }
-            }
-        }
-        catch (...)
-        {
-            GADGET_ERROR_MSG("Errors in imageDomainUnwrapping2D(const hoNDArray<T>& x, const hoNDArray<T>& ker, hoNDArray<T>& buf, hoNDArray<T>& y) ... ");
-            return false;
-        }
-        return true;
-    }
-
-    /// Image-domain unwrapping of a series of N 2D multi-channel data sets.
-    ///
-    /// For every frame n and destination channel d, frame n of x is multiplied element-wise with
-    /// the d-th block of the kernel set chosen for n into buf, and the srcCHA planes of that
-    /// product are summed into plane (d, n) of y. Frames with n >= kerN use the last kernel set.
-    ///
-    /// @param x       input; its first four dimensions are read as ro, e1, srcCHA and N
-    /// @param kernel  unwrapping coefficients; its fourth and fifth dimensions are read as dstCHA and kerN
-    /// @param buf     scratch array; recreated as [ro e1 srcCHA] when it holds fewer elements than that
-    /// @param y       output; it is not resized here and must already hold at least ro*e1*dstCHA*N elements
-    /// @return true on success; false when kernel or y is too small, when T is neither
-    ///         GT_Complex8 nor GT_Complex16, or when an exception is caught
-    template<typename T> 
-    bool imageDomainUnwrapping2DT(const hoNDArray<T>& x, const hoNDArray<T>& kernel, hoNDArray<T>& buf, hoNDArray<T>& y)
-    {
-        try
-        {
-            bool isComplex8 = ( typeid(T)==typeid(GT_Complex8) );
-            bool isComplex16 = ( typeid(T)==typeid(GT_Complex16) );
-
-            // Only the two complex types have an MKL code path; anything else used to
-            // return true without ever writing y.
-            if ( !isComplex8 && !isComplex16 )
-            {
-                GADGET_ERROR_MSG("imageDomainUnwrapping2DT : only GT_Complex8 and GT_Complex16 are supported ... ");
-                return false;
-            }
-
-            size_t ro = x.get_size(0);
-            size_t e1 = x.get_size(1);
-            size_t srcCHA = x.get_size(2);
-            size_t N = x.get_size(3);
-
-            size_t dstCHA = kernel.get_size(3);
-            size_t kerN = kernel.get_size(4);
-
-            size_t N2D = ro*e1;              // one channel plane
-            size_t N3D = N2D*srcCHA;         // all source channels of one frame
-            size_t kerLen = N3D*dstCHA;      // one complete kernel set
-
-            // The loops below address kernel and y through raw pointers; refuse sizes that would
-            // overrun them (kerN == 0 would also make the "last kernel set" index wrap around).
-            size_t kerUsed = (kerN < N) ? kerN : N;
-            if ( N > 0 && ( kerN == 0 
-                    || kernel.get_number_of_elements() < kerUsed*kerLen 
-                    || y.get_number_of_elements() < N*N2D*dstCHA ) )
-            {
-                GADGET_ERROR_MSG("imageDomainUnwrapping2DT : kernel or y is too small for the dimensions of x ... ");
-                return false;
-            }
-
-            if ( buf.get_number_of_elements() < N3D )
-            {
-                buf.create(ro, e1, srcCHA);
-            }
-
-            const T* pXN = x.begin();
-            const T* pKerN = kernel.begin();
-            T* pYN = y.begin();
-
-            // One scratch block is shared by all frames. The frames are processed one after the
-            // other, and buf is only guaranteed to hold ro*e1*srcCHA elements, so it must not be
-            // indexed per frame.
-            T* pBuf = buf.begin();
-
-            if ( isComplex8 )
-            {
-                // Kept from the original GT_Complex8 path: switches nested OpenMP parallelism on.
-                // Nothing in this function switches it back.
-                omp_set_nested(1);
-            }
-
-            for ( size_t n=0; n<N; n++ )
-            {
-                // past the last kernel set, keep using the last one
-                const T* ker = pKerN + ( (n < kerN) ? n : kerN-1 )*kerLen;
-                const T* pX = pXN + n*N3D;
-                T* pY = pYN + n*N2D*dstCHA;
-
-                for ( size_t dCha=0; dCha<dstCHA; dCha++ )
-                {
-                    T* pYd = pY + dCha*N2D;
-
-                    // buf = x(:,:,:,n) .* ker(:,:,:,dCha)
-                    if ( isComplex8 )
-                    {
-                        vcMul(N3D, reinterpret_cast<const MKL_Complex8*>(pX), 
-                            reinterpret_cast<const MKL_Complex8*>(ker+dCha*N3D), 
-                            reinterpret_cast<MKL_Complex8*>(pBuf));
-                    }
-                    else
-                    {
-                        vzMul(N3D, reinterpret_cast<const MKL_Complex16*>(pX), 
-                            reinterpret_cast<const MKL_Complex16*>(ker+dCha*N3D), 
-                            reinterpret_cast<MKL_Complex16*>(pBuf));
-                    }
-
-                    // y(:,:,dCha,n) = sum of buf over the source channels
-                    memcpy(pYd, pBuf, sizeof(T)*N2D);
-                    for ( size_t sCha=1; sCha<srcCHA; sCha++ )
-                    {
-                        if ( isComplex8 )
-                        {
-                            vcAdd(N2D, reinterpret_cast<MKL_Complex8*>(pYd), 
-                                reinterpret_cast<MKL_Complex8*>(pBuf+sCha*N2D), 
-                                reinterpret_cast<MKL_Complex8*>(pYd));
-                        }
-                        else
-                        {
-                            vzAdd(N2D, reinterpret_cast<MKL_Complex16*>(pYd), 
-                                reinterpret_cast<MKL_Complex16*>(pBuf+sCha*N2D), 
-                                reinterpret_cast<MKL_Complex16*>(pYd));
-                        }
-                    }
-                }
-            }
-        }
-        catch (...)
-        {
-            GADGET_ERROR_MSG("Errors in imageDomainUnwrapping2DT(const hoNDArray<T>& x, const hoNDArray<T>& ker, hoNDArray<T>& buf, hoNDArray<T>& y) ... ");
-            return false;
-        }
-        return true;
-    }
-    /* Explicit template instantiations.
-     * Every function listed below is instantiated for the element types float, double,
-     * GT_Complex8 and GT_Complex16.
-     * NOTE(review): EXPORTCPUCOREMATH is presumably the library's symbol-export macro -- confirm.
-     */
-    template EXPORTCPUCOREMATH bool sumOverLastDimension(const hoNDArray<float>& x, hoNDArray<float>& r);
-    template EXPORTCPUCOREMATH bool sumOverLastDimension(const hoNDArray<double>& x, hoNDArray<double>& r);
-    template EXPORTCPUCOREMATH bool sumOverLastDimension(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r);
-    template EXPORTCPUCOREMATH bool sumOverLastDimension(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r);
-
-    template EXPORTCPUCOREMATH bool sumOverSecondLastDimension(const hoNDArray<float>& x, hoNDArray<float>& r);
-    template EXPORTCPUCOREMATH bool sumOverSecondLastDimension(const hoNDArray<double>& x, hoNDArray<double>& r);
-    template EXPORTCPUCOREMATH bool sumOverSecondLastDimension(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r);
-    template EXPORTCPUCOREMATH bool sumOverSecondLastDimension(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r);
-
-    template EXPORTCPUCOREMATH bool multiplyOverLastDimension(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r);
-    template EXPORTCPUCOREMATH bool multiplyOverLastDimension(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
-    template EXPORTCPUCOREMATH bool multiplyOverLastDimension(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, hoNDArray<GT_Complex8>& r);
-    template EXPORTCPUCOREMATH bool multiplyOverLastDimension(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, hoNDArray<GT_Complex16>& r);
-
-    template EXPORTCPUCOREMATH bool divideOverLastDimension(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r);
-    template EXPORTCPUCOREMATH bool divideOverLastDimension(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
-    template EXPORTCPUCOREMATH bool divideOverLastDimension(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, hoNDArray<GT_Complex8>& r);
-    template EXPORTCPUCOREMATH bool divideOverLastDimension(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, hoNDArray<GT_Complex16>& r);
-
-    template EXPORTCPUCOREMATH bool sumOver1stDimension(const hoNDArray<float>& x, hoNDArray<float>& r);
-    template EXPORTCPUCOREMATH bool sumOver1stDimension(const hoNDArray<double>& x, hoNDArray<double>& r);
-    template EXPORTCPUCOREMATH bool sumOver1stDimension(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r);
-    template EXPORTCPUCOREMATH bool sumOver1stDimension(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r);
-
-    template EXPORTCPUCOREMATH bool sumOver2ndDimension(const hoNDArray<float>& x, hoNDArray<float>& r);
-    template EXPORTCPUCOREMATH bool sumOver2ndDimension(const hoNDArray<double>& x, hoNDArray<double>& r);
-    template EXPORTCPUCOREMATH bool sumOver2ndDimension(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r);
-    template EXPORTCPUCOREMATH bool sumOver2ndDimension(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r);
-
-    template EXPORTCPUCOREMATH bool sumOver3rdDimension(const hoNDArray<float>& x, hoNDArray<float>& r);
-    template EXPORTCPUCOREMATH bool sumOver3rdDimension(const hoNDArray<double>& x, hoNDArray<double>& r);
-    template EXPORTCPUCOREMATH bool sumOver3rdDimension(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r);
-    template EXPORTCPUCOREMATH bool sumOver3rdDimension(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r);
-
-    template EXPORTCPUCOREMATH bool sumOver4thDimension(const hoNDArray<float>& x, hoNDArray<float>& r);
-    template EXPORTCPUCOREMATH bool sumOver4thDimension(const hoNDArray<double>& x, hoNDArray<double>& r);
-    template EXPORTCPUCOREMATH bool sumOver4thDimension(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r);
-    template EXPORTCPUCOREMATH bool sumOver4thDimension(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r);
-
-    template EXPORTCPUCOREMATH bool sumOver5thDimension(const hoNDArray<float>& x, hoNDArray<float>& r);
-    template EXPORTCPUCOREMATH bool sumOver5thDimension(const hoNDArray<double>& x, hoNDArray<double>& r);
-    template EXPORTCPUCOREMATH bool sumOver5thDimension(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r);
-    template EXPORTCPUCOREMATH bool sumOver5thDimension(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r);
-
-    template EXPORTCPUCOREMATH bool multiplyOver3rdDimension(const hoNDArray<float>& x3D, const hoNDArray<float>& y4D, hoNDArray<float>& r);
-    template EXPORTCPUCOREMATH bool multiplyOver3rdDimension(const hoNDArray<double>& x3D, const hoNDArray<double>& y4D, hoNDArray<double>& r);
-    template EXPORTCPUCOREMATH bool multiplyOver3rdDimension(const hoNDArray<GT_Complex8>& x3D, const hoNDArray<GT_Complex8>& y4D, hoNDArray<GT_Complex8>& r);
-    template EXPORTCPUCOREMATH bool multiplyOver3rdDimension(const hoNDArray<GT_Complex16>& x3D, const hoNDArray<GT_Complex16>& y4D, hoNDArray<GT_Complex16>& r);
-
-    template EXPORTCPUCOREMATH bool multiplyOver4thDimension(const hoNDArray<float>& x4D, const hoNDArray<float>& y5D, hoNDArray<float>& r);
-    template EXPORTCPUCOREMATH bool multiplyOver4thDimension(const hoNDArray<double>& x4D, const hoNDArray<double>& y5D, hoNDArray<double>& r);
-    template EXPORTCPUCOREMATH bool multiplyOver4thDimension(const hoNDArray<GT_Complex8>& x4D, const hoNDArray<GT_Complex8>& y5D, hoNDArray<GT_Complex8>& r);
-    template EXPORTCPUCOREMATH bool multiplyOver4thDimension(const hoNDArray<GT_Complex16>& x4D, const hoNDArray<GT_Complex16>& y5D, hoNDArray<GT_Complex16>& r);
-
-    template EXPORTCPUCOREMATH bool multiplyOver4thDimensionExcept(const hoNDArray<float>& x4D, const hoNDArray<float>& y5D, size_t n, hoNDArray<float>& r, bool copyY2R);
-    template EXPORTCPUCOREMATH bool multiplyOver4thDimensionExcept(const hoNDArray<double>& x4D, const hoNDArray<double>& y5D, size_t n, hoNDArray<double>& r, bool copyY2R);
-    template EXPORTCPUCOREMATH bool multiplyOver4thDimensionExcept(const hoNDArray<GT_Complex8>& x4D, const hoNDArray<GT_Complex8>& y5D, size_t n, hoNDArray<GT_Complex8>& r, bool copyY2R);
-    template EXPORTCPUCOREMATH bool multiplyOver4thDimensionExcept(const hoNDArray<GT_Complex16>& x4D, const hoNDArray<GT_Complex16>& y5D, size_t n, hoNDArray<GT_Complex16>& r, bool copyY2R);
-
-    template EXPORTCPUCOREMATH bool multiplyOver5thDimension(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r);
-    template EXPORTCPUCOREMATH bool multiplyOver5thDimension(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
-    template EXPORTCPUCOREMATH bool multiplyOver5thDimension(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, hoNDArray<GT_Complex8>& r);
-    template EXPORTCPUCOREMATH bool multiplyOver5thDimension(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, hoNDArray<GT_Complex16>& r);
-
-    template EXPORTCPUCOREMATH bool multiplyOver5thDimensionExcept(const hoNDArray<float>& x, const hoNDArray<float>& y, size_t n, hoNDArray<float>& r, bool copyY2R);
-    template EXPORTCPUCOREMATH bool multiplyOver5thDimensionExcept(const hoNDArray<double>& x, const hoNDArray<double>& y, size_t n, hoNDArray<double>& r, bool copyY2R);
-    template EXPORTCPUCOREMATH bool multiplyOver5thDimensionExcept(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, size_t n, hoNDArray<GT_Complex8>& r, bool copyY2R);
-    template EXPORTCPUCOREMATH bool multiplyOver5thDimensionExcept(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, size_t n, hoNDArray<GT_Complex16>& r, bool copyY2R);
-
-    template EXPORTCPUCOREMATH bool multipleAdd(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r);
-    template EXPORTCPUCOREMATH bool multipleAdd(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
-    template EXPORTCPUCOREMATH bool multipleAdd(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, hoNDArray<GT_Complex8>& r);
-    template EXPORTCPUCOREMATH bool multipleAdd(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, hoNDArray<GT_Complex16>& r);
-
-    template EXPORTCPUCOREMATH bool multipleMultiply(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r);
-    template EXPORTCPUCOREMATH bool multipleMultiply(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
-    template EXPORTCPUCOREMATH bool multipleMultiply(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, hoNDArray<GT_Complex8>& r);
-    template EXPORTCPUCOREMATH bool multipleMultiply(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, hoNDArray<GT_Complex16>& r);
-
-    template EXPORTCPUCOREMATH bool cropUpTo10DArray(const hoNDArray<short>& x, hoNDArray<short>& r, const std::vector<size_t>& start, std::vector<size_t>& size);
-    template EXPORTCPUCOREMATH bool cropUpTo10DArray(const hoNDArray<unsigned short>& x, hoNDArray<unsigned short>& r, const std::vector<size_t>& start, std::vector<size_t>& size);
-    template EXPORTCPUCOREMATH bool cropUpTo10DArray(const hoNDArray<float>& x, hoNDArray<float>& r, const std::vector<size_t>& start, std::vector<size_t>& size);
-    template EXPORTCPUCOREMATH bool cropUpTo10DArray(const hoNDArray<double>& x, hoNDArray<double>& r, const std::vector<size_t>& start, std::vector<size_t>& size);
-    template EXPORTCPUCOREMATH bool cropUpTo10DArray(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r, const std::vector<size_t>& start, std::vector<size_t>& size);
-    template EXPORTCPUCOREMATH bool cropUpTo10DArray(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r, const std::vector<size_t>& start, std::vector<size_t>& size);
-
-    template EXPORTCPUCOREMATH bool setSubArrayUpTo10DArray(const hoNDArray<short>& x, hoNDArray<short>& r, const std::vector<size_t>& start, std::vector<size_t>& size);
-    template EXPORTCPUCOREMATH bool setSubArrayUpTo10DArray(const hoNDArray<unsigned short>& x, hoNDArray<unsigned short>& r, const std::vector<size_t>& start, std::vector<size_t>& size);
-    template EXPORTCPUCOREMATH bool setSubArrayUpTo10DArray(const hoNDArray<float>& x, hoNDArray<float>& r, const std::vector<size_t>& start, std::vector<size_t>& size);
-    template EXPORTCPUCOREMATH bool setSubArrayUpTo10DArray(const hoNDArray<double>& x, hoNDArray<double>& r, const std::vector<size_t>& start, std::vector<size_t>& size);
-    template EXPORTCPUCOREMATH bool setSubArrayUpTo10DArray(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r, const std::vector<size_t>& start, std::vector<size_t>& size);
-    template EXPORTCPUCOREMATH bool setSubArrayUpTo10DArray(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r, const std::vector<size_t>& start, std::vector<size_t>& size);
-
-    template EXPORTCPUCOREMATH bool cropOver3rdDimension(const hoNDArray<short>& x, hoNDArray<short>& r, size_t start, size_t end);
-    template EXPORTCPUCOREMATH bool cropOver3rdDimension(const hoNDArray<unsigned short>& x, hoNDArray<unsigned short>& r, size_t start, size_t end);
-    template EXPORTCPUCOREMATH bool cropOver3rdDimension(const hoNDArray<float>& x, hoNDArray<float>& r, size_t start, size_t end);
-    template EXPORTCPUCOREMATH bool cropOver3rdDimension(const hoNDArray<double>& x, hoNDArray<double>& r, size_t start, size_t end);
-    template EXPORTCPUCOREMATH bool cropOver3rdDimension(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r, size_t start, size_t end);
-    template EXPORTCPUCOREMATH bool cropOver3rdDimension(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r, size_t start, size_t end);
-
-    template EXPORTCPUCOREMATH bool setSubArrayOver3rdDimension(const hoNDArray<short>& x, hoNDArray<short>& r, size_t start, size_t end);
-    template EXPORTCPUCOREMATH bool setSubArrayOver3rdDimension(const hoNDArray<unsigned short>& x, hoNDArray<unsigned short>& r, size_t start, size_t end);
-    template EXPORTCPUCOREMATH bool setSubArrayOver3rdDimension(const hoNDArray<float>& x, hoNDArray<float>& r, size_t start, size_t end);
-    template EXPORTCPUCOREMATH bool setSubArrayOver3rdDimension(const hoNDArray<double>& x, hoNDArray<double>& r, size_t start, size_t end);
-    template EXPORTCPUCOREMATH bool setSubArrayOver3rdDimension(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r, size_t start, size_t end);
-    template EXPORTCPUCOREMATH bool setSubArrayOver3rdDimension(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r, size_t start, size_t end);
-
-    template EXPORTCPUCOREMATH bool stdOver3rdDimension(const hoNDArray<float>& x, hoNDArray<float>& std, bool NMinusOne);
-    template EXPORTCPUCOREMATH bool stdOver3rdDimension(const hoNDArray<double>& x, hoNDArray<double>& std, bool NMinusOne);
-    template EXPORTCPUCOREMATH bool stdOver3rdDimension(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& std, bool NMinusOne);
-    template EXPORTCPUCOREMATH bool stdOver3rdDimension(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& std, bool NMinusOne);
-
-    //template EXPORTCPUCOREMATH bool permuteLastTwoDimensions(const hoNDArray<float>& x, hoNDArray<float>& r);
-    //template EXPORTCPUCOREMATH bool permuteLastTwoDimensions(const hoNDArray<double>& x, hoNDArray<double>& r);
-    //template EXPORTCPUCOREMATH bool permuteLastTwoDimensions(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r);
-    //template EXPORTCPUCOREMATH bool permuteLastTwoDimensions(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r);
-
-    template EXPORTCPUCOREMATH bool permuteE2To3rdDimension(const hoNDArray<float>& x, hoNDArray<float>& r);
-    template EXPORTCPUCOREMATH bool permuteE2To3rdDimension(const hoNDArray<double>& x, hoNDArray<double>& r);
-    template EXPORTCPUCOREMATH bool permuteE2To3rdDimension(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r);
-    template EXPORTCPUCOREMATH bool permuteE2To3rdDimension(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r);
-
-    template EXPORTCPUCOREMATH bool permuteE2To5thDimension(const hoNDArray<float>& x, hoNDArray<float>& r);
-    template EXPORTCPUCOREMATH bool permuteE2To5thDimension(const hoNDArray<double>& x, hoNDArray<double>& r);
-    template EXPORTCPUCOREMATH bool permuteE2To5thDimension(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r);
-    template EXPORTCPUCOREMATH bool permuteE2To5thDimension(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r);
-
-    template EXPORTCPUCOREMATH bool permuteROTo3rdDimensionFor3DRecon(const hoNDArray<float>& x, hoNDArray<float>& r);
-    template EXPORTCPUCOREMATH bool permuteROTo3rdDimensionFor3DRecon(const hoNDArray<double>& x, hoNDArray<double>& r);
-    template EXPORTCPUCOREMATH bool permuteROTo3rdDimensionFor3DRecon(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r);
-    template EXPORTCPUCOREMATH bool permuteROTo3rdDimensionFor3DRecon(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r);
-
-    template EXPORTCPUCOREMATH bool permuteROTo4thDimensionFor3DRecon(const hoNDArray<float>& x, hoNDArray<float>& r);
-    template EXPORTCPUCOREMATH bool permuteROTo4thDimensionFor3DRecon(const hoNDArray<double>& x, hoNDArray<double>& r);
-    template EXPORTCPUCOREMATH bool permuteROTo4thDimensionFor3DRecon(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r);
-    template EXPORTCPUCOREMATH bool permuteROTo4thDimensionFor3DRecon(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r);
-
-    template EXPORTCPUCOREMATH bool permuteROTo1stDimensionFor3DRecon(const hoNDArray<float>& x, hoNDArray<float>& r);
-    template EXPORTCPUCOREMATH bool permuteROTo1stDimensionFor3DRecon(const hoNDArray<double>& x, hoNDArray<double>& r);
-    template EXPORTCPUCOREMATH bool permuteROTo1stDimensionFor3DRecon(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r);
-    template EXPORTCPUCOREMATH bool permuteROTo1stDimensionFor3DRecon(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r);
-
-    template EXPORTCPUCOREMATH bool permute3rdDimensionTo1stDimension(const hoNDArray<float>& x, hoNDArray<float>& r);
-    template EXPORTCPUCOREMATH bool permute3rdDimensionTo1stDimension(const hoNDArray<double>& x, hoNDArray<double>& r);
-    template EXPORTCPUCOREMATH bool permute3rdDimensionTo1stDimension(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r);
-    template EXPORTCPUCOREMATH bool permute3rdDimensionTo1stDimension(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r);
-
-    template EXPORTCPUCOREMATH bool permuteROTo5thDimensionFor3DRecon(const hoNDArray<float>& x, hoNDArray<float>& r);
-    template EXPORTCPUCOREMATH bool permuteROTo5thDimensionFor3DRecon(const hoNDArray<double>& x, hoNDArray<double>& r);
-    template EXPORTCPUCOREMATH bool permuteROTo5thDimensionFor3DRecon(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r);
-    template EXPORTCPUCOREMATH bool permuteROTo5thDimensionFor3DRecon(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r);
-
-    template EXPORTCPUCOREMATH bool imageDomainUnwrapping2D(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& ker, hoNDArray<GT_Complex8>& buf, hoNDArray<GT_Complex8>& y);
-    template EXPORTCPUCOREMATH bool imageDomainUnwrapping2D(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& ker, hoNDArray<GT_Complex16>& buf, hoNDArray<GT_Complex16>& y);
-
-    template EXPORTCPUCOREMATH bool imageDomainUnwrapping2DT(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& ker, hoNDArray<GT_Complex8>& buf, hoNDArray<GT_Complex8>& y);
-    template EXPORTCPUCOREMATH bool imageDomainUnwrapping2DT(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& ker, hoNDArray<GT_Complex16>& buf, hoNDArray<GT_Complex16>& y);
-
-    #endif // USE_MKL
-
-    //
-    // Instantiation
-    //
-
-    template EXPORTCPUCOREMATH void clear<short>( hoNDArray<short>& );
-    template EXPORTCPUCOREMATH void clear<unsigned short>( hoNDArray<unsigned short>& );
-    template EXPORTCPUCOREMATH void clear<int>( hoNDArray<int>& );
-    template EXPORTCPUCOREMATH void clear<size_t>( hoNDArray<size_t>& );
-
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > abs<float>( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH void abs_inplace<float>( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > abs_square<float>( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > sqrt<float>( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH void sqrt_inplace<float>( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > square<float>( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH void square_inplace<float>( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > reciprocal<float>( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH void reciprocal_inplace<float>( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > reciprocal_sqrt<float>( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH void reciprocal_sqrt_inplace<float>( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > sgn<float>( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH void sgn_inplace<float>( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH void clear<float>( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH void clear<float>( hoNDArray<float>& );
-    template EXPORTCPUCOREMATH void fill<float>( hoNDArray<float>*, float );
-    template EXPORTCPUCOREMATH void clamp<float>( hoNDArray<float>*, float, float );
-    template EXPORTCPUCOREMATH void clamp_min<float>( hoNDArray<float>*, float );
-    template EXPORTCPUCOREMATH void clamp_max<float>( hoNDArray<float>*, float );
-    template EXPORTCPUCOREMATH void normalize<float>( hoNDArray<float>*, float );
-    template EXPORTCPUCOREMATH void shrink1<float>( hoNDArray<float>*, float, hoNDArray<float>* );
-    template EXPORTCPUCOREMATH void pshrink<float>( hoNDArray<float>*, float,float, hoNDArray<float>* );
-    template EXPORTCPUCOREMATH void shrinkd<float> ( hoNDArray<float>*, hoNDArray<float>*, float, hoNDArray<float>* );
-    template EXPORTCPUCOREMATH void pshrinkd<float> ( hoNDArray<float>*, hoNDArray<float>*, float, float, hoNDArray<float>* );
-
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > abs<double>( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH void abs_inplace<double>( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > abs_square<double>( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > sqrt<double>( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH void sqrt_inplace<double>( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > square<double>( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH void square_inplace<double>( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > reciprocal<double>( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH void reciprocal_inplace<double>( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > reciprocal_sqrt<double>( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH void reciprocal_sqrt_inplace<double>( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > sgn<double>( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH void sgn_inplace<double>( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH void clear<double>( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH void clear<double>( hoNDArray<double>& );
-    template EXPORTCPUCOREMATH void fill<double>( hoNDArray<double>*, double );
-    template EXPORTCPUCOREMATH void clamp<double>( hoNDArray<double>*, double, double );
-    template EXPORTCPUCOREMATH void clamp_min<double>( hoNDArray<double>*, double );
-    template EXPORTCPUCOREMATH void clamp_max<double>( hoNDArray<double>*, double );
-    template EXPORTCPUCOREMATH void normalize<double>( hoNDArray<double>*, double );
-    template EXPORTCPUCOREMATH void shrink1<double>( hoNDArray<double>*, double, hoNDArray<double>* );
-    template EXPORTCPUCOREMATH void pshrink<double>( hoNDArray<double>*, double,double, hoNDArray<double>* );
-    template EXPORTCPUCOREMATH void shrinkd<double> ( hoNDArray<double>*, hoNDArray<double>*, double, hoNDArray<double>* );
-    template EXPORTCPUCOREMATH void pshrinkd<double> ( hoNDArray<double>*, hoNDArray<double>*, double, double, hoNDArray<double>* );
-
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > abs< std::complex<float> >( hoNDArray< std::complex<float> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > abs_square< std::complex<float> >( hoNDArray< std::complex<float> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<float> > > sqrt< std::complex<float> >( hoNDArray< std::complex<float> >* );
-    template EXPORTCPUCOREMATH void sqrt_inplace< std::complex<float> >( hoNDArray< std::complex<float> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<float> > > square< std::complex<float> >( hoNDArray< std::complex<float> >* );
-    template EXPORTCPUCOREMATH void square_inplace< std::complex<float> >( hoNDArray< std::complex<float> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<float> > > reciprocal< std::complex<float> >( hoNDArray< std::complex<float> >* );
-    template EXPORTCPUCOREMATH void reciprocal_inplace< std::complex<float> >( hoNDArray< std::complex<float> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<float> > > reciprocal_sqrt< std::complex<float> >( hoNDArray< std::complex<float> >* );
-    template EXPORTCPUCOREMATH void reciprocal_sqrt_inplace< std::complex<float> >( hoNDArray< std::complex<float> >* );
-    template EXPORTCPUCOREMATH void clear< std::complex<float> >( hoNDArray< std::complex<float> >* );
-    template EXPORTCPUCOREMATH void clear< std::complex<float> >( hoNDArray< std::complex<float> >& );
-    template EXPORTCPUCOREMATH void fill< std::complex<float> >( hoNDArray< std::complex<float> >*, std::complex<float> );
-    template EXPORTCPUCOREMATH void clamp< std::complex<float> >( hoNDArray< std::complex<float> >*, float, float );
-    template EXPORTCPUCOREMATH void clamp_min< std::complex<float> >( hoNDArray< std::complex<float> >*, float );
-    template EXPORTCPUCOREMATH void clamp_max<std::complex<float> >( hoNDArray< std::complex<float> >*, float );
-    template EXPORTCPUCOREMATH void normalize< std::complex<float> >( hoNDArray< std::complex<float> >*, float );
-    template EXPORTCPUCOREMATH void shrink1< std::complex<float> >( hoNDArray< std::complex<float> >*, float, hoNDArray< std::complex<float> >* );
-    template EXPORTCPUCOREMATH void pshrink< std::complex<float> >( hoNDArray< std::complex<float> >*, float,float, hoNDArray< std::complex<float> >* );
-    template EXPORTCPUCOREMATH void shrinkd< std::complex<float> > ( hoNDArray< std::complex<float> >*, hoNDArray<float>*, float, hoNDArray< std::complex<float> >* );
-    template EXPORTCPUCOREMATH void pshrinkd< std::complex<float> > ( hoNDArray< std::complex<float> >*, hoNDArray<float>*, float, float, hoNDArray< std::complex<float> >* );
-
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > abs< std::complex<double> >( hoNDArray< std::complex<double> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > abs_square< std::complex<double> >( hoNDArray< std::complex<double> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<double> > > sqrt< std::complex<double> >( hoNDArray< std::complex<double> >* );
-    template EXPORTCPUCOREMATH void sqrt_inplace< std::complex<double> >( hoNDArray< std::complex<double> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<double> > > square< std::complex<double> >( hoNDArray< std::complex<double> >* );
-    template EXPORTCPUCOREMATH void square_inplace< std::complex<double> >( hoNDArray< std::complex<double> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<double> > > reciprocal< std::complex<double> >( hoNDArray< std::complex<double> >* );
-    template EXPORTCPUCOREMATH void reciprocal_inplace< std::complex<double> >( hoNDArray< std::complex<double> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<double> > > reciprocal_sqrt< std::complex<double> >( hoNDArray< std::complex<double> >* );
-    template EXPORTCPUCOREMATH void reciprocal_sqrt_inplace< std::complex<double> >( hoNDArray< std::complex<double> >* );
-    template EXPORTCPUCOREMATH void clear< std::complex<double> >( hoNDArray< std::complex<double> >* );
-    template EXPORTCPUCOREMATH void clear< std::complex<double> >( hoNDArray< std::complex<double> >& );
-    template EXPORTCPUCOREMATH void fill< std::complex<double> >( hoNDArray< std::complex<double> >*, std::complex<double> );
-    template EXPORTCPUCOREMATH void clamp< std::complex<double> >( hoNDArray< std::complex<double> >*, double, double );
-    template EXPORTCPUCOREMATH void clamp_min< std::complex<double> >( hoNDArray< std::complex<double> >*, double );
-    template EXPORTCPUCOREMATH void clamp_max<std::complex<double> >( hoNDArray< std::complex<double> >*, double );
-    template EXPORTCPUCOREMATH void normalize< std::complex<double> >( hoNDArray< std::complex<double> >*, double );
-    template EXPORTCPUCOREMATH void shrink1< std::complex<double> >( hoNDArray< std::complex<double> >*, double, hoNDArray< std::complex<double> >* );
-    template EXPORTCPUCOREMATH void pshrink< std::complex<double> >( hoNDArray< std::complex<double> >*, double,double, hoNDArray< std::complex<double> >* );
-    template EXPORTCPUCOREMATH void shrinkd< std::complex<double> > ( hoNDArray< std::complex<double> >*, hoNDArray<double>*, double, hoNDArray< std::complex<double> >* );
-    template EXPORTCPUCOREMATH void pshrinkd< std::complex<double> > ( hoNDArray< std::complex<double> >*, hoNDArray<double>*, double, double, hoNDArray< std::complex<double> >* );
-
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > abs< complext<float> >( hoNDArray< complext<float> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > abs_square< complext<float> >( hoNDArray< complext<float> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< complext<float> > > sqrt< complext<float> >( hoNDArray< complext<float> >* );
-    template EXPORTCPUCOREMATH void sqrt_inplace< complext<float> >( hoNDArray< complext<float> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< complext<float> > > square< complext<float> >( hoNDArray< complext<float> >* );
-    template EXPORTCPUCOREMATH void square_inplace< complext<float> >( hoNDArray< complext<float> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< complext<float> > > reciprocal< complext<float> >( hoNDArray< complext<float> >* );
-    template EXPORTCPUCOREMATH void reciprocal_inplace< complext<float> >( hoNDArray< complext<float> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< complext<float> > > reciprocal_sqrt< complext<float> >( hoNDArray< complext<float> >* );
-    template EXPORTCPUCOREMATH void reciprocal_sqrt_inplace< complext<float> >( hoNDArray< complext<float> >* );
-    template EXPORTCPUCOREMATH void clear< complext<float> >( hoNDArray< complext<float> >* );
-    template EXPORTCPUCOREMATH void clear< complext<float> >( hoNDArray< complext<float> >& );
-    template EXPORTCPUCOREMATH void fill< complext<float> >( hoNDArray< complext<float> >*, complext<float> );
-    template EXPORTCPUCOREMATH void clamp< complext<float> >( hoNDArray< complext<float> >*, float, float );
-    template EXPORTCPUCOREMATH void clamp_min< complext<float> >( hoNDArray< complext<float> >*, float );
-    template EXPORTCPUCOREMATH void clamp_max<complext<float> >( hoNDArray< complext<float> >*, float );
-    template EXPORTCPUCOREMATH void normalize< complext<float> >( hoNDArray< complext<float> >*, float );
-    template EXPORTCPUCOREMATH void shrink1< complext<float> >( hoNDArray< complext<float> >*, float, hoNDArray< complext<float> >* );
-    template EXPORTCPUCOREMATH void pshrink< complext<float> >( hoNDArray< complext<float> >*, float,float, hoNDArray< complext<float> >* );
-    template EXPORTCPUCOREMATH void shrinkd< complext<float> > ( hoNDArray< complext<float> >*, hoNDArray<float>*, float, hoNDArray< complext<float> >* );
-    template EXPORTCPUCOREMATH void pshrinkd< complext<float> > ( hoNDArray< complext<float> >*, hoNDArray<float>*, float, float, hoNDArray< complext<float> >* );
-
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > abs< complext<double> >( hoNDArray< complext<double> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > abs_square< complext<double> >( hoNDArray< complext<double> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< complext<double> > > sqrt< complext<double> >( hoNDArray< complext<double> >* );
-    template EXPORTCPUCOREMATH void sqrt_inplace< complext<double> >( hoNDArray< complext<double> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< complext<double> > > square< complext<double> >( hoNDArray< complext<double> >* );
-    template EXPORTCPUCOREMATH void square_inplace< complext<double> >( hoNDArray< complext<double> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< complext<double> > > reciprocal< complext<double> >( hoNDArray< complext<double> >* );
-    template EXPORTCPUCOREMATH void reciprocal_inplace< complext<double> >( hoNDArray< complext<double> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< complext<double> > > reciprocal_sqrt< complext<double> >( hoNDArray< complext<double> >* );
-    template EXPORTCPUCOREMATH void reciprocal_sqrt_inplace< complext<double> >( hoNDArray< complext<double> >* );
-    template EXPORTCPUCOREMATH void clear< complext<double> >( hoNDArray< complext<double> >* );
-    template EXPORTCPUCOREMATH void clear< complext<double> >( hoNDArray< complext<double> >& );
-    template EXPORTCPUCOREMATH void fill< complext<double> >( hoNDArray< complext<double> >*, complext<double> );
-    template EXPORTCPUCOREMATH void clamp< complext<double> >( hoNDArray< complext<double> >*, double, double );
-    template EXPORTCPUCOREMATH void clamp_min< complext<double> >( hoNDArray< complext<double> >*, double );
-    template EXPORTCPUCOREMATH void clamp_max<complext<double> >( hoNDArray< complext<double> >*, double );
-    template EXPORTCPUCOREMATH void normalize< complext<double> >( hoNDArray< complext<double> >*, double );
-    template EXPORTCPUCOREMATH void shrink1< complext<double> >( hoNDArray< complext<double> >*, double, hoNDArray< complext<double> >* );
-    template EXPORTCPUCOREMATH void pshrink< complext<double> >( hoNDArray< complext<double> >*, double,double, hoNDArray< complext<double> >* );
-    template EXPORTCPUCOREMATH void shrinkd< complext<double> > ( hoNDArray< complext<double> >*, hoNDArray<double>*, double, hoNDArray< complext<double> >* );
-    template EXPORTCPUCOREMATH void pshrinkd< complext<double> > ( hoNDArray< complext<double> >*, hoNDArray<double>*, double, double, hoNDArray< complext<double> >* );
-
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<float> > > real_to_complex< std::complex<float> >( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<float> > > real_imag_to_complex< std::complex<float> >( hoNDArray<float>*, hoNDArray<float>* );
-
-    template EXPORTCPUCOREMATH bool real_imag_to_complex(const hoNDArray<float>& real, const hoNDArray<float>& imag, hoNDArray< std::complex<float> >& cplx);
-    template EXPORTCPUCOREMATH bool real_imag_to_complex(const hoNDArray<float>& real, const hoNDArray<float>& imag, hoNDArray< float_complext >& cplx);
-
-    template EXPORTCPUCOREMATH bool complex_to_real_imag(const hoNDArray< std::complex<float> >& cplx, hoNDArray<float>& real, hoNDArray<float>& imag);
-    //template EXPORTCPUCOREMATH bool complex_to_real_imag(const hoNDArray< float_complext >& cplx, hoNDArray<float>& real, hoNDArray<float>& imag);
-
-    template EXPORTCPUCOREMATH bool complex_to_real(const hoNDArray< std::complex<float> >& cplx, hoNDArray<float>& real);
-    template EXPORTCPUCOREMATH bool complex_to_imag(const hoNDArray< std::complex<float> >& cplx, hoNDArray<float>& imag);
-
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float_complext> > real_to_complex<float_complext>( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float_complext> > real_imag_to_complex<float_complext>( hoNDArray<float>*, hoNDArray<float>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > real<float>( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > real<std::complex<float> >( hoNDArray< std::complex<float> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > real<float_complext>( hoNDArray<float_complext>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > imag<float>( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > imag<std::complex<float> >( hoNDArray< std::complex<float> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > imag<float_complext>( hoNDArray<float_complext>* );
-
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > conj<float>( hoNDArray<float>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<std::complex<float> > > conj<std::complex<float> >( hoNDArray<std::complex<float> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float_complext> > conj<float_complext>( hoNDArray<float_complext>* );
-
-
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<double> > > real_to_complex< std::complex<double> >( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<double> > > real_imag_to_complex< std::complex<double> >( hoNDArray<double>*, hoNDArray<double>* );
-
-    template EXPORTCPUCOREMATH bool real_imag_to_complex(const hoNDArray<double>& real, const hoNDArray<double>& imag, hoNDArray< std::complex<double> >& cplx);
-    template EXPORTCPUCOREMATH bool real_imag_to_complex(const hoNDArray<double>& real, const hoNDArray<double>& imag, hoNDArray< double_complext >& cplx);
-
-    template EXPORTCPUCOREMATH bool complex_to_real_imag(const hoNDArray< std::complex<double> >& cplx, hoNDArray<double>& real, hoNDArray<double>& imag);
-    //template EXPORTCPUCOREMATH bool complex_to_real_imag(const hoNDArray< double >& cplx, hoNDArray<double>& real, hoNDArray<double>& imag);
-    //template EXPORTCPUCOREMATH bool complex_to_real_imag(const hoNDArray< float >& cplx, hoNDArray<float>& real, hoNDArray<float>& imag);
-    // template EXPORTCPUCOREMATH bool complex_to_real_imag(const hoNDArray< double_complext >& cplx, hoNDArray<double>& real, hoNDArray<double>& imag);
-
-    template EXPORTCPUCOREMATH bool complex_to_real(const hoNDArray< std::complex<double> >& cplx, hoNDArray<double>& real);
-    template EXPORTCPUCOREMATH bool complex_to_imag(const hoNDArray< std::complex<double> >& cplx, hoNDArray<double>& imag);
-
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double_complext> > real_to_complex<double_complext>( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double_complext> > real_imag_to_complex<double_complext>( hoNDArray<double>*, hoNDArray<double>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > real<double>( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > real<std::complex<double> >( hoNDArray< std::complex<double> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > real<double_complext>( hoNDArray<double_complext>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > imag<std::complex<double> >( hoNDArray< std::complex<double> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > imag<double>( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > imag<double_complext>( hoNDArray<double_complext>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > conj<double>( hoNDArray<double>* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<std::complex<double> > > conj<std::complex<double> >( hoNDArray<std::complex<double> >* );
-    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double_complext> > conj<double_complext>( hoNDArray<double_complext>* );
-}
diff --git a/toolboxes/core/cpu/arma_math/hoNDArray_elemwise.h b/toolboxes/core/cpu/arma_math/hoNDArray_elemwise.h
deleted file mode 100644
index 5ed76b6..0000000
--- a/toolboxes/core/cpu/arma_math/hoNDArray_elemwise.h
+++ /dev/null
@@ -1,400 +0,0 @@
-/** \file hoNDArray_elemwise.h
-    \brief Element-wise math operations on the hoNDArray class.
-    
-    hoNDArray_elemwise.h defines element-wise array operations on the hoNDArray class.
-    Many of the provided functions come in two flavours:
-    1) A function that returns a smart pointer to a new array holding the result of the element-wise operation, and
-    2) A function that performs in-place element-wise computation replacing the input array.
-    When both versions are available the in-place version is suffixed _inplace.
-    Some functions (clear, fill, clamp, clamp_min, clamp_max, normalize, shrink1, shrinkd) are only provided as in-place operations,
-    and they do not carry the _inplace suffix in order to keep user code compact.
-    A few functions return a different type than their input array
-    (abs on complex data, real, imag, real_to_std_complex, real_to_complext) and consequently are not offered as in-place operations.
-    The functions provided in hoNDArray_elemwise are deliberately placed outside the NDArray derived classes
-    - to allow the NDArray classes to be lightweight header only data containers for both the cpu and gpu instances
-    - to allow for external library optimized implementations of the element-wise functions without adding such dependencies to the core data container
-    The present cpu implementation is based on Armadillo (whenever suitable functions are available).
-    The implementation is purposely split into a header and underlying implementation (.cpp) 
-    as this allows specific instantiation of the supported template types.     
-    The supported types are float, double, std::complex<float>, std::complex<double>, 
-    Gadgetron::complext<float> and Gadgetron::complext<double> -- with some deliberate omissions.
-*/
-
-#pragma once
-
-#include "hoNDArray.h"
-#include "cpucore_math_export.h"
-
-#include "GadgetronCommon.h"
-#include <complex>
-
-#ifdef USE_MKL
-#include "mkl.h"
-#endif // USE_MKL
-
-#ifdef GT_Complex8
-#undef GT_Complex8
-#endif // GT_Complex8
-typedef std::complex<float> GT_Complex8;
-
-#ifdef GT_Complex16
-#undef GT_Complex16
-#endif // GT_Complex16
-typedef std::complex<double> GT_Complex16;
-
-namespace Gadgetron{
-
-  /**
-   * @brief Calculates the element-wise absolute values (l2 norm) of the array entries
-   * @param[in] x Input array.
-   * @return A new array containing the element-wise absolute values of the input.
-   */
-  template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<typename realType<T>::Type> > abs( hoNDArray<T> *x );
-
-  /**
-   * @brief Calculates the element-wise absolute values (l2 norm) of the array entries (in place).
-   * @param[in,out] x Input and output array.
-   */
-  template<class T> EXPORTCPUCOREMATH void abs_inplace( hoNDArray<T> *x );
-
-  /**
-   * @brief Calculates the element-wise squared absolute values of the array entries
-   * @param[in] x Input array.
-   * @return A new array containing the element-wise squared absolute values of the input.
-   */
-  template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<typename realType<T>::Type> > abs_square( hoNDArray<T> *x );
-    
-  /**
-   * @brief Calculates the element-wise sqrt of the array entries.
-   * @param[in] x Input array.
-   * @return A new array containing the element-wise sqrt of the input.
-   */
-  template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<T> > sqrt( hoNDArray<T> *x );
-
-  /**
-   * @brief Calculates the element-wise sqrt of the array entries (in place).
-   * @param[in,out] x Input and output array.
-   */
-  template<class T> EXPORTCPUCOREMATH void sqrt_inplace( hoNDArray<T> *x );
-  
-  /**
-   * @brief Calculates the element-wise square of the array entries.
-   * @param[in] x Input array.
-   * @return A new array containing the element-wise square of the input.
-   *
-   * For real numbers this function is equivalent to abs_square().
-   * For complex arrays abs_square() and square() differ however.
-   */
-  template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<T> > square( hoNDArray<T> *x );
-    
-  /**
-   * @brief Calculates the element-wise square of the array entries (in place).
-   * @param[in,out] x Input and output array.
-   */
-  template<class T> EXPORTCPUCOREMATH void square_inplace( hoNDArray<T> *x );
-    
-  /**
-   * @brief Calculates the element-wise reciprocal of the array entries.
-   * @param[in] x Input array.
-   * @return A new array containing the element-wise reciprocal of the input.
-   */
-  template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<T> > reciprocal( hoNDArray<T> *x );
-  
-  /**
-   * @brief Calculates the element-wise reciprocal of the array entries (in place).
-   * @param[in,out] x Input and output array.
-   */
-  template<class T> EXPORTCPUCOREMATH void reciprocal_inplace( hoNDArray<T> *x );
-  
-  /**
-   * @brief Calculates the element-wise reciprocal sqrt of the array entries.
-   * @param[in] x Input array.
-   * @return A new array containing the element-wise reciprocal sqrt of the input.
-   */
-  template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<T> > reciprocal_sqrt( hoNDArray<T> *x );
-  
-  /**
-   * @brief Calculates the element-wise reciprocal sqrt of the array entries (in place).
-   * @param[in,out] x Input and output array.
-   */
-  template<class T> EXPORTCPUCOREMATH void reciprocal_sqrt_inplace( hoNDArray<T> *x );
-  
-  /**
-   * @brief Calculates the elementwise signum function on the array.
-   * @param[in] x Input array.
-   * @return A new array containing the element-wise sgn of the input.
-   */
-  template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<T> > sgn( hoNDArray<T> *x );
-  
-  /**
-   * @brief Calculates the elementwise signum function on the array (in place).
-   * @param[in,out] x Input and output array.
-   */
-  template<class T> EXPORTCPUCOREMATH void sgn_inplace( hoNDArray<T> *x );
-
-  /**
-   * @brief Extract the real component from a complex array.
-   * @param[in] x Input array.
-   * @return A new array of the real component of the complex array.
-   */
-  template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<typename realType<T>::Type> > real( hoNDArray<T> *x );
-
-  /**
-   * @brief Extract the imaginary component from a complex array.
-   * @param[in] x Input array.
-   * @return A new array of the imaginary component of the complex array.
-   */
-  template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<typename realType<T>::Type> > imag( hoNDArray<T> *x );
-
-  /**
-   * @brief Create a new array of the complex conjugate of the input array. For real arrays a copy of the input array is returned.
-   * @param[in] x Input array.
-   * @return A new array of the complex conjugate of the input array.
-   */
-  template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<T> > conj( hoNDArray<T> *x );
-
-  /**
-   * @brief Construct a complex array from a real array.
-   * @param[in] x Input array.
-   * @return A new complex array containing the input array in the real component and zeros in the imaginary component.
-   */
-  template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<T> > 
-  real_to_complex( hoNDArray<typename realType<T>::Type> *x );
-
-  template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<T> > 
-  real_imag_to_complex( hoNDArray<typename realType<T>::Type> *real, hoNDArray<typename realType<T>::Type>* imag);
-
-  template<class T> EXPORTCPUCOREMATH bool 
-  real_imag_to_complex(const hoNDArray<typename realType<T>::Type>& real, const hoNDArray<typename realType<T>::Type>& imag, hoNDArray<T>& cplx);
-
-  template<class T> EXPORTCPUCOREMATH bool 
-  complex_to_real_imag(const hoNDArray<T>& cplx, hoNDArray<typename realType<T>::Type>& real, hoNDArray<typename realType<T>::Type>& imag);
-
-  template<> EXPORTCPUCOREMATH bool complex_to_real_imag(const hoNDArray<float>& cplx, hoNDArray<float>& real, hoNDArray<float>& imag);
-  template<> EXPORTCPUCOREMATH bool complex_to_real_imag(const hoNDArray<double>& cplx, hoNDArray<double>& real, hoNDArray<double>& imag);
-
-  template<class T> EXPORTCPUCOREMATH bool complex_to_real(const hoNDArray<T>& cplx, hoNDArray<typename realType<T>::Type>& real);
-  template<class T> EXPORTCPUCOREMATH bool complex_to_imag(const hoNDArray<T>& cplx, hoNDArray<typename realType<T>::Type>& imag);
-
-  //
-  // From here on, the functions are all in-place although without the _inplace suffix...
-  //
-
-  /**
-   * @brief Clears the array to all zeros (in place). Faster than fill.
-   * @param[in,out] x Input and output array.
-   */
-  template<class T> EXPORTCPUCOREMATH void clear( hoNDArray<T> *x );
-  template<class T> EXPORTCPUCOREMATH void clear( hoNDArray<T>& x );
-
-  /**
-   * @brief Fills the array with a user provided constant value (in place).
-   * @param[in,out] x Input and output array.
-   * @param[in] val Fill value.
-   */
-  template<class T> EXPORTCPUCOREMATH void fill( hoNDArray<T> *x, T val );
-
-  /**
-   * @brief Clamps all values in the array to the minimum and maximum values specified (in place).
-   * @param[in,out] x Input and output array.
-   * @param[in] min minimum value.
-   * @param[in] max maximum value.
-   * @param[in] min_val value to which everything below the minimum will be set
-   * @param[in] max_val value to which everything above the maximum will be set
-   */
-  template<class T> EXPORTCPUCOREMATH void clamp( hoNDArray<T> *x, typename realType<T>::Type min, typename realType<T>::Type max, T min_val, T max_val );
-  
-  /**
-   * @brief Clamps all values in the array to the minimum and maximum values specified (in place).
-   * @param[in,out] x Input and output array.
-   * @param[in] min minimum value.
-   * @param[in] max maximum value.
-   */
-  template<class T> EXPORTCPUCOREMATH void clamp( hoNDArray<T> *x, typename realType<T>::Type min, typename realType<T>::Type max );
-
-  /**
-   * @brief Clamps all values in the array to a minimum value allowed (in place).
-   * @param[in,out] x Input and output array.
-   * @param[in] min Minimum value.
-   */
-  template<class T> EXPORTCPUCOREMATH void clamp_min( hoNDArray<T> *x, typename realType<T>::Type min );
-
-  /**
-   * @brief Clamps all values in the array to a maximum value allowed (in place).
-   * @param[in,out] x Input and output array.
-   * @param[in] max Maximum value.
-   */
-  template<class T> EXPORTCPUCOREMATH void clamp_max( hoNDArray<T> *x, typename realType<T>::Type max );
-
-  /**
-   * @brief In place normalization (scaling) to a new maximum absolute array value val.
-   * @param[in,out] x Input and output array.
-   * @param[in] val New maximum absolute array value (according to the l2-norm)
-   */  
-  template<class T> EXPORTCPUCOREMATH void normalize( hoNDArray<T> *x, typename realType<T>::Type val = typename realType<T>::Type(1) );
-
-  /**
-   * @brief Shrinkage (soft thresholding), i.e. shrink(x,gamma) = x/abs(x)*max(abs(x)-gamma,0).
-   * @param[out] out Output array. Can be 0x0 in which case an in place transform is performed.
-   * @param[in,out] x Input array (and output array if out == 0x0).
-   * @param[in] gamma Shrinkage control parameter
-   */  
-  template<class T> EXPORTCPUCOREMATH void shrink1( hoNDArray<T> *x, typename realType<T>::Type gamma, hoNDArray<T> *out = 0x0 );
-
-  /**
-   * @brief In place p-shrinkage (soft thresholding), i.e. pshrink(x,gamma,p) = x/abs(x)*max(abs(x)-gamma*abs(x)^(p-1),0).
-   * @param[out] out Output array. Can be 0x0 in which case an in place transform is performed.
-   * @param[in,out] x Input array (and output array if out == 0x0).
-   * @param[in] gamma Shrinkage control parameter
-   * @param[in] p p value of the shrinkage. Should be less than 1 and more than 0.
-   */
-	template<class T> EXPORTCPUCOREMATH void pshrink( hoNDArray<T> *x, typename realType<T>::Type gamma,typename realType<T>::Type p, hoNDArray<T> *out = 0x0 );
-
-  /**
-   * @brief Shrinkage (soft thresholding, multi-dimensional), i.e. shrink(x,gamma,s) = x/s*max(s-gamma,0).
-   * @param[out] out Output array. Can be 0x0 in which case an in place transform is performed.
-   * @param[in,out] x Input array (and output array if out == 0x0).
-   * @param[in] s Input array, normalization.
-   * @param[in] gamma Shrinkage control parameter
-   */  
-  template<class T> EXPORTCPUCOREMATH void shrinkd ( hoNDArray<T> *x, hoNDArray<typename realType<T>::Type> *s, typename realType<T>::Type gamma, hoNDArray<T> *out = 0x0 );
-
-  /**
-   * @brief In place p-shrinkage (soft thresholding, multi-dimensional), i.e. pshrink(x,s,gamma,p) = x/s*max(s-gamma*s^(p-1),0).
-   * @param[out] out Output array. Can be 0x0 in which case an in place transform is performed.
-   * @param[in,out] x Input array (and output array if out == 0x0).
-   * @param[in] gamma Shrinkage control parameter
-   * @param[in] p p value of the shrinkage. Should be less than 1 and more than 0.
-   */
-  template<class T> EXPORTCPUCOREMATH void pshrinkd ( hoNDArray<T> *x, hoNDArray<typename realType<T>::Type> *s, typename realType<T>::Type gamma,typename realType<T>::Type p, hoNDArray<T> *out = 0x0 );
-
-#ifdef USE_MKL
-
-    // besides the arma calls, some functions are implemented with the MKL vector utilities
-
-    EXPORTCPUCOREMATH bool add(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r); // r = x + y
-    EXPORTCPUCOREMATH bool subtract(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r); // r = x - y
-    EXPORTCPUCOREMATH bool multiply(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r); // r = x * y
-    EXPORTCPUCOREMATH bool divide(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r); // r = x / y
-    EXPORTCPUCOREMATH bool absolute(const hoNDArray<float>& x, hoNDArray<float>& r); // r = abs(x)
-    EXPORTCPUCOREMATH bool argument(const hoNDArray<float>& x, hoNDArray<float>& r); // r = angle(x)
-    EXPORTCPUCOREMATH bool sqrt(const hoNDArray<float>& x, hoNDArray<float>& r); // r = sqrt(x)
-    EXPORTCPUCOREMATH bool minAbsolute(const hoNDArray<float>& x, float& r, size_t& ind); // minimal absolute value and index
-    EXPORTCPUCOREMATH bool maxAbsolute(const hoNDArray<float>& x, float& r, size_t& ind); // maximal absolute value and index
-    EXPORTCPUCOREMATH bool addEpsilon(hoNDArray<float>& x); // x = x + Epsilon if x==0, prepare for division
-    EXPORTCPUCOREMATH bool norm2(const hoNDArray<float>& x, float& r);
-    EXPORTCPUCOREMATH bool norm1(const hoNDArray<float>& x, float& r);
-    EXPORTCPUCOREMATH bool conv2(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& z); // x: input data, y: convolution kernel, z: output; each 2D slice is convolved
-    EXPORTCPUCOREMATH bool conv3(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& z); // x: input data, y: convolution kernel, z: output; each 3D volume is convolved
-    EXPORTCPUCOREMATH bool inv(const hoNDArray<float>& x, hoNDArray<float>& r); // r = 1/x
-
-    EXPORTCPUCOREMATH bool add(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
-    EXPORTCPUCOREMATH bool subtract(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
-    EXPORTCPUCOREMATH bool multiply(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
-    EXPORTCPUCOREMATH bool divide(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
-    EXPORTCPUCOREMATH bool absolute(const hoNDArray<double>& x, hoNDArray<double>& r);
-    EXPORTCPUCOREMATH bool argument(const hoNDArray<double>& x, hoNDArray<double>& r);
-    EXPORTCPUCOREMATH bool sqrt(const hoNDArray<double>& x, hoNDArray<double>& r);
-    EXPORTCPUCOREMATH bool minAbsolute(const hoNDArray<double>& x, double& r, size_t& ind);
-    EXPORTCPUCOREMATH bool maxAbsolute(const hoNDArray<double>& x, double& r, size_t& ind);
-    EXPORTCPUCOREMATH bool addEpsilon(hoNDArray<double>& x);
-    EXPORTCPUCOREMATH bool norm2(const hoNDArray<double>& x, double& r);
-    EXPORTCPUCOREMATH bool norm1(const hoNDArray<double>& x, double& r);
-    EXPORTCPUCOREMATH bool conv2(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& z);
-    EXPORTCPUCOREMATH bool conv3(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& z);
-    EXPORTCPUCOREMATH bool inv(const hoNDArray<double>& x, hoNDArray<double>& r);
-
-    EXPORTCPUCOREMATH bool add(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, hoNDArray<GT_Complex8>& r);
-    EXPORTCPUCOREMATH bool subtract(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, hoNDArray<GT_Complex8>& r);
-    EXPORTCPUCOREMATH bool multiply(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, hoNDArray<GT_Complex8>& r);
-    EXPORTCPUCOREMATH bool divide(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, hoNDArray<GT_Complex8>& r);
-    EXPORTCPUCOREMATH bool absolute(const hoNDArray<GT_Complex8>& x, hoNDArray<float>& r);
-    EXPORTCPUCOREMATH bool absolute(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r);
-    EXPORTCPUCOREMATH bool sqrt(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r);
-    EXPORTCPUCOREMATH bool minAbsolute(const hoNDArray<GT_Complex8>& x, GT_Complex8& r, size_t& ind);
-    EXPORTCPUCOREMATH bool maxAbsolute(const hoNDArray<GT_Complex8>& x, GT_Complex8& r, size_t& ind);
-    EXPORTCPUCOREMATH bool multiplyConj(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, hoNDArray<GT_Complex8>& r); // r = x * conj(y)
-    EXPORTCPUCOREMATH bool argument(const hoNDArray<GT_Complex8>& x, hoNDArray<float>& r); // r = angle(x)
-    EXPORTCPUCOREMATH bool conjugate(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r); // r = conj(x)
-    EXPORTCPUCOREMATH bool addEpsilon(hoNDArray<GT_Complex8>& x);
-    EXPORTCPUCOREMATH bool norm2(const hoNDArray<GT_Complex8>& x, float& r);
-    EXPORTCPUCOREMATH bool norm1(const hoNDArray<GT_Complex8>& x, float& r);
-    EXPORTCPUCOREMATH bool dotc(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, GT_Complex8& r); // x'*y, x and y are N*1 vector
-    EXPORTCPUCOREMATH bool conv2(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, hoNDArray<GT_Complex8>& z);
-    EXPORTCPUCOREMATH bool conv3(const hoNDArray<GT_Complex8>& x, const hoNDArray<GT_Complex8>& y, hoNDArray<GT_Complex8>& z);
-    EXPORTCPUCOREMATH bool inv(const hoNDArray<GT_Complex8>& x, hoNDArray<GT_Complex8>& r);
-
-    EXPORTCPUCOREMATH bool add(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, hoNDArray<GT_Complex16>& r);
-    EXPORTCPUCOREMATH bool subtract(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, hoNDArray<GT_Complex16>& r);
-    EXPORTCPUCOREMATH bool multiply(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, hoNDArray<GT_Complex16>& r);
-    EXPORTCPUCOREMATH bool divide(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, hoNDArray<GT_Complex16>& r);
-    EXPORTCPUCOREMATH bool absolute(const hoNDArray<GT_Complex16>& x, hoNDArray<double>& r);
-    EXPORTCPUCOREMATH bool absolute(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r);
-    EXPORTCPUCOREMATH bool sqrt(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r);
-    EXPORTCPUCOREMATH bool minAbsolute(const hoNDArray<GT_Complex16>& x, GT_Complex16& r, size_t& ind);
-    EXPORTCPUCOREMATH bool maxAbsolute(const hoNDArray<GT_Complex16>& x, GT_Complex16& r, size_t& ind);
-    EXPORTCPUCOREMATH bool multiplyConj(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, hoNDArray<GT_Complex16>& r);
-    EXPORTCPUCOREMATH bool argument(const hoNDArray<GT_Complex16>& x, hoNDArray<double>& r);
-    EXPORTCPUCOREMATH bool conjugate(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r);
-    EXPORTCPUCOREMATH bool addEpsilon(hoNDArray<GT_Complex16>& x);
-    EXPORTCPUCOREMATH bool norm2(const hoNDArray<GT_Complex16>& x, double& r);
-    EXPORTCPUCOREMATH bool norm1(const hoNDArray<GT_Complex16>& x, double& r);
-    EXPORTCPUCOREMATH bool dotc(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, GT_Complex16& r);
-    EXPORTCPUCOREMATH bool conv2(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, hoNDArray<GT_Complex16>& z);
-    EXPORTCPUCOREMATH bool conv3(const hoNDArray<GT_Complex16>& x, const hoNDArray<GT_Complex16>& y, hoNDArray<GT_Complex16>& z);
-    EXPORTCPUCOREMATH bool inv(const hoNDArray<GT_Complex16>& x, hoNDArray<GT_Complex16>& r);
-
-    template<typename T> EXPORTCPUCOREMATH bool sumOverLastDimension(const hoNDArray<T>& x, hoNDArray<T>& r); // e.g. for a 4D array, sum over the 4th dimension
-    template<typename T> EXPORTCPUCOREMATH bool sumOverSecondLastDimension(const hoNDArray<T>& x, hoNDArray<T>& r); // e.g. for a 4D array, sum over the 3rd dimension
-
-    template<typename T> EXPORTCPUCOREMATH bool multiplyOverLastDimension(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r); // e.g. x is 3D and y is 4D array, r(:,:,:,n) = y(:,:,:,n) .* x
-    template<typename T> EXPORTCPUCOREMATH bool divideOverLastDimension(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r); // e.g. x is 3D and y is 4D array, r(:,:,:,n) = y(:,:,:,n) ./ x
-
-    template<typename T> EXPORTCPUCOREMATH bool sumOver1stDimension(const hoNDArray<T>& x, hoNDArray<T>& r); // e.g. for a 2D array, sum over the 1st dimension and get an array of [1 E1]
-    template<typename T> EXPORTCPUCOREMATH bool sumOver2ndDimension(const hoNDArray<T>& x, hoNDArray<T>& r); // e.g. for a 3D array, sum over the 2nd dimension and get an array of [RO 1 CHA]
-    template<typename T> EXPORTCPUCOREMATH bool sumOver3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& r); // e.g. for a 4D array, sum over the 3rd dimension and get an array of [RO E1 1 N]
-    template<typename T> EXPORTCPUCOREMATH bool sumOver4thDimension(const hoNDArray<T>& x, hoNDArray<T>& r); // e.g. for a 5D array [RO E1 CHA N S], sum over the 4th dimension and get an array of [RO E1 CHA 1 S]
-    template<typename T> EXPORTCPUCOREMATH bool sumOver5thDimension(const hoNDArray<T>& x, hoNDArray<T>& r); // e.g. for a 6D array, sum over the 5th dimension and get an array [RO E1 CHA N 1 P]
-
-    template<typename T> EXPORTCPUCOREMATH bool multiplyOver3rdDimension(const hoNDArray<T>& x3D, const hoNDArray<T>& y4D, hoNDArray<T>& r); // e.g. x is 3D and y is 4D array, r(:,:,n,:) = y(:,:,n,:) .* x
-    template<typename T> EXPORTCPUCOREMATH bool multiplyOver4thDimension(const hoNDArray<T>& x4D, const hoNDArray<T>& y5D, hoNDArray<T>& r); // e.g. x is 4D and y is 5D array, r(:,:,:,n,:) = y(:,:,:,n,:) .* x
-    template<typename T> EXPORTCPUCOREMATH bool multiplyOver5thDimension(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r); // e.g. x is 5D and y is 6D array, r(:,:,:,:, n,:) = y(:,:,:,:,n,:) .* x
-
-    template<typename T> EXPORTCPUCOREMATH bool multiplyOver4thDimensionExcept(const hoNDArray<T>& x4D, const hoNDArray<T>& y5D, size_t n, hoNDArray<T>& r, bool copyY2R=true); // e.g. x is 4D and y is 5D array, r(:,:,:,t,:) = y(:,:,:,t,:) .* x, except for r(:,:,:,n,:) = y(:,:,:,n,:)
-    template<typename T> EXPORTCPUCOREMATH bool multiplyOver5thDimensionExcept(const hoNDArray<T>& x, const hoNDArray<T>& y, size_t n, hoNDArray<T>& r, bool copyY2R=true); // e.g. x is 5D and y is 6D array, r(:,:,:,:,t,:) = y(:,:,:,:,t,:) .* x, except for r(:,:,:,:,n,:) = y(:,:,:,:,n,:)
-
-    template<typename T> EXPORTCPUCOREMATH bool multipleAdd(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r); // r = x + y for every part of y
-    template<typename T> EXPORTCPUCOREMATH bool multipleMultiply(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r); // r = x * y for every part of y
-    template<typename T> EXPORTCPUCOREMATH bool multipleDivide(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r); // r = x / y for every part of y
-
-    template<typename T> EXPORTCPUCOREMATH bool cropUpTo10DArray(const hoNDArray<T>& x, hoNDArray<T>& r, const std::vector<size_t>& startND, std::vector<size_t>& size);
-    template<typename T> EXPORTCPUCOREMATH bool setSubArrayUpTo10DArray(const hoNDArray<T>& x, hoNDArray<T>& r, const std::vector<size_t>& startND, std::vector<size_t>& size);
-
-    template<typename T> EXPORTCPUCOREMATH bool cropOver3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& r, size_t start, size_t end);
-    template<typename T> EXPORTCPUCOREMATH bool setSubArrayOver3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& r, size_t start, size_t end);
-
-    template<typename T> EXPORTCPUCOREMATH bool stdOver3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& std, bool NMinusOne); // compute the standard deviation along the 3rd dimension, if NMinusOne == true, divided by N-1; otherwise, divided by N
-
-    // template<typename T> EXPORTCPUCOREMATH bool permuteLastTwoDimensions(const hoNDArray<T>& x, hoNDArray<T>& r); // x : [... E1 E2], r: [... E2 E1]
-
-    template<typename T> EXPORTCPUCOREMATH bool permuteE2To3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& r); // x : [RO E1 CHA SLC E2 ...], r: [RO E1 E2 CHA SLC ...]
-    template<typename T> EXPORTCPUCOREMATH bool permuteE2To5thDimension(const hoNDArray<T>& x, hoNDArray<T>& r); // x : [RO E1 E2 CHA SLC ...], r: [RO E1 CHA SLC E2 ...]
-
-    template<typename T> EXPORTCPUCOREMATH bool permuteROTo3rdDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r); // x : [RO E1 E2 ...], r: [E1 E2 RO ...]
-    template<typename T> EXPORTCPUCOREMATH bool permuteROTo4thDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r); // x : [RO E1 E2 CHA ...], r: [E1 E2 CHA RO ...]
-    template<typename T> EXPORTCPUCOREMATH bool permuteROTo1stDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r); // x : [E1 E2 CHA RO ...], r: [RO E1 E2 CHA ...]
-
-    template<typename T> EXPORTCPUCOREMATH bool permute3rdDimensionTo1stDimension(const hoNDArray<T>& x, hoNDArray<T>& r); // x : [RO E1 E2 CHA ...], r: [E2 RO E1 CHA ...]
-
-    template<typename T> EXPORTCPUCOREMATH bool permuteROTo5thDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r); // x : [RO E1 E2 srcCHA dstCHA ...], r: [E1 E2 srcCHA dstCHA RO ...]
-
-    /// x : [RO E1 srcCHA], ker [RO E1 srcCHA dstCHA], buf is a scratch buffer for the computation and needs to be pre-allocated as [RO E1 srcCHA], y [RO E1 dstCHA]
-    /// for the sake of speed, no check is made in this function
-    template<typename T> EXPORTCPUCOREMATH bool imageDomainUnwrapping2D(const hoNDArray<T>& x, const hoNDArray<T>& ker, hoNDArray<T>& buf, hoNDArray<T>& y);
-
-    /// x : [RO E1 srcCHA N], ker [RO E1 srcCHA dstCHA 1 or N], buf is a scratch buffer for the computation and needs to be pre-allocated as [RO E1 srcCHA], y [RO E1 dstCHA N]
-    /// for the sake of speed, no check is made in this function
-    template<typename T> EXPORTCPUCOREMATH bool imageDomainUnwrapping2DT(const hoNDArray<T>& x, const hoNDArray<T>& ker, hoNDArray<T>& buf, hoNDArray<T>& y);
-
-#endif // USE_MKL
-}
diff --git a/toolboxes/core/cpu/arma_math/hoNDArray_math.h b/toolboxes/core/cpu/arma_math/hoNDArray_math.h
deleted file mode 100644
index a8b4224..0000000
--- a/toolboxes/core/cpu/arma_math/hoNDArray_math.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#pragma once
-
-#include "hoNDArray_blas.h"
-#include "hoNDArray_elemwise.h"
-#include "hoNDArray_operators.h"
-#include "hoNDArray_reductions.h"
diff --git a/toolboxes/core/cpu/arma_math/hoNDArray_operators.cpp b/toolboxes/core/cpu/arma_math/hoNDArray_operators.cpp
deleted file mode 100644
index 62e5a10..0000000
--- a/toolboxes/core/cpu/arma_math/hoNDArray_operators.cpp
+++ /dev/null
@@ -1,457 +0,0 @@
-#include "hoNDArray_operators.h"
-#include "hoArmadillo.h"
-
-namespace Gadgetron{
-
-    // Private utility to verify array dimensions. 
-    // It "replaces" NDArray::dimensions_equal() to support batch mode.
-    // There is an identical function for all array instances (currently hoNDArray, cuNDArray, hoCuNDArray)
-    // !!! Remember to fix any bugs in all versions !!!
-    //
-    template<class T,class S> static bool compatible_dimensions( const hoNDArray<T> &x, const hoNDArray<S> &y )
-    {
-        return ((x.get_number_of_elements()%y.get_number_of_elements())==0);
-    }
-
-    template<class T> hoNDArray<T>& operator+= (hoNDArray<T> &x, const hoNDArray<T> &y)
-    {
-        if( compatible_dimensions<T,T>(x,y) ){
-            arma::Col<typename stdType<T>::Type> aY = as_arma_col(&y);
-            size_t num_batches = x.get_number_of_elements()/y.get_number_of_elements();
-            for( size_t batch=0; batch<num_batches; batch++ ){	
-                hoNDArray<T> tmp;
-                tmp.create( y.get_dimensions(), x.get_data_ptr()+batch*y.get_number_of_elements() );
-                arma::Col<typename stdType<T>::Type> aRes = as_arma_col(&tmp);
-                aRes += aY;
-            }
-            return x;    
-        } 
-        else {
-            throw std::runtime_error("hoNDArray::operator+=: Incompatible array dimensions");
-        }
-    }
-
-    template<class T> hoNDArray< std::complex<T> >& operator+= (hoNDArray< std::complex<T> > &x, const hoNDArray<T> &y)
-    {
-        if( compatible_dimensions<std::complex<T>,T>(x,y) ){
-            arma::Col< std::complex<T> > aY( as_arma_col(&y), arma::Col<T>(y.get_number_of_elements()).zeros() );
-            size_t num_batches = x.get_number_of_elements()/y.get_number_of_elements();
-            for( size_t batch=0; batch<num_batches; batch++ ){	
-                hoNDArray< std::complex<T> > tmp;
-                tmp.create( y.get_dimensions(), x.get_data_ptr()+batch*y.get_number_of_elements() );
-                arma::Col< std::complex<T> > aRes = as_arma_col(&tmp);
-                aRes += aY;
-            }
-            return x;
-        } 
-        else {
-            throw std::runtime_error("hoNDArray::operator+=: Incompatible array dimensions");
-        }
-    }
-
-    template<class T> hoNDArray< complext<T> >& operator+= (hoNDArray< complext<T> > &x, const hoNDArray<T> &y)
-    {
-        if( compatible_dimensions<complext<T>,T>(x,y) ){
-            arma::Col< std::complex<T> > aY( as_arma_col(&y), arma::Col<T>(y.get_number_of_elements()).zeros() );
-            size_t num_batches = x.get_number_of_elements()/y.get_number_of_elements();
-            for( size_t batch=0; batch<num_batches; batch++ ){	
-                hoNDArray< complext<T> > tmp;
-                tmp.create( y.get_dimensions(), x.get_data_ptr()+batch*y.get_number_of_elements() );
-                arma::Col< std::complex<T> > aRes = as_arma_col(&tmp);
-                aRes += aY;
-            }
-            return x;
-        } 
-        else {
-            throw std::runtime_error("hoNDArray::operator+=: Incompatible array dimensions");
-        }
-    }
-
-    template<class T> hoNDArray<T>& operator+= (hoNDArray<T> &x, const T &y)
-    {
-        arma::Col<typename stdType<T>::Type> aRes = as_arma_col(&x);
-        typename stdType<T>::Type aY = *((typename stdType<T>::Type*)&y);
-        aRes += aY;
-        return x;  
-    }
-
-    template<class T> hoNDArray< std::complex<T> >& operator+= (hoNDArray< std::complex<T> > &x, const T &y)
-    {
-        arma::Col< std::complex<T> > aRes = as_arma_col(&x);
-        std::complex<T> aY( y, T(0) );
-        aRes += aY;
-        return x;  
-    }
-
-    template<class T> hoNDArray< complext<T> >& operator+= (hoNDArray< complext<T> > &x, const T &y)
-    {
-        arma::Col< std::complex<T> > aRes = as_arma_col(&x);
-        std::complex<T> aY( y, T(0) );
-        aRes += aY;
-        return x;  
-    }
-
-    template<class T> hoNDArray<T>& operator-= (hoNDArray<T> &x, const hoNDArray<T> &y)
-    {
-        if( compatible_dimensions<T,T>(x,y) ){
-            arma::Col<typename stdType<T>::Type> aY = as_arma_col(&y);
-            size_t num_batches = x.get_number_of_elements()/y.get_number_of_elements();
-            for( size_t batch=0; batch<num_batches; batch++ ){	
-                hoNDArray<T> tmp;
-                tmp.create( y.get_dimensions(), x.get_data_ptr()+batch*y.get_number_of_elements() );
-                arma::Col<typename stdType<T>::Type> aRes = as_arma_col(&tmp);
-                aRes -= aY;
-            }
-            return x;    
-        } 
-        else {
-            throw std::runtime_error("hoNDArray::operator-=: Incompatible array dimensions");
-        }
-    }
-
-    template<class T> hoNDArray< std::complex<T> >& operator-= (hoNDArray< std::complex<T> > &x, const hoNDArray<T> &y)
-    {
-        if( compatible_dimensions<std::complex<T>,T>(x,y) ){
-            arma::Col< std::complex<T> > aY( as_arma_col(&y), arma::Col<T>(y.get_number_of_elements()).zeros() );
-            size_t num_batches = x.get_number_of_elements()/y.get_number_of_elements();
-            for( size_t batch=0; batch<num_batches; batch++ ){	
-                hoNDArray< std::complex<T> > tmp;
-                tmp.create( y.get_dimensions(), x.get_data_ptr()+batch*y.get_number_of_elements() );
-                arma::Col< std::complex<T> > aRes = as_arma_col(&tmp);
-                aRes -= aY;
-            }
-            return x;
-        } 
-        else {
-            throw std::runtime_error("hoNDArray::operator-=: Incompatible array dimensions");
-        }
-    }
-
-    template<class T> hoNDArray< complext<T> >& operator-= (hoNDArray< complext<T> > &x, const hoNDArray<T> &y)
-    {
-        if( compatible_dimensions<complext<T>,T>(x,y) ){
-            arma::Col< std::complex<T> > aY( as_arma_col(&y), arma::Col<T>(y.get_number_of_elements()).zeros() );
-            size_t num_batches = x.get_number_of_elements()/y.get_number_of_elements();
-            for( size_t batch=0; batch<num_batches; batch++ ){	
-                hoNDArray< complext<T> > tmp;
-                tmp.create( y.get_dimensions(), x.get_data_ptr()+batch*y.get_number_of_elements() );
-                arma::Col< std::complex<T> > aRes = as_arma_col(&tmp);
-                aRes -= arma::Col< std::complex<T> >( as_arma_col(&y), arma::Col<T>(y.get_number_of_elements()).zeros() );
-            }
-            return x;
-        } 
-        else {
-            throw std::runtime_error("hoNDArray::operator-=: Incompatible array dimensions");
-        }
-    }
-
-    template<class T> hoNDArray<T>& operator-= (hoNDArray<T> &x, const T &y)
-    {
-        arma::Col<typename stdType<T>::Type> aRes = as_arma_col(&x);
-        typename stdType<T>::Type aY = *((typename stdType<T>::Type*)&y);
-        aRes -= aY;
-        return x;  
-    }
-
-    template<class T> hoNDArray< std::complex<T> >& operator-= (hoNDArray< std::complex<T> > &x, const T &y)
-    {
-        arma::Col< std::complex<T> > aRes = as_arma_col(&x);
-        std::complex<T> aY( y, T(0) );
-        aRes -= aY;
-        return x;  
-    }
-
-    template<class T> hoNDArray< complext<T> >& operator-= (hoNDArray< complext<T> > &x, const T &y)
-    {
-        arma::Col< std::complex<T> > aRes = as_arma_col(&x);
-        std::complex<T> aY( y, T(0) );
-        aRes -= aY;
-        return x;  
-    }
-
-    template<class T> hoNDArray<T>& operator*= (hoNDArray<T> &x, const hoNDArray<T> &y)
-    {
-        if( compatible_dimensions<T,T>(x,y) ){
-            arma::Col<typename stdType<T>::Type> aY = as_arma_col(&y);
-            size_t num_batches = x.get_number_of_elements()/y.get_number_of_elements();
-            for( size_t batch=0; batch<num_batches; batch++ ){	
-                hoNDArray<T> tmp;
-                tmp.create( y.get_dimensions(), x.get_data_ptr()+batch*y.get_number_of_elements() );
-                arma::Col<typename stdType<T>::Type> aRes = as_arma_col(&tmp);
-                aRes %= aY;
-            }
-            return x;
-        } 
-        else {
-            throw std::runtime_error("hoNDArray::operator*=: Incompatible array dimensions");
-        }
-    }
-
-    template<class T> hoNDArray< std::complex<T> >& operator*= (hoNDArray< std::complex<T> > &x, const hoNDArray<T> &y)
-    {
-        if( compatible_dimensions<std::complex<T>,T>(x,y) ){
-            arma::Col< std::complex<T> > aY( as_arma_col(&y), arma::Col<T>(y.get_number_of_elements()).zeros() );    
-            size_t num_batches = x.get_number_of_elements()/y.get_number_of_elements();
-            for( size_t batch=0; batch<num_batches; batch++ ){	
-                hoNDArray< std::complex<T> > tmp;
-                tmp.create( y.get_dimensions(), x.get_data_ptr()+batch*y.get_number_of_elements() );
-                arma::Col< std::complex<T> > aRes = as_arma_col(&tmp);
-                aRes %= aY;
-            }
-            return x;
-        } 
-        else {
-            throw std::runtime_error("hoNDArray::operator*=: Incompatible array dimensions");
-        }
-    }
-
-    template<class T> hoNDArray< complext<T> >& operator*= (hoNDArray< complext<T> > &x, const hoNDArray<T> &y)
-    {
-        if( compatible_dimensions<complext<T>,T>(x,y) ){
-            arma::Col< std::complex<T> > aY( as_arma_col(&y), arma::Col<T>(y.get_number_of_elements()).zeros() );
-            size_t num_batches = x.get_number_of_elements()/y.get_number_of_elements();
-            for( size_t batch=0; batch<num_batches; batch++ ){	
-                hoNDArray< complext<T> > tmp;
-                tmp.create( y.get_dimensions(), x.get_data_ptr()+batch*y.get_number_of_elements() );
-                arma::Col< std::complex<T> > aRes = as_arma_col(&tmp);
-                aRes %= arma::Col< std::complex<T> >( as_arma_col(&y), arma::Col<T>(y.get_number_of_elements()).zeros() );
-            }
-            return x;
-        } 
-        else {
-            throw std::runtime_error("hoNDArray::operator*=: Incompatible array dimensions");
-        }
-    }
-
-    template<class T> hoNDArray<T>& operator*= (hoNDArray<T> &x, const T &y)
-    {
-        arma::Col<typename stdType<T>::Type> aRes = as_arma_col(&x);
-        typename stdType<T>::Type aY = *((typename stdType<T>::Type*)&y);
-        aRes *= aY;
-        return x;  
-    }
-
-    template<class T> hoNDArray< std::complex<T> >& operator*= (hoNDArray< std::complex<T> > &x, const T &y)
-    {
-        arma::Col< std::complex<T> > aRes = as_arma_col(&x);
-        std::complex<T> aY( y, T(0) );
-        aRes *= aY;
-        return x;  
-    }
-
-    template<class T> hoNDArray< complext<T> >& operator*= (hoNDArray< complext<T> > &x, const T &y)
-    {
-        arma::Col< std::complex<T> > aRes = as_arma_col(&x);
-        std::complex<T> aY( y, T(0) );
-        aRes *= aY;
-        return x;  
-    }
-
-    template<class T> hoNDArray<T>& operator/= (hoNDArray<T> &x, const hoNDArray<T> &y)
-    {
-        if( compatible_dimensions<T,T>(x,y) ){
-            arma::Col<typename stdType<T>::Type> aY = as_arma_col(&y);
-            size_t num_batches = x.get_number_of_elements()/y.get_number_of_elements();
-            for( size_t batch=0; batch<num_batches; batch++ ){	
-                hoNDArray<T> tmp;
-                tmp.create( y.get_dimensions(), x.get_data_ptr()+batch*y.get_number_of_elements() );
-                arma::Col<typename stdType<T>::Type> aRes = as_arma_col(&tmp);
-                aRes /= aY;
-            }
-            return x;
-        } 
-        else {
-            throw std::runtime_error("hoNDArray::operator/=: Incompatible array dimensions");
-        }
-    }
-
-    template<class T> hoNDArray< std::complex<T> >& operator/= (hoNDArray< std::complex<T> > &x, const hoNDArray<T> &y)
-    {
-        if( compatible_dimensions<std::complex<T>,T>(x,y) ){
-            arma::Col< std::complex<T> > aY( as_arma_col(&y), arma::Col<T>(y.get_number_of_elements()).zeros() );    
-            size_t num_batches = x.get_number_of_elements()/y.get_number_of_elements();
-            for( size_t batch=0; batch<num_batches; batch++ ){	
-                hoNDArray< std::complex<T> > tmp;
-                tmp.create( y.get_dimensions(), x.get_data_ptr()+batch*y.get_number_of_elements() );
-                arma::Col< std::complex<T> > aRes = as_arma_col(&tmp);
-                aRes /= aY;
-            }
-            return x;
-        } 
-        else {
-            throw std::runtime_error("hoNDArray::operator/=: Incompatible array dimensions");
-        }
-    }
-
-    template<class T> hoNDArray< complext<T> >& operator/= (hoNDArray< complext<T> > &x, const hoNDArray<T> &y)
-    {
-        if( compatible_dimensions<complext<T>,T>(x,y) ){
-            arma::Col< std::complex<T> > aY( as_arma_col(&y), arma::Col<T>(y.get_number_of_elements()).zeros() );
-            size_t num_batches = x.get_number_of_elements()/y.get_number_of_elements();
-            for( size_t batch=0; batch<num_batches; batch++ ){	
-                hoNDArray< complext<T> > tmp;
-                tmp.create( y.get_dimensions(), x.get_data_ptr()+batch*y.get_number_of_elements() );
-                arma::Col< std::complex<T> > aRes = as_arma_col(&tmp);
-                aRes /= arma::Col< std::complex<T> >( as_arma_col(&y), arma::Col<T>(y.get_number_of_elements()).zeros() );
-            }
-            return x;
-        } 
-        else {
-            throw std::runtime_error("hoNDArray::operator/=: Incompatible array dimensions");
-        }
-    }
-
-    template<class T> hoNDArray<T>& operator/= (hoNDArray<T> &x, const T &y)
-    {
-        arma::Col<typename stdType<T>::Type> aRes = as_arma_col(&x);
-        typename stdType<T>::Type aY = *((typename stdType<T>::Type*)&y);
-        aRes /= aY;
-        return x;  
-    }
-
-    template<class T> hoNDArray< std::complex<T> >& operator/= (hoNDArray< std::complex<T> > &x, const T &y)
-    {
-        arma::Col< std::complex<T> > aRes = as_arma_col(&x);
-        std::complex<T> aY( y, T(0) );
-        aRes /= aY;
-        return x;  
-    }
-
-    template<class T> hoNDArray< complext<T> >& operator/= (hoNDArray< complext<T> > &x, const T &y)
-    {
-        arma::Col< std::complex<T> > aRes = as_arma_col(&x);
-        std::complex<T> aY( y, T(0) );
-        aRes /= aY;
-        return x;  
-    }
-
-    //
-    // Instantiation
-    //
-
-    template EXPORTCPUCOREMATH hoNDArray<float>& operator+=<float>(hoNDArray<float>&, const hoNDArray<float>&);
-    template EXPORTCPUCOREMATH hoNDArray<float>& operator+=<float>(hoNDArray<float>&, const float&);
-    template EXPORTCPUCOREMATH hoNDArray<float>& operator-=<float>(hoNDArray<float>&, const hoNDArray<float>&);
-    template EXPORTCPUCOREMATH hoNDArray<float>& operator-=<float>(hoNDArray<float>&, const float&);
-    template EXPORTCPUCOREMATH hoNDArray<float>& operator*=<float>(hoNDArray<float>&, const hoNDArray<float>&);
-    template EXPORTCPUCOREMATH hoNDArray<float>& operator*=<float>(hoNDArray<float>&, const float&);
-    template EXPORTCPUCOREMATH hoNDArray<float>& operator/=<float>(hoNDArray<float>&, const hoNDArray<float>&);
-    template EXPORTCPUCOREMATH hoNDArray<float>& operator/=<float>(hoNDArray<float>&, const float&);
-
-    template EXPORTCPUCOREMATH hoNDArray<double>& operator+=<double>(hoNDArray<double>&, const hoNDArray<double>&);
-    template EXPORTCPUCOREMATH hoNDArray<double>& operator+=<double>(hoNDArray<double>&, const double&);
-    template EXPORTCPUCOREMATH hoNDArray<double>& operator-=<double>(hoNDArray<double>&, const hoNDArray<double>&);
-    template EXPORTCPUCOREMATH hoNDArray<double>& operator-=<double>(hoNDArray<double>&, const double&);
-    template EXPORTCPUCOREMATH hoNDArray<double>& operator*=<double>(hoNDArray<double>&, const hoNDArray<double>&);
-    template EXPORTCPUCOREMATH hoNDArray<double>& operator*=<double>(hoNDArray<double>&, const double&);
-    template EXPORTCPUCOREMATH hoNDArray<double>& operator/=<double>(hoNDArray<double>&, const hoNDArray<double>&);
-    template EXPORTCPUCOREMATH hoNDArray<double>& operator/=<double>(hoNDArray<double>&, const double&);
-
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator+=< std::complex<float> > 
-        (hoNDArray< std::complex<float> >&, const hoNDArray< std::complex<float> >&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator+=< std::complex<float> > 
-        (hoNDArray< std::complex<float> >&, const std::complex<float>&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator-=< std::complex<float> > 
-        (hoNDArray< std::complex<float> >&, const hoNDArray< std::complex<float> >&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator-=< std::complex<float> > 
-        (hoNDArray< std::complex<float> >&, const std::complex<float>&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator*=< std::complex<float> >
-        (hoNDArray< std::complex<float> >&, const hoNDArray< std::complex<float> >&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator*=< std::complex<float> >
-        (hoNDArray< std::complex<float> >&, const std::complex<float>&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator/=< std::complex<float> > 
-        (hoNDArray< std::complex<float> >&, const hoNDArray< std::complex<float> >&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator/=< std::complex<float> > 
-        (hoNDArray< std::complex<float> >&, const std::complex<float>&);
-
-    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator+=< complext<float> > 
-        (hoNDArray< complext<float> >&, const hoNDArray< complext<float> >&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator+=< complext<float> > 
-        (hoNDArray< complext<float> >&, const complext<float>&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator-=< complext<float> > 
-        (hoNDArray< complext<float> >&, const hoNDArray< complext<float> >&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator-=< complext<float> > 
-        (hoNDArray< complext<float> >&, const complext<float>&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator*=< complext<float> >
-        (hoNDArray< complext<float> >&, const hoNDArray< complext<float> >&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator*=< complext<float> >
-        (hoNDArray< complext<float> >&, const complext<float>&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator/=< complext<float> > 
-        (hoNDArray< complext<float> >&, const hoNDArray< complext<float> >&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator/=< complext<float> > 
-        (hoNDArray< complext<float> >&, const complext<float>&);
-
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator+=<float>(hoNDArray< std::complex<float> >&, const hoNDArray<float>&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator-=<float>(hoNDArray< std::complex<float> >&, const hoNDArray<float>&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator*=<float>(hoNDArray< std::complex<float> >&, const hoNDArray<float>&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator/=<float>(hoNDArray< std::complex<float> >&, const hoNDArray<float>&);
-
-    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator+=<float>(hoNDArray< complext<float> >&, const hoNDArray<float>&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator-=<float>(hoNDArray< complext<float> >&, const hoNDArray<float>&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator*=<float>(hoNDArray< complext<float> >&, const hoNDArray<float>&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator/=<float>(hoNDArray< complext<float> >&, const hoNDArray<float>&);
-
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator+=<float>(hoNDArray< std::complex<float> >&, const float&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator-=<float>(hoNDArray< std::complex<float> >&, const float&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator*=<float>(hoNDArray< std::complex<float> >&, const float&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator/=<float>(hoNDArray< std::complex<float> >&, const float&);
-
-    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator+=<float>(hoNDArray< complext<float> >&, const float&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator-=<float>(hoNDArray< complext<float> >&, const float&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator*=<float>(hoNDArray< complext<float> >&, const float&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator/=<float>(hoNDArray< complext<float> >&, const float&);
-
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator+=< std::complex<double> > 
-        (hoNDArray< std::complex<double> >&, const hoNDArray< std::complex<double> >&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator+=< std::complex<double> > 
-        (hoNDArray< std::complex<double> >&, const std::complex<double>&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator-=< std::complex<double> > 
-        (hoNDArray< std::complex<double> >&, const hoNDArray< std::complex<double> >&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator-=< std::complex<double> > 
-        (hoNDArray< std::complex<double> >&, const std::complex<double>&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator*=< std::complex<double> >
-        (hoNDArray< std::complex<double> >&, const hoNDArray< std::complex<double> >&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator*=< std::complex<double> >
-        (hoNDArray< std::complex<double> >&, const std::complex<double>&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator/=< std::complex<double> > 
-        (hoNDArray< std::complex<double> >&, const hoNDArray< std::complex<double> >&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator/=< std::complex<double> > 
-        (hoNDArray< std::complex<double> >&, const std::complex<double>&);
-
-    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator+=< complext<double> > 
-        (hoNDArray< complext<double> >&, const hoNDArray< complext<double> >&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator+=< complext<double> > 
-        (hoNDArray< complext<double> >&, const complext<double>&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator-=< complext<double> > 
-        (hoNDArray< complext<double> >&, const hoNDArray< complext<double> >&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator-=< complext<double> > 
-        (hoNDArray< complext<double> >&, const complext<double>&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator*=< complext<double> >
-        (hoNDArray< complext<double> >&, const hoNDArray< complext<double> >&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator*=< complext<double> >
-        (hoNDArray< complext<double> >&, const complext<double>&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator/=< complext<double> > 
-        (hoNDArray< complext<double> >&, const hoNDArray< complext<double> >&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator/=< complext<double> > 
-        (hoNDArray< complext<double> >&, const complext<double>&);
-
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator+=<double>(hoNDArray< std::complex<double> >&, const hoNDArray<double>&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator-=<double>(hoNDArray< std::complex<double> >&, const hoNDArray<double>&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator*=<double>(hoNDArray< std::complex<double> >&, const hoNDArray<double>&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator/=<double>(hoNDArray< std::complex<double> >&, const hoNDArray<double>&);
-
-    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator+=<double>(hoNDArray< complext<double> >&, const hoNDArray<double>&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator-=<double>(hoNDArray< complext<double> >&, const hoNDArray<double>&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator*=<double>(hoNDArray< complext<double> >&, const hoNDArray<double>&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator/=<double>(hoNDArray< complext<double> >&, const hoNDArray<double>&);
-
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator+=<double>(hoNDArray< std::complex<double> >&, const double&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator-=<double>(hoNDArray< std::complex<double> >&, const double&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator*=<double>(hoNDArray< std::complex<double> >&, const double&);
-    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator/=<double>(hoNDArray< std::complex<double> >&, const double&);
-
-    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator+=<double>(hoNDArray< complext<double> >&, const double&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator-=<double>(hoNDArray< complext<double> >&, const double&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator*=<double>(hoNDArray< complext<double> >&, const double&);
-    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator/=<double>(hoNDArray< complext<double> >&, const double&);
-}
diff --git a/toolboxes/core/cpu/arma_math/hoNDArray_operators.h b/toolboxes/core/cpu/arma_math/hoNDArray_operators.h
deleted file mode 100644
index f555128..0000000
--- a/toolboxes/core/cpu/arma_math/hoNDArray_operators.h
+++ /dev/null
@@ -1,239 +0,0 @@
-/** \file hoNDArray_operators.h
-    \brief Common element-wise arithmetic operators on the hoNDArray class.
-    
-    hoNDArray_operators.h defines element-wise arithmetic array operations on the hoNDArray class.
-    We define the common operators +=, -=, *= and \= for both array-array and array-constant operations.
-    We have deliberately omitted to define operator+, operator- etc. since this would require returning an hoNDArray,
-    in turn invoking an explicit memcpy by the assignment operator.
-    Batch mode functionality is provided.
-    The implementation is based on Armadillo.
-    This code is purposely split into a header and underlying implementation (.cpp) 
-    as this allows specific instantiation of the supported template types.     
-    The supported types are float, double, std::complex<float>, std::complex<double>, 
-    Gadgetron::complext<float> and Gadgetron::complext<double>. 
-    Scalars can be applied to complex numbers of corresponding precision.
-*/
-
-#pragma once
-
-#include "hoNDArray.h"
-#include "cpucore_math_export.h"
-
-namespace Gadgetron{
-
-  /**
-   * @brief Implementation of element-wise operator+= on two hoNDArrays.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input array.
- 
-   * Let y be an n-dimensional array. 
-   * Then the sizes of the first n array dimensions must match between x and y.
-   * If x contains further dimensions the operator is batched across those dimensions.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray<T>& operator+= (hoNDArray<T> &x, const hoNDArray<T> &y);
-
-  /**
-   * @brief Implementation of element-wise operator+= on a hoNDArray with a scalar value.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input scalar.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray<T>& operator+= (hoNDArray<T> &x, const T &y);
-
-  /**
-   * @brief Implementation of element-wise operator+= on two hoNDArrays.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input array.
- 
-   * Let y be an n-dimensional array. 
-   * Then the sizes of the first n array dimensions must match between x and y.
-   * If x contains further dimensions the operator is batched across those dimensions.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray< std::complex<T> >& operator+= (hoNDArray< std::complex<T> > &x, const hoNDArray<T> &y);
-
-  /**
-   * @brief Implementation of element-wise operator+= on a hoNDArray with a scalar value.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input scalar.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray< std::complex<T> >& operator+= (hoNDArray< std::complex<T> >&x, const T &y);
-
-  /**
-   * @brief Implementation of element-wise operator+= on two hoNDArrays.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input array.
- 
-   * Let y be an n-dimensional array. 
-   * Then the sizes of the first n array dimensions must match between x and y.
-   * If x contains further dimensions the operator is batched across those dimensions.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray< complext<T> >& operator+= (hoNDArray< complext<T> > &x, const hoNDArray<T> &y);
-
-  /**
-   * @brief Implementation of element-wise operator+= on a hoNDArray with a scalar value.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input scalar.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray< complext<T> >& operator+= (hoNDArray< complext<T> >&x, const T &y);
-
-  /**
-   * @brief Implementation of element-wise operator-= on two hoNDArrays.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input array.
- 
-   * Let y be an n-dimensional array. 
-   * Then the sizes of the first n array dimensions must match between x and y.
-   * If x contains further dimensions the operator is batched across those dimensions.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray<T>& operator-= (hoNDArray<T> &x, const hoNDArray<T> &y);
-
-  /**
-   * @brief Implementation of element-wise operator-= on a hoNDArray with a scalar value.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input scalar.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray<T>& operator-= (hoNDArray<T> &x, const T &y);
-
-  /**
-   * @brief Implementation of element-wise operator-= on two hoNDArrays.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input array.
- 
-   * Let y be an n-dimensional array. 
-   * Then the sizes of the first n array dimensions must match between x and y.
-   * If x contains further dimensions the operator is batched across those dimensions.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray< std::complex<T> >& operator-= (hoNDArray< std::complex<T > > &x, const hoNDArray<T> &y);
-
-  /**
-   * @brief Implementation of element-wise operator-= on a hoNDArray with a scalar value.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input scalar.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray< std::complex<T> >& operator-= (hoNDArray< std::complex<T> >&x, const T &y);
-
-  /**
-   * @brief Implementation of element-wise operator-= on two hoNDArrays.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input array.
- 
-   * Let y be an n-dimensional array. 
-   * Then the sizes of the first n array dimensions must match between x and y.
-   * If x contains further dimensions the operator is batched across those dimensions.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray< complext<T> >& operator-= (hoNDArray< complext<T > > &x, const hoNDArray<T> &y);
-
-  /**
-   * @brief Implementation of element-wise operator-= on a hoNDArray with a scalar value.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input scalar.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray< complext<T> >& operator-= (hoNDArray< complext<T> >&x, const T &y);
-
-  /**
-   * @brief Implementation of element-wise operator*= on two hoNDArrays.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input array.
- 
-   * Let y be an n-dimensional array. 
-   * Then the sizes of the first n array dimensions must match between x and y.
-   * If x contains further dimensions the operator is batched across those dimensions.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray<T>& operator*= (hoNDArray<T> &x, const hoNDArray<T> &y);
-
-  /**
-   * @brief Implementation of element-wise operator*= on a hoNDArray with a scalar value.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input scalar.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray<T>& operator*= (hoNDArray<T> &x, const T &y);
-
-  /**
-   * @brief Implementation of element-wise operator*= on two hoNDArrays.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input array.
- 
-   * Let y be an n-dimensional array. 
-   * Then the sizes of the first n array dimensions must match between x and y.
-   * If x contains further dimensions the operator is batched across those dimensions.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray< std::complex<T> >& operator*= (hoNDArray< std::complex<T> > &x, const hoNDArray<T> &y);
-
-  /**
-   * @brief Implementation of element-wise operator*= on a hoNDArray with a scalar value.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input scalar.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray< std::complex<T> >& operator*= (hoNDArray< std::complex<T> > &x, const T &y);
-
-  /**
-   * @brief Implementation of element-wise operator*= on two hoNDArrays.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input array.
- 
-   * Let y be an n-dimensional array. 
-   * Then the sizes of the first n array dimensions must match between x and y.
-   * If x contains further dimensions the operator is batched across those dimensions.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray< complext<T> >& operator*= (hoNDArray< complext<T> > &x, const hoNDArray<T> &y);
-
-  /**
-   * @brief Implementation of element-wise operator*= on a hoNDArray with a scalar value.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input scalar.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray< complext<T> >& operator*= (hoNDArray< complext<T> > &x, const T &y);
-
-  /**
-   * @brief Implementation of element-wise operator/= on two hoNDArrays.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input array.
- 
-   * Let y be an n-dimensional array. 
-   * Then the sizes of the first n array dimensions must match between x and y.
-   * If x contains further dimensions the operator is batched across those dimensions.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray<T>& operator/= (hoNDArray<T> &x, const hoNDArray<T> &y);
-
-  /**
-   * @brief Implementation of element-wise operator/= on a hoNDArray with a scalar value.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input scalar.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray<T>& operator/= (hoNDArray<T> &x, const T &y);
-
-  /**
-   * @brief Implementation of element-wise operator/= on two hoNDArrays.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input array.
- 
-   * Let y be an n-dimensional array. 
-   * Then the sizes of the first n array dimensions must match between x and y.
-   * If x contains further dimensions the operator is batched across those dimensions.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray< std::complex<T> >& operator/= (hoNDArray< std::complex<T> > &x, const hoNDArray<T> &y);
-
-  /**
-   * @brief Implementation of element-wise operator/= on a hoNDArray with a scalar value.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input scalar.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray< std::complex<T> >& operator/= (hoNDArray< std::complex<T> > &x, const T &y);
-
-  /**
-   * @brief Implementation of element-wise operator/= on two hoNDArrays.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input array.
- 
-   * Let y be an n-dimensional array. 
-   * Then the sizes of the first n array dimensions must match between x and y.
-   * If x contains further dimensions the operator is batched across those dimensions.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray< complext<T> >& operator/= (hoNDArray< complext<T> > &x, const hoNDArray<T> &y);
-
-  /**
-   * @brief Implementation of element-wise operator/= on a hoNDArray with a scalar value.
-   * @param[in,out] x Input and output array.
-   * @param[in] y Input scalar.
-   */
-  template<class T> EXPORTCPUCOREMATH hoNDArray< complext<T> >& operator/= (hoNDArray< complext<T> > &x, const T &y);
-}
diff --git a/toolboxes/core/cpu/arma_math/hoNDArray_reductions.cpp b/toolboxes/core/cpu/arma_math/hoNDArray_reductions.cpp
deleted file mode 100644
index 329710b..0000000
--- a/toolboxes/core/cpu/arma_math/hoNDArray_reductions.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-#include "hoNDArray_reductions.h"
-#include "hoArmadillo.h"
-
-namespace Gadgetron{
-
-template<class REAL> REAL max(hoNDArray<REAL>* data){
-	return as_arma_col(data).max();
-}
-template<class REAL> REAL min(hoNDArray<REAL>* data){
-	return as_arma_col(data).min();
-}
-
-
-template<class T> T mean(hoNDArray<T>* data){
-	return (typename stdType<T>::Type) arma::mean(as_arma_col(data));
-}
-
-
-template<class T> T sum(hoNDArray<T>* data){
-	return (typename stdType<T>::Type) arma::sum(as_arma_col(data));
-}
-
-
-template EXPORTCPUCOREMATH float max(hoNDArray<float>*);
-template EXPORTCPUCOREMATH float min(hoNDArray<float>*);
-template EXPORTCPUCOREMATH float mean(hoNDArray<float>*);
-template EXPORTCPUCOREMATH float sum(hoNDArray<float>*);
-
-template EXPORTCPUCOREMATH double max(hoNDArray<double>*);
-template EXPORTCPUCOREMATH double min(hoNDArray<double>*);
-template EXPORTCPUCOREMATH double mean(hoNDArray<double>*);
-template EXPORTCPUCOREMATH double sum(hoNDArray<double>*);
-
-
-template EXPORTCPUCOREMATH complext<double> mean(hoNDArray<complext<double> >*);
-template EXPORTCPUCOREMATH complext<double> sum(hoNDArray<complext<double> >*);
-
-template EXPORTCPUCOREMATH complext<float> mean(hoNDArray<complext<float> >*);
-template EXPORTCPUCOREMATH complext<float> sum(hoNDArray<complext<float> >*);
-}
-
diff --git a/toolboxes/core/cpu/arma_math/hoNDArray_reductions.h b/toolboxes/core/cpu/arma_math/hoNDArray_reductions.h
deleted file mode 100644
index 6ec4b45..0000000
--- a/toolboxes/core/cpu/arma_math/hoNDArray_reductions.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#pragma once
-
-#include "hoNDArray.h"
-#include "cpucore_math_export.h"
-
-namespace Gadgetron{
-
-  template<class REAL> EXPORTCPUCOREMATH REAL max(hoNDArray<REAL>* data);
-  template<class REAL> EXPORTCPUCOREMATH REAL min(hoNDArray<REAL>* data);
-  template<class T> EXPORTCPUCOREMATH T mean(hoNDArray<T>* data);
-  template<class T> EXPORTCPUCOREMATH T sum(hoNDArray<T>* data);
-}
diff --git a/toolboxes/core/cpu/dummy.cpp b/toolboxes/core/cpu/dummy.cpp
new file mode 100644
index 0000000..b89eb11
--- /dev/null
+++ b/toolboxes/core/cpu/dummy.cpp
@@ -0,0 +1,18 @@
+//
+// THIS IS A TEMPORARY FILE
+//
+// This file is to be removed from the repository once Hui merges his branch into mem_ops/development
+// Currently this is the only .cpp in this folder, and one needs to be present to satisfy cmake and generate the .lib
+//
+
+#ifdef WIN32
+#include <stdio.h>
+
+namespace Gadgetron {
+
+	void __declspec(dllexport) __this_is_a_temp_dummy_to_force_lib_file__(void){
+		printf("\n\nINSIDE DUMMY\n\n");
+	}
+}
+
+#endif
\ No newline at end of file
diff --git a/toolboxes/core/cpu/gadgetronmath.h b/toolboxes/core/cpu/gadgetronmath.h
new file mode 100644
index 0000000..91bed7f
--- /dev/null
+++ b/toolboxes/core/cpu/gadgetronmath.h
@@ -0,0 +1,26 @@
+#pragma once
+
+/** \file gadgetronmath.h
+\brief Math utility functions
+
+*/
+
+#define _USE_MATH_DEFINES
+#include <math.h>
+
+namespace Gadgetron {
+
+template <typename T> T sinc(T x) {
+  
+  T val;
+  if (std::abs(x)<.01) {
+    // to 6th order
+    val = 1.0 - 1/6.*std::pow(M_PI*x,2) + 1/120.*std::pow(M_PI*x,4) - 1/5040.*std::pow(M_PI*x,6);
+  } else {
+    val = std::sin(M_PI*x) / (M_PI*x);
+  }
+
+  return val;
+}
+
+}
diff --git a/toolboxes/core/cpu/ho2DArray.h b/toolboxes/core/cpu/ho2DArray.h
index d913c60..95735c0 100644
--- a/toolboxes/core/cpu/ho2DArray.h
+++ b/toolboxes/core/cpu/ho2DArray.h
@@ -10,9 +10,11 @@ public:
 
     typedef hoNDArray<T> BaseClass;
 
+    using BaseClass::create;
+
     ho2DArray();
     ho2DArray(size_t sx, size_t sy);
-    ho2DArray(std::vector<size_t> *dimensions);
+    explicit ho2DArray(std::vector<size_t> *dimensions);
     ho2DArray(std::vector<size_t> *dimensions, T* data, bool delete_data_on_destruct = false);
     ho2DArray(size_t sx, size_t sy, T* data, bool delete_data_on_destruct = false);
     ho2DArray(boost::shared_ptr< std::vector<size_t> > dimensions);
@@ -33,6 +35,36 @@ public:
     T& operator()(size_t x , size_t y);
     const T& operator()(size_t x , size_t y) const;
 
+    T& operator()( const std::vector<size_t>& ind ) { return (*this)(ind[0], ind[1]); }
+    const T& operator()( const std::vector<size_t>& ind ) const  { return (*this)(ind[0], ind[1]); }
+
+    T& operator()( const std::vector<gt_index_type>& ind ) { return (*this)( (size_t)ind[0], (size_t)ind[1]); }
+    const T& operator()( const std::vector<gt_index_type>& ind ) const { return (*this)( (size_t)ind[0], (size_t)ind[1]); }
+
+    T& operator()( size_t x ) { return (*this)(x, 0); }
+    const T& operator()( size_t x ) const { return (*this)(x, 0); }
+
+    T& operator()( size_t x, size_t y, size_t z ) { return (*this)(x, y); }
+    const T& operator()( size_t x, size_t y, size_t z ) const { return (*this)(x, y); }
+
+    T& operator()( size_t x, size_t y, size_t z, size_t s ) { return (*this)(x, y); }
+    const T& operator()( size_t x, size_t y, size_t z, size_t s ) const { return (*this)(x, y); }
+
+    T& operator()( size_t x, size_t y, size_t z, size_t s, size_t p ) { return (*this)(x, y); }
+    const T& operator()( size_t x, size_t y, size_t z, size_t s, size_t p ) const { return (*this)(x, y); }
+
+    T& operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r ) { return (*this)(x, y); }
+    const T& operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r ) const { return (*this)(x, y); }
+
+    T& operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a ) { return (*this)(x, y); }
+    const T& operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a ) const { return (*this)(x, y); }
+
+    T& operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q ) { return (*this)(x, y); }
+    const T& operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q ) const { return (*this)(x, y); }
+
+    T& operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q, size_t u ) { return (*this)(x, y); }
+    const T& operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q, size_t u ) const { return (*this)(x, y); }
+
     virtual void print(std::ostream& os) const;
 
 protected:
diff --git a/toolboxes/core/cpu/ho3DArray.h b/toolboxes/core/cpu/ho3DArray.h
index 7109eb3..afeb109 100644
--- a/toolboxes/core/cpu/ho3DArray.h
+++ b/toolboxes/core/cpu/ho3DArray.h
@@ -12,7 +12,7 @@ public:
 
     ho3DArray();
     ho3DArray(size_t sx, size_t sy, size_t sz);
-    ho3DArray(std::vector<size_t> *dimensions);
+    explicit ho3DArray(std::vector<size_t> *dimensions);
     ho3DArray(std::vector<size_t> *dimensions, T* data, bool delete_data_on_destruct = false);
     ho3DArray(size_t sx, size_t sy, size_t sz, T* data, bool delete_data_on_destruct = false);
     ho3DArray(boost::shared_ptr< std::vector<size_t> > dimensions);
diff --git a/toolboxes/core/cpu/ho4DArray.h b/toolboxes/core/cpu/ho4DArray.h
index 28c1225..524bd56 100644
--- a/toolboxes/core/cpu/ho4DArray.h
+++ b/toolboxes/core/cpu/ho4DArray.h
@@ -12,7 +12,7 @@ public:
 
     ho4DArray();
     ho4DArray(size_t sx, size_t sy, size_t sz, size_t ss);
-    ho4DArray(std::vector<size_t> *dimensions);
+    explicit ho4DArray(std::vector<size_t> *dimensions);
     ho4DArray(std::vector<size_t> *dimensions, T* data, bool delete_data_on_destruct = false);
     ho4DArray(size_t sx, size_t sy, size_t sz, size_t ss, T* data, bool delete_data_on_destruct = false);
     ho4DArray(boost::shared_ptr< std::vector<size_t> > dimensions);
diff --git a/toolboxes/core/cpu/ho5DArray.h b/toolboxes/core/cpu/ho5DArray.h
index 860a687..50a4ed4 100644
--- a/toolboxes/core/cpu/ho5DArray.h
+++ b/toolboxes/core/cpu/ho5DArray.h
@@ -12,7 +12,7 @@ public:
 
     ho5DArray();
     ho5DArray(size_t sx, size_t sy, size_t sz, size_t ss, size_t sp);
-    ho5DArray(std::vector<size_t> *dimensions);
+    explicit ho5DArray(std::vector<size_t> *dimensions);
     ho5DArray(std::vector<size_t> *dimensions, T* data, bool delete_data_on_destruct = false);
     ho5DArray(size_t sx, size_t sy, size_t sz, size_t ss, size_t sp, T* data, bool delete_data_on_destruct = false);
     ho5DArray(boost::shared_ptr< std::vector<size_t> > dimensions);
diff --git a/toolboxes/core/cpu/ho5DArray.hxx b/toolboxes/core/cpu/ho5DArray.hxx
index dd9cf54..40fe733 100644
--- a/toolboxes/core/cpu/ho5DArray.hxx
+++ b/toolboxes/core/cpu/ho5DArray.hxx
@@ -168,7 +168,7 @@ bool ho5DArray<T>::createArray(size_t sx, size_t sy, size_t sz, size_t ss, size_
         dim[3] = ss;
         dim[4] = sp;
 
-        this->create(&dim);
+        this->create(&dim, data, delete_data_on_destruct);
         GADGET_CHECK_RETURN_FALSE(init_accesser());
     }
     catch(...)
diff --git a/toolboxes/core/cpu/ho6DArray.h b/toolboxes/core/cpu/ho6DArray.h
index 6fc6a90..2001237 100644
--- a/toolboxes/core/cpu/ho6DArray.h
+++ b/toolboxes/core/cpu/ho6DArray.h
@@ -12,7 +12,7 @@ public:
 
     ho6DArray();
     ho6DArray(size_t sx, size_t sy, size_t sz, size_t ss, size_t sp, size_t sr);
-    ho6DArray(std::vector<size_t> *dimensions);
+    explicit ho6DArray(std::vector<size_t> *dimensions);
     ho6DArray(std::vector<size_t> *dimensions, T* data, bool delete_data_on_destruct = false);
     ho6DArray(size_t sx, size_t sy, size_t sz, size_t ss, size_t sp, size_t sr, T* data, bool delete_data_on_destruct = false);
     ho6DArray(boost::shared_ptr< std::vector<size_t> > dimensions);
diff --git a/toolboxes/core/cpu/ho6DArray.hxx b/toolboxes/core/cpu/ho6DArray.hxx
index 61fe9da..7ebe904 100644
--- a/toolboxes/core/cpu/ho6DArray.hxx
+++ b/toolboxes/core/cpu/ho6DArray.hxx
@@ -172,7 +172,7 @@ bool ho6DArray<T>::createArray(size_t sx, size_t sy, size_t sz, size_t ss, size_
         dim[4] = sp;
         dim[5] = sr;
 
-        this->create(&dim);
+        this->create(&dim, data, delete_data_on_destruct);
         GADGET_CHECK_RETURN_FALSE(init_accesser());
     }
     catch(...)
diff --git a/toolboxes/core/cpu/ho7DArray.h b/toolboxes/core/cpu/ho7DArray.h
index 1e41b99..728441b 100644
--- a/toolboxes/core/cpu/ho7DArray.h
+++ b/toolboxes/core/cpu/ho7DArray.h
@@ -12,7 +12,7 @@ public:
 
     ho7DArray();
     ho7DArray(size_t sx, size_t sy, size_t sz, size_t ss, size_t sp, size_t sr, size_t sa);
-    ho7DArray(std::vector<size_t> *dimensions);
+    explicit ho7DArray(std::vector<size_t> *dimensions);
     ho7DArray(std::vector<size_t> *dimensions, T* data, bool delete_data_on_destruct = false);
     ho7DArray(size_t sx, size_t sy, size_t sz, size_t ss, size_t sp, size_t sr, size_t sa, T* data, bool delete_data_on_destruct = false);
     ho7DArray(boost::shared_ptr< std::vector<size_t> > dimensions);
diff --git a/toolboxes/core/cpu/ho7DArray.hxx b/toolboxes/core/cpu/ho7DArray.hxx
index 4c77b2e..98b6f91 100644
--- a/toolboxes/core/cpu/ho7DArray.hxx
+++ b/toolboxes/core/cpu/ho7DArray.hxx
@@ -175,7 +175,7 @@ bool ho7DArray<T>::createArray(size_t sx, size_t sy, size_t sz, size_t ss, size_
         dim[5] = sr;
         dim[6] = sa;
 
-        this->create(&dim);
+        this->create(&dim, data, delete_data_on_destruct);
         GADGET_CHECK_RETURN_FALSE(init_accesser());
     }
     catch(...)
diff --git a/toolboxes/core/cpu/hoMatrix.cpp b/toolboxes/core/cpu/hoMatrix.cpp
index b19ed8c..9e1cec5 100644
--- a/toolboxes/core/cpu/hoMatrix.cpp
+++ b/toolboxes/core/cpu/hoMatrix.cpp
@@ -1,512 +1,322 @@
 
-namespace Gadgetron
-{
-
-template <typename T> 
-hoMatrix<T>::hoMatrix() : BaseClass(1, 1)
-{
-}
-
-template <typename T> 
-hoMatrix<T>::hoMatrix(size_t rows, size_t cols) : BaseClass(rows, cols)
-{
-    this->fill(T(0));
-}
-
-template <typename T> 
-hoMatrix<T>::hoMatrix(size_t rows, size_t cols, T* data, bool delete_data_on_destruct)
-{
-    std::vector<size_t> dim(2);
-    dim[0] = rows;
-    dim[1] = cols;
-    this->create(&dim,data,delete_data_on_destruct);
-    GADGET_CHECK_THROW(this->init_accesser());
-}
-
-template <typename T> 
-hoMatrix<T>::~hoMatrix()
-{
-
-}
-
-template <typename T> 
-hoMatrix<T>::hoMatrix(const hoMatrix<T>& a) : BaseClass(a)
-{
-}
+#include "hoMatrix.h"
 
-template <typename T> 
-hoMatrix<T>& hoMatrix<T>::operator=(const hoMatrix& rhs)
+namespace Gadgetron
 {
-    if ( this == &rhs ) return *this;
-    BaseClass::operator=(rhs);
-    return *this;
-}
 
-template <typename T> 
-bool hoMatrix<T>::createMatrix(size_t rows, size_t cols)
+// C = A*B
+bool GeneralMatrixProduct(hoNDArray<float>& C, const hoNDArray<float>& A, bool transA, const hoNDArray<float>& B, bool transB)
 {
-    return this->createArray(rows, cols);
-}
+    try
+    {
+        typedef float T;
 
-template <typename T> 
-bool hoMatrix<T>::createMatrix(size_t rows, size_t cols, T* data, bool delete_data_on_destruct)
-{
-    return this->createArray(rows, cols, data, delete_data_on_destruct);
-}
+        size_t M = A.get_size(0);
+        size_t K = A.get_size(1);
+        if ( transA )
+        { 
+            M = A.get_size(1);
+            K = A.get_size(0);
+        }
 
-template <typename T> 
-inline T& hoMatrix<T>::operator()(size_t r, size_t c)
-{
-    GADGET_DEBUG_CHECK_THROW(c>=0 && r>=0 && r<(*dimensions_)[0] && c<(*dimensions_)[1]);
-    return accesser_[c][r];
-}
+        size_t K2 = B.get_size(0);
+        size_t N = B.get_size(1);
+        if ( transB )
+        {
+            K2 = B.get_size(1);
+            N = B.get_size(0);
+        }
 
-template <typename T> 
-inline const T& hoMatrix<T>::operator()(size_t r, size_t c) const
-{
-    GADGET_DEBUG_CHECK_THROW(c>=0 && r>=0 && c<(*dimensions_)[0] && r<(*dimensions_)[1]);
-    return accesser_[c][r];
-}
+        GADGET_CHECK_RETURN_FALSE(K==K2);
+        if ( (C.get_size(0)!=M) || (C.get_size(1)!=N) )
+        {
+            C.create(M, N);
+        }
 
-template <typename T> 
-inline size_t hoMatrix<T>::rows() const
-{
-    if ( dimensions_->empty() ) return 0;
-    return (*dimensions_)[0];
-}
+        const T* pA = A.begin();
+        const T* pB = B.begin();
+        T* pC = C.begin();
 
-template <typename T> 
-inline size_t hoMatrix<T>::cols() const
-{
-    if ( dimensions_->empty() ) return 0;
-    return (*dimensions_)[1];
-}
+        size_t m, n, k;
 
-template <typename T> 
-bool hoMatrix<T>::upperTri(const T& v)
-{
-    try
-    {
-        size_t r, c;
-        for (c=0; c<(*dimensions_)[1]; c++)
+        if ( !transA && !transB )
         {
-            for (r=0; r<(*dimensions_)[0]; r++)
+            for ( m=0; m<M; m++ )
             {
-                if ( c > r )
+                for ( n=0; n<N; n++ )
                 {
-                    (*this)(r, c) = v;
+                    pC[m+n*M] = 0;
+                    for ( k=0; k<K; k++ )
+                    {
+                        pC[m+n*M] += pA[m+k*M]*pB[k+n*K];
+                    }
                 }
             }
         }
-    }
-    catch (...)
-    {
-        GADGET_ERROR_MSG("Errors in hoMatrix<T>::upperTri(const T& v) ... ");
-        return false;
-    }
-    return true;
-}
 
-template <typename T> 
-bool hoMatrix<T>::lowerTri(const T& v)
-{
-    try
-    {
-        size_t r, c;
-        for (c=0; c<(*dimensions_)[1]; c++)
+        if ( transA && !transB )
         {
-            for (r=0; r<(*dimensions_)[0]; r++)
+            for ( m=0; m<M; m++ )
             {
-                if ( r > c )
+                for ( n=0; n<N; n++ )
                 {
-                    (*this)(r, c) = v;
+                    pC[m+n*M] = 0;
+                    for ( k=0; k<K; k++ )
+                    {
+                        pC[m+n*M] += pA[k+m*K]*pB[k+n*K];
+                    }
                 }
             }
         }
-    }
-    catch (...)
-    {
-        GADGET_ERROR_MSG("Errors in hoMatrix<T>::lowerTri(const T& v) ... ");
-        return false;
-    }
-    return true;
-}
-
-template <typename T> 
-bool hoMatrix<T>::sumOverRow(hoNDArray<T>& res) const
-{
-    try
-    {
-        size_t ROW = rows();
-        size_t COL = cols();
-
-        if ( res.get_number_of_elements() != ROW )
-        {
-            res.create(ROW);
-        }
-
-        T* pRes = res.begin();
-
-        size_t r, c;
-
-        for ( r=0; r<ROW; r++ )
-        {
-            pRes[r] = 0;
-        }
 
-        for ( c=0; c<COL; c++ )
+        if ( !transA && transB )
         {
-            for ( r=0; r<ROW; r++ )
+            for ( m=0; m<M; m++ )
             {
-                // res(r) += (*this)(r, c);
-                pRes[r] += this->data_[r+c*ROW];
+                for ( n=0; n<N; n++ )
+                {
+                    pC[m+n*M] = 0;
+                    for ( k=0; k<K; k++ )
+                    {
+                        pC[m+n*M] += pA[m+k*M]*pB[n+k*K];
+                    }
+                }
             }
         }
-    }
-    catch (...)
-    {
-        GADGET_ERROR_MSG("Errors in hoMatrix<T>::sumOverRow(hoNDArray<T>& r) ... ");
-        return false;
-    }
-
-    return true;
-}
-
-template <typename T> 
-bool hoMatrix<T>::sumOverCol(hoNDArray<T>& res) const
-{
-    try
-    {
-        size_t ROW = rows();
-        size_t COL = cols();
-
-        if ( res.get_number_of_elements() != COL )
-        {
-            res.create(COL);
-        }
-
-        T* pRes = res.begin();
-
-        size_t r;
-        long long c;
-
-        for ( c=0; c<(long long)COL; c++ )
-        {
-            pRes[c] = 0;
-        }
 
-        #pragma omp parallel for default(none) private(c, r) shared(COL, ROW, pRes) if ( COL > 16 )
-        for ( c=0; c<(long long)COL; c++ )
+        if ( transA && transB )
         {
-            T v(0);
-            for ( r=0; r<ROW; r++ )
+            for ( m=0; m<M; m++ )
             {
-                v += this->data_[r+c*ROW];
+                for ( n=0; n<N; n++ )
+                {
+                    pC[m+n*M] = 0;
+                    for ( k=0; k<K; k++ )
+                    {
+                        pC[m+n*M] += pA[k+m*K]*pB[n+k*K];
+                    }
+                }
             }
-            pRes[c] = v;
         }
     }
-    catch (...)
+    catch(...)
     {
-        GADGET_ERROR_MSG("Errors in hoMatrix<T>::sumOverCol(hoNDArray<T>& r) ... ");
+        GADGET_ERROR_MSG("Errors in GeneralMatrixProduct(hoNDArray<float>& C, const hoNDArray<float>& A, bool transA, const hoNDArray<float>& B, bool transB) ...");
         return false;
     }
-
     return true;
 }
 
-template <typename T> 
-bool hoMatrix<T>::subMatrix(Self& res, size_t startR, size_t endR, size_t startC, size_t endC) const
+bool GeneralMatrixProduct(hoNDArray<double>& C, const hoNDArray<double>& A, bool transA, const hoNDArray<double>& B, bool transB)
 {
     try
     {
-        size_t ROW = rows();
-        size_t COL = cols();
-
-        GADGET_CHECK_RETURN_FALSE(startR>=0&&startR<ROW);
-        GADGET_CHECK_RETURN_FALSE(startC>=0&&startC<COL);
-        GADGET_CHECK_RETURN_FALSE(endR>=0&&endR<ROW);
-        GADGET_CHECK_RETURN_FALSE(endC>=0&&endC<COL);
-        GADGET_CHECK_RETURN_FALSE(endR>=startR);
-        GADGET_CHECK_RETURN_FALSE(endC>=startC);
+        typedef double T;
 
-        GADGET_CHECK_RETURN_FALSE(res.createMatrix(endR-startR+1, endC-startC+1));
-
-        size_t r, c;
-        for ( r=startR; r<=endR; r++ )
-        {
-            for ( c=startC; c<=endC; c++ )
-            {
-                res(r-startR, c-startC) = (*this)(r, c);
-            }
+        size_t M = A.get_size(0);
+        size_t K = A.get_size(1);
+        if ( transA )
+        { 
+            M = A.get_size(1);
+            K = A.get_size(0);
         }
-    }
-    catch (...)
-    {
-        GADGET_ERROR_MSG("Errors in hoMatrix<T>::subMatrix(Self& res, size_t startR, size_t endR, size_t startC, size_t endC) ... ");
-        return false;
-    }
-
-    return true;
-}
 
-template <typename T> 
-bool hoMatrix<T>::operator == (const Self& m) const
-{
-    GADGET_CHECK_RETURN_FALSE(this->dimensions_equal(&m));
-    for ( size_t i=0; i<elements_; i++ )
-    { 
-        if (std::abs(data_[i]-m.data_[i])>DBL_EPSILON)
+        size_t K2 = B.get_size(0);
+        size_t N = B.get_size(1);
+        if ( transB )
         {
-            return false;
+            K2 = B.get_size(1);
+            N = B.get_size(0);
         }
-    }
-    return true;
-}
-
-template <typename T> 
-bool hoMatrix<T>::operator != (const Self& m) const
-{
-    return !(*this==m);
-}
-
-template <typename T> 
-void hoMatrix<T>::print(std::ostream& os) const
-{
-    using namespace std;
-    os.unsetf(std::ios::scientific);
 
-    os << "hoMatrix (row X col): " << this->rows() << " X " << this->cols() << " : " << std::string(typeid(T).name()) << endl;
-    size_t r, c;
-    for (r=0; r<(*dimensions_)[0]; r++) 
-    {
-        os << "r " << r << ":\t";
-        for (c=0; c<(*dimensions_)[1]; c++)
+        GADGET_CHECK_RETURN_FALSE(K==K2);
+        if ( (C.get_size(0)!=M) || (C.get_size(1)!=N) )
         {
-            os << setprecision(10) << (*this)(r,c) << "\t";
+            C.create(M, N);
         }
-        os << endl; 
-    }
-}
 
-template <typename T> 
-bool copyL2U(hoMatrix<T>& A)
-{
-    try
-    {
-        GADGET_CHECK_RETURN_FALSE(A.rows()==A.cols());
+        const T* pA = A.begin();
+        const T* pB = B.begin();
+        T* pC = C.begin();
 
-        size_t R = A.rows();
-        size_t C = A.cols();
+        size_t m, n, k;
 
-        size_t row, col;
-        for(row=0; row<R; row++) 
+        if ( !transA && !transB )
         {
-            for(col=0; col<row; col++ )
+            for ( m=0; m<M; m++ )
             {
-                A(col, row) = A(row, col);
+                for ( n=0; n<N; n++ )
+                {
+                    pC[m+n*M] = 0;
+                    for ( k=0; k<K; k++ )
+                    {
+                        pC[m+n*M] += pA[m+k*M]*pB[k+n*K];
+                    }
+                }
             }
         }
-    }
-    catch(...)
-    {
-        GADGET_ERROR_MSG("Errors in copyL2U(hoMatrix<T>& A) ... ");
-        return false;
-    }
-    return true;
-}
-
-template <typename T> 
-bool copyL2U(hoMatrix<T>& A, bool conj)
-{
-    try
-    {
-        GADGET_CHECK_RETURN_FALSE(A.rows()==A.cols());
-
-        size_t R = A.rows();
-        size_t row, col;
 
-        if ( conj )
+        if ( transA && !transB )
         {
-            for(row=0; row<R; row++) 
+            for ( m=0; m<M; m++ )
             {
-                for(col=0; col<row; col++ )
+                for ( n=0; n<N; n++ )
                 {
-                    A(col, row) = std::conj(A(row, col));
+                    pC[m+n*M] = 0;
+                    for ( k=0; k<K; k++ )
+                    {
+                        pC[m+n*M] += pA[k+m*K]*pB[k+n*K];
+                    }
                 }
             }
         }
-        else
+
+        if ( !transA && transB )
         {
-            for(row=0; row<R; row++) 
+            for ( m=0; m<M; m++ )
             {
-                for(col=0; col<row; col++ )
+                for ( n=0; n<N; n++ )
                 {
-                    A(col, row) = A(row, col);
+                    pC[m+n*M] = 0;
+                    for ( k=0; k<K; k++ )
+                    {
+                        pC[m+n*M] += pA[m+k*M]*pB[n+k*K];
+                    }
                 }
             }
         }
-    }
-    catch(...)
-    {
-        GADGET_ERROR_MSG("Errors in copyL2U(hoMatrix<T>& A, bool conj) ... ");
-        return false;
-    }
-    return true;
-}
-
-template <typename T> 
-bool copyU2L(hoMatrix<T>& A)
-{
-    try
-    {
-        GADGET_CHECK_RETURN_FALSE(A.rows()==A.cols());
-
-        size_t R = A.rows();
-        size_t C = A.cols();
 
-        size_t row, col;
-        for(row=0; row<R; row++) 
+        if ( transA && transB )
         {
-            for(col=row+1; col<C; col++ )
+            for ( m=0; m<M; m++ )
             {
-                A(col, row) = A(row, col);
+                for ( n=0; n<N; n++ )
+                {
+                    pC[m+n*M] = 0;
+                    for ( k=0; k<K; k++ )
+                    {
+                        pC[m+n*M] += pA[k+m*K]*pB[n+k*K];
+                    }
+                }
             }
         }
     }
     catch(...)
     {
-        GADGET_ERROR_MSG("Errors in copyU2L(hoMatrix<T>& A) ... ");
+        GADGET_ERROR_MSG("Errors in GeneralMatrixProduct(hoNDArray<double>& C, const hoNDArray<double>& A, bool transA, const hoNDArray<double>& B, bool transB) ...");
         return false;
     }
     return true;
 }
 
-template <typename T> 
-bool copyU2L(hoMatrix<T>& A, bool conj)
+bool GeneralMatrixProduct(hoNDArray< std::complex<float> >& C, const hoNDArray< std::complex<float> >& A, bool transA, const hoNDArray< std::complex<float> >& B, bool transB) // C = op(A)*op(B); op(X) is X, or the conjugate transpose of X when its trans flag is set
 {
     try
     {
-        GADGET_CHECK_RETURN_FALSE(A.rows()==A.cols());
+        typedef  std::complex<float>  T;
 
-        size_t R = A.rows();
-        size_t C = A.cols();
+        size_t M = A.get_size(0); // op(A) is M x K
+        size_t K = A.get_size(1);
+        if ( transA )
+        { 
+            M = A.get_size(1);
+            K = A.get_size(0);
+        }
 
-        size_t row, col;
+        size_t K2 = B.get_size(0); // op(B) is K2 x N
+        size_t N = B.get_size(1);
+        if ( transB )
+        {
+            K2 = B.get_size(1);
+            N = B.get_size(0);
+        }
 
-        if ( conj )
+        GADGET_CHECK_RETURN_FALSE(K==K2); // inner dimensions of op(A) and op(B) must agree
+        if ( (C.get_size(0)!=M) || (C.get_size(1)!=N) )
         {
-            for(row=0; row<R; row++) 
+            C.create(M, N);
+        }
+
+        const T* pA = A.begin();
+        const T* pB = B.begin();
+        T* pC = C.begin();
+
+        size_t m, n, k;
+
+        if ( !transA && !transB ) // C(m,n) = sum_k A(m,k)*B(k,n); element (r,c) of an array with R rows is read at r+c*R
+        {
+            for ( m=0; m<M; m++ )
             {
-                for(col=row+1; col<C; col++ )
+                for ( n=0; n<N; n++ )
                 {
-                    A(col, row) = std::conj(A(row, col));
+                    pC[m+n*M] = 0;
+                    for ( k=0; k<K; k++ )
+                    {
+                        pC[m+n*M] += pA[m+k*M]*pB[k+n*K];
+                    }
                 }
             }
         }
-        else
+
+        if ( transA && !transB )
         {
-            for(row=0; row<R; row++) 
+            for ( m=0; m<M; m++ )
             {
-                for(col=row+1; col<C; col++ )
+                for ( n=0; n<N; n++ )
                 {
-                    A(col, row) = A(row, col);
+                    pC[m+n*M] = 0;
+                    for ( k=0; k<K; k++ )
+                    {
+                        pC[m+n*M] += std::conj(pA[k+m*K])*pB[k+n*K]; // A is stored K x M here, so A(k,m) is at k+m*K
+                    }
                 }
             }
         }
-    }
-    catch(...)
-    {
-        GADGET_ERROR_MSG("Errors in copyU2L(hoMatrix<T>& A, bool conj) ... ");
-        return false;
-    }
-    return true;
-}
-
-template <typename T> 
-bool trans(const hoMatrix<T>& A, hoMatrix<T>& AT)
-{
-    try
-    {
-        if ( A.get_number_of_elements() == 0 ) return true;
-
-        if ( AT.rows()!=A.cols() || AT.cols()!=A.rows() )
-        {
-            AT.createMatrix(A.cols(), A.rows());
-        }
 
-        long long r, c;
-        #ifdef GCC_OLD_FLAG
-            #pragma omp parallel for default(none) private(r, c)
-        #else
-            #pragma omp parallel for default(none) private(r, c) shared(A, AT)
-        #endif
-        for ( c=0; c<(long long)A.cols(); c++ )
+        if ( !transA && transB )
         {
-            for ( r=0; r<(long long)A.rows(); r++ )
+            for ( m=0; m<M; m++ )
             {
-                AT(c,r) = A(r,c);
+                for ( n=0; n<N; n++ )
+                {
+                    pC[m+n*M] = 0;
+                    for ( k=0; k<K; k++ )
+                    {
+                        pC[m+n*M] += pA[m+k*M]*std::conj(pB[n+k*N]); // B is stored N x K here (N = B.get_size(0)), so B(n,k) is at n+k*N, not n+k*K
+                    }
+                }
             }
         }
-    }
-    catch (...)
-    {
-        GADGET_ERROR_MSG("Errors in trans(const hoMatrix<T>& A, hoMatrix<T>& AT) ... ");
-        return false;
-    }
-    return true;
-}
 
-template <typename T> 
-bool conjugatetrans(const hoMatrix<T>& A, hoMatrix<T>& AH)
-{
-    try
-    {
-        if ( A.get_number_of_elements() == 0 ) return true;
-
-        if ( AH.rows()!=A.cols() || AH.cols()!=A.rows() )
-        {
-            AH.createMatrix(A.cols(), A.rows());
-        }
-
-        long long r, c;
-        #ifdef GCC_OLD_FLAG
-            #pragma omp parallel for default(none) private(r, c)
-        #else
-            #pragma omp parallel for default(none) private(r, c) shared(A, AH)
-        #endif
-        for ( c=0; c<(long long)A.cols(); c++ )
+        if ( transA && transB )
         {
-            for ( r=0; r<(long long)A.rows(); r++ )
+            for ( m=0; m<M; m++ )
             {
-                AH(c,r) = std::conj(A(r,c));
+                for ( n=0; n<N; n++ )
+                {
+                    pC[m+n*M] = 0;
+                    for ( k=0; k<K; k++ )
+                    {
+                        pC[m+n*M] += std::conj(pA[k+m*K])*std::conj(pB[n+k*N]); // B is stored N x K here (N = B.get_size(0)), so B(n,k) is at n+k*N, not n+k*K
+                    }
+                }
             }
         }
     }
-    catch (...)
+    catch(...)
     {
-        GADGET_ERROR_MSG("Errors in conjugatetrans(const hoMatrix<T>& A, hoMatrix<T>& AH) ... ");
+        GADGET_ERROR_MSG("Errors in GeneralMatrixProduct(hoNDArray< std::complex<float> >& C, const hoNDArray< std::complex<float> >& A, bool transA, const hoNDArray< std::complex<float> >& B, bool transB) ...");
         return false;
     }
     return true;
 }
 
-inline bool conjugatetrans(const hoMatrix<float>& A, hoMatrix<float>& AH)
-{
-    return trans(A, AH);
-}
-
-inline bool conjugatetrans(const hoMatrix<double>& A, hoMatrix<double>& AH)
-{
-    return trans(A, AH);
-}
-
-// C = A*B
-template<typename T> 
-bool GeneralMatrixProduct(hoNDArray<T>& C, const hoNDArray<T>& A, bool transA, const hoNDArray<T>& B, bool transB)
+bool GeneralMatrixProduct(hoNDArray< std::complex<double> >& C, const hoNDArray< std::complex<double> >& A, bool transA, const hoNDArray< std::complex<double> >& B, bool transB)
 {
     try
     {
+        typedef  std::complex<double>  T;
+
         size_t M = A.get_size(0);
         size_t K = A.get_size(1);
         if ( transA )
@@ -564,746 +374,43 @@ bool GeneralMatrixProduct(hoNDArray<T>& C, const hoNDArray<T>& A, bool transA, c
                 }
             }
         }
-    }
-    catch(...)
-    {
-        GADGET_ERROR_MSG("Errors in GeneralMatrixProduct(hoNDArray<T>& C, const hoNDArray<T>& A, bool transA, const hoNDArray<T>& B, bool transB) ...");
-        return false;
-    }
-    return true;
-}
-
-// following matrix computation calls MKL functions
-#ifdef USE_MKL
 
-template<typename T> 
-bool GeneralMatrixProduct_gemm(hoNDArray<T>& C, 
-                            const hoNDArray<T>& A, bool transA, 
-                            const hoNDArray<T>& B, bool transB)
-{
-    try
-    {
-        char TA, TB;
-
-        MKL_INT lda = A.get_size(0);
-        MKL_INT ldb = B.get_size(0);
-        const T* pA = A.begin(); 
-        const T* pB = B.begin(); 
-
-        MKL_INT M = A.get_size(0);
-        MKL_INT K = A.get_size(1);
-        if ( transA )
-        { 
-            M = A.get_size(1);
-            K = A.get_size(0);
-        }
-
-        MKL_INT K2 = B.get_size(0);
-        MKL_INT N = B.get_size(1);
-        if ( transB )
-        {
-            K2 = B.get_size(1);
-            N = B.get_size(0);
-        }
-
-        GADGET_CHECK_RETURN_FALSE(K==K2);
-        if ( (C.get_size(0)!=M) || (C.get_size(1)!=N) )
-        {
-            C.create(M, N);
-        }
-
-        T* pC = C.begin();
-        MKL_INT ldc = C.get_size(0);
-
-        if ( typeid(T)==typeid(float) )
+        if ( !transA && transB )
         {
-            float alpha(1), beta(0);
-
-            if ( transA )
-            {
-                TA = 'T';
-            }
-            else
-            {
-                TA = 'N';
-            }
-
-            if ( transB )
-            {
-                TB = 'T';
-            }
-            else
-            {
-                TB = 'N';
-            }
-
-            if ( &A != &C )
-            {
-                sgemm(&TA, &TB, &M, &N, &K, &alpha, reinterpret_cast<const float*>(pA), &lda, reinterpret_cast<const float*>(pB), &ldb, &beta, reinterpret_cast<float*>(pC), &ldc);
-            }
-            else
-            {
-                hoNDArray<T> aTmp(A);
-                T* pATmp = aTmp.begin();
-                sgemm(&TA, &TB, &M, &N, &K, &alpha, reinterpret_cast<const float*>(pATmp), &lda, reinterpret_cast<const float*>(pB), &ldb, &beta, reinterpret_cast<float*>(pC), &ldc);
-            }
-        }
-        else if ( typeid(T)==typeid(double) )
-        {
-            double alpha(1), beta(0);
-
-            if ( transA )
-            {
-                TA = 'T';
-            }
-            else
-            {
-                TA = 'N';
-            }
-
-            if ( transB )
-            {
-                TB = 'T';
-            }
-            else
-            {
-                TB = 'N';
-            }
-
-            if ( &A != &C )
-            {
-                dgemm(&TA, &TB, &M, &N, &K, &alpha, reinterpret_cast<const double*>(pA), &lda, reinterpret_cast<const double*>(pB), &ldb, &beta, reinterpret_cast<double*>(pC), &ldc);
-            }
-            else
-            {
-                hoNDArray<T> aTmp(A);
-                T* pATmp = aTmp.begin();
-                dgemm(&TA, &TB, &M, &N, &K, &alpha, reinterpret_cast<const double*>(pATmp), &lda, reinterpret_cast<const double*>(pB), &ldb, &beta, reinterpret_cast<double*>(pC), &ldc);
-            }
-        }
-        else if ( typeid(T)==typeid(GT_Complex8) )
-        {
-            GT_Complex8 alpha(1), beta(0);
-
-            if ( transA )
-            {
-                TA = 'C';
-            }
-            else
-            {
-                TA = 'N';
-            }
-
-            if ( transB )
-            {
-                TB = 'C';
-            }
-            else
-            {
-                TB = 'N';
-            }
-
-            if ( &A != &C )
-            {
-                cgemm(&TA, &TB, &M, &N, &K, reinterpret_cast<MKL_Complex8*>(&alpha), reinterpret_cast<const MKL_Complex8*>(pA), &lda, reinterpret_cast<const MKL_Complex8*>(pB), &ldb, reinterpret_cast<MKL_Complex8*>(&beta), reinterpret_cast<MKL_Complex8*>(pC), &ldc);
-            }
-            else
-            {
-                hoNDArray<T> aTmp(A);
-                T* pATmp = aTmp.begin();
-                cgemm(&TA, &TB, &M, &N, &K, reinterpret_cast<MKL_Complex8*>(&alpha), reinterpret_cast<MKL_Complex8*>(pATmp), &lda, reinterpret_cast<const MKL_Complex8*>(pB), &ldb, reinterpret_cast<MKL_Complex8*>(&beta), reinterpret_cast<MKL_Complex8*>(pC), &ldc);
-            }
-        }
-        else if ( typeid(T)==typeid(GT_Complex16) )
-        {
-            GT_Complex16 alpha(1), beta(0);
-
-            if ( transA )
-            {
-                TA = 'C';
-            }
-            else
-            {
-                TA = 'N';
-            }
-
-            if ( transB )
-            {
-                TB = 'C';
-            }
-            else
-            {
-                TB = 'N';
-            }
-
-            if ( &A != &C )
-            {
-                zgemm(&TA, &TB, &M, &N, &K, reinterpret_cast<MKL_Complex16*>(&alpha), reinterpret_cast<const MKL_Complex16*>(pA), &lda, reinterpret_cast<const MKL_Complex16*>(pB), &ldb, reinterpret_cast<MKL_Complex16*>(&beta), reinterpret_cast<MKL_Complex16*>(pC), &ldc);
-            }
-            else
-            {
-                hoNDArray<T> aTmp(A);
-                T* pATmp = aTmp.begin();
-                zgemm(&TA, &TB, &M, &N, &K, reinterpret_cast<MKL_Complex16*>(&alpha), reinterpret_cast<MKL_Complex16*>(pATmp), &lda, reinterpret_cast<const MKL_Complex16*>(pB), &ldb, reinterpret_cast<MKL_Complex16*>(&beta), reinterpret_cast<MKL_Complex16*>(pC), &ldc);
-            }
-        }
-        else
-        {
-            GADGET_ERROR_MSG("GeneralMatrixProduct_gemm : unsupported type " << typeid(T).name() );
-            return false;
-        }
-    }
-    catch(...)
-    {
-        GADGET_ERROR_MSG("Errors in GeneralMatrixProduct_gemm(hoNDArray<T>& C, const hoNDArray<T>& A, bool transA, const hoNDArray<T>& B, bool transB) ...");
-        return false;
-    }
-    return true;
-}
-
-template<typename T> 
-bool GeneralMatrixProduct_gemm(hoMatrix<T>& C, 
-                            const hoMatrix<T>& A, bool transA, 
-                            const hoMatrix<T>& B, bool transB)
-{
-    try
-    {
-        char TA, TB;
-
-        MKL_INT lda = A.rows();
-        MKL_INT ldb = B.rows();
-        const T* pA = A.begin(); 
-        const T* pB = B.begin(); 
-
-        MKL_INT M = A.rows();
-        MKL_INT K = A.cols();
-        if ( transA )
-        {
-            M = A.cols();
-            K = A.rows();
-        }
-
-        MKL_INT K2 = B.rows();
-        MKL_INT N = B.cols();
-        if ( transB )
-        {
-            K2 = B.cols();
-            N = B.rows();
-        }
-
-        GADGET_CHECK_RETURN_FALSE(K==K2);
-        if ( (C.rows()!=M) || (C.cols()!=N) )
-        {
-            GADGET_CHECK_RETURN_FALSE(C.createMatrix(M, N));
-        }
-
-        T* pC = C.begin();
-        MKL_INT ldc = C.rows();
-
-        if ( typeid(T)==typeid(float) )
-        {
-            float alpha(1), beta(0);
-
-            if ( transA )
-            {
-                TA = 'T';
-            }
-            else
-            {
-                TA = 'N';
-            }
-
-            if ( transB )
-            {
-                TB = 'T';
-            }
-            else
-            {
-                TB = 'N';
-            }
-
-            if ( &A != &C )
-            {
-                sgemm(&TA, &TB, &M, &N, &K, &alpha, reinterpret_cast<const float*>(pA), &lda, reinterpret_cast<const float*>(pB), &ldb, &beta, reinterpret_cast<float*>(pC), &ldc);
-            }
-            else
-            {
-                hoNDArray<T> aTmp(A);
-                T* pATmp = aTmp.begin();
-                sgemm(&TA, &TB, &M, &N, &K, &alpha, reinterpret_cast<const float*>(pATmp), &lda, reinterpret_cast<const float*>(pB), &ldb, &beta, reinterpret_cast<float*>(pC), &ldc);
-            }
-        }
-        else if ( typeid(T)==typeid(double) )
-        {
-            double alpha(1), beta(0);
-
-            if ( transA )
-            {
-                TA = 'T';
-            }
-            else
-            {
-                TA = 'N';
-            }
-
-            if ( transB )
-            {
-                TB = 'T';
-            }
-            else
-            {
-                TB = 'N';
-            }
-
-            if ( &A != &C )
-            {
-                dgemm(&TA, &TB, &M, &N, &K, &alpha, reinterpret_cast<const double*>(pA), &lda, reinterpret_cast<const double*>(pB), &ldb, &beta, reinterpret_cast<double*>(pC), &ldc);
-            }
-            else
+            for ( m=0; m<M; m++ )
             {
-                hoNDArray<T> aTmp(A);
-                T* pATmp = aTmp.begin();
-                dgemm(&TA, &TB, &M, &N, &K, &alpha, reinterpret_cast<const double*>(pATmp), &lda, reinterpret_cast<const double*>(pB), &ldb, &beta, reinterpret_cast<double*>(pC), &ldc);
+                for ( n=0; n<N; n++ )
+                {
+                    pC[m+n*M] = 0;
+                    for ( k=0; k<K; k++ )
+                    {
+                        pC[m+n*M] += pA[m+k*M]*std::conj(pB[n+k*K]);
+                    }
+                }
             }
         }
-        else if ( typeid(T)==typeid(GT_Complex8) )
-        {
-            GT_Complex8 alpha(1), beta(0);
-
-            if ( transA )
-            {
-                TA = 'C';
-            }
-            else
-            {
-                TA = 'N';
-            }
-
-            if ( transB )
-            {
-                TB = 'C';
-            }
-            else
-            {
-                TB = 'N';
-            }
 
-            if ( &A != &C )
-            {
-                cgemm(&TA, &TB, &M, &N, &K, reinterpret_cast<MKL_Complex8*>(&alpha), reinterpret_cast<const MKL_Complex8*>(pA), &lda, reinterpret_cast<const MKL_Complex8*>(pB), &ldb, reinterpret_cast<MKL_Complex8*>(&beta), reinterpret_cast<MKL_Complex8*>(pC), &ldc);
-            }
-            else
-            {
-                hoNDArray<T> aTmp(A);
-                T* pATmp = aTmp.begin();
-                cgemm(&TA, &TB, &M, &N, &K, reinterpret_cast<MKL_Complex8*>(&alpha), reinterpret_cast<MKL_Complex8*>(pATmp), &lda, reinterpret_cast<const MKL_Complex8*>(pB), &ldb, reinterpret_cast<MKL_Complex8*>(&beta), reinterpret_cast<MKL_Complex8*>(pC), &ldc);
-            }
-        }
-        else if ( typeid(T)==typeid(GT_Complex16) )
+        if ( transA && transB )
         {
-            GT_Complex16 alpha(1), beta(0);
-
-            if ( transA )
-            {
-                TA = 'C';
-            }
-            else
-            {
-                TA = 'N';
-            }
-
-            if ( transB )
-            {
-                TB = 'C';
-            }
-            else
-            {
-                TB = 'N';
-            }
-
-            if ( &A != &C )
-            {
-                zgemm(&TA, &TB, &M, &N, &K, reinterpret_cast<MKL_Complex16*>(&alpha), reinterpret_cast<const MKL_Complex16*>(pA), &lda, reinterpret_cast<const MKL_Complex16*>(pB), &ldb, reinterpret_cast<MKL_Complex16*>(&beta), reinterpret_cast<MKL_Complex16*>(pC), &ldc);
-            }
-            else
+            for ( m=0; m<M; m++ )
             {
-                hoNDArray<T> aTmp(A);
-                T* pATmp = aTmp.begin();
-                zgemm(&TA, &TB, &M, &N, &K, reinterpret_cast<MKL_Complex16*>(&alpha), reinterpret_cast<MKL_Complex16*>(pATmp), &lda, reinterpret_cast<const MKL_Complex16*>(pB), &ldb, reinterpret_cast<MKL_Complex16*>(&beta), reinterpret_cast<MKL_Complex16*>(pC), &ldc);
+                for ( n=0; n<N; n++ )
+                {
+                    pC[m+n*M] = 0;
+                    for ( k=0; k<K; k++ )
+                    {
+                        pC[m+n*M] += std::conj(pA[k+m*K])*std::conj(pB[n+k*K]);
+                    }
+                }
             }
         }
-        else
-        {
-            GADGET_ERROR_MSG("GeneralMatrixProduct_gemm : unsupported type " << typeid(T).name() );
-            return false;
-        }
     }
     catch(...)
     {
-        GADGET_ERROR_MSG("Errors in GeneralMatrixProduct_gemm(hoMatrix<T>& C, const hoMatrix<T>& A, bool transA, const hoMatrix<T>& B, bool transB) ...");
+        GADGET_ERROR_MSG("Errors in GeneralMatrixProduct(hoNDArray< std::complex<double> >& C, const hoNDArray< std::complex<double> >& A, bool transA, const hoNDArray< std::complex<double> >& B, bool transB) ...");
         return false;
     }
     return true;
 }
 
-template<typename T> 
-bool CholeskyHermitianPositiveDefinite_potrf(hoMatrix<T>& A, char uplo)
-{
-    try
-    {
-        if( A.get_number_of_elements()==0 ) return true;
-        GADGET_CHECK_RETURN_FALSE(A.rows()==A.cols());
-
-        MKL_INT info;
-        lapack_int n = (lapack_int)(A.rows());
-        T* pA = A.begin();
-        lapack_int lda = (lapack_int)(A.rows());
-
-        if ( typeid(T)==typeid(float) )
-        {
-            spotrf(&uplo, &n, reinterpret_cast<float*>(pA), &lda, &info);
-        }
-        else if ( typeid(T)==typeid(double) )
-        {
-            dpotrf(&uplo, &n, reinterpret_cast<double*>(pA), &lda, &info);
-        }
-        else if ( typeid(T)==typeid(GT_Complex8) )
-        {
-            cpotrf(&uplo, &n, reinterpret_cast<MKL_Complex8*>(pA), &lda, &info);
-        }
-        else if ( typeid(T)==typeid(GT_Complex16) )
-        {
-            zpotrf(&uplo, &n, reinterpret_cast<MKL_Complex16*>(pA), &lda, &info);
-        }
-        else
-        {
-            GADGET_ERROR_MSG("CholeskyHermitianPositiveDefinite_potrf : unsupported type " << typeid(T).name());
-            return false;
-        }
-
-        GADGET_CHECK_RETURN_FALSE(info==0);
-
-        if ( uplo == 'U' )
-        {
-            GADGET_CHECK_RETURN_FALSE(A.lowerTri(0));
-        }
-        else
-        {
-            GADGET_CHECK_RETURN_FALSE(A.upperTri(0));
-        }
-    }
-    catch(...)
-    {
-        GADGET_ERROR_MSG("Errors in CholeskyHermitianPositiveDefinite_potrf(hoMatrix<T>& A, char uplo) ...");
-        return false;
-    }
-    return true;
-}
-
-template<typename T> 
-bool EigenAnalysis_syev_heev(hoMatrix<T>& A, hoMatrix<typename realType<T>::Type>& eigenValue)
-{
-    try
-    {
-        long long M = (long long)A.rows();
-        GADGET_CHECK_RETURN_FALSE(A.cols() == M);
-
-        if ( (eigenValue.rows()!=M) || (eigenValue.cols()!=1) )
-        {
-            GADGET_CHECK_RETURN_FALSE(eigenValue.createMatrix(M, 1));
-        }
-
-        MKL_INT info;
-        char jobz = 'V';
-        char uplo = 'L';
-        T* pA = A.begin();
-        typename realType<T>::Type* pEV = eigenValue.begin();
-
-        if ( typeid(T)==typeid(float) )
-        {
-            info = LAPACKE_ssyev(LAPACK_COL_MAJOR, jobz, uplo, M, reinterpret_cast<float*>(pA), M, reinterpret_cast<float*>(pEV));
-        }
-        else if ( typeid(T)==typeid(double) )
-        {
-            info = LAPACKE_dsyev(LAPACK_COL_MAJOR, jobz, uplo, M, reinterpret_cast<double*>(pA), M, reinterpret_cast<double*>(pEV));
-        }
-        else if ( typeid(T)==typeid(GT_Complex8) )
-        {
-            info = LAPACKE_cheev(LAPACK_COL_MAJOR, jobz, uplo, M, reinterpret_cast<MKL_Complex8*>(pA), M, reinterpret_cast<float*>(pEV));
-        }
-        else if ( typeid(T)==typeid(GT_Complex16) )
-        {
-            info = LAPACKE_zheev(LAPACK_COL_MAJOR, jobz, uplo, M, reinterpret_cast<MKL_Complex16*>(pA), M, reinterpret_cast<double*>(pEV));
-        }
-        else
-        {
-            GADGET_ERROR_MSG("EigenAnalysis_syev_heev : unsupported type " << typeid(T).name());
-            return false;
-        }
-
-        /*long long lwork;
-        lwork = M*M;
-
-        if ( typeid(T)==typeid(float) )
-        {
-            hoNDArray<float> work(M, M);
-            ssyev(&jobz, &uplo, &M, reinterpret_cast<float*>(pA), &M, reinterpret_cast<float*>(pEV), work.begin(), &lwork, &info);
-        }
-        else if ( typeid(T)==typeid(double) )
-        {
-            hoNDArray<double> work(M, M);
-            dsyev(&jobz, &uplo, &M, reinterpret_cast<double*>(pA), &M, reinterpret_cast<double*>(pEV), work.begin(), &lwork, &info);
-        }
-        else if ( typeid(T)==typeid(GT_Complex8) )
-        {
-            hoNDArray<GT_Complex8> work(M, M);
-            hoNDArray<float> rwork(3*M);
-            cheev(&jobz, &uplo, &M, reinterpret_cast<MKL_Complex8*>(pA), &M, reinterpret_cast<float*>(pEV), reinterpret_cast<MKL_Complex8*>(work.begin()), &lwork, rwork.begin(), &info);
-        }
-        else if ( typeid(T)==typeid(GT_Complex16) )
-        {
-            hoNDArray<GT_Complex16> work(M, M);
-            hoNDArray<double> rwork(3*M);
-            zheev(&jobz, &uplo, &M, reinterpret_cast<MKL_Complex16*>(pA), &M, reinterpret_cast<double*>(pEV), reinterpret_cast<MKL_Complex16*>(work.begin()), &lwork, rwork.begin(), &info);
-        }
-        else
-        {
-            GADGET_ERROR_MSG("EigenAnalysis_syev_heev : unsupported type " << typeid(T).name());
-            return false;
-        }*/
-
-        GADGET_CHECK_RETURN_FALSE(info==0);
-    }
-    catch (...)
-    {
-        GADGET_ERROR_MSG("Errors in EigenAnalysis_syev_heev(hoMatrix<T>& A, hoMatrix<typename realType<T>::Type>& eigenValue) ... ");
-        return false;
-    }
-    return true;
-}
-
-template<typename T> 
-bool EigenAnalysis_syev_heev2(hoMatrix<T>& A, hoMatrix<T>& eigenValue)
-{
-    try
-    {
-        long long M = (long long)A.rows();
-        GADGET_CHECK_RETURN_FALSE(A.cols() == M);
-
-        if ( (eigenValue.rows()!=M) || (eigenValue.cols()!=1) )
-        {
-            GADGET_CHECK_RETURN_FALSE(eigenValue.createMatrix(M, 1));
-        }
-
-        hoMatrix<typename realType<T>::Type> D(M, 1);
-        GADGET_CHECK_RETURN_FALSE(EigenAnalysis_syev_heev(A, D));
-        //GADGET_CHECK_RETURN_FALSE(eigenValue.copyFrom(D));
-        eigenValue.copyFrom(D);
-    }
-    catch (...)
-    {
-        GADGET_ERROR_MSG("Errors in EigenAnalysis_syev_heev2(hoMatrix<T>& A, hoMatrix<T>& eigenValue) ... ");
-        return false;
-    }
-    return true;
-}
-
-template<typename T> 
-bool SymmetricHermitianPositiveDefiniteInverse_potri(hoMatrix<T>& A)
-{
-    try
-    {
-        if( A.get_number_of_elements()==0 ) return true;
-        GADGET_CHECK_RETURN_FALSE(A.rows()==A.cols());
-
-        MKL_INT info;
-        char uplo = 'L';
-        lapack_int n = (lapack_int)A.rows();
-        T* pA = A.begin();
-        lapack_int lda = (lapack_int)A.rows();
-
-        if ( typeid(T)==typeid(float) )
-        {
-            info = LAPACKE_spotrf(LAPACK_COL_MAJOR, uplo, n, reinterpret_cast<float*>(pA), lda);
-            GADGET_CHECK_RETURN_FALSE(info==0);
-
-            info = LAPACKE_spotri(LAPACK_COL_MAJOR, uplo, n, reinterpret_cast<float*>(pA), lda);
-            GADGET_CHECK_RETURN_FALSE(info==0);
-        }
-        else if ( typeid(T)==typeid(double) )
-        {
-            info = LAPACKE_dpotrf(LAPACK_COL_MAJOR, uplo, n, reinterpret_cast<double*>(pA), lda);
-            GADGET_CHECK_RETURN_FALSE(info==0);
-
-            info = LAPACKE_dpotri(LAPACK_COL_MAJOR, uplo, n, reinterpret_cast<double*>(pA), lda);
-            GADGET_CHECK_RETURN_FALSE(info==0);
-        }
-        else if ( typeid(T)==typeid(GT_Complex8) )
-        {
-            info = LAPACKE_cpotrf(LAPACK_COL_MAJOR, uplo, n, reinterpret_cast<MKL_Complex8*>(pA), lda);
-            GADGET_CHECK_RETURN_FALSE(info==0);
-
-            info = LAPACKE_cpotri(LAPACK_COL_MAJOR, uplo, n, reinterpret_cast<MKL_Complex8*>(pA), lda);
-            GADGET_CHECK_RETURN_FALSE(info==0);
-        }
-        else if ( typeid(T)==typeid(GT_Complex16) )
-        {
-            info = LAPACKE_zpotrf(LAPACK_COL_MAJOR, uplo, n, reinterpret_cast<MKL_Complex16*>(pA), lda);
-            GADGET_CHECK_RETURN_FALSE(info==0);
-
-            info = LAPACKE_zpotri(LAPACK_COL_MAJOR, uplo, n, reinterpret_cast<MKL_Complex16*>(pA), lda);
-            GADGET_CHECK_RETURN_FALSE(info==0);
-        }
-        else
-        {
-            GADGET_ERROR_MSG("SymmetricHermitianPositiveDefiniteInverse_potri : unsupported type " << typeid(T).name());
-            return false;
-        }
-    }
-    catch(...)
-    {
-        GADGET_ERROR_MSG("Errors in SymmetricHermitianPositiveDefiniteInverse_potri(hoMatrix<T>& A) ...");
-        return false;
-    }
-    return true;
-}
-
-template<typename T> 
-bool TriangularInverse_trtri(hoMatrix<T>& A, char uplo)
-{
-    try
-    {
-        if( A.get_number_of_elements()==0 ) return true;
-        GADGET_CHECK_RETURN_FALSE(A.rows()==A.cols());
-
-        MKL_INT info;
-        char diag = 'N';
-        lapack_int n = (lapack_int)A.rows();
-        T* pA = A.begin();
-        lapack_int lda = (lapack_int)A.rows();
-
-        if ( typeid(T)==typeid(float) )
-        {
-            info = LAPACKE_strtri(LAPACK_COL_MAJOR, uplo, diag, n, reinterpret_cast<float*>(pA), lda);
-        }
-        else if ( typeid(T)==typeid(double) )
-        {
-            info = LAPACKE_dtrtri(LAPACK_COL_MAJOR, uplo, diag, n, reinterpret_cast<double*>(pA), lda);
-        }
-        else if ( typeid(T)==typeid(GT_Complex8) )
-        {
-            info = LAPACKE_ctrtri(LAPACK_COL_MAJOR, uplo, diag, n, reinterpret_cast<MKL_Complex8*>(pA), lda);
-        }
-        else if ( typeid(T)==typeid(GT_Complex16) )
-        {
-            info = LAPACKE_ztrtri(LAPACK_COL_MAJOR, uplo, diag, n, reinterpret_cast<MKL_Complex16*>(pA), lda);
-        }
-        else
-        {
-            GADGET_ERROR_MSG("TriangularInverse_trtri : unsupported type " << typeid(T).name());
-            return false;
-        }
-
-        GADGET_CHECK_RETURN_FALSE(info==0);
-    }
-    catch(...)
-    {
-        GADGET_ERROR_MSG("Errors in TriangularInverse_trtri(hoMatrix<float>& A, char uplo) ...");
-        return false;
-    }
-    return true;
-}
-
-template<typename T> 
-bool SymmetricHermitianPositiveDefiniteLinearSystem_posv(hoMatrix<T>& A, hoMatrix<T>& b)
-{
-    try
-    {
-        if( A.get_number_of_elements()==0 ) return true;
-        if( b.get_number_of_elements()==0 ) return true;
-        GADGET_CHECK_RETURN_FALSE(A.rows()==b.rows());
-
-        MKL_INT info;
-        char uplo = 'L';
-        lapack_int n = (lapack_int)A.rows();
-        lapack_int nrhs = (lapack_int)b.cols();
-        T* pA = A.begin();
-        lapack_int lda = (lapack_int)A.rows();
-        T* pB = b.begin();
-        lapack_int ldb = (lapack_int)b.rows();
-
-        if ( typeid(T)==typeid(float) )
-        {
-            info = LAPACKE_sposv(LAPACK_COL_MAJOR, uplo, n, nrhs, reinterpret_cast<float*>(pA), lda, reinterpret_cast<float*>(pB), ldb);
-        }
-        else if ( typeid(T)==typeid(double) )
-        {
-            info = LAPACKE_dposv(LAPACK_COL_MAJOR, uplo, n, nrhs, reinterpret_cast<double*>(pA), lda, reinterpret_cast<double*>(pB), ldb);
-        }
-        else if ( typeid(T)==typeid(GT_Complex8) )
-        {
-            info = LAPACKE_cposv(LAPACK_COL_MAJOR, uplo, n, nrhs, reinterpret_cast<MKL_Complex8*>(pA), lda, reinterpret_cast<MKL_Complex8*>(pB), ldb);
-        }
-        else if ( typeid(T)==typeid(GT_Complex16) )
-        {
-            info = LAPACKE_zposv(LAPACK_COL_MAJOR, uplo, n, nrhs, reinterpret_cast<MKL_Complex16*>(pA), lda, reinterpret_cast<MKL_Complex16*>(pB), ldb);
-        }
-        else
-        {
-            GADGET_ERROR_MSG("SymmetricHermitianPositiveDefiniteLinearSystem_posv : unsupported type " << typeid(T).name());
-            return false;
-        }
-
-        GADGET_CHECK_RETURN_FALSE(info==0);
-    }
-    catch(...)
-    {
-        GADGET_ERROR_MSG("Errors in SymmetricHermitianPositiveDefiniteLinearSystem_posv(hoMatrix<float>& A, hoMatrix<float>& b) ...");
-        return false;
-    }
-    return true;
-}
-
-template<typename T> 
-bool SolveLinearSystem_Tikhonov(hoMatrix<T>& A, hoMatrix<T>& b, hoMatrix<T>& x, double lamda)
-{
-    GADGET_CHECK_RETURN_FALSE(b.rows()==A.rows());
-
-    hoMatrix<T> AHA(A.cols(), A.cols());
-    GADGET_CHECK_RETURN_FALSE(GeneralMatrixProduct_gemm(AHA, A, true, A, false));
-
-    GADGET_CHECK_RETURN_FALSE(x.createMatrix(A.cols(), b.cols()));
-    GADGET_CHECK_RETURN_FALSE(GeneralMatrixProduct_gemm(x, A, true, b, false));
-
-    // apply the Tikhonov regularization
-    // Ideally, we shall apply the regularization is lamda*maxEigenValue
-    // However, computing the maximal eigenvalue is computational intensive
-    // A natural alternative is to use the trace of AHA matrix, which is the sum of all eigen values
-    // Since all eigen values are positive, the lamda*maxEigenValue is only ~10-20% different from lamda*sum(all eigenValues)
-    // for more information, refer to:
-    // Tikhonov A.N., Goncharsky A.V., Stepanov V.V., Yagola A.G., 1995, 
-    // Numerical Methods for the Solution of Ill-Posed Problems, Kluwer Academic Publishers.
-
-    size_t col = AHA.cols();
-    size_t c;
-
-    double trA = std::abs(AHA(0, 0));
-    for ( c=1; c<col; c++ )
-    {
-        trA += std::abs(AHA(c, c));
-    }
-
-    double value = trA*lamda/col;
-    for ( c=0; c<col; c++ )
-    {
-        AHA(c,c) = std::abs(AHA(c, c)) + value;
-    }
-
-    GADGET_CHECK_RETURN_FALSE(SymmetricHermitianPositiveDefiniteLinearSystem_posv(AHA, x));
-
-    return true;
-}
-
-#endif // USE_MKL
-
 }
diff --git a/toolboxes/core/cpu/hoMatrix.h b/toolboxes/core/cpu/hoMatrix.h
index 98ec73a..3983a29 100644
--- a/toolboxes/core/cpu/hoMatrix.h
+++ b/toolboxes/core/cpu/hoMatrix.h
@@ -1,21 +1,9 @@
 #pragma once
 
+#include "cpucore_export.h"
 #include "ho2DArray.h"
 #include "complext.h"
-
-#ifdef USE_MKL
-    #include "mkl.h"
-#endif // USE_MKL
-
-#ifdef GT_Complex8
-    #undef GT_Complex8
-#endif // GT_Complex8
-typedef std::complex<float> GT_Complex8;
-
-#ifdef GT_Complex16
-    #undef GT_Complex16
-#endif // GT_Complex16
-typedef std::complex<double> GT_Complex16; 
+#include <algorithm>
 
 namespace Gadgetron{
 
@@ -50,6 +38,10 @@ public:
     bool upperTri(const T& v);
     bool lowerTri(const T& v);
 
+    // copy one triangle of a square matrix onto the other triangle
+    bool copyUpperTriToLower();
+    bool copyLowerTriToUpper();
+
     // sum along row or col
     bool sumOverRow(hoNDArray<T>& res) const;
     bool sumOverCol(hoNDArray<T>& res) const;
@@ -57,6 +49,12 @@ public:
     // get the sub matrix
     bool subMatrix(Self& res, size_t startR, size_t endR, size_t startC, size_t endC) const;
 
+    // set the matrix to be identity
+    bool setIdentity();
+
+    // normalize the matrix, so the L2 norm of matrix is 1
+    bool normalize();
+
     bool operator == (const Self& m) const;
     bool operator != (const Self& m) const;
 
@@ -70,8 +68,41 @@ protected:
     using BaseClass::elements_;
     using BaseClass::delete_data_on_destruct_;
     using BaseClass::accesser_;
+    
+};
+
+/// for real matrix
+template <class T> class hoMatrixReal : public hoMatrix<T>
+{
+public:
+
+    typedef hoMatrixReal<T> Self;
+    typedef hoMatrix<T> BaseClass;
+
+    hoMatrixReal();
+    hoMatrixReal(size_t rows, size_t cols);
+    hoMatrixReal(size_t rows, size_t cols, T* data, bool delete_data_on_destruct = false);
+
+    virtual ~hoMatrixReal();
+
+    hoMatrixReal(const hoMatrixReal<T>& a);
+
+    /// sort along the row direction (sort along the 1st dimension)
+    bool sort_ascending_along_row();
+
+    /// sort along the column direction (sort along the 2nd dimension)
+    bool sort_ascending_along_column();
+
+protected:
+
+    using BaseClass::dimensions_;
+    using BaseClass::offsetFactors_;
+    using BaseClass::data_;
+    using BaseClass::elements_;
+    using BaseClass::delete_data_on_destruct_;
+    using BaseClass::accesser_;
 };
 
 }
 
-#include <hoMatrix.cpp>
+#include <hoMatrix.hxx>
diff --git a/toolboxes/core/cpu/hoMatrix.hxx b/toolboxes/core/cpu/hoMatrix.hxx
index ddd0c85..8806206 100644
--- a/toolboxes/core/cpu/hoMatrix.hxx
+++ b/toolboxes/core/cpu/hoMatrix.hxx
@@ -1,25 +1,26 @@
+
 namespace Gadgetron
 {
 
 template <typename T> 
-hoMatrix<T>::hoMatrix() : BaseClass()
+hoMatrix<T>::hoMatrix() : BaseClass(1, 1)
 {
 }
 
 template <typename T> 
-hoMatrix<T>::hoMatrix(unsigned int rows, unsigned int cols) : BaseClass(cols, rows)
+hoMatrix<T>::hoMatrix(size_t rows, size_t cols) : BaseClass(rows, cols)
 {
     this->fill(T(0));
 }
 
 template <typename T> 
-hoMatrix<T>::hoMatrix(unsigned int rows, unsigned int cols, T* data, bool delete_data_on_destruct)
+hoMatrix<T>::hoMatrix(size_t rows, size_t cols, T* data, bool delete_data_on_destruct)
 {
-    std::vector<unsigned int> dim(2);
-    dim[0] = sx;
-    dim[1] = sy;
-    this->create(dimensions,data,delete_data_on_destruct);
-    GADGET_CHECK_THROW(init_accesser());
+    std::vector<size_t> dim(2);
+    dim[0] = rows;
+    dim[1] = cols;
+    this->create(&dim,data,delete_data_on_destruct);
+    GADGET_CHECK_THROW(this->init_accesser());
 }
 
 template <typename T> 
@@ -29,35 +30,56 @@ hoMatrix<T>::~hoMatrix()
 }
 
 template <typename T> 
-bool hoMatrix<T>::createMatrix(unsigned int rows, unsigned int cols)
+hoMatrix<T>::hoMatrix(const hoMatrix<T>& a) : BaseClass(a)
+{
+}
+
+template <typename T> 
+hoMatrix<T>& hoMatrix<T>::operator=(const hoMatrix& rhs)
+{
+    if ( this == &rhs ) return *this;
+    BaseClass::operator=(rhs);
+    return *this;
+}
+
+template <typename T> 
+bool hoMatrix<T>::createMatrix(size_t rows, size_t cols)
+{
+    return this->createArray(rows, cols);
+}
+
+template <typename T> 
+bool hoMatrix<T>::createMatrix(size_t rows, size_t cols, T* data, bool delete_data_on_destruct)
 {
-    return this->createArray(cols, rows);
+    return this->createArray(rows, cols, data, delete_data_on_destruct);
 }
 
 template <typename T> 
 inline T& hoMatrix<T>::operator()(size_t r, size_t c)
 {
-    GADGET_DEBUG_CHECK_THROW(c<(*dimensions_)[0] && r<(*dimensions_)[1]);
-    return accesser_[r][c];
+    GADGET_DEBUG_CHECK_THROW(c>=0 && r>=0 && r<(*dimensions_)[0] && c<(*dimensions_)[1]);
+    return accesser_[c][r];
 }
 
 template <typename T> 
 inline const T& hoMatrix<T>::operator()(size_t r, size_t c) const
 {
-    GADGET_DEBUG_CHECK_THROW(c<(*dimensions_)[0] && r<(*dimensions_)[1]);
-    return accesser_[r][c];
+    GADGET_DEBUG_CHECK_THROW(c>=0 && r>=0 && r<(*dimensions_)[0] && c<(*dimensions_)[1]); // r is bounded by dimension 0 (rows), c by dimension 1 (cols)
+    return accesser_[c][r];
 }
 
 template <typename T> 
-inline unsigned int hoMatrix<T>::rows() const
+inline size_t hoMatrix<T>::rows() const
 {
-    return (*dimensions_)[1];
+    if ( dimensions_->empty() ) return 0;
+    return (*dimensions_)[0];
 }
 
 template <typename T> 
-inline unsigned int hoMatrix<T>::cols() const
+inline size_t hoMatrix<T>::cols() const
 {
-    return (*dimensions_)[0];
+    if ( dimensions_->empty() ) return 0;
+    return (*dimensions_)[1];
 }
 
 template <typename T> 
@@ -65,15 +87,12 @@ bool hoMatrix<T>::upperTri(const T& v)
 {
     try
     {
-        unsigned int r, c;
-        for (r=0; r<(*dimensions_)[1]; r++)
+        size_t r, c;
+        for (r=0; r<(*dimensions_)[0]; r++)
         {
-            for (c=0; c<(*dimensions_)[0]; c++)
+            for (c=r+1; c<(*dimensions_)[1]; c++)
             {
-                if ( c > r )
-                {
-                    (*this)(r, c) = v;
-                }
+                (*this)(r, c) = v;
             }
         }
     }
@@ -90,15 +109,12 @@ bool hoMatrix<T>::lowerTri(const T& v)
 {
     try
     {
-        unsigned int r, c;
-        for (r=0; r<(*dimensions_)[1]; r++)
+        size_t r, c;
+        for (c=0; c<(*dimensions_)[1]; c++)
         {
-            for (c=0; c<(*dimensions_)[0]; c++)
+            for (r=c+1; r<(*dimensions_)[0]; r++)
             {
-                if ( r > c )
-                {
-                    (*this)(r, c) = v;
-                }
+                (*this)(r, c) = v;
             }
         }
     }
@@ -111,681 +127,614 @@ bool hoMatrix<T>::lowerTri(const T& v)
 }
 
 template <typename T> 
-bool hoMatrix<T>::operator == (const Self& m) const
+bool hoMatrix<T>::copyUpperTriToLower()
 {
-    GADGET_CHECK_RETURN_FALSE(this->dimensions_equal(&m));
-    for ( size_t i=0; i<elements_; i++ )
-    { 
-        if (std::abs(data_[i]-m.data_[i])>DBL_EPSILON) 
+    try
+    {
+        GADGET_CHECK_RETURN_FALSE((*dimensions_)[0]==(*dimensions_)[1]);
+
+        size_t r, c;
+        for (r=0; r<(*dimensions_)[0]; r++)
         {
-            return false;
+            for (c=r+1; c<(*dimensions_)[1]; c++)
+            {
+                (*this)(c, r)= (*this)(r, c);
+            }
         }
     }
-    return true;
-}
-
-template <typename T> 
-bool hoMatrix<T>::operator != (const Self& m) const
-{
-    return !(*this==m);
-}
-
-template <typename T> 
-void hoMatrix<T>::print(std::ostream& os) const
-{
-    using namespace std;
-    os.unsetf(std::ios::scientific);
-
-    os << "hoMatrix : " << (*dimensions_)[1] << " " << (*dimensions_)[0] << " : " << std::string(typeid(T).name()) << endl;
-    unsigned int r, c;
-    for (r=0; r<(*dimensions_)[1]; r++) 
+    catch (...)
     {
-        os << "r " << r << ":\t";
-        for (c=0; c<(*dimensions_)[0]; c++)
-        {
-            os << setprecision(16) << (*this)(r,c) << "\t";
-        }
-        os << endl; 
+        GADGET_ERROR_MSG("Errors in hoMatrix<T>::copyUpperTriToLower() ... ");
+        return false;
     }
+    return true;
 }
 
 template <typename T> 
-bool copyL2U(hoMatrix<T>& A)
+bool hoMatrix<T>::copyLowerTriToUpper()
 {
     try
     {
-        GADGET_CHECK_RETURN_FALSE(A.rows()==A.cols());
-
-        unsigned int R = A.rows();
-        unsigned int C = A.cols();
+        GADGET_CHECK_RETURN_FALSE((*dimensions_)[0]==(*dimensions_)[1]);
 
-        unsigned int row, col;
-        for(row=0; row<R; row++) 
+        size_t r, c;
+        for (c=0; c<(*dimensions_)[1]; c++)
         {
-            for(col=0; col<row; col++ )
+            for (r=c+1; r<(*dimensions_)[0]; r++)
             {
-                A(col, row) = A(row, col);
+                (*this)(c, r)= (*this)(r, c);
             }
         }
     }
-    catch(...)
+    catch (...)
     {
-        GADGET_ERROR_MSG("Errors in copyL2U(hoMatrix<T>& A) ... ");
+        GADGET_ERROR_MSG("Errors in hoMatrix<T>::copyLowerTriToUpper() ... ");
         return false;
     }
     return true;
 }
 
 template <typename T> 
-bool copyL2U(hoMatrix<T>& A, bool conj)
+bool hoMatrix<T>::sumOverRow(hoNDArray<T>& res) const
 {
     try
     {
-        GADGET_CHECK_RETURN_FALSE(A.rows()==A.cols());
+        size_t ROW = rows();
+        size_t COL = cols();
 
-        unsigned int R = A.rows();
-        unsigned int row, col;
+        if ( res.get_number_of_elements() != ROW )
+        {
+            res.create(ROW);
+        }
 
-        if ( conj )
+        T* pRes = res.begin();
+
+        size_t r, c;
+
+        for ( r=0; r<ROW; r++ )
         {
-            for(row=0; row<R; row++) 
-            {
-                for(col=0; col<row; col++ )
-                {
-                    A(col, row) = std::conj(A(row, col));
-                }
-            }
+            pRes[r] = 0;
         }
-        else
+
+        for ( c=0; c<COL; c++ )
         {
-            for(row=0; row<R; row++) 
+            for ( r=0; r<ROW; r++ )
             {
-                for(col=0; col<row; col++ )
-                {
-                    A(col, row) = A(row, col);
-                }
+                // res(r) += (*this)(r, c);
+                pRes[r] += this->data_[r+c*ROW];
             }
         }
     }
-    catch(...)
+    catch (...)
     {
-        GADGET_ERROR_MSG("Errors in copyL2U(hoMatrix<T>& A, bool conj) ... ");
+        GADGET_ERROR_MSG("Errors in hoMatrix<T>::sumOverRow(hoNDArray<T>& r) ... ");
         return false;
     }
+
     return true;
 }
 
 template <typename T> 
-bool copyU2L(hoMatrix<T>& A)
+bool hoMatrix<T>::sumOverCol(hoNDArray<T>& res) const
 {
     try
     {
-        GADGET_CHECK_RETURN_FALSE(A.rows()==A.cols());
+        size_t ROW = rows();
+        size_t COL = cols();
 
-        unsigned int R = A.rows();
-        unsigned int C = A.cols();
+        if ( res.get_number_of_elements() != COL )
+        {
+            res.create(COL);
+        }
 
-        unsigned int row, col;
-        for(row=0; row<R; row++) 
+        T* pRes = res.begin();
+
+        size_t r;
+        long long c;
+
+        for ( c=0; c<(long long)COL; c++ )
         {
-            for(col=row+1; col<C; col++ )
+            pRes[c] = 0;
+        }
+
+        //for ( r=0; r<ROW; r++ )
+        //{
+        //    for ( c=0; c<COL; c++ )
+        //    {
+        //        // res(c) += (*this)(r, c);
+        //        pRes[c] += this->data_[r+c*ROW];
+        //    }
+        //}
+
+        T* pCurr = NULL;
+        T v(0);
+        // #pragma omp parallel for default(none) private(c, r) shared(COL, ROW, pRes) if ( COL > 16 )
+        for ( c=0; c<(long long)COL; c++ )
+        {
+            v = 0;
+            pCurr = this->data_ + c*ROW;
+            for ( r=0; r<ROW; r++ )
             {
-                A(col, row) = A(row, col);
+                v += pCurr[r];
             }
+            pRes[c] = v;
         }
+
+        //size_t r, c;
+        //for ( c=0; c<COL; c++ )
+        //{
+        //    T v = (*this)(0, c);
+        //    for ( r=1; r<ROW; r++ )
+        //    {
+        //        v += (*this)(r, c);
+        //        //v += this->data_[r+c*ROW];
+        //    }
+        //    res(c) = v;
+        //}
     }
-    catch(...)
+    catch (...)
     {
-        GADGET_ERROR_MSG("Errors in copyU2L(hoMatrix<T>& A) ... ");
+        GADGET_ERROR_MSG("Errors in hoMatrix<T>::sumOverCol(hoNDArray<T>& r) ... ");
         return false;
     }
+
     return true;
 }
 
 template <typename T> 
-bool copyU2L(hoMatrix<T>& A, bool conj)
+bool hoMatrix<T>::subMatrix(Self& res, size_t startR, size_t endR, size_t startC, size_t endC) const
 {
     try
     {
-        GADGET_CHECK_RETURN_FALSE(A.rows()==A.cols());
+        size_t ROW = rows();
+        size_t COL = cols();
 
-        unsigned int R = A.rows();
-        unsigned int C = A.cols();
+        GADGET_CHECK_RETURN_FALSE(startR<ROW);
+        GADGET_CHECK_RETURN_FALSE(startC<COL);
+        GADGET_CHECK_RETURN_FALSE(endR<ROW);
+        GADGET_CHECK_RETURN_FALSE(endC<COL);
+        GADGET_CHECK_RETURN_FALSE(endR>=startR);
+        GADGET_CHECK_RETURN_FALSE(endC>=startC);
 
-        unsigned int row, col;
+        GADGET_CHECK_RETURN_FALSE(res.createMatrix(endR-startR+1, endC-startC+1));
 
-        if ( conj )
+        size_t r, c;
+        for ( r=startR; r<=endR; r++ )
         {
-            for(row=0; row<R; row++) 
+            for ( c=startC; c<=endC; c++ )
             {
-                for(col=row+1; col<C; col++ )
-                {
-                    A(col, row) = std::conj(A(row, col));
-                }
-            }
-        }
-        else
-        {
-            for(row=0; row<R; row++) 
-            {
-                for(col=row+1; col<C; col++ )
-                {
-                    A(col, row) = A(row, col);
-                }
+                res(r-startR, c-startC) = (*this)(r, c);
             }
         }
     }
-    catch(...)
+    catch (...)
     {
-        GADGET_ERROR_MSG("Errors in copyU2L(hoMatrix<T>& A, bool conj) ... ");
+        GADGET_ERROR_MSG("Errors in hoMatrix<T>::subMatrix(Self& res, size_t startR, size_t endR, size_t startC, size_t endC) ... ");
         return false;
     }
+
     return true;
 }
 
 template <typename T> 
-bool trans(const hoMatrix<T>& A, hoMatrix<T>& AT)
+bool hoMatrix<T>::setIdentity()
 {
     try
     {
-        if ( A.get_number_of_elements() == 0 ) return true;
+        size_t ROW = this->rows();
+        size_t COL = this->cols();
 
-        if ( !AT.dimensions_equal(&A) )
-        {
-            AT.createMatrix(A.rows(), A.cols());
-        }
+        size_t N = GT_MIN(ROW, COL);
+
+        this->fill(T(0));
 
-        int r, c;
-        #pragma omp parallel for default(none) private(r, c) shared(A, AT)
-        for ( c=0; c<(int)A.cols(); c++ )
+        size_t r;
+        for ( r=0; r<N; r++ )
         {
-            for ( r=0; r<(int)A.rows(); r++ )
-            {
-                AT(c,r) = A(r,c);
-            }
+            (*this)(r, r) = T(1.0);
         }
     }
     catch (...)
     {
-        GADGET_ERROR_MSG("Errors in trans(const hoMatrix<T>& A, hoMatrix<T>& AT) ... ");
+        GADGET_ERROR_MSG("Errors in hoMatrix<T>::setIdentity() ... ");
         return false;
     }
+
     return true;
 }
 
 template <typename T> 
-bool conjugatetrans(const hoMatrix<T>& A, hoMatrix<T>& AH)
+bool hoMatrix<T>::normalize()
 {
     try
     {
-        if ( A.get_number_of_elements() == 0 ) return true;
+        T dist = std::abs(this->data_[0]);
+        dist *= dist;
 
-        if ( !AH.dimensions_equal(&A) )
+        size_t ii;
+        for ( ii=1; ii<this->elements_; ii++ )
         {
-            AH.createMatrix(A.rows(), A.cols());
+            T v = std::abs(this->data_[ii]);
+            dist += v*v;
         }
 
-        int r, c;
-        #pragma omp parallel for default(none) private(r, c) shared(A, AH)
-        for ( c=0; c<(int)A.cols(); c++ )
+        dist = std::sqrt(dist);
+
+        if ( std::abs(dist) < DBL_EPSILON ) return false;
+
+        for ( ii=0; ii<this->elements_; ii++ )
         {
-            for ( r=0; r<(int)A.rows(); r++ )
-            {
-                AH(c,r) = std::conj(A(r,c));
-            }
+            this->data_[ii] /= dist;
         }
     }
     catch (...)
     {
-        GADGET_ERROR_MSG("Errors in conjugatetrans(const hoMatrix<T>& A, hoMatrix<T>& AH) ... ");
+        GADGET_ERROR_MSG("Errors in hoMatrix<T>::normalize() ... ");
         return false;
     }
+
     return true;
 }
 
-// following matrix computation calls MKL functions
-#ifdef USE_MKL
-
-template<typename T> 
-bool GeneralMatrixProduct_gemm(hoMatrix<T>& C, 
-                            const hoMatrix<T>& A, bool transA, 
-                            const hoMatrix<T>& B, bool transB)
+template <typename T> 
+bool hoMatrix<T>::operator == (const Self& m) const
 {
-    try
-    {
-        CBLAS_TRANSPOSE TA, TB;
-
-        MKL_INT lda = A.cols();
-        MKL_INT ldb = B.cols();
-        const T* pA = A.begin(); 
-        const T* pB = B.begin(); 
-
-        MKL_INT M = A.rows();
-        MKL_INT K = A.cols();
-        if ( transA )
-        { 
-            M = A.cols();
-            K = A.rows();
+    GADGET_CHECK_RETURN_FALSE(this->dimensions_equal(&m));
+    for ( size_t i=0; i<elements_; i++ )
+    { 
+        if (std::abs(data_[i]-m.data_[i])>DBL_EPSILON)
+        {
+            return false;
         }
+    }
+    return true;
+}
 
-        MKL_INT N = B.cols();
-        MKL_INT K2 = B.rows();
-        if ( transB )
-        { 
-            N = B.rows();
-            K2 = B.cols();
-        }
+template <typename T> 
+bool hoMatrix<T>::operator != (const Self& m) const
+{
+    return !(*this==m);
+}
+
+template <typename T> 
+void hoMatrix<T>::print(std::ostream& os) const
+{
+    using namespace std;
+    os.unsetf(std::ios::scientific);
 
-        GADGET_CHECK_RETURN_FALSE(K==K2);
-        if ( (C.rows()!=M) || (C.cols()!=N) )
+    os << "hoMatrix (row X col): " << this->rows() << " X " << this->cols() << " : " << std::string(typeid(T).name()) << endl;
+    size_t r, c;
+    for (r=0; r<(*dimensions_)[0]; r++) 
+    {
+        os << "r " << r << ":\t";
+        for (c=0; c<(*dimensions_)[1]; c++)
         {
-            GADGET_CHECK_RETURN_FALSE(C.createMatrix(M, N));
+            os << setprecision(10) << (*this)(r,c) << "\t";
         }
+        os << endl; 
+    }
+}
 
-        T* pC = C.begin();
-        MKL_INT ldc = C.cols();
+// --------------------------------------------------------------------------------------------------------
 
-        T alpha(1), beta(0);
+template <typename T> 
+hoMatrixReal<T>::hoMatrixReal() : BaseClass()
+{
+}
 
-        if ( typeid(T)==typeid(float) )
-        {
-            if ( transA )
-            {
-                TA = CblasTrans;
-            }
-            else
-            {
-                TA = CblasNoTrans;
-            }
+template <typename T> 
+hoMatrixReal<T>::hoMatrixReal(size_t rows, size_t cols) : BaseClass(rows, cols)
+{
+}
 
-            if ( transB )
-            {
-                TB = CblasTrans;
-            }
-            else
-            {
-                TB = CblasNoTrans;
-            }
+template <typename T> 
+hoMatrixReal<T>::hoMatrixReal(size_t rows, size_t cols, T* data, bool delete_data_on_destruct) : BaseClass(rows, cols, data, delete_data_on_destruct) // forward the caller-supplied buffer to hoMatrix
+{
+}
 
-            if ( &A != &C )
-            {
-                cblas_sgemm(CblasRowMajor, TA, TB, M, N, K, 1, reinterpret_cast<const float*>(pA), lda, reinterpret_cast<const float*>(pB), ldb, 0, reinterpret_cast<float*>(pC), ldc);
-            }
-            else
-            {
-                hoMatrix<T> aTmp(A);
-                T* pATmp = aTmp.begin();
-                cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, 1, reinterpret_cast<const float*>(pATmp), lda, reinterpret_cast<const float*>(pB), ldb, 0, reinterpret_cast<float*>(pC), ldc);
-            }
-        }
-        else if ( typeid(T)==typeid(double) )
-        {
-            if ( transA )
-            {
-                TA = CblasTrans;
-            }
-            else
-            {
-                TA = CblasNoTrans;
-            }
+template <typename T> 
+hoMatrixReal<T>::~hoMatrixReal()
+{
+}
 
-            if ( transB )
-            {
-                TB = CblasTrans;
-            }
-            else
-            {
-                TB = CblasNoTrans;
-            }
+template <typename T> 
+hoMatrixReal<T>::hoMatrixReal(const hoMatrixReal<T>& a) : BaseClass(a)
+{
+}
 
-            if ( &A != &C )
-            {
-                cblas_dgemm(CblasRowMajor, TA, TB, M, N, K, 1, reinterpret_cast<const double*>(pA), lda, reinterpret_cast<const double*>(pB), ldb, 0, reinterpret_cast<double*>(pC), ldc);
-            }
-            else
-            {
-                hoMatrix<T> aTmp(A);
-                T* pATmp = aTmp.begin();
-                cblas_dgemm(CblasRowMajor, TransA, TransB, M, N, K, 1, reinterpret_cast<const double*>(pATmp), lda, reinterpret_cast<const double*>(pB), ldb, 0, reinterpret_cast<double*>(pC), ldc);
-            }
-        }
-        else if ( typeid(T)==typeid(GT_Complex8) )
+template <typename T> 
+bool hoMatrixReal<T>::sort_ascending_along_row()
+{
+    try
+    {
+        size_t R = this->rows();
+        size_t C = this->cols();
+
+        size_t col;
+        for(col=0; col<C; col++) 
         {
-            if ( transA )
-            {
-                TA = CblasConjTrans;
-            }
-            else
-            {
-                TA = CblasNoTrans;
-            }
+            std::sort(data_+col*R, data_+(col+1)*R);
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors in hoMatrixReal<T>::sort_ascending_along_row() ... ");
+        return false;
+    }
+    return true;
+}
 
-            if ( transB )
-            {
-                TB = CblasConjTrans;
-            }
-            else
-            {
-                TB = CblasNoTrans;
-            }
+template <typename T> 
+bool hoMatrixReal<T>::sort_ascending_along_column()
+{
+    try
+    {
+        size_t R = this->rows();
+        size_t C = this->cols();
 
-            if ( &A != &C )
-            {
-                cblas_cgemm(CblasRowMajor, TA, TB, M, N, K, &alpha, pA, lda, pB, ldb, &beta, pC, ldc);
-            }
-            else
-            {
-                hoMatrix<T> aTmp(A);
-                T* pATmp = aTmp.begin();
-                cblas_cgemm(CblasRowMajor, TransA, TransB, M, N, K, &alpha, pATmp, lda, pB, ldb, &beta, pC, ldc);
-            }
-        }
-        else if ( typeid(T)==typeid(GT_Complex16) )
+        std::vector<T> buf(C);
+
+        size_t col, row;
+        for(row=0; row<R; row++) 
         {
-            if ( transA )
+            for(col=0; col<C; col++)
             {
-                TA = CblasConjTrans;
-            }
-            else
-            {
-                TA = CblasNoTrans;
+                buf[col] = data_[row + col*R];
             }
 
-            if ( transB )
-            {
-                TB = CblasConjTrans;
-            }
-            else
-            {
-                TB = CblasNoTrans;
-            }
+            std::sort(buf.begin(), buf.end());
 
-            if ( &A != &C )
+            for(col=0; col<C; col++)
             {
-                cblas_zgemm(CblasRowMajor, TA, TB, M, N, K, &alpha, pA, lda, pB, ldb, &beta, pC, ldc);
-            }
-            else
-            {
-                hoMatrix<T> aTmp(A);
-                T* pATmp = aTmp.begin();
-                cblas_zgemm(CblasRowMajor, TransA, TransB, M, N, K, &alpha, pATmp, lda, pB, ldb, &beta, pC, ldc);
+                data_[row + col*R] = buf[col];
             }
         }
-        else
-        {
-            GADGET_ERROR_MSG("GeneralMatrixProduct_gemm : unsupported type " << typeid(T));
-            return false;
-        }
     }
     catch(...)
     {
-        GADGET_ERROR_MSG("Errors in GeneralMatrixProduct_gemm(hoMatrix<T>& C, const hoMatrix<T>& A, bool transA, const hoMatrix<T>& B, bool transB) ...");
+        GADGET_ERROR_MSG("Errors in hoMatrixReal<T>::sort_ascending_along_column() ... ");
         return false;
     }
     return true;
 }
 
-template<typename T> 
-bool CholeskyHermitianPositiveDefinite_potrf(hoMatrix<T>& A, char uplo)
+// --------------------------------------------------------------------------------------------------------
+
+template <typename T> 
+bool copyL2U(hoMatrix<T>& A)
 {
     try
     {
-        if( A.get_number_of_elements()==0 ) return true;
         GADGET_CHECK_RETURN_FALSE(A.rows()==A.cols());
 
-        int info;
-        lapack_int n = (lapack_int)(A.rows());
-        T* pA = A.begin();
-        lapack_int lda = (lapack_int)(A.cols());
+        size_t R = A.rows();
+        size_t C = A.cols();
 
-        if ( typeid(T)==typeid(float) )
-        {
-            info = LAPACKE_spotrf(LAPACK_ROW_MAJOR, uplo, n, reinterpret_cast<float*>(pA), lda);
-        }
-        else if ( typeid(T)==typeid(double) )
-        {
-            info = LAPACKE_dpotrf(LAPACK_ROW_MAJOR, uplo, n, reinterpret_cast<double*>(pA), lda);
-        }
-        else if ( typeid(T)==typeid(GT_Complex8) )
-        {
-            info = LAPACKE_cpotrf(LAPACK_ROW_MAJOR, uplo, n, reinterpret_cast<MKL_Complex8*>(pA), lda);
-        }
-        else if ( typeid(T)==typeid(GT_Complex16) )
-        {
-            info = LAPACKE_zpotrf(LAPACK_ROW_MAJOR, uplo, n, reinterpret_cast<MKL_Complex16*>(pA), lda);
-        }
-        else
-        {
-            GADGET_ERROR_MSG("CholeskyHermitianPositiveDefinite_potrf : unsupported type " << typeid(T));
-            return false;
-        }
-
-        GADGET_CHECK_RETURN_FALSE(info==0);
-
-        if ( uplo == 'U' )
-        {
-            GADGET_CHECK_RETURN_FALSE(A.lowerTri(0));
-        }
-        else
+        size_t row, col;
+        for(row=0; row<R; row++) 
         {
-            GADGET_CHECK_RETURN_FALSE(A.upperTri(0));
+            for(col=0; col<row; col++ )
+            {
+                A(col, row) = A(row, col);
+            }
         }
     }
     catch(...)
     {
-        GADGET_ERROR_MSG("Errors in CholeskyHermitianPositiveDefinite_potrf(hoMatrix<T>& A, char uplo) ...");
+        GADGET_ERROR_MSG("Errors in copyL2U(hoMatrix<T>& A) ... ");
         return false;
     }
     return true;
 }
 
-template<typename T> 
-bool EigenAnalysis_syev_heev(hoMatrix<T>& A, hoMatrix<typename realType<T>::Type>& eigenValue)
+template <typename T> 
+bool copyL2U(hoMatrix<T>& A, bool conj)
 {
     try
     {
-        int M = (int)A.rows();
-        GADGET_CHECK_RETURN_FALSE(A.cols() == M));
-
-        if ( (eigenValue.rows()!=M) || (eigenValue.cols()!=1) )
-        {
-            GADGET_CHECK_RETURN_FALSE(D.createMatrix(M, 1));
-        }
+        GADGET_CHECK_RETURN_FALSE(A.rows()==A.cols());
 
-        int info;
-        char jobz = 'V';
-        char uplo = 'L';
-        T* pA = A.begin();
-        typename realType<T>::Type* pEV = eigenValue.begin();
+        size_t R = A.rows();
+        size_t row, col;
 
-        if ( typeid(T)==typeid(float) )
-        {
-            info = LAPACKE_ssyev(LAPACK_ROW_MAJOR, jobz, uplo, M, reinterpret_cast<float*>(pA), M, reinterpret_cast<float*>(pEV));
-        }
-        else if ( typeid(T)==typeid(double) )
-        {
-            info = LAPACKE_dsyev(LAPACK_ROW_MAJOR, jobz, uplo, M, reinterpret_cast<double*>(pA), M, reinterpret_cast<double*>(pEV));
-        }
-        else if ( typeid(T)==typeid(GT_Complex8) )
-        {
-            info = LAPACKE_cheev(LAPACK_ROW_MAJOR, jobz, uplo, M, reinterpret_cast<MKL_Complex8*>(pA), M, reinterpret_cast<float*>(pEV));
-        }
-        else if ( typeid(T)==typeid(GT_Complex16) )
+        if ( conj )
         {
-            info = LAPACKE_zheev(LAPACK_ROW_MAJOR, jobz, uplo, M, reinterpret_cast<MKL_Complex16*>(pA), M, reinterpret_cast<double*>(pEV));
+            for(row=0; row<R; row++) 
+            {
+                for(col=0; col<row; col++ )
+                {
+                    A(col, row) = std::conj(A(row, col));
+                }
+            }
         }
         else
         {
-            GADGET_ERROR_MSG("EigenAnalysis_syev_heev : unsupported type " << typeid(T));
-            return false;
+            for(row=0; row<R; row++) 
+            {
+                for(col=0; col<row; col++ )
+                {
+                    A(col, row) = A(row, col);
+                }
+            }
         }
-
-        GADGET_CHECK_RETURN_FALSE(info==0);
     }
-    catch (...)
+    catch(...)
     {
-        GADGET_ERROR_MSG("Errors in EigenAnalysis_syev_heev(hoMatrix<T>& A, hoMatrix<typename realType<T>::Type>& eigenValue) ... ");
+        GADGET_ERROR_MSG("Errors in copyL2U(hoMatrix<T>& A, bool conj) ... ");
         return false;
     }
     return true;
 }
 
-template<typename T> 
-bool SymmetricHermitianPositiveDefiniteInverse_potri(hoMatrix<T>& A)
+template <typename T> 
+bool copyU2L(hoMatrix<T>& A)
 {
     try
     {
-        if( A.get_number_of_elements()==0 ) return true;
         GADGET_CHECK_RETURN_FALSE(A.rows()==A.cols());
 
-        int info;
-        char uplo = 'L';
-        lapack_int n = (lapack_int)A.rows();
-        T* pA = A.begin();
-        lapack_int lda = (lapack_int)A.cols();
-
-        if ( typeid(T)==typeid(float) )
-        {
-            info = LAPACKE_spotrf(LAPACK_ROW_MAJOR, uplo, n, reinterpret_cast<float*>(pA), lda);
-            GADGET_CHECK_RETURN_FALSE(info==0);
-
-            info = LAPACKE_spotri(LAPACK_ROW_MAJOR, uplo, n, reinterpret_cast<float*>(pA), lda);
-            GADGET_CHECK_RETURN_FALSE(info==0);
-        }
-        else if ( typeid(T)==typeid(double) )
-        {
-            info = LAPACKE_dpotrf(LAPACK_ROW_MAJOR, uplo, n, reinterpret_cast<double*>(pA), lda);
-            GADGET_CHECK_RETURN_FALSE(info==0);
+        size_t R = A.rows();
+        size_t C = A.cols();
 
-            info = LAPACKE_dpotri(LAPACK_ROW_MAJOR, uplo, n, reinterpret_cast<double*>(pA), lda);
-            GADGET_CHECK_RETURN_FALSE(info==0);
-        }
-        else if ( typeid(T)==typeid(GT_Complex8) )
-        {
-            info = LAPACKE_cpotrf(LAPACK_ROW_MAJOR, uplo, n, reinterpret_cast<MKL_Complex8*>(pA), lda);
-            GADGET_CHECK_RETURN_FALSE(info==0);
-
-            info = LAPACKE_cpotri(LAPACK_ROW_MAJOR, uplo, n, reinterpret_cast<MKL_Complex8*>(pA), lda);
-            GADGET_CHECK_RETURN_FALSE(info==0);
-        }
-        else if ( typeid(T)==typeid(GT_Complex16) )
-        {
-            info = LAPACKE_zpotrf(LAPACK_ROW_MAJOR, uplo, n, reinterpret_cast<MKL_Complex16*>(pA), lda);
-            GADGET_CHECK_RETURN_FALSE(info==0);
-
-            info = LAPACKE_zpotri(LAPACK_ROW_MAJOR, uplo, n, reinterpret_cast<MKL_Complex16*>(pA), lda);
-            GADGET_CHECK_RETURN_FALSE(info==0);
-        }
-        else
+        size_t row, col;
+        for(row=0; row<R; row++) 
         {
-            GADGET_ERROR_MSG("SymmetricHermitianPositiveDefiniteInverse_potri : unsupported type " << typeid(T));
-            return false;
+            for(col=row+1; col<C; col++ )
+            {
+                A(col, row) = A(row, col);
+            }
         }
     }
     catch(...)
     {
-        GADGET_ERROR_MSG("Errors in SymmetricHermitianPositiveDefiniteInverse_potri(hoMatrix<T>& A) ...");
+        GADGET_ERROR_MSG("Errors in copyU2L(hoMatrix<T>& A) ... ");
         return false;
     }
     return true;
 }
 
-template<typename T> 
-bool TriangularInverse_trtri(hoMatrix<T>& A, char uplo)
+template <typename T> 
+bool copyU2L(hoMatrix<T>& A, bool conj)
 {
     try
     {
-        if( A.get_number_of_elements()==0 ) return true;
         GADGET_CHECK_RETURN_FALSE(A.rows()==A.cols());
 
-        int info;
-        char diag = 'N';
-        lapack_int n = (lapack_int)A.rows();
-        T* pA = A.begin();
-        lapack_int lda = (lapack_int)A.cols();
+        size_t R = A.rows();
+        size_t C = A.cols();
 
-        if ( typeid(T)==typeid(float) )
-        {
-            info = LAPACKE_strtri(LAPACK_ROW_MAJOR, uplo, diag, n, reinterpret_cast<float*>(pA), lda);
-        }
-        else if ( typeid(T)==typeid(double) )
-        {
-            info = LAPACKE_dtrtri(LAPACK_ROW_MAJOR, uplo, diag, n, reinterpret_cast<double*>(pA), lda);
-        }
-        else if ( typeid(T)==typeid(GT_Complex8) )
-        {
-            info = LAPACKE_ctrtri(LAPACK_ROW_MAJOR, uplo, diag, n, reinterpret_cast<MKL_Complex8*>(pA), lda);
-        }
-        else if ( typeid(T)==typeid(GT_Complex16) )
+        size_t row, col;
+
+        if ( conj )
         {
-            info = LAPACKE_ztrtri(LAPACK_ROW_MAJOR, uplo, diag, n, reinterpret_cast<MKL_Complex16*>(pA), lda);
+            for(row=0; row<R; row++) 
+            {
+                for(col=row+1; col<C; col++ )
+                {
+                    A(col, row) = std::conj(A(row, col));
+                }
+            }
         }
         else
         {
-            GADGET_ERROR_MSG("TriangularInverse_trtri : unsupported type " << typeid(T));
-            return false;
+            for(row=0; row<R; row++) 
+            {
+                for(col=row+1; col<C; col++ )
+                {
+                    A(col, row) = A(row, col);
+                }
+            }
         }
-
-        GADGET_CHECK_RETURN_FALSE(info==0);
     }
     catch(...)
     {
-        GADGET_ERROR_MSG("Errors in TriangularInverse_trtri(hoMatrix<float>& A, char uplo) ...");
+        GADGET_ERROR_MSG("Errors in copyU2L(hoMatrix<T>& A, bool conj) ... ");
         return false;
     }
     return true;
 }
 
-template<typename T> 
-bool SymmetricHermitianPositiveDefiniteLinearSystem_posv(hoMatrix<T>& A, hoMatrix<T>& b)
+template <typename T> 
+bool trans(const hoMatrix<T>& A, hoMatrix<T>& AT)
 {
     try
     {
-        if( A.get_number_of_elements()==0 ) return true;
-        if( b.get_number_of_elements()==0 ) return true;
-        GADGET_CHECK_RETURN_FALSE(A.rows()==b.rows());
-
-        int info;
-        char uplo = 'L';
-        lapack_int n = (lapack_int)A.rows();
-        lapack_int nrhs = (lapack_int)b.cols();
-        T* pA = A.begin();
-        lapack_int lda = (lapack_int)A.cols();
-        T* pB = b.begin();
-        lapack_int ldb = (lapack_int)b.cols();
+        if ( A.get_number_of_elements() == 0 ) return true;
 
-        if ( typeid(T)==typeid(float) )
+        if ( AT.rows()!=A.cols() || AT.cols()!=A.rows() )
         {
-            info = LAPACKE_sposv(LAPACK_ROW_MAJOR, uplo, n, nrhs, reinterpret_cast<float*>(pA), lda, reinterpret_cast<float*>(pB), ldb);
+            AT.createMatrix(A.cols(), A.rows());
         }
-        else if ( typeid(T)==typeid(double) )
-        {
-            info = LAPACKE_dposv(LAPACK_ROW_MAJOR, uplo, n, nrhs, reinterpret_cast<double*>(pA), lda, reinterpret_cast<double*>(pB), ldb);
-        }
-        else if ( typeid(T)==typeid(GT_Complex8) )
+
+        long long r, c;
+        #ifdef GCC_OLD_FLAG
+            #pragma omp parallel for default(none) private(r, c)
+        #else
+            #pragma omp parallel for default(none) private(r, c) shared(A, AT)
+        #endif
+        for ( c=0; c<(long long)A.cols(); c++ )
         {
-            info = LAPACKE_cposv(LAPACK_ROW_MAJOR, uplo, n, nrhs, reinterpret_cast<MKL_Complex8*>(pA), lda, reinterpret_cast<MKL_Complex8*>(pB), ldb);
+            for ( r=0; r<(long long)A.rows(); r++ )
+            {
+                AT(c,r) = A(r,c);
+            }
         }
-        else if ( typeid(T)==typeid(GT_Complex16) )
+    }
+    catch (...)
+    {
+        GADGET_ERROR_MSG("Errors in trans(const hoMatrix<T>& A, hoMatrix<T>& AT) ... ");
+        return false;
+    }
+    return true;
+}
+
+template <typename T> 
+bool conjugatetrans(const hoMatrix<T>& A, hoMatrix<T>& AH)
+{
+    try
+    {
+        if ( A.get_number_of_elements() == 0 ) return true;
+
+        if ( AH.rows()!=A.cols() || AH.cols()!=A.rows() )
         {
-            info = LAPACKE_zposv(LAPACK_ROW_MAJOR, uplo, n, nrhs, reinterpret_cast<MKL_Complex16*>(pA), lda, reinterpret_cast<MKL_Complex16*>(pB), ldb);
+            AH.createMatrix(A.cols(), A.rows());
         }
-        else
+
+        long long r, c;
+        #ifdef GCC_OLD_FLAG
+            #pragma omp parallel for default(none) private(r, c)
+        #else
+            #pragma omp parallel for default(none) private(r, c) shared(A, AH)
+        #endif
+        for ( c=0; c<(long long)A.cols(); c++ )
         {
-            GADGET_ERROR_MSG("SymmetricHermitianPositiveDefiniteLinearSystem_posv : unsupported type " << typeid(T));
-            return false;
+            for ( r=0; r<(long long)A.rows(); r++ )
+            {
+                AH(c,r) = std::conj(A(r,c));
+            }
         }
+    }
+    catch (...)
+    {
+        GADGET_ERROR_MSG("Errors in conjugatetrans(const hoMatrix<T>& A, hoMatrix<T>& AH) ... ");
+        return false;
+    }
+    return true;
+}
 
-        GADGET_CHECK_RETURN_FALSE(info==0);
+inline bool conjugatetrans(const hoMatrix<float>& A, hoMatrix<float>& AH)
+{
+    return trans(A, AH);
+}
+
+inline bool conjugatetrans(const hoMatrix<double>& A, hoMatrix<double>& AH)
+{
+    return trans(A, AH);
+}
+
+// C = A*B
+EXPORTCPUCORE bool GeneralMatrixProduct(hoNDArray<float>& C, const hoNDArray<float>& A, bool transA, const hoNDArray<float>& B, bool transB);
+EXPORTCPUCORE bool GeneralMatrixProduct(hoNDArray<double>& C, const hoNDArray<double>& A, bool transA, const hoNDArray<double>& B, bool transB);
+EXPORTCPUCORE bool GeneralMatrixProduct(hoNDArray< std::complex<float> >& C, const hoNDArray< std::complex<float> >& A, bool transA, const hoNDArray< std::complex<float> >& B, bool transB);
+EXPORTCPUCORE bool GeneralMatrixProduct(hoNDArray< std::complex<double> >& C, const hoNDArray< std::complex<double> >& A, bool transA, const hoNDArray< std::complex<double> >& B, bool transB);
+
+template<typename T> 
+bool GeneralMatrixProduct(hoMatrix<T>& C, const hoMatrix<T>& A, bool transA, const hoMatrix<T>& B, bool transB)
+{
+    try
+    {
+        hoNDArray<T> mC(C.get_dimensions(), C.begin(), false);
+        hoNDArray<T> mA(A.get_dimensions(), const_cast<T*>(A.begin()), false);
+        hoNDArray<T> mB(B.get_dimensions(), const_cast<T*>(B.begin()), false);
+
+        Gadgetron::GeneralMatrixProduct(mC, mA, transA, mB, transB);
     }
     catch(...)
     {
-        GADGET_ERROR_MSG("Errors in SymmetricHermitianPositiveDefiniteLinearSystem_posv(hoMatrix<float>& A, hoMatrix<float>& b) ...");
+        GADGET_ERROR_MSG("Errors in GeneralMatrixProduct(hoMatrix<T>& C, const hoMatrix<T>& A, bool transA, const hoMatrix<T>& B, bool transB) ...");
         return false;
     }
     return true;
 }
 
-#endif // USE_MKL
-
 }
diff --git a/toolboxes/core/cpu/hoNDArray.h b/toolboxes/core/cpu/hoNDArray.h
index 8c033f5..ab1f512 100644
--- a/toolboxes/core/cpu/hoNDArray.h
+++ b/toolboxes/core/cpu/hoNDArray.h
@@ -8,7 +8,6 @@
 #include "complext.h"
 #include "vector_td.h"
 #include "GadgetronCommon.h"
-#include "SerializableObject.h"
 
 #include "cpucore_export.h"
 
@@ -17,29 +16,27 @@
 #include <boost/shared_ptr.hpp>
 #include <stdexcept>
 
-#ifdef USE_MKL
-#include "mkl.h"
-#endif
-
 namespace Gadgetron{
 
-  template <typename T> class hoNDArray : public NDArray<T>, public SerializableObject
+  template <typename T> class hoNDArray : public NDArray<T>
   {
   public:
 
     typedef NDArray<T> BaseClass;
+    typedef float coord_type;
+    typedef T value_type;
 
     hoNDArray();
 
-    hoNDArray(std::vector<size_t> &dimensions);
-    hoNDArray(std::vector<size_t> *dimensions);
-    hoNDArray(boost::shared_ptr< std::vector<size_t> > dimensions);
+    explicit hoNDArray(std::vector<size_t> &dimensions);
+    explicit hoNDArray(std::vector<size_t> *dimensions);
+    explicit hoNDArray(boost::shared_ptr< std::vector<size_t> > dimensions);
 
     hoNDArray(std::vector<size_t> *dimensions, T* data, bool delete_data_on_destruct = false);
     hoNDArray(std::vector<size_t> &dimensions, T* data, bool delete_data_on_destruct = false);
     hoNDArray(boost::shared_ptr< std::vector<size_t> > dimensions, T* data, bool delete_data_on_destruct = false);
 
-    hoNDArray(size_t len);
+    explicit hoNDArray(size_t len);
     hoNDArray(size_t sx, size_t sy);
     hoNDArray(size_t sx, size_t sy, size_t sz);
     hoNDArray(size_t sx, size_t sy, size_t sz, size_t st);
@@ -61,7 +58,7 @@ namespace Gadgetron{
 
     // Copy constructors
     hoNDArray(const hoNDArray<T> &a);
-    hoNDArray(const hoNDArray<T> *a);
+    explicit hoNDArray(const hoNDArray<T> *a);
 
     // Assignment operator
     hoNDArray& operator=(const hoNDArray& rhs);
@@ -114,17 +111,22 @@ namespace Gadgetron{
     //const T& operator()( const std::vector<size_t>& ind ) const;
 
     template<typename T2> 
-      bool copyFrom(const hoNDArray<T2>& aArray) // Should be a void function
+    bool copyFrom(const hoNDArray<T2>& aArray) // Should be a void function
     {
-      if ( !this->dimensions_equal(&aArray) ){
+      if ( !this->dimensions_equal(&aArray) )
+      {
         this->create(aArray.get_dimensions());
-      }      
-      for ( size_t i=0; i<elements_; i++ ){
+      }
+
+      long long i;
+      #pragma omp parallel for default(none) private(i) shared(aArray)
+      for ( i=0; i<(long long)elements_; i++ )
+      {
         data_[i] = static_cast<T>(aArray(i));
       }
       return true;
     }
-  
+
     void get_sub_array(const std::vector<size_t>& start, std::vector<size_t>& size, hoNDArray<T>& out);
 
     virtual void print(std::ostream& os) const;
diff --git a/toolboxes/core/cpu/hoNDArray.hxx b/toolboxes/core/cpu/hoNDArray.hxx
index 51a525e..726c96a 100644
--- a/toolboxes/core/cpu/hoNDArray.hxx
+++ b/toolboxes/core/cpu/hoNDArray.hxx
@@ -1,980 +1,973 @@
 // This file is not to be included by anyone else than hoNDArray.h
-// Contains the "private" implementation of the container·
+// Contains the "private" implementation of the container
 //
 
 namespace Gadgetron
 {
-  template <typename T> 
-  hoNDArray<T>::hoNDArray() : NDArray<T>::NDArray() 
-  {
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(std::vector<size_t> *dimensions) : NDArray<T>::NDArray()
-  {
-    this->create(dimensions);
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(std::vector<size_t> &dimensions) : NDArray<T>::NDArray()
-  {
-    this->create(dimensions);
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(boost::shared_ptr< std::vector<size_t> > dimensions) : NDArray<T>::NDArray()
-  {
-    this->create(dimensions);
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(size_t len) : NDArray<T>::NDArray()
-  {
-    std::vector<size_t> dim(1);
-    dim[0] = len;
-    this->create(dim);
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(size_t sx, size_t sy) : NDArray<T>::NDArray()
-  {
-    std::vector<size_t> dim(2);
-    dim[0] = sx;
-    dim[1] = sy;
-    this->create(dim);
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz) : NDArray<T>::NDArray()
-  {
-    std::vector<size_t> dim(3);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    this->create(dim);
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz, size_t st) : NDArray<T>::NDArray()
-  {
-    std::vector<size_t> dim(4);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    dim[3] = st;
-    this->create(dim);
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp) : NDArray<T>::NDArray()
-  {
-    std::vector<size_t> dim(5);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    dim[3] = st;
-    dim[4] = sp;
-    this->create(dim);
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq) : NDArray<T>::NDArray()
-  {
-    std::vector<size_t> dim(6);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    dim[3] = st;
-    dim[4] = sp;
-    dim[5] = sq;
-    this->create(dim);
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr) : NDArray<T>::NDArray()
-  {
-    std::vector<size_t> dim(7);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    dim[3] = st;
-    dim[4] = sp;
-    dim[5] = sq;
-    dim[6] = sr;
-    this->create(dim);
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss) : NDArray<T>::NDArray()
-  {
-    std::vector<size_t> dim(8);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    dim[3] = st;
-    dim[4] = sp;
-    dim[5] = sq;
-    dim[6] = sr;
-    dim[7] = ss;
-    this->create(dim);
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(std::vector<size_t> *dimensions, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
-  {
-    this->create(dimensions,data,delete_data_on_destruct);
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(std::vector<size_t> &dimensions, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
-  {
-    this->create(dimensions,data,delete_data_on_destruct);
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(boost::shared_ptr< std::vector<size_t> > dimensions, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
-  {
-    this->create(dimensions,data,delete_data_on_destruct);
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(size_t len, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
-  {
-    std::vector<size_t> dim(1);
-    dim[0] = len;
-    this->create(&dim,data,delete_data_on_destruct);
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(size_t sx, size_t sy, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
-  {
-    std::vector<size_t> dim(2);
-    dim[0] = sx;
-    dim[1] = sy;
-    this->create(&dim,data,delete_data_on_destruct);
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
-  {
-    std::vector<size_t> dim(3);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    this->create(&dim,data,delete_data_on_destruct);
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz, size_t st, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
-  {
-    std::vector<size_t> dim(4);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    dim[3] = st;
-    this->create(&dim,data,delete_data_on_destruct);
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
-  {
-    std::vector<size_t> dim(5);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    dim[3] = st;
-    dim[4] = sp;
-    this->create(&dim,data,delete_data_on_destruct);
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
-  {
-    std::vector<size_t> dim(6);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    dim[3] = st;
-    dim[4] = sp;
-    dim[5] = sq;
-    this->create(&dim,data,delete_data_on_destruct);
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
-  {
-    std::vector<size_t> dim(7);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    dim[3] = st;
-    dim[4] = sp;
-    dim[5] = sq;
-    dim[6] = sr;
-    this->create(&dim,data,delete_data_on_destruct);
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
-  {
-    std::vector<size_t> dim(8);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    dim[3] = st;
-    dim[4] = sp;
-    dim[5] = sq;
-    dim[6] = sr;
-    dim[7] = ss;
-    this->create(&dim,data,delete_data_on_destruct);
-  }
-
-  template <typename T> 
-  hoNDArray<T>::~hoNDArray()
-  {
-    if (this->delete_data_on_destruct_){
-      deallocate_memory();
-    }
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(const hoNDArray<T>  *a)
-  {
-    if(!a) throw std::runtime_error("hoNDArray<T>::hoNDArray(): 0x0 pointer provided");
-    this->data_ = 0;
-    this->dimensions_ = a->dimensions_;
-    this->offsetFactors_ = a->offsetFactors_;
-    allocate_memory();
-    memcpy( this->data_, a->data_, this->elements_*sizeof(T) );
-  }
-
-  template <typename T> 
-  hoNDArray<T>::hoNDArray(const hoNDArray<T> &a)
-  {
-    this->data_ = 0;
-    this->dimensions_ = a.dimensions_;
-    this->offsetFactors_ = a.offsetFactors_;
-    allocate_memory();
-    memcpy( this->data_, a.data_, this->elements_*sizeof(T) );
-  }
-
-  template <typename T> 
-  hoNDArray<T>& hoNDArray<T>::operator=(const hoNDArray<T>& rhs)
-  {
-    if ( &rhs == this ) return *this;
-
-    if ( rhs.get_number_of_elements() == 0 ){
-      this->clear();
-      return *this;
-    }
-
-    // Are the dimensions the same? Then we can just memcpy
-    if (this->dimensions_equal(&rhs)){
-      memcpy(this->data_, rhs.data_, this->elements_*sizeof(T));
-    }
-    else{
-      if (!this->delete_data_on_destruct_){
-        throw std::runtime_error("Array dimensions mismatch in hoNDArray::operator=. Cannot change dimensions of non-destructable array.");        
-      }
-      deallocate_memory();
-      this->data_ = 0;
-      this->dimensions_ = rhs.dimensions_;
-      this->offsetFactors_ = rhs.offsetFactors_;
-      allocate_memory();
-      memcpy( this->data_, rhs.data_, this->elements_*sizeof(T) );
-    }
-    return *this;
-  }
-
-  template <typename T> 
-  void hoNDArray<T>::create(std::vector<size_t>& dimensions)
-  {
-    if ( this->dimensions_equal(&dimensions) )
-      {
-        return;
-      }
-
-    this->clear();
-    BaseClass::create(dimensions);
-  }
-
-  template <typename T> 
-  void hoNDArray<T>::create(std::vector<size_t> *dimensions)
-  {
-    if ( this->dimensions_equal(dimensions) )
-      {
-        return;
-      }
-    this->clear();
-    BaseClass::create(dimensions);
-  }
-
-  template <typename T> 
-  void hoNDArray<T>::create(boost::shared_ptr< std::vector<size_t> > dimensions)
-  {
-    if ( this->dimensions_equal(dimensions.get()) )
-      {
-        return;
-      }
-    this->clear();
-    BaseClass::create(dimensions);
-  }
-
-  template <typename T> 
-  void hoNDArray<T>::create(std::vector<size_t> *dimensions, T* data, bool delete_data_on_destruct) 
-  {
-    if(!dimensions) throw std::runtime_error("hoNDArray<T>::create(): 0x0 pointer provided");
-    if(!data) throw std::runtime_error("hoNDArray<T>::create(): 0x0 pointer provided");
-
-    if ( this->dimensions_equal(dimensions) )
-      {
-        if ( this->delete_data_on_destruct_ ){
-          this->deallocate_memory();
+    template <typename T> 
+    hoNDArray<T>::hoNDArray() : NDArray<T>::NDArray() 
+    {
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(std::vector<size_t> *dimensions) : NDArray<T>::NDArray()
+    {
+        this->create(dimensions);
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(std::vector<size_t> &dimensions) : NDArray<T>::NDArray()
+    {
+        this->create(dimensions);
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(boost::shared_ptr< std::vector<size_t> > dimensions) : NDArray<T>::NDArray()
+    {
+        this->create(dimensions);
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(size_t len) : NDArray<T>::NDArray()
+    {
+        std::vector<size_t> dim(1);
+        dim[0] = len;
+        this->create(dim);
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(size_t sx, size_t sy) : NDArray<T>::NDArray()
+    {
+        std::vector<size_t> dim(2);
+        dim[0] = sx;
+        dim[1] = sy;
+        this->create(dim);
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz) : NDArray<T>::NDArray()
+    {
+        std::vector<size_t> dim(3);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        this->create(dim);
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz, size_t st) : NDArray<T>::NDArray()
+    {
+        std::vector<size_t> dim(4);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        this->create(dim);
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp) : NDArray<T>::NDArray()
+    {
+        std::vector<size_t> dim(5);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        this->create(dim);
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq) : NDArray<T>::NDArray()
+    {
+        std::vector<size_t> dim(6);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        dim[5] = sq;
+        this->create(dim);
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr) : NDArray<T>::NDArray()
+    {
+        std::vector<size_t> dim(7);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        dim[5] = sq;
+        dim[6] = sr;
+        this->create(dim);
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss) : NDArray<T>::NDArray()
+    {
+        std::vector<size_t> dim(8);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        dim[5] = sq;
+        dim[6] = sr;
+        dim[7] = ss;
+        this->create(dim);
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(std::vector<size_t> *dimensions, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
+    {
+        this->create(dimensions,data,delete_data_on_destruct);
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(std::vector<size_t> &dimensions, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
+    {
+        this->create(dimensions,data,delete_data_on_destruct);
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(boost::shared_ptr< std::vector<size_t> > dimensions, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
+    {
+        this->create(dimensions,data,delete_data_on_destruct);
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(size_t len, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
+    {
+        std::vector<size_t> dim(1);
+        dim[0] = len;
+        this->create(&dim,data,delete_data_on_destruct);
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(size_t sx, size_t sy, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
+    {
+        std::vector<size_t> dim(2);
+        dim[0] = sx;
+        dim[1] = sy;
+        this->create(&dim,data,delete_data_on_destruct);
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
+    {
+        std::vector<size_t> dim(3);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        this->create(&dim,data,delete_data_on_destruct);
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz, size_t st, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
+    {
+        std::vector<size_t> dim(4);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        this->create(&dim,data,delete_data_on_destruct);
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
+    {
+        std::vector<size_t> dim(5);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        this->create(&dim,data,delete_data_on_destruct);
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
+    {
+        std::vector<size_t> dim(6);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        dim[5] = sq;
+        this->create(&dim,data,delete_data_on_destruct);
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
+    {
+        std::vector<size_t> dim(7);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        dim[5] = sq;
+        dim[6] = sr;
+        this->create(&dim,data,delete_data_on_destruct);
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
+    {
+        std::vector<size_t> dim(8);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        dim[5] = sq;
+        dim[6] = sr;
+        dim[7] = ss;
+        this->create(&dim,data,delete_data_on_destruct);
+    }
+
+    template <typename T> 
+    hoNDArray<T>::~hoNDArray()
+    {
+        if (this->delete_data_on_destruct_){
+            deallocate_memory();
+        }
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(const hoNDArray<T>  *a)
+    {
+        if(!a) throw std::runtime_error("hoNDArray<T>::hoNDArray(): 0x0 pointer provided");
+        this->data_ = 0;
+
+        std::vector<size_t>* tmp = new std::vector<size_t>;
+        this->dimensions_ = boost::shared_ptr< std::vector<size_t> >(tmp);
+        *(this->dimensions_) = *(a->dimensions_);
+
+        tmp = new std::vector<size_t>;
+        this->offsetFactors_ = boost::shared_ptr< std::vector<size_t> >(tmp);
+        *(this->offsetFactors_) = *(a->offsetFactors_);
+
+        if ( !this->dimensions_->empty() )
+        {
+            allocate_memory();
+            memcpy( this->data_, a->data_, this->elements_*sizeof(T) );
+        }
+        else
+        {
+            this->elements_ = 0;
+        }
+    }
+
+    template <typename T> 
+    hoNDArray<T>::hoNDArray(const hoNDArray<T> &a)
+    {
+        this->data_ = 0;
+
+        std::vector<size_t>* tmp = new std::vector<size_t>;
+        this->dimensions_ = boost::shared_ptr< std::vector<size_t> >(tmp);
+        *(this->dimensions_) = *(a.dimensions_);
+
+        tmp = new std::vector<size_t>;
+        this->offsetFactors_ = boost::shared_ptr< std::vector<size_t> >(tmp);
+        *(this->offsetFactors_) = *(a.offsetFactors_);
+
+        if ( !this->dimensions_->empty() )
+        {
+            allocate_memory();
+            memcpy( this->data_, a.data_, this->elements_*sizeof(T) );
+        }
+        else
+        {
+            this->elements_ = 0;
+        }
+    }
+
+    template <typename T> 
+    hoNDArray<T>& hoNDArray<T>::operator=(const hoNDArray<T>& rhs)
+    {
+        if ( &rhs == this ) return *this;
+
+        if ( rhs.get_number_of_elements() == 0 ){
+            this->clear();
+            return *this;
         }
-        
-        this->data_ = data;
-        this->delete_data_on_destruct_ = delete_data_on_destruct;
-      }
-    else
-      {
-        if ( this->delete_data_on_destruct_ ){
-          this->deallocate_memory();
-          this->data_ = NULL;
+
+        // Are the dimensions the same? Then we can just memcpy
+        if (this->dimensions_equal(&rhs)){
+            memcpy(this->data_, rhs.data_, this->elements_*sizeof(T));
         }
-        
-        BaseClass::create(dimensions, data, delete_data_on_destruct);
-      }
-  }
-
-  template <typename T> 
-  void hoNDArray<T>::create(std::vector<size_t> &dimensions, T* data, bool delete_data_on_destruct) 
-  {
-    if(!data) throw std::runtime_error("hoNDArray<T>::create(): 0x0 pointer provided");
-
-    if ( this->dimensions_equal(&dimensions) )
-      {
-        if ( this->delete_data_on_destruct_ ){
-          this->deallocate_memory();
+        else{
+            deallocate_memory();
+            this->data_ = 0;
+            *(this->dimensions_) = *(rhs.dimensions_);
+            *(this->offsetFactors_) = *(rhs.offsetFactors_);
+            allocate_memory();
+            memcpy( this->data_, rhs.data_, this->elements_*sizeof(T) );
         }
-        
-        this->data_ = data;
-        this->delete_data_on_destruct_ = delete_data_on_destruct;
-      }
-    else
-      {
-        if ( this->delete_data_on_destruct_ ){
-          this->deallocate_memory();
-          this->data_ = NULL;
+        return *this;
+    }
+
+    template <typename T> 
+    void hoNDArray<T>::create(std::vector<size_t>& dimensions)
+    {
+        if ( this->dimensions_equal(&dimensions) )
+        {
+            return;
         }
-        
-        BaseClass::create(dimensions, data, delete_data_on_destruct);
-      }
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::create(boost::shared_ptr< std::vector<size_t> > dimensions, T* data, bool delete_data_on_destruct)
-  {
-    this->create(dimensions.get(), data, delete_data_on_destruct);
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::create(size_t len)
-  {
-    std::vector<size_t> dim(1);
-    dim[0] = len;
-    this->create(dim);
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::create(size_t sx, size_t sy)
-  {
-    std::vector<size_t> dim(2);
-    dim[0] = sx;
-    dim[1] = sy;
-    this->create(dim);
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz)
-  {
-    std::vector<size_t> dim(3);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    this->create(dim);
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st)
-  {
-    std::vector<size_t> dim(4);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    dim[3] = st;
-    this->create(dim);
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp)
-  {
-    std::vector<size_t> dim(5);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    dim[3] = st;
-    dim[4] = sp;
-    this->create(dim);
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq)
-  {
-    std::vector<size_t> dim(6);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    dim[3] = st;
-    dim[4] = sp;
-    dim[5] = sq;
-    this->create(dim);
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr)
-  {
-    std::vector<size_t> dim(7);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    dim[3] = st;
-    dim[4] = sp;
-    dim[5] = sq;
-    dim[6] = sr;
-    this->create(dim);
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss)
-  {
-    std::vector<size_t> dim(8);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    dim[3] = st;
-    dim[4] = sp;
-    dim[5] = sq;
-    dim[6] = sr;
-    dim[7] = ss;
-    this->create(dim);
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss, size_t su)
-  {
-    std::vector<size_t> dim(9);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    dim[3] = st;
-    dim[4] = sp;
-    dim[5] = sq;
-    dim[6] = sr;
-    dim[7] = ss;
-    dim[8] = su;
-    this->create(dim);
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::create(size_t len, T* data, bool delete_data_on_destruct)
-  {
-    std::vector<size_t> dim(1);
-    dim[0] = len;
-    this->create(&dim, data, delete_data_on_destruct);
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::create(size_t sx, size_t sy, T* data, bool delete_data_on_destruct)
-  {
-    std::vector<size_t> dim(2);
-    dim[0] = sx;
-    dim[1] = sy;
-    this->create(&dim, data, delete_data_on_destruct);
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, T* data, bool delete_data_on_destruct)
-  {
-    std::vector<size_t> dim(3);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    this->create(&dim, data, delete_data_on_destruct);
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, T* data, bool delete_data_on_destruct)
-  {
-    std::vector<size_t> dim(4);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    dim[3] = st;
-    this->create(&dim, data, delete_data_on_destruct);
-  }
-  template <typename T> 
-  inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, T* data, bool delete_data_on_destruct)
-  {
-    std::vector<size_t> dim(5);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    dim[3] = st;
-    dim[4] = sp;
-    this->create(&dim, data, delete_data_on_destruct);
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, T* data, bool delete_data_on_destruct)
-  {
-    std::vector<size_t> dim(6);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    dim[3] = st;
-    dim[4] = sp;
-    dim[5] = sq;
-    this->create(&dim, data, delete_data_on_destruct);
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, T* data, bool delete_data_on_destruct)
-  {
-    std::vector<size_t> dim(7);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    dim[3] = st;
-    dim[4] = sp;
-    dim[5] = sq;
-    dim[6] = sr;
-    this->create(&dim, data, delete_data_on_destruct);
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss, T* data, bool delete_data_on_destruct)
-  {
-    std::vector<size_t> dim(8);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    dim[3] = st;
-    dim[4] = sp;
-    dim[5] = sq;
-    dim[6] = sr;
-    dim[7] = ss;
-    this->create(&dim, data, delete_data_on_destruct);
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss, size_t su, T* data, bool delete_data_on_destruct)
-  {
-    std::vector<size_t> dim(9);
-    dim[0] = sx;
-    dim[1] = sy;
-    dim[2] = sz;
-    dim[3] = st;
-    dim[4] = sp;
-    dim[5] = sq;
-    dim[6] = sr;
-    dim[7] = ss;
-    dim[8] = su;
-    this->create(&dim, data, delete_data_on_destruct);
-  }
-
-  template <typename T> 
-  void hoNDArray<T>::fill(T value)
-  {
-    std::fill(this->get_data_ptr(), this->get_data_ptr()+this->get_number_of_elements(), value);
-  }
-
-  template <typename T> 
-  inline T* hoNDArray<T>::begin()
-  {
-    return this->data_;
-  }
-
-  template <typename T> 
-  inline const T* hoNDArray<T>::begin() const
-  {
-    return this->data_;
-  }
-
-  template <typename T> 
-  inline T* hoNDArray<T>::end()
-  {
-    return (this->data_+this->elements_);
-  }
-
-  template <typename T> 
-  inline const T* hoNDArray<T>::end() const
-  {
-    return (this->data_+this->elements_);
-  }
-
-  template <typename T> 
-  inline T& hoNDArray<T>::at( size_t idx )
-  {
-    /*if( idx >= this->get_number_of_elements() )
-      {
-      BOOST_THROW_EXCEPTION( runtime_error("hoNDArray::at(): index out of range."));
-      }*/
-    GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-    return this->get_data_ptr()[idx];
-  }
-
-  template <typename T> 
-  inline const T& hoNDArray<T>::at( size_t idx ) const
-  {
-    /*if( idx >= this->get_number_of_elements() )
-      {
-      BOOST_THROW_EXCEPTION( runtime_error("hoNDArray::at(): index out of range."));
-      }*/
-    GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-    return this->get_data_ptr()[idx];
-  }
-
-  template <typename T> 
-  inline T& hoNDArray<T>::operator[]( size_t idx )
-  {
-    /*if( idx >= this->get_number_of_elements() )
-      {
-      BOOST_THROW_EXCEPTION( runtime_error("hoNDArray::operator[]: index out of range."));
-      }*/
-    GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-    return this->get_data_ptr()[idx];
-  }
-
-  //template <typename T> 
-  //inline T& hoNDArray<T>::operator()( size_t idx )
-  //{
-  //    /*if( idx >= this->get_number_of_elements() )
-  //    {
-  //    BOOST_THROW_EXCEPTION( runtime_error("hoNDArray::operator(): index out of range."));
-  //    }*/
-  //    GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-  //    return this->get_data_ptr()[idx];
-  //}
-
-  //template <typename T> 
-  //inline const T& hoNDArray<T>::operator()( size_t idx ) const
-  //{
-  //    /*if( idx >= this->get_number_of_elements() )
-  //    {
-  //    BOOST_THROW_EXCEPTION( runtime_error("hoNDArray::operator(): index out of range."));
-  //    }*/
-  //    GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-  //    return this->get_data_ptr()[idx];
-  //}
-
-  //template <typename T> 
-  //inline T& hoNDArray<T>::operator()( const std::vector<size_t>& ind )
-  //{
-  //    size_t idx = this->calculate_offset(ind);
-  //    /*if( idx >= this->get_number_of_elements() )
-  //    {
-  //    BOOST_THROW_EXCEPTION( runtime_error("hoNDArray::operator(): index out of range."));
-  //    }*/
-  //    GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-  //    return this->get_data_ptr()[idx];
-  //}
-
-  //template <typename T> 
-  //inline const T& hoNDArray<T>::operator()( const std::vector<size_t>& ind ) const
-  //{
-  //    size_t idx = this->calculate_offset(ind);
-  //    /*if( idx >= this->get_number_of_elements() )
-  //    {
-  //    BOOST_THROW_EXCEPTION( runtime_error("hoNDArray::operator(): index out of range."));
-  //    }*/
-  //    GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
-  //    return this->get_data_ptr()[idx];
-  //}
-
-  template <typename T> 
-  void hoNDArray<T>::get_sub_array(const std::vector<size_t>& start, std::vector<size_t>& size, hoNDArray<T>& out)
-  {
-    if ( start.size() != size.size() ){
-      BOOST_THROW_EXCEPTION( runtime_error("hoNDArray<>::get_sub_array failed"));
-    }
-
-    if ( start.size() != (*dimensions_).size() ){
-      BOOST_THROW_EXCEPTION( runtime_error("hoNDArray<>::get_sub_array failed"));
-    }
-
-    out.create(&size);
-
-    if ( out.get_number_of_elements() == this->get_number_of_elements() ){
-      out = *this;
-      return;
-    }
-
-    std::vector<size_t> end(start.size());
-
-    size_t ii;
-    for ( ii=0; ii<start.size(); ii++ ){
-      end[ii] = start[ii] + size[ii] - 1;
-      if ( end[ii] >= (*dimensions_)[ii] ){
-        BOOST_THROW_EXCEPTION( runtime_error("hoNDArray<>::get_sub_array failed"));
-      }
-    }
-  }
-
-  template <typename T> 
-  void hoNDArray<T>::print(std::ostream& os) const
-  {
-    using namespace std;
-
-    os.unsetf(std::ios::scientific);
-    os.setf(ios::fixed);
-
-    size_t i;
-
-    os << "--------------Gagdgetron ND Array -------------" << endl;
-    os << "Array dimension is : " << dimensions_->size() << endl;
-
-    os << "Array size is : ";
-    for (i=0; i<dimensions_->size(); i++ ) 
-      os << (*dimensions_)[i] << " "; 
-    os << endl;
-
-    int elemTypeSize = sizeof(T);
-    std::string elemTypeName = std::string(typeid(T).name());
-
-    os << "Array data type is : " << elemTypeName << std::endl;
-    os << "Byte number for each element is : " << elemTypeSize << std::endl;
-    os << "Number of array size in bytes is : ";
-    os << elements_*elemTypeSize << std::endl;
-
-    //os << "-------------------------------------------" << std::endl;
-    //size_t numOfPrints = 20;
-    //if ( this->elements_ < numOfPrints ) numOfPrints = this->elements_;
-    //for (i=0; i<numOfPrints; i++) 
-    //{
-    //    os << i << " = " << (*this)(i) << std::endl;
-    //}
-    //os << "-------------------------------------------" << std::endl;
 
-    os << std::endl;
-  }
+        this->clear();
+        BaseClass::create(dimensions);
+    }
+
+    template <typename T> 
+    void hoNDArray<T>::create(std::vector<size_t> *dimensions)
+    {
+        if ( this->dimensions_equal(dimensions) )
+        {
+            return;
+        }
+        this->clear();
+        BaseClass::create(dimensions);
+    }
+
+    template <typename T> 
+    void hoNDArray<T>::create(boost::shared_ptr< std::vector<size_t> > dimensions)
+    {
+        if ( this->dimensions_equal(dimensions.get()) )
+        {
+            return;
+        }
+        this->clear();
+        BaseClass::create(dimensions);
+    }
+
+    template <typename T> 
+    void hoNDArray<T>::create(std::vector<size_t> *dimensions, T* data, bool delete_data_on_destruct) 
+    {
+        // Wrap the caller-supplied buffer `data`; both pointers must be non-null.
+        if(!dimensions) throw std::runtime_error("hoNDArray<T>::create(): 0x0 pointer provided");
+        if(!data) throw std::runtime_error("hoNDArray<T>::create(): 0x0 pointer provided");
+
+        // Free the current buffer only if we own it AND it is not the buffer being
+        // handed in; otherwise re-wrapping our own storage would leave data_ dangling.
+        const bool release_old = this->delete_data_on_destruct_ && (this->data_ != data);
+
+        if ( this->dimensions_equal(dimensions) )
+        {
+            if ( release_old ) this->deallocate_memory();
+
+            // Same shape: only the buffer pointer and its ownership flag change.
+            this->data_ = data;
+            this->delete_data_on_destruct_ = delete_data_on_destruct;
+        }
+        else
+        {
+            if ( release_old ) this->deallocate_memory(); // also resets data_ to 0x0
+
+            // Shape differs: BaseClass::create is handed the new dimensions and buffer.
+            BaseClass::create(dimensions, data, delete_data_on_destruct);
+        }
+    }
+
+    template <typename T> 
+    void hoNDArray<T>::create(std::vector<size_t> &dimensions, T* data, bool delete_data_on_destruct) 
+    {
+        // Wrap the caller-supplied buffer `data`, which must be non-null.
+        if(!data) throw std::runtime_error("hoNDArray<T>::create(): 0x0 pointer provided");
+
+        // Free the current buffer only if we own it AND it is not the buffer being
+        // handed in; otherwise re-wrapping our own storage would leave data_ dangling.
+        const bool release_old = this->delete_data_on_destruct_ && (this->data_ != data);
+
+        if ( this->dimensions_equal(&dimensions) )
+        {
+            if ( release_old ) this->deallocate_memory();
+
+            // Same shape: only the buffer pointer and its ownership flag change.
+            this->data_ = data;
+            this->delete_data_on_destruct_ = delete_data_on_destruct;
+        }
+        else
+        {
+            if ( release_old ) this->deallocate_memory(); // also resets data_ to 0x0
+
+            // Shape differs: BaseClass::create is handed the new dimensions and buffer.
+            BaseClass::create(dimensions, data, delete_data_on_destruct);
+        }
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::create(boost::shared_ptr< std::vector<size_t> > dimensions, T* data, bool delete_data_on_destruct)
+    {
+        this->create(dimensions.get(), data, delete_data_on_destruct);
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::create(size_t len)
+    {
+        std::vector<size_t> dim(1);
+        dim[0] = len;
+        this->create(dim);
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::create(size_t sx, size_t sy)
+    {
+        std::vector<size_t> dim(2);
+        dim[0] = sx;
+        dim[1] = sy;
+        this->create(dim);
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz)
+    {
+        std::vector<size_t> dim(3);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        this->create(dim);
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st)
+    {
+        std::vector<size_t> dim(4);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        this->create(dim);
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp)
+    {
+        std::vector<size_t> dim(5);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        this->create(dim);
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq)
+    {
+        std::vector<size_t> dim(6);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        dim[5] = sq;
+        this->create(dim);
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr)
+    {
+        std::vector<size_t> dim(7);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        dim[5] = sq;
+        dim[6] = sr;
+        this->create(dim);
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss)
+    {
+        std::vector<size_t> dim(8);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        dim[5] = sq;
+        dim[6] = sr;
+        dim[7] = ss;
+        this->create(dim);
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss, size_t su)
+    {
+        std::vector<size_t> dim(9);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        dim[5] = sq;
+        dim[6] = sr;
+        dim[7] = ss;
+        dim[8] = su;
+        this->create(dim);
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::create(size_t len, T* data, bool delete_data_on_destruct)
+    {
+        std::vector<size_t> dim(1);
+        dim[0] = len;
+        this->create(&dim, data, delete_data_on_destruct);
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::create(size_t sx, size_t sy, T* data, bool delete_data_on_destruct)
+    {
+        std::vector<size_t> dim(2);
+        dim[0] = sx;
+        dim[1] = sy;
+        this->create(&dim, data, delete_data_on_destruct);
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, T* data, bool delete_data_on_destruct)
+    {
+        std::vector<size_t> dim(3);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        this->create(&dim, data, delete_data_on_destruct);
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, T* data, bool delete_data_on_destruct)
+    {
+        std::vector<size_t> dim(4);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        this->create(&dim, data, delete_data_on_destruct);
+    }
+    template <typename T> 
+    inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, T* data, bool delete_data_on_destruct)
+    {
+        std::vector<size_t> dim(5);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        this->create(&dim, data, delete_data_on_destruct);
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, T* data, bool delete_data_on_destruct)
+    {
+        std::vector<size_t> dim(6);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        dim[5] = sq;
+        this->create(&dim, data, delete_data_on_destruct);
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, T* data, bool delete_data_on_destruct)
+    {
+        std::vector<size_t> dim(7);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        dim[5] = sq;
+        dim[6] = sr;
+        this->create(&dim, data, delete_data_on_destruct);
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss, T* data, bool delete_data_on_destruct)
+    {
+        std::vector<size_t> dim(8);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        dim[5] = sq;
+        dim[6] = sr;
+        dim[7] = ss;
+        this->create(&dim, data, delete_data_on_destruct);
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss, size_t su, T* data, bool delete_data_on_destruct)
+    {
+        std::vector<size_t> dim(9);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        dim[5] = sq;
+        dim[6] = sr;
+        dim[7] = ss;
+        dim[8] = su;
+        this->create(&dim, data, delete_data_on_destruct);
+    }
 
-  template <typename T> 
-  void hoNDArray<T>::printContent(std::ostream& os) const
-  {
-    using namespace std;
+    template <typename T> 
+    void hoNDArray<T>::fill(T value)
+    {
+        std::fill(this->get_data_ptr(), this->get_data_ptr()+this->get_number_of_elements(), value);
+    }
+
+    template <typename T> 
+    inline T* hoNDArray<T>::begin()
+    {
+        return this->data_;
+    }
+
+    template <typename T> 
+    inline const T* hoNDArray<T>::begin() const
+    {
+        return this->data_;
+    }
+
+    template <typename T> 
+    inline T* hoNDArray<T>::end()
+    {
+        return (this->data_+this->elements_);
+    }
 
-    os.unsetf(std::ios::scientific);
-    os.setf(ios::fixed);
+    template <typename T> 
+    inline const T* hoNDArray<T>::end() const
+    {
+        return (this->data_+this->elements_);
+    }
 
-    print(os);
+    template <typename T> 
+    inline T& hoNDArray<T>::at( size_t idx )
+    {
+        /*if( idx >= this->get_number_of_elements() )
+        {
+        BOOST_THROW_EXCEPTION( runtime_error("hoNDArray::at(): index out of range."));
+        }*/
+        GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
+        return this->get_data_ptr()[idx];
+    }
 
-    //size_t i;
+    template <typename T> 
+    inline const T& hoNDArray<T>::at( size_t idx ) const
+    {
+        /*if( idx >= this->get_number_of_elements() )
+        {
+        BOOST_THROW_EXCEPTION( runtime_error("hoNDArray::at(): index out of range."));
+        }*/
+        GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
+        return this->get_data_ptr()[idx];
+    }
 
-    //os << "-------------------------------------------" << std::endl;
-    //size_t numOfPrints = this->elements_;
-    //if ( this->elements_ < numOfPrints ) numOfPrints = this->elements_;
-    //for (i=0; i<numOfPrints; i++) 
+    template <typename T> 
+    inline T& hoNDArray<T>::operator[]( size_t idx )
+    {
+        /*if( idx >= this->get_number_of_elements() )
+        {
+        BOOST_THROW_EXCEPTION( runtime_error("hoNDArray::operator[]: index out of range."));
+        }*/
+        GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
+        return this->get_data_ptr()[idx];
+    }
+
+    //template <typename T> 
+    //inline T& hoNDArray<T>::operator()( size_t idx )
     //{
-    //    os << i << " = " << (*this)(i) << std::endl;
+    //    /*if( idx >= this->get_number_of_elements() )
+    //    {
+    //    BOOST_THROW_EXCEPTION( runtime_error("hoNDArray::operator(): index out of range."));
+    //    }*/
+    //    GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
+    //    return this->get_data_ptr()[idx];
     //}
-    //os << "-------------------------------------------" << std::endl;
-    //os << std::endl;
-  }
-
-  template <typename T> 
-  void hoNDArray<T>::allocate_memory()
-  {
-    deallocate_memory();
-
-    this->elements_ = (*this->dimensions_)[0];
-    for (size_t i = 1; i < this->dimensions_->size(); i++)
-      {
-        this->elements_ *= (*this->dimensions_)[i];
-      }
-
-    if ( this->elements_ > 0 )
-      {
-        this->_allocate_memory(this->elements_, &this->data_);
-
-        if( this->data_ == 0x0 )
-          {
-            BOOST_THROW_EXCEPTION( bad_alloc("hoNDArray<>::allocate memory failed"));
-          }
-
-        this->delete_data_on_destruct_ = true;
-
-        // memset(this->data_, 0, sizeof(T)*this->elements_);
-      }
-  }
-
-  template <typename T> 
-  void hoNDArray<T>::deallocate_memory()
-  {
-    if( this->data_ ){
-      this->_deallocate_memory( this->data_ );
-      this->data_ = 0x0;
-    }
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::_allocate_memory( size_t size, float** data )
-  {
-#ifdef USE_MKL
-    *data = (float*) mkl_malloc(size*sizeof(float), 4);
-#else
-    *data = (float*) malloc( size*sizeof(float) );
-#endif
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::_deallocate_memory( float* data )
-  {
-#ifdef USE_MKL
-    mkl_free(data);
-#else
-    free(data);
-#endif
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::_allocate_memory( size_t size, double** data )
-  {
-#ifdef USE_MKL
-    *data = (double*) mkl_malloc(size*sizeof(double), 4);
-#else
-    *data = (double*) malloc( size*sizeof(double) );
-#endif
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::_deallocate_memory( double* data )
-  {
-#ifdef USE_MKL
-    mkl_free(data);
-#else
-    free(data);
-#endif
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::_allocate_memory( size_t size, std::complex<float>** data )
-  {
-#ifdef USE_MKL
-    *data = (std::complex<float>*) mkl_malloc(size*sizeof(std::complex<float>), 4);
-#else
-    *data = (std::complex<float>*) malloc( size*sizeof(std::complex<float>) );
-#endif
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::_deallocate_memory( std::complex<float>* data )
-  {
-#ifdef USE_MKL
-    mkl_free(data);
-#else
-    free(data);
-#endif
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::_allocate_memory( size_t size, std::complex<double>** data )
-  {
-#ifdef USE_MKL
-    *data = (std::complex<double>*) mkl_malloc(size*sizeof(std::complex<double>), 4);
-#else
-    *data = (std::complex<double>*) malloc( size*sizeof(std::complex<double>) );
-#endif
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::_deallocate_memory( std::complex<double>* data )
-  {
-#ifdef USE_MKL
-    mkl_free(data);
-#else
-    free(data);
-#endif
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::_allocate_memory( size_t size, float_complext** data )
-  {
-    *data = (float_complext*) malloc( size*sizeof(float_complext) );
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::_deallocate_memory( float_complext* data )
-  {
-    free( data );
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::_allocate_memory( size_t size, double_complext** data )
-  {
-    *data = (double_complext*) malloc( size*sizeof(double_complext) );
-  }
-
-  template <typename T> 
-  inline void hoNDArray<T>::_deallocate_memory( double_complext* data )
-  {
-    free( data );
-  }
-
-  template <typename T> 
-  bool hoNDArray<T>::serialize(char*& buf, size_t& len) const
-  {
-    if ( buf != NULL ) delete[] buf;
-
-    size_t NDim = dimensions_->size();
-
-    // number of dimensions + dimension vector + contents
-    len = sizeof(size_t) + sizeof(size_t)*NDim + sizeof(T)*elements_;
-
-    buf = new char[len];
-
-    memcpy(buf, &NDim, sizeof(size_t));
-    if ( NDim > 0 )
-      {
-        memcpy(buf+sizeof(size_t), &((*dimensions_)[0]), sizeof(size_t)*NDim);
-        memcpy(buf+sizeof(size_t)+sizeof(size_t)*NDim, this->data_, sizeof(T)*elements_);
-      }
-
-    return true; // Temporary. Should not be a boolean function.
-  }
-
-  template <typename T> 
-  bool hoNDArray<T>::deserialize(char* buf, size_t& len)
-  {
-    size_t NDim;
-    memcpy(&NDim, buf, sizeof(size_t));
-
-    if ( NDim > 0 )
-      {
-        std::vector<size_t> dimensions(NDim);
-        memcpy(&dimensions[0], buf+sizeof(size_t), sizeof(size_t)*NDim);
-
-        // allocate memory
-        this->create(&dimensions);
-
-        // copy the content
-        memcpy(this->data_, buf+sizeof(size_t)+sizeof(size_t)*NDim, sizeof(T)*elements_);
-      }
-    else
-      {
-        this->clear();
-      }
 
-    len = sizeof(size_t)+sizeof(size_t)*NDim+sizeof(T)*elements_;
-    return true; // Temporary. Should not be a boolean function.
-  }  
+    //template <typename T> 
+    //inline const T& hoNDArray<T>::operator()( size_t idx ) const
+    //{
+    //    /*if( idx >= this->get_number_of_elements() )
+    //    {
+    //    BOOST_THROW_EXCEPTION( runtime_error("hoNDArray::operator(): index out of range."));
+    //    }*/
+    //    GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
+    //    return this->get_data_ptr()[idx];
+    //}
+
+    //template <typename T> 
+    //inline T& hoNDArray<T>::operator()( const std::vector<size_t>& ind )
+    //{
+    //    size_t idx = this->calculate_offset(ind);
+    //    /*if( idx >= this->get_number_of_elements() )
+    //    {
+    //    BOOST_THROW_EXCEPTION( runtime_error("hoNDArray::operator(): index out of range."));
+    //    }*/
+    //    GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
+    //    return this->get_data_ptr()[idx];
+    //}
+
+    //template <typename T> 
+    //inline const T& hoNDArray<T>::operator()( const std::vector<size_t>& ind ) const
+    //{
+    //    size_t idx = this->calculate_offset(ind);
+    //    /*if( idx >= this->get_number_of_elements() )
+    //    {
+    //    BOOST_THROW_EXCEPTION( runtime_error("hoNDArray::operator(): index out of range."));
+    //    }*/
+    //    GADGET_DEBUG_CHECK_THROW(idx < this->get_number_of_elements());
+    //    return this->get_data_ptr()[idx];
+    //}
+
+    template <typename T> 
+    void hoNDArray<T>::get_sub_array(const std::vector<size_t>& start, std::vector<size_t>& size, hoNDArray<T>& out)
+    {
+        if ( start.size() != size.size() ){
+            BOOST_THROW_EXCEPTION( runtime_error("hoNDArray<>::get_sub_array failed"));
+        }
+        // start/size must also have one entry per dimension of this array.
+        if ( start.size() != (*dimensions_).size() ){
+            BOOST_THROW_EXCEPTION( runtime_error("hoNDArray<>::get_sub_array failed"));
+        }
+        // out is (re)created with the requested extent before the range check further down.
+        out.create(&size);
+        // Equal element counts: out is assigned the whole array, whatever start holds.
+        if ( out.get_number_of_elements() == this->get_number_of_elements() ){
+            out = *this;
+            return;
+        }
+        // end[ii] is the last index touched along dimension ii (inclusive).
+        std::vector<size_t> end(start.size());
+        // NOTE(review): from here on only the range is validated; no elements are copied into out.
+        size_t ii;
+        for ( ii=0; ii<start.size(); ii++ ){
+            end[ii] = start[ii] + size[ii] - 1;
+            if ( end[ii] >= (*dimensions_)[ii] ){
+                BOOST_THROW_EXCEPTION( runtime_error("hoNDArray<>::get_sub_array failed"));
+            }
+        }
+    }
+
+    template <typename T> 
+    void hoNDArray<T>::printContent(std::ostream& os) const
+    {
+        using namespace std;
+
+        os.unsetf(std::ios::scientific);
+        os.setf(ios::fixed);
+
+        size_t i;
+
+        os << "Array dimension is : " << dimensions_->size() << endl;
+
+        os << "Array size is : ";
+        for (i=0; i<dimensions_->size(); i++ ) 
+            os << (*dimensions_)[i] << " "; 
+        os << endl;
+
+        int elemTypeSize = sizeof(T);
+        std::string elemTypeName = std::string(typeid(T).name());
+
+        os << "Array data type is : " << elemTypeName << std::endl;
+        os << "Byte number for each element is : " << elemTypeSize << std::endl;
+        os << "Number of array size in bytes is : ";
+        os << elements_*elemTypeSize << std::endl;
+        os << "Delete data on destruction flag is : " << this->delete_data_on_destruct_ << endl;
+
+        //os << "-------------------------------------------" << std::endl;
+        //size_t numOfPrints = 20;
+        //if ( this->elements_ < numOfPrints ) numOfPrints = this->elements_;
+        //for (i=0; i<numOfPrints; i++) 
+        //{
+        //    os << i << " = " << (*this)(i) << std::endl;
+        //}
+        //os << "-------------------------------------------" << std::endl;
+
+        os << std::endl;
+    }
+
+    template <typename T> 
+    void hoNDArray<T>::print(std::ostream& os) const
+    {
+        // Emits a banner line and then the array summary produced by printContent().
+        // Side effect: os is left in fixed (non-scientific) floating-point notation.
+        os.unsetf(std::ios::scientific);
+        os.setf(std::ios::fixed);
+
+        os << "--------------Gadgetron ND Array -------------" << std::endl;
+        this->printContent(os);
+    }
+
+    template <typename T> 
+    void hoNDArray<T>::allocate_memory()
+    {
+        deallocate_memory();
+        // NOTE(review): deallocate_memory() frees any non-null data_ without consulting delete_data_on_destruct_ - confirm callers.
+        if ( !this->dimensions_->empty() )
+        {
+            this->elements_ = (*this->dimensions_)[0];
+            for (size_t i = 1; i < this->dimensions_->size(); i++)
+            {
+                this->elements_ *= (*this->dimensions_)[i]; // product is not checked for overflow
+            }
+            // A zero extent yields elements_ == 0: nothing is allocated and data_ stays 0x0.
+            if ( this->elements_ > 0 )
+            {
+                this->_allocate_memory(this->elements_, &this->data_);
+
+                if( this->data_ == 0x0 )
+                {
+                    BOOST_THROW_EXCEPTION( bad_alloc("hoNDArray<>::allocate memory failed"));
+                }
+
+                this->delete_data_on_destruct_ = true;
+                // The new buffer is not initialised by this function (the zero-fill below is commented out).
+                // memset(this->data_, 0, sizeof(T)*this->elements_);
+            }
+        }
+        else
+        {
+            this->elements_ = 0;
+        }
+    }
+
+    template <typename T> 
+    void hoNDArray<T>::deallocate_memory()
+    {
+        if( this->data_ ){
+            this->_deallocate_memory( this->data_ );
+            this->data_ = 0x0;
+        }
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::_allocate_memory( size_t size, float** data )
+    {
+        *data = (float*) malloc( size*sizeof(float) );
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::_deallocate_memory( float* data )
+    {
+        free(data);
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::_allocate_memory( size_t size, double** data )
+    {
+        *data = (double*) malloc( size*sizeof(double) );
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::_deallocate_memory( double* data )
+    {
+        free(data);
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::_allocate_memory( size_t size, std::complex<float>** data )
+    {
+        *data = (std::complex<float>*) malloc( size*sizeof(std::complex<float>) );
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::_deallocate_memory( std::complex<float>* data )
+    {
+        free(data);
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::_allocate_memory( size_t size, std::complex<double>** data )
+    {
+        *data = (std::complex<double>*) malloc( size*sizeof(std::complex<double>) );
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::_deallocate_memory( std::complex<double>* data )
+    {
+        free(data);
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::_allocate_memory( size_t size, float_complext** data )
+    {
+        *data = (float_complext*) malloc( size*sizeof(float_complext) );
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::_deallocate_memory( float_complext* data )
+    {
+        free( data );
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::_allocate_memory( size_t size, double_complext** data )
+    {
+        *data = (double_complext*) malloc( size*sizeof(double_complext) );
+    }
+
+    template <typename T> 
+    inline void hoNDArray<T>::_deallocate_memory( double_complext* data )
+    {
+        free( data );
+    }
+
+    template <typename T> 
+    bool hoNDArray<T>::serialize(char*& buf, size_t& len) const 
+    {
+        // buf must be NULL or a new[]-allocated block: it is released and replaced here.
+        if ( buf != NULL ) delete[] buf;
+        size_t NDim = dimensions_->size();
+
+        // number of dimensions + dimension vector + contents
+        len = sizeof(size_t) + sizeof(size_t)*NDim + sizeof(T)*elements_;
+
+        buf = new char[len]; // handed back to the caller through buf; not freed here
+        memcpy(buf, &NDim, sizeof(size_t));
+        if ( NDim > 0 )
+        {
+            memcpy(buf+sizeof(size_t), &((*dimensions_)[0]), sizeof(size_t)*NDim);
+            // an empty array may have data_ == 0x0; memcpy from a null pointer is undefined even for 0 bytes
+            if ( elements_ > 0 ) memcpy(buf+sizeof(size_t)+sizeof(size_t)*NDim, this->data_, sizeof(T)*elements_);
+        }
+
+        return true; // Temporary. Should not be a boolean function.
+    }
+
+    template <typename T> 
+    bool hoNDArray<T>::deserialize(char* buf, size_t& len)
+    {
+        // Layout read from buf: [NDim][NDim extents][elements]; len is output only (bytes consumed).
+        size_t NDim;
+        memcpy(&NDim, buf, sizeof(size_t));
+        // NOTE(review): buf is trusted; its size is not known here, so NDim cannot be validated.
+        if ( NDim > 0 )
+        {
+            std::vector<size_t> dimensions(NDim);
+            memcpy(&dimensions[0], buf+sizeof(size_t), sizeof(size_t)*NDim);
+
+            // allocate memory
+            this->create(&dimensions);
+
+            // copy the content (skipped when empty: data_ may be 0x0 and memcpy to a null pointer is undefined)
+            if ( elements_ > 0 ) memcpy(this->data_, buf+sizeof(size_t)+sizeof(size_t)*NDim, sizeof(T)*elements_);
+        }
+        else
+        {
+            this->clear();
+        }
+        len = sizeof(size_t)+sizeof(size_t)*NDim+sizeof(T)*elements_;
+        return true; // Temporary. Should not be a boolean function.
+    }
 }
diff --git a/toolboxes/core/cpu/hoNDArray_utils.h b/toolboxes/core/cpu/hoNDArray_utils.h
index a362439..b25925a 100644
--- a/toolboxes/core/cpu/hoNDArray_utils.h
+++ b/toolboxes/core/cpu/hoNDArray_utils.h
@@ -455,24 +455,24 @@ namespace Gadgetron {
 
         std::vector<size_t> ind(NDim, 0);
         ind[NDim-1] = indLastDim;
-        int offsetIndLastDim = x.calculate_offset(ind);
+        size_t offsetIndLastDim = x.calculate_offset(ind);
 
         size_t N = x.get_number_of_elements() / lastDim;
 
-        int l;
+        long long l;
 #ifdef GCC_OLD_FLAG
 #pragma omp parallel for default(none) private(l) shared(lastDim, offsetIndLastDim, ind, indLastDim, N, NDim)
 #else
 #pragma omp parallel for default(none) private(l) shared(lastDim, offsetIndLastDim, x, ind, indLastDim, N, NDim)
 #endif
-        for ( l=0; l<(int)lastDim; l++ )
-          {
+        for ( l=0; l<(long long)lastDim; l++ )
+        {
             if ( l==indLastDim ) continue;
             ind[NDim-1] = l;
-            int offsetInd = x.calculate_offset(ind);
+            size_t offsetInd = x.calculate_offset(ind);
 
             memcpy(x.begin()+offsetInd, x.begin()+offsetIndLastDim, sizeof(T)*N);
-          }
+        }
       }
     catch (...)
       {
@@ -482,4 +482,200 @@ namespace Gadgetron {
     return true;
   }
 
+  // Tells whether coordinate `co` lies on the first or last index of any dimension of `dims`.
+  // Used to decide if every neighbor needed for linear interpolation exists.
+  // Dimensions of size 1 are ignored.
+
+  template<class REAL, unsigned int D> inline bool
+  is_border_pixel( vector_td<size_t,D> co, vector_td<size_t,D> dims )
+  {
+    for( size_t d=0; d<D; d++ ){
+      const size_t extent = dims[d];
+      if( extent <= 1 )
+        continue; // singleton dimension: never counts as a border
+      const size_t pos = co[d];
+      if( pos == 0 || pos == (extent-1) )
+        return true;
+    }
+    return false;
+  }
+
+  // Downsample
+  //
+  // Halves the size of the first D dimensions of the input array. Every output element is
+  // the mean of the input samples found at twice its coordinate plus each offset in {0,1}^D
+  // that passes the singleton-dimension filter. Dimensions beyond D are treated as batches
+  // and processed independently.
+  //
+  // Throws std::runtime_error if the input is null, has fewer than D dimensions, or has an
+  // odd size (other than 1) in any of the first D dimensions.
+  // Returns a newly allocated array holding the downsampled data.
+  template<class REAL, unsigned int D> 
+  boost::shared_ptr< hoNDArray<REAL> > downsample( hoNDArray<REAL> *_in )
+  {
+    // A few sanity checks 
+
+    if( _in == 0x0 ){
+      throw std::runtime_error( "downsample(): illegal input provided.");
+    }
+    
+    if( _in->get_number_of_dimensions() < D ){
+      throw std::runtime_error( "downsample(): the number of array dimensions should be at least D");
+    }
+    
+    for( size_t d=0; d<D; d++ ){
+      if( (_in->get_size(d)%2) == 1 && _in->get_size(d) != 1 ){
+        throw std::runtime_error( "downsample(): uneven array dimensions larger than one not accepted");
+      }
+    }
+    
+    typename uint64d<D>::Type matrix_size_in = from_std_vector<size_t,D>( *_in->get_dimensions() );
+    typename uint64d<D>::Type matrix_size_out = matrix_size_in >> 1;
+
+    // A dimension of size 1 shifts down to 0; keep it at 1
+    for( size_t d=0; d<D; d++ ){
+      if( matrix_size_out[d] == 0 ) 
+        matrix_size_out[d] = 1;
+    }
+  
+    const size_t num_elements = prod(matrix_size_out);
+    const size_t num_elements_in = prod(matrix_size_in); // per-batch input size, hoisted out of the loop
+    size_t num_batches = 1;
+
+    // Output shape: downsampled first D dimensions followed by the untouched batch dimensions
+    std::vector<size_t> dims = to_std_vector(matrix_size_out);
+    for( size_t d=D; d<_in->get_number_of_dimensions(); d++ ){
+      num_batches *= _in->get_size(d);
+      dims.push_back(_in->get_size(d));
+    }
+  
+    REAL *in = _in->get_data_ptr();
+
+    boost::shared_ptr< hoNDArray<REAL> > _out( new hoNDArray<REAL>(&dims) );
+    REAL *out = _out->get_data_ptr();
+    
+    typedef vector_td<size_t,D> coord_type;
+
+    // Loop invariants (read-only inside the parallel loop)
+    const coord_type twos(2);
+    const size_t num_adds = 1 << D;
+    const long long num_outputs = (long long)(num_elements*num_batches);
+
+#ifdef USE_OMP
+#pragma omp parallel for
+#endif
+    for( long long idx=0; idx < num_outputs; idx++ ){
+
+      const size_t frame_offset = idx/num_elements;
+      const coord_type co_out = idx_to_co<D>( idx-frame_offset*num_elements, matrix_size_out );
+      const coord_type co_in = co_out << 1;
+
+      size_t actual_adds = 0;
+      REAL res = REAL(0);
+
+      for( size_t i=0; i<num_adds; i++ ){
+        const coord_type local_co = idx_to_co<D>( i, twos );
+        // To allow array dimensions of size 1
+        // NOTE(review): the filter compares against the OUTPUT extents; assuming
+        // weak_greater_equal is true when any component is >=, an input extent of 2
+        // (output extent 1) also has its second sample skipped - confirm this is intended.
+        if( weak_greater_equal( local_co, matrix_size_out ) ) continue;
+        const size_t in_idx = co_to_idx<D>(co_in+local_co, matrix_size_in)+frame_offset*num_elements_in;
+        actual_adds++;
+        res += in[in_idx];
+      }    
+      out[idx] = res/REAL(actual_adds);
+    }
+
+    return _out;
+  }
+
+  // Linear interpolation upsampling
+  //
+  // Doubles the size of the first D dimensions of the input array (dimensions of size 1
+  // stay at size 1). Each non-border output element is a weighted sum of up to 2^D input
+  // samples, with a weight factor of 0.25 or 0.75 per non-singleton dimension. Output
+  // elements on the border are set to zero. Dimensions beyond D are treated as batches.
+  //
+  // Throws std::runtime_error if the input is null or has fewer than D dimensions.
+  // Returns a newly allocated array holding the upsampled data.
+  template<class REAL, unsigned int D> boost::shared_ptr< hoNDArray<REAL> >
+  upsample( hoNDArray<REAL> *_in )
+  {
+    // A few sanity checks 
+
+    if( _in == 0x0 ){
+      throw std::runtime_error("upsample(): illegal input provided.");
+    }
+
+    if( _in->get_number_of_dimensions() < D ){
+      throw std::runtime_error( "upsample(): the number of array dimensions should be at least D");
+    }
+    
+    typename uint64d<D>::Type matrix_size_in = from_std_vector<size_t,D>( *_in->get_dimensions() );
+    typename uint64d<D>::Type matrix_size_out = matrix_size_in << 1;
+
+    // Singleton dimensions are not upsampled
+    for( size_t d=0; d<D; d++ ){
+      if( matrix_size_in[d] == 1 )
+        matrix_size_out[d] = 1;
+    }
+  
+    const size_t num_elements = prod(matrix_size_out);
+    const size_t num_elements_in = prod(matrix_size_in); // per-batch input size, hoisted out of the loop
+    size_t num_batches = 1;
+
+    // Output shape: upsampled first D dimensions followed by the untouched batch dimensions
+    std::vector<size_t> dims = to_std_vector(matrix_size_out);
+    for( size_t d=D; d<_in->get_number_of_dimensions(); d++ ){
+      num_batches *= _in->get_size(d);
+      dims.push_back(_in->get_size(d));
+    }
+
+    REAL *in = _in->get_data_ptr();
+
+    boost::shared_ptr< hoNDArray<REAL> > _out( new hoNDArray<REAL>(&dims) );
+    REAL *out = _out->get_data_ptr();
+    
+    typedef vector_td<size_t,D> coord_type;
+
+    // Loop invariants (read-only inside the parallel loop)
+    const coord_type twos(2);
+    const size_t num_neighbors = 1 << D;
+    const long long num_outputs = (long long)(num_elements*num_batches);
+
+    // Per-dimension offset subtracted before halving; be careful about dimensions of size 1
+    coord_type ones(1);
+    for( size_t d=0; d<D; d++ ){
+      if( matrix_size_out[d] == 1 )
+        ones[d] = 0;
+    }
+
+#ifdef USE_OMP
+#pragma omp parallel for
+#endif
+    for( long long idx=0; idx < num_outputs; idx++ ){
+      
+      REAL res = REAL(0);
+
+      const size_t frame_idx = idx/num_elements;
+      const coord_type co_out = idx_to_co<D>( idx-frame_idx*num_elements, matrix_size_out );
+
+      // We will only proceed if all neighbours exist (this adds a zero-boundary to the upsampled image/vector field)
+      if( !is_border_pixel<REAL,D>(co_out, matrix_size_out) ){
+      
+        for( size_t i=0; i<num_neighbors; i++ ){
+
+          // Determine coordinate of neighbor in input
+          const coord_type stride = idx_to_co<D>( i, twos );
+
+          if( weak_greater_equal( stride, matrix_size_out ) ) continue; // To allow array dimensions of 1
+
+          const coord_type co_in = ((co_out-ones)>>1)+stride;
+
+          // Read corresponding pixel value
+          const size_t in_idx = co_to_idx<D>(co_in, matrix_size_in)+frame_idx*num_elements_in;
+          const REAL value = in[in_idx];
+
+          // Determine weight: one factor per non-singleton dimension
+          REAL weight = REAL(1);
+          for( size_t dim=0; dim<D; dim++ ){
+            if( matrix_size_in[dim] > 1 ){
+              if( stride.vec[dim] == (co_out.vec[dim]%2) ) {
+                weight *= REAL(0.25);
+              }
+              else{
+                weight *= REAL(0.75);
+              }
+            }
+          }
+
+          // Accumulate result
+          res += weight*value;
+        }
+      }
+      out[idx] = res;
+    }
+    
+    return _out;
+  }
+
 }
diff --git a/toolboxes/core/cpu/hoNDBoundaryHandler.h b/toolboxes/core/cpu/hoNDBoundaryHandler.h
new file mode 100644
index 0000000..e053ed1
--- /dev/null
+++ b/toolboxes/core/cpu/hoNDBoundaryHandler.h
@@ -0,0 +1,276 @@
+/** \file       hoNDBoundaryHandler.h
+    \brief      N-dimensional boundary condition handler
+
+                Designed to work with hoNDArray and hoNDImage
+
+    \author     Hui Xue
+*/
+
+#pragma once
+
+#include "hoNDArray.h"
+#include "hoNDImage.h"
+
+namespace Gadgetron
+{
+    // define the boundary condition
+    // Each value selects one hoNDBoundaryHandler subclass in createBoundaryHandler() and
+    // maps to/from a string via getBoundaryHandlerName()/getBoundaryHandlerType().
+    enum GT_BOUNDARY_CONDITION
+    {
+        GT_BOUNDARY_CONDITION_FIXEDVALUE=34568, // a magic number
+        GT_BOUNDARY_CONDITION_BORDERVALUE,
+        GT_BOUNDARY_CONDITION_PERIODIC,
+        GT_BOUNDARY_CONDITION_MIRROR
+    };
+
+    /// Converts a boundary condition value to its textual name.
+    ///
+    /// \param bh  boundary condition to name
+    /// \return    "FixedValue", "BorderValue", "Periodic" or "Mirror"; for any other value an
+    ///            error message is emitted and an empty string is returned
+    inline std::string getBoundaryHandlerName(GT_BOUNDARY_CONDITION bh)
+    {
+        switch (bh)
+        {
+            case GT_BOUNDARY_CONDITION_FIXEDVALUE:
+                return "FixedValue";
+
+            case GT_BOUNDARY_CONDITION_BORDERVALUE:
+                return "BorderValue";
+
+            case GT_BOUNDARY_CONDITION_PERIODIC:
+                return "Periodic";
+
+            case GT_BOUNDARY_CONDITION_MIRROR:
+                return "Mirror";
+
+            default:
+                break;
+        }
+
+        // only reached for values outside the enumeration
+        GADGET_ERROR_MSG("Unrecognized boundary handler type : " << bh);
+        return std::string();
+    }
+
+    /// Converts a textual boundary condition name to its enumeration value.
+    ///
+    /// \param bh_name  one of "FixedValue", "BorderValue", "Periodic" or "Mirror" (case sensitive)
+    /// \return         the matching GT_BOUNDARY_CONDITION value. For an unrecognized name an
+    ///                 error message is emitted and GT_BOUNDARY_CONDITION_FIXEDVALUE is returned,
+    ///                 so the result is always a valid enumerator (previously the returned
+    ///                 variable was left uninitialized in that case).
+    inline GT_BOUNDARY_CONDITION getBoundaryHandlerType(const std::string& bh_name)
+    {
+        // defined fallback for names that match none of the branches below
+        GT_BOUNDARY_CONDITION bh = GT_BOUNDARY_CONDITION_FIXEDVALUE;
+
+        if ( bh_name == "FixedValue" )
+        {
+            bh = GT_BOUNDARY_CONDITION_FIXEDVALUE;
+        }
+        else if ( bh_name == "BorderValue" )
+        {
+            bh = GT_BOUNDARY_CONDITION_BORDERVALUE;
+        }
+        else if ( bh_name == "Periodic" )
+        {
+            bh = GT_BOUNDARY_CONDITION_PERIODIC;
+        }
+        else if ( bh_name == "Mirror" )
+        {
+            bh = GT_BOUNDARY_CONDITION_MIRROR;
+        }
+        else
+        {
+            GADGET_ERROR_MSG("Unrecognized boundary handler name : " << bh_name);
+        }
+
+        return bh;
+    }
+
+    /// Abstract interface for reading array elements with signed indexes.
+    /// Subclasses implement the operator() overloads and decide what is returned for
+    /// indexes that fall outside the array. The handler stores a non-owning pointer to
+    /// the array it reads from.
+    template <typename ArrayType>
+    class hoNDBoundaryHandler
+    {
+    public:
+
+        typedef hoNDBoundaryHandler<ArrayType> Self;
+        typedef typename ArrayType::value_type T;
+
+        // enum { D = ArrayType::D }
+
+        /// Creates a handler with no array attached; call setArray() before use
+        hoNDBoundaryHandler() : array_(NULL) {}
+        /// Creates a handler reading from array a
+        hoNDBoundaryHandler(ArrayType& a) : array_(&a) {}
+        /// Only forgets the array pointer; the array itself is not destroyed here
+        virtual ~hoNDBoundaryHandler() { array_ = NULL; }
+
+        /// access the pixel value, by index vector or by 1 to 9 separate indexes
+        virtual T operator()( const std::vector<gt_index_type>& ind ) = 0;
+        virtual T operator()( gt_index_type x ) = 0;
+        virtual T operator()( gt_index_type x, gt_index_type y ) = 0;
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z ) = 0;
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s ) = 0;
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p ) = 0;
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r ) = 0;
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a ) = 0;
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a, gt_index_type q ) = 0;
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a, gt_index_type q, gt_index_type u ) = 0;
+
+        /// Attaches the handler to array a
+        void setArray(ArrayType& a) { array_ = &a; }
+
+        /// return a%b, shifted into [0, b) when b is positive and the remainder is negative
+        inline gt_index_type mod(gt_index_type a, gt_index_type b)
+        {
+            gt_index_type remainder = a % b;
+
+            if ( b>0 && remainder<0 )
+            {
+                remainder += b;
+            }
+
+            return remainder;
+        }
+
+    protected:
+
+        /// array being read; not owned by the handler
+        ArrayType* array_;
+    };
+
+    /// Boundary handler that returns a fixed value for indexes outside the array.
+    /// In-range indexes return the array element; the operator() overloads are defined
+    /// in hoNDBoundaryHandler.hxx.
+    template <typename ArrayType>
+    class hoNDBoundaryHandlerFixedValue : public hoNDBoundaryHandler<ArrayType>
+    {
+    public:
+
+        typedef hoNDBoundaryHandler<ArrayType> BaseClass;
+        typedef hoNDBoundaryHandlerFixedValue<ArrayType> Self;
+        typedef typename BaseClass::T T;
+
+        /// \param v  value returned for out-of-range indexes.
+        /// The default is spelled T(0) in both constructors so that it does not rely on
+        /// an implicit conversion from int to T.
+        hoNDBoundaryHandlerFixedValue(T v=T(0)) : BaseClass(), value_(v) {}
+        /// \param a  array to read from
+        /// \param v  value returned for out-of-range indexes
+        hoNDBoundaryHandlerFixedValue(ArrayType& a, T v=T(0)) : BaseClass(a), value_(v) {}
+        virtual ~hoNDBoundaryHandlerFixedValue() {}
+
+        /// access the pixel value, by index vector or by 1 to 9 separate indexes
+        virtual T operator()( const std::vector<gt_index_type>& ind );
+        virtual T operator()( gt_index_type x );
+        virtual T operator()( gt_index_type x, gt_index_type y );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a, gt_index_type q );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a, gt_index_type q, gt_index_type u );
+
+    protected:
+        using BaseClass::array_;
+        /// value returned for out-of-range indexes
+        T value_;
+    };
+
+    /// Boundary handler that clamps each out-of-range index to the nearest valid index
+    /// (0 or size-1) of its dimension, so the closest border element is returned.
+    /// The operator() overloads are defined in hoNDBoundaryHandler.hxx.
+    template <typename ArrayType>
+    class hoNDBoundaryHandlerBorderValue : public hoNDBoundaryHandler<ArrayType>
+    {
+    public:
+
+        typedef hoNDBoundaryHandler<ArrayType> BaseClass;
+        typedef hoNDBoundaryHandlerBorderValue<ArrayType> Self;
+        typedef typename BaseClass::T T;
+
+        hoNDBoundaryHandlerBorderValue() : BaseClass() {}
+        hoNDBoundaryHandlerBorderValue(ArrayType& a) : BaseClass(a) {}
+        virtual ~hoNDBoundaryHandlerBorderValue() {}
+
+        /// access the pixel value, by index vector or by 1 to 9 separate indexes
+        virtual T operator()( const std::vector<gt_index_type>& ind );
+        virtual T operator()( gt_index_type x );
+        virtual T operator()( gt_index_type x, gt_index_type y );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a, gt_index_type q );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a, gt_index_type q, gt_index_type u );
+
+    protected:
+        // makes the base-class member visible without this-> inside the template
+        using BaseClass::array_;
+    };
+
+    /// Boundary handler that wraps out-of-range indexes around the array using the
+    /// base-class mod() with the dimension size.
+    /// The operator() overloads are defined in hoNDBoundaryHandler.hxx.
+    template <typename ArrayType>
+    class hoNDBoundaryHandlerPeriodic : public hoNDBoundaryHandler<ArrayType>
+    {
+    public:
+
+        typedef hoNDBoundaryHandler<ArrayType> BaseClass;
+        typedef hoNDBoundaryHandlerPeriodic<ArrayType> Self;
+        typedef typename BaseClass::T T;
+
+        hoNDBoundaryHandlerPeriodic() : BaseClass() {}
+        hoNDBoundaryHandlerPeriodic(ArrayType& a) : BaseClass(a) {}
+        virtual ~hoNDBoundaryHandlerPeriodic() {}
+
+        /// access the pixel value, by index vector or by 1 to 9 separate indexes
+        virtual T operator()( const std::vector<gt_index_type>& ind );
+        virtual T operator()( gt_index_type x );
+        virtual T operator()( gt_index_type x, gt_index_type y );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a, gt_index_type q );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a, gt_index_type q, gt_index_type u );
+
+    protected:
+        // makes the base-class member visible without this-> inside the template
+        using BaseClass::array_;
+    };
+
+    /// Boundary handler created for GT_BOUNDARY_CONDITION_MIRROR.
+    /// NOTE(review): presumably reflects out-of-range indexes back into the array; the
+    /// operator() definitions live in hoNDBoundaryHandler.hxx - confirm the exact
+    /// reflection rule there.
+    template <typename ArrayType>
+    class hoNDBoundaryHandlerMirror : public hoNDBoundaryHandler<ArrayType>
+    {
+    public:
+
+        typedef hoNDBoundaryHandler<ArrayType> BaseClass;
+        typedef hoNDBoundaryHandlerMirror<ArrayType> Self;
+        typedef typename BaseClass::T T;
+
+        hoNDBoundaryHandlerMirror() : BaseClass() {}
+        hoNDBoundaryHandlerMirror(ArrayType& a) : BaseClass(a) {}
+        virtual ~hoNDBoundaryHandlerMirror() {}
+
+        /// access the pixel value, by index vector or by 1 to 9 separate indexes
+        virtual T operator()( const std::vector<gt_index_type>& ind );
+        virtual T operator()( gt_index_type x );
+        virtual T operator()( gt_index_type x, gt_index_type y );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a, gt_index_type q );
+        virtual T operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a, gt_index_type q, gt_index_type u );
+
+    protected:
+        // makes the base-class member visible without this-> inside the template
+        using BaseClass::array_;
+    };
+
+    /// Creates the boundary handler that corresponds to a boundary condition value.
+    ///
+    /// \param bh  boundary condition to create a handler for
+    /// \return    a handler allocated with new and not yet attached to an array, or NULL
+    ///            (after an error message) when bh is not a known value.
+    ///            NOTE(review): nothing here releases the object, so the caller presumably
+    ///            has to delete it - confirm at the call sites.
+    template <typename ArrayType> 
+    hoNDBoundaryHandler<ArrayType>* createBoundaryHandler(GT_BOUNDARY_CONDITION bh)
+    {
+        switch (bh)
+        {
+            case GT_BOUNDARY_CONDITION_FIXEDVALUE:
+                return new hoNDBoundaryHandlerFixedValue<ArrayType>();
+
+            case GT_BOUNDARY_CONDITION_BORDERVALUE:
+                return new hoNDBoundaryHandlerBorderValue<ArrayType>();
+
+            case GT_BOUNDARY_CONDITION_PERIODIC:
+                return new hoNDBoundaryHandlerPeriodic<ArrayType>();
+
+            case GT_BOUNDARY_CONDITION_MIRROR:
+                return new hoNDBoundaryHandlerMirror<ArrayType>();
+
+            default:
+                break;
+        }
+
+        // only reached for values outside the enumeration
+        GADGET_ERROR_MSG("Unrecognized boundary handler type : " << bh);
+        return NULL;
+    }
+}
+
+#include "hoNDBoundaryHandler.hxx"
diff --git a/toolboxes/core/cpu/hoNDBoundaryHandler.hxx b/toolboxes/core/cpu/hoNDBoundaryHandler.hxx
new file mode 100644
index 0000000..abe5903
--- /dev/null
+++ b/toolboxes/core/cpu/hoNDBoundaryHandler.hxx
@@ -0,0 +1,470 @@
+/** \file       hoNDBoundaryHandler.hxx
+    \brief      N-dimensional boundary condition handler
+
+                Designed to work with hoNDArray and hoNDImage
+
+    \author     Hui Xue
+*/
+
+namespace Gadgetron
+{
+    /// hoNDBoundaryHandlerFixedValue
+    ///
+    /// Every overload follows the same rule: when point_in_range() reports that the index
+    /// lies outside the array, the stored value_ is returned; otherwise the array element
+    /// at that index is returned.
+
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerFixedValue<ArrayType>::T hoNDBoundaryHandlerFixedValue<ArrayType>::operator()( const std::vector<gt_index_type>& ind )
+    {
+        if ( !array_->point_in_range(ind) ) return value_;
+        return (*array_)(ind);
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerFixedValue<ArrayType>::T hoNDBoundaryHandlerFixedValue<ArrayType>::operator()( gt_index_type x )
+    {
+        if ( !array_->point_in_range(x) ) return value_;
+        return (*array_)( size_t(x) );
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerFixedValue<ArrayType>::T hoNDBoundaryHandlerFixedValue<ArrayType>::operator()( gt_index_type x, gt_index_type y )
+    {
+        if ( !array_->point_in_range(x, y) ) return value_;
+        return (*array_)( size_t(x), size_t(y) );
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerFixedValue<ArrayType>::T hoNDBoundaryHandlerFixedValue<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z )
+    {
+        if ( !array_->point_in_range(x, y, z) ) return value_;
+        return (*array_)( size_t(x), size_t(y), size_t(z) );
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerFixedValue<ArrayType>::T hoNDBoundaryHandlerFixedValue<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s )
+    {
+        if ( !array_->point_in_range(x, y, z, s) ) return value_;
+        return (*array_)( size_t(x), size_t(y), size_t(z), size_t(s) );
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerFixedValue<ArrayType>::T hoNDBoundaryHandlerFixedValue<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p )
+    {
+        if ( !array_->point_in_range(x, y, z, s, p) ) return value_;
+        return (*array_)( size_t(x), size_t(y), size_t(z), size_t(s), size_t(p) );
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerFixedValue<ArrayType>::T hoNDBoundaryHandlerFixedValue<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r )
+    {
+        if ( !array_->point_in_range(x, y, z, s, p, r) ) return value_;
+        return (*array_)( size_t(x), size_t(y), size_t(z), size_t(s), size_t(p), size_t(r) );
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerFixedValue<ArrayType>::T hoNDBoundaryHandlerFixedValue<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a )
+    {
+        if ( !array_->point_in_range(x, y, z, s, p, r, a) ) return value_;
+        return (*array_)( size_t(x), size_t(y), size_t(z), size_t(s), size_t(p), size_t(r), size_t(a) );
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerFixedValue<ArrayType>::T hoNDBoundaryHandlerFixedValue<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a, gt_index_type q )
+    {
+        if ( !array_->point_in_range(x, y, z, s, p, r, a, q) ) return value_;
+        return (*array_)( size_t(x), size_t(y), size_t(z), size_t(s), size_t(p), size_t(r), size_t(a), size_t(q) );
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerFixedValue<ArrayType>::T hoNDBoundaryHandlerFixedValue<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a, gt_index_type q, gt_index_type u )
+    {
+        if ( !array_->point_in_range(x, y, z, s, p, r, a, q, u) ) return value_;
+        return (*array_)( size_t(x), size_t(y), size_t(z), size_t(s), size_t(p), size_t(r), size_t(a), size_t(q), size_t(u) );
+    }
+
+    /// hoNDBoundaryHandlerBorderValue
+    ///
+    /// Every overload clamps each index to the valid range of its dimension and returns
+    /// the array element at the clamped position.
+
+    /// Clamps a signed index to [0, extent-1]: negative indexes map to 0, indexes at or
+    /// beyond extent map to extent-1, in-range indexes are returned unchanged.
+    /// Shared by all overloads so the signed/unsigned comparison is written (and cast)
+    /// in exactly one place.
+    inline size_t gt_clamp_index_to_border( gt_index_type idx, size_t extent )
+    {
+        if ( idx < 0 ) return 0;
+        if ( idx >= (gt_index_type)extent ) return extent-1;
+        return (size_t)idx;
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerBorderValue<ArrayType>::T hoNDBoundaryHandlerBorderValue<ArrayType>::operator()( const std::vector<gt_index_type>& ind )
+    {
+        const size_t D = array_->get_number_of_dimensions();
+        std::vector<size_t> indInside(D);
+        for ( size_t ii=0; ii<D; ii++ )
+        {
+            indInside[ii] = gt_clamp_index_to_border(ind[ii], array_->get_size(ii));
+        }
+        return (*array_)(indInside);
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerBorderValue<ArrayType>::T hoNDBoundaryHandlerBorderValue<ArrayType>::operator()( gt_index_type x )
+    {
+        const size_t x_inside = gt_clamp_index_to_border(x, array_->get_size(0));
+
+        return (*array_)(x_inside);
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerBorderValue<ArrayType>::T hoNDBoundaryHandlerBorderValue<ArrayType>::operator()( gt_index_type x, gt_index_type y )
+    {
+        const size_t x_inside = gt_clamp_index_to_border(x, array_->get_size(0));
+        const size_t y_inside = gt_clamp_index_to_border(y, array_->get_size(1));
+
+        return (*array_)(x_inside, y_inside);
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerBorderValue<ArrayType>::T hoNDBoundaryHandlerBorderValue<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z )
+    {
+        const size_t x_inside = gt_clamp_index_to_border(x, array_->get_size(0));
+        const size_t y_inside = gt_clamp_index_to_border(y, array_->get_size(1));
+        const size_t z_inside = gt_clamp_index_to_border(z, array_->get_size(2));
+
+        return (*array_)(x_inside, y_inside, z_inside);
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerBorderValue<ArrayType>::T hoNDBoundaryHandlerBorderValue<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s )
+    {
+        const size_t x_inside = gt_clamp_index_to_border(x, array_->get_size(0));
+        const size_t y_inside = gt_clamp_index_to_border(y, array_->get_size(1));
+        const size_t z_inside = gt_clamp_index_to_border(z, array_->get_size(2));
+        const size_t s_inside = gt_clamp_index_to_border(s, array_->get_size(3));
+
+        return (*array_)(x_inside, y_inside, z_inside, s_inside);
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerBorderValue<ArrayType>::T hoNDBoundaryHandlerBorderValue<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p )
+    {
+        const size_t x_inside = gt_clamp_index_to_border(x, array_->get_size(0));
+        const size_t y_inside = gt_clamp_index_to_border(y, array_->get_size(1));
+        const size_t z_inside = gt_clamp_index_to_border(z, array_->get_size(2));
+        const size_t s_inside = gt_clamp_index_to_border(s, array_->get_size(3));
+        const size_t p_inside = gt_clamp_index_to_border(p, array_->get_size(4));
+
+        return (*array_)(x_inside, y_inside, z_inside, s_inside, p_inside);
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerBorderValue<ArrayType>::T hoNDBoundaryHandlerBorderValue<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r )
+    {
+        const size_t x_inside = gt_clamp_index_to_border(x, array_->get_size(0));
+        const size_t y_inside = gt_clamp_index_to_border(y, array_->get_size(1));
+        const size_t z_inside = gt_clamp_index_to_border(z, array_->get_size(2));
+        const size_t s_inside = gt_clamp_index_to_border(s, array_->get_size(3));
+        const size_t p_inside = gt_clamp_index_to_border(p, array_->get_size(4));
+        const size_t r_inside = gt_clamp_index_to_border(r, array_->get_size(5));
+
+        return (*array_)(x_inside, y_inside, z_inside, s_inside, p_inside, r_inside);
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerBorderValue<ArrayType>::T hoNDBoundaryHandlerBorderValue<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a )
+    {
+        const size_t x_inside = gt_clamp_index_to_border(x, array_->get_size(0));
+        const size_t y_inside = gt_clamp_index_to_border(y, array_->get_size(1));
+        const size_t z_inside = gt_clamp_index_to_border(z, array_->get_size(2));
+        const size_t s_inside = gt_clamp_index_to_border(s, array_->get_size(3));
+        const size_t p_inside = gt_clamp_index_to_border(p, array_->get_size(4));
+        const size_t r_inside = gt_clamp_index_to_border(r, array_->get_size(5));
+        const size_t a_inside = gt_clamp_index_to_border(a, array_->get_size(6));
+
+        return (*array_)(x_inside, y_inside, z_inside, s_inside, p_inside, r_inside, a_inside);
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerBorderValue<ArrayType>::T hoNDBoundaryHandlerBorderValue<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a, gt_index_type q )
+    {
+        const size_t x_inside = gt_clamp_index_to_border(x, array_->get_size(0));
+        const size_t y_inside = gt_clamp_index_to_border(y, array_->get_size(1));
+        const size_t z_inside = gt_clamp_index_to_border(z, array_->get_size(2));
+        const size_t s_inside = gt_clamp_index_to_border(s, array_->get_size(3));
+        const size_t p_inside = gt_clamp_index_to_border(p, array_->get_size(4));
+        const size_t r_inside = gt_clamp_index_to_border(r, array_->get_size(5));
+        const size_t a_inside = gt_clamp_index_to_border(a, array_->get_size(6));
+        const size_t q_inside = gt_clamp_index_to_border(q, array_->get_size(7));
+
+        return (*array_)(x_inside, y_inside, z_inside, s_inside, p_inside, r_inside, a_inside, q_inside);
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerBorderValue<ArrayType>::T hoNDBoundaryHandlerBorderValue<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a, gt_index_type q, gt_index_type u )
+    {
+        const size_t x_inside = gt_clamp_index_to_border(x, array_->get_size(0));
+        const size_t y_inside = gt_clamp_index_to_border(y, array_->get_size(1));
+        const size_t z_inside = gt_clamp_index_to_border(z, array_->get_size(2));
+        const size_t s_inside = gt_clamp_index_to_border(s, array_->get_size(3));
+        const size_t p_inside = gt_clamp_index_to_border(p, array_->get_size(4));
+        const size_t r_inside = gt_clamp_index_to_border(r, array_->get_size(5));
+        const size_t a_inside = gt_clamp_index_to_border(a, array_->get_size(6));
+        const size_t q_inside = gt_clamp_index_to_border(q, array_->get_size(7));
+        const size_t u_inside = gt_clamp_index_to_border(u, array_->get_size(8));
+
+        return (*array_)(x_inside, y_inside, z_inside, s_inside, p_inside, r_inside, a_inside, q_inside, u_inside);
+    }
+
+    /// hoNDBoundaryHandlerPeriodic
+
+    /// Periodic lookup with one index per array dimension, given as a vector.
+    /// An index outside [0, get_size(d)) is replaced by this->mod(index, size);
+    /// in-range indices are used unchanged.
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerPeriodic<ArrayType>::T hoNDBoundaryHandlerPeriodic<ArrayType>::operator()( const std::vector<gt_index_type>& ind )
+    {
+        const unsigned int numDims = (unsigned int)array_->get_number_of_dimensions();
+        std::vector<size_t> wrapped(numDims);
+
+        for ( unsigned int d=0; d<numDims; d++ )
+        {
+            const bool inside = (ind[d]>=0) && (ind[d]<(gt_index_type)array_->get_size(d));
+
+            if ( inside )
+            {
+                wrapped[d] = ind[d];
+            }
+            else
+            {
+                wrapped[d] = this->mod(ind[d], array_->get_size(d));
+            }
+        }
+
+        return (*array_)(wrapped);
+    }
+
+    /// Periodic lookup with one index: an out-of-range x is replaced by
+    /// this->mod(x, size of dimension 0); an in-range x is used unchanged.
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerPeriodic<ArrayType>::T hoNDBoundaryHandlerPeriodic<ArrayType>::operator()( gt_index_type x )
+    {
+        const gt_index_type len = (gt_index_type)array_->get_size(0);
+        const bool inside = (x>=0) && (x<len);
+        size_t x_wrapped = inside ? x : this->mod(x, len);
+
+        return (*array_)(x_wrapped);
+    }
+
+    /// Periodic lookup with two indices: each out-of-range index is replaced by
+    /// this->mod(index, size of its dimension); in-range indices are used unchanged.
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerPeriodic<ArrayType>::T hoNDBoundaryHandlerPeriodic<ArrayType>::operator()( gt_index_type x, gt_index_type y )
+    {
+        const gt_index_type requested[2] = { x, y };
+        size_t wrapped[2];
+
+        for ( unsigned int d=0; d<2; d++ )
+        {
+            const gt_index_type len = (gt_index_type)array_->get_size(d);
+            const bool inside = (requested[d]>=0) && (requested[d]<len);
+            wrapped[d] = inside ? requested[d] : this->mod(requested[d], len);
+        }
+
+        return (*array_)(wrapped[0], wrapped[1]);
+    }
+
+    /// Periodic lookup with three indices: each out-of-range index is replaced by
+    /// this->mod(index, size of its dimension); in-range indices are used unchanged.
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerPeriodic<ArrayType>::T hoNDBoundaryHandlerPeriodic<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z )
+    {
+        const gt_index_type requested[3] = { x, y, z };
+        size_t wrapped[3];
+
+        for ( unsigned int d=0; d<3; d++ )
+        {
+            const gt_index_type len = (gt_index_type)array_->get_size(d);
+            const bool inside = (requested[d]>=0) && (requested[d]<len);
+            wrapped[d] = inside ? requested[d] : this->mod(requested[d], len);
+        }
+
+        return (*array_)(wrapped[0], wrapped[1], wrapped[2]);
+    }
+
+    /// Periodic lookup with four indices: each out-of-range index is replaced by
+    /// this->mod(index, size of its dimension); in-range indices are used unchanged.
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerPeriodic<ArrayType>::T hoNDBoundaryHandlerPeriodic<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s )
+    {
+        const gt_index_type requested[4] = { x, y, z, s };
+        size_t wrapped[4];
+
+        for ( unsigned int d=0; d<4; d++ )
+        {
+            const gt_index_type len = (gt_index_type)array_->get_size(d);
+            const bool inside = (requested[d]>=0) && (requested[d]<len);
+            wrapped[d] = inside ? requested[d] : this->mod(requested[d], len);
+        }
+
+        return (*array_)(wrapped[0], wrapped[1], wrapped[2], wrapped[3]);
+    }
+
+    /// Periodic lookup with five indices: each out-of-range index is replaced by
+    /// this->mod(index, size of its dimension); in-range indices are used unchanged.
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerPeriodic<ArrayType>::T hoNDBoundaryHandlerPeriodic<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p )
+    {
+        const gt_index_type requested[5] = { x, y, z, s, p };
+        size_t wrapped[5];
+
+        for ( unsigned int d=0; d<5; d++ )
+        {
+            const gt_index_type len = (gt_index_type)array_->get_size(d);
+            const bool inside = (requested[d]>=0) && (requested[d]<len);
+            wrapped[d] = inside ? requested[d] : this->mod(requested[d], len);
+        }
+
+        return (*array_)(wrapped[0], wrapped[1], wrapped[2], wrapped[3], wrapped[4]);
+    }
+
+    /// Periodic lookup with six indices: each out-of-range index is replaced by
+    /// this->mod(index, size of its dimension); in-range indices are used unchanged.
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerPeriodic<ArrayType>::T hoNDBoundaryHandlerPeriodic<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r )
+    {
+        const gt_index_type requested[6] = { x, y, z, s, p, r };
+        size_t wrapped[6];
+
+        for ( unsigned int d=0; d<6; d++ )
+        {
+            const gt_index_type len = (gt_index_type)array_->get_size(d);
+            const bool inside = (requested[d]>=0) && (requested[d]<len);
+            wrapped[d] = inside ? requested[d] : this->mod(requested[d], len);
+        }
+
+        return (*array_)(wrapped[0], wrapped[1], wrapped[2], wrapped[3], wrapped[4], wrapped[5]);
+    }
+
+    /// Periodic lookup with seven indices: each out-of-range index is replaced by
+    /// this->mod(index, size of its dimension); in-range indices are used unchanged.
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerPeriodic<ArrayType>::T hoNDBoundaryHandlerPeriodic<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a )
+    {
+        const gt_index_type requested[7] = { x, y, z, s, p, r, a };
+        size_t wrapped[7];
+
+        for ( unsigned int d=0; d<7; d++ )
+        {
+            const gt_index_type len = (gt_index_type)array_->get_size(d);
+            const bool inside = (requested[d]>=0) && (requested[d]<len);
+            wrapped[d] = inside ? requested[d] : this->mod(requested[d], len);
+        }
+
+        return (*array_)(wrapped[0], wrapped[1], wrapped[2], wrapped[3], wrapped[4], wrapped[5], wrapped[6]);
+    }
+
+    /// Periodic lookup with eight indices: each out-of-range index is replaced by
+    /// this->mod(index, size of its dimension); in-range indices are used unchanged.
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerPeriodic<ArrayType>::T hoNDBoundaryHandlerPeriodic<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a, gt_index_type q )
+    {
+        const gt_index_type requested[8] = { x, y, z, s, p, r, a, q };
+        size_t wrapped[8];
+
+        for ( unsigned int d=0; d<8; d++ )
+        {
+            const gt_index_type len = (gt_index_type)array_->get_size(d);
+            const bool inside = (requested[d]>=0) && (requested[d]<len);
+            wrapped[d] = inside ? requested[d] : this->mod(requested[d], len);
+        }
+
+        return (*array_)(wrapped[0], wrapped[1], wrapped[2], wrapped[3], wrapped[4], wrapped[5], wrapped[6], wrapped[7]);
+    }
+
+    /// Periodic lookup with nine indices: each out-of-range index is replaced by
+    /// this->mod(index, size of its dimension); in-range indices are used unchanged.
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerPeriodic<ArrayType>::T hoNDBoundaryHandlerPeriodic<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a, gt_index_type q, gt_index_type u )
+    {
+        const gt_index_type requested[9] = { x, y, z, s, p, r, a, q, u };
+        size_t wrapped[9];
+
+        for ( unsigned int d=0; d<9; d++ )
+        {
+            const gt_index_type len = (gt_index_type)array_->get_size(d);
+            const bool inside = (requested[d]>=0) && (requested[d]<len);
+            wrapped[d] = inside ? requested[d] : this->mod(requested[d], len);
+        }
+
+        return (*array_)(wrapped[0], wrapped[1], wrapped[2], wrapped[3], wrapped[4], wrapped[5], wrapped[6], wrapped[7], wrapped[8]);
+    }
+
+    /// hoNDBoundaryHandlerMirror
+
+    /// Mirror lookup with one index per array dimension, given as a vector.
+    /// A negative index i becomes -i; an index i >= size becomes 2*size - i - 2;
+    /// in-range indices are used unchanged.
+    /// NOTE(review): only one reflection is applied, so an index more than one
+    /// array length outside the range is not brought back in range - confirm callers.
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerMirror<ArrayType>::T hoNDBoundaryHandlerMirror<ArrayType>::operator()( const std::vector<gt_index_type>& ind )
+    {
+        const unsigned int numDims = array_->get_number_of_dimensions();
+        std::vector<size_t> mirrored(numDims);
+
+        for ( unsigned int d=0; d<numDims; d++ )
+        {
+            const gt_index_type i = ind[d];
+            const gt_index_type len = (gt_index_type)array_->get_size(d);
+
+            if ( i>=0 && i<len )
+            {
+                mirrored[d] = i;
+            }
+            else if ( i < 0 )
+            {
+                mirrored[d] = -i;
+            }
+            else
+            {
+                mirrored[d] = 2*len - i -2;
+            }
+        }
+
+        return (*array_)(mirrored);
+    }
+
+    /// Mirror lookup with one index: negative x becomes -x, x >= size becomes
+    /// 2*size - x - 2, an in-range x is used unchanged.
+    /// NOTE(review): a single reflection only - far out-of-range x stays out of range.
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerMirror<ArrayType>::T hoNDBoundaryHandlerMirror<ArrayType>::operator()( gt_index_type x )
+    {
+        size_t x_mirrored;
+
+        if ( x < 0 )
+        {
+            x_mirrored = -x;
+        }
+        else if ( x >= (gt_index_type)array_->get_size(0) )
+        {
+            x_mirrored = 2*array_->get_size(0) - x - 2;
+        }
+        else
+        {
+            x_mirrored = x;
+        }
+
+        return (*array_)(x_mirrored);
+    }
+
+    /// Mirror lookup with two indices: per dimension, a negative index i becomes -i,
+    /// i >= size becomes 2*size - i - 2, an in-range index is used unchanged.
+    /// NOTE(review): a single reflection only - far out-of-range indices stay out of range.
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerMirror<ArrayType>::T hoNDBoundaryHandlerMirror<ArrayType>::operator()( gt_index_type x, gt_index_type y )
+    {
+        const gt_index_type requested[2] = { x, y };
+        size_t mirrored[2];
+
+        for ( unsigned int d=0; d<2; d++ )
+        {
+            const gt_index_type i = requested[d];
+
+            if ( i < 0 )
+            {
+                mirrored[d] = -i;
+            }
+            else if ( i >= (gt_index_type)array_->get_size(d) )
+            {
+                mirrored[d] = 2*array_->get_size(d) - i - 2;
+            }
+            else
+            {
+                mirrored[d] = i;
+            }
+        }
+
+        return (*array_)(mirrored[0], mirrored[1]);
+    }
+
+    /// Mirror lookup with three indices: per dimension, a negative index i becomes -i,
+    /// i >= size becomes 2*size - i - 2, an in-range index is used unchanged.
+    /// NOTE(review): a single reflection only - far out-of-range indices stay out of range.
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerMirror<ArrayType>::T hoNDBoundaryHandlerMirror<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z )
+    {
+        const gt_index_type requested[3] = { x, y, z };
+        size_t mirrored[3];
+
+        for ( unsigned int d=0; d<3; d++ )
+        {
+            const gt_index_type i = requested[d];
+
+            if ( i < 0 )
+            {
+                mirrored[d] = -i;
+            }
+            else if ( i >= (gt_index_type)array_->get_size(d) )
+            {
+                mirrored[d] = 2*array_->get_size(d) - i - 2;
+            }
+            else
+            {
+                mirrored[d] = i;
+            }
+        }
+
+        return (*array_)(mirrored[0], mirrored[1], mirrored[2]);
+    }
+
+    /// Mirror lookup with four indices: per dimension, a negative index i becomes -i,
+    /// i >= size becomes 2*size - i - 2, an in-range index is used unchanged.
+    /// NOTE(review): a single reflection only - far out-of-range indices stay out of range.
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerMirror<ArrayType>::T hoNDBoundaryHandlerMirror<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s )
+    {
+        const gt_index_type requested[4] = { x, y, z, s };
+        size_t mirrored[4];
+
+        for ( unsigned int d=0; d<4; d++ )
+        {
+            const gt_index_type i = requested[d];
+
+            if ( i < 0 )
+            {
+                mirrored[d] = -i;
+            }
+            else if ( i >= (gt_index_type)array_->get_size(d) )
+            {
+                mirrored[d] = 2*array_->get_size(d) - i - 2;
+            }
+            else
+            {
+                mirrored[d] = i;
+            }
+        }
+
+        return (*array_)(mirrored[0], mirrored[1], mirrored[2], mirrored[3]);
+    }
+
+    /// Mirror lookup with five indices: per dimension, a negative index i becomes -i,
+    /// i >= size becomes 2*size - i - 2, an in-range index is used unchanged.
+    /// NOTE(review): a single reflection only - far out-of-range indices stay out of range.
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerMirror<ArrayType>::T hoNDBoundaryHandlerMirror<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p )
+    {
+        const gt_index_type requested[5] = { x, y, z, s, p };
+        size_t mirrored[5];
+
+        for ( unsigned int d=0; d<5; d++ )
+        {
+            const gt_index_type i = requested[d];
+
+            if ( i < 0 )
+            {
+                mirrored[d] = -i;
+            }
+            else if ( i >= (gt_index_type)array_->get_size(d) )
+            {
+                mirrored[d] = 2*array_->get_size(d) - i - 2;
+            }
+            else
+            {
+                mirrored[d] = i;
+            }
+        }
+
+        return (*array_)(mirrored[0], mirrored[1], mirrored[2], mirrored[3], mirrored[4]);
+    }
+
+    /// Mirror lookup with six indices: per dimension, a negative index i becomes -i,
+    /// i >= size becomes 2*size - i - 2, an in-range index is used unchanged.
+    /// NOTE(review): a single reflection only - far out-of-range indices stay out of range.
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerMirror<ArrayType>::T hoNDBoundaryHandlerMirror<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r )
+    {
+        const gt_index_type requested[6] = { x, y, z, s, p, r };
+        size_t mirrored[6];
+
+        for ( unsigned int d=0; d<6; d++ )
+        {
+            const gt_index_type i = requested[d];
+
+            if ( i < 0 )
+            {
+                mirrored[d] = -i;
+            }
+            else if ( i >= (gt_index_type)array_->get_size(d) )
+            {
+                mirrored[d] = 2*array_->get_size(d) - i - 2;
+            }
+            else
+            {
+                mirrored[d] = i;
+            }
+        }
+
+        return (*array_)(mirrored[0], mirrored[1], mirrored[2], mirrored[3], mirrored[4], mirrored[5]);
+    }
+
+    /// Mirror lookup with seven indices: per dimension, a negative index i becomes -i,
+    /// i >= size becomes 2*size - i - 2, an in-range index is used unchanged.
+    /// NOTE(review): a single reflection only - far out-of-range indices stay out of range.
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerMirror<ArrayType>::T hoNDBoundaryHandlerMirror<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a )
+    {
+        const gt_index_type requested[7] = { x, y, z, s, p, r, a };
+        size_t mirrored[7];
+
+        for ( unsigned int d=0; d<7; d++ )
+        {
+            const gt_index_type i = requested[d];
+
+            if ( i < 0 )
+            {
+                mirrored[d] = -i;
+            }
+            else if ( i >= (gt_index_type)array_->get_size(d) )
+            {
+                mirrored[d] = 2*array_->get_size(d) - i - 2;
+            }
+            else
+            {
+                mirrored[d] = i;
+            }
+        }
+
+        return (*array_)(mirrored[0], mirrored[1], mirrored[2], mirrored[3], mirrored[4], mirrored[5], mirrored[6]);
+    }
+
+    /// Mirror lookup with eight indices: per dimension, a negative index i becomes -i,
+    /// i >= size becomes 2*size - i - 2, an in-range index is used unchanged.
+    /// NOTE(review): a single reflection only - far out-of-range indices stay out of range.
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerMirror<ArrayType>::T hoNDBoundaryHandlerMirror<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a, gt_index_type q )
+    {
+        const gt_index_type requested[8] = { x, y, z, s, p, r, a, q };
+        size_t mirrored[8];
+
+        for ( unsigned int d=0; d<8; d++ )
+        {
+            const gt_index_type i = requested[d];
+
+            if ( i < 0 )
+            {
+                mirrored[d] = -i;
+            }
+            else if ( i >= (gt_index_type)array_->get_size(d) )
+            {
+                mirrored[d] = 2*array_->get_size(d) - i - 2;
+            }
+            else
+            {
+                mirrored[d] = i;
+            }
+        }
+
+        return (*array_)(mirrored[0], mirrored[1], mirrored[2], mirrored[3], mirrored[4], mirrored[5], mirrored[6], mirrored[7]);
+    }
+
+    /// Mirror lookup with nine indices: per dimension, a negative index i becomes -i,
+    /// i >= size becomes 2*size - i - 2, an in-range index is used unchanged.
+    /// NOTE(review): a single reflection only - far out-of-range indices stay out of range.
+    template <typename ArrayType> 
+    inline typename hoNDBoundaryHandlerMirror<ArrayType>::T hoNDBoundaryHandlerMirror<ArrayType>::operator()( gt_index_type x, gt_index_type y, gt_index_type z, gt_index_type s, gt_index_type p, gt_index_type r, gt_index_type a, gt_index_type q, gt_index_type u )
+    {
+        const gt_index_type requested[9] = { x, y, z, s, p, r, a, q, u };
+        size_t mirrored[9];
+
+        for ( unsigned int d=0; d<9; d++ )
+        {
+            const gt_index_type i = requested[d];
+
+            if ( i < 0 )
+            {
+                mirrored[d] = -i;
+            }
+            else if ( i >= (gt_index_type)array_->get_size(d) )
+            {
+                mirrored[d] = 2*array_->get_size(d) - i - 2;
+            }
+            else
+            {
+                mirrored[d] = i;
+            }
+        }
+
+        return (*array_)(mirrored[0], mirrored[1], mirrored[2], mirrored[3], mirrored[4], mirrored[5], mirrored[6], mirrored[7], mirrored[8]);
+    }
+}
diff --git a/toolboxes/core/cpu/hoNDFFT.cpp b/toolboxes/core/cpu/hoNDFFT.cpp
deleted file mode 100644
index 3a0f431..0000000
--- a/toolboxes/core/cpu/hoNDFFT.cpp
+++ /dev/null
@@ -1,1713 +0,0 @@
-/*
- * hoNDFFT.cpp
- *
- *  Created on: Nov 29, 2011
- *      Author: hansenms
- */
-
-#include "hoNDFFT.h"
-#include "hoMatrix.h"
-
-namespace Gadgetron{
-
-  /// Returns the shared hoNDFFT<T> object, creating it on the first call.
-  /// NOTE(review): the check-and-create below is not guarded by a lock in this block.
-  template<typename T> hoNDFFT<T>* hoNDFFT<T>::instance()
-  {
-    if (instance_ == NULL)
-      {
-        instance_ = new hoNDFFT<T>();
-      }
-    return instance_;
-  }
-  
-  /// Shared instance pointer; stays NULL until instance() creates the object.
-  template<class T> hoNDFFT<T>* hoNDFFT<T>::instance_ = NULL;
-
-  /**
-   * Transforms `input` in place with 1D complex FFTs along one dimension.
-   *
-   * @param input             array whose data is overwritten with the transform result
-   * @param dim_to_transform  dimension along which the 1D transforms run; the call does
-   *                          nothing when this is not smaller than the number of dimensions
-   * @param sign              sign handed to the FFTW planner; only -1 and +1 are accepted,
-   *                          any other value makes the call do nothing. For +1 the output
-   *                          is scaled by 1/length, for -1 it is left unscaled.
-   *
-   * Every line is copied into a scratch buffer with its two halves exchanged,
-   * transformed there, and copied back through the same index mapping.
-   * Allocation, planning and clean-up are done while holding mutex_; the mutex is
-   * released on every exit path, including the allocation and planning failures.
-   */
-  template<class T> void hoNDFFT<T>::fft_int(hoNDArray< std::complex<T> >* input, size_t dim_to_transform, int sign)
-  {
-    if (sign != -1 && sign != 1) return;
-    if (dim_to_transform >= input->get_number_of_dimensions()) return;
-
-    int stride     = 1;           //Distance between points in transform
-    int dist       = 1;           //Distance between vectors
-    int trafos     = 1;           //Transformations per chunk
-    int chunks     = 1;           //Number of chunks
-    int chunk_size = 1;           //Points per chunk
-    int length     = 1;           //Length of each transform
-    int total_dist = 1;
-
-    T scale = 0.0;
-
-    void* fft_plan        = 0;
-    T*    fft_storage     = 0;
-
-    T* fft_buffer = 0;
-    T* data_ptr = 0;
-
-    //Set sizes
-    length = input->get_size(dim_to_transform);
-
-    if (sign == 1)
-      {
-        scale = 1.0/length;
-      }
-    else
-      {
-        scale = 1.0;
-      }
-
-    if (dim_to_transform != 0)
-      {
-        //Lines along this dimension are strided: one chunk holds all lower dimensions
-        for (size_t i = 0; i < dim_to_transform; i++)
-          {
-            chunk_size *= input->get_size(i);
-          }
-        stride = chunk_size;
-        trafos = chunk_size;
-        chunk_size *= length;
-
-        for (size_t i = dim_to_transform+1; i < input->get_number_of_dimensions(); i++)
-          {
-            chunks *= input->get_size(i);
-          }
-      }
-    else
-      {
-        //Lines along dimension 0 are contiguous: a single chunk covers the whole array
-        for (size_t i = 1; i < input->get_number_of_dimensions(); i++)
-          {
-            trafos *= input->get_size(i);
-          }
-        chunk_size = trafos*length;
-
-        dist = length;
-      }
-
-    //*2 real and imag
-    chunk_size *= 2;
-    dist *= 2;
-    total_dist = trafos*dist;
-
-
-    //Allocate storage and make plan
-    {
-      mutex_.lock();
-      fft_storage = (T*)fftw_malloc_ptr_(sizeof(T)*length*2);
-      if (fft_storage == 0)
-        {
-          //Release the lock before leaving, otherwise every later call would block
-          mutex_.unlock();
-          std::cout << "Failed to allocate buffer for FFT" << std::endl;
-          return;
-        }
-      fft_buffer = (T*)fft_storage;
-
-      unsigned planner_flags = FFTW_MEASURE | FFTW_DESTROY_INPUT;
-
-      fft_plan = fftw_plan_dft_1d_ptr_(length, fft_storage, fft_storage, sign, planner_flags);
-
-      if (fft_plan == 0)
-        {
-          fftw_free_ptr_(fft_storage);
-          //Release the lock before leaving, otherwise every later call would block
-          mutex_.unlock();
-          std::cout << "Failed to create plan for FFT" << std::endl;
-          return;
-        }
-      mutex_.unlock();
-    }
-
-    //Grab address of data
-    data_ptr = reinterpret_cast<T*>(input->get_data_ptr());
-
-    //All indices below count T values, i.e. two per complex sample
-    int idx1_max = chunks*chunk_size;
-    int idx1, idx2;       //Index variables
-    int idx2_limit;
-    int middle_point = ((length+1)>>1)<<1;   //2*ceil(length/2): where the buffer is split
-    int length2 = length<<1;
-    int stride2 = stride<<1;
-
-    for (idx1 = 0; idx1 < idx1_max; idx1+=chunk_size) //Loop over all chunks
-      {
-        idx2_limit = idx1+total_dist;
-        for (idx2 = idx1; idx2 < idx2_limit; idx2+=dist) //Loop over all transformations
-          {
-            ///Copy data to buffer: leading samples fill the buffer tail, the rest fill its head.
-            {
-              int j, idx3 = idx2;
-              for (j = middle_point; j < length2; idx3+=stride2)
-                {
-                  fft_buffer[j++] = data_ptr[idx3  ];
-                  fft_buffer[j++] = data_ptr[idx3+1];
-                }
-              for (j = 0; j < middle_point; idx3+=stride2)
-                {
-                  fft_buffer[j++] = data_ptr[idx3  ];
-                  fft_buffer[j++] = data_ptr[idx3+1];
-                }
-            }
-
-            fftw_execute_ptr_(fft_plan);
-
-            ///Copy the scaled result back through the same index mapping.
-            {
-              int j, idx3 = idx2;
-
-              for (j = middle_point; j < length2; idx3+=stride2)
-                {
-                  data_ptr[idx3  ] = fft_buffer[j++]*scale;
-                  data_ptr[idx3+1] = fft_buffer[j++]*scale;
-                }
-              for (j = 0; j < middle_point; idx3+=stride2)
-                {
-                  data_ptr[idx3  ] = fft_buffer[j++]*scale;
-                  data_ptr[idx3+1] = fft_buffer[j++]*scale;
-                }
-            }
-
-          } //Loop over transformations
-      } //Loop over chunks
-
-    //clean up
-    {
-      mutex_.lock();
-      if (fft_plan != 0)
-        {
-          fftw_destroy_plan_ptr_(fft_plan);
-        }
-
-      if (fft_storage != 0)
-        {
-          fftw_free_ptr_(fft_storage);
-        }
-      mutex_.unlock();
-    }
-  }
-  
-  /// Points the FFTW function-pointer members of hoNDFFT<float> at the fftwf_* functions.
-  template<> void hoNDFFT<float>::set_function_pointers()
-  {
-    // Signatures the plan-related entry points are cast to before being stored.
-    typedef void  (*ExecuteFn)(void*);
-    typedef void* (*PlanDft1dFn)(int, void*, void*, int, unsigned);
-    typedef void  (*DestroyPlanFn)(void*);
-
-    // Wisdom import/export and library clean-up
-    fftw_import_wisdom_from_file_ptr_ = &fftwf_import_wisdom_from_file;
-    fftw_export_wisdom_to_file_ptr_ = &fftwf_export_wisdom_to_file;
-    fftw_cleanup_ptr_ = &fftwf_cleanup;
-
-    // Buffer allocation
-    fftw_malloc_ptr_ = &fftwf_malloc;
-    fftw_free_ptr_ = &fftwf_free;
-
-    // Plan creation, execution and destruction
-    fftw_execute_ptr_ = reinterpret_cast<ExecuteFn>(&fftwf_execute);
-    fftw_plan_dft_1d_ptr_ = reinterpret_cast<PlanDft1dFn>(&fftwf_plan_dft_1d);
-    fftw_destroy_plan_ptr_ = reinterpret_cast<DestroyPlanFn>(&fftwf_destroy_plan);
-  }
-
-  /// Points the FFTW function-pointer members of hoNDFFT<double> at the fftw_* functions.
-  template<> void hoNDFFT<double>::set_function_pointers()
-  {
-    // Signatures the plan-related entry points are cast to before being stored.
-    typedef void  (*ExecuteFn)(void*);
-    typedef void* (*PlanDft1dFn)(int, void*, void*, int, unsigned);
-    typedef void  (*DestroyPlanFn)(void*);
-
-    // Wisdom import/export and library clean-up
-    fftw_import_wisdom_from_file_ptr_ = &fftw_import_wisdom_from_file;
-    fftw_export_wisdom_to_file_ptr_ = &fftw_export_wisdom_to_file;
-    fftw_cleanup_ptr_ = &fftw_cleanup;
-
-    // Buffer allocation
-    fftw_malloc_ptr_ = &fftw_malloc;
-    fftw_free_ptr_ = &fftw_free;
-
-    // Plan creation, execution and destruction
-    fftw_execute_ptr_ = reinterpret_cast<ExecuteFn>(&fftw_execute);
-    fftw_plan_dft_1d_ptr_ = reinterpret_cast<PlanDft1dFn>(&fftw_plan_dft_1d);
-    fftw_destroy_plan_ptr_ = reinterpret_cast<DestroyPlanFn>(&fftw_destroy_plan);
-  }
-
-  /// Pivot used by the fftshift routines for a line of length x: ceil(x/2).
-  template<typename T> 
-  inline size_t hoNDFFT<T>::fftshiftPivot(size_t x)
-  {
-    const double half = x*0.5;
-    return (size_t)(ceil(half));
-  }
-
-  /// Pivot used by the ifftshift routines for a line of length x: floor(x/2).
-  template<typename T> 
-  inline size_t hoNDFFT<T>::ifftshiftPivot(size_t x)
-  {
-    const double half = x*0.5;
-    return (size_t)(floor(half));
-  }
-
-  /// Copies the length-x line a into r rotated about pivot: a[pivot..x) goes to the
-  /// start of r and a[0..pivot) follows it. Returns false only if an exception is caught.
-  template<typename T> 
-  inline bool hoNDFFT<T>::fftshift1D(const ComplexType* a, ComplexType* r, size_t x, size_t pivot)
-  {
-    try
-      {
-        const size_t tail = x-pivot;    // number of samples from the pivot to the end of the line
-
-        memcpy(r, a+pivot, sizeof(ComplexType)*tail);
-        memcpy(r+tail, a, sizeof(ComplexType)*pivot);
-      }
-    catch(...)
-      {
-        GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshift1D(const ComplexType* a, ComplexType* r, size_t x, size_t pivot) ...");
-        return false;
-      }
-
-    return true;
-  }
-
-  /// Inverse shift of one line: the same rotation as fftshift1D, the caller supplies the pivot.
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifftshift1D(const ComplexType* a, ComplexType* r, size_t x, size_t pivot)
-  {
-    const bool ok = fftshift1D(a, r, x, pivot);
-    return ok;
-  }
-
-  /// Rotates, in place, each of the n consecutive length-x lines stored at a about pivot.
-  /// Every line is shifted into a scratch array with fftshift1D and copied back.
-  /// Returns false only if an exception is caught.
-  template<typename T> 
-  bool hoNDFFT<T>::fftshiftPivot1D(ComplexType* a, size_t x, size_t n, size_t pivot)
-  {
-    try
-      {
-        long long line;
-
-#pragma omp parallel private(line) shared(n, x, pivot, a)
-        {
-          // declared inside the parallel region, so each thread has its own scratch line
-          hoNDArray< ComplexType > scratch(x);
-
-#pragma omp for
-          for ( line=0; line<(long long)n; line++ )
-            {
-              ComplexType* const cur = a+line*x;
-
-              fftshift1D(cur, scratch.begin(), x, pivot);
-              memcpy(cur, scratch.begin(), sizeof(ComplexType)*x);
-            }
-        }
-      }
-    catch(...)
-      {
-        GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshiftPivot1D(ComplexType* a, size_t x, size_t n, size_t pivot) ...");
-        return false;
-      }
-
-    return true;
-  }
-
-  /// Out-of-place variant: rotates each of the n consecutive length-x lines of a about
-  /// pivot and writes it to the matching line of r. Returns false only if an exception is caught.
-  template<typename T> 
-  bool hoNDFFT<T>::fftshiftPivot1D(const ComplexType* a, ComplexType* r, size_t x, size_t n, size_t pivot)
-  {
-    try
-      {
-        long long line;
-
-#pragma omp parallel for private(line) shared(n, x, pivot, a, r)
-        for ( line=0; line<(long long)n; line++ )
-          {
-            const ComplexType* const src = a+line*x;
-            ComplexType* const dst = r+line*x;
-
-            fftshift1D(src, dst, x, pivot);
-          }
-      }
-    catch(...)
-      {
-        GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshiftPivot1D(const ComplexType* a, ComplexType* r, size_t x, size_t n, size_t pivot) ...");
-        return false;
-      }
-
-    return true;
-  }
-
-  /// In-place fftshift along the first dimension of a: all lines of length get_size(0)
-  /// are rotated about fftshiftPivot(x). Returns false on a failed check or caught exception.
-  template<typename T> 
-  bool hoNDFFT<T>::fftshift1D(hoNDArray< ComplexType >& a)
-  {
-    try
-      {
-        const size_t x = a.get_size(0);
-        const size_t numOfShifts = a.get_number_of_elements()/x;   // number of lines in the array
-        const size_t pivot = fftshiftPivot(x);
-
-        GADGET_CHECK_RETURN_FALSE(fftshiftPivot1D(a.begin(), x, numOfShifts, pivot));
-      }
-    catch(...)
-      {
-        GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshift1D(hoNDArray< ComplexType >& a) ...");
-        return false;
-      }
-
-    return true;
-  }
-
-  /// Out-of-place fftshift along the first dimension: r receives the lines of a rotated
-  /// about fftshiftPivot(x). When r does not have the dimensions of a, a is assigned to r first.
-  /// Returns false on a failed check or caught exception.
-  template<typename T> 
-  bool hoNDFFT<T>::fftshift1D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
-  {
-    try
-      {
-        const bool sameShape = r.dimensions_equal(&a);
-        if ( !sameShape )
-          {
-            r = a;
-          }
-
-        const size_t x = a.get_size(0);
-        const size_t numOfShifts = a.get_number_of_elements()/x;   // number of lines in the array
-        const size_t pivot = fftshiftPivot(x);
-
-        GADGET_CHECK_RETURN_FALSE(fftshiftPivot1D(a.begin(), r.begin(), x, numOfShifts, pivot));
-      }
-    catch(...)
-      {
-        GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshift1D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r) ...");
-        return false;
-      }
-
-    return true;
-  }
-
-  /// In-place ifftshift along the first dimension of a: all lines of length get_size(0)
-  /// are rotated about ifftshiftPivot(x). Returns false on a failed check or caught exception.
-  template<typename T> 
-  bool hoNDFFT<T>::ifftshift1D(hoNDArray< ComplexType >& a)
-  {
-    try
-      {
-        const size_t x = a.get_size(0);
-        const size_t numOfShifts = a.get_number_of_elements()/x;   // number of lines in the array
-        const size_t pivot = ifftshiftPivot(x);
-
-        GADGET_CHECK_RETURN_FALSE(fftshiftPivot1D(a.begin(), x, numOfShifts, pivot));
-      }
-    catch(...)
-      {
-        GADGET_ERROR_MSG("Errors in hoNDFFT<T>::ifftshift1D(hoNDArray< ComplexType >& a) ...");
-        return false;
-      }
-
-    return true;
-  }
-
-  /// Out-of-place ifftshift along the first dimension: r receives the lines of a rotated
-  /// about ifftshiftPivot(x). When r does not have the dimensions of a, a is assigned to r first.
-  /// Returns false on a failed check or caught exception.
-  template<typename T> 
-  bool hoNDFFT<T>::ifftshift1D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
-  {
-    try
-      {
-        const bool sameShape = r.dimensions_equal(&a);
-        if ( !sameShape )
-          {
-            r = a;
-          }
-
-        const size_t x = a.get_size(0);
-        const size_t numOfShifts = a.get_number_of_elements()/x;   // number of lines in the array
-        const size_t pivot = ifftshiftPivot(x);
-
-        GADGET_CHECK_RETURN_FALSE(fftshiftPivot1D(a.begin(), r.begin(), x, numOfShifts, pivot));
-      }
-    catch(...)
-      {
-        GADGET_ERROR_MSG("Errors in hoNDFFT<T>::ifftshift1D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r) ...");
-        return false;
-      }
-
-    return true;
-  }
-
-  /// Out-of-place 2D shift of n consecutive x-by-y slices (x is the contiguous dimension).
-  /// Within each slice, rows are rotated about pivoty and the samples of every row about
-  /// pivotx; the result is written to the matching slice of r.
-  /// Returns false when a or r is NULL or an exception is caught.
-  template<typename T> 
-  bool hoNDFFT<T>::fftshiftPivot2D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t n, unsigned pivotx, unsigned pivoty)
-  {
-    try
-      {
-        GADGET_CHECK_RETURN_FALSE(a!=NULL);
-        GADGET_CHECK_RETURN_FALSE(r!=NULL);
-
-        long long slice;
-
-#pragma omp parallel for private(slice) shared(a, r, x, y, n, pivotx, pivoty) if (n>1)
-        for ( slice=0; slice<(long long)n; slice++ )
-          {
-            const ComplexType* src = a + slice*x*y;
-            ComplexType* dst = r + slice*x*y;
-            const size_t tail = x-pivotx;     // samples from pivotx to the end of a row
-
-            size_t row;
-
-            // source rows [pivoty, y) become destination rows [0, y-pivoty)
-            for ( row=pivoty; row<y; row++ )
-              {
-                const ComplexType* in = src + row*x;
-                ComplexType* out = dst + (row-pivoty)*x;
-
-                memcpy(out, in+pivotx, sizeof(ComplexType)*tail);
-                memcpy(out+tail, in, sizeof(ComplexType)*pivotx);
-              }
-
-            // source rows [0, pivoty) become destination rows [y-pivoty, y)
-            for ( row=0; row<pivoty; row++ )
-              {
-                const ComplexType* in = src + row*x;
-                ComplexType* out = dst + (row+y-pivoty)*x;
-
-                memcpy(out, in+pivotx, sizeof(ComplexType)*tail);
-                memcpy(out+tail, in, sizeof(ComplexType)*pivotx);
-              }
-          }
-      }
-    catch(...)
-      {
-        GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshiftPivot2D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t n, unsigned pivotx, unsigned pivoty) ...");
-        return false;
-      }
-
-    return true;
-  }
-
-  /// In-place 2D shift of n consecutive x-by-y slices (x is the contiguous dimension).
-  /// Each slice is shifted into a scratch slice (rows rotated about pivoty, the samples of
-  /// every row about pivotx) and then copied back over the original.
-  /// Returns false when a is NULL or an exception is caught.
-  template<typename T> 
-  bool hoNDFFT<T>::fftshiftPivot2D(ComplexType* a, size_t x, size_t y, size_t n, unsigned pivotx, unsigned pivoty)
-  {
-    try
-      {
-        GADGET_CHECK_RETURN_FALSE(a!=NULL);
-
-        long long slice;
-
-#pragma omp parallel private(slice) shared(a, x, y, n, pivotx, pivoty) if (n>1)
-        {
-          // declared inside the parallel region, so each thread has its own scratch slice
-          hoNDArray< ComplexType > scratch(x*y);
-          ComplexType* dst = scratch.begin();
-
-#pragma omp for
-          for ( slice=0; slice<(long long)n; slice++ )
-            {
-              ComplexType* src = a + slice*x*y;
-              const size_t tail = x-pivotx;     // samples from pivotx to the end of a row
-
-              size_t row;
-
-              // source rows [pivoty, y) become scratch rows [0, y-pivoty)
-              for ( row=pivoty; row<y; row++ )
-                {
-                  const ComplexType* in = src + row*x;
-                  ComplexType* out = dst + (row-pivoty)*x;
-
-                  memcpy(out, in+pivotx, sizeof(ComplexType)*tail);
-                  memcpy(out+tail, in, sizeof(ComplexType)*pivotx);
-                }
-
-              // source rows [0, pivoty) become scratch rows [y-pivoty, y)
-              for ( row=0; row<pivoty; row++ )
-                {
-                  const ComplexType* in = src + row*x;
-                  ComplexType* out = dst + (row+y-pivoty)*x;
-
-                  memcpy(out, in+pivotx, sizeof(ComplexType)*tail);
-                  memcpy(out+tail, in, sizeof(ComplexType)*pivotx);
-                }
-
-              // overwrite the slice with its shifted copy
-              memcpy(src, dst, sizeof(ComplexType)*x*y);
-            }
-        }
-      }
-    catch(...)
-      {
-        GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshiftPivot2D(ComplexType* a, size_t x, size_t y, size_t n, unsigned pivotx, unsigned pivoty) ...");
-        return false;
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fftshift2D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t n)
-  {
-    try
-      {
-        GADGET_CHECK_RETURN_FALSE(a!=NULL);
-        GADGET_CHECK_RETURN_FALSE(r!=NULL);
-
-        unsigned pivotx = fftshiftPivot(x);
-        unsigned pivoty = fftshiftPivot(y);
-
-        GADGET_CHECK_RETURN_FALSE(fftshiftPivot2D(a, r, x, y, n, pivotx, pivoty));
-      }
-    catch(...)
-      {
-        GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshift2D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t n) ...");
-        return false;
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifftshift2D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t n)
-  {
-    try
-      {
-        GADGET_CHECK_RETURN_FALSE(a!=NULL);
-        GADGET_CHECK_RETURN_FALSE(r!=NULL);
-
-        unsigned pivotx = ifftshiftPivot(x);
-        unsigned pivoty = ifftshiftPivot(y);
-
-        GADGET_CHECK_RETURN_FALSE(fftshiftPivot2D(a, r, x, y, n, pivotx, pivoty));
-      }
-    catch(...)
-      {
-        GADGET_ERROR_MSG("Errors in hoNDFFT<T>::ifftshift2D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t n) ...");
-        return false;
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fftshift2D(ComplexType* a, size_t x, size_t y, size_t n)
-  {
-    try
-      {
-        GADGET_CHECK_RETURN_FALSE(a!=NULL);
-
-        unsigned pivotx = fftshiftPivot(x);
-        unsigned pivoty = fftshiftPivot(y);
-
-        GADGET_CHECK_RETURN_FALSE(fftshiftPivot2D(a, x, y, n, pivotx, pivoty));
-      }
-    catch(...)
-      {
-        GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshift2D(ComplexType* a, size_t x, size_t y, size_t n) ...");
-        return false;
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifftshift2D(ComplexType* a, size_t x, size_t y, size_t n)
-  {
-    try
-      {
-        GADGET_CHECK_RETURN_FALSE(a!=NULL);
-
-        unsigned pivotx = ifftshiftPivot(x);
-        unsigned pivoty = ifftshiftPivot(y);
-
-        GADGET_CHECK_RETURN_FALSE(fftshiftPivot2D(a, x, y, n, pivotx, pivoty));
-      }
-    catch(...)
-      {
-        GADGET_ERROR_MSG("Errors in hoNDFFT<T>::ifftshift2D(ComplexType* a, size_t x, size_t y, size_t n) ...");
-        return false;
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fftshift2D(hoNDArray< ComplexType >& a)
-  {
-    size_t n = a.get_number_of_elements()/(a.get_size(0)*a.get_size(1));
-    return fftshift2D(a.begin(), a.get_size(0), a.get_size(1), n);
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fftshift2D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
-  {
-    if ( !r.dimensions_equal(&a) )
-      {
-        r = a;
-      }
-
-    size_t n = a.get_number_of_elements()/(a.get_size(0)*a.get_size(1));
-    return fftshift2D(a.begin(), r.begin(), a.get_size(0), a.get_size(1), n);
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifftshift2D(hoNDArray< ComplexType >& a)
-  {
-    size_t n = a.get_number_of_elements()/(a.get_size(0)*a.get_size(1));
-    return ifftshift2D(a.begin(), a.get_size(0), a.get_size(1), n);
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifftshift2D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
-  {
-    if ( !r.dimensions_equal(&a) )
-      {
-        r = a;
-      }
-
-    size_t n = a.get_number_of_elements()/(a.get_size(0)*a.get_size(1));
-    return ifftshift2D(a.begin(), r.begin(), a.get_size(0), a.get_size(1), n);
-  }
-
-  template<typename T> 
-  bool hoNDFFT<T>::fftshiftPivot3D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t z, size_t n, unsigned pivotx, unsigned pivoty,  unsigned pivotz)
-  {
-    try
-      {
-        GADGET_CHECK_RETURN_FALSE(a!=NULL);
-        GADGET_CHECK_RETURN_FALSE(r!=NULL);
-
-        long long tt;
-
-#pragma omp parallel for private(tt) shared(a, r, x, y, z, n, pivotx, pivoty, pivotz) if (n>1)
-        for ( tt=0; tt<(long long)n; tt++ )
-          {
-            size_t ay, ry, az, rz;
-
-            for ( az=pivotz; az<z; az++ )
-              {
-                rz = az - pivotz;
-
-                const ComplexType* ac = a + tt*x*y*z + az*x*y;
-                ComplexType* rc = r + tt*x*y*z + rz*x*y;
-
-                for ( ay=pivoty; ay<y; ay++ )
-                  {
-                    ry = ay - pivoty;
-                    memcpy(rc+ry*x, ac+ay*x+pivotx, sizeof(ComplexType)*(x-pivotx));
-                    memcpy(rc+ry*x+x-pivotx, ac+ay*x, sizeof(ComplexType)*pivotx);
-                  }
-
-                for ( ay=0; ay<pivoty; ay++ )
-                  {
-                    ry = ay + y - pivoty;
-                    memcpy(rc+ry*x, ac+ay*x+pivotx, sizeof(ComplexType)*(x-pivotx));
-                    memcpy(rc+ry*x+x-pivotx, ac+ay*x, sizeof(ComplexType)*pivotx);
-                  }
-              }
-
-            for ( az=0; az<pivotz; az++ )
-              {
-                rz = az + z - pivotz;
-
-                const ComplexType* ac = a + tt*x*y*z + az*x*y;
-                ComplexType* rc = r + tt*x*y*z + rz*x*y;
-
-                for ( ay=pivoty; ay<y; ay++ )
-                  {
-                    ry = ay - pivoty;
-                    memcpy(rc+ry*x, ac+ay*x+pivotx, sizeof(ComplexType)*(x-pivotx));
-                    memcpy(rc+ry*x+x-pivotx, ac+ay*x, sizeof(ComplexType)*pivotx);
-                  }
-
-                for ( ay=0; ay<pivoty; ay++ )
-                  {
-                    ry = ay + y - pivoty;
-                    memcpy(rc+ry*x, ac+ay*x+pivotx, sizeof(ComplexType)*(x-pivotx));
-                    memcpy(rc+ry*x+x-pivotx, ac+ay*x, sizeof(ComplexType)*pivotx);
-                  }
-              }
-          }
-      }
-    catch(...)
-      {
-        GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshiftPivot3D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t z, size_t n, unsigned pivotx, unsigned pivoty,  unsigned pivotz) ...");
-        return false;
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  bool hoNDFFT<T>::fftshiftPivot3D(ComplexType* a, size_t x, size_t y, size_t z, size_t n, unsigned pivotx, unsigned pivoty,  unsigned pivotz)
-  {
-    try
-      {
-        GADGET_CHECK_RETURN_FALSE(a!=NULL);
-
-        long long tt;
-
-#pragma omp parallel private(tt) shared(a, x, y, z, n, pivotx, pivoty, pivotz) if (n>1)
-        {
-          hoNDArray< ComplexType > aTmp(x*y*z);
-
-#pragma omp for
-          for ( tt=0; tt<(long long)n; tt++ )
-            {
-              size_t ay, ry, az, rz;
-
-              for ( az=pivotz; az<z; az++ )
-                {
-                  rz = az - pivotz;
-
-                  const ComplexType* ac = a + tt*x*y*z + az*x*y;
-                  ComplexType* rc = aTmp.begin() + rz*x*y;
-
-                  for ( ay=pivoty; ay<y; ay++ )
-                    {
-                      ry = ay - pivoty;
-                      memcpy(rc+ry*x, ac+ay*x+pivotx, sizeof(ComplexType)*(x-pivotx));
-                      memcpy(rc+ry*x+x-pivotx, ac+ay*x, sizeof(ComplexType)*pivotx);
-                    }
-
-                  for ( ay=0; ay<pivoty; ay++ )
-                    {
-                      ry = ay + y - pivoty;
-                      memcpy(rc+ry*x, ac+ay*x+pivotx, sizeof(ComplexType)*(x-pivotx));
-                      memcpy(rc+ry*x+x-pivotx, ac+ay*x, sizeof(ComplexType)*pivotx);
-                    }
-                }
-
-              for ( az=0; az<pivotz; az++ )
-                {
-                  rz = az + z - pivotz;
-
-                  const ComplexType* ac = a + tt*x*y*z + az*x*y;
-                  ComplexType* rc = aTmp.begin() + rz*x*y;
-
-                  for ( ay=pivoty; ay<y; ay++ )
-                    {
-                      ry = ay - pivoty;
-                      memcpy(rc+ry*x, ac+ay*x+pivotx, sizeof(ComplexType)*(x-pivotx));
-                      memcpy(rc+ry*x+x-pivotx, ac+ay*x, sizeof(ComplexType)*pivotx);
-                    }
-
-                  for ( ay=0; ay<pivoty; ay++ )
-                    {
-                      ry = ay + y - pivoty;
-                      memcpy(rc+ry*x, ac+ay*x+pivotx, sizeof(ComplexType)*(x-pivotx));
-                      memcpy(rc+ry*x+x-pivotx, ac+ay*x, sizeof(ComplexType)*pivotx);
-                    }
-                }
-
-              memcpy(a+tt*x*y*z, aTmp.begin(), sizeof(ComplexType)*x*y*z);
-            }
-        }
-      }
-    catch(...)
-      {
-        GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshiftPivot3D(ComplexType* a, size_t x, size_t y, size_t z, size_t n, unsigned pivotx, unsigned pivoty,  unsigned pivotz) ...");
-        return false;
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fftshift3D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t z, size_t n)
-  {
-    try
-      {
-        GADGET_CHECK_RETURN_FALSE(a!=NULL);
-        GADGET_CHECK_RETURN_FALSE(r!=NULL);
-
-        unsigned pivotx = fftshiftPivot(x);
-        unsigned pivoty = fftshiftPivot(y);
-        unsigned pivotz = fftshiftPivot(z);
-
-        GADGET_CHECK_RETURN_FALSE(fftshiftPivot3D(a, r, x, y, z, n, pivotx, pivoty, pivotz));
-      }
-    catch(...)
-      {
-        GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshift3D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t z, size_t n) ...");
-        return false;
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifftshift3D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t z, size_t n)
-  {
-    try
-      {
-        GADGET_CHECK_RETURN_FALSE(a!=NULL);
-        GADGET_CHECK_RETURN_FALSE(r!=NULL);
-
-        unsigned pivotx = ifftshiftPivot(x);
-        unsigned pivoty = ifftshiftPivot(y);
-        unsigned pivotz = ifftshiftPivot(z);
-
-        GADGET_CHECK_RETURN_FALSE(fftshiftPivot3D(a, r, x, y, z, n, pivotx, pivoty, pivotz));
-      }
-    catch(...)
-      {
-        GADGET_ERROR_MSG("Errors in hoNDFFT<T>::ifftshift3D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t z, size_t n) ...");
-        return false;
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fftshift3D(ComplexType* a, size_t x, size_t y, size_t z, size_t n)
-  {
-    try
-      {
-        GADGET_CHECK_RETURN_FALSE(a!=NULL);
-
-        unsigned pivotx = fftshiftPivot(x);
-        unsigned pivoty = fftshiftPivot(y);
-        unsigned pivotz = fftshiftPivot(z);
-
-        GADGET_CHECK_RETURN_FALSE(fftshiftPivot3D(a, x, y, z, n, pivotx, pivoty, pivotz));
-      }
-    catch(...)
-      {
-        GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshift3D(ComplexType* a, size_t x, size_t y, size_t z, size_t n) ...");
-        return false;
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifftshift3D(ComplexType* a, size_t x, size_t y, size_t z, size_t n)
-  {
-    try
-      {
-        GADGET_CHECK_RETURN_FALSE(a!=NULL);
-
-        unsigned pivotx = ifftshiftPivot(x);
-        unsigned pivoty = ifftshiftPivot(y);
-        unsigned pivotz = ifftshiftPivot(z);
-
-        GADGET_CHECK_RETURN_FALSE(fftshiftPivot3D(a, x, y, z, n, pivotx, pivoty, pivotz));
-      }
-    catch(...)
-      {
-        GADGET_ERROR_MSG("Errors in hoNDFFT<T>::ifftshift3D(ComplexType* a, size_t x, size_t y, size_t z, size_t n) ...");
-        return false;
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fftshift3D(hoNDArray< ComplexType >& a)
-  {
-    size_t n = a.get_number_of_elements()/(a.get_size(0)*a.get_size(1)*a.get_size(2));
-    return fftshift3D(a.begin(), a.get_size(0), a.get_size(1), a.get_size(2), n);
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fftshift3D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
-  {
-    if ( !r.dimensions_equal(&a) )
-      {
-        r = a;
-      }
-
-    size_t n = a.get_number_of_elements()/(a.get_size(0)*a.get_size(1)*a.get_size(2));
-    return fftshift3D(a.begin(), r.begin(), a.get_size(0), a.get_size(1), a.get_size(2), n);
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifftshift3D(hoNDArray< ComplexType >& a)
-  {
-    size_t n = a.get_number_of_elements()/(a.get_size(0)*a.get_size(1)*a.get_size(2));
-    return ifftshift3D(a.begin(), a.get_size(0), a.get_size(1), a.get_size(2), n);
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifftshift3D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
-  {
-    if ( !r.dimensions_equal(&a) )
-      {
-        r = a;
-      }
-
-    size_t n = a.get_number_of_elements()/(a.get_size(0)*a.get_size(1)*a.get_size(2));
-    return ifftshift3D(a.begin(), r.begin(), a.get_size(0), a.get_size(1), a.get_size(2), n);
-  }
-
-  // -----------------------------------------------------------------------------------------
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fft1(hoNDArray< ComplexType >& a)
-  {
-    return fft1(a, true);
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifft1(hoNDArray< ComplexType >& a)
-  {
-    return fft1(a, false);
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fft1(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
-  {
-    if ( !r.dimensions_equal(&a) )
-      {
-        r.create(a.get_dimensions());
-      }
-
-    return fft1(const_cast<hoNDArray< ComplexType >&>(a), r, true);
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifft1(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
-  {
-    if ( !r.dimensions_equal(&a) )
-      {
-        r.create(a.get_dimensions());
-      }
-
-    return fft1(const_cast<hoNDArray< ComplexType >&>(a), r, false);
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fft1c(hoNDArray< ComplexType >& a)
-  {
-    GADGET_CHECK_RETURN_FALSE(ifftshift1D(a));
-    GADGET_CHECK_RETURN_FALSE(fft1(a));
-    GADGET_CHECK_RETURN_FALSE(fftshift1D(a));
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifft1c(hoNDArray< ComplexType >& a)
-  {
-    GADGET_CHECK_RETURN_FALSE(ifftshift1D(a));
-    GADGET_CHECK_RETURN_FALSE(ifft1(a));
-    GADGET_CHECK_RETURN_FALSE(fftshift1D(a));
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fft1c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
-  {
-    GADGET_CHECK_RETURN_FALSE(ifftshift1D(a, r));
-    GADGET_CHECK_RETURN_FALSE(fft1(r));
-    GADGET_CHECK_RETURN_FALSE(fftshift1D(r));
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifft1c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
-  {
-    GADGET_CHECK_RETURN_FALSE(ifftshift1D(a, r));
-    GADGET_CHECK_RETURN_FALSE(ifft1(r));
-    GADGET_CHECK_RETURN_FALSE(fftshift1D(r));
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fft1c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf)
-  {
-    GADGET_CHECK_RETURN_FALSE(ifftshift1D(a, r));
-    GADGET_CHECK_RETURN_FALSE(fft1(r, buf));
-    GADGET_CHECK_RETURN_FALSE(fftshift1D(buf, r));
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifft1c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf)
-  {
-    GADGET_CHECK_RETURN_FALSE(ifftshift1D(a, r));
-    GADGET_CHECK_RETURN_FALSE(ifft1(r, buf));
-    GADGET_CHECK_RETURN_FALSE(fftshift1D(buf, r));
-    return true;
-  }
-
-  // -----------------------------------------------------------------------------------------
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fft2(hoNDArray< ComplexType >& a)
-  {
-    return fft2(a, true);
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifft2(hoNDArray< ComplexType >& a)
-  {
-    return fft2(a, false);
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fft2(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
-  {
-    //r = a;
-    //return fft2(r);
-    if ( !r.dimensions_equal(&a) )
-      {
-        r.create(a.get_dimensions());
-      }
-
-    return fft2(const_cast<hoNDArray< ComplexType >&>(a), r, true);
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifft2(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
-  {
-    /*r = a;
-      return ifft2(r);*/
-
-    if ( !r.dimensions_equal(&a) )
-      {
-        r.create(a.get_dimensions());
-      }
-
-    return fft2(const_cast<hoNDArray< ComplexType >&>(a), r, false);
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fft2c(hoNDArray< ComplexType >& a)
-  {
-    GADGET_CHECK_RETURN_FALSE(ifftshift2D(a));
-    GADGET_CHECK_RETURN_FALSE(fft2(a));
-    GADGET_CHECK_RETURN_FALSE(fftshift2D(a));
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifft2c(hoNDArray< ComplexType >& a)
-  {
-    GADGET_CHECK_RETURN_FALSE(ifftshift2D(a));
-    GADGET_CHECK_RETURN_FALSE(ifft2(a));
-    GADGET_CHECK_RETURN_FALSE(fftshift2D(a));
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fft2c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
-  {
-    GADGET_CHECK_RETURN_FALSE(ifftshift2D(a, r));
-    GADGET_CHECK_RETURN_FALSE(fft2(r));
-    GADGET_CHECK_RETURN_FALSE(fftshift2D(r));
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifft2c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
-  {
-    GADGET_CHECK_RETURN_FALSE(ifftshift2D(a, r));
-    GADGET_CHECK_RETURN_FALSE(ifft2(r));
-    GADGET_CHECK_RETURN_FALSE(fftshift2D(r));
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fft2c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf)
-  {
-    GADGET_CHECK_RETURN_FALSE(ifftshift2D(a, r));
-    GADGET_CHECK_RETURN_FALSE(fft2(r, buf));
-    GADGET_CHECK_RETURN_FALSE(fftshift2D(buf, r));
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifft2c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf)
-  {
-    GADGET_CHECK_RETURN_FALSE(ifftshift2D(a, r));
-    GADGET_CHECK_RETURN_FALSE(ifft2(r, buf));
-    GADGET_CHECK_RETURN_FALSE(fftshift2D(buf, r));
-    return true;
-  }
-
-  // -----------------------------------------------------------------------------------------
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fft3(hoNDArray< ComplexType >& a)
-  {
-    return fft3(a, true);
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifft3(hoNDArray< ComplexType >& a)
-  {
-    return fft3(a, false);
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fft3(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
-  {
-    /*r = a;
-      return fft3(r);*/
-    if ( !r.dimensions_equal(&a) )
-      {
-        r.create(a.get_dimensions());
-      }
-
-    return fft3(const_cast<hoNDArray< ComplexType >&>(a), r, true);
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifft3(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
-  {
-    /*r = a;
-      return ifft3(r);*/
-    if ( !r.dimensions_equal(&a) )
-      {
-        r.create(a.get_dimensions());
-      }
-
-    return fft3(const_cast<hoNDArray< ComplexType >&>(a), r, false);
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fft3c(hoNDArray< ComplexType >& a)
-  {
-    GADGET_CHECK_RETURN_FALSE(ifftshift3D(a));
-    GADGET_CHECK_RETURN_FALSE(fft3(a));
-    GADGET_CHECK_RETURN_FALSE(fftshift3D(a));
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifft3c(hoNDArray< ComplexType >& a)
-  {
-    GADGET_CHECK_RETURN_FALSE(ifftshift3D(a));
-    GADGET_CHECK_RETURN_FALSE(ifft3(a));
-    GADGET_CHECK_RETURN_FALSE(fftshift3D(a));
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fft3c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
-  {
-    GADGET_CHECK_RETURN_FALSE(ifftshift3D(a, r));
-    GADGET_CHECK_RETURN_FALSE(fft3(r));
-    GADGET_CHECK_RETURN_FALSE(fftshift3D(r));
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifft3c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
-  {
-    GADGET_CHECK_RETURN_FALSE(ifftshift3D(a, r));
-    GADGET_CHECK_RETURN_FALSE(ifft3(r));
-    GADGET_CHECK_RETURN_FALSE(fftshift3D(r));
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::fft3c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf)
-  {
-    GADGET_CHECK_RETURN_FALSE(ifftshift3D(a, r));
-    GADGET_CHECK_RETURN_FALSE(fft3(r, buf));
-    GADGET_CHECK_RETURN_FALSE(fftshift3D(buf, r));
-    return true;
-  }
-
-  template<typename T> 
-  inline bool hoNDFFT<T>::ifft3c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf)
-  {
-    GADGET_CHECK_RETURN_FALSE(ifftshift3D(a, r));
-    GADGET_CHECK_RETURN_FALSE(ifft3(r, buf));
-    GADGET_CHECK_RETURN_FALSE(fftshift3D(buf, r));
-    return true;
-  }
-
-  // -----------------------------------------------------------------------------------------
-
-  // MKL related
-
-#ifdef USE_MKL
-
-  template<typename T> 
-  bool hoNDFFT<T>::configureFFTHandle(long long NDim, MKL_LONG* dim, DFTI_CONFIG_VALUE fftPresion, size_t n, DFTI_DESCRIPTOR_HANDLE& handle)
-  {
-    long long ii;
-
-    MKL_LONG res;
-
-    if ( NDim == 1 )
-      {
-        if ( (res=DftiCreateDescriptor( &handle, fftPresion, DFTI_COMPLEX, NDim, dim[0])) != 0 )
-          {
-            GADGET_ERROR_MSG( DftiErrorMessage(res) );
-            return false;
-          }
-      }
-    else
-      {
-        if ( (res=DftiCreateDescriptor( &handle, fftPresion, DFTI_COMPLEX, NDim, dim)) != 0 )
-          {
-            GADGET_ERROR_MSG( DftiErrorMessage(res) );
-            return false;
-          }
-      }
-
-    double fftScaling = 1.0;
-    for ( ii=0; ii<NDim; ii++ )
-      {
-        fftScaling *= dim[ii];
-      }
-
-    if ( (res=DftiSetValue( handle, DFTI_FORWARD_SCALE, 1.0/std::sqrt(fftScaling))) != 0 )
-      {
-        GADGET_ERROR_MSG( DftiErrorMessage(res) );
-        return false;
-      }
-
-    if ( (res=DftiSetValue( handle, DFTI_BACKWARD_SCALE, 1.0/std::sqrt(fftScaling))) != 0 )
-      {
-        GADGET_ERROR_MSG( DftiErrorMessage(res) );
-        return false;
-      }
-
-    if ( (res=DftiSetValue( handle, DFTI_PLACEMENT, DFTI_INPLACE)) != 0 )
-      {
-        GADGET_ERROR_MSG( DftiErrorMessage(res) );
-        return false;
-      }
-
-    if ( n > 1 )
-      {
-        if ( (res=DftiSetValue( handle, DFTI_NUMBER_OF_TRANSFORMS, n)) != 0 )
-          {
-            GADGET_ERROR_MSG( DftiErrorMessage(res) );
-            return false;
-          }
-
-        if ( (res=DftiSetValue( handle, DFTI_INPUT_DISTANCE, (MKL_INT)fftScaling)) != 0 )
-          {
-            GADGET_ERROR_MSG( DftiErrorMessage(res) );
-            return false;
-          }
-
-        if ( (res=DftiSetValue( handle, DFTI_OUTPUT_DISTANCE, (MKL_INT)fftScaling)) != 0 )
-          {
-            GADGET_ERROR_MSG( DftiErrorMessage(res) );
-            return false;
-          }
-      }
-
-    if ( (res=DftiCommitDescriptor( handle)) != 0 )
-      {
-        GADGET_ERROR_MSG( DftiErrorMessage(res) );
-        return false;
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  bool hoNDFFT<T>::configureFFTHandleOutOfPlace(long long NDim, MKL_LONG* dim, DFTI_CONFIG_VALUE fftPresion, size_t n, DFTI_DESCRIPTOR_HANDLE& handle)
-  {
-    long long ii;
-
-    MKL_LONG res;
-
-    if ( NDim == 1 )
-      {
-        if ( (res=DftiCreateDescriptor( &handle, fftPresion, DFTI_COMPLEX, NDim, dim[0])) != 0 )
-          {
-            GADGET_ERROR_MSG( DftiErrorMessage(res) );
-            return false;
-          }
-      }
-    else
-      {
-        if ( (res=DftiCreateDescriptor( &handle, fftPresion, DFTI_COMPLEX, NDim, dim)) != 0 )
-          {
-            GADGET_ERROR_MSG( DftiErrorMessage(res) );
-            return false;
-          }
-      }
-
-    double fftScaling = 1.0;
-    for ( ii=0; ii<NDim; ii++ )
-      {
-        fftScaling *= dim[ii];
-      }
-
-    if ( (res=DftiSetValue( handle, DFTI_FORWARD_SCALE, 1.0/std::sqrt(fftScaling))) != 0 )
-      {
-        GADGET_ERROR_MSG( DftiErrorMessage(res) );
-        return false;
-      }
-
-    if ( (res=DftiSetValue( handle, DFTI_BACKWARD_SCALE, 1.0/std::sqrt(fftScaling))) != 0 )
-      {
-        GADGET_ERROR_MSG( DftiErrorMessage(res) );
-        return false;
-      }
-
-    if ( (res=DftiSetValue( handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE)) != 0 )
-      {
-        GADGET_ERROR_MSG( DftiErrorMessage(res) );
-        return false;
-      }
-
-    if ( n > 1 )
-      {
-        if ( (res=DftiSetValue( handle, DFTI_NUMBER_OF_TRANSFORMS, n)) != 0 )
-          {
-            GADGET_ERROR_MSG( DftiErrorMessage(res) );
-            return false;
-          }
-
-        if ( (res=DftiSetValue( handle, DFTI_INPUT_DISTANCE, (MKL_INT)fftScaling)) != 0 )
-          {
-            GADGET_ERROR_MSG( DftiErrorMessage(res) );
-            return false;
-          }
-
-        if ( (res=DftiSetValue( handle, DFTI_OUTPUT_DISTANCE, (MKL_INT)fftScaling)) != 0 )
-          {
-            GADGET_ERROR_MSG( DftiErrorMessage(res) );
-            return false;
-          }
-      }
-
-    if ( (res=DftiCommitDescriptor( handle)) != 0 )
-      {
-        GADGET_ERROR_MSG( DftiErrorMessage(res) );
-        return false;
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  bool hoNDFFT<T>::fft1(hoNDArray< ComplexType >& a, bool forward)
-  {
-    size_t n = a.get_number_of_elements()/a.get_size(0);
-    MKL_LONG dim = a.get_size(0);
-
-    DFTI_DESCRIPTOR_HANDLE handle;
-
-    if ( typeid(T) == typeid(float) )
-      {
-        GADGET_CHECK_RETURN_FALSE(configureFFTHandle(1, &dim, DFTI_SINGLE, n, handle));
-      }
-    else if ( typeid(T) == typeid(double) )
-      {
-        GADGET_CHECK_RETURN_FALSE(configureFFTHandle(1, &dim, DFTI_DOUBLE, n, handle));
-      }
-    else
-      {
-        GADGET_ERROR_MSG("hoNDFFT<T>::fft1(hoNDArray< ComplexType >& a), only float and double are supported ... ");
-        return false;
-      }
-
-    MKL_LONG res;
-
-    if ( forward )
-      {
-        if ( ( res=DftiComputeForward(handle, reinterpret_cast<T*>(a.begin())) ) != 0 ) 
-          { 
-            GADGET_ERROR_MSG( DftiErrorMessage(res) ); 
-            return false; 
-          }
-      }
-    else
-      {
-        if ( ( res=DftiComputeBackward(handle, reinterpret_cast<T*>(a.begin())) ) != 0 ) 
-          { 
-            GADGET_ERROR_MSG( DftiErrorMessage(res) ); 
-            return false; 
-          }
-      }
-
-    if ( ( res=DftiFreeDescriptor(&handle) ) != 0 ) 
-      { 
-        GADGET_ERROR_MSG( DftiErrorMessage(res) ); 
-        return false; 
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  bool hoNDFFT<T>::fft1(hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, bool forward)
-  {
-    size_t n = a.get_number_of_elements()/a.get_size(0);
-    MKL_LONG dim = a.get_size(0);
-
-    DFTI_DESCRIPTOR_HANDLE handle;
-
-    if ( typeid(T) == typeid(float) )
-      {
-        GADGET_CHECK_RETURN_FALSE(configureFFTHandleOutOfPlace(1, &dim, DFTI_SINGLE, n, handle));
-      }
-    else if ( typeid(T) == typeid(double) )
-      {
-        GADGET_CHECK_RETURN_FALSE(configureFFTHandleOutOfPlace(1, &dim, DFTI_DOUBLE, n, handle));
-      }
-    else
-      {
-        GADGET_ERROR_MSG("hoNDFFT<T>::fft1(hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r), only float and double are supported ... ");
-        return false;
-      }
-
-    MKL_LONG res;
-
-    if ( forward )
-      {
-        if ( ( res=DftiComputeForward( handle, reinterpret_cast<T*>(a.begin()), reinterpret_cast<T*>(r.begin()) ) ) != 0 ) 
-          { 
-            GADGET_ERROR_MSG( DftiErrorMessage(res) ); 
-            return false; 
-          }
-      }
-    else
-      {
-        if ( ( res=DftiComputeBackward( handle, reinterpret_cast<T*>(a.begin()), reinterpret_cast<T*>(r.begin()) ) ) != 0 ) 
-          { 
-            GADGET_ERROR_MSG( DftiErrorMessage(res) ); 
-            return false; 
-          }
-      }
-
-    if ( ( res=DftiFreeDescriptor(&handle) ) != 0 ) 
-      { 
-        GADGET_ERROR_MSG( DftiErrorMessage(res) ); 
-        return false; 
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  bool hoNDFFT<T>::fft2(hoNDArray< ComplexType >& a, bool forward)
-  {
-    size_t n = a.get_number_of_elements()/(a.get_size(0)*a.get_size(1));
-    MKL_LONG dim[2];
-    dim[0] = a.get_size(1);
-    dim[1] = a.get_size(0);
-
-    DFTI_DESCRIPTOR_HANDLE handle;
-
-    if ( typeid(T) == typeid(float) )
-      {
-        GADGET_CHECK_RETURN_FALSE(configureFFTHandle(2, dim, DFTI_SINGLE, n, handle));
-      }
-    else if ( typeid(T) == typeid(double) )
-      {
-        GADGET_CHECK_RETURN_FALSE(configureFFTHandle(2, dim, DFTI_DOUBLE, n, handle));
-      }
-    else
-      {
-        GADGET_ERROR_MSG("hoNDFFT<T>::fft2(hoNDArray< ComplexType >& a), only float and double are supported ... ");
-        return false;
-      }
-
-    MKL_LONG res;
-    if ( forward )
-      {
-        if ( ( res=DftiComputeForward(handle, reinterpret_cast<T*>(a.begin())) ) != 0 ) 
-          { 
-            GADGET_ERROR_MSG( DftiErrorMessage(res) ); 
-            return false; 
-          }
-      }
-    else
-      {
-        if ( ( res=DftiComputeBackward(handle, reinterpret_cast<T*>(a.begin())) ) != 0 ) 
-          { 
-            GADGET_ERROR_MSG( DftiErrorMessage(res) ); 
-            return false; 
-          }
-      }
-
-    if ( ( res=DftiFreeDescriptor(&handle) ) != 0 ) 
-      { 
-        GADGET_ERROR_MSG( DftiErrorMessage(res) ); 
-        return false; 
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  bool hoNDFFT<T>::fft2(hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, bool forward)
-  {
-    size_t n = a.get_number_of_elements()/(a.get_size(0)*a.get_size(1));
-    MKL_LONG dim[2];
-    dim[0] = a.get_size(1);
-    dim[1] = a.get_size(0);
-
-    DFTI_DESCRIPTOR_HANDLE handle;
-
-    if ( typeid(T) == typeid(float) )
-      {
-        GADGET_CHECK_RETURN_FALSE(configureFFTHandleOutOfPlace(2, dim, DFTI_SINGLE, n, handle));
-      }
-    else if ( typeid(T) == typeid(double) )
-      {
-        GADGET_CHECK_RETURN_FALSE(configureFFTHandleOutOfPlace(2, dim, DFTI_DOUBLE, n, handle));
-      }
-    else
-      {
-        GADGET_ERROR_MSG("hoNDFFT<T>::fft2(hoNDArray< ComplexType >& a), only float and double are supported ... ");
-        return false;
-      }
-
-    MKL_LONG res;
-    if ( forward )
-      {
-        if ( ( res=DftiComputeForward(handle, reinterpret_cast<T*>(a.begin()), reinterpret_cast<T*>(r.begin())) ) != 0 ) 
-          { 
-            GADGET_ERROR_MSG( DftiErrorMessage(res) ); 
-            return false; 
-          }
-      }
-    else
-      {
-        if ( ( res=DftiComputeBackward(handle, reinterpret_cast<T*>(a.begin()), reinterpret_cast<T*>(r.begin())) ) != 0 ) 
-          { 
-            GADGET_ERROR_MSG( DftiErrorMessage(res) ); 
-            return false; 
-          }
-      }
-
-    if ( ( res=DftiFreeDescriptor(&handle) ) != 0 ) 
-      { 
-        GADGET_ERROR_MSG( DftiErrorMessage(res) ); 
-        return false; 
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  bool hoNDFFT<T>::fft3(hoNDArray< ComplexType >& a, bool forward)
-  {
-    size_t n = a.get_number_of_elements()/(a.get_size(0)*a.get_size(1)*a.get_size(2));
-
-    MKL_LONG dim[3];
-    dim[0] = a.get_size(2);
-    dim[1] = a.get_size(1);
-    dim[2] = a.get_size(0);
-
-    DFTI_DESCRIPTOR_HANDLE handle;
-
-    if ( typeid(T) == typeid(float) )
-      {
-        GADGET_CHECK_RETURN_FALSE(configureFFTHandle(3, dim, DFTI_SINGLE, n, handle));
-      }
-    else if ( typeid(T) == typeid(double) )
-      {
-        GADGET_CHECK_RETURN_FALSE(configureFFTHandle(3, dim, DFTI_DOUBLE, n, handle));
-      }
-    else
-      {
-        GADGET_ERROR_MSG("hoNDFFT<T>::fft3(hoNDArray< ComplexType >& a), only float and double are supported ... ");
-        return false;
-      }
-
-    MKL_LONG res;
-    if ( forward )
-      {
-        if ( ( res=DftiComputeForward(handle, reinterpret_cast<T*>(a.begin())) ) != 0 ) 
-          { 
-            GADGET_ERROR_MSG( DftiErrorMessage(res) ); 
-            return false; 
-          }
-      }
-    else
-      {
-        if ( ( res=DftiComputeBackward(handle, reinterpret_cast<T*>(a.begin())) ) != 0 ) 
-          { 
-            GADGET_ERROR_MSG( DftiErrorMessage(res) ); 
-            return false; 
-          }
-      }
-
-    if ( ( res=DftiFreeDescriptor(&handle) ) != 0 ) 
-      { 
-        GADGET_ERROR_MSG( DftiErrorMessage(res) ); 
-        return false; 
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  bool hoNDFFT<T>::fft3(hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, bool forward)
-  {
-    size_t n = a.get_number_of_elements()/(a.get_size(0)*a.get_size(1)*a.get_size(2));
-
-    MKL_LONG dim[3];
-    dim[0] = a.get_size(2);
-    dim[1] = a.get_size(1);
-    dim[2] = a.get_size(0);
-
-    DFTI_DESCRIPTOR_HANDLE handle;
-
-    if ( typeid(T) == typeid(float) )
-      {
-        GADGET_CHECK_RETURN_FALSE(configureFFTHandleOutOfPlace(3, dim, DFTI_SINGLE, n, handle));
-      }
-    else if ( typeid(T) == typeid(double) )
-      {
-        GADGET_CHECK_RETURN_FALSE(configureFFTHandleOutOfPlace(3, dim, DFTI_DOUBLE, n, handle));
-      }
-    else
-      {
-        GADGET_ERROR_MSG("hoNDFFT<T>::fft3(a, r), only float and double are supported ... ");
-        return false;
-      }
-
-    MKL_LONG res;
-    if ( forward )
-      {
-        if ( ( res=DftiComputeForward(handle, reinterpret_cast<T*>(a.begin()), reinterpret_cast<T*>(r.begin())) ) != 0 ) 
-          { 
-            GADGET_ERROR_MSG( DftiErrorMessage(res) ); 
-            return false; 
-          }
-      }
-    else
-      {
-        if ( ( res=DftiComputeBackward(handle, reinterpret_cast<T*>(a.begin()), reinterpret_cast<T*>(r.begin())) ) != 0 ) 
-          { 
-            GADGET_ERROR_MSG( DftiErrorMessage(res) ); 
-            return false; 
-          }
-      }
-
-    if ( ( res=DftiFreeDescriptor(&handle) ) != 0 ) 
-      { 
-        GADGET_ERROR_MSG( DftiErrorMessage(res) ); 
-        return false; 
-      }
-
-    return true;
-  }
-
-#else
-
-  template<typename T> 
-  bool hoNDFFT<T>::fft1(hoNDArray< ComplexType >& a, bool forward)
-  {
-    if ( forward )
-      {
-        fft(&a, 1);
-      }
-    else
-      {
-        ifft(&a, 1);
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  bool hoNDFFT<T>::fft2(hoNDArray< ComplexType >& a, bool forward)
-  {
-    if ( forward )
-      {
-        fft(&a);
-      }
-    else
-      {
-        ifft(&a);
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  bool hoNDFFT<T>::fft3(hoNDArray< ComplexType >& a, bool forward)
-  {
-    if ( forward )
-      {
-        fft(&a);
-      }
-    else
-      {
-        ifft(&a);
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  bool hoNDFFT<T>::fft1(hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, bool forward)
-  {
-    r = a;
-    if ( forward )
-      {
-        fft(&r, 1);
-      }
-    else
-      {
-        ifft(&r, 1);
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  bool hoNDFFT<T>::fft2(hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, bool forward)
-  {
-    r = a;
-    if ( forward )
-      {
-        fft(&a);
-      }
-    else
-      {
-        ifft(&a);
-      }
-
-    return true;
-  }
-
-  template<typename T> 
-  bool hoNDFFT<T>::fft3(hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, bool forward)
-  {
-    r = a;
-    if ( forward )
-      {
-        fft(&a);
-      }
-    else
-      {
-        ifft(&a);
-      }
-
-    return true;
-  }
-
-#endif // USE_MKL
-
-  // 
-  // Instantiation
-  //
-  
-  template class hoNDFFT<float>;
-  template class hoNDFFT<double>;
-}
diff --git a/toolboxes/core/cpu/hoNDFFT.h b/toolboxes/core/cpu/hoNDFFT.h
deleted file mode 100644
index cd0a61d..0000000
--- a/toolboxes/core/cpu/hoNDFFT.h
+++ /dev/null
@@ -1,222 +0,0 @@
-/** \file hoNDFFT.h
-    \brief Wrappers for FFTW for ndarrays of type std::complex.
-*/
-
-#ifndef hoNDFFT_H
-#define hoNDFFT_H
-
-#include "hoNDArray.h"
-#include "cpucore_export.h"
-
-#include <boost/thread/mutex.hpp>
-#include <iostream>
-#include <fftw3.h>
-#include <complex>
-
-#ifdef USE_MKL
-    #include "mkl.h"
-#endif // USE_MKL
-
-namespace Gadgetron{
-
-    /** 
-    Generic class for Fast Fourier Transforms using FFTW on the hoNDArray class.
-    This class is a singleton because the planning and memory allocation routines of FFTW are NOT threadsafe.
-    The class' template type is a REAL, ie. float or double.
-
-    Access using e.g.
-    FFT<float>::instance()
-    */
-    template <typename T> class EXPORTCPUCORE hoNDFFT
-    {
-    public:
-
-        typedef std::complex<T> ComplexType;
-
-        static hoNDFFT<T>* instance(); 
-
-        void fft(hoNDArray< ComplexType >* input, unsigned int dim_to_transform)
-        {
-            //-1 refers to the sign of the transform, -1 for FFTW_FORWARD
-            fft_int(input,dim_to_transform,-1);
-        }
-
-        void ifft(hoNDArray< ComplexType >* input, unsigned int dim_to_transform)
-        {
-            //1 refers to the sign of the transform, +1 for FFTW_BACKWARD
-            fft_int(input,dim_to_transform,1);
-        }
-
-        void fft(hoNDArray< ComplexType >* input)
-        {
-            for (size_t i = 0; i < input->get_number_of_dimensions(); i++) {
-                //-1 refers to the sign of the transform, -1 for FFTW_FORWARD
-                fft_int(input,i,-1);
-            }
-        }
-
-        void ifft(hoNDArray< ComplexType >* input)
-        {
-            for (size_t i = 0; i < input->get_number_of_dimensions(); i++) {
-                //1 refers to the sign of the transform, +1 for FFTW_BACKWARD
-                fft_int(input,i,1);
-            }
-        }
-
-        // 1D
-        bool fftshift1D(hoNDArray< ComplexType >& a);
-        bool fftshift1D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
-
-        bool ifftshift1D(hoNDArray< ComplexType >& a);
-        bool ifftshift1D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
-
-        // 2D
-        bool fftshift2D(hoNDArray< ComplexType >& a);
-        bool fftshift2D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
-
-        bool ifftshift2D(hoNDArray< ComplexType >& a);
-        bool ifftshift2D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
-
-        // 3D
-        bool fftshift3D(hoNDArray< ComplexType >& a);
-        bool fftshift3D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
-
-        bool ifftshift3D(hoNDArray< ComplexType >& a);
-        bool ifftshift3D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
-
-        // 1D fft, in-place and out-of-place
-        // the first dimension will be transformed
-        bool fft1(hoNDArray< ComplexType >& a);
-        bool ifft1(hoNDArray< ComplexType >& a);
-
-        bool fft1(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
-        bool ifft1(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
-
-        // centered 1D fft
-        bool fft1c(hoNDArray< ComplexType >& a);
-        bool ifft1c(hoNDArray< ComplexType >& a);
-
-        bool fft1c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
-        bool ifft1c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
-
-        bool fft1c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf);
-        bool ifft1c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf);
-
-        // 2D fft, in-place and out-of-place
-        // the first and second dimensions will be transformed
-        bool fft2(hoNDArray< ComplexType >& a);
-        bool ifft2(hoNDArray< ComplexType >& a);
-
-        bool fft2(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
-        bool ifft2(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
-
-        // centered 2D fft
-        bool fft2c(hoNDArray< ComplexType >& a);
-        bool ifft2c(hoNDArray< ComplexType >& a);
-
-        bool fft2c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
-        bool ifft2c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
-
-        bool fft2c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf);
-        bool ifft2c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf);
-
-        // 3D fft, in-place and out-of-place
-        // the first, second and third dimensions will be transformed
-        bool fft3(hoNDArray< ComplexType >& a);
-        bool ifft3(hoNDArray< ComplexType >& a);
-
-        bool fft3(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
-        bool ifft3(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
-
-        // centered 3D fft
-        bool fft3c(hoNDArray< ComplexType >& a);
-        bool ifft3c(hoNDArray< ComplexType >& a);
-
-        bool fft3c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
-        bool ifft3c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
-
-        bool fft3c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf);
-        bool ifft3c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf);
-
-    protected:
-
-        //We are making these protected since this class is a singleton
-
-        hoNDFFT() {
-            set_function_pointers();
-        }
-
-        virtual ~hoNDFFT() { fftw_cleanup_ptr_(); }
-
-        void fft_int(hoNDArray< ComplexType >* input, size_t dim_to_transform, int sign);
-
-        void set_function_pointers();
-
-        int   (*fftw_import_wisdom_from_file_ptr_)(FILE*);
-        void  (*fftw_export_wisdom_to_file_ptr_)(FILE*);
-        void  (*fftw_cleanup_ptr_)(void);
-        void* (*fftw_malloc_ptr_)(size_t);
-        void  (*fftw_free_ptr_)(void* p);
-        void  (*fftw_execute_ptr_)(void*);
-        void* (*fftw_plan_dft_1d_ptr_)(int, void*, void*, int, unsigned);
-        void  (*fftw_destroy_plan_ptr_)(void*);
-
-        static hoNDFFT<T>* instance_;
-        boost::mutex mutex_;
-
-        // the fft and ifft shift pivot for a certain length
-        // [0 .. pivot-1] will be shifted to the right end
-        size_t fftshiftPivot(size_t len);
-        size_t ifftshiftPivot(size_t len);
-
-        // 1D
-        bool fftshift1D(const ComplexType* a, ComplexType* r, size_t x, size_t pivot);
-        bool ifftshift1D(const ComplexType* a, ComplexType* r, size_t x, size_t pivot);
-
-        bool fftshiftPivot1D(ComplexType* a, size_t x, size_t n, size_t pivot);
-        bool fftshiftPivot1D(const ComplexType* a, ComplexType* r, size_t x, size_t n, size_t pivot);
-
-        // 2D
-        bool fftshiftPivot2D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t n, unsigned pivotx, unsigned pivoty);
-        bool fftshiftPivot2D(ComplexType* a, size_t x, size_t y, size_t n, unsigned pivotx, unsigned pivoty);
-
-        bool fftshift2D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t n);
-        bool ifftshift2D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t n);
-
-        bool fftshift2D(ComplexType* a, size_t x, size_t y, size_t n);
-        bool ifftshift2D(ComplexType* a, size_t x, size_t y, size_t n);
-
-        // 3D
-        bool fftshiftPivot3D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t z, size_t n, unsigned pivotx, unsigned pivoty, unsigned pivotz);
-        bool fftshiftPivot3D(ComplexType* a, size_t x, size_t y, size_t z, size_t n, unsigned pivotx, unsigned pivoty, unsigned pivotz);
-
-        bool fftshift3D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t z, size_t n);
-        bool ifftshift3D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t z, size_t n);
-
-        bool fftshift3D(ComplexType* a, size_t x, size_t y, size_t z, size_t n);
-        bool ifftshift3D(ComplexType* a, size_t x, size_t y, size_t z, size_t n);
-
-        // forward: true, fft; false, inverse fft
-        bool fft1(hoNDArray< ComplexType >& a, bool forward);
-        bool fft2(hoNDArray< ComplexType >& a, bool forward);
-        bool fft3(hoNDArray< ComplexType >& a, bool forward);
-
-        bool fft1(hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, bool forward);
-        bool fft2(hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, bool forward);
-        bool fft3(hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, bool forward);
-
-        #ifdef USE_MKL
-
-        // configure the 1D/2D/3D MKL based fft handles
-        // x, y, z: the length of dimensions
-        // n: the number of transformation
-        // handle: the fft MKL handle
-        // fftPresion: DFTI_SINGLE or DFTI_DOUBLE
-        bool configureFFTHandle(long long NDim, MKL_LONG* dim, DFTI_CONFIG_VALUE fftPresion, size_t n, DFTI_DESCRIPTOR_HANDLE& handle);
-        bool configureFFTHandleOutOfPlace(long long NDim, MKL_LONG* dim, DFTI_CONFIG_VALUE fftPresion, size_t n, DFTI_DESCRIPTOR_HANDLE& handle);
-
-        #endif // USE_MKL
-    };
-}
-
-#endif //hoNDFFT_H
diff --git a/toolboxes/core/cpu/hoNDInterpolator.h b/toolboxes/core/cpu/hoNDInterpolator.h
new file mode 100644
index 0000000..9ff262e
--- /dev/null
+++ b/toolboxes/core/cpu/hoNDInterpolator.h
@@ -0,0 +1,307 @@
+/** \file       hoNDInterpolator.h
+    \brief      N-dimensional interpolator
+
+                Designed to work with hoNDArray and hoNDImage
+
+    \author     Hui Xue
+*/
+
+#pragma once
+
+#include "hoNDArray.h"
+#include "hoNDImage.h"
+#include "hoNDBoundaryHandler.h"
+#include "hoNDBSpline.h"
+
+namespace Gadgetron
+{
+    // define the image interpolation methods
+    enum GT_IMAGE_INTERPOLATOR
+    {
+        GT_IMAGE_INTERPOLATOR_NEARESTNEIGHBOR=35642, // a magic number
+        GT_IMAGE_INTERPOLATOR_LINEAR,
+        GT_IMAGE_INTERPOLATOR_BSPLINE
+    };
+
+    inline std::string getInterpolatorName(GT_IMAGE_INTERPOLATOR interp)
+    {
+        std::string name; // left empty when interp matches none of the cases below
+
+        switch (interp)
+        {
+            case GT_IMAGE_INTERPOLATOR_NEARESTNEIGHBOR:
+                name = "NearestNeighbor";
+                break;
+
+            case GT_IMAGE_INTERPOLATOR_LINEAR:
+                name = "Linear";
+                break;
+
+            case GT_IMAGE_INTERPOLATOR_BSPLINE:
+                name = "BSpline";
+                break;
+
+            default:
+                GADGET_ERROR_MSG("Unrecognized interpolator type : " << interp);
+        }
+
+        return name;
+    }
+
+    inline GT_IMAGE_INTERPOLATOR getInterpolatorType(const std::string& interp_name)
+    {
+        // start from a defined value: an unrecognized name is reported below and then falls back to linear
+        GT_IMAGE_INTERPOLATOR interp = GT_IMAGE_INTERPOLATOR_LINEAR;
+        if ( interp_name == "NearestNeighbor" )
+        {
+            interp = GT_IMAGE_INTERPOLATOR_NEARESTNEIGHBOR;
+        }
+        else if ( interp_name == "Linear" )
+        {
+            interp = GT_IMAGE_INTERPOLATOR_LINEAR;
+        }
+        else if ( interp_name == "BSpline" )
+        {
+            interp = GT_IMAGE_INTERPOLATOR_BSPLINE;
+        }
+        else
+        {
+            GADGET_ERROR_MSG("Unrecognized interpolator name : " << interp_name);
+        }
+
+        return interp;
+    }
+
+    /// all interpolation calls must be made thread-safe
+    template <typename ArrayType>
+    class hoNDInterpolator
+    {
+    public:
+
+        typedef hoNDInterpolator<ArrayType> Self;
+        typedef typename ArrayType::value_type T;
+        typedef hoNDBoundaryHandler<ArrayType> BoundHanlderType;
+        typedef typename ArrayType::coord_type coord_type;
+
+        hoNDInterpolator() : array_(NULL), data_(NULL), bh_(NULL), sx_(0), sy_(0), sz_(0), st_(0) {}
+
+        hoNDInterpolator(ArrayType& a, BoundHanlderType& bh)
+        {
+            array_ = &a;
+            data_ = array_->begin();
+            bh_ = &bh; bh_->setArray(a);
+
+            sx_ = array_->get_size(0);
+            sy_ = array_->get_size(1);
+            sz_ = array_->get_size(2);
+            st_ = array_->get_size(3);
+        }
+
+        virtual ~hoNDInterpolator() { array_ = NULL; bh_ = NULL; }
+
+        virtual void setArray(ArrayType& a)
+        {
+            array_ = &a;
+            data_ = array_->begin();
+            sx_ = array_->get_size(0);
+            sy_ = array_->get_size(1);
+            sz_ = array_->get_size(2);
+            st_ = array_->get_size(3);
+            if ( bh_!=NULL ) bh_->setArray(a); // rebind the handler too, as the constructor and setBoundaryHandler do
+        }
+
+        virtual void setBoundaryHandler(BoundHanlderType& bh) { bh_ = &bh; if ( array_!=NULL ) bh_->setArray(*array_); }
+
+        /// access the pixel value
+        virtual T operator()( const coord_type* pos ) = 0;
+        virtual T operator()( const std::vector<coord_type>& pos ) = 0;
+        virtual T operator()( coord_type x ) = 0;
+        virtual T operator()( coord_type x, coord_type y ) = 0;
+        virtual T operator()( coord_type x, coord_type y, coord_type z ) = 0;
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s ) = 0;
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p ) = 0;
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r ) = 0;
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a ) = 0;
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a, coord_type q ) = 0;
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a, coord_type q, coord_type u ) = 0;
+
+    protected:
+
+        ArrayType* array_;
+        T* data_;
+        BoundHanlderType* bh_;
+
+        size_t sx_;
+        size_t sy_;
+        size_t sz_;
+        size_t st_;
+    };
+
+    template <typename ArrayType>
+    class hoNDInterpolatorNearestNeighbor : public hoNDInterpolator<ArrayType>
+    {
+    public:
+
+        typedef hoNDInterpolator<ArrayType> BaseClass;
+        typedef hoNDInterpolatorNearestNeighbor<ArrayType> Self;
+        typedef typename BaseClass::T T;
+        typedef typename BaseClass::coord_type coord_type;
+        typedef typename BaseClass::BoundHanlderType BoundHanlderType;
+
+        hoNDInterpolatorNearestNeighbor() : BaseClass() {}
+        hoNDInterpolatorNearestNeighbor(ArrayType& a, BoundHanlderType& bh) : BaseClass(a, bh) {}
+        virtual ~hoNDInterpolatorNearestNeighbor() {}
+
+        /// access the pixel value
+        virtual T operator()( const coord_type* pos );
+        virtual T operator()( const std::vector<coord_type>& pos );
+        virtual T operator()( coord_type x );
+        virtual T operator()( coord_type x, coord_type y );
+        virtual T operator()( coord_type x, coord_type y, coord_type z );
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s );
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p );
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r );
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a );
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a, coord_type q );
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a, coord_type q, coord_type u );
+
+    protected:
+
+        using BaseClass::array_;
+        using BaseClass::data_;
+        using BaseClass::bh_;
+
+        using BaseClass::sx_;
+        using BaseClass::sy_;
+        using BaseClass::sz_;
+        using BaseClass::st_;
+    };
+
+    template <typename ArrayType>
+    class hoNDInterpolatorLinear : public hoNDInterpolator<ArrayType>
+    {
+    public:
+
+        typedef hoNDInterpolator<ArrayType> BaseClass;
+        typedef hoNDInterpolatorLinear<ArrayType> Self;
+        typedef typename BaseClass::T T;
+        typedef typename BaseClass::coord_type coord_type;
+        typedef typename BaseClass::BoundHanlderType BoundHanlderType;
+
+        hoNDInterpolatorLinear() : BaseClass(), number_of_points_(0) {}
+        hoNDInterpolatorLinear(ArrayType& a, BoundHanlderType& bh) : BaseClass(a, bh)
+        {
+            number_of_points_ = 1<<a.get_number_of_dimensions();
+        }
+        virtual ~hoNDInterpolatorLinear() {}
+        /// rebinding the array must also refresh number_of_points_, which the base class does not know about
+        virtual void setArray(ArrayType& a) { BaseClass::setArray(a); number_of_points_ = 1<<a.get_number_of_dimensions(); }
+
+        /// access the pixel value
+        virtual T operator()( const coord_type* pos );
+        virtual T operator()( const std::vector<coord_type>& pos );
+        virtual T operator()( coord_type x );
+        virtual T operator()( coord_type x, coord_type y );
+        virtual T operator()( coord_type x, coord_type y, coord_type z );
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s );
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p );
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r );
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a );
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a, coord_type q );
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a, coord_type q, coord_type u );
+
+    protected:
+
+        using BaseClass::array_;
+        using BaseClass::data_;
+        using BaseClass::bh_;
+
+        using BaseClass::sx_;
+        using BaseClass::sy_;
+        using BaseClass::sz_;
+        using BaseClass::st_;
+
+        // number of points involved in interpolation
+        unsigned int number_of_points_;
+    };
+
+    template <typename ArrayType, unsigned int D>
+    class hoNDInterpolatorBSpline : public hoNDInterpolator<ArrayType>
+    {
+    public:
+
+        typedef hoNDInterpolator<ArrayType> BaseClass;
+        typedef hoNDInterpolatorBSpline<ArrayType, D> Self;
+        typedef typename BaseClass::T T;
+        typedef typename BaseClass::coord_type coord_type;
+        typedef typename BaseClass::BoundHanlderType BoundHanlderType;
+
+        hoNDInterpolatorBSpline(unsigned int order=5) : BaseClass(), order_(order) { derivative_.resize(D, 0); }
+        hoNDInterpolatorBSpline(ArrayType& a, BoundHanlderType& bh, unsigned int order=5);
+        virtual ~hoNDInterpolatorBSpline();
+
+        virtual void setArray(ArrayType& a);
+
+        void setDerivative(const std::vector<unsigned int>& derivative) { GADGET_CHECK_THROW(derivative.size()>=D); derivative_ = derivative; }
+
+        /// access the pixel value
+        virtual T operator()( const coord_type* pos );
+        virtual T operator()( const std::vector<coord_type>& pos );
+        virtual T operator()( coord_type x );
+        virtual T operator()( coord_type x, coord_type y );
+        virtual T operator()( coord_type x, coord_type y, coord_type z );
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s );
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p );
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r );
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a );
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a, coord_type q );
+        virtual T operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a, coord_type q, coord_type u );
+
+     protected:
+
+        using BaseClass::array_;
+        using BaseClass::data_;
+        using BaseClass::bh_;
+
+        using BaseClass::sx_;
+        using BaseClass::sy_;
+        using BaseClass::sz_;
+        using BaseClass::st_;
+
+        hoNDBSpline<T, D> bspline_;
+        std::vector<size_t> dimension_;
+        std::vector<unsigned int> derivative_;
+        unsigned int order_;
+        hoNDArray<T> coeff_;
+    };
+
+    template <typename ArrayType, unsigned int D>
+    inline hoNDInterpolator<ArrayType>* createInterpolator(GT_IMAGE_INTERPOLATOR interp)
+    {
+        hoNDInterpolator<ArrayType>* res = NULL; // stays NULL for an unrecognized type; otherwise points to an object allocated with new
+
+        switch (interp)
+        {
+            case GT_IMAGE_INTERPOLATOR_NEARESTNEIGHBOR:
+                res = new hoNDInterpolatorNearestNeighbor<ArrayType>();
+                break;
+
+            case GT_IMAGE_INTERPOLATOR_LINEAR:
+                res = new hoNDInterpolatorLinear<ArrayType>();
+                break;
+
+            case GT_IMAGE_INTERPOLATOR_BSPLINE:
+                res = new hoNDInterpolatorBSpline<ArrayType, D>();
+                break;
+
+            default:
+                GADGET_ERROR_MSG("Unrecognized interpolator type : " << interp);
+        }
+
+        return res;
+    }
+}
+
+#include "hoNDInterpolatorNearestNeighbor.hxx"
+#include "hoNDInterpolatorLinear.hxx"
+#include "hoNDInterpolatorBSpline.hxx"
diff --git a/toolboxes/core/cpu/hoNDInterpolatorBSpline.hxx b/toolboxes/core/cpu/hoNDInterpolatorBSpline.hxx
new file mode 100644
index 0000000..1093535
--- /dev/null
+++ b/toolboxes/core/cpu/hoNDInterpolatorBSpline.hxx
@@ -0,0 +1,339 @@
+/** \file       hoNDInterpolatorBSpline.hxx
+    \brief      N-dimensional BSpline interpolator
+
+                Designed to work with hoNDArray and hoNDImage
+
+    \author     Hui Xue
+*/
+
+namespace Gadgetron
+{
+    template <typename ArrayType, unsigned int D> 
+    hoNDInterpolatorBSpline<ArrayType, D>::hoNDInterpolatorBSpline(ArrayType& a, BoundHanlderType& bh, unsigned int order) : BaseClass(a, bh), order_(order)
+    {
+        // Compute the B-spline coefficients of a up front; the evaluation overloads read coeff_.
+        bspline_.computeBSplineCoefficients(a, order_, this->coeff_);
+
+        dimension_.resize(D);
+        for ( unsigned int dim=0; dim<D; dim++ )
+        {
+            dimension_[dim] = a.get_size(dim);
+        }
+
+        // derivative_ gets D entries, all zero; it is passed to evaluateBSpline on every call.
+        derivative_.resize(D, 0);
+    }
+
+    template <typename ArrayType, unsigned int D> 
+    hoNDInterpolatorBSpline<ArrayType, D>::~hoNDInterpolatorBSpline()
+    {   // empty body: this destructor performs no explicit cleanup
+    }
+
+    template <typename ArrayType, unsigned int D> 
+    void hoNDInterpolatorBSpline<ArrayType, D>::setArray(ArrayType& a)
+    {
+        // NOTE(review): only array_ is rebound here; sx_ / sy_ (read by the 2-D overload) are not updated in this function - confirm they are refreshed elsewhere.
+        this->array_ = &a;
+
+        dimension_.resize(D);
+        for ( unsigned int dim=0; dim<D; dim++ )
+        {
+            dimension_[dim] = a.get_size(dim);
+        }
+
+        // Recompute the coefficients so that coeff_ matches the newly attached array.
+        bspline_.computeBSplineCoefficients(a, this->order_, this->coeff_);
+    }
+
+    template <typename ArrayType, unsigned int D> 
+    inline typename hoNDInterpolatorBSpline<ArrayType, D>::T hoNDInterpolatorBSpline<ArrayType, D>::operator()( const coord_type* pos )
+    {
+        // Evaluate the spline at pos; the first D entries of pos are read.
+        // anchor holds the floor of every coordinate. When any anchor lies outside
+        // [0, size) of its dimension the boundary handler is queried at anchor instead.
+        // NOTE(review): the fixed-arity overloads test against size-1 rather than
+        // size - confirm which upper bound is intended.
+        std::vector<gt_index_type> anchor(D);
+        bool insideArray = true;
+
+        for ( unsigned int dim=0; dim<D; dim++ )
+        {
+            anchor[dim] = static_cast<gt_index_type>(std::floor(pos[dim]));
+
+            // keep filling anchor after a miss: the boundary handler receives the whole vector
+            if ( insideArray && ( anchor[dim]<0 || anchor[dim]>=array_->get_size(dim) ) )
+            {
+                insideArray = false;
+            }
+        }
+
+        if ( !insideArray )
+        {
+            // value at the anchor grid point, as supplied by the boundary handler
+            return (*bh_)(anchor);
+        }
+
+        return bspline_.evaluateBSpline(coeff_.begin(), dimension_, order_, derivative_, pos);
+    }
+
+    template <typename ArrayType, unsigned int D> 
+    inline typename hoNDInterpolatorBSpline<ArrayType, D>::T hoNDInterpolatorBSpline<ArrayType, D>::operator()( const std::vector<coord_type>& pos )
+    {   // forwards the vector's storage to the raw-pointer overload; pos must not be empty
+        return (*this)(&pos[0]);
+    }
+
+    template <typename ArrayType, unsigned int D> 
+    inline typename hoNDInterpolatorBSpline<ArrayType, D>::T hoNDInterpolatorBSpline<ArrayType, D>::operator()( coord_type x )
+    {
+        // 1-D evaluation: spline value at x, or the boundary handler's value at floor(x) when that anchor is outside [0, size-1)
+        gt_index_type ix = static_cast<gt_index_type>(std::floor(x));
+        const bool outside = ( ix<0 || ix>=(gt_index_type)array_->get_size(0)-1 );
+
+        if ( outside )
+        {
+            return (*bh_)(ix);
+        }
+
+        return bspline_.evaluateBSpline(coeff_.begin(), dimension_[0], order_, derivative_[0], x);
+    }
+
+    template <typename ArrayType, unsigned int D> 
+    inline typename hoNDInterpolatorBSpline<ArrayType, D>::T hoNDInterpolatorBSpline<ArrayType, D>::operator()( coord_type x, coord_type y )
+    {
+        // 2-D evaluation at (x, y).
+        //
+        // ix / iy are the floors of x / y. When both satisfy 0 <= i < extent-1 the
+        // B-spline is evaluated at the unmodified (x, y); in every other case the
+        // boundary handler supplies the value at the grid point (ix, iy).
+        //
+        // The range test reads the members sx_ / sy_, not array_->get_size().
+
+        gt_index_type ix = static_cast<gt_index_type>(std::floor(x));
+        gt_index_type iy = static_cast<gt_index_type>(std::floor(y));
+
+        bool inside = ( ix>=0 && ix<sx_-1 );
+        inside = inside && ( iy>=0 && iy<sy_-1 );
+
+        if ( !inside )
+        {
+            // anchor out of range: no spline evaluation takes place
+            return (*bh_)(ix, iy);
+        }
+
+        // anchor in range: evaluate on the coefficient buffer
+        return bspline_.evaluateBSpline(coeff_.begin(), 
+            dimension_[0], dimension_[1], 
+            order_, 
+            derivative_[0], derivative_[1], 
+            x, y);
+    }
+
+    template <typename ArrayType, unsigned int D> 
+    inline typename hoNDInterpolatorBSpline<ArrayType, D>::T hoNDInterpolatorBSpline<ArrayType, D>::operator()( coord_type x, coord_type y, coord_type z )
+    {
+        // 3-D evaluation: spline value at (x, y, z), or the boundary handler's value at the anchor when any index is outside [0, size-1)
+        gt_index_type ix = static_cast<gt_index_type>(std::floor(x));
+        gt_index_type iy = static_cast<gt_index_type>(std::floor(y));
+        gt_index_type iz = static_cast<gt_index_type>(std::floor(z));
+
+        const bool outside = ix<0 || ix>=(gt_index_type)array_->get_size(0)-1 
+            || iy<0 || iy>=(gt_index_type)array_->get_size(1)-1 
+            || iz<0 || iz>=(gt_index_type)array_->get_size(2)-1;
+
+        if ( outside )
+        {
+            return (*bh_)(ix, iy, iz);
+        }
+        return bspline_.evaluateBSpline(coeff_.begin(), 
+            dimension_[0], dimension_[1], dimension_[2], 
+            order_, 
+            derivative_[0], derivative_[1], derivative_[2], 
+            x, y, z);
+    }
+
+    template <typename ArrayType, unsigned int D> 
+    inline typename hoNDInterpolatorBSpline<ArrayType, D>::T hoNDInterpolatorBSpline<ArrayType, D>::operator()( coord_type x, coord_type y, coord_type z, coord_type s )
+    {
+        // 4-D evaluation: spline value at (x, y, z, s), or the boundary handler's value at the anchor when any index is outside [0, size-1)
+        gt_index_type ix = static_cast<gt_index_type>(std::floor(x));
+        gt_index_type iy = static_cast<gt_index_type>(std::floor(y));
+        gt_index_type iz = static_cast<gt_index_type>(std::floor(z));
+        gt_index_type is = static_cast<gt_index_type>(std::floor(s));
+
+        const bool outside = ix<0 || ix>=(gt_index_type)array_->get_size(0)-1 
+            || iy<0 || iy>=(gt_index_type)array_->get_size(1)-1 
+            || iz<0 || iz>=(gt_index_type)array_->get_size(2)-1 
+            || is<0 || is>=(gt_index_type)array_->get_size(3)-1;
+
+        if ( outside )
+        {
+            return (*bh_)(ix, iy, iz, is);
+        }
+        return bspline_.evaluateBSpline(coeff_.begin(), 
+            dimension_[0], dimension_[1], dimension_[2], dimension_[3], 
+            order_, 
+            derivative_[0], derivative_[1], derivative_[2], derivative_[3], 
+            x, y, z, s);
+    }
+
+    template <typename ArrayType, unsigned int D> 
+    inline typename hoNDInterpolatorBSpline<ArrayType, D>::T hoNDInterpolatorBSpline<ArrayType, D>::operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p )
+    {
+        // 5-D evaluation: spline value at (x, y, z, s, p), or the boundary handler's value at the anchor when any index is outside [0, size-1)
+        gt_index_type ix = static_cast<gt_index_type>(std::floor(x));
+        gt_index_type iy = static_cast<gt_index_type>(std::floor(y));
+        gt_index_type iz = static_cast<gt_index_type>(std::floor(z));
+        gt_index_type is = static_cast<gt_index_type>(std::floor(s));
+        gt_index_type ip = static_cast<gt_index_type>(std::floor(p));
+
+        const bool outside = ix<0 || ix>=(gt_index_type)array_->get_size(0)-1 
+            || iy<0 || iy>=(gt_index_type)array_->get_size(1)-1 
+            || iz<0 || iz>=(gt_index_type)array_->get_size(2)-1 
+            || is<0 || is>=(gt_index_type)array_->get_size(3)-1 
+            || ip<0 || ip>=(gt_index_type)array_->get_size(4)-1;
+
+        if ( outside )
+        {
+            return (*bh_)(ix, iy, iz, is, ip);
+        }
+        return bspline_.evaluateBSpline(coeff_.begin(), 
+            dimension_[0], dimension_[1], dimension_[2], dimension_[3], dimension_[4], 
+            order_, 
+            derivative_[0], derivative_[1], derivative_[2], derivative_[3], derivative_[4], 
+            x, y, z, s, p);
+    }
+
+    template <typename ArrayType, unsigned int D> 
+    inline typename hoNDInterpolatorBSpline<ArrayType, D>::T hoNDInterpolatorBSpline<ArrayType, D>::operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r )
+    {
+        // 6-D evaluation: spline value at (x, y, z, s, p, r), or the boundary handler's value at the anchor when any index is outside [0, size-1)
+        gt_index_type ix = static_cast<gt_index_type>(std::floor(x));
+        gt_index_type iy = static_cast<gt_index_type>(std::floor(y));
+        gt_index_type iz = static_cast<gt_index_type>(std::floor(z));
+        gt_index_type is = static_cast<gt_index_type>(std::floor(s));
+        gt_index_type ip = static_cast<gt_index_type>(std::floor(p));
+        gt_index_type ir = static_cast<gt_index_type>(std::floor(r));
+
+        const bool outside = ix<0 || ix>=(gt_index_type)array_->get_size(0)-1 
+            || iy<0 || iy>=(gt_index_type)array_->get_size(1)-1 
+            || iz<0 || iz>=(gt_index_type)array_->get_size(2)-1 
+            || is<0 || is>=(gt_index_type)array_->get_size(3)-1 
+            || ip<0 || ip>=(gt_index_type)array_->get_size(4)-1 
+            || ir<0 || ir>=(gt_index_type)array_->get_size(5)-1;
+
+        if ( outside )
+        {
+            return (*bh_)(ix, iy, iz, is, ip, ir);
+        }
+        return bspline_.evaluateBSpline(coeff_.begin(), 
+            dimension_[0], dimension_[1], dimension_[2], dimension_[3], dimension_[4], dimension_[5], 
+            order_, 
+            derivative_[0], derivative_[1], derivative_[2], derivative_[3], derivative_[4], derivative_[5], 
+            x, y, z, s, p, r);
+    }
+
+    template <typename ArrayType, unsigned int D> 
+    inline typename hoNDInterpolatorBSpline<ArrayType, D>::T hoNDInterpolatorBSpline<ArrayType, D>::operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a )
+    {
+        // 7-D evaluation at (x, y, z, s, p, r, a); anchor[i] is the floor of the i-th coordinate.
+        gt_index_type anchor[7];
+        anchor[0] = static_cast<gt_index_type>(std::floor(x));
+        anchor[1] = static_cast<gt_index_type>(std::floor(y));
+        anchor[2] = static_cast<gt_index_type>(std::floor(z));
+        anchor[3] = static_cast<gt_index_type>(std::floor(s));
+        anchor[4] = static_cast<gt_index_type>(std::floor(p));
+        anchor[5] = static_cast<gt_index_type>(std::floor(r));
+        anchor[6] = static_cast<gt_index_type>(std::floor(a));
+
+        if ( anchor[0]>=0 && anchor[0]<(gt_index_type)array_->get_size(0)-1 
+            && anchor[1]>=0 && anchor[1]<(gt_index_type)array_->get_size(1)-1 
+            && anchor[2]>=0 && anchor[2]<(gt_index_type)array_->get_size(2)-1 
+            && anchor[3]>=0 && anchor[3]<(gt_index_type)array_->get_size(3)-1 
+            && anchor[4]>=0 && anchor[4]<(gt_index_type)array_->get_size(4)-1 
+            && anchor[5]>=0 && anchor[5]<(gt_index_type)array_->get_size(5)-1
+            && anchor[6]>=0 && anchor[6]<(gt_index_type)array_->get_size(6)-1 )
+        {
+            return bspline_.evaluateBSpline(coeff_.begin(), 
+                dimension_[0], dimension_[1], dimension_[2], dimension_[3], dimension_[4], dimension_[5], dimension_[6], 
+                order_, 
+                derivative_[0], derivative_[1], derivative_[2], derivative_[3], derivative_[4], derivative_[5], derivative_[6], 
+                x, y, z, s, p, r, a);
+        }
+        else // some anchor is outside [0, size-1): query the boundary handler with exactly one index per dimension, in order
+        {
+            return (*bh_)(anchor[0], anchor[1], anchor[2], anchor[3], anchor[4], anchor[5], anchor[6]);
+        }
+    }
+
+    template <typename ArrayType, unsigned int D> 
+    inline typename hoNDInterpolatorBSpline<ArrayType, D>::T hoNDInterpolatorBSpline<ArrayType, D>::operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a, coord_type q )
+    {
+        // 8-D evaluation at (x, y, z, s, p, r, a, q); anchor[i] is the floor of the i-th coordinate.
+        gt_index_type anchor[8];
+        anchor[0] = static_cast<gt_index_type>(std::floor(x));
+        anchor[1] = static_cast<gt_index_type>(std::floor(y));
+        anchor[2] = static_cast<gt_index_type>(std::floor(z));
+        anchor[3] = static_cast<gt_index_type>(std::floor(s));
+        anchor[4] = static_cast<gt_index_type>(std::floor(p));
+        anchor[5] = static_cast<gt_index_type>(std::floor(r));
+        anchor[6] = static_cast<gt_index_type>(std::floor(a));
+        anchor[7] = static_cast<gt_index_type>(std::floor(q));
+
+        if ( anchor[0]>=0 && anchor[0]<(gt_index_type)array_->get_size(0)-1 
+            && anchor[1]>=0 && anchor[1]<(gt_index_type)array_->get_size(1)-1 
+            && anchor[2]>=0 && anchor[2]<(gt_index_type)array_->get_size(2)-1 
+            && anchor[3]>=0 && anchor[3]<(gt_index_type)array_->get_size(3)-1 
+            && anchor[4]>=0 && anchor[4]<(gt_index_type)array_->get_size(4)-1 
+            && anchor[5]>=0 && anchor[5]<(gt_index_type)array_->get_size(5)-1 
+            && anchor[6]>=0 && anchor[6]<(gt_index_type)array_->get_size(6)-1 
+            && anchor[7]>=0 && anchor[7]<(gt_index_type)array_->get_size(7)-1 )
+        {
+            return bspline_.evaluateBSpline(coeff_.begin(), 
+                dimension_[0], dimension_[1], dimension_[2], dimension_[3], dimension_[4], dimension_[5], dimension_[6], dimension_[7], 
+                order_, 
+                derivative_[0], derivative_[1], derivative_[2], derivative_[3], derivative_[4], derivative_[5], derivative_[6], derivative_[7], 
+                x, y, z, s, p, r, a, q);
+        }
+        else // some anchor is outside [0, size-1): query the boundary handler with exactly one index per dimension, in order
+        {
+            return (*bh_)(anchor[0], anchor[1], anchor[2], anchor[3], anchor[4], anchor[5], anchor[6], anchor[7]);
+        }
+    }
+
+    template <typename ArrayType, unsigned int D> 
+    inline typename hoNDInterpolatorBSpline<ArrayType, D>::T hoNDInterpolatorBSpline<ArrayType, D>::operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a, coord_type q, coord_type u )
+    {
+        // 9-D evaluation at (x, y, z, s, p, r, a, q, u); cell[i] is the floor of the i-th coordinate.
+        gt_index_type cell[9];
+        cell[0] = static_cast<gt_index_type>(std::floor(x));
+        cell[1] = static_cast<gt_index_type>(std::floor(y));
+        cell[2] = static_cast<gt_index_type>(std::floor(z));
+        cell[3] = static_cast<gt_index_type>(std::floor(s));
+        cell[4] = static_cast<gt_index_type>(std::floor(p));
+        cell[5] = static_cast<gt_index_type>(std::floor(r));
+        cell[6] = static_cast<gt_index_type>(std::floor(a));
+        cell[7] = static_cast<gt_index_type>(std::floor(q));
+        cell[8] = static_cast<gt_index_type>(std::floor(u));
+
+        const bool outside = cell[0]<0 || cell[0]>=(gt_index_type)array_->get_size(0)-1 
+            || cell[1]<0 || cell[1]>=(gt_index_type)array_->get_size(1)-1 
+            || cell[2]<0 || cell[2]>=(gt_index_type)array_->get_size(2)-1 
+            || cell[3]<0 || cell[3]>=(gt_index_type)array_->get_size(3)-1 
+            || cell[4]<0 || cell[4]>=(gt_index_type)array_->get_size(4)-1 
+            || cell[5]<0 || cell[5]>=(gt_index_type)array_->get_size(5)-1 
+            || cell[6]<0 || cell[6]>=(gt_index_type)array_->get_size(6)-1 
+            || cell[7]<0 || cell[7]>=(gt_index_type)array_->get_size(7)-1 
+            || cell[8]<0 || cell[8]>=(gt_index_type)array_->get_size(8)-1;
+
+        if ( outside )
+        {
+            return (*bh_)(cell[0], cell[1], cell[2], cell[3], cell[4], cell[5], cell[6], cell[7], cell[8]);
+        }
+
+        return bspline_.evaluateBSpline(coeff_.begin(), 
+            dimension_[0], dimension_[1], dimension_[2], dimension_[3], dimension_[4], dimension_[5], dimension_[6], dimension_[7], dimension_[8], 
+            order_, 
+            derivative_[0], derivative_[1], derivative_[2], derivative_[3], derivative_[4], derivative_[5], derivative_[6], derivative_[7], derivative_[8], 
+            x, y, z, s, p, r, a, q, u);
+    }
+}
diff --git a/toolboxes/core/cpu/hoNDInterpolatorLinear.hxx b/toolboxes/core/cpu/hoNDInterpolatorLinear.hxx
new file mode 100644
index 0000000..bc846ae
--- /dev/null
+++ b/toolboxes/core/cpu/hoNDInterpolatorLinear.hxx
@@ -0,0 +1,874 @@
+/** \file       hoNDInterpolatorLinear.hxx
+    \brief      N-dimensional linear interpolator
+
+                Designed to work with hoNDArray and hoNDImage
+
+    \author     Hui Xue
+*/
+
+#ifdef _WIN32
+    #include "malloc.h"
+#else
+    #include "alloca.h"
+#endif // _WIN32
+
+namespace Gadgetron
+{
+    /// hoNDInterpolatorLinear
+
+    template <typename ArrayType> 
+    typename hoNDInterpolatorLinear<ArrayType>::T hoNDInterpolatorLinear<ArrayType>::operator()( const coord_type* pos )
+    {
+        // General N-D multilinear interpolation; pos supplies one coordinate per array dimension.
+        unsigned int numDims = array_->get_number_of_dimensions();
+
+        // Scratch buffers come from alloca, i.e. they are released when this function returns.
+        gt_index_type* anchor = reinterpret_cast<gt_index_type*>(alloca(numDims * sizeof(gt_index_type)));
+        coord_type* upperWeight = reinterpret_cast<coord_type*>(alloca(numDims * sizeof(coord_type)));
+        coord_type* lowerWeight = reinterpret_cast<coord_type*>(alloca(numDims * sizeof(coord_type)));
+
+        unsigned int dim;
+        for ( dim=0; dim<numDims; dim++ )
+        {
+            anchor[dim] = static_cast<gt_index_type>(std::floor(pos[dim]));
+            upperWeight[dim] = pos[dim] - anchor[dim];
+            lowerWeight[dim] = coord_type(1.0) - upperWeight[dim];
+        }
+
+        // Direct array access needs anchor and anchor+1 to be valid indices along every dimension.
+        bool insideArray = true;
+        for ( dim=0; dim<numDims; dim++ )
+        {
+            if ( anchor[dim]<0 || anchor[dim]>=array_->get_size(dim)-1 )
+            {
+                insideArray = false;
+                break;
+            }
+        }
+
+        T res(0);
+        coord_type cornerWeight(1.0);
+        unsigned int corner;
+
+        if ( !insideArray )
+        {
+            // At least one index is out of range: every corner is fetched through the boundary handler.
+            std::vector<gt_index_type> ind(numDims);
+
+            for ( corner=0; corner<number_of_points_; corner++ )
+            {
+                // bit k of corner selects anchor (0) or anchor+1 (1) along dimension k
+                unsigned int bits = corner;
+                cornerWeight = coord_type(1.0);
+
+                for ( dim=0; dim<numDims; dim++ )
+                {
+                    if ( (bits & 1) == 0 )
+                    {
+                        ind[dim] = anchor[dim];
+                        cornerWeight *= lowerWeight[dim];
+                    }
+                    else
+                    {
+                        ind[dim] = anchor[dim]+1;
+                        cornerWeight *= upperWeight[dim];
+                    }
+                    bits >>= 1;
+                }
+
+                res += cornerWeight * (*bh_)(ind);
+            }
+        }
+        else
+        {
+            // All corners are valid indices: read them directly from the array.
+            std::vector<size_t> ind(numDims);
+
+            for ( corner=0; corner<number_of_points_; corner++ )
+            {
+                unsigned int bits = corner;
+                cornerWeight = coord_type(1.0);
+
+                for ( dim=0; dim<numDims; dim++ )
+                {
+                    if ( (bits & 1) == 0 )
+                    {
+                        ind[dim] = anchor[dim];
+                        cornerWeight *= lowerWeight[dim];
+                    }
+                    else
+                    {
+                        ind[dim] = anchor[dim]+1;
+                        cornerWeight *= upperWeight[dim];
+                    }
+                    bits >>= 1;
+                }
+
+                res += cornerWeight * (*array_)(ind);
+            }
+        }
+
+        return res;
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDInterpolatorLinear<ArrayType>::T hoNDInterpolatorLinear<ArrayType>::operator()( const std::vector<coord_type>& pos )
+    {   // forwards the vector's storage to the raw-pointer overload; pos must not be empty
+        return (*this)(&pos[0]);
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDInterpolatorLinear<ArrayType>::T hoNDInterpolatorLinear<ArrayType>::operator()( coord_type x )
+    {
+        // 1-D linear interpolation between the samples at floor(x) and floor(x)+1; out-of-range samples come from the boundary handler
+        gt_index_type ix = static_cast<gt_index_type>(std::floor(x));
+        coord_type dx = x - ix;
+
+        if ( ix<0 || ix>=(gt_index_type)sx_-1 )
+        {
+            return ( (*bh_)(ix)*(1-dx) + (*bh_)(ix+1)*dx );
+        }
+
+        size_t lower = size_t(ix);
+        return ( (*array_)( lower )*(1-dx) + (*array_)( lower+1 )*dx );
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDInterpolatorLinear<ArrayType>::T hoNDInterpolatorLinear<ArrayType>::operator()( coord_type x, coord_type y )
+    {
+        // Bilinear interpolation at (x, y).
+        // (ix, iy) is the lower corner of the cell containing the point and dx / dy are
+        // the fractional offsets inside that cell; each of the four corners is weighted
+        // by the product of its per-axis weights.
+
+        gt_index_type ix = static_cast<gt_index_type>(std::floor(x));
+        gt_index_type iy = static_cast<gt_index_type>(std::floor(y));
+
+        coord_type dx = x - ix;
+        coord_type dy = y - iy;
+
+        // weights of the lower neighbour along each axis
+        coord_type wx0 = coord_type(1.0)-dx;
+        coord_type wy0 = coord_type(1.0)-dy;
+
+        // ix+1 and iy+1 are read as well, hence the -1 in the upper bounds
+        if ( ix<0 || ix>=sx_-1 || iy<0 || iy>=sy_-1 )
+        {
+            // the cell is not fully inside the array: all four corners come from the boundary handler
+            return (    ((*bh_)(ix, iy       )   *   wx0    *wy0
+                    +   (*bh_)(ix+1, iy      )   *   dx     *wy0)
+                    +   ((*bh_)(ix, iy+1     )   *   wx0    *dy
+                    +   (*bh_)(ix+1, iy+1    )   *   dx     *dy) );
+        }
+
+        // interior cell: index the linear buffer returned by array_->begin() directly;
+        // element (ix, iy) sits at ix + iy*sx_, so the next row is sx_ elements further on
+        T* data = array_->begin();
+        size_t offset = ix + iy*sx_;
+
+        // same grouping as in the boundary branch: (two lower-row terms) + (two upper-row terms)
+        return (    (data[offset]       *   wx0    *wy0
+                +   data[offset+1]      *   dx     *wy0)
+                +   (data[offset+sx_]   *   wx0    *dy
+                +   data[offset+sx_+1]  *   dx     *dy) );
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDInterpolatorLinear<ArrayType>::T hoNDInterpolatorLinear<ArrayType>::operator()( coord_type x, coord_type y, coord_type z )
+    {
+        // Trilinear interpolation at (x, y, z).
+        // The eight corners of the cell whose lower corner is (ix, iy, iz) are blended;
+        // a corner's weight is the product of its per-axis weights (d* towards the upper
+        // neighbour, 1-d* towards the lower one).
+
+        gt_index_type ix = static_cast<gt_index_type>(std::floor(x));
+        gt_index_type iy = static_cast<gt_index_type>(std::floor(y));
+        gt_index_type iz = static_cast<gt_index_type>(std::floor(z));
+
+        coord_type dx = x - ix;
+        coord_type dy = y - iy;
+        coord_type dz = z - iz;
+
+        // weights of the lower neighbour along each axis
+        coord_type wx0 = coord_type(1.0)-dx;
+        coord_type wy0 = coord_type(1.0)-dy;
+        coord_type wz0 = coord_type(1.0)-dz;
+
+        // ix+1, iy+1 and iz+1 are read as well, hence the -1 in the upper bounds
+        if ( ix<0 || ix>=sx_-1 
+            || iy<0 || iy>=sy_-1 
+            || iz<0 || iz>=sz_-1 )
+        {
+            // the cell is not fully inside the array: all eight corners come from the boundary handler
+            return (    ((*bh_)(ix,   iy,     iz   )   *   wx0    *wy0   *wz0
+                    +   (*bh_)(ix+1, iy,     iz    )   *   dx     *wy0   *wz0)
+                    +   ((*bh_)(ix,   iy+1,   iz   )   *   wx0    *dy    *wz0
+                    +   (*bh_)(ix+1, iy+1,   iz    )   *   dx     *dy    *wz0)
+                    +   ((*bh_)(ix,   iy,     iz+1 )   *   wx0    *wy0   *dz
+                    +   (*bh_)(ix+1, iy,     iz+1  )   *   dx     *wy0   *dz)
+                    +   ((*bh_)(ix,   iy+1,   iz+1 )   *   wx0    *dy    *dz
+                    +   (*bh_)(ix+1, iy+1,   iz+1  )   *   dx     *dy    *dz) );
+        }
+
+        // interior cell: read data_ directly; element (ix, iy, iz) sits at
+        // ix + iy*sx_ + iz*sx_*sy_, so +sx_ steps in y and +sx_*sy_ steps in z
+        size_t offset = ix + iy*sx_ + iz*sx_*sy_;
+
+        // same grouping as in the boundary branch: four bracketed pairs, lower z first
+        return (    (data_[offset]              *   wx0    *wy0   *wz0
+                +   data_[offset+1]             *   dx     *wy0   *wz0)
+                +   (data_[offset+sx_]          *   wx0    *dy    *wz0
+                +   data_[offset+sx_+1]         *   dx     *dy    *wz0)
+                +   (data_[offset+sx_*sy_]      *   wx0    *wy0   *dz
+                +   data_[offset+sx_*sy_+1]     *   dx     *wy0   *dz)
+                +   (data_[offset+sx_*sy_+sx_]  *   wx0    *dy    *dz
+                +   data_[offset+sx_*sy_+sx_+1] *   dx     *dy    *dz) );
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDInterpolatorLinear<ArrayType>::T hoNDInterpolatorLinear<ArrayType>::operator()( coord_type x, coord_type y, coord_type z, coord_type s )
+    {   // 4-D linear interpolation at (x, y, z, s); per axis: lower index i*, fractional offset d*, complementary weight d*_prime = 1 - d*
+        gt_index_type ix = static_cast<gt_index_type>(std::floor(x));
+        coord_type dx = x - ix;
+        coord_type dx_prime = coord_type(1.0)-dx;
+        // second axis (y)
+        gt_index_type iy = static_cast<gt_index_type>(std::floor(y));
+        coord_type dy = y - iy;
+        coord_type dy_prime = coord_type(1.0)-dy;
+        // third axis (z)
+        gt_index_type iz = static_cast<gt_index_type>(std::floor(z));
+        coord_type dz = z - iz;
+        coord_type dz_prime = coord_type(1.0)-dz;
+        // fourth axis (s)
+        gt_index_type is = static_cast<gt_index_type>(std::floor(s));
+        coord_type ds = s - is;
+        coord_type ds_prime = coord_type(1.0)-ds;
+        // i and i+1 are both read along every axis, hence the -1 in the upper bounds; otherwise all 16 corners come from the boundary handler
+        if ( ix>=0 && ix<(gt_index_type)array_->get_size(0)-1 
+            && iy>=0 && iy<(gt_index_type)array_->get_size(1)-1 
+            && iz>=0 && iz<(gt_index_type)array_->get_size(2)-1 
+            && is>=0 && is<(gt_index_type)array_->get_size(3)-1 )
+        {
+            return (    ((*array_)(size_t(ix),   size_t(iy),     size_t(iz),    size_t(is) )   *   dx_prime     *dy_prime   *dz_prime   *ds_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz),     size_t(is) )   *   dx           *dy_prime   *dz_prime   *ds_prime) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz),    size_t(is) )   *   dx_prime     *dy         *dz_prime   *ds_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz),     size_t(is) )   *   dx           *dy         *dz_prime   *ds_prime) 
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz)+1,  size_t(is) )   *   dx_prime     *dy_prime   *dz         *ds_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz)+1,   size_t(is) )   *   dx           *dy_prime   *dz         *ds_prime) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz)+1,  size_t(is) )   *   dx_prime     *dy         *dz         *ds_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz)+1,   size_t(is) )   *   dx           *dy         *dz         *ds_prime)
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz),    size_t(is)+1 )   *   dx_prime     *dy_prime   *dz_prime   *ds 
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz),     size_t(is)+1 )   *   dx           *dy_prime   *dz_prime   *ds) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz),    size_t(is)+1 )   *   dx_prime     *dy         *dz_prime   *ds 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz),     size_t(is)+1 )   *   dx           *dy         *dz_prime   *ds) 
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz)+1,  size_t(is)+1 )   *   dx_prime     *dy_prime   *dz         *ds 
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz)+1,   size_t(is)+1 )   *   dx           *dy_prime   *dz         *ds) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz)+1,  size_t(is)+1 )   *   dx_prime     *dy         *dz         *ds 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz)+1,   size_t(is)+1 )   *   dx           *dy         *dz         *ds) );
+        }
+        else
+        {
+            return (    ((*bh_)(ix,   iy,     iz,    is )   *   dx_prime     *dy_prime   *dz_prime   *ds_prime 
+                    +   (*bh_)(ix+1, iy,     iz,     is )   *   dx           *dy_prime   *dz_prime   *ds_prime) 
+                    +   ((*bh_)(ix,   iy+1,   iz,    is )   *   dx_prime     *dy         *dz_prime   *ds_prime 
+                    +   (*bh_)(ix+1, iy+1,   iz,     is )   *   dx           *dy         *dz_prime   *ds_prime) 
+                    +   ((*bh_)(ix,   iy,     iz+1,  is )   *   dx_prime     *dy_prime   *dz         *ds_prime 
+                    +   (*bh_)(ix+1, iy,     iz+1,   is )   *   dx           *dy_prime   *dz         *ds_prime) 
+                    +   ((*bh_)(ix,   iy+1,   iz+1,  is )   *   dx_prime     *dy         *dz         *ds_prime 
+                    +   (*bh_)(ix+1, iy+1,   iz+1,   is )   *   dx           *dy         *dz         *ds_prime)
+                    +   ((*bh_)(ix,   iy,     iz,    is+1 )   *   dx_prime     *dy_prime   *dz_prime   *ds 
+                    +   (*bh_)(ix+1, iy,     iz,     is+1 )   *   dx           *dy_prime   *dz_prime   *ds) 
+                    +   ((*bh_)(ix,   iy+1,   iz,    is+1 )   *   dx_prime     *dy         *dz_prime   *ds 
+                    +   (*bh_)(ix+1, iy+1,   iz,     is+1 )   *   dx           *dy         *dz_prime   *ds) 
+                    +   ((*bh_)(ix,   iy,     iz+1,  is+1 )   *   dx_prime     *dy_prime   *dz         *ds 
+                    +   (*bh_)(ix+1, iy,     iz+1,   is+1 )   *   dx           *dy_prime   *dz         *ds) 
+                    +   ((*bh_)(ix,   iy+1,   iz+1,  is+1 )   *   dx_prime     *dy         *dz         *ds 
+                    +   (*bh_)(ix+1, iy+1,   iz+1,   is+1 )   *   dx           *dy         *dz         *ds) );
+        }
+    }
+
+    template <typename ArrayType> // 5-coordinate linear interpolation: weighted sum of the 32 samples at floor(coord) and floor(coord)+1 in each dimension.
+    inline typename hoNDInterpolatorLinear<ArrayType>::T hoNDInterpolatorLinear<ArrayType>::operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p )
+    { // Returns the interpolated value at (x, y, z, s, p); samples come from *array_ or, near/outside the edges, from *bh_.
+        gt_index_type ix = static_cast<gt_index_type>(std::floor(x)); // lower neighbour index along dimension 0
+        coord_type dx = x - ix; // offset of x from ix; used as the weight of the ix+1 sample
+        coord_type dx_prime = coord_type(1.0)-dx; // complementary weight, applied to the ix sample
+        // The remaining coordinates (y, z, s, p) are split into index / weight / complementary weight the same way.
+        gt_index_type iy = static_cast<gt_index_type>(std::floor(y));
+        coord_type dy = y - iy;
+        coord_type dy_prime = coord_type(1.0)-dy;
+
+        gt_index_type iz = static_cast<gt_index_type>(std::floor(z));
+        coord_type dz = z - iz;
+        coord_type dz_prime = coord_type(1.0)-dz;
+
+        gt_index_type is = static_cast<gt_index_type>(std::floor(s));
+        coord_type ds = s - is;
+        coord_type ds_prime = coord_type(1.0)-ds;
+
+        gt_index_type ip = static_cast<gt_index_type>(std::floor(p));
+        coord_type dp = p - ip;
+        coord_type dp_prime = coord_type(1.0)-dp;
+        // Direct-access test: each lower index i must satisfy 0 <= i and i+1 < get_size(d) for d = 0..4.
+        if ( ix>=0 && ix<(gt_index_type)array_->get_size(0)-1 
+            && iy>=0 && iy<(gt_index_type)array_->get_size(1)-1 
+            && iz>=0 && iz<(gt_index_type)array_->get_size(2)-1 
+            && is>=0 && is<(gt_index_type)array_->get_size(3)-1 
+            && ip>=0 && ip<(gt_index_type)array_->get_size(4)-1 )
+        { // All 32 neighbours are inside *array_: read them directly. Term order: the x offset alternates fastest, then y, z, s, p.
+            return (    ((*array_)(size_t(ix),   size_t(iy),     size_t(iz),    size_t(is),     size_t(ip) )   *   dx_prime     *dy_prime   *dz_prime   *ds_prime   *dp_prime
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz),     size_t(is),     size_t(ip) )   *   dx           *dy_prime   *dz_prime   *ds_prime   *dp_prime) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz),    size_t(is),     size_t(ip) )   *   dx_prime     *dy         *dz_prime   *ds_prime   *dp_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz),     size_t(is),     size_t(ip) )   *   dx           *dy         *dz_prime   *ds_prime   *dp_prime) 
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz)+1,  size_t(is),     size_t(ip) )   *   dx_prime     *dy_prime   *dz         *ds_prime   *dp_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz)+1,   size_t(is),     size_t(ip) )   *   dx           *dy_prime   *dz         *ds_prime   *dp_prime) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz)+1,  size_t(is),     size_t(ip) )   *   dx_prime     *dy         *dz         *ds_prime   *dp_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz)+1,   size_t(is),     size_t(ip) )   *   dx           *dy         *dz         *ds_prime   *dp_prime)
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz),    size_t(is)+1,   size_t(ip) )   *   dx_prime     *dy_prime   *dz_prime   *ds   *dp_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz),     size_t(is)+1,   size_t(ip) )   *   dx           *dy_prime   *dz_prime   *ds   *dp_prime) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz),    size_t(is)+1,   size_t(ip) )   *   dx_prime     *dy         *dz_prime   *ds   *dp_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz),     size_t(is)+1,   size_t(ip) )   *   dx           *dy         *dz_prime   *ds   *dp_prime) 
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz)+1,  size_t(is)+1,   size_t(ip) )   *   dx_prime     *dy_prime   *dz         *ds   *dp_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz)+1,   size_t(is)+1,   size_t(ip) )   *   dx           *dy_prime   *dz         *ds   *dp_prime) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz)+1,  size_t(is)+1,   size_t(ip) )   *   dx_prime     *dy         *dz         *ds   *dp_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz)+1,   size_t(is)+1,   size_t(ip) )   *   dx           *dy         *dz         *ds   *dp_prime)
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz),    size_t(is),     size_t(ip)+1 )   *   dx_prime     *dy_prime   *dz_prime   *ds_prime   *dp
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz),     size_t(is),     size_t(ip)+1 )   *   dx           *dy_prime   *dz_prime   *ds_prime   *dp) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz),    size_t(is),     size_t(ip)+1 )   *   dx_prime     *dy         *dz_prime   *ds_prime   *dp 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz),     size_t(is),     size_t(ip)+1 )   *   dx           *dy         *dz_prime   *ds_prime   *dp) 
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz)+1,  size_t(is),     size_t(ip)+1 )   *   dx_prime     *dy_prime   *dz         *ds_prime   *dp 
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz)+1,   size_t(is),     size_t(ip)+1 )   *   dx           *dy_prime   *dz         *ds_prime   *dp) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz)+1,  size_t(is),     size_t(ip)+1 )   *   dx_prime     *dy         *dz         *ds_prime   *dp 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz)+1,   size_t(is),     size_t(ip)+1 )   *   dx           *dy         *dz         *ds_prime   *dp)
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz),    size_t(is)+1,     size_t(ip)+1 )   *   dx_prime     *dy_prime   *dz_prime   *ds   *dp 
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz),     size_t(is)+1,     size_t(ip)+1 )   *   dx           *dy_prime   *dz_prime   *ds   *dp) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz),    size_t(is)+1,     size_t(ip)+1 )   *   dx_prime     *dy         *dz_prime   *ds   *dp 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz),     size_t(is)+1,     size_t(ip)+1 )   *   dx           *dy         *dz_prime   *ds   *dp) 
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz)+1,  size_t(is)+1,     size_t(ip)+1 )   *   dx_prime     *dy_prime   *dz         *ds   *dp 
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz)+1,   size_t(is)+1,     size_t(ip)+1 )   *   dx           *dy_prime   *dz         *ds   *dp) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz)+1,  size_t(is)+1,     size_t(ip)+1 )   *   dx_prime     *dy         *dz         *ds   *dp 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz)+1,   size_t(is)+1,     size_t(ip)+1 )   *   dx           *dy         *dz         *ds   *dp) );
+        }
+        else // At least one neighbour index is negative or reaches get_size(d): fetch every sample through (*bh_) instead.
+        { // Same 32 terms and weights as above, with signed indices. NOTE(review): bh_ is presumably a boundary handler - confirm.
+            return (    ((*bh_)(ix,   iy,     iz,    is,     ip )   *   dx_prime     *dy_prime   *dz_prime   *ds_prime   *dp_prime
+                    +   (*bh_)(ix+1, iy,     iz,     is,     ip )   *   dx           *dy_prime   *dz_prime   *ds_prime   *dp_prime) 
+                    +   ((*bh_)(ix,   iy+1,   iz,    is,     ip )   *   dx_prime     *dy         *dz_prime   *ds_prime   *dp_prime 
+                    +   (*bh_)(ix+1, iy+1,   iz,     is,     ip )   *   dx           *dy         *dz_prime   *ds_prime   *dp_prime) 
+                    +   ((*bh_)(ix,   iy,     iz+1,  is,     ip )   *   dx_prime     *dy_prime   *dz         *ds_prime   *dp_prime 
+                    +   (*bh_)(ix+1, iy,     iz+1,   is,     ip )   *   dx           *dy_prime   *dz         *ds_prime   *dp_prime) 
+                    +   ((*bh_)(ix,   iy+1,   iz+1,  is,     ip )   *   dx_prime     *dy         *dz         *ds_prime   *dp_prime 
+                    +   (*bh_)(ix+1, iy+1,   iz+1,   is,     ip )   *   dx           *dy         *dz         *ds_prime   *dp_prime)
+                    +   ((*bh_)(ix,   iy,     iz,    is+1,   ip )   *   dx_prime     *dy_prime   *dz_prime   *ds   *dp_prime 
+                    +   (*bh_)(ix+1, iy,     iz,     is+1,   ip )   *   dx           *dy_prime   *dz_prime   *ds   *dp_prime) 
+                    +   ((*bh_)(ix,   iy+1,   iz,    is+1,   ip )   *   dx_prime     *dy         *dz_prime   *ds   *dp_prime 
+                    +   (*bh_)(ix+1, iy+1,   iz,     is+1,   ip )   *   dx           *dy         *dz_prime   *ds   *dp_prime) 
+                    +   ((*bh_)(ix,   iy,     iz+1,  is+1,   ip )   *   dx_prime     *dy_prime   *dz         *ds   *dp_prime 
+                    +   (*bh_)(ix+1, iy,     iz+1,   is+1,   ip )   *   dx           *dy_prime   *dz         *ds   *dp_prime) 
+                    +   ((*bh_)(ix,   iy+1,   iz+1,  is+1,   ip )   *   dx_prime     *dy         *dz         *ds   *dp_prime 
+                    +   (*bh_)(ix+1, iy+1,   iz+1,   is+1,   ip )   *   dx           *dy         *dz         *ds   *dp_prime)
+                    +   ((*bh_)(ix,   iy,     iz,    is,     ip+1 )   *   dx_prime     *dy_prime   *dz_prime   *ds_prime   *dp
+                    +   (*bh_)(ix+1, iy,     iz,     is,     ip+1 )   *   dx           *dy_prime   *dz_prime   *ds_prime   *dp) 
+                    +   ((*bh_)(ix,   iy+1,   iz,    is,     ip+1 )   *   dx_prime     *dy         *dz_prime   *ds_prime   *dp 
+                    +   (*bh_)(ix+1, iy+1,   iz,     is,     ip+1 )   *   dx           *dy         *dz_prime   *ds_prime   *dp) 
+                    +   ((*bh_)(ix,   iy,     iz+1,  is,     ip+1 )   *   dx_prime     *dy_prime   *dz         *ds_prime   *dp 
+                    +   (*bh_)(ix+1, iy,     iz+1,   is,     ip+1 )   *   dx           *dy_prime   *dz         *ds_prime   *dp) 
+                    +   ((*bh_)(ix,   iy+1,   iz+1,  is,     ip+1 )   *   dx_prime     *dy         *dz         *ds_prime   *dp 
+                    +   (*bh_)(ix+1, iy+1,   iz+1,   is,     ip+1 )   *   dx           *dy         *dz         *ds_prime   *dp)
+                    +   ((*bh_)(ix,   iy,     iz,    is+1,   ip+1 )   *   dx_prime     *dy_prime   *dz_prime   *ds   *dp 
+                    +   (*bh_)(ix+1, iy,     iz,     is+1,   ip+1 )   *   dx           *dy_prime   *dz_prime   *ds   *dp) 
+                    +   ((*bh_)(ix,   iy+1,   iz,    is+1,   ip+1 )   *   dx_prime     *dy         *dz_prime   *ds   *dp 
+                    +   (*bh_)(ix+1, iy+1,   iz,     is+1,   ip+1 )   *   dx           *dy         *dz_prime   *ds   *dp) 
+                    +   ((*bh_)(ix,   iy,     iz+1,  is+1,   ip+1 )   *   dx_prime     *dy_prime   *dz         *ds   *dp 
+                    +   (*bh_)(ix+1, iy,     iz+1,   is+1,   ip+1 )   *   dx           *dy_prime   *dz         *ds   *dp) 
+                    +   ((*bh_)(ix,   iy+1,   iz+1,  is+1,   ip+1 )   *   dx_prime     *dy         *dz         *ds   *dp 
+                    +   (*bh_)(ix+1, iy+1,   iz+1,   is+1,   ip+1 )   *   dx           *dy         *dz         *ds   *dp) );
+        }
+    }
+
+    template <typename ArrayType> // 6-coordinate linear interpolation: weighted sum of the 64 samples at floor(coord) and floor(coord)+1 in each dimension.
+    inline typename hoNDInterpolatorLinear<ArrayType>::T hoNDInterpolatorLinear<ArrayType>::operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r )
+    { // Returns the interpolated value at (x, y, z, s, p, r); samples come from *array_ or, near/outside the edges, from *bh_.
+        gt_index_type ix = static_cast<gt_index_type>(std::floor(x)); // lower neighbour index along dimension 0
+        coord_type dx = x - ix; // offset of x from ix; used as the weight of the ix+1 sample
+        coord_type dx_prime = coord_type(1.0)-dx; // complementary weight, applied to the ix sample
+        // The remaining coordinates (y, z, s, p, r) are split into index / weight / complementary weight the same way.
+        gt_index_type iy = static_cast<gt_index_type>(std::floor(y));
+        coord_type dy = y - iy;
+        coord_type dy_prime = coord_type(1.0)-dy;
+
+        gt_index_type iz = static_cast<gt_index_type>(std::floor(z));
+        coord_type dz = z - iz;
+        coord_type dz_prime = coord_type(1.0)-dz;
+
+        gt_index_type is = static_cast<gt_index_type>(std::floor(s));
+        coord_type ds = s - is;
+        coord_type ds_prime = coord_type(1.0)-ds;
+
+        gt_index_type ip = static_cast<gt_index_type>(std::floor(p));
+        coord_type dp = p - ip;
+        coord_type dp_prime = coord_type(1.0)-dp;
+
+        gt_index_type ir = static_cast<gt_index_type>(std::floor(r));
+        coord_type dr = r - ir;
+        coord_type dr_prime = coord_type(1.0)-dr;
+        // Direct-access test: each lower index i must satisfy 0 <= i and i+1 < get_size(d) for d = 0..5.
+        if ( ix>=0 && ix<(gt_index_type)array_->get_size(0)-1 
+            && iy>=0 && iy<(gt_index_type)array_->get_size(1)-1 
+            && iz>=0 && iz<(gt_index_type)array_->get_size(2)-1 
+            && is>=0 && is<(gt_index_type)array_->get_size(3)-1 
+            && ip>=0 && ip<(gt_index_type)array_->get_size(4)-1 
+            && ir>=0 && ir<(gt_index_type)array_->get_size(5)-1 )
+        { // All 64 neighbours are inside *array_: read them directly. Term order: the x offset alternates fastest, then y, z, s, p, r.
+            return (    ((*array_)(size_t(ix),   size_t(iy),     size_t(iz),    size_t(is),     size_t(ip),     size_t(ir) )   *   dx_prime     *dy_prime   *dz_prime   *ds_prime   *dp_prime   *dr_prime
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz),     size_t(is),     size_t(ip),     size_t(ir) )   *   dx           *dy_prime   *dz_prime   *ds_prime   *dp_prime   *dr_prime) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz),    size_t(is),     size_t(ip),     size_t(ir) )   *   dx_prime     *dy         *dz_prime   *ds_prime   *dp_prime   *dr_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz),     size_t(is),     size_t(ip),     size_t(ir) )   *   dx           *dy         *dz_prime   *ds_prime   *dp_prime   *dr_prime) 
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz)+1,  size_t(is),     size_t(ip),     size_t(ir) )   *   dx_prime     *dy_prime   *dz         *ds_prime   *dp_prime   *dr_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz)+1,   size_t(is),     size_t(ip),     size_t(ir) )   *   dx           *dy_prime   *dz         *ds_prime   *dp_prime   *dr_prime) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz)+1,  size_t(is),     size_t(ip),     size_t(ir) )   *   dx_prime     *dy         *dz         *ds_prime   *dp_prime   *dr_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz)+1,   size_t(is),     size_t(ip),     size_t(ir) )   *   dx           *dy         *dz         *ds_prime   *dp_prime   *dr_prime)
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz),    size_t(is)+1,   size_t(ip),     size_t(ir) )   *   dx_prime     *dy_prime   *dz_prime   *ds         *dp_prime   *dr_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz),     size_t(is)+1,   size_t(ip),     size_t(ir) )   *   dx           *dy_prime   *dz_prime   *ds         *dp_prime   *dr_prime) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz),    size_t(is)+1,   size_t(ip),     size_t(ir) )   *   dx_prime     *dy         *dz_prime   *ds         *dp_prime   *dr_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz),     size_t(is)+1,   size_t(ip),     size_t(ir) )   *   dx           *dy         *dz_prime   *ds         *dp_prime   *dr_prime) 
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz)+1,  size_t(is)+1,   size_t(ip),     size_t(ir) )   *   dx_prime     *dy_prime   *dz         *ds         *dp_prime   *dr_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz)+1,   size_t(is)+1,   size_t(ip),     size_t(ir) )   *   dx           *dy_prime   *dz         *ds         *dp_prime   *dr_prime) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz)+1,  size_t(is)+1,   size_t(ip),     size_t(ir) )   *   dx_prime     *dy         *dz         *ds         *dp_prime   *dr_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz)+1,   size_t(is)+1,   size_t(ip),     size_t(ir) )   *   dx           *dy         *dz         *ds         *dp_prime   *dr_prime)
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz),    size_t(is),     size_t(ip)+1,     size_t(ir) ) *   dx_prime     *dy_prime   *dz_prime   *ds_prime   *dp         *dr_prime
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz),     size_t(is),     size_t(ip)+1,     size_t(ir) ) *   dx           *dy_prime   *dz_prime   *ds_prime   *dp         *dr_prime) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz),    size_t(is),     size_t(ip)+1,     size_t(ir) ) *   dx_prime     *dy         *dz_prime   *ds_prime   *dp         *dr_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz),     size_t(is),     size_t(ip)+1,     size_t(ir) ) *   dx           *dy         *dz_prime   *ds_prime   *dp         *dr_prime) 
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz)+1,  size_t(is),     size_t(ip)+1,     size_t(ir) ) *   dx_prime     *dy_prime   *dz         *ds_prime   *dp         *dr_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz)+1,   size_t(is),     size_t(ip)+1,     size_t(ir) ) *   dx           *dy_prime   *dz         *ds_prime   *dp         *dr_prime) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz)+1,  size_t(is),     size_t(ip)+1,     size_t(ir) ) *   dx_prime     *dy         *dz         *ds_prime   *dp         *dr_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz)+1,   size_t(is),     size_t(ip)+1,     size_t(ir) ) *   dx           *dy         *dz         *ds_prime   *dp         *dr_prime)
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz),    size_t(is)+1,   size_t(ip)+1,     size_t(ir) ) *   dx_prime     *dy_prime   *dz_prime   *ds         *dp         *dr_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz),     size_t(is)+1,   size_t(ip)+1,     size_t(ir) ) *   dx           *dy_prime   *dz_prime   *ds         *dp         *dr_prime) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz),    size_t(is)+1,   size_t(ip)+1,     size_t(ir) ) *   dx_prime     *dy         *dz_prime   *ds         *dp         *dr_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz),     size_t(is)+1,   size_t(ip)+1,     size_t(ir) ) *   dx           *dy         *dz_prime   *ds         *dp         *dr_prime) 
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz)+1,  size_t(is)+1,   size_t(ip)+1,     size_t(ir) ) *   dx_prime     *dy_prime   *dz         *ds         *dp         *dr_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz)+1,   size_t(is)+1,   size_t(ip)+1,     size_t(ir) ) *   dx           *dy_prime   *dz         *ds         *dp         *dr_prime) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz)+1,  size_t(is)+1,   size_t(ip)+1,     size_t(ir) ) *   dx_prime     *dy         *dz         *ds         *dp         *dr_prime 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz)+1,   size_t(is)+1,   size_t(ip)+1,     size_t(ir) ) *   dx           *dy         *dz         *ds         *dp         *dr_prime)
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz),    size_t(is),     size_t(ip),     size_t(ir)+1 )   *   dx_prime     *dy_prime   *dz_prime   *ds_prime   *dp_prime   *dr
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz),     size_t(is),     size_t(ip),     size_t(ir)+1 )   *   dx           *dy_prime   *dz_prime   *ds_prime   *dp_prime   *dr) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz),    size_t(is),     size_t(ip),     size_t(ir)+1 )   *   dx_prime     *dy         *dz_prime   *ds_prime   *dp_prime   *dr 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz),     size_t(is),     size_t(ip),     size_t(ir)+1 )   *   dx           *dy         *dz_prime   *ds_prime   *dp_prime   *dr) 
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz)+1,  size_t(is),     size_t(ip),     size_t(ir)+1 )   *   dx_prime     *dy_prime   *dz         *ds_prime   *dp_prime   *dr 
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz)+1,   size_t(is),     size_t(ip),     size_t(ir)+1 )   *   dx           *dy_prime   *dz         *ds_prime   *dp_prime   *dr) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz)+1,  size_t(is),     size_t(ip),     size_t(ir)+1 )   *   dx_prime     *dy         *dz         *ds_prime   *dp_prime   *dr 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz)+1,   size_t(is),     size_t(ip),     size_t(ir)+1 )   *   dx           *dy         *dz         *ds_prime   *dp_prime   *dr)
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz),    size_t(is)+1,   size_t(ip),     size_t(ir)+1 )   *   dx_prime     *dy_prime   *dz_prime   *ds         *dp_prime   *dr 
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz),     size_t(is)+1,   size_t(ip),     size_t(ir)+1 )   *   dx           *dy_prime   *dz_prime   *ds         *dp_prime   *dr) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz),    size_t(is)+1,   size_t(ip),     size_t(ir)+1 )   *   dx_prime     *dy         *dz_prime   *ds         *dp_prime   *dr 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz),     size_t(is)+1,   size_t(ip),     size_t(ir)+1 )   *   dx           *dy         *dz_prime   *ds         *dp_prime   *dr) 
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz)+1,  size_t(is)+1,   size_t(ip),     size_t(ir)+1 )   *   dx_prime     *dy_prime   *dz         *ds         *dp_prime   *dr 
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz)+1,   size_t(is)+1,   size_t(ip),     size_t(ir)+1 )   *   dx           *dy_prime   *dz         *ds         *dp_prime   *dr) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz)+1,  size_t(is)+1,   size_t(ip),     size_t(ir)+1 )   *   dx_prime     *dy         *dz         *ds         *dp_prime   *dr 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz)+1,   size_t(is)+1,   size_t(ip),     size_t(ir)+1 )   *   dx           *dy         *dz         *ds         *dp_prime   *dr)
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz),    size_t(is),     size_t(ip)+1,     size_t(ir)+1 ) *   dx_prime     *dy_prime   *dz_prime   *ds_prime   *dp         *dr
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz),     size_t(is),     size_t(ip)+1,     size_t(ir)+1 ) *   dx           *dy_prime   *dz_prime   *ds_prime   *dp         *dr) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz),    size_t(is),     size_t(ip)+1,     size_t(ir)+1 ) *   dx_prime     *dy         *dz_prime   *ds_prime   *dp         *dr 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz),     size_t(is),     size_t(ip)+1,     size_t(ir)+1 ) *   dx           *dy         *dz_prime   *ds_prime   *dp         *dr) 
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz)+1,  size_t(is),     size_t(ip)+1,     size_t(ir)+1 ) *   dx_prime     *dy_prime   *dz         *ds_prime   *dp         *dr 
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz)+1,   size_t(is),     size_t(ip)+1,     size_t(ir)+1 ) *   dx           *dy_prime   *dz         *ds_prime   *dp         *dr) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz)+1,  size_t(is),     size_t(ip)+1,     size_t(ir)+1 ) *   dx_prime     *dy         *dz         *ds_prime   *dp         *dr 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz)+1,   size_t(is),     size_t(ip)+1,     size_t(ir)+1 ) *   dx           *dy         *dz         *ds_prime   *dp         *dr)
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz),    size_t(is)+1,   size_t(ip)+1,     size_t(ir)+1 ) *   dx_prime     *dy_prime   *dz_prime   *ds         *dp         *dr 
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz),     size_t(is)+1,   size_t(ip)+1,     size_t(ir)+1 ) *   dx           *dy_prime   *dz_prime   *ds         *dp         *dr) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz),    size_t(is)+1,   size_t(ip)+1,     size_t(ir)+1 ) *   dx_prime     *dy         *dz_prime   *ds         *dp         *dr 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz),     size_t(is)+1,   size_t(ip)+1,     size_t(ir)+1 ) *   dx           *dy         *dz_prime   *ds         *dp         *dr) 
+                    +   ((*array_)(size_t(ix),   size_t(iy),     size_t(iz)+1,  size_t(is)+1,   size_t(ip)+1,     size_t(ir)+1 ) *   dx_prime     *dy_prime   *dz         *ds         *dp         *dr 
+                    +   (*array_)(size_t(ix)+1, size_t(iy),     size_t(iz)+1,   size_t(is)+1,   size_t(ip)+1,     size_t(ir)+1 ) *   dx           *dy_prime   *dz         *ds         *dp         *dr) 
+                    +   ((*array_)(size_t(ix),   size_t(iy)+1,   size_t(iz)+1,  size_t(is)+1,   size_t(ip)+1,     size_t(ir)+1 ) *   dx_prime     *dy         *dz         *ds         *dp         *dr 
+                    +   (*array_)(size_t(ix)+1, size_t(iy)+1,   size_t(iz)+1,   size_t(is)+1,   size_t(ip)+1,     size_t(ir)+1 ) *   dx           *dy         *dz         *ds         *dp         *dr) );
+        }
+        else // At least one neighbour index is negative or reaches get_size(d): fetch every sample through (*bh_) instead.
+        { // Same 64 terms and weights as above, with signed indices. NOTE(review): bh_ is presumably a boundary handler - confirm.
+            return (    ((*bh_)(ix,   iy,     iz,    is,     ip,     ir )   *   dx_prime     *dy_prime   *dz_prime   *ds_prime   *dp_prime   *dr_prime
+                    +   (*bh_)(ix+1, iy,     iz,     is,     ip,     ir )   *   dx           *dy_prime   *dz_prime   *ds_prime   *dp_prime   *dr_prime) 
+                    +   ((*bh_)(ix,   iy+1,   iz,    is,     ip,     ir )   *   dx_prime     *dy         *dz_prime   *ds_prime   *dp_prime   *dr_prime 
+                    +   (*bh_)(ix+1, iy+1,   iz,     is,     ip,     ir )   *   dx           *dy         *dz_prime   *ds_prime   *dp_prime   *dr_prime) 
+                    +   ((*bh_)(ix,   iy,     iz+1,  is,     ip,     ir )   *   dx_prime     *dy_prime   *dz         *ds_prime   *dp_prime   *dr_prime 
+                    +   (*bh_)(ix+1, iy,     iz+1,   is,     ip,     ir )   *   dx           *dy_prime   *dz         *ds_prime   *dp_prime   *dr_prime) 
+                    +   ((*bh_)(ix,   iy+1,   iz+1,  is,     ip,     ir )   *   dx_prime     *dy         *dz         *ds_prime   *dp_prime   *dr_prime 
+                    +   (*bh_)(ix+1, iy+1,   iz+1,   is,     ip,     ir )   *   dx           *dy         *dz         *ds_prime   *dp_prime   *dr_prime)
+                    +   ((*bh_)(ix,   iy,     iz,    is+1,   ip,     ir )   *   dx_prime     *dy_prime   *dz_prime   *ds         *dp_prime   *dr_prime 
+                    +   (*bh_)(ix+1, iy,     iz,     is+1,   ip,     ir )   *   dx           *dy_prime   *dz_prime   *ds         *dp_prime   *dr_prime) 
+                    +   ((*bh_)(ix,   iy+1,   iz,    is+1,   ip,     ir )   *   dx_prime     *dy         *dz_prime   *ds         *dp_prime   *dr_prime 
+                    +   (*bh_)(ix+1, iy+1,   iz,     is+1,   ip,     ir )   *   dx           *dy         *dz_prime   *ds         *dp_prime   *dr_prime) 
+                    +   ((*bh_)(ix,   iy,     iz+1,  is+1,   ip,     ir )   *   dx_prime     *dy_prime   *dz         *ds         *dp_prime   *dr_prime 
+                    +   (*bh_)(ix+1, iy,     iz+1,   is+1,   ip,     ir )   *   dx           *dy_prime   *dz         *ds         *dp_prime   *dr_prime) 
+                    +   ((*bh_)(ix,   iy+1,   iz+1,  is+1,   ip,     ir )   *   dx_prime     *dy         *dz         *ds         *dp_prime   *dr_prime 
+                    +   (*bh_)(ix+1, iy+1,   iz+1,   is+1,   ip,     ir )   *   dx           *dy         *dz         *ds         *dp_prime   *dr_prime)
+                    +   ((*bh_)(ix,   iy,     iz,    is,     ip+1,     ir ) *   dx_prime     *dy_prime   *dz_prime   *ds_prime   *dp         *dr_prime
+                    +   (*bh_)(ix+1, iy,     iz,     is,     ip+1,     ir ) *   dx           *dy_prime   *dz_prime   *ds_prime   *dp         *dr_prime) 
+                    +   ((*bh_)(ix,   iy+1,   iz,    is,     ip+1,     ir ) *   dx_prime     *dy         *dz_prime   *ds_prime   *dp         *dr_prime 
+                    +   (*bh_)(ix+1, iy+1,   iz,     is,     ip+1,     ir ) *   dx           *dy         *dz_prime   *ds_prime   *dp         *dr_prime) 
+                    +   ((*bh_)(ix,   iy,     iz+1,  is,     ip+1,     ir ) *   dx_prime     *dy_prime   *dz         *ds_prime   *dp         *dr_prime 
+                    +   (*bh_)(ix+1, iy,     iz+1,   is,     ip+1,     ir ) *   dx           *dy_prime   *dz         *ds_prime   *dp         *dr_prime) 
+                    +   ((*bh_)(ix,   iy+1,   iz+1,  is,     ip+1,     ir ) *   dx_prime     *dy         *dz         *ds_prime   *dp         *dr_prime 
+                    +   (*bh_)(ix+1, iy+1,   iz+1,   is,     ip+1,     ir ) *   dx           *dy         *dz         *ds_prime   *dp         *dr_prime)
+                    +   ((*bh_)(ix,   iy,     iz,    is+1,   ip+1,     ir ) *   dx_prime     *dy_prime   *dz_prime   *ds         *dp         *dr_prime 
+                    +   (*bh_)(ix+1, iy,     iz,     is+1,   ip+1,     ir ) *   dx           *dy_prime   *dz_prime   *ds         *dp         *dr_prime) 
+                    +   ((*bh_)(ix,   iy+1,   iz,    is+1,   ip+1,     ir ) *   dx_prime     *dy         *dz_prime   *ds         *dp         *dr_prime 
+                    +   (*bh_)(ix+1, iy+1,   iz,     is+1,   ip+1,     ir ) *   dx           *dy         *dz_prime   *ds         *dp         *dr_prime) 
+                    +   ((*bh_)(ix,   iy,     iz+1,  is+1,   ip+1,     ir ) *   dx_prime     *dy_prime   *dz         *ds         *dp         *dr_prime 
+                    +   (*bh_)(ix+1, iy,     iz+1,   is+1,   ip+1,     ir ) *   dx           *dy_prime   *dz         *ds         *dp         *dr_prime) 
+                    +   ((*bh_)(ix,   iy+1,   iz+1,  is+1,   ip+1,     ir ) *   dx_prime     *dy         *dz         *ds         *dp         *dr_prime 
+                    +   (*bh_)(ix+1, iy+1,   iz+1,   is+1,   ip+1,     ir ) *   dx           *dy         *dz         *ds         *dp         *dr_prime)
+                    +   ((*bh_)(ix,   iy,     iz,    is,     ip,     ir+1 )   *   dx_prime     *dy_prime   *dz_prime   *ds_prime   *dp_prime   *dr
+                    +   (*bh_)(ix+1, iy,     iz,     is,     ip,     ir+1 )   *   dx           *dy_prime   *dz_prime   *ds_prime   *dp_prime   *dr) 
+                    +   ((*bh_)(ix,   iy+1,   iz,    is,     ip,     ir+1 )   *   dx_prime     *dy         *dz_prime   *ds_prime   *dp_prime   *dr 
+                    +   (*bh_)(ix+1, iy+1,   iz,     is,     ip,     ir+1 )   *   dx           *dy         *dz_prime   *ds_prime   *dp_prime   *dr) 
+                    +   ((*bh_)(ix,   iy,     iz+1,  is,     ip,     ir+1 )   *   dx_prime     *dy_prime   *dz         *ds_prime   *dp_prime   *dr 
+                    +   (*bh_)(ix+1, iy,     iz+1,   is,     ip,     ir+1 )   *   dx           *dy_prime   *dz         *ds_prime   *dp_prime   *dr) 
+                    +   ((*bh_)(ix,   iy+1,   iz+1,  is,     ip,     ir+1 )   *   dx_prime     *dy         *dz         *ds_prime   *dp_prime   *dr 
+                    +   (*bh_)(ix+1, iy+1,   iz+1,   is,     ip,     ir+1 )   *   dx           *dy         *dz         *ds_prime   *dp_prime   *dr)
+                    +   ((*bh_)(ix,   iy,     iz,    is+1,   ip,     ir+1 )   *   dx_prime     *dy_prime   *dz_prime   *ds         *dp_prime   *dr 
+                    +   (*bh_)(ix+1, iy,     iz,     is+1,   ip,     ir+1 )   *   dx           *dy_prime   *dz_prime   *ds         *dp_prime   *dr) 
+                    +   ((*bh_)(ix,   iy+1,   iz,    is+1,   ip,     ir+1 )   *   dx_prime     *dy         *dz_prime   *ds         *dp_prime   *dr 
+                    +   (*bh_)(ix+1, iy+1,   iz,     is+1,   ip,     ir+1 )   *   dx           *dy         *dz_prime   *ds         *dp_prime   *dr) 
+                    +   ((*bh_)(ix,   iy,     iz+1,  is+1,   ip,     ir+1 )   *   dx_prime     *dy_prime   *dz         *ds         *dp_prime   *dr 
+                    +   (*bh_)(ix+1, iy,     iz+1,   is+1,   ip,     ir+1 )   *   dx           *dy_prime   *dz         *ds         *dp_prime   *dr) 
+                    +   ((*bh_)(ix,   iy+1,   iz+1,  is+1,   ip,     ir+1 )   *   dx_prime     *dy         *dz         *ds         *dp_prime   *dr 
+                    +   (*bh_)(ix+1, iy+1,   iz+1,   is+1,   ip,     ir+1 )   *   dx           *dy         *dz         *ds         *dp_prime   *dr)
+                    +   ((*bh_)(ix,   iy,     iz,    is,     ip+1,     ir+1 ) *   dx_prime     *dy_prime   *dz_prime   *ds_prime   *dp         *dr
+                    +   (*bh_)(ix+1, iy,     iz,     is,     ip+1,     ir+1 ) *   dx           *dy_prime   *dz_prime   *ds_prime   *dp         *dr) 
+                    +   ((*bh_)(ix,   iy+1,   iz,    is,     ip+1,     ir+1 ) *   dx_prime     *dy         *dz_prime   *ds_prime   *dp         *dr 
+                    +   (*bh_)(ix+1, iy+1,   iz,     is,     ip+1,     ir+1 ) *   dx           *dy         *dz_prime   *ds_prime   *dp         *dr) 
+                    +   ((*bh_)(ix,   iy,     iz+1,  is,     ip+1,     ir+1 ) *   dx_prime     *dy_prime   *dz         *ds_prime   *dp         *dr 
+                    +   (*bh_)(ix+1, iy,     iz+1,   is,     ip+1,     ir+1 ) *   dx           *dy_prime   *dz         *ds_prime   *dp         *dr) 
+                    +   ((*bh_)(ix,   iy+1,   iz+1,  is,     ip+1,     ir+1 ) *   dx_prime     *dy         *dz         *ds_prime   *dp         *dr 
+                    +   (*bh_)(ix+1, iy+1,   iz+1,   is,     ip+1,     ir+1 ) *   dx           *dy         *dz         *ds_prime   *dp         *dr)
+                    +   ((*bh_)(ix,   iy,     iz,    is+1,   ip+1,     ir+1 ) *   dx_prime     *dy_prime   *dz_prime   *ds         *dp         *dr 
+                    +   (*bh_)(ix+1, iy,     iz,     is+1,   ip+1,     ir+1 ) *   dx           *dy_prime   *dz_prime   *ds         *dp         *dr) 
+                    +   ((*bh_)(ix,   iy+1,   iz,    is+1,   ip+1,     ir+1 ) *   dx_prime     *dy         *dz_prime   *ds         *dp         *dr 
+                    +   (*bh_)(ix+1, iy+1,   iz,     is+1,   ip+1,     ir+1 ) *   dx           *dy         *dz_prime   *ds         *dp         *dr) 
+                    +   ((*bh_)(ix,   iy,     iz+1,  is+1,   ip+1,     ir+1 ) *   dx_prime     *dy_prime   *dz         *ds         *dp         *dr 
+                    +   (*bh_)(ix+1, iy,     iz+1,   is+1,   ip+1,     ir+1 ) *   dx           *dy_prime   *dz         *ds         *dp         *dr) 
+                    +   ((*bh_)(ix,   iy+1,   iz+1,  is+1,   ip+1,     ir+1 ) *   dx_prime     *dy         *dz         *ds         *dp         *dr 
+                    +   (*bh_)(ix+1, iy+1,   iz+1,   is+1,   ip+1,     ir+1 ) *   dx           *dy         *dz         *ds         *dp         *dr) );
+        }
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDInterpolatorLinear<ArrayType>::T hoNDInterpolatorLinear<ArrayType>::operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a )
+    {
+        gt_index_type anchor[7];
+        coord_type d[7];
+        coord_type d_prime[7];
+        // 7-D multilinear interpolation. anchor = floor of each coordinate (lower corner of the enclosing grid cell)
+        anchor[0] = static_cast<gt_index_type>(std::floor(x));
+        anchor[1] = static_cast<gt_index_type>(std::floor(y));
+        anchor[2] = static_cast<gt_index_type>(std::floor(z));
+        anchor[3] = static_cast<gt_index_type>(std::floor(s));
+        anchor[4] = static_cast<gt_index_type>(std::floor(p));
+        anchor[5] = static_cast<gt_index_type>(std::floor(r));
+        anchor[6] = static_cast<gt_index_type>(std::floor(a));
+        // d = fractional offset of the coordinate from its anchor; used as the weight of the upper neighbour (anchor+1)
+        d[0] = x - anchor[0];
+        d[1] = y - anchor[1];
+        d[2] = z - anchor[2];
+        d[3] = s - anchor[3];
+        d[4] = p - anchor[4];
+        d[5] = r - anchor[5];
+        d[6] = a - anchor[6];
+        // d_prime = 1-d; used as the weight of the lower neighbour (anchor)
+        unsigned int ii;
+        for ( ii=0; ii<7; ii++ )
+        {
+            d_prime[ii] = coord_type(1.0)-d[ii];
+        }
+
+        T res(0);
+
+        coord_type weightAll(1.0);
+
+        unsigned int n;
+        // every dimension has both anchor and anchor+1 inside [0, size-1]: samples are read from array_ directly
+        if ( anchor[0]>=0 && anchor[0]<(gt_index_type)array_->get_size(0)-1 
+            && anchor[1]>=0 && anchor[1]<(gt_index_type)array_->get_size(1)-1 
+            && anchor[2]>=0 && anchor[2]<(gt_index_type)array_->get_size(2)-1 
+            && anchor[3]>=0 && anchor[3]<(gt_index_type)array_->get_size(3)-1 
+            && anchor[4]>=0 && anchor[4]<(gt_index_type)array_->get_size(4)-1 
+            && anchor[5]>=0 && anchor[5]<(gt_index_type)array_->get_size(5)-1
+            && anchor[6]>=0 && anchor[6]<(gt_index_type)array_->get_size(6)-1 )
+        {
+            std::vector<size_t> ind(7);
+            // n enumerates cell corners; number_of_points_ is set outside this function - NOTE(review): must be 2^7, confirm
+            for ( n=0; n<number_of_points_; n++ )
+            {
+                unsigned int lastDigit = n;
+                weightAll = coord_type(1.0);
+
+                for ( ii=0; ii<7; ii++ )
+                {
+                    if ( lastDigit & 1 )
+                    {
+                        ind[ii] = anchor[ii]+1;
+                        weightAll *= d[ii];
+                    }
+                    else
+                    {
+                        ind[ii] = anchor[ii];
+                        weightAll *= d_prime[ii];
+                    }
+
+                    // move on to the next bit of n: bit ii selects anchor+1 (1) or anchor (0) along dimension ii
+                    lastDigit >>= 1;
+                }
+
+                res += weightAll * (*array_)(ind);
+            }
+        }
+        else
+        {
+            std::vector<gt_index_type> ind(7);
+            // same corner enumeration, but with signed indices passed to bh_ (presumably the boundary handler - confirm)
+            for ( n=0; n<number_of_points_; n++ )
+            {
+                unsigned int lastDigit = n;
+                weightAll = coord_type(1.0);
+
+                for ( ii=0; ii<7; ii++ )
+                {
+                    if ( lastDigit & 1 )
+                    {
+                        ind[ii] = anchor[ii]+1;
+                        weightAll *= d[ii];
+                    }
+                    else
+                    {
+                        ind[ii] = anchor[ii];
+                        weightAll *= d_prime[ii];
+                    }
+
+                    // move on to the next bit of n
+                    lastDigit >>= 1;
+                }
+
+                res += weightAll * (*bh_)(ind);
+            }
+        }
+
+        return res;
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDInterpolatorLinear<ArrayType>::T hoNDInterpolatorLinear<ArrayType>::operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a, coord_type q )
+    {
+        gt_index_type anchor[8];
+        coord_type d[8];
+        coord_type d_prime[8];
+        // 8-D multilinear interpolation. anchor = floor of each coordinate (lower corner of the enclosing grid cell)
+        anchor[0] = static_cast<gt_index_type>(std::floor(x));
+        anchor[1] = static_cast<gt_index_type>(std::floor(y));
+        anchor[2] = static_cast<gt_index_type>(std::floor(z));
+        anchor[3] = static_cast<gt_index_type>(std::floor(s));
+        anchor[4] = static_cast<gt_index_type>(std::floor(p));
+        anchor[5] = static_cast<gt_index_type>(std::floor(r));
+        anchor[6] = static_cast<gt_index_type>(std::floor(a));
+        anchor[7] = static_cast<gt_index_type>(std::floor(q));
+        // d = fractional offset of the coordinate from its anchor; used as the weight of the upper neighbour (anchor+1)
+        d[0] = x - anchor[0];
+        d[1] = y - anchor[1];
+        d[2] = z - anchor[2];
+        d[3] = s - anchor[3];
+        d[4] = p - anchor[4];
+        d[5] = r - anchor[5];
+        d[6] = a - anchor[6];
+        d[7] = q - anchor[7];
+        // d_prime = 1-d; used as the weight of the lower neighbour (anchor)
+        unsigned int ii;
+        for ( ii=0; ii<8; ii++ )
+        {
+            d_prime[ii] = coord_type(1.0)-d[ii];
+        }
+
+        T res(0);
+
+        coord_type weightAll(1.0);
+
+        unsigned int n;
+        // every dimension has both anchor and anchor+1 inside [0, size-1]: samples are read from array_ directly
+        if ( anchor[0]>=0 && anchor[0]<(gt_index_type)array_->get_size(0)-1 
+            && anchor[1]>=0 && anchor[1]<(gt_index_type)array_->get_size(1)-1 
+            && anchor[2]>=0 && anchor[2]<(gt_index_type)array_->get_size(2)-1 
+            && anchor[3]>=0 && anchor[3]<(gt_index_type)array_->get_size(3)-1 
+            && anchor[4]>=0 && anchor[4]<(gt_index_type)array_->get_size(4)-1 
+            && anchor[5]>=0 && anchor[5]<(gt_index_type)array_->get_size(5)-1
+            && anchor[6]>=0 && anchor[6]<(gt_index_type)array_->get_size(6)-1
+            && anchor[7]>=0 && anchor[7]<(gt_index_type)array_->get_size(7)-1 )
+        {
+            std::vector<size_t> ind(8);
+            // n enumerates cell corners; number_of_points_ is set outside this function - NOTE(review): must be 2^8, confirm
+            for ( n=0; n<number_of_points_; n++ )
+            {
+                unsigned int lastDigit = n;
+                weightAll = coord_type(1.0);
+
+                for ( ii=0; ii<8; ii++ )
+                {
+                    if ( lastDigit & 1 )
+                    {
+                        ind[ii] = anchor[ii]+1;
+                        weightAll *= d[ii];
+                    }
+                    else
+                    {
+                        ind[ii] = anchor[ii];
+                        weightAll *= d_prime[ii];
+                    }
+
+                    // move on to the next bit of n: bit ii selects anchor+1 (1) or anchor (0) along dimension ii
+                    lastDigit >>= 1;
+                }
+
+                res += weightAll * (*array_)(ind);
+            }
+        }
+        else
+        {
+            std::vector<gt_index_type> ind(8);
+            // same corner enumeration, but with signed indices passed to bh_ (presumably the boundary handler - confirm)
+            for ( n=0; n<number_of_points_; n++ )
+            {
+                unsigned int lastDigit = n;
+                weightAll = coord_type(1.0);
+
+                for ( ii=0; ii<8; ii++ )
+                {
+                    if ( lastDigit & 1 )
+                    {
+                        ind[ii] = anchor[ii]+1;
+                        weightAll *= d[ii];
+                    }
+                    else
+                    {
+                        ind[ii] = anchor[ii];
+                        weightAll *= d_prime[ii];
+                    }
+
+                    // move on to the next bit of n
+                    lastDigit >>= 1;
+                }
+
+                res += weightAll * (*bh_)(ind);
+            }
+        }
+
+        return res;
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDInterpolatorLinear<ArrayType>::T hoNDInterpolatorLinear<ArrayType>::operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a, coord_type q, coord_type u )
+    {
+        gt_index_type anchor[9];
+        coord_type d[9];
+        coord_type d_prime[9];
+        // 9-D multilinear interpolation. anchor = floor of each coordinate (lower corner of the enclosing grid cell)
+        anchor[0] = static_cast<gt_index_type>(std::floor(x));
+        anchor[1] = static_cast<gt_index_type>(std::floor(y));
+        anchor[2] = static_cast<gt_index_type>(std::floor(z));
+        anchor[3] = static_cast<gt_index_type>(std::floor(s));
+        anchor[4] = static_cast<gt_index_type>(std::floor(p));
+        anchor[5] = static_cast<gt_index_type>(std::floor(r));
+        anchor[6] = static_cast<gt_index_type>(std::floor(a));
+        anchor[7] = static_cast<gt_index_type>(std::floor(q));
+        anchor[8] = static_cast<gt_index_type>(std::floor(u));
+        // d = fractional offset of the coordinate from its anchor; used as the weight of the upper neighbour (anchor+1)
+        d[0] = x - anchor[0];
+        d[1] = y - anchor[1];
+        d[2] = z - anchor[2];
+        d[3] = s - anchor[3];
+        d[4] = p - anchor[4];
+        d[5] = r - anchor[5];
+        d[6] = a - anchor[6];
+        d[7] = q - anchor[7];
+        d[8] = u - anchor[8];
+        // d_prime = 1-d; used as the weight of the lower neighbour (anchor)
+        unsigned int ii;
+        for ( ii=0; ii<9; ii++ )
+        {
+            d_prime[ii] = coord_type(1.0)-d[ii];
+        }
+
+        T res(0);
+
+        coord_type weightAll(1.0);
+
+        unsigned int n;
+        // every dimension has both anchor and anchor+1 inside [0, size-1]: samples are read from array_ directly
+        if ( anchor[0]>=0 && anchor[0]<(gt_index_type)array_->get_size(0)-1 
+            && anchor[1]>=0 && anchor[1]<(gt_index_type)array_->get_size(1)-1 
+            && anchor[2]>=0 && anchor[2]<(gt_index_type)array_->get_size(2)-1 
+            && anchor[3]>=0 && anchor[3]<(gt_index_type)array_->get_size(3)-1 
+            && anchor[4]>=0 && anchor[4]<(gt_index_type)array_->get_size(4)-1 
+            && anchor[5]>=0 && anchor[5]<(gt_index_type)array_->get_size(5)-1
+            && anchor[6]>=0 && anchor[6]<(gt_index_type)array_->get_size(6)-1
+            && anchor[7]>=0 && anchor[7]<(gt_index_type)array_->get_size(7)-1
+            && anchor[8]>=0 && anchor[8]<(gt_index_type)array_->get_size(8)-1 )
+        {
+            std::vector<size_t> ind(9);
+            // n enumerates cell corners; number_of_points_ is set outside this function - NOTE(review): must be 2^9, confirm
+            for ( n=0; n<number_of_points_; n++ )
+            {
+                unsigned int lastDigit = n;
+                weightAll = coord_type(1.0);
+
+                for ( ii=0; ii<9; ii++ )
+                {
+                    if ( lastDigit & 1 )
+                    {
+                        ind[ii] = anchor[ii]+1;
+                        weightAll *= d[ii];
+                    }
+                    else
+                    {
+                        ind[ii] = anchor[ii];
+                        weightAll *= d_prime[ii];
+                    }
+
+                    // move on to the next bit of n: bit ii selects anchor+1 (1) or anchor (0) along dimension ii
+                    lastDigit >>= 1;
+                }
+
+                res += weightAll * (*array_)(ind);
+            }
+        }
+        else
+        {
+            std::vector<gt_index_type> ind(9);
+            // same corner enumeration, but with signed indices passed to bh_ (presumably the boundary handler - confirm)
+            for ( n=0; n<number_of_points_; n++ )
+            {
+                unsigned int lastDigit = n;
+                weightAll = coord_type(1.0);
+
+                for ( ii=0; ii<9; ii++ )
+                {
+                    if ( lastDigit & 1 )
+                    {
+                        ind[ii] = anchor[ii]+1;
+                        weightAll *= d[ii];
+                    }
+                    else
+                    {
+                        ind[ii] = anchor[ii];
+                        weightAll *= d_prime[ii];
+                    }
+
+                    // move on to the next bit of n
+                    lastDigit >>= 1;
+                }
+
+                res += weightAll * (*bh_)(ind);
+            }
+        }
+
+        return res;
+    }
+}
diff --git a/toolboxes/core/cpu/hoNDInterpolatorNearestNeighbor.hxx b/toolboxes/core/cpu/hoNDInterpolatorNearestNeighbor.hxx
new file mode 100644
index 0000000..730ffa3
--- /dev/null
+++ b/toolboxes/core/cpu/hoNDInterpolatorNearestNeighbor.hxx
@@ -0,0 +1,94 @@
+/** \file       hoNDInterpolatorNearestNeighbor.hxx
+    \brief      N-dimensional nearest neighbor interpolator
+
+                Designed to work with hoNDArray and hoNDImage
+
+    \author     Hui Xue
+*/
+#include <cmath>
+namespace Gadgetron
+{
+    /// hoNDInterpolatorNearestNeighbor
+
+    template <typename ArrayType> 
+    inline typename hoNDInterpolatorNearestNeighbor<ArrayType>::T hoNDInterpolatorNearestNeighbor<ArrayType>::operator()( const coord_type* pos )
+    {
+        // pos is read at one coordinate per array dimension; each coordinate is rounded to the nearest index.
+        // floor(v+0.5) is used because a plain cast truncates toward zero and maps v in (-1.5,-0.5] to 0 instead of -1.
+        const unsigned int D = array_->get_number_of_dimensions();
+        std::vector<gt_index_type> ind(D);
+        for ( unsigned int ii=0; ii<D; ii++ )
+        {
+            ind[ii] = static_cast<gt_index_type>(std::floor(pos[ii]+0.5));
+        }
+        return (*bh_)(ind);
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDInterpolatorNearestNeighbor<ArrayType>::T hoNDInterpolatorNearestNeighbor<ArrayType>::operator()( const std::vector<coord_type>& pos )
+    {
+        // Nearest index per coordinate via floor(v+0.5) (a plain cast mis-rounds negatives); a pos shorter than D leaves the rest at 0.
+        const size_t D = array_->get_number_of_dimensions();
+        const size_t N = ( pos.size()<D ) ? pos.size() : D;
+        std::vector<gt_index_type> ind(( pos.size()<D ) ? D : pos.size());
+        for ( size_t ii=0; ii<N; ii++ )
+        {
+            ind[ii] = static_cast<gt_index_type>(std::floor(pos[ii]+0.5));
+        }
+        return (*bh_)(ind);
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDInterpolatorNearestNeighbor<ArrayType>::T hoNDInterpolatorNearestNeighbor<ArrayType>::operator()( coord_type x )
+    {   // 1-D: nearest index via floor(v+0.5); a plain cast truncates toward zero and mis-rounds negative coordinates
+        return (*bh_)(static_cast<gt_index_type>(std::floor(x+0.5)));
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDInterpolatorNearestNeighbor<ArrayType>::T hoNDInterpolatorNearestNeighbor<ArrayType>::operator()( coord_type x, coord_type y )
+    {   // 2-D: nearest index via floor(v+0.5); a plain cast truncates toward zero and mis-rounds negative coordinates
+        return (*bh_)(static_cast<gt_index_type>(std::floor(x+0.5)), static_cast<gt_index_type>(std::floor(y+0.5)));
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDInterpolatorNearestNeighbor<ArrayType>::T hoNDInterpolatorNearestNeighbor<ArrayType>::operator()( coord_type x, coord_type y, coord_type z )
+    {   // 3-D: nearest index via floor(v+0.5); a plain cast truncates toward zero and mis-rounds negative coordinates
+        return (*bh_)(static_cast<gt_index_type>(std::floor(x+0.5)), static_cast<gt_index_type>(std::floor(y+0.5)), static_cast<gt_index_type>(std::floor(z+0.5)));
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDInterpolatorNearestNeighbor<ArrayType>::T hoNDInterpolatorNearestNeighbor<ArrayType>::operator()( coord_type x, coord_type y, coord_type z, coord_type s )
+    {   // 4-D: nearest index via floor(v+0.5); a plain cast truncates toward zero and mis-rounds negative coordinates
+        return (*bh_)(static_cast<gt_index_type>(std::floor(x+0.5)), static_cast<gt_index_type>(std::floor(y+0.5)), static_cast<gt_index_type>(std::floor(z+0.5)), static_cast<gt_index_type>(std::floor(s+0.5)));
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDInterpolatorNearestNeighbor<ArrayType>::T hoNDInterpolatorNearestNeighbor<ArrayType>::operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p )
+    {   // 5-D: nearest index via floor(v+0.5); a plain cast truncates toward zero and mis-rounds negative coordinates
+        return (*bh_)(static_cast<gt_index_type>(std::floor(x+0.5)), static_cast<gt_index_type>(std::floor(y+0.5)), static_cast<gt_index_type>(std::floor(z+0.5)), static_cast<gt_index_type>(std::floor(s+0.5)), static_cast<gt_index_type>(std::floor(p+0.5)));
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDInterpolatorNearestNeighbor<ArrayType>::T hoNDInterpolatorNearestNeighbor<ArrayType>::operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r )
+    {   // 6-D: nearest index via floor(v+0.5); a plain cast truncates toward zero and mis-rounds negative coordinates
+        return (*bh_)(static_cast<gt_index_type>(std::floor(x+0.5)), static_cast<gt_index_type>(std::floor(y+0.5)), static_cast<gt_index_type>(std::floor(z+0.5)), static_cast<gt_index_type>(std::floor(s+0.5)), static_cast<gt_index_type>(std::floor(p+0.5)), static_cast<gt_index_type>(std::floor(r+0.5)));
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDInterpolatorNearestNeighbor<ArrayType>::T hoNDInterpolatorNearestNeighbor<ArrayType>::operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a )
+    {   // 7-D: nearest index via floor(v+0.5); a plain cast truncates toward zero and mis-rounds negative coordinates
+        return (*bh_)(static_cast<gt_index_type>(std::floor(x+0.5)), static_cast<gt_index_type>(std::floor(y+0.5)), static_cast<gt_index_type>(std::floor(z+0.5)), static_cast<gt_index_type>(std::floor(s+0.5)), static_cast<gt_index_type>(std::floor(p+0.5)), static_cast<gt_index_type>(std::floor(r+0.5)), static_cast<gt_index_type>(std::floor(a+0.5)));
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDInterpolatorNearestNeighbor<ArrayType>::T hoNDInterpolatorNearestNeighbor<ArrayType>::operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a, coord_type q )
+    {   // 8-D: nearest index via floor(v+0.5); a plain cast truncates toward zero and mis-rounds negative coordinates
+        return (*bh_)(static_cast<gt_index_type>(std::floor(x+0.5)), static_cast<gt_index_type>(std::floor(y+0.5)), static_cast<gt_index_type>(std::floor(z+0.5)), static_cast<gt_index_type>(std::floor(s+0.5)), static_cast<gt_index_type>(std::floor(p+0.5)), static_cast<gt_index_type>(std::floor(r+0.5)), static_cast<gt_index_type>(std::floor(a+0.5)), static_cast<gt_index_type>(std::floor(q+0.5)));
+    }
+
+    template <typename ArrayType> 
+    inline typename hoNDInterpolatorNearestNeighbor<ArrayType>::T hoNDInterpolatorNearestNeighbor<ArrayType>::operator()( coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a, coord_type q, coord_type u )
+    {   // 9-D: nearest index via floor(v+0.5); a plain cast truncates toward zero and mis-rounds negative coordinates
+        return (*bh_)(static_cast<gt_index_type>(std::floor(x+0.5)), static_cast<gt_index_type>(std::floor(y+0.5)), static_cast<gt_index_type>(std::floor(z+0.5)), static_cast<gt_index_type>(std::floor(s+0.5)), static_cast<gt_index_type>(std::floor(p+0.5)), static_cast<gt_index_type>(std::floor(r+0.5)), static_cast<gt_index_type>(std::floor(a+0.5)), static_cast<gt_index_type>(std::floor(q+0.5)), static_cast<gt_index_type>(std::floor(u+0.5)));
+    }
+}
diff --git a/toolboxes/core/cpu/hoNDObjectArray.h b/toolboxes/core/cpu/hoNDObjectArray.h
new file mode 100644
index 0000000..6091203
--- /dev/null
+++ b/toolboxes/core/cpu/hoNDObjectArray.h
@@ -0,0 +1,200 @@
+/** \file   hoNDObjectArray.h
+\brief  CPU-based N-dimensional array for object pointers
+if delete_data_on_destruct == true, the object will be released; otherwise, only the object array memory is released
+\author Hui Xue
+*/
+
+#pragma once
+
+#include "hoNDArray.h"
+
+namespace Gadgetron
+{
+
+    template <typename TObjectType> class hoNDObjectArray : public hoNDArray<TObjectType*>
+    {
+    public:
+        // An hoNDArray whose elements are TObjectType* pointers; create() sets every element to NULL.
+        typedef hoNDArray<TObjectType*> BaseClass;
+        typedef float coord_type;
+        typedef typename BaseClass::value_type value_type;
+
+        hoNDObjectArray();
+        // Construct with the given dimensions; every element pointer starts as NULL.
+        explicit hoNDObjectArray(std::vector<size_t> &dimensions);
+        explicit hoNDObjectArray(std::vector<size_t> *dimensions);
+        explicit hoNDObjectArray(boost::shared_ptr< std::vector<size_t> > dimensions);
+        // When delete_data_on_destruct_ is set, deletes every non-NULL element and then calls deallocate_memory().
+        virtual ~hoNDObjectArray();
+
+        // Copy constructors: the copy has delete_data_on_destruct_ cleared, so it never deletes the objects
+        hoNDObjectArray(const hoNDObjectArray<TObjectType> &a);
+        explicit hoNDObjectArray(const hoNDObjectArray<TObjectType> *a);
+
+        // Assignment operator: the assigned-to array likewise has delete_data_on_destruct_ cleared
+        hoNDObjectArray& operator=(const hoNDObjectArray& rhs);
+        // (Re)create the array through BaseClass::create and set every element pointer to NULL.
+        virtual void create(std::vector<size_t>& dimensions);
+        virtual void create(std::vector<size_t> *dimensions);
+        virtual void create(boost::shared_ptr< std::vector<size_t> > dimensions);
+        // Validates start/size against this array and creates out; see the definition for what is (not) copied.
+        void get_sub_array(const std::vector<size_t>& start, std::vector<size_t>& size, hoNDObjectArray<TObjectType>& out);
+
+        virtual void print(std::ostream& os) const;
+
+    protected:
+
+        using BaseClass::dimensions_;
+        using BaseClass::offsetFactors_;
+        using BaseClass::data_;
+        using BaseClass::elements_;
+        using BaseClass::delete_data_on_destruct_;
+    };
+
+    template <typename TObjectType> 
+    hoNDObjectArray<TObjectType>::hoNDObjectArray() : BaseClass() 
+    {   // default construction only forwards to the base-class default constructor
+    }
+
+    template <typename TObjectType> 
+    hoNDObjectArray<TObjectType>::hoNDObjectArray(std::vector<size_t> *dimensions) : BaseClass(dimensions)
+    {   // the base is built from dimensions, then this class's create() runs so that every element pointer is set to NULL
+        this->create(dimensions);
+    }
+
+    template <typename TObjectType> 
+    hoNDObjectArray<TObjectType>::hoNDObjectArray(std::vector<size_t> &dimensions) : BaseClass(dimensions)
+    {   // the base is built from dimensions, then this class's create() runs so that every element pointer is set to NULL
+        this->create(dimensions);
+    }
+
+    template <typename TObjectType> 
+    hoNDObjectArray<TObjectType>::hoNDObjectArray(boost::shared_ptr< std::vector<size_t> > dimensions) : BaseClass(dimensions)
+    {   // the base is built from dimensions, then this class's create() runs so that every element pointer is set to NULL
+        this->create(dimensions);
+    }
+
+    template <typename TObjectType> 
+    hoNDObjectArray<TObjectType>::~hoNDObjectArray()
+    {
+        if (this->delete_data_on_destruct_) // arrays with the flag cleared (e.g. copies) leave the objects alone
+        {
+            size_t n;
+            for ( n=0; n<this->elements_; n++ )
+            {
+                if ( this->data_[n] != NULL )
+                {
+                    delete this->data_[n];
+                    this->data_[n] = NULL;
+                }
+            }
+            // the objects are deleted; now release the pointer storage (deallocate_memory is not defined in this class)
+            this->deallocate_memory();
+        }
+    }
+
+    template <typename TObjectType> 
+    hoNDObjectArray<TObjectType>::hoNDObjectArray(const hoNDObjectArray<TObjectType>  *a) : BaseClass(a)
+    {   // the copy is built by the base-class constructor; clearing the flag keeps this copy's destructor from deleting the objects
+        this->delete_data_on_destruct_ = false;
+    }
+
+    template <typename TObjectType> 
+    hoNDObjectArray<TObjectType>::hoNDObjectArray(const hoNDObjectArray<TObjectType> &a) : BaseClass(a)
+    {   // the copy is built by the base-class constructor; clearing the flag keeps this copy's destructor from deleting the objects
+        this->delete_data_on_destruct_ = false;
+    }
+
+    template <typename TObjectType> 
+    hoNDObjectArray<TObjectType>& hoNDObjectArray<TObjectType>::operator=(const hoNDObjectArray<TObjectType>& rhs)
+    {
+        if ( &rhs == this ) return *this;
+        // the copy itself is delegated to the base class (presumably copies the stored pointers - confirm in hoNDArray)
+        BaseClass::operator=(rhs);
+        // after assignment this array no longer deletes the objects in its destructor
+        this->delete_data_on_destruct_ = false;
+
+        return *this;
+    }
+
+    template <typename TObjectType> 
+    void hoNDObjectArray<TObjectType>::create(std::vector<size_t>& dimensions)
+    {
+        BaseClass::create(dimensions);
+        // start with every slot empty so that the destructor's NULL check is meaningful
+        for ( size_t n=0; n<this->elements_; n++ )
+        {
+            this->data_[n] = NULL;
+        }
+    }
+
+    template <typename TObjectType> 
+    void hoNDObjectArray<TObjectType>::create(std::vector<size_t> *dimensions)
+    {
+        BaseClass::create(dimensions);
+        // start with every slot empty so that the destructor's NULL check is meaningful
+        for ( size_t n=0; n<this->elements_; n++ )
+        {
+            this->data_[n] = NULL;
+        }
+    }
+
+    template <typename TObjectType> 
+    void hoNDObjectArray<TObjectType>::create(boost::shared_ptr< std::vector<size_t> > dimensions)
+    {
+        BaseClass::create(dimensions);
+        // start with every slot empty so that the destructor's NULL check is meaningful
+        for ( size_t n=0; n<this->elements_; n++ )
+        {
+            this->data_[n] = NULL;
+        }
+    }
+
+    template <typename TObjectType> 
+    void hoNDObjectArray<TObjectType>::get_sub_array(const std::vector<size_t>& start, std::vector<size_t>& size, hoNDObjectArray<TObjectType>& out)
+    {   // throws runtime_error when start/size are inconsistent with each other or with this array
+        if ( start.size() != size.size() )
+        {
+            BOOST_THROW_EXCEPTION( runtime_error("hoNDArray<>::get_sub_array failed"));
+        }
+        // start and size must carry one entry per dimension of this array
+        if ( start.size() != (*dimensions_).size() )
+        {
+            BOOST_THROW_EXCEPTION( runtime_error("hoNDArray<>::get_sub_array failed"));
+        }
+        // out is (re)created with the requested size before the range is validated; its elements are all NULL here
+        out.create(&size);
+        // same element count as this array: assign the whole array to out and stop
+        if ( out.get_number_of_elements() == this->get_number_of_elements() )
+        {
+            out = *this;
+            return;
+        }
+        // end[ii] is the last index of the requested range along dimension ii; it must lie inside this array
+        std::vector<size_t> end(start.size());
+
+        size_t ii;
+        for ( ii=0; ii<start.size(); ii++ )
+        {
+            end[ii] = start[ii] + size[ii] - 1; // NOTE(review): wraps around when start[ii]+size[ii] is 0
+            if ( end[ii] >= (*dimensions_)[ii] )
+            {
+                BOOST_THROW_EXCEPTION( runtime_error("hoNDArray<>::get_sub_array failed"));
+            }
+        }
+        // NOTE(review): nothing is copied from this array into out on this path, so out keeps only NULL pointers - looks unfinished
+        out.delete_data_on_destruct(false);
+    }
+
+    template <typename TObjectType> 
+    void hoNDObjectArray<TObjectType>::print(std::ostream& os) const
+    {
+        using namespace std;
+        // switch the stream from scientific to fixed notation; the flags are not restored afterwards
+        os.unsetf(std::ios::scientific);
+        os.setf(ios::fixed);
+        // banner line, then the content as written by printContent (not defined in this class)
+        os << "-------------- Gagdgetron ND Object Array -------------" << endl;
+        this->printContent(os);
+    }
+}
diff --git a/toolboxes/core/cpu/hoNDPoint.h b/toolboxes/core/cpu/hoNDPoint.h
new file mode 100644
index 0000000..9e20f41
--- /dev/null
+++ b/toolboxes/core/cpu/hoNDPoint.h
@@ -0,0 +1,338 @@
+/** \file       hoNDPoint.h
+    \brief      N-dimensional point
+    \author     Hui Xue
+*/
+
+#pragma once
+
+#include "GadgetronException.h"
+#include "GadgetronCommon.h"
+
+#include <new>
+#include <vector>
+#include <iostream>
+#include <stdexcept>
+#include <cmath>
+
+#include "float.h"
+
+namespace Gadgetron
+{
+
+    template <typename T, unsigned int D>
+    class hoNDPoint
+    {
+    public:
+        // A point with D coordinates of type T, stored in the fixed-size member array data_.
+        typedef hoNDPoint<T, D> Self;
+        typedef T value_type;
+
+        hoNDPoint();
+        hoNDPoint(const Self& p);
+        // virtual because the class is polymorphic (print is virtual): deleting through a base pointer must be safe
+        virtual ~hoNDPoint();
+
+        Self& operator=(const Self& p);
+
+        void fill(const T& v);
+        // pointer to the first coordinate
+        T* begin() { return this->data_; }
+        const T* begin() const { return this->data_; }
+        // coordinate access; operator[] and operator() are declared with the same signatures
+        T& operator[]( size_t idx );
+        const T& operator[]( size_t idx ) const;
+
+        T& operator()( size_t idx );
+        const T& operator()( size_t idx ) const;
+
+        bool operator==(const Self& p) const;
+        bool operator!=(const Self& p) const;
+        // copy from a point with another coordinate type, converting each coordinate with static_cast<T>
+        template<typename T2> 
+        void copyFrom(const hoNDPoint<T2, D>& aArray)
+        {
+            unsigned int ii;
+            for ( ii=0; ii<D; ii++ )
+            {
+                this->data_[ii] = static_cast<T>(aArray(ii));
+            }
+        }
+        // compound assignment with another point
+        Self& operator += (const Self& p);
+        Self& operator -= (const Self& p);
+        Self& operator *= (const Self& p);
+        Self& operator /= (const Self& p);
+        // compound assignment with a scalar
+        Self& operator += (const T& p);
+        Self& operator -= (const T& p);
+        Self& operator *= (const T& p);
+        Self& operator /= (const T& p);
+
+        // dot product of this point and p, written to r
+        void dot(const Self& p, T& r);
+
+        // the magnitude of the point vector
+        T abs();
+
+        // normalize the magnitude of the point to be 1
+        void normalize();
+
+        virtual void print(std::ostream& os) const;
+
+    protected:
+
+        T data_[D];
+    };
+
+    template <typename T, unsigned int D>
+    hoNDPoint<T, D>::hoNDPoint()
+    {
+        // a default-constructed point has every coordinate equal to T(0)
+        for ( unsigned int k=0; k<D; ++k )
+        {
+            data_[k] = T(0);
+        }
+    }
+
+    template <typename T, unsigned int D>
+    hoNDPoint<T, D>::hoNDPoint(const Self& p)
+    {   // element-wise copy: unlike memcpy it is valid for any copy-assignable T and does not need <cstring>
+        for ( unsigned int k=0; k<D; ++k ) data_[k] = p.data_[k];
+    }
+
+    template <typename T, unsigned int D>
+    hoNDPoint<T, D>::~hoNDPoint()
+    {
+        // nothing to release: the coordinates live in the member array data_
+    }
+
+    template <typename T, unsigned int D>
+    inline hoNDPoint<T, D>& hoNDPoint<T, D>::operator=(const Self& p)
+    {   // element-wise assignment (valid for any copy-assignable T, unlike memcpy); self-assignment is a no-op
+        if ( this == &p ) return *this;
+        for ( unsigned int k=0; k<D; ++k ) data_[k] = p.data_[k];
+        return *this;
+    }
+
+    template <typename T, unsigned int D>
+    inline void hoNDPoint<T, D>::fill(const T& v)
+    {
+        // set every coordinate to v
+        for ( unsigned int k=0; k<D; ++k )
+        {
+            data_[k] = v;
+        }
+    }
+
+    template <typename T, unsigned int D>
+    inline T& hoNDPoint<T, D>::operator[]( size_t idx )
+    {   // mutable reference to coordinate idx; idx<D is checked through GADGET_DEBUG_CHECK_THROW (macro defined outside this file)
+        GADGET_DEBUG_CHECK_THROW(idx < D);
+        return this->data_[idx];
+    }
+
+    template <typename T, unsigned int D>
+    inline const T& hoNDPoint<T, D>::operator[]( size_t idx ) const
+    {   // read-only reference to coordinate idx; idx<D is checked through GADGET_DEBUG_CHECK_THROW (macro defined outside this file)
+        GADGET_DEBUG_CHECK_THROW(idx < D);
+        return this->data_[idx];
+    }
+
+    template <typename T, unsigned int D>
+    inline T& hoNDPoint<T, D>::operator()( size_t idx )
+    {   // same body as operator[]: mutable reference to coordinate idx after the GADGET_DEBUG_CHECK_THROW(idx < D) check
+        GADGET_DEBUG_CHECK_THROW(idx < D);
+        return this->data_[idx];
+    }
+
+    template <typename T, unsigned int D>
+    inline const T& hoNDPoint<T, D>::operator()( size_t idx ) const
+    {   // same body as operator[] const: read-only reference to coordinate idx after the GADGET_DEBUG_CHECK_THROW(idx < D) check
+        GADGET_DEBUG_CHECK_THROW(idx < D);
+        return this->data_[idx];
+    }
+
+    template <typename T, unsigned int D>
+    inline bool hoNDPoint<T, D>::operator==(const Self& p) const
+    {
+        // tolerance comparison: two coordinates may differ by at most numeric_limits<T>::epsilon()
+        const T tol = std::numeric_limits<T>::epsilon();
+
+        for ( unsigned int k=0; k<D; ++k )
+        {
+            if ( GT_ABS(data_[k] - p.data_[k]) > tol ) return false;
+        }
+
+        return true;
+    }
+
+    template <typename T, unsigned int D>
+    inline bool hoNDPoint<T, D>::operator!=(const Self& p) const
+    {   // exact negation of operator==, so the same epsilon tolerance applies
+        return !(*this==p);
+    }
+
+    template <typename T, unsigned int D>
+    inline hoNDPoint<T, D>& hoNDPoint<T, D>::operator += (const Self& p)
+    {
+        // coordinate-wise addition of p
+        for ( unsigned int k=0; k<D; ++k )
+        {
+            data_[k] += p.data_[k];
+        }
+
+        return *this;
+    }
+
+    template <typename T, unsigned int D>
+    inline hoNDPoint<T, D>& hoNDPoint<T, D>::operator -= (const Self& p)
+    {
+        // coordinate-wise subtraction of p
+        for ( unsigned int k=0; k<D; ++k )
+        {
+            data_[k] -= p.data_[k];
+        }
+
+        return *this;
+    }
+
+    template <typename T, unsigned int D>
+    inline hoNDPoint<T, D>& hoNDPoint<T, D>::operator *= (const Self& p)
+    {
+        // coordinate-wise multiplication by p
+        for ( unsigned int k=0; k<D; ++k )
+        {
+            data_[k] *= p.data_[k];
+        }
+
+        return *this;
+    }
+
+    template <typename T, unsigned int D>
+    inline hoNDPoint<T, D>& hoNDPoint<T, D>::operator /= (const Self& p)
+    {
+        // coordinate-wise division; a divisor whose magnitude is below DBL_EPSILON is shifted by +DBL_EPSILON first
+        for ( unsigned int k=0; k<D; ++k )
+        {
+            if ( std::abs(p.data_[k]) < DBL_EPSILON )
+            {
+                data_[k] /= (p.data_[k]+DBL_EPSILON);
+            }
+            else
+            {
+                data_[k] /= p.data_[k];
+            }
+        }
+
+        return *this;
+    }
+
+    template <typename T, unsigned int D>
+    inline hoNDPoint<T, D>& hoNDPoint<T, D>::operator += (const T& p)
+    {
+        // add the scalar p to every coordinate
+        for ( unsigned int k=0; k<D; ++k )
+        {
+            data_[k] += p;
+        }
+
+        return *this;
+    }
+
+    template <typename T, unsigned int D>
+    inline hoNDPoint<T, D>& hoNDPoint<T, D>::operator -= (const T& p)
+    {
+        // subtract the scalar p from every coordinate
+        for ( unsigned int k=0; k<D; ++k )
+        {
+            data_[k] -= p;
+        }
+
+        return *this;
+    }
+
+    template <typename T, unsigned int D>
+    inline hoNDPoint<T, D>& hoNDPoint<T, D>::operator *= (const T& p)
+    {
+        // multiply every coordinate by the scalar p
+        for ( unsigned int k=0; k<D; ++k )
+        {
+            data_[k] *= p;
+        }
+
+        return *this;
+    }
+
+    template <typename T, unsigned int D>
+    inline hoNDPoint<T, D>& hoNDPoint<T, D>::operator /= (const T& p)
+    {
+        // divide every coordinate by p; a divisor whose magnitude is below DBL_EPSILON is shifted by +DBL_EPSILON first
+        T divisor = p;
+        if ( std::abs(p) < DBL_EPSILON ) divisor += DBL_EPSILON;
+
+        for ( unsigned int k=0; k<D; ++k )
+        {
+            data_[k] /= divisor;
+        }
+
+        return *this;
+    }
+
+    template <typename T, unsigned int D>
+    inline void hoNDPoint<T, D>::dot(const Self& p, T& r)
+    {
+        // r receives the sum of data_[k]*p.data_[k] over all coordinates; the first product seeds the sum
+        r = data_[0]*p.data_[0];
+
+        for ( unsigned int k=1; k<D; ++k )
+        {
+            r += (data_[k]*p.data_[k]);
+        }
+    }
+
+    template <typename T, unsigned int D>
+    inline T hoNDPoint<T, D>::abs()
+    {
+        // magnitude of the point: square root of the sum of the squared coordinates
+        T sumSq = data_[0]*data_[0];
+
+        for ( unsigned int k=1; k<D; ++k )
+        {
+            sumSq += (data_[k]*data_[k]);
+        }
+
+        // the root is converted back to T before it is returned
+        const T length = std::sqrt(sumSq);
+        return length;
+    }
+
+    template <typename T, unsigned int D>
+    inline void hoNDPoint<T, D>::normalize()
+    {
+        // divide every coordinate by the magnitude; a point whose magnitude is below DBL_EPSILON is left unchanged
+        const T length = this->abs();
+        if ( std::abs(length) < DBL_EPSILON ) return;
+
+        for ( unsigned int k=0; k<D; ++k )
+        {
+            data_[k] /= length;
+        }
+    }
+
+    template <typename T, unsigned int D>
+    void hoNDPoint<T, D>::print(std::ostream& os) const
+    {
+        // switch the stream from scientific to fixed notation; the flags are not restored afterwards
+        os.unsetf(std::ios::scientific);
+        os.setf(std::ios::fixed);
+
+        // output format: [v0,v1,...] with no spaces
+        os << "[";
+        for ( unsigned int k=0; k<D-1; ++k )
+        {
+            os << data_[k] << ",";
+        }
+        // the last coordinate is followed by the closing bracket instead of a comma
+        os << data_[D-1] << "]";
+    }
+}
diff --git a/toolboxes/core/cpu/hostutils/CMakeLists.txt b/toolboxes/core/cpu/hostutils/CMakeLists.txt
index c8800c4..6b008ff 100644
--- a/toolboxes/core/cpu/hostutils/CMakeLists.txt
+++ b/toolboxes/core/cpu/hostutils/CMakeLists.txt
@@ -2,17 +2,19 @@ if (WIN32)
   ADD_DEFINITIONS(-D__BUILD_GADGETRON_HOSTUTILS__)
 endif (WIN32)
 
-add_library(hostutils SHARED 
+add_library(gadgetron_toolbox_hostutils SHARED 
   parameterparser.cpp
   )
 
+set_target_properties(gadgetron_toolbox_hostutils PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+
 install(TARGETS 
-  hostutils 
-  DESTINATION lib)
+  gadgetron_toolbox_hostutils 
+  DESTINATION lib COMPONENT main)
 
 install(FILES 
   hostutils_export.h 
   parameterparser.h 
   url_encode.h 
   FileInfo.h 
-  DESTINATION include)
+  DESTINATION include COMPONENT main)
diff --git a/toolboxes/core/cpu/image/hoNDImage.h b/toolboxes/core/cpu/image/hoNDImage.h
new file mode 100644
index 0000000..2f89f09
--- /dev/null
+++ b/toolboxes/core/cpu/image/hoNDImage.h
@@ -0,0 +1,517 @@
+/** \file       hoNDImage.h
+    \brief      N-dimensional image class for gadgetron
+
+                The default N-dimensional image is defined by the origin (the first pixel indexed by [0 0 0 ...]),
+                the pixel size and the axis for every coordinate. This defines a Euclidean space.
+
+                If this N-dimensional image is used with other coordinate systems, e.g. polar coordinate system, then the axis 
+                should not be used to compute the image-to-world transformation.
+
+    \author     Hui Xue
+*/
+
+#pragma once
+
+#include "hoNDPoint.h"
+#include "hoMatrix.h"
+#include "ismrmrd/meta.h"
+
+namespace Gadgetron
+{
+
+    /// N-dimensional image: an hoNDArray<T> with D dimensions that additionally stores,
+    /// per dimension, a pixel size, an origin and an axis vector, plus an image
+    /// position/orientation triple for the dicom patient coordinate system and a
+    /// container of meta attributes.
+    ///
+    /// T is the pixel value type, D is the number of image dimensions.
+    /// All coordinates are stored as coord_type (float).
+    template <typename T, unsigned int D>
+    class hoNDImage : public hoNDArray<T>
+    {
+    public:
+
+        typedef hoNDArray<T> BaseClass;
+        typedef hoNDImage<T, D> Self;
+
+        typedef T element_type;
+        typedef T value_type;
+        typedef float coord_type;
+
+        /// one axis vector (D coordinates) and the full set of axes
+        typedef hoNDPoint<coord_type, D> a_axis_type;
+        typedef std::vector<a_axis_type> axis_type;
+
+        /// 3D vector used for the patient position/orientation
+        typedef hoNDPoint<coord_type, 3> a_axis_image_patient_type;
+
+        /// number of image dimensions as a compile-time constant
+        enum { NDIM = D };
+
+        /// class-specific allocation: storage is obtained as a raw char array from the global operator new[]
+        void* operator new (size_t bytes)
+        {
+            return ::new char[bytes];
+        }
+
+        /// releases storage obtained from the class-specific operator new above
+        void operator delete (void *ptr)
+        {
+            delete [] static_cast <char *> (ptr);
+        } 
+
+        /// placement form: construct the object in caller-provided storage p
+        void * operator new(size_t s, void * p)
+        {
+            return p;
+        }
+
+        /// constructors
+        hoNDImage ();
+        hoNDImage (const std::vector<size_t>& dimensions);
+        hoNDImage (boost::shared_ptr< std::vector<size_t> > dimensions);
+        hoNDImage (const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize);
+        hoNDImage (const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin);
+        hoNDImage (const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin, const axis_type& axis);
+
+        hoNDImage(size_t len);
+        hoNDImage(size_t sx, size_t sy);
+        hoNDImage(size_t sx, size_t sy, size_t sz);
+        hoNDImage(size_t sx, size_t sy, size_t sz, size_t st);
+        hoNDImage(size_t sx, size_t sy, size_t sz, size_t st, size_t sp);
+        hoNDImage(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq);
+        hoNDImage(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr);
+        hoNDImage(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss);
+
+        /// attach memory constructors
+        hoNDImage (const std::vector<size_t>& dimensions, T* data, bool delete_data_on_destruct = false);
+        hoNDImage (const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, T* data, bool delete_data_on_destruct = false);
+        hoNDImage (const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin, T* data, bool delete_data_on_destruct = false);
+        hoNDImage (const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin, const axis_type& axis, T* data, bool delete_data_on_destruct = false);
+
+        hoNDImage(size_t len, T* data, bool delete_data_on_destruct = false);
+        hoNDImage(size_t sx, size_t sy, T* data, bool delete_data_on_destruct = false);
+        hoNDImage(size_t sx, size_t sy, size_t sz, T* data, bool delete_data_on_destruct = false);
+        hoNDImage(size_t sx, size_t sy, size_t sz, size_t st, T* data, bool delete_data_on_destruct = false);
+        hoNDImage(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, T* data, bool delete_data_on_destruct = false);
+        hoNDImage(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, T* data, bool delete_data_on_destruct = false);
+        hoNDImage(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, T* data, bool delete_data_on_destruct = false);
+        hoNDImage(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss, T* data, bool delete_data_on_destruct = false);
+
+        /// construct from a plain array
+        hoNDImage(const hoNDArray<T>& a);
+
+        /// copy construction and assignment
+        hoNDImage(const Self& a);
+        Self& operator=(const Self& rhs);
+
+        virtual ~hoNDImage();
+
+        /// clear the images, release all memory it holds, set pixelsize/axis/origin to zero-status
+        void clear();
+
+        /// create the image, called by constructors
+        virtual void create(const std::vector<size_t>& dimensions);
+        virtual void create(boost::shared_ptr< std::vector<size_t> > dimensions);
+        virtual void create(const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize);
+        virtual void create(const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin);
+        virtual void create(const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin, const axis_type& axis);
+
+        /// create the image from another image
+        /// not copy its content
+        /// the dimensions, pixel size, origin, axis and meta attributes of im are taken over;
+        /// this image is cleared first
+        template<typename T2> 
+        void createFrom(const hoNDImage<T2, D>& im)
+        {
+            this->clear();
+
+            std::vector<size_t> dim;
+            im.get_dimensions(dim);
+
+            std::vector<coord_type> pixelSize;
+            im.get_pixel_size(pixelSize);
+
+            std::vector<coord_type> origin;
+            im.get_origin(origin);
+
+            axis_type axis;
+            im.get_axis(axis);
+
+            this->create(dim, pixelSize, origin, axis);
+
+            this->attrib_ = im.attrib_;
+        }
+
+        /// create the image from another image
+        /// copy its content
+        /// every pixel of im is converted to T with a static_cast
+        template<typename T2> 
+        void create(const hoNDImage<T2, D>& im)
+        {
+            this->createFrom(im);
+
+            size_t ii;
+            size_t N = this->get_number_of_elements();
+            for ( ii=0; ii<N; ii++ )
+            {
+                this->data_[ii] = static_cast<T>(im.get_data_ptr()[ii]);
+            }
+        }
+
+        /// same as createFrom(im)
+        template<typename T2> 
+        inline void copyImageInfo(const hoNDImage<T2, D>& im)
+        {
+            this->createFrom(im);
+        }
+
+        /// same as create(im)
+        template<typename T2> 
+        inline void copyImageInfoAndContent(const hoNDImage<T2, D>& im)
+        {
+            this->create(im);
+        }
+
+        /// copy pixel size, origin, axis and meta attributes from im
+        /// the dimensions and the pixel data of this image are not touched here
+        template<typename T2> 
+        inline void copyImageInfoWithoutImageSize(const hoNDImage<T2, D>& im)
+        {
+            std::vector<coord_type> pixelSize;
+            im.get_pixel_size(pixelSize);
+
+            std::vector<coord_type> origin;
+            im.get_origin(origin);
+
+            axis_type axis;
+            im.get_axis(axis);
+
+            this->set_pixel_size(pixelSize);
+            this->set_origin(origin);
+            this->set_axis(axis);
+
+            this->attrib_ = im.attrib_;
+        }
+
+        /// create the image on caller-provided memory
+        virtual void create(const std::vector<size_t>& dimensions,
+                            T* data, 
+                            bool delete_data_on_destruct = false);
+
+        virtual void create(const std::vector<size_t>& dimensions, 
+                            const std::vector<coord_type>& pixelSize, 
+                            T* data, 
+                            bool delete_data_on_destruct = false);
+
+        virtual void create(const std::vector<size_t>& dimensions, 
+                            const std::vector<coord_type>& pixelSize, 
+                            const std::vector<coord_type>& origin, 
+                            T* data, 
+                            bool delete_data_on_destruct = false);
+
+        virtual void create(const std::vector<size_t>& dimensions, 
+                            const std::vector<coord_type>& pixelSize, 
+                            const std::vector<coord_type>& origin, 
+                            const axis_type& axis, 
+                            T* data, 
+                            bool delete_data_on_destruct = false);
+
+        /// convert from/to hoNDArray
+        void from_NDArray(const hoNDArray<T>& a);
+        void to_NDArray(hoNDArray<T>& a) const;
+
+        /// whether two images have the same size
+        bool dimensions_equal(const std::vector<size_t>& dimensions) const;
+
+        template<class S> 
+        bool dimensions_equal(const hoNDArray<S>& im) const
+        {
+            std::vector<size_t> dim;
+            im.get_dimensions(dim);
+
+            return this->dimensions_equal(dim);
+        }
+
+        template<class S> 
+        bool dimensions_equal(const hoNDImage<S, D>& im) const
+        {
+            std::vector<size_t> dim;
+            im.get_dimensions(dim);
+
+            return this->dimensions_equal(dim);
+        }
+
+        /// pointer overload; im is dereferenced without a null check
+        template<class S> 
+        bool dimensions_equal(const hoNDImage<S, D>* im) const
+        {
+            return this->dimensions_equal(*im);
+        }
+
+        /// whether two images have the same pixel size, i.e. the difference is at most FLT_EPSILON for every dimension
+        /// im is read through its public accessor: hoNDImage<S, D> with S != T is a different class,
+        /// so its protected members cannot be accessed from here
+        template<class S> 
+        bool pixel_size_equal(const hoNDImage<S, D>& im) const
+        {
+            unsigned int ii;
+            for ( ii=0; ii<D; ii++ )
+            {
+                // store the difference first so that the accessor is not evaluated more than once inside GT_ABS
+                const coord_type diff = this->pixelSize_[ii] - im.get_pixel_size(ii);
+                if ( GT_ABS(diff) > FLT_EPSILON ) return false;
+            }
+
+            return true;
+        }
+
+        /// whether two images have the same axis for every dimension, compared with hoNDPoint's operator !=
+        /// im is read through its public accessor for the same reason as in pixel_size_equal
+        template<class S> 
+        bool axis_equal(const hoNDImage<S, D>& im) const
+        {
+            unsigned int ii;
+            for ( ii=0; ii<D; ii++ )
+            {
+                const a_axis_type otherAxis = im.get_axis(ii);
+                if ( this->axis_[ii] != otherAxis ) return false;
+            }
+
+            return true;
+        }
+
+        /// get the pixel size
+        coord_type get_pixel_size(size_t dimension) const;
+        void get_pixel_size(std::vector<coord_type>& pixelSize) const;
+
+        void set_pixel_size(size_t dimension, coord_type v);
+        void set_pixel_size(const std::vector<coord_type>& pixelSize);
+
+        /// get origin
+        coord_type get_origin(size_t dimension) const;
+        void get_origin(std::vector<coord_type>& origin) const;
+
+        void set_origin(size_t dimension, coord_type v);
+        void set_origin(const std::vector<coord_type>& origin);
+
+        /// get axis
+        coord_type get_axis(size_t dimension, size_t elem) const;
+        a_axis_type get_axis(size_t dimension) const;
+        void get_axis(axis_type& axis) const;
+
+        void set_axis(size_t dimension, size_t elem, coord_type v);
+        void set_axis(size_t dimension, const a_axis_type& v);
+        void set_axis(const axis_type& axis);
+
+        /// get image position patient
+        void get_image_position(coord_type pos[3]) const;
+        void get_image_position(unsigned int d, coord_type& pos) const;
+        void get_image_position(a_axis_image_patient_type& pos) const;
+
+        void set_image_position(coord_type pos[3]);
+        void set_image_position(unsigned int d, coord_type pos);
+        void set_image_position(const a_axis_image_patient_type& pos);
+
+        /// get image orientation patient
+        void get_image_orientation(unsigned int d, coord_type ori[3]) const;
+        void get_image_orientation(unsigned int d, a_axis_image_patient_type& ori) const;
+        
+        /// for dimension d and index ind
+        void get_image_orientation(unsigned int d, unsigned int ind, coord_type& ori) const;
+        /// get image orientation as a quaternion
+        void get_image_orientation(coord_type quat[4]) const;
+
+        void set_image_orientation(unsigned int d, coord_type ori[3]);
+        void set_image_orientation(unsigned int d, const a_axis_image_patient_type& ori);
+        void set_image_orientation(unsigned int d, unsigned int ind, coord_type ori);
+        void set_image_orientation(coord_type quat[4]);
+
+        /// the number of dimensions is fixed by the template parameter D
+        size_t get_number_of_dimensions() const { return D; }
+
+        /// given the indexes, compute the corresponding 1D offset
+        size_t calculate_offset(const size_t* ind) const;
+        size_t calculate_offset(const std::vector<size_t>& ind) const;
+        size_t calculate_offset(const std::vector<gt_index_type>& ind) const;
+
+        size_t calculate_offset(size_t x, size_t y) const;
+        size_t calculate_offset(size_t x, size_t y, size_t z) const;
+        size_t calculate_offset(size_t x, size_t y, size_t z, size_t s) const;
+        size_t calculate_offset(size_t x, size_t y, size_t z, size_t s, size_t p) const;
+        size_t calculate_offset(size_t x, size_t y, size_t z, size_t s, size_t p, size_t r) const;
+        size_t calculate_offset(size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a) const;
+        size_t calculate_offset(size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q) const;
+        size_t calculate_offset(size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q, size_t u) const;
+
+        /// given the 1D offset, compute the corresponding indexes
+        std::vector<size_t> calculate_index( size_t offset ) const;
+        void calculate_index( size_t offset, size_t* index ) const;
+        void calculate_index( size_t offset, std::vector<size_t>& index ) const;
+        void calculate_index( size_t offset, coord_type* index ) const;
+        void calculate_index( size_t offset, size_t& x, size_t& y ) const;
+        void calculate_index( size_t offset, size_t& x, size_t& y, size_t& z ) const;
+        void calculate_index( size_t offset, size_t& x, size_t& y, size_t& z, size_t& s ) const;
+        void calculate_index( size_t offset, size_t& x, size_t& y, size_t& z, size_t& s, size_t& p ) const;
+        void calculate_index( size_t offset, size_t& x, size_t& y, size_t& z, size_t& s, size_t& p, size_t& r ) const;
+        void calculate_index( size_t offset, size_t& x, size_t& y, size_t& z, size_t& s, size_t& p, size_t& r, size_t& a ) const;
+        void calculate_index( size_t offset, size_t& x, size_t& y, size_t& z, size_t& s, size_t& p, size_t& r, size_t& a, size_t& q ) const;
+        void calculate_index( size_t offset, size_t& x, size_t& y, size_t& z, size_t& s, size_t& p, size_t& r, size_t& a, size_t& q, size_t& u ) const;
+
+        /// access the pixel value
+        T& operator()( const size_t* ind );
+        const T& operator()( const size_t* ind ) const;
+
+        T& operator()( const std::vector<size_t>& ind );
+        const T& operator()( const std::vector<size_t>& ind ) const;
+
+        T& operator()( const std::vector<gt_index_type>& ind );
+        const T& operator()( const std::vector<gt_index_type>& ind ) const;
+
+        T& operator[]( size_t x );
+        const T& operator[]( size_t x ) const;
+
+        T& operator()( size_t x );
+        const T& operator()( size_t x ) const;
+
+        T& operator()( size_t x, size_t y );
+        const T& operator()( size_t x, size_t y ) const;
+
+        T& operator()( size_t x, size_t y, size_t z );
+        const T& operator()( size_t x, size_t y, size_t z ) const;
+
+        T& operator()( size_t x, size_t y, size_t z, size_t s );
+        const T& operator()( size_t x, size_t y, size_t z, size_t s ) const;
+
+        T& operator()( size_t x, size_t y, size_t z, size_t s, size_t p );
+        const T& operator()( size_t x, size_t y, size_t z, size_t s, size_t p ) const;
+
+        T& operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r );
+        const T& operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r ) const;
+
+        T& operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a );
+        const T& operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a ) const;
+
+        T& operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q );
+        const T& operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q ) const;
+
+        T& operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q, size_t u );
+        const T& operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q, size_t u ) const;
+
+        /// fill the image with a value
+        void fill(T value);
+
+        /// same as create(aIm): take over the image information and copy the content
+        template<typename T2> 
+        void copyFrom(const hoNDImage<T2, D>& aIm)
+        {
+            this->create(aIm);
+        }
+
+        /// image pixel index to world coordinate
+        void image_to_world(const coord_type* ind, coord_type* coord) const;
+        void image_to_world(const std::vector<coord_type>& ind, std::vector<coord_type>& coord) const;
+
+        void image_to_world(coord_type x, coord_type& cx) const;
+
+        void image_to_world(coord_type x, coord_type y, 
+                            coord_type& cx, coord_type& cy) const;
+
+        void image_to_world(coord_type x, coord_type y, coord_type z,
+                            coord_type& cx, coord_type& cy, coord_type& cz) const;
+
+        void image_to_world(coord_type x, coord_type y, coord_type z, coord_type s,
+                            coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs) const;
+
+        void image_to_world(coord_type x, coord_type y, coord_type z, coord_type s, coord_type p,
+                            coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs, coord_type& cp) const;
+
+        void image_to_world(coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r,
+                            coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs, coord_type& cp, coord_type& cr) const;
+
+        void image_to_world(coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a,
+                            coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs, coord_type& cp, coord_type& cr, coord_type& ca) const;
+
+        void image_to_world(coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a, coord_type q,
+                            coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs, coord_type& cp, coord_type& cr, coord_type& ca, coord_type& cq) const;
+
+        void image_to_world(coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a, coord_type q, coord_type u,
+                            coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs, coord_type& cp, coord_type& cr, coord_type& ca, coord_type& cq, coord_type& cu) const;
+
+        /// for integer pixel indexes
+        void image_to_world(const size_t* ind, coord_type* coord) const;
+        void image_to_world(const std::vector<size_t>& ind, std::vector<coord_type>& coord) const;
+
+        void image_to_world(size_t x, coord_type& cx) const;
+
+        void image_to_world(size_t x, size_t y, 
+                            coord_type& cx, coord_type& cy) const;
+
+        void image_to_world(size_t x, size_t y, size_t z,
+                            coord_type& cx, coord_type& cy, coord_type& cz) const;
+
+        void image_to_world(size_t x, size_t y, size_t z, size_t s,
+                            coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs) const;
+
+        void image_to_world(size_t x, size_t y, size_t z, size_t s, size_t p,
+                            coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs, coord_type& cp) const;
+
+        void image_to_world(size_t x, size_t y, size_t z, size_t s, size_t p, size_t r,
+                            coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs, coord_type& cp, coord_type& cr) const;
+
+        void image_to_world(size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a,
+                            coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs, coord_type& cp, coord_type& cr, coord_type& ca) const;
+
+        void image_to_world(size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q,
+                            coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs, coord_type& cp, coord_type& cr, coord_type& ca, coord_type& cq) const;
+
+        void image_to_world(size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q, size_t u,
+                            coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs, coord_type& cp, coord_type& cr, coord_type& ca, coord_type& cq, coord_type& cu) const;
+
+        /// get the image-to-world transformation matrix
+        /// the Homogeneous coordinate transformation matrix is computed
+        void image_to_world_matrix(hoMatrix<coord_type>& image2world) const;
+        void set_image_to_world_matrix(const hoMatrix<coord_type>& image2world);
+
+        /// world coordinate to image pixel index
+        void world_to_image(const coord_type* coord, coord_type* ind) const;
+        void world_to_image(const std::vector<coord_type>& coord, std::vector<coord_type>& ind) const;
+
+        void world_to_image(coord_type cx, coord_type& x) const;
+
+        void world_to_image(coord_type cx, coord_type cy, 
+                            coord_type& x, coord_type& y) const;
+
+        void world_to_image(coord_type cx, coord_type cy, coord_type cz,
+                            coord_type& x, coord_type& y, coord_type& z) const;
+
+        void world_to_image(coord_type cx, coord_type cy, coord_type cz, coord_type cs,
+                            coord_type& x, coord_type& y, coord_type& z, coord_type& s) const;
+
+        void world_to_image(coord_type cx, coord_type cy, coord_type cz, coord_type cs, coord_type cp,
+                            coord_type& x, coord_type& y, coord_type& z, coord_type& s, coord_type& p) const;
+
+        void world_to_image(coord_type cx, coord_type cy, coord_type cz, coord_type cs, coord_type cp, coord_type cr,
+                            coord_type& x, coord_type& y, coord_type& z, coord_type& s, coord_type& p, coord_type& r) const;
+
+        void world_to_image(coord_type cx, coord_type cy, coord_type cz, coord_type cs, coord_type cp, coord_type cr, coord_type ca,
+                            coord_type& x, coord_type& y, coord_type& z, coord_type& s, coord_type& p, coord_type& r, coord_type& a) const;
+
+        void world_to_image(coord_type cx, coord_type cy, coord_type cz, coord_type cs, coord_type cp, coord_type cr, coord_type ca, coord_type cq,
+                            coord_type& x, coord_type& y, coord_type& z, coord_type& s, coord_type& p, coord_type& r, coord_type& a, coord_type& q) const;
+
+        void world_to_image(coord_type cx, coord_type cy, coord_type cz, coord_type cs, coord_type cp, coord_type cr, coord_type ca, coord_type cq, coord_type cu,
+                            coord_type& x, coord_type& y, coord_type& z, coord_type& s, coord_type& p, coord_type& r, coord_type& a, coord_type& q, coord_type& u) const;
+
+        /// get the world_to_image transformation matrix
+        /// the Homogeneous coordinate transformation matrix is computed
+        void world_to_image_matrix(hoMatrix<coord_type>& world2image) const;
+        void set_world_to_image_matrix(const hoMatrix<coord_type>& world2image);
+
+        /// is the sub region in the image
+        bool in_image_region(const std::vector<size_t>& start, std::vector<size_t>& size);
+
+        /// get the sub image
+        void get_sub_image(const std::vector<size_t>& start, std::vector<size_t>& size, Self& out);
+
+        /// meta attributes
+        ISMRMRD::MetaContainer attrib_;
+
+        /// serialize/deserialize image content
+        virtual bool serializeImage(char*& buf, size_t& len) const;
+        virtual bool deserializeImage(char* buf, size_t& len);
+
+        /// serialize/deserialize image content and meta attributes
+        virtual bool serialize(char*& buf, size_t& len) const;
+        virtual bool deserialize(char* buf, size_t& len);
+
+        /// print out the image information
+        virtual void print(std::ostream& os) const;
+        virtual void printContent(std::ostream& os) const;
+
+    protected:
+
+        /// members inherited from hoNDArray<T>, made visible by name in this class template
+        using BaseClass::dimensions_;
+        using BaseClass::offsetFactors_;
+        using BaseClass::data_;
+        using BaseClass::elements_;
+        using BaseClass::delete_data_on_destruct_;
+
+        /// per-dimension geometry: pixel size, its stored reciprocal, origin and axis vector
+        coord_type pixelSize_[D];
+        coord_type pixelSize_reciprocal_[D];
+        coord_type origin_[D];
+        hoNDPoint<coord_type, D> axis_[D];
+
+        /// for the dicom coordinate system
+        a_axis_image_patient_type image_position_patient_;
+        /// image orientation for row/column/slice directions
+        a_axis_image_patient_type image_orientation_patient_[3];
+    };
+}
+
+#include "hoNDImage.hxx"
diff --git a/toolboxes/core/cpu/image/hoNDImage.hxx b/toolboxes/core/cpu/image/hoNDImage.hxx
new file mode 100644
index 0000000..6e55b31
--- /dev/null
+++ b/toolboxes/core/cpu/image/hoNDImage.hxx
@@ -0,0 +1,2980 @@
+/** \file       hoNDImage.hxx
+    \brief      Implementation of N-dimensional image class for gadgetron
+    \author     Hui Xue
+*/
+
+#include "hoNDImage.h"
+
+namespace Gadgetron
+{
+    /// Default constructor: the image gets D zero-sized dimensions,
+    /// unit pixel size, zero origin and identity axes.
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage () : BaseClass()
+    {
+        dimensions_->resize(D, 0);
+        offsetFactors_->resize(D, 0);
+
+        for ( unsigned int d=0; d<D; ++d )
+        {
+            pixelSize_[d] = 1;
+            pixelSize_reciprocal_[d] = 1;
+            origin_[d] = 0;
+            axis_[d].fill(0);
+            axis_[d][d] = coord_type(1.0);
+        }
+
+        // patient position at zero; the three orientation vectors form the 3x3 identity
+        for ( unsigned int r=0; r<3; ++r )
+        {
+            image_position_patient_[r] = 0;
+            for ( unsigned int c=0; c<3; ++c ) image_orientation_patient_[r][c] = (r==c) ? 1 : 0;
+        }
+    }
+
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage (const std::vector<size_t>& dimensions) : BaseClass( const_cast<std::vector<size_t>& >(dimensions) )
+    {
+        this->create(dimensions); // shape the image; geometry is reset to unit pixel size, zero origin, identity axes
+    }
+
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage (boost::shared_ptr< std::vector<size_t> > dimensions) : BaseClass( dimensions )
+    {
+        this->create( *dimensions ); // forwards the pointed-to vector; dimensions is dereferenced without a null check
+    }
+
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage (const std::vector<size_t>& dimensions, 
+        const std::vector<coord_type>& pixelSize) : BaseClass( const_cast<std::vector<size_t>& >(dimensions) )
+    {
+        this->create(dimensions, pixelSize); // create() reads pixelSize[0..D-1]; origin and axes take their defaults
+    }
+
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage (const std::vector<size_t>& dimensions, 
+        const std::vector<coord_type>& pixelSize, 
+        const std::vector<coord_type>& origin) : BaseClass( const_cast<std::vector<size_t>& >(dimensions) )
+    {
+        this->create(dimensions, pixelSize, origin); // create() reads pixelSize and origin at 0..D-1; axes take their defaults
+    }
+
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage (const std::vector<size_t>& dimensions, 
+                                const std::vector<coord_type>& pixelSize, 
+                                const std::vector<coord_type>& origin, 
+                                const axis_type& axis) : BaseClass( const_cast<std::vector<size_t>& >(dimensions) )
+    {
+        this->create(dimensions, pixelSize, origin, axis); // create() reads pixelSize, origin and axis at 0..D-1
+    }
+
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage(size_t len) : BaseClass(len)
+    {
+        std::vector<size_t> dimension(1, len); // a single extent, independent of D
+        this->create(dimension);               // shape the image and reset geometry to defaults
+    }
+
+    /// Construct from two extents (sx, sy); geometry is reset to defaults by create().
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage(size_t sx, size_t sy) : BaseClass(sx, sy)
+    {
+        const size_t extents[] = { sx, sy };
+        std::vector<size_t> dims(extents, extents + 2);
+        this->create(dims);
+    }
+
+    /// Construct from three extents (sx, sy, sz); geometry is reset to defaults by create().
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage(size_t sx, size_t sy, size_t sz) : BaseClass(sx, sy, sz)
+    {
+        const size_t extents[] = { sx, sy, sz };
+        std::vector<size_t> dims(extents, extents + 3);
+
+        this->create(dims);
+    }
+
+    /// Construct from four extents (sx, sy, sz, st); geometry is reset to defaults by create().
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage(size_t sx, size_t sy, size_t sz, size_t st) : BaseClass(sx, sy, sz, st)
+    {
+        const size_t extents[] = {
+            sx, sy, sz, st
+        };
+        std::vector<size_t> dims(extents, extents + 4);
+        this->create(dims);
+    }
+
+    /// Construct from five extents (sx .. sp); geometry is reset to defaults by create().
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage(size_t sx, size_t sy, size_t sz, size_t st, size_t sp) : BaseClass(sx, sy, sz, st, sp)
+    {
+        const size_t extents[] = {
+            sx, sy, sz,
+            st, sp
+        };
+        std::vector<size_t> dims(extents, extents + 5);
+        this->create(dims);
+    }
+
+    /// Construct from six extents (sx .. sq); geometry is reset to defaults by create().
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq) : BaseClass(sx, sy, sz, st, sp, sq)
+    {
+        const size_t extents[] = {
+            sx, sy,
+            sz, st,
+            sp, sq
+        };
+        std::vector<size_t> dims(extents, extents + 6);
+        this->create(dims);
+    }
+
+    /// Construct from seven extents (sx .. sr); geometry is reset to defaults by create().
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr) : BaseClass(sx, sy, sz, st, sp, sq, sr)
+    {
+        const size_t extents[] = {
+            sx, sy,
+            sz, st,
+            sp, sq,
+            sr
+        };
+        std::vector<size_t> dims(extents, extents + 7);
+        this->create(dims);
+    }
+
+    /// Construct from eight extents (sx .. ss); geometry is reset to defaults by create().
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss) : BaseClass(sx, sy, sz, st, sp, sq, sr, ss)
+    {
+        const size_t extents[] = {
+            sx, sy,
+            sz, st,
+            sp, sq,
+            sr, ss
+        };
+        std::vector<size_t> dims(extents, extents + 8);
+
+        this->create(dims);
+    }
+
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage (const std::vector<size_t>& dimensions, T* data, bool delete_data_on_destruct) : BaseClass(dimensions, data, delete_data_on_destruct)
+    {
+        this->create(dimensions, data, delete_data_on_destruct); // wrap the caller's buffer; the flag is stored as delete_data_on_destruct_
+    }
+
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage (const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, T* data, bool delete_data_on_destruct) : BaseClass(dimensions, data, delete_data_on_destruct)
+    {
+        this->create(dimensions, pixelSize, data, delete_data_on_destruct); // wrap the caller's buffer and set the pixel size; origin and axes take their defaults
+    }
+
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage (const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin, T* data, bool delete_data_on_destruct) : BaseClass(dimensions, data, delete_data_on_destruct)
+    {
+        this->create(dimensions, pixelSize, origin, data, delete_data_on_destruct); // wrap the caller's buffer and set pixel size and origin; axes take their defaults
+    }
+
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage (const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin, const axis_type& axis, T* data, bool delete_data_on_destruct) : BaseClass(dimensions, data, delete_data_on_destruct)
+    {
+        this->create(dimensions, pixelSize, origin, axis, data, delete_data_on_destruct); // wrap the caller's buffer and set the full geometry
+    }
+
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage(size_t len, T* data, bool delete_data_on_destruct) : BaseClass(len, data, delete_data_on_destruct)
+    {
+        std::vector<size_t> dimension(1, len);                  // a single extent, independent of D
+        this->create(dimension, data, delete_data_on_destruct); // wrap the caller's buffer with default geometry
+    }
+
+    /// Wrap an existing buffer as an image with two extents (sx, sy) and default geometry.
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage(size_t sx, size_t sy, T* data, bool delete_data_on_destruct) : BaseClass(sx, sy, data, delete_data_on_destruct)
+    {
+        const size_t extents[] = { sx, sy };
+        std::vector<size_t> dims(extents, extents + 2);
+        this->create(dims, data, delete_data_on_destruct);
+    }
+
+    /// Wrap an existing buffer as an image with three extents (sx, sy, sz) and default geometry.
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage(size_t sx, size_t sy, size_t sz, T* data, bool delete_data_on_destruct) : BaseClass(sx, sy, sz, data, delete_data_on_destruct)
+    {
+        const size_t extents[] = { sx, sy, sz };
+        std::vector<size_t> dims(extents, extents + 3);
+
+        this->create(dims, data, delete_data_on_destruct);
+    }
+
+    /// Wrap an existing buffer as an image with four extents (sx .. st) and default geometry.
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage(size_t sx, size_t sy, size_t sz, size_t st, T* data, bool delete_data_on_destruct) : BaseClass(sx, sy, sz, st, data, delete_data_on_destruct)
+    {
+        const size_t extents[] = {
+            sx, sy, sz, st
+        };
+        std::vector<size_t> dims(extents, extents + 4);
+        this->create(dims, data, delete_data_on_destruct);
+    }
+
+    /// Wrap an existing buffer as an image with five extents (sx .. sp) and default geometry.
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, T* data, bool delete_data_on_destruct) : BaseClass(sx, sy, sz, st, sp, data, delete_data_on_destruct)
+    {
+        const size_t extents[] = {
+            sx, sy, sz,
+            st, sp
+        };
+        std::vector<size_t> dims(extents, extents + 5);
+        this->create(dims, data, delete_data_on_destruct);
+    }
+
+    /// Wrap an existing buffer as an image with six extents (sx .. sq) and default geometry.
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, T* data, bool delete_data_on_destruct) : BaseClass(sx, sy, sz, st, sp, sq, data, delete_data_on_destruct)
+    {
+        const size_t extents[] = {
+            sx, sy,
+            sz, st,
+            sp, sq
+        };
+        std::vector<size_t> dims(extents, extents + 6);
+        this->create(dims, data, delete_data_on_destruct);
+    }
+
+    /// Wrap an existing buffer as an image with seven extents (sx .. sr) and default geometry.
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, T* data, bool delete_data_on_destruct) : BaseClass(sx, sy, sz, st, sp, sq, sr, data, delete_data_on_destruct)
+    {
+        const size_t extents[] = {
+            sx, sy,
+            sz, st,
+            sp, sq,
+            sr
+        };
+        std::vector<size_t> dims(extents, extents + 7);
+        this->create(dims, data, delete_data_on_destruct);
+    }
+
+    /// Wrap an existing buffer as an image with eight extents (sx .. ss) and default geometry.
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss, T* data, bool delete_data_on_destruct) : BaseClass(sx, sy, sz, st, sp, sq, sr, ss, data, delete_data_on_destruct)
+    {
+        const size_t extents[] = {
+            sx, sy,
+            sz, st,
+            sp, sq,
+            sr, ss
+        };
+        std::vector<size_t> dims(extents, extents + 8);
+
+        this->create(dims, data, delete_data_on_destruct);
+    }
+
+    /// Construct from a plain ND array: shape via create(), then a raw copy of the array's elements.
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage(const hoNDArray<T>& a) : BaseClass(a)
+    {
+        this->create( *a.get_dimensions() );
+        memcpy( this->data_, a.begin(), this->get_number_of_bytes() );
+    }
+
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::hoNDImage(const Self& a) : BaseClass()
+    {
+        *this = a; // delegate to operator=, which copies data, dimensions, geometry and attributes
+    }
+
+    /// Deep-copy assignment: takes over the pixel data, dimensions, geometry and meta attributes of rhs.
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>& hoNDImage<T, D>::operator=(const Self& rhs)
+    {
+        if ( this == &rhs ) return *this;
+
+        // an empty source resets this image instead of copying
+        if ( rhs.get_number_of_elements() == 0 )
+        {
+            this->clear();
+            return *this;
+        }
+
+        if ( !this->dimensions_ ) this->dimensions_.reset( new std::vector<size_t> );
+        if ( !this->offsetFactors_ ) this->offsetFactors_.reset( new std::vector<size_t> );
+
+        const bool reuseBuffer = this->dimensions_equal(rhs) && ( this->data_ != NULL );
+        if ( !reuseBuffer )
+        {
+            // shape differs or there is no buffer yet: call deallocate_memory() and allocate for rhs's dimensions
+            this->deallocate_memory();
+            this->data_ = 0;
+
+            *(this->dimensions_) = *(rhs.dimensions_);
+            this->allocate_memory();
+            this->calculate_offset_factors( *(this->dimensions_) );
+            memcpy( this->data_, rhs.data_, this->elements_*sizeof(T) );
+        }
+        else
+        {
+            memcpy( this->data_, rhs.data_, rhs.elements_*sizeof(T) );
+        }
+
+        for ( unsigned int d=0; d<D; ++d )
+        {
+            this->pixelSize_[d] = rhs.pixelSize_[d];
+            this->pixelSize_reciprocal_[d] = rhs.pixelSize_reciprocal_[d];
+            this->origin_[d] = rhs.origin_[d];
+            this->axis_[d] = rhs.axis_[d];
+        }
+
+        this->image_position_patient_ = rhs.image_position_patient_;
+        for ( unsigned int r=0; r<3; ++r ) this->image_orientation_patient_[r] = rhs.image_orientation_patient_[r];
+
+        this->attrib_ = rhs.attrib_;
+        return *this;
+    }
+
+    template <typename T, unsigned int D> 
+    hoNDImage<T, D>::~hoNDImage()
+    {
+        if (this->delete_data_on_destruct_) // release the buffer only when the ownership flag is set
+        {
+            this->deallocate_memory();
+        }
+    }
+
+    /// Reset to an empty image: release owned storage, drop the dimensions and restore default geometry and attributes.
+    /// The dimension vectors are null-checked, since operator= may call clear() before it has created them.
+    template <typename T, unsigned int D> 
+    void hoNDImage<T, D>::clear()
+    {
+        if ( this->delete_data_on_destruct_ )
+        {
+            this->deallocate_memory();
+        }
+        this->data_ = 0;
+        this->elements_ = 0;
+        this->delete_data_on_destruct_ = true;
+
+        if ( dimensions_ ) dimensions_->clear();
+        if ( offsetFactors_ ) offsetFactors_->clear();
+
+        for ( unsigned int ii=0; ii<D; ii++ )
+        {
+            pixelSize_[ii] = 1;
+            pixelSize_reciprocal_[ii] = 1;
+            origin_[ii] = 0;
+            axis_[ii].fill(0);
+            axis_[ii][ii] = coord_type(1.0);
+        }
+
+        image_position_patient_[0] = 0;
+        image_position_patient_[1] = 0;
+        image_position_patient_[2] = 0;
+
+        image_orientation_patient_[0][0] = 1; image_orientation_patient_[0][1] = 0; image_orientation_patient_[0][2] = 0;
+        image_orientation_patient_[1][0] = 0; image_orientation_patient_[1][1] = 1; image_orientation_patient_[1][2] = 0;
+        image_orientation_patient_[2][0] = 0; image_orientation_patient_[2][1] = 0; image_orientation_patient_[2][2] = 1;
+
+        this->attrib_ = ISMRMRD::MetaContainer();
+    }
+
+    /// Shape the image to the given dimensions and reset its geometry to defaults.
+    /// Memory is (re)allocated only when the requested dimensions differ from the current ones.
+    /// \param dimensions requested extent of every dimension
+    template <typename T, unsigned int D> 
+    void hoNDImage<T, D>::create(const std::vector<size_t>& dimensions)
+    {
+        const bool sameShape = this->dimensions_equal(dimensions);
+        if ( !sameShape )
+        {
+            // make sure the shared vectors exist before writing through them
+            if ( !dimensions_ ) dimensions_.reset( new std::vector<size_t> );
+            if ( !offsetFactors_ ) offsetFactors_.reset( new std::vector<size_t> );
+
+            *dimensions_ = dimensions;
+            this->allocate_memory();
+            this->calculate_offset_factors(dimensions);
+        }
+
+        // unit pixel size, zero origin, identity axes
+        for ( unsigned int d=0; d<D; ++d )
+        {
+            pixelSize_[d] = 1;
+            pixelSize_reciprocal_[d] = 1;
+            origin_[d] = 0;
+            axis_[d].fill(0);
+            axis_[d][d] = coord_type(1.0);
+        }
+
+        // patient position at zero; the three orientation vectors form the 3x3 identity
+        for ( unsigned int r=0; r<3; ++r )
+        {
+            image_position_patient_[r] = 0;
+            for ( unsigned int c=0; c<3; ++c )
+            {
+                image_orientation_patient_[r][c] = (r==c) ? 1 : 0;
+            }
+        }
+    }
+
+    template <typename T, unsigned int D> 
+    void hoNDImage<T, D>::create(boost::shared_ptr< std::vector<size_t> > dimensions)
+    {
+        this->create(*dimensions); // forwards to the vector overload; dimensions is dereferenced without a null check
+    }
+
+    /// Shape the image and set its pixel size; origin, axes and patient geometry take their defaults.
+    /// \param dimensions requested extent of every dimension
+    /// \param pixelSize  pixel size per dimension; entries 0..D-1 are read
+    template <typename T, unsigned int D> 
+    void hoNDImage<T, D>::create(const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize)
+    {
+        const bool sameShape = this->dimensions_equal(dimensions);
+        if ( !sameShape )
+        {
+            // make sure the shared vectors exist before writing through them
+            if ( !dimensions_ ) dimensions_.reset( new std::vector<size_t> );
+            if ( !offsetFactors_ ) offsetFactors_.reset( new std::vector<size_t> );
+
+            *dimensions_ = dimensions;
+            this->allocate_memory();
+            this->calculate_offset_factors(dimensions);
+        }
+
+        // per-dimension geometry; the reciprocal pixel size is cached next to the pixel size
+        for ( unsigned int d=0; d<D; ++d )
+        {
+            pixelSize_[d] = pixelSize[d];
+            pixelSize_reciprocal_[d] = coord_type(1.0)/pixelSize_[d];
+            origin_[d] = 0;
+            axis_[d].fill(0);
+            axis_[d][d] = coord_type(1.0);
+        }
+
+        // patient position at zero; the three orientation vectors form the 3x3 identity
+        for ( unsigned int r=0; r<3; ++r )
+        {
+            image_position_patient_[r] = 0;
+            for ( unsigned int c=0; c<3; ++c )
+            {
+                image_orientation_patient_[r][c] = (r==c) ? 1 : 0;
+            }
+        }
+    }
+
+    /// Shape the image and set pixel size and origin; axes and patient orientation take their defaults.
+    /// The patient position receives the first min(D, 3) origin entries, the rest stays zero.
+    /// \param dimensions requested extent of every dimension
+    /// \param pixelSize  pixel size per dimension; entries 0..D-1 are read
+    /// \param origin     origin per dimension; entries 0..D-1 are read
+    template <typename T, unsigned int D> 
+    void hoNDImage<T, D>::create(const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin)
+    {
+        const bool sameShape = this->dimensions_equal(dimensions);
+        if ( !sameShape )
+        {
+            // make sure the shared vectors exist before writing through them
+            if ( !dimensions_ ) dimensions_.reset( new std::vector<size_t> );
+            if ( !offsetFactors_ ) offsetFactors_.reset( new std::vector<size_t> );
+
+            *dimensions_ = dimensions;
+            this->allocate_memory();
+            this->calculate_offset_factors(dimensions);
+        }
+
+        // per-dimension geometry; the reciprocal pixel size is cached next to the pixel size
+        for ( unsigned int d=0; d<D; ++d )
+        {
+            pixelSize_[d] = pixelSize[d];
+            pixelSize_reciprocal_[d] = coord_type(1.0)/pixelSize_[d];
+            origin_[d] = origin[d];
+            axis_[d].fill(0);
+            axis_[d][d] = coord_type(1.0);
+        }
+
+        for ( unsigned int r=0; r<3; ++r ) image_position_patient_[r] = 0;
+
+        // copy as many origin components into the patient position as the image has dimensions (at most 3)
+        switch ( D )
+        {
+            case 1:
+                image_position_patient_[0] = origin[0];
+                break;
+            case 2:
+                image_position_patient_[0] = origin[0];
+                image_position_patient_[1] = origin[1];
+                break;
+            default:
+                image_position_patient_[0] = origin[0];
+                image_position_patient_[1] = origin[1];
+                image_position_patient_[2] = origin[2];
+                break;
+        }
+
+        // the three orientation vectors form the 3x3 identity
+        image_orientation_patient_[0][0] = 1; image_orientation_patient_[0][1] = 0; image_orientation_patient_[0][2] = 0;
+        image_orientation_patient_[1][0] = 0; image_orientation_patient_[1][1] = 1; image_orientation_patient_[1][2] = 0;
+        image_orientation_patient_[2][0] = 0; image_orientation_patient_[2][1] = 0; image_orientation_patient_[2][2] = 1;
+    }
+
+    /// Shape the image and set pixel size, origin and axes.
+    /// The patient position receives the first min(D, 3) origin entries; the patient orientation is
+    /// taken from the first three axes only when D is neither 1 nor 2, otherwise it stays the identity.
+    /// \param dimensions requested extent of every dimension
+    /// \param pixelSize, origin, axis  geometry per dimension; entries 0..D-1 of each are read
+    template <typename T, unsigned int D> 
+    void hoNDImage<T, D>::create(const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin, const axis_type& axis)
+    {
+        const bool sameShape = this->dimensions_equal(dimensions);
+        if ( !sameShape )
+        {
+            // make sure the shared vectors exist before writing through them
+            if ( !dimensions_ ) dimensions_.reset( new std::vector<size_t> );
+            if ( !offsetFactors_ ) offsetFactors_.reset( new std::vector<size_t> );
+
+            *dimensions_ = dimensions;
+            this->allocate_memory();
+            this->calculate_offset_factors(dimensions);
+        }
+
+        // per-dimension geometry; the reciprocal pixel size is cached next to the pixel size
+        for ( unsigned int d=0; d<D; ++d )
+        {
+            pixelSize_[d] = pixelSize[d];
+            pixelSize_reciprocal_[d] = coord_type(1.0)/pixelSize_[d];
+            origin_[d] = origin[d];
+            axis_[d] = axis[d];
+        }
+
+        image_position_patient_[0] = 0;
+        image_position_patient_[1] = 0;
+        image_position_patient_[2] = 0;
+
+        image_orientation_patient_[0][0] = 1; image_orientation_patient_[0][1] = 0; image_orientation_patient_[0][2] = 0;
+        image_orientation_patient_[1][0] = 0; image_orientation_patient_[1][1] = 1; image_orientation_patient_[1][2] = 0;
+        image_orientation_patient_[2][0] = 0; image_orientation_patient_[2][1] = 0; image_orientation_patient_[2][2] = 1;
+
+        switch ( D )
+        {
+            case 1:
+                image_position_patient_[0] = origin[0];
+                break;
+            case 2:
+                image_position_patient_[0] = origin[0];
+                image_position_patient_[1] = origin[1];
+                break;
+            default:
+                image_position_patient_[0] = origin[0];
+                image_position_patient_[1] = origin[1];
+                image_position_patient_[2] = origin[2];
+                for ( unsigned int r=0; r<3; ++r )
+                {
+                    for ( unsigned int c=0; c<3; ++c ) image_orientation_patient_[r][c] = axis[r][c];
+                }
+                break;
+        }
+    }
+
+    /// Wrap an existing buffer as an image with default geometry (unit pixel size, zero origin, identity axes).
+    /// \param dimensions               extent of every dimension
+    /// \param data                     buffer to adopt; it is not copied
+    /// \param delete_data_on_destruct  stored as this image's ownership flag
+    template <typename T, unsigned int D> 
+    void hoNDImage<T, D>::create(const std::vector<size_t>& dimensions, T* data, bool delete_data_on_destruct)
+    {
+        // Release the buffer held so far, but never the buffer that is about to be adopted: if data_ already equals
+        // data (presumably the case after the base-class constructor - confirm), freeing it would leave data_ dangling.
+        if ( this->delete_data_on_destruct_ && this->data_ != data )
+        {
+            this->deallocate_memory();
+            this->data_ = NULL;
+        }
+
+        this->data_ = data;
+        this->delete_data_on_destruct_ = delete_data_on_destruct;
+
+        if ( !dimensions_ ) dimensions_ = boost::shared_ptr< std::vector<size_t> >( new std::vector<size_t> );
+        if ( !offsetFactors_ ) offsetFactors_ = boost::shared_ptr< std::vector<size_t> >( new std::vector<size_t> );
+
+        *dimensions_ = dimensions;
+
+        // Count elements over the first D dimensions; the extra bound keeps the loop inside a vector shorter than D.
+        unsigned int ii;
+        this->elements_ = 1;
+        for (ii=0; ii<D && ii<dimensions_->size(); ii++)
+        {
+            this->elements_ *= (*dimensions_)[ii];
+        }
+        this->calculate_offset_factors(dimensions);
+
+        for (ii=0;ii<D; ii++)
+        {
+            pixelSize_[ii] = 1.0;
+            pixelSize_reciprocal_[ii] = 1.0;
+            origin_[ii] = 0;
+            axis_[ii].fill(0);
+            axis_[ii][ii] = coord_type(1.0);
+        }
+
+        // patient position at zero; the three orientation vectors form the 3x3 identity
+        image_position_patient_[0] = 0;
+        image_position_patient_[1] = 0;
+        image_position_patient_[2] = 0;
+
+        image_orientation_patient_[0][0] = 1; image_orientation_patient_[0][1] = 0; image_orientation_patient_[0][2] = 0;
+        image_orientation_patient_[1][0] = 0; image_orientation_patient_[1][1] = 1; image_orientation_patient_[1][2] = 0;
+        image_orientation_patient_[2][0] = 0; image_orientation_patient_[2][1] = 0; image_orientation_patient_[2][2] = 1;
+    }
+
+    /// Wrap an existing buffer as an image and set its pixel size; origin and axes take their defaults.
+    /// \param dimensions               extent of every dimension
+    /// \param pixelSize                pixel size per dimension; entries 0..D-1 are read
+    /// \param data                     buffer to adopt; it is not copied
+    /// \param delete_data_on_destruct  stored as this image's ownership flag
+    template <typename T, unsigned int D> 
+    void hoNDImage<T, D>::create(const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, T* data, bool delete_data_on_destruct)
+    {
+        // Release the buffer held so far, but never the buffer that is about to be adopted: if data_ already equals
+        // data (presumably the case after the base-class constructor - confirm), freeing it would leave data_ dangling.
+        if ( this->delete_data_on_destruct_ && this->data_ != data )
+        {
+            this->deallocate_memory();
+            this->data_ = NULL;
+        }
+
+        this->data_ = data;
+        this->delete_data_on_destruct_ = delete_data_on_destruct;
+
+        if ( !dimensions_ ) dimensions_ = boost::shared_ptr< std::vector<size_t> >( new std::vector<size_t> );
+        if ( !offsetFactors_ ) offsetFactors_ = boost::shared_ptr< std::vector<size_t> >( new std::vector<size_t> );
+
+        *dimensions_ = dimensions;
+
+        // Count elements over the first D dimensions; the extra bound keeps the loop inside a vector shorter than D.
+        unsigned int ii;
+        this->elements_ = 1;
+        for (ii=0; ii<D && ii<dimensions_->size(); ii++)
+        {
+            this->elements_ *= (*dimensions_)[ii];
+        }
+        this->calculate_offset_factors(dimensions);
+
+        for (ii=0;ii<D; ii++)
+        {
+            pixelSize_[ii] = pixelSize[ii];
+            pixelSize_reciprocal_[ii] = coord_type(1.0)/pixelSize_[ii];
+            origin_[ii] = 0;
+            axis_[ii].fill(0);
+            axis_[ii][ii] = coord_type(1.0);
+        }
+
+        image_position_patient_[0] = 0;
+        image_position_patient_[1] = 0;
+        image_position_patient_[2] = 0;
+
+        image_orientation_patient_[0][0] = 1; image_orientation_patient_[0][1] = 0; image_orientation_patient_[0][2] = 0;
+        image_orientation_patient_[1][0] = 0; image_orientation_patient_[1][1] = 1; image_orientation_patient_[1][2] = 0;
+        image_orientation_patient_[2][0] = 0; image_orientation_patient_[2][1] = 0; image_orientation_patient_[2][2] = 1;
+    }
+
+    /// Wrap an existing buffer as an image and set pixel size and origin; axes take their defaults.
+    /// \param dimensions               extent of every dimension
+    /// \param pixelSize, origin        geometry per dimension; entries 0..D-1 of each are read
+    /// \param data                     buffer to adopt; it is not copied
+    /// \param delete_data_on_destruct  stored as this image's ownership flag
+    template <typename T, unsigned int D> 
+    void hoNDImage<T, D>::create(const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin, T* data, bool delete_data_on_destruct)
+    {
+        // Release the buffer held so far, but never the buffer that is about to be adopted: if data_ already equals
+        // data (presumably the case after the base-class constructor - confirm), freeing it would leave data_ dangling.
+        if ( this->delete_data_on_destruct_ && this->data_ != data )
+        {
+            this->deallocate_memory();
+            this->data_ = NULL;
+        }
+
+        this->data_ = data;
+        this->delete_data_on_destruct_ = delete_data_on_destruct;
+
+        if ( !dimensions_ ) dimensions_ = boost::shared_ptr< std::vector<size_t> >( new std::vector<size_t> );
+        if ( !offsetFactors_ ) offsetFactors_ = boost::shared_ptr< std::vector<size_t> >( new std::vector<size_t> );
+
+        *dimensions_ = dimensions;
+
+        // Count elements over the first D dimensions; the extra bound keeps the loop inside a vector shorter than D.
+        unsigned int ii;
+        this->elements_ = 1;
+        for (ii=0; ii<D && ii<dimensions_->size(); ii++)
+        {
+            this->elements_ *= (*dimensions_)[ii];
+        }
+        this->calculate_offset_factors(dimensions);
+
+        for (ii=0;ii<D; ii++)
+        {
+            pixelSize_[ii] = pixelSize[ii];
+            pixelSize_reciprocal_[ii] = coord_type(1.0)/pixelSize_[ii];
+            origin_[ii] = origin[ii];
+            axis_[ii].fill(0);
+            axis_[ii][ii] = coord_type(1.0);
+        }
+
+        image_position_patient_[0] = 0;
+        image_position_patient_[1] = 0;
+        image_position_patient_[2] = 0;
+
+        if ( D==1 )
+        {
+            image_position_patient_[0] = origin[0];
+        }
+        else if ( D == 2 )
+        {
+            image_position_patient_[0] = origin[0];
+            image_position_patient_[1] = origin[1];
+        }
+        else
+        {
+            image_position_patient_[0] = origin[0];
+            image_position_patient_[1] = origin[1];
+            image_position_patient_[2] = origin[2];
+        }
+
+        image_orientation_patient_[0][0] = 1; image_orientation_patient_[0][1] = 0; image_orientation_patient_[0][2] = 0;
+        image_orientation_patient_[1][0] = 0; image_orientation_patient_[1][1] = 1; image_orientation_patient_[1][2] = 0;
+        image_orientation_patient_[2][0] = 0; image_orientation_patient_[2][1] = 0; image_orientation_patient_[2][2] = 1;
+    }
+
+    /// Wrap an existing buffer as an image and set pixel size, origin and axes.
+    /// \param dimensions               extent of every dimension
+    /// \param pixelSize, origin, axis  geometry per dimension; entries 0..D-1 of each are read
+    /// \param data                     buffer to adopt; it is not copied
+    /// \param delete_data_on_destruct  stored as this image's ownership flag
+    template <typename T, unsigned int D> 
+    void hoNDImage<T, D>::create(const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin, const axis_type& axis, T* data, bool delete_data_on_destruct)
+    {
+        // Release the buffer held so far, but never the buffer that is about to be adopted: if data_ already equals
+        // data (presumably the case after the base-class constructor - confirm), freeing it would leave data_ dangling.
+        if ( this->delete_data_on_destruct_ && this->data_ != data )
+        {
+            this->deallocate_memory();
+            this->data_ = NULL;
+        }
+
+        this->data_ = data;
+        this->delete_data_on_destruct_ = delete_data_on_destruct;
+
+        if ( !dimensions_ ) dimensions_ = boost::shared_ptr< std::vector<size_t> >( new std::vector<size_t> );
+        if ( !offsetFactors_ ) offsetFactors_ = boost::shared_ptr< std::vector<size_t> >( new std::vector<size_t> );
+
+        *dimensions_ = dimensions;
+
+        // Count elements over the first D dimensions; the extra bound keeps the loop inside a vector shorter than D.
+        unsigned int ii;
+        this->elements_ = 1;
+        for (ii=0; ii<D && ii<dimensions_->size(); ii++)
+        {
+            this->elements_ *= (*dimensions_)[ii];
+        }
+        this->calculate_offset_factors(dimensions);
+
+        for (ii=0;ii<D; ii++)
+        {
+            pixelSize_[ii] = pixelSize[ii];
+            pixelSize_reciprocal_[ii] = coord_type(1.0)/pixelSize_[ii];
+            origin_[ii] = origin[ii];
+            axis_[ii] = axis[ii];
+        }
+
+        image_position_patient_[0] = 0;
+        image_position_patient_[1] = 0;
+        image_position_patient_[2] = 0;
+
+        image_orientation_patient_[0][0] = 1; image_orientation_patient_[0][1] = 0; image_orientation_patient_[0][2] = 0;
+        image_orientation_patient_[1][0] = 0; image_orientation_patient_[1][1] = 1; image_orientation_patient_[1][2] = 0;
+        image_orientation_patient_[2][0] = 0; image_orientation_patient_[2][1] = 0; image_orientation_patient_[2][2] = 1;
+
+        if ( D==1 )
+        {
+            image_position_patient_[0] = origin[0];
+        }
+        else if ( D == 2 )
+        {
+            image_position_patient_[0] = origin[0];
+            image_position_patient_[1] = origin[1];
+        }
+        else
+        {
+            image_position_patient_[0] = origin[0];
+            image_position_patient_[1] = origin[1];
+            image_position_patient_[2] = origin[2];
+
+            image_orientation_patient_[0][0] = axis[0][0]; image_orientation_patient_[0][1] = axis[0][1]; image_orientation_patient_[0][2] = axis[0][2];
+            image_orientation_patient_[1][0] = axis[1][0]; image_orientation_patient_[1][1] = axis[1][1]; image_orientation_patient_[1][2] = axis[1][2];
+            image_orientation_patient_[2][0] = axis[2][0]; image_orientation_patient_[2][1] = axis[2][1]; image_orientation_patient_[2][2] = axis[2][2];
+        }
+    }
+
+    /// Copy a plain ND array into this image.
+    /// The source shape is fitted to exactly D dimensions: missing trailing dimensions are set to 1 and dimensions
+    /// beyond D are ignored. create() is called only when the fitted shape differs from the current one.
+    /// \param a source array; as many bytes as this image holds are copied from it
+    template <typename T, unsigned int D> 
+    inline void hoNDImage<T, D>::from_NDArray(const hoNDArray<T>& a)
+    {
+        boost::shared_ptr< std::vector<size_t> > srcDims = a.get_dimensions();
+        const size_t srcRank = srcDims->size();
+
+        if ( srcRank == D )
+        {
+            // ranks agree: use the source shape unchanged
+            if ( !this->dimensions_equal(*srcDims) )
+            {
+                this->create(*srcDims);
+            }
+        }
+        else if ( srcRank < D )
+        {
+            // fewer source dimensions: pad the tail with 1
+            std::vector<size_t> padded(D, 1);
+            for ( size_t k=0; k<srcRank; k++ ) padded[k] = (*srcDims)[k];
+
+            if ( !this->dimensions_equal(padded) )
+            {
+                this->create(padded);
+            }
+        }
+        else
+        {
+            // more source dimensions: keep only the first D
+            std::vector<size_t> cropped(D, 1);
+            for ( size_t k=0; k<D; k++ ) cropped[k] = (*srcDims)[k];
+
+            if ( !this->dimensions_equal(cropped) )
+            {
+                this->create(cropped);
+            }
+        }
+
+        memcpy( this->data_, a.begin(), this->get_number_of_bytes() );
+    }
+
+    /// Copy this image's pixel data into a plain ND array; a is re-created first when its dimensions differ.
+    template <typename T, unsigned int D> 
+    inline void hoNDImage<T, D>::to_NDArray(hoNDArray<T>& a) const
+    {
+        std::vector<size_t> shape;
+        this->get_dimensions(shape);
+
+        const bool sameShape = a.dimensions_equal(&shape);
+        if ( !sameShape )
+            a.create(&shape);
+
+        memcpy( a.begin(), this->data_, a.get_number_of_bytes() );
+    }
+
+    /// Tell whether the given dimension vector matches the image dimensions.
+    /// Returns false when the image has no dimensions yet, when the vector does not
+    /// hold exactly D entries, or when any entry differs.
+    template <typename T, unsigned int D>
+    inline bool hoNDImage<T, D>::dimensions_equal(const std::vector<size_t>& dimensions) const
+    {
+        if ( !dimensions_ ) return false;
+        if ( dimensions.size() != D ) return false;
+        if ( dimensions_->size() != dimensions.size() ) return false;
+
+        // advance while the entries agree; reaching D means no mismatch was found
+        unsigned int ii = 0;
+        while ( ii<D && (*dimensions_)[ii] == dimensions[ii] ) ii++;
+
+        return ii == D;
+    }
+
+    /// Return the pixel size stored for one dimension.
+    /// @param dimension dimension index, checked with GADGET_DEBUG_CHECK_THROW to be below D
+    template <typename T, unsigned int D>
+    inline typename hoNDImage<T, D>::coord_type hoNDImage<T, D>::get_pixel_size(size_t dimension) const
+    {
+        GADGET_DEBUG_CHECK_THROW(dimension < D);
+        const coord_type spacing = this->pixelSize_[dimension];
+        return spacing;
+    }
+
+    /// Copy the pixel sizes of all D dimensions into pixelSize (resized to D entries).
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::get_pixel_size(std::vector<coord_type>& pixelSize) const
+    {
+        pixelSize.resize(D);
+        for ( unsigned int ii=0; ii<D; ii++ )
+        {
+            pixelSize[ii] = this->pixelSize_[ii];
+        }
+    }
+
+    /// Set the pixel size of one dimension and refresh its cached reciprocal.
+    /// @param dimension dimension index, checked with GADGET_DEBUG_CHECK_THROW to be below D
+    /// @param v         new pixel size
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::set_pixel_size(size_t dimension, coord_type v)
+    {
+        GADGET_DEBUG_CHECK_THROW(dimension < D);
+        // the reciprocal is stored next to the size, so both are written together
+        this->pixelSize_reciprocal_[dimension] = coord_type(1.0)/v;
+        this->pixelSize_[dimension] = v;
+    }
+
+    /// Set the pixel sizes of all D dimensions from the first D entries of pixelSize
+    /// and refresh the cached reciprocals.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::set_pixel_size(const std::vector<coord_type>& pixelSize)
+    {
+        GADGET_DEBUG_CHECK_THROW(pixelSize.size() >= D);
+
+        for ( unsigned int ii=0; ii<D; ii++ )
+        {
+            this->pixelSize_[ii] = pixelSize[ii];
+            this->pixelSize_reciprocal_[ii] = coord_type(1.0)/this->pixelSize_[ii];
+        }
+    }
+
+    /// Return the origin coordinate stored for one dimension.
+    /// @param dimension dimension index, checked with GADGET_DEBUG_CHECK_THROW to be below D
+    template <typename T, unsigned int D>
+    inline typename hoNDImage<T, D>::coord_type hoNDImage<T, D>::get_origin(size_t dimension) const
+    {
+        GADGET_DEBUG_CHECK_THROW(dimension < D);
+        const coord_type component = this->origin_[dimension];
+        return component;
+    }
+
+    /// Copy the origin coordinates of all D dimensions into origin (resized to D entries).
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::get_origin(std::vector<coord_type>& origin) const
+    {
+        origin.resize(D);
+        for ( unsigned int ii=0; ii<D; ii++ )
+        {
+            origin[ii] = this->origin_[ii];
+        }
+    }
+
+    /// Set the origin coordinate of one dimension.
+    /// @param dimension dimension index, checked with GADGET_DEBUG_CHECK_THROW to be below D
+    /// @param v         new origin coordinate
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::set_origin(size_t dimension, coord_type v)
+    {
+        GADGET_DEBUG_CHECK_THROW(dimension < D);
+        coord_type& slot = this->origin_[dimension];
+        slot = v;
+    }
+
+    /// Set the origin of all D dimensions from the first D entries of origin.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::set_origin(const std::vector<coord_type>& origin)
+    {
+        GADGET_DEBUG_CHECK_THROW(origin.size() >= D);
+        for ( unsigned int ii=0; ii<D; ii++ )
+        {
+            this->origin_[ii] = origin[ii];
+        }
+    }
+
+    /// Return one component of one axis vector.
+    /// @param dimension index of the axis, checked to be below D
+    /// @param elem      index of the component within that axis, checked to be below D
+    template <typename T, unsigned int D>
+    inline typename hoNDImage<T, D>::coord_type hoNDImage<T, D>::get_axis(size_t dimension, size_t elem) const
+    {
+        GADGET_DEBUG_CHECK_THROW(dimension<D && elem<D);
+        const coord_type component = this->axis_[dimension][elem];
+        return component;
+    }
+
+    /// Return a copy of the axis vector stored for one dimension.
+    /// @param dimension index of the axis, checked with GADGET_DEBUG_CHECK_THROW to be below D
+    template <typename T, unsigned int D>
+    inline typename hoNDImage<T, D>::a_axis_type hoNDImage<T, D>::get_axis(size_t dimension) const
+    {
+        GADGET_DEBUG_CHECK_THROW(dimension < D);
+        const a_axis_type& requested = this->axis_[dimension];
+        return requested;
+    }
+
+    /// Copy all D axis vectors into axis (resized to D entries).
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::get_axis(axis_type& axis) const
+    {
+        axis.resize(D);
+
+        unsigned int ii = 0;
+        while ( ii<D )
+        {
+            axis[ii] = this->axis_[ii];
+            ii++;
+        }
+    }
+
+    /// Set one component of one axis vector.
+    /// @param dimension index of the axis, checked to be below D
+    /// @param elem      index of the component within that axis, checked to be below D
+    /// @param v         new component value
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::set_axis(size_t dimension, size_t elem, coord_type v)
+    {
+        GADGET_DEBUG_CHECK_THROW(dimension<D && elem<D);
+        a_axis_type& target = this->axis_[dimension];
+        target[elem] = v;
+    }
+
+    /// Replace the whole axis vector of one dimension.
+    /// @param dimension index of the axis, checked with GADGET_DEBUG_CHECK_THROW to be below D
+    /// @param v         new axis vector
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::set_axis(size_t dimension, const a_axis_type& v)
+    {
+        GADGET_DEBUG_CHECK_THROW(dimension < D);
+        a_axis_type& target = this->axis_[dimension];
+        target = v;
+    }
+
+    /// Replace all D axis vectors with the first D entries of axis.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::set_axis(const axis_type& axis)
+    {
+        GADGET_DEBUG_CHECK_THROW(axis.size() >= D);
+
+        unsigned int ii = 0;
+        while ( ii<D )
+        {
+            this->axis_[ii] = axis[ii];
+            ii++;
+        }
+    }
+
+    /// Copy the three components of the stored patient image position into pos.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::get_image_position(coord_type pos[3]) const
+    {
+        for ( unsigned int k=0; k<3; k++ )
+        {
+            pos[k] = image_position_patient_[k];
+        }
+    }
+
+    /// Read one component of the stored patient image position.
+    /// @param d   component index, checked with GADGET_DEBUG_CHECK_THROW to be below 3
+    /// @param pos receives the component value
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::get_image_position(unsigned int d, coord_type& pos) const
+    {
+        GADGET_DEBUG_CHECK_THROW(d<3);
+        pos = this->image_position_patient_[d];
+    }
+
+    /// Copy the stored patient image position into pos as a whole.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::get_image_position(a_axis_image_patient_type& pos) const
+    {
+        pos = this->image_position_patient_;
+    }
+
+    /// Store the three components of pos as the patient image position.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::set_image_position(coord_type pos[3])
+    {
+        for ( unsigned int k=0; k<3; k++ )
+        {
+            image_position_patient_[k] = pos[k];
+        }
+    }
+
+    /// Set one component of the stored patient image position.
+    /// @param d   component index, checked with GADGET_DEBUG_CHECK_THROW to be below 3
+    /// @param pos new value of that component
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::set_image_position(unsigned int d, coord_type pos)
+    {
+        GADGET_DEBUG_CHECK_THROW(d<3);
+        // write INTO the member; the previous code assigned the member to the
+        // by-value parameter, so the setter never stored anything
+        image_position_patient_[d] = pos;
+    }
+
+    /// Replace the stored patient image position with pos as a whole.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::set_image_position(const a_axis_image_patient_type& pos)
+    {
+        this->image_position_patient_ = pos;
+    }
+
+    /// Copy the three components of one stored patient orientation vector into ori.
+    /// @param d index of the orientation vector, checked with GADGET_DEBUG_CHECK_THROW to be below 3
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::get_image_orientation(unsigned int d, coord_type ori[3]) const
+    {
+        GADGET_DEBUG_CHECK_THROW(d<3);
+        for ( unsigned int k=0; k<3; k++ )
+        {
+            ori[k] = image_orientation_patient_[d][k];
+        }
+    }
+
+    /// Copy one stored patient orientation vector into ori as a whole.
+    /// @param d index of the orientation vector, checked with GADGET_DEBUG_CHECK_THROW to be below 3
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::get_image_orientation(unsigned int d, a_axis_image_patient_type& ori) const
+    {
+        GADGET_DEBUG_CHECK_THROW(d<3);
+        ori = this->image_orientation_patient_[d];
+    }
+
+    /// Read a single component of one stored patient orientation vector.
+    /// @param d   index of the orientation vector, checked to be below 3
+    /// @param ind index of the component within that vector, checked to be below 3
+    /// @param ori receives the component value
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::get_image_orientation(unsigned int d, unsigned int ind, coord_type& ori) const
+    {
+        GADGET_DEBUG_CHECK_THROW(d<3);
+        GADGET_DEBUG_CHECK_THROW(ind<3);
+        ori = this->image_orientation_patient_[d][ind];
+    }
+
+    /// Convert the stored 3x3 patient orientation into four quaternion components.
+    /// The three orientation vectors are read as the columns of a matrix r; if its
+    /// determinant is negative the third column is negated before the conversion.
+    /// The components a, b, c, d are computed in double precision and written to
+    /// quat[0..3] after a cast to coord_type.
+    /// @param quat output array of four components
+    template <typename T, unsigned int D> 
+    inline void hoNDImage<T, D>::get_image_orientation(coord_type quat[4]) const 
+    {
+        // image_orientation_patient_[k] supplies column k+1 of the matrix r
+        coord_type r11 = image_orientation_patient_[0][0], r12 = image_orientation_patient_[1][0], r13 = image_orientation_patient_[2][0];
+        coord_type r21 = image_orientation_patient_[0][1], r22 = image_orientation_patient_[1][1], r23 = image_orientation_patient_[2][1];
+        coord_type r31 = image_orientation_patient_[0][2], r32 = image_orientation_patient_[1][2], r33 = image_orientation_patient_[2][2];
+
+        double a = 1, b = 0, c = 0, d = 0, s = 0;
+        double trace = 0;
+        double xd, yd, zd;
+
+        /* verify the sign of the rotation*/
+        // deti is the determinant of r, expanded term by term
+        coord_type deti = (r11 * r22 * r33) + (r12 * r23 * r31) + (r21 * r32 * r13) -
+            (r13 * r22 * r31) - (r12 * r21 * r33) - (r11 * r23 * r32);
+
+        if (deti < 0)
+        {
+            /* flip 3rd column */
+            r13 = -r13;
+            r23 = -r23;
+            r33 = -r33;
+        }
+
+        /* Compute quaternion parameters */
+        /* http://www.cs.princeton.edu/~gewang/projects/darth/stuff/quat_faq.html#Q55 */
+        // the 'l' suffix makes these literals long double; results are stored back into doubles
+        trace = 1.0l + r11 + r22 + r33;
+        if (trace > 0.00001l)
+        {                /* simplest case */
+            s = std::sqrt(trace) * 2;
+            a = (r32 - r23) / s;
+            b = (r13 - r31) / s;
+            c = (r21 - r12) / s;
+            d = 0.25l * s;
+        }
+        else
+        {
+            /* trickier case...
+             * determine which major diagonal element has
+             * the greatest value... */
+            xd = 1.0 + r11 - (r22 + r33);  /* 4**b**b */
+            yd = 1.0 + r22 - (r11 + r33);  /* 4**c**c */
+            zd = 1.0 + r33 - (r11 + r22);  /* 4**d**d */
+            /* if r11 is the greatest */
+            if (xd > 1.0)
+            {
+                s = 2.0 * std::sqrt(xd);
+                a = 0.25l * s;
+                b = (r21 + r12) / s;
+                c = (r31 + r13) / s;
+                d = (r32 - r23) / s;
+            }
+            /* else if r22 is the greatest */
+            else if (yd > 1.0)
+            {
+                s = 2.0 * std::sqrt(yd);
+                a = (r21 + r12) / s;
+                b = 0.25l * s;
+                c = (r32 + r23) / s;
+                d = (r13 - r31) / s;
+            }
+            /* else, r33 must be the greatest */
+            else
+            {
+                s = 2.0 * std::sqrt(zd);
+                a = (r13 + r31) / s;
+                b = (r23 + r32) / s;
+                c = 0.25l * s;
+                d = (r21 - r12) / s;
+            }
+
+            // in this branch only: negate all four components when a is negative, so a ends up non-negative
+            if (a < 0.0l)
+            {
+                b = -b;
+                c = -c;
+                d = -d;
+                a = -a;
+            }
+        }
+
+        // narrow the double results to the image coordinate type
+        quat[0] = (coord_type)a; 
+        quat[1] = (coord_type)b; 
+        quat[2] = (coord_type)c; 
+        quat[3] = (coord_type)d;
+    }
+
+    /// Store the three components of ori as one patient orientation vector.
+    /// @param d index of the orientation vector, checked with GADGET_DEBUG_CHECK_THROW to be below 3
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::set_image_orientation(unsigned int d, coord_type ori[3])
+    {
+        GADGET_DEBUG_CHECK_THROW(d<3);
+        for ( unsigned int k=0; k<3; k++ )
+        {
+            image_orientation_patient_[d][k] = ori[k];
+        }
+    }
+
+    /// Replace one patient orientation vector with ori as a whole.
+    /// @param d index of the orientation vector, checked with GADGET_DEBUG_CHECK_THROW to be below 3
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::set_image_orientation(unsigned int d, const a_axis_image_patient_type& ori)
+    {
+        GADGET_DEBUG_CHECK_THROW(d<3);
+        this->image_orientation_patient_[d] = ori;
+    }
+
+    /// Set a single component of one patient orientation vector.
+    /// @param d   index of the orientation vector, checked to be below 3
+    /// @param ind index of the component within that vector, checked to be below 3
+    /// @param ori new component value
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::set_image_orientation(unsigned int d, unsigned int ind, coord_type ori)
+    {
+        GADGET_DEBUG_CHECK_THROW(d<3);
+        GADGET_DEBUG_CHECK_THROW(ind<3);
+        this->image_orientation_patient_[d][ind] = ori;
+    }
+
+    /// Fill the 3x3 patient orientation from four quaternion components.
+    /// quat[0..3] are read as a, b, c, d; each of the nine matrix entries is then
+    /// written from the closed-form expressions below.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::set_image_orientation(coord_type quat[4])
+    {
+        const coord_type a = quat[0];
+        const coord_type b = quat[1];
+        const coord_type c = quat[2];
+        const coord_type d = quat[3];
+
+        // first orientation vector
+        image_orientation_patient_[0][0] = 1 - 2*( b*b + c*c );
+        image_orientation_patient_[0][1] = 2*( a*b + c*d );
+        image_orientation_patient_[0][2] = 2*( a*c - b*d );
+
+        // second orientation vector
+        image_orientation_patient_[1][0] = 2*( a*b - c*d );
+        image_orientation_patient_[1][1] = 1 - 2*( a*a + c*c );
+        image_orientation_patient_[1][2] = 2*( b*c + a*d );
+
+        // third orientation vector
+        image_orientation_patient_[2][0] = 2*( a*c + b*d );
+        image_orientation_patient_[2][1] = 2*( b*c - a*d );
+        image_orientation_patient_[2][2] = 1 - 2*( a*a + b*b );
+    }
+
+    /// Compute the linear offset of a D-dimensional index.
+    /// @param ind array of D indices; ind[0] is added as-is, every further index is
+    ///            weighted by the matching entry of offsetFactors_
+    template <typename T, unsigned int D>
+    inline size_t hoNDImage<T, D>::calculate_offset(const size_t* ind) const
+    {
+        GADGET_DEBUG_CHECK_THROW(ind!=NULL);
+
+        size_t offset = ind[0];
+        size_t i = 1;
+        while ( i < D )
+        {
+            offset += ind[i] * (*offsetFactors_)[i];
+            ++i;
+        }
+        return offset;
+    }
+
+    /// Compute the linear offset of an index given as a vector; forwards to the
+    /// pointer overload using the vector's first element address.
+    template <typename T, unsigned int D>
+    inline size_t hoNDImage<T, D>::calculate_offset(const std::vector<size_t>& ind) const
+    {
+        const size_t* first = &ind[0];
+        return this->calculate_offset(first);
+    }
+
+    /// Compute the linear offset of an index given as gt_index_type values.
+    /// Each entry is cast to size_t before being weighted by offsetFactors_.
+    template <typename T, unsigned int D>
+    inline size_t hoNDImage<T, D>::calculate_offset(const std::vector<gt_index_type>& ind) const
+    {
+        size_t offset = (size_t)(ind[0]);
+        size_t i = 1;
+        while ( i < D )
+        {
+            offset += (size_t)(ind[i]) * (*offsetFactors_)[i];
+            ++i;
+        }
+        return offset;
+    }
+
+    /// Linear offset of the 2-D index (x, y); checked to be used with D==2.
+    template <typename T, unsigned int D>
+    inline size_t hoNDImage<T, D>::calculate_offset(size_t x, size_t y) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==2);
+        size_t offset = x;
+        offset += y * (*offsetFactors_)[1];
+        return offset;
+    }
+
+    /// Linear offset of the 3-D index (x, y, z); checked to be used with D==3.
+    template <typename T, unsigned int D>
+    inline size_t hoNDImage<T, D>::calculate_offset(size_t x, size_t y, size_t z) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==3);
+        size_t offset = x;
+        offset += y * (*offsetFactors_)[1];
+        offset += z * (*offsetFactors_)[2];
+        return offset;
+    }
+
+    /// Linear offset of the 4-D index (x, y, z, s); checked to be used with D==4.
+    template <typename T, unsigned int D>
+    inline size_t hoNDImage<T, D>::calculate_offset(size_t x, size_t y, size_t z, size_t s) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==4);
+        size_t offset = x;
+        offset += y * (*offsetFactors_)[1];
+        offset += z * (*offsetFactors_)[2];
+        offset += s * (*offsetFactors_)[3];
+        return offset;
+    }
+
+    /// Linear offset of the 5-D index (x, y, z, s, p); checked to be used with D==5.
+    template <typename T, unsigned int D>
+    inline size_t hoNDImage<T, D>::calculate_offset(size_t x, size_t y, size_t z, size_t s, size_t p) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==5);
+        size_t offset = x;
+        offset += y * (*offsetFactors_)[1];
+        offset += z * (*offsetFactors_)[2];
+        offset += s * (*offsetFactors_)[3];
+        offset += p * (*offsetFactors_)[4];
+        return offset;
+    }
+
+    /// Linear offset of the 6-D index (x, y, z, s, p, r); checked to be used with D==6.
+    template <typename T, unsigned int D>
+    inline size_t hoNDImage<T, D>::calculate_offset(size_t x, size_t y, size_t z, size_t s, size_t p, size_t r) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==6);
+        size_t offset = x;
+        offset += y * (*offsetFactors_)[1];
+        offset += z * (*offsetFactors_)[2];
+        offset += s * (*offsetFactors_)[3];
+        offset += p * (*offsetFactors_)[4];
+        offset += r * (*offsetFactors_)[5];
+        return offset;
+    }
+
+    /// Linear offset of the 7-D index (x, y, z, s, p, r, a); checked to be used with D==7.
+    template <typename T, unsigned int D>
+    inline size_t hoNDImage<T, D>::calculate_offset(size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==7);
+        size_t offset = x;
+        offset += y * (*offsetFactors_)[1];
+        offset += z * (*offsetFactors_)[2];
+        offset += s * (*offsetFactors_)[3];
+        offset += p * (*offsetFactors_)[4];
+        offset += r * (*offsetFactors_)[5];
+        offset += a * (*offsetFactors_)[6];
+        return offset;
+    }
+
+    /// Linear offset of the 8-D index (x, y, z, s, p, r, a, q); checked to be used with D==8.
+    template <typename T, unsigned int D>
+    inline size_t hoNDImage<T, D>::calculate_offset(size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==8);
+        size_t offset = x;
+        offset += y * (*offsetFactors_)[1];
+        offset += z * (*offsetFactors_)[2];
+        offset += s * (*offsetFactors_)[3];
+        offset += p * (*offsetFactors_)[4];
+        offset += r * (*offsetFactors_)[5];
+        offset += a * (*offsetFactors_)[6];
+        offset += q * (*offsetFactors_)[7];
+        return offset;
+    }
+
+    /// Linear offset of the 9-D index (x, y, z, s, p, r, a, q, u); checked to be used with D==9.
+    template <typename T, unsigned int D>
+    inline size_t hoNDImage<T, D>::calculate_offset(size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q, size_t u) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==9);
+        size_t offset = x;
+        offset += y * (*offsetFactors_)[1];
+        offset += z * (*offsetFactors_)[2];
+        offset += s * (*offsetFactors_)[3];
+        offset += p * (*offsetFactors_)[4];
+        offset += r * (*offsetFactors_)[5];
+        offset += a * (*offsetFactors_)[6];
+        offset += q * (*offsetFactors_)[7];
+        offset += u * (*offsetFactors_)[8];
+        return offset;
+    }
+
+    /// Decompose a linear offset into a D-dimensional index and return it by value.
+    template <typename T, unsigned int D>
+    inline std::vector<size_t> hoNDImage<T, D>::calculate_index( size_t offset ) const
+    {
+        std::vector<size_t> result(D, 0);
+        // the vector already holds D entries, so the pointer overload can fill it directly
+        this->calculate_index(offset, &result[0]);
+        return result;
+    }
+
+    /// Decompose a linear offset into D indices written to index[0..D-1].
+    /// Dimensions are processed from the last one down to the second; what is left
+    /// of the offset becomes index[0].
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::calculate_index( size_t offset, size_t* index ) const
+    {
+        GADGET_DEBUG_CHECK_THROW(index!=NULL);
+
+        unsigned int i = D-1;
+        while ( i>0 )
+        {
+            index[i] = offset / (*offsetFactors_)[i];
+            offset %= (*offsetFactors_)[i];
+            --i;
+        }
+        index[0] = offset;
+    }
+
+    /// Decompose a linear offset into a D-dimensional index stored in a vector,
+    /// which is resized to D entries before being filled by the pointer overload.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::calculate_index( size_t offset, std::vector<size_t>& index ) const
+    {
+        index.resize(D, 0);
+        size_t* out = &index[0];
+        this->calculate_index(offset, out);
+    }
+
+    /// Decompose a linear offset into D indices stored as coord_type values.
+    /// Same decomposition as the size_t* overload; each integer index is converted
+    /// to coord_type on assignment.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::calculate_index( size_t offset, coord_type* index ) const
+    {
+        unsigned int i = D-1;
+        while ( i>0 )
+        {
+            index[i] = offset / (*offsetFactors_)[i];
+            offset %= (*offsetFactors_)[i];
+            --i;
+        }
+        index[0] = offset;
+    }
+
+    /// Decompose a linear offset into the 2-D index (x, y); checked to be used with D==2.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::calculate_index( size_t offset, size_t& x, size_t& y ) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==2);
+
+        const size_t rem = offset;   // offset is not reduced further for two dimensions
+        y = rem / (*offsetFactors_)[1];
+        x = rem % (*offsetFactors_)[1];
+    }
+
+    /// Decompose a linear offset into the 3-D index (x, y, z); checked to be used with D==3.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::calculate_index( size_t offset, size_t& x, size_t& y, size_t& z ) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==3);
+
+        size_t rem = offset;   // part of the offset not yet decoded
+        z = rem / (*offsetFactors_)[2];  rem %= (*offsetFactors_)[2];
+        y = rem / (*offsetFactors_)[1];
+        x = rem % (*offsetFactors_)[1];
+    }
+
+    /// Decompose a linear offset into the 4-D index (x, y, z, s); checked to be used with D==4.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::calculate_index( size_t offset, size_t& x, size_t& y, size_t& z, size_t& s ) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==4);
+
+        size_t rem = offset;   // part of the offset not yet decoded
+        s = rem / (*offsetFactors_)[3];  rem %= (*offsetFactors_)[3];
+        z = rem / (*offsetFactors_)[2];  rem %= (*offsetFactors_)[2];
+        y = rem / (*offsetFactors_)[1];
+        x = rem % (*offsetFactors_)[1];
+    }
+
+    /// Decompose a linear offset into the 5-D index (x, y, z, s, p); checked to be used with D==5.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::calculate_index( size_t offset, size_t& x, size_t& y, size_t& z, size_t& s, size_t& p ) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==5);
+
+        size_t rem = offset;   // part of the offset not yet decoded
+        p = rem / (*offsetFactors_)[4];  rem %= (*offsetFactors_)[4];
+        s = rem / (*offsetFactors_)[3];  rem %= (*offsetFactors_)[3];
+        z = rem / (*offsetFactors_)[2];  rem %= (*offsetFactors_)[2];
+        y = rem / (*offsetFactors_)[1];
+        x = rem % (*offsetFactors_)[1];
+    }
+
+    /// Decompose a linear offset into the 6-D index (x, y, z, s, p, r); checked to be used with D==6.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::calculate_index( size_t offset, size_t& x, size_t& y, size_t& z, size_t& s, size_t& p, size_t& r ) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==6);
+
+        size_t rem = offset;   // part of the offset not yet decoded
+        r = rem / (*offsetFactors_)[5];  rem %= (*offsetFactors_)[5];
+        p = rem / (*offsetFactors_)[4];  rem %= (*offsetFactors_)[4];
+        s = rem / (*offsetFactors_)[3];  rem %= (*offsetFactors_)[3];
+        z = rem / (*offsetFactors_)[2];  rem %= (*offsetFactors_)[2];
+        y = rem / (*offsetFactors_)[1];
+        x = rem % (*offsetFactors_)[1];
+    }
+
+    /// Decompose a linear offset into the 7-D index (x, y, z, s, p, r, a); checked to be used with D==7.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::calculate_index( size_t offset, size_t& x, size_t& y, size_t& z, size_t& s, size_t& p, size_t& r, size_t& a ) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==7);
+
+        size_t rem = offset;   // part of the offset not yet decoded
+        a = rem / (*offsetFactors_)[6];  rem %= (*offsetFactors_)[6];
+        r = rem / (*offsetFactors_)[5];  rem %= (*offsetFactors_)[5];
+        p = rem / (*offsetFactors_)[4];  rem %= (*offsetFactors_)[4];
+        s = rem / (*offsetFactors_)[3];  rem %= (*offsetFactors_)[3];
+        z = rem / (*offsetFactors_)[2];  rem %= (*offsetFactors_)[2];
+        y = rem / (*offsetFactors_)[1];
+        x = rem % (*offsetFactors_)[1];
+    }
+
+    /// Decompose a linear offset into the 8-D index (x, y, z, s, p, r, a, q); checked to be used with D==8.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::calculate_index( size_t offset, size_t& x, size_t& y, size_t& z, size_t& s, size_t& p, size_t& r, size_t& a, size_t& q ) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==8);
+
+        size_t rem = offset;   // part of the offset not yet decoded
+        q = rem / (*offsetFactors_)[7];  rem %= (*offsetFactors_)[7];
+        a = rem / (*offsetFactors_)[6];  rem %= (*offsetFactors_)[6];
+        r = rem / (*offsetFactors_)[5];  rem %= (*offsetFactors_)[5];
+        p = rem / (*offsetFactors_)[4];  rem %= (*offsetFactors_)[4];
+        s = rem / (*offsetFactors_)[3];  rem %= (*offsetFactors_)[3];
+        z = rem / (*offsetFactors_)[2];  rem %= (*offsetFactors_)[2];
+        y = rem / (*offsetFactors_)[1];
+        x = rem % (*offsetFactors_)[1];
+    }
+
+    /// Decompose a linear offset into the 9-D index (x, y, z, s, p, r, a, q, u); checked to be used with D==9.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::calculate_index( size_t offset, size_t& x, size_t& y, size_t& z, size_t& s, size_t& p, size_t& r, size_t& a, size_t& q, size_t& u ) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==9);
+
+        size_t rem = offset;   // part of the offset not yet decoded
+        u = rem / (*offsetFactors_)[8];  rem %= (*offsetFactors_)[8];
+        q = rem / (*offsetFactors_)[7];  rem %= (*offsetFactors_)[7];
+        a = rem / (*offsetFactors_)[6];  rem %= (*offsetFactors_)[6];
+        r = rem / (*offsetFactors_)[5];  rem %= (*offsetFactors_)[5];
+        p = rem / (*offsetFactors_)[4];  rem %= (*offsetFactors_)[4];
+        s = rem / (*offsetFactors_)[3];  rem %= (*offsetFactors_)[3];
+        z = rem / (*offsetFactors_)[2];  rem %= (*offsetFactors_)[2];
+        y = rem / (*offsetFactors_)[1];
+        x = rem % (*offsetFactors_)[1];
+    }
+
+    /// Mutable access to the pixel addressed by an array of D indices.
+    /// The resulting offset is checked against elements_ with GADGET_DEBUG_CHECK_THROW.
+    template <typename T, unsigned int D>
+    inline T& hoNDImage<T, D>::operator()( const size_t* ind )
+    {
+        const size_t idx = this->calculate_offset(ind);
+        GADGET_DEBUG_CHECK_THROW(idx < this->elements_);
+        return *(this->data_ + idx);
+    }
+
+    /// Read-only access to the pixel addressed by an array of D indices.
+    /// The resulting offset is checked against elements_ with GADGET_DEBUG_CHECK_THROW.
+    template <typename T, unsigned int D>
+    inline const T& hoNDImage<T, D>::operator()( const size_t* ind ) const
+    {
+        const size_t idx = this->calculate_offset(ind);
+        GADGET_DEBUG_CHECK_THROW(idx < this->elements_);
+        return *(this->data_ + idx);
+    }
+
+    /// Mutable access to the pixel addressed by a vector of size_t indices.
+    /// The resulting offset is checked against elements_ with GADGET_DEBUG_CHECK_THROW.
+    template <typename T, unsigned int D>
+    inline T& hoNDImage<T, D>::operator()( const std::vector<size_t>& ind )
+    {
+        const size_t idx = this->calculate_offset(ind);
+        GADGET_DEBUG_CHECK_THROW(idx < this->elements_);
+        return *(this->data_ + idx);
+    }
+
+    /// Read-only access to the pixel addressed by a vector of size_t indices.
+    /// The resulting offset is checked against elements_ with GADGET_DEBUG_CHECK_THROW.
+    template <typename T, unsigned int D>
+    inline const T& hoNDImage<T, D>::operator()( const std::vector<size_t>& ind ) const
+    {
+        const size_t idx = this->calculate_offset(ind);
+        GADGET_DEBUG_CHECK_THROW(idx < this->elements_);
+        return *(this->data_ + idx);
+    }
+
+    /// Mutable access to the pixel addressed by a vector of gt_index_type indices.
+    /// The resulting offset is checked against elements_ with GADGET_DEBUG_CHECK_THROW.
+    template <typename T, unsigned int D>
+    inline T& hoNDImage<T, D>::operator()( const std::vector<gt_index_type>& ind )
+    {
+        const size_t idx = this->calculate_offset(ind);
+        GADGET_DEBUG_CHECK_THROW(idx < this->elements_);
+        return *(this->data_ + idx);
+    }
+
+    /// Read-only access to the pixel addressed by a vector of gt_index_type indices.
+    /// The resulting offset is checked against elements_ with GADGET_DEBUG_CHECK_THROW.
+    template <typename T, unsigned int D>
+    inline const T& hoNDImage<T, D>::operator()( const std::vector<gt_index_type>& ind ) const
+    {
+        const size_t idx = this->calculate_offset(ind);
+        GADGET_DEBUG_CHECK_THROW(idx < this->elements_);
+        return *(this->data_ + idx);
+    }
+
+    /// Mutable access to the pixel at linear offset x.
+    /// x is checked against elements_ with GADGET_DEBUG_CHECK_THROW.
+    template <typename T, unsigned int D>
+    inline T& hoNDImage<T, D>::operator[]( size_t x )
+    {
+        GADGET_DEBUG_CHECK_THROW(x < this->elements_);
+        return *(this->data_ + x);
+    }
+
+    /// Read-only access to the pixel at linear offset x.
+    /// x is checked against elements_ with GADGET_DEBUG_CHECK_THROW.
+    template <typename T, unsigned int D>
+    inline const T& hoNDImage<T, D>::operator[]( size_t x ) const
+    {
+        GADGET_DEBUG_CHECK_THROW(x < this->elements_);
+        return *(this->data_ + x);
+    }
+
+    /// Mutable access to the pixel at linear offset x (call-operator form of operator[]).
+    /// x is checked against elements_ with GADGET_DEBUG_CHECK_THROW.
+    template <typename T, unsigned int D>
+    inline T& hoNDImage<T, D>::operator()( size_t x )
+    {
+        GADGET_DEBUG_CHECK_THROW(x < this->elements_);
+        return *(this->data_ + x);
+    }
+
+    /// Read-only access to the pixel at linear offset x (call-operator form of operator[]).
+    /// x is checked against elements_ with GADGET_DEBUG_CHECK_THROW.
+    template <typename T, unsigned int D>
+    inline const T& hoNDImage<T, D>::operator()( size_t x ) const
+    {
+        GADGET_DEBUG_CHECK_THROW(x < this->elements_);
+        return *(this->data_ + x);
+    }
+
+    /// Mutable access to the pixel at the 2-D index (x, y).
+    /// The offset from calculate_offset is checked against elements_.
+    template <typename T, unsigned int D>
+    inline T& hoNDImage<T, D>::operator()( size_t x, size_t y )
+    {
+        const size_t idx = this->calculate_offset(x, y);
+        GADGET_DEBUG_CHECK_THROW(idx < this->elements_);
+        return *(this->data_ + idx);
+    }
+
+    /// Read-only access to the pixel at the 2-D index (x, y).
+    /// The offset from calculate_offset is checked against elements_.
+    template <typename T, unsigned int D>
+    inline const T& hoNDImage<T, D>::operator()( size_t x, size_t y ) const
+    {
+        const size_t idx = this->calculate_offset(x, y);
+        GADGET_DEBUG_CHECK_THROW(idx < this->elements_);
+        return *(this->data_ + idx);
+    }
+
+    /// Mutable access to the pixel at the 3-D index (x, y, z).
+    /// The offset from calculate_offset is checked against elements_.
+    template <typename T, unsigned int D>
+    inline T& hoNDImage<T, D>::operator()( size_t x, size_t y, size_t z )
+    {
+        const size_t idx = this->calculate_offset(x, y, z);
+        GADGET_DEBUG_CHECK_THROW(idx < this->elements_);
+        return *(this->data_ + idx);
+    }
+
+    /// Read-only access to the pixel at the 3-D index (x, y, z).
+    /// The offset from calculate_offset is checked against elements_.
+    template <typename T, unsigned int D>
+    inline const T& hoNDImage<T, D>::operator()( size_t x, size_t y, size_t z ) const
+    {
+        const size_t idx = this->calculate_offset(x, y, z);
+        GADGET_DEBUG_CHECK_THROW(idx < this->elements_);
+        return *(this->data_ + idx);
+    }
+
+    /// Mutable access to the pixel at the 4-D index (x, y, z, s).
+    /// The offset from calculate_offset is checked against elements_.
+    template <typename T, unsigned int D>
+    inline T& hoNDImage<T, D>::operator()( size_t x, size_t y, size_t z, size_t s )
+    {
+        const size_t idx = this->calculate_offset(x, y, z, s);
+        GADGET_DEBUG_CHECK_THROW(idx < this->elements_);
+        return *(this->data_ + idx);
+    }
+
+    /// Read-only access to the pixel at the 4-D index (x, y, z, s).
+    /// The offset from calculate_offset is checked against elements_.
+    template <typename T, unsigned int D>
+    inline const T& hoNDImage<T, D>::operator()( size_t x, size_t y, size_t z, size_t s ) const
+    {
+        const size_t idx = this->calculate_offset(x, y, z, s);
+        GADGET_DEBUG_CHECK_THROW(idx < this->elements_);
+        return *(this->data_ + idx);
+    }
+
+    /// Mutable access to the pixel at the 5-D index (x, y, z, s, p).
+    /// The offset from calculate_offset is checked against elements_.
+    template <typename T, unsigned int D>
+    inline T& hoNDImage<T, D>::operator()( size_t x, size_t y, size_t z, size_t s, size_t p )
+    {
+        const size_t idx = this->calculate_offset(x, y, z, s, p);
+        GADGET_DEBUG_CHECK_THROW(idx < this->elements_);
+        return *(this->data_ + idx);
+    }
+
+    /// Read-only access to the pixel at the 5-D index (x, y, z, s, p).
+    /// The offset from calculate_offset is checked against elements_.
+    template <typename T, unsigned int D>
+    inline const T& hoNDImage<T, D>::operator()( size_t x, size_t y, size_t z, size_t s, size_t p ) const
+    {
+        const size_t idx = this->calculate_offset(x, y, z, s, p);
+        GADGET_DEBUG_CHECK_THROW(idx < this->elements_);
+        return *(this->data_ + idx);
+    }
+
+    /// Mutable access to the pixel at the 6-D index (x, y, z, s, p, r).
+    /// The offset from calculate_offset is checked against elements_.
+    template <typename T, unsigned int D>
+    inline T& hoNDImage<T, D>::operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r )
+    {
+        const size_t idx = this->calculate_offset(x, y, z, s, p, r);
+        GADGET_DEBUG_CHECK_THROW(idx < this->elements_);
+        return *(this->data_ + idx);
+    }
+
+    /// Read-only access to the pixel at the 6-D index (x, y, z, s, p, r).
+    /// The offset from calculate_offset is checked against elements_.
+    template <typename T, unsigned int D>
+    inline const T& hoNDImage<T, D>::operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r ) const
+    {
+        const size_t idx = this->calculate_offset(x, y, z, s, p, r);
+        GADGET_DEBUG_CHECK_THROW(idx < this->elements_);
+        return *(this->data_ + idx);
+    }
+
+    /// Mutable access to the pixel at the 7-D index (x, y, z, s, p, r, a).
+    /// The offset from calculate_offset is checked against elements_.
+    template <typename T, unsigned int D>
+    inline T& hoNDImage<T, D>::operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a )
+    {
+        const size_t idx = this->calculate_offset(x, y, z, s, p, r, a);
+        GADGET_DEBUG_CHECK_THROW(idx < this->elements_);
+        return *(this->data_ + idx);
+    }
+
+    /// Read-only access to the pixel at the 7-D index (x, y, z, s, p, r, a).
+    /// The offset from calculate_offset is checked against elements_.
+    template <typename T, unsigned int D>
+    inline const T& hoNDImage<T, D>::operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a ) const
+    {
+        const size_t idx = this->calculate_offset(x, y, z, s, p, r, a);
+        GADGET_DEBUG_CHECK_THROW(idx < this->elements_);
+        return *(this->data_ + idx);
+    }
+
+    /// Mutable access to the pixel at the 8-D index (x, y, z, s, p, r, a, q).
+    /// The offset from calculate_offset is checked against elements_.
+    template <typename T, unsigned int D>
+    inline T& hoNDImage<T, D>::operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q )
+    {
+        const size_t idx = this->calculate_offset(x, y, z, s, p, r, a, q);
+        GADGET_DEBUG_CHECK_THROW(idx < this->elements_);
+        return *(this->data_ + idx);
+    }
+
+    /// Read-only access to the pixel at the 8-D index (x, y, z, s, p, r, a, q).
+    /// The offset from calculate_offset is checked against elements_.
+    template <typename T, unsigned int D>
+    inline const T& hoNDImage<T, D>::operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q ) const
+    {
+        const size_t idx = this->calculate_offset(x, y, z, s, p, r, a, q);
+        GADGET_DEBUG_CHECK_THROW(idx < this->elements_);
+        return *(this->data_ + idx);
+    }
+
+    /// Mutable access to the pixel at the 9-D index (x, y, z, s, p, r, a, q, u).
+    /// The offset from calculate_offset is checked against elements_.
+    template <typename T, unsigned int D>
+    inline T& hoNDImage<T, D>::operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q, size_t u )
+    {
+        const size_t idx = this->calculate_offset(x, y, z, s, p, r, a, q, u);
+        GADGET_DEBUG_CHECK_THROW(idx < this->elements_);
+        return *(this->data_ + idx);
+    }
+
+    /// Read-only access to the pixel at the 9-D index (x, y, z, s, p, r, a, q, u).
+    /// The offset from calculate_offset is checked against elements_.
+    template <typename T, unsigned int D>
+    inline const T& hoNDImage<T, D>::operator()( size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q, size_t u ) const
+    {
+        const size_t idx = this->calculate_offset(x, y, z, s, p, r, a, q, u);
+        GADGET_DEBUG_CHECK_THROW(idx < this->elements_);
+        return *(this->data_ + idx);
+    }
+
+    /// Assign the same value to every element of the image buffer
+    /// (get_number_of_elements() entries starting at get_data_ptr()).
+    template <typename T, unsigned int D>
+    void hoNDImage<T, D>::fill(T value)
+    {
+        std::fill(this->get_data_ptr(),
+                  this->get_data_ptr() + this->get_number_of_elements(),
+                  value);
+    }
+
+    /// Map a D-dimensional image index to world coordinates.
+    /// Each output component sums, over all axes, the axis component times the index
+    /// scaled by the pixel size, then adds the origin component.
+    /// @param ind   D image indices (may be fractional)
+    /// @param coord receives D world coordinates; it is written while ind is still
+    ///              being read, so the two arrays should not overlap
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::image_to_world(const coord_type* ind, coord_type* coord) const
+    {
+        for ( unsigned int w=0; w<D; w++ )
+        {
+            coord[w] = 0;
+
+            unsigned int ax = 0;
+            while ( ax<D )
+            {
+                coord[w] += this->axis_[ax][w] * ( ind[ax] * this->pixelSize_[ax] );
+                ax++;
+            }
+
+            coord[w] += this->origin_[w];
+        }
+    }
+
+    /// Map an image index given as a vector to world coordinates.
+    /// @param ind   at least D image indices (checked with GADGET_DEBUG_CHECK_THROW)
+    /// @param coord receives the world coordinates; grown to D entries when shorter
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::image_to_world(const std::vector<coord_type>& ind, std::vector<coord_type>& coord) const
+    {
+        // size() must be called; the bare member name 'ind.size' is ill-formed in a comparison
+        GADGET_DEBUG_CHECK_THROW(ind.size() >= D);
+
+        if ( coord.size() < D ) coord.resize(D);
+
+        this->image_to_world(&ind[0], &coord[0]);
+    }
+
+    /// Map a 1-D image index to its world coordinate; checked to be used with D==1.
+    /// @param x  image index
+    /// @param cx receives the world coordinate
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::image_to_world(coord_type x, coord_type& cx) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==1);
+
+        // axis component * (index scaled by pixel size) + origin
+        cx =   this->axis_[0][0] * ( x * this->pixelSize_[0] )
+             + this->origin_[0];
+    }
+
+    /// Map a 2-D image index to world coordinates; checked to be used with D==2.
+    /// @param x,y   image indices
+    /// @param cx,cy receive the world coordinates
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::image_to_world(coord_type x, coord_type y, coord_type& cx, coord_type& cy) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==2);
+
+        // indices scaled by the pixel size of their dimension
+        const coord_type px = x*this->pixelSize_[0];
+        const coord_type py = y*this->pixelSize_[1];
+
+        cx = this->axis_[0][0] * px + this->axis_[1][0] * py + this->origin_[0];
+        cy = this->axis_[0][1] * px + this->axis_[1][1] * py + this->origin_[1];
+    }
+
+    /// Map a 3-D image index to world coordinates; checked to be used with D==3.
+    /// The terms are summed pairwise, as written by the parentheses.
+    /// @param x,y,z    image indices
+    /// @param cx,cy,cz receive the world coordinates
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::image_to_world(coord_type x, coord_type y, coord_type z, coord_type& cx, coord_type& cy, coord_type& cz) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==3);
+
+        // indices scaled by the pixel size of their dimension
+        const coord_type px = x*this->pixelSize_[0];
+        const coord_type py = y*this->pixelSize_[1];
+        const coord_type pz = z*this->pixelSize_[2];
+
+        cx = (this->axis_[0][0] * px + this->axis_[1][0] * py) + (this->axis_[2][0] * pz + this->origin_[0]);
+        cy = (this->axis_[0][1] * px + this->axis_[1][1] * py) + (this->axis_[2][1] * pz + this->origin_[1]);
+        cz = (this->axis_[0][2] * px + this->axis_[1][2] * py) + (this->axis_[2][2] * pz + this->origin_[2]);
+    }
+
+    /// Map a 4-D image index to world coordinates; checked to be used with D==4.
+    /// The axis terms are summed pairwise and the origin is added last.
+    /// @param x,y,z,s     image indices
+    /// @param cx,cy,cz,cs receive the world coordinates
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::image_to_world(coord_type x, coord_type y, coord_type z, coord_type s, coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==4);
+
+        // indices scaled by the pixel size of their dimension
+        const coord_type px = x*this->pixelSize_[0];
+        const coord_type py = y*this->pixelSize_[1];
+        const coord_type pz = z*this->pixelSize_[2];
+        const coord_type ps = s*this->pixelSize_[3];
+
+        cx = (this->axis_[0][0] * px + this->axis_[1][0] * py) + (this->axis_[2][0] * pz + this->axis_[3][0] * ps) + this->origin_[0];
+        cy = (this->axis_[0][1] * px + this->axis_[1][1] * py) + (this->axis_[2][1] * pz + this->axis_[3][1] * ps) + this->origin_[1];
+        cz = (this->axis_[0][2] * px + this->axis_[1][2] * py) + (this->axis_[2][2] * pz + this->axis_[3][2] * ps) + this->origin_[2];
+        cs = (this->axis_[0][3] * px + this->axis_[1][3] * py) + (this->axis_[2][3] * pz + this->axis_[3][3] * ps) + this->origin_[3];
+    }
+
+    /// 5D image-to-world transform for a continuous pixel position.
+    /// Every input is scaled by its pixel size; axis terms are summed in pairs
+    /// and the last axis term is paired with origin_.
+    /// @param x,y,z,s,p        position in the image
+    /// @param cx,cy,cz,cs,cp   receive the world coordinates
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::image_to_world(coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs, coord_type& cp) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==5);
+
+        const coord_type dx = x * this->pixelSize_[0];
+        const coord_type dy = y * this->pixelSize_[1];
+        const coord_type dz = z * this->pixelSize_[2];
+        const coord_type ds = s * this->pixelSize_[3];
+        const coord_type dp = p * this->pixelSize_[4];
+
+        cx = (this->axis_[0][0]*dx + this->axis_[1][0]*dy)
+           + (this->axis_[2][0]*dz + this->axis_[3][0]*ds)
+           + (this->axis_[4][0]*dp + this->origin_[0]);
+
+        cy = (this->axis_[0][1]*dx + this->axis_[1][1]*dy)
+           + (this->axis_[2][1]*dz + this->axis_[3][1]*ds)
+           + (this->axis_[4][1]*dp + this->origin_[1]);
+
+        cz = (this->axis_[0][2]*dx + this->axis_[1][2]*dy)
+           + (this->axis_[2][2]*dz + this->axis_[3][2]*ds)
+           + (this->axis_[4][2]*dp + this->origin_[2]);
+
+        cs = (this->axis_[0][3]*dx + this->axis_[1][3]*dy)
+           + (this->axis_[2][3]*dz + this->axis_[3][3]*ds)
+           + (this->axis_[4][3]*dp + this->origin_[3]);
+
+        cp = (this->axis_[0][4]*dx + this->axis_[1][4]*dy)
+           + (this->axis_[2][4]*dz + this->axis_[3][4]*ds)
+           + (this->axis_[4][4]*dp + this->origin_[4]);
+    }
+
+    /// 6D image-to-world transform for a continuous pixel position.
+    /// Every input is scaled by its pixel size; axis terms are summed in pairs
+    /// and origin_ is added last.
+    /// @param x,y,z,s,p,r          position in the image
+    /// @param cx,cy,cz,cs,cp,cr    receive the world coordinates
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::image_to_world(coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs, coord_type& cp, coord_type& cr) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==6);
+
+        const coord_type dx = x * this->pixelSize_[0];
+        const coord_type dy = y * this->pixelSize_[1];
+        const coord_type dz = z * this->pixelSize_[2];
+        const coord_type ds = s * this->pixelSize_[3];
+        const coord_type dp = p * this->pixelSize_[4];
+        const coord_type dr = r * this->pixelSize_[5];
+
+        cx = (this->axis_[0][0]*dx + this->axis_[1][0]*dy)
+           + (this->axis_[2][0]*dz + this->axis_[3][0]*ds)
+           + (this->axis_[4][0]*dp + this->axis_[5][0]*dr)
+           + this->origin_[0];
+
+        cy = (this->axis_[0][1]*dx + this->axis_[1][1]*dy)
+           + (this->axis_[2][1]*dz + this->axis_[3][1]*ds)
+           + (this->axis_[4][1]*dp + this->axis_[5][1]*dr)
+           + this->origin_[1];
+
+        cz = (this->axis_[0][2]*dx + this->axis_[1][2]*dy)
+           + (this->axis_[2][2]*dz + this->axis_[3][2]*ds)
+           + (this->axis_[4][2]*dp + this->axis_[5][2]*dr)
+           + this->origin_[2];
+
+        cs = (this->axis_[0][3]*dx + this->axis_[1][3]*dy)
+           + (this->axis_[2][3]*dz + this->axis_[3][3]*ds)
+           + (this->axis_[4][3]*dp + this->axis_[5][3]*dr)
+           + this->origin_[3];
+
+        cp = (this->axis_[0][4]*dx + this->axis_[1][4]*dy)
+           + (this->axis_[2][4]*dz + this->axis_[3][4]*ds)
+           + (this->axis_[4][4]*dp + this->axis_[5][4]*dr)
+           + this->origin_[4];
+
+        cr = (this->axis_[0][5]*dx + this->axis_[1][5]*dy)
+           + (this->axis_[2][5]*dz + this->axis_[3][5]*ds)
+           + (this->axis_[4][5]*dp + this->axis_[5][5]*dr)
+           + this->origin_[5];
+    }
+
+    /// 7D image-to-world transform for a continuous pixel position.
+    /// Every input is scaled by its pixel size; axis terms are summed in pairs
+    /// and the last axis term is paired with origin_.
+    /// @param x,y,z,s,p,r,a            position in the image
+    /// @param cx,cy,cz,cs,cp,cr,ca     receive the world coordinates
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::image_to_world(coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a, coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs, coord_type& cp, coord_type& cr, coord_type& ca) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==7);
+
+        const coord_type dx = x * this->pixelSize_[0];
+        const coord_type dy = y * this->pixelSize_[1];
+        const coord_type dz = z * this->pixelSize_[2];
+        const coord_type ds = s * this->pixelSize_[3];
+        const coord_type dp = p * this->pixelSize_[4];
+        const coord_type dr = r * this->pixelSize_[5];
+        const coord_type da = a * this->pixelSize_[6];
+
+        cx = (this->axis_[0][0]*dx + this->axis_[1][0]*dy)
+           + (this->axis_[2][0]*dz + this->axis_[3][0]*ds)
+           + (this->axis_[4][0]*dp + this->axis_[5][0]*dr)
+           + (this->axis_[6][0]*da + this->origin_[0]);
+
+        cy = (this->axis_[0][1]*dx + this->axis_[1][1]*dy)
+           + (this->axis_[2][1]*dz + this->axis_[3][1]*ds)
+           + (this->axis_[4][1]*dp + this->axis_[5][1]*dr)
+           + (this->axis_[6][1]*da + this->origin_[1]);
+
+        cz = (this->axis_[0][2]*dx + this->axis_[1][2]*dy)
+           + (this->axis_[2][2]*dz + this->axis_[3][2]*ds)
+           + (this->axis_[4][2]*dp + this->axis_[5][2]*dr)
+           + (this->axis_[6][2]*da + this->origin_[2]);
+
+        cs = (this->axis_[0][3]*dx + this->axis_[1][3]*dy)
+           + (this->axis_[2][3]*dz + this->axis_[3][3]*ds)
+           + (this->axis_[4][3]*dp + this->axis_[5][3]*dr)
+           + (this->axis_[6][3]*da + this->origin_[3]);
+
+        cp = (this->axis_[0][4]*dx + this->axis_[1][4]*dy)
+           + (this->axis_[2][4]*dz + this->axis_[3][4]*ds)
+           + (this->axis_[4][4]*dp + this->axis_[5][4]*dr)
+           + (this->axis_[6][4]*da + this->origin_[4]);
+
+        cr = (this->axis_[0][5]*dx + this->axis_[1][5]*dy)
+           + (this->axis_[2][5]*dz + this->axis_[3][5]*ds)
+           + (this->axis_[4][5]*dp + this->axis_[5][5]*dr)
+           + (this->axis_[6][5]*da + this->origin_[5]);
+
+        ca = (this->axis_[0][6]*dx + this->axis_[1][6]*dy)
+           + (this->axis_[2][6]*dz + this->axis_[3][6]*ds)
+           + (this->axis_[4][6]*dp + this->axis_[5][6]*dr)
+           + (this->axis_[6][6]*da + this->origin_[6]);
+    }
+
+    /// 8D image-to-world transform for a continuous pixel position.
+    /// Every input is scaled by its pixel size; axis terms are summed in pairs
+    /// and origin_ is added last.
+    /// @param x,y,z,s,p,r,a,q              position in the image
+    /// @param cx,cy,cz,cs,cp,cr,ca,cq      receive the world coordinates
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::image_to_world(coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a, coord_type q, coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs, coord_type& cp, coord_type& cr, coord_type& ca, coord_type& cq) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==8);
+
+        const coord_type dx = x * this->pixelSize_[0];
+        const coord_type dy = y * this->pixelSize_[1];
+        const coord_type dz = z * this->pixelSize_[2];
+        const coord_type ds = s * this->pixelSize_[3];
+        const coord_type dp = p * this->pixelSize_[4];
+        const coord_type dr = r * this->pixelSize_[5];
+        const coord_type da = a * this->pixelSize_[6];
+        const coord_type dq = q * this->pixelSize_[7];
+
+        cx = (this->axis_[0][0]*dx + this->axis_[1][0]*dy)
+           + (this->axis_[2][0]*dz + this->axis_[3][0]*ds)
+           + (this->axis_[4][0]*dp + this->axis_[5][0]*dr)
+           + (this->axis_[6][0]*da + this->axis_[7][0]*dq)
+           + this->origin_[0];
+
+        cy = (this->axis_[0][1]*dx + this->axis_[1][1]*dy)
+           + (this->axis_[2][1]*dz + this->axis_[3][1]*ds)
+           + (this->axis_[4][1]*dp + this->axis_[5][1]*dr)
+           + (this->axis_[6][1]*da + this->axis_[7][1]*dq)
+           + this->origin_[1];
+
+        cz = (this->axis_[0][2]*dx + this->axis_[1][2]*dy)
+           + (this->axis_[2][2]*dz + this->axis_[3][2]*ds)
+           + (this->axis_[4][2]*dp + this->axis_[5][2]*dr)
+           + (this->axis_[6][2]*da + this->axis_[7][2]*dq)
+           + this->origin_[2];
+
+        cs = (this->axis_[0][3]*dx + this->axis_[1][3]*dy)
+           + (this->axis_[2][3]*dz + this->axis_[3][3]*ds)
+           + (this->axis_[4][3]*dp + this->axis_[5][3]*dr)
+           + (this->axis_[6][3]*da + this->axis_[7][3]*dq)
+           + this->origin_[3];
+
+        cp = (this->axis_[0][4]*dx + this->axis_[1][4]*dy)
+           + (this->axis_[2][4]*dz + this->axis_[3][4]*ds)
+           + (this->axis_[4][4]*dp + this->axis_[5][4]*dr)
+           + (this->axis_[6][4]*da + this->axis_[7][4]*dq)
+           + this->origin_[4];
+
+        cr = (this->axis_[0][5]*dx + this->axis_[1][5]*dy)
+           + (this->axis_[2][5]*dz + this->axis_[3][5]*ds)
+           + (this->axis_[4][5]*dp + this->axis_[5][5]*dr)
+           + (this->axis_[6][5]*da + this->axis_[7][5]*dq)
+           + this->origin_[5];
+
+        ca = (this->axis_[0][6]*dx + this->axis_[1][6]*dy)
+           + (this->axis_[2][6]*dz + this->axis_[3][6]*ds)
+           + (this->axis_[4][6]*dp + this->axis_[5][6]*dr)
+           + (this->axis_[6][6]*da + this->axis_[7][6]*dq)
+           + this->origin_[6];
+
+        cq = (this->axis_[0][7]*dx + this->axis_[1][7]*dy)
+           + (this->axis_[2][7]*dz + this->axis_[3][7]*ds)
+           + (this->axis_[4][7]*dp + this->axis_[5][7]*dr)
+           + (this->axis_[6][7]*da + this->axis_[7][7]*dq)
+           + this->origin_[7];
+    }
+
+    /// 9D image-to-world transform for a continuous pixel position.
+    /// Every input is scaled by its pixel size; axis terms are summed in pairs
+    /// and the last axis term is paired with origin_.
+    /// @param x,y,z,s,p,r,a,q,u                position in the image
+    /// @param cx,cy,cz,cs,cp,cr,ca,cq,cu       receive the world coordinates
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::image_to_world(coord_type x, coord_type y, coord_type z, coord_type s, coord_type p, coord_type r, coord_type a, coord_type q, coord_type u, coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs, coord_type& cp, coord_type& cr, coord_type& ca, coord_type& cq, coord_type& cu) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==9);
+
+        const coord_type dx = x * this->pixelSize_[0];
+        const coord_type dy = y * this->pixelSize_[1];
+        const coord_type dz = z * this->pixelSize_[2];
+        const coord_type ds = s * this->pixelSize_[3];
+        const coord_type dp = p * this->pixelSize_[4];
+        const coord_type dr = r * this->pixelSize_[5];
+        const coord_type da = a * this->pixelSize_[6];
+        const coord_type dq = q * this->pixelSize_[7];
+        const coord_type du = u * this->pixelSize_[8];
+
+        cx = (this->axis_[0][0]*dx + this->axis_[1][0]*dy)
+           + (this->axis_[2][0]*dz + this->axis_[3][0]*ds)
+           + (this->axis_[4][0]*dp + this->axis_[5][0]*dr)
+           + (this->axis_[6][0]*da + this->axis_[7][0]*dq)
+           + (this->axis_[8][0]*du + this->origin_[0]);
+
+        cy = (this->axis_[0][1]*dx + this->axis_[1][1]*dy)
+           + (this->axis_[2][1]*dz + this->axis_[3][1]*ds)
+           + (this->axis_[4][1]*dp + this->axis_[5][1]*dr)
+           + (this->axis_[6][1]*da + this->axis_[7][1]*dq)
+           + (this->axis_[8][1]*du + this->origin_[1]);
+
+        cz = (this->axis_[0][2]*dx + this->axis_[1][2]*dy)
+           + (this->axis_[2][2]*dz + this->axis_[3][2]*ds)
+           + (this->axis_[4][2]*dp + this->axis_[5][2]*dr)
+           + (this->axis_[6][2]*da + this->axis_[7][2]*dq)
+           + (this->axis_[8][2]*du + this->origin_[2]);
+
+        cs = (this->axis_[0][3]*dx + this->axis_[1][3]*dy)
+           + (this->axis_[2][3]*dz + this->axis_[3][3]*ds)
+           + (this->axis_[4][3]*dp + this->axis_[5][3]*dr)
+           + (this->axis_[6][3]*da + this->axis_[7][3]*dq)
+           + (this->axis_[8][3]*du + this->origin_[3]);
+
+        cp = (this->axis_[0][4]*dx + this->axis_[1][4]*dy)
+           + (this->axis_[2][4]*dz + this->axis_[3][4]*ds)
+           + (this->axis_[4][4]*dp + this->axis_[5][4]*dr)
+           + (this->axis_[6][4]*da + this->axis_[7][4]*dq)
+           + (this->axis_[8][4]*du + this->origin_[4]);
+
+        cr = (this->axis_[0][5]*dx + this->axis_[1][5]*dy)
+           + (this->axis_[2][5]*dz + this->axis_[3][5]*ds)
+           + (this->axis_[4][5]*dp + this->axis_[5][5]*dr)
+           + (this->axis_[6][5]*da + this->axis_[7][5]*dq)
+           + (this->axis_[8][5]*du + this->origin_[5]);
+
+        ca = (this->axis_[0][6]*dx + this->axis_[1][6]*dy)
+           + (this->axis_[2][6]*dz + this->axis_[3][6]*ds)
+           + (this->axis_[4][6]*dp + this->axis_[5][6]*dr)
+           + (this->axis_[6][6]*da + this->axis_[7][6]*dq)
+           + (this->axis_[8][6]*du + this->origin_[6]);
+
+        cq = (this->axis_[0][7]*dx + this->axis_[1][7]*dy)
+           + (this->axis_[2][7]*dz + this->axis_[3][7]*ds)
+           + (this->axis_[4][7]*dp + this->axis_[5][7]*dr)
+           + (this->axis_[6][7]*da + this->axis_[7][7]*dq)
+           + (this->axis_[8][7]*du + this->origin_[7]);
+
+        cu = (this->axis_[0][8]*dx + this->axis_[1][8]*dy)
+           + (this->axis_[2][8]*dz + this->axis_[3][8]*ds)
+           + (this->axis_[4][8]*dp + this->axis_[5][8]*dr)
+           + (this->axis_[6][8]*da + this->axis_[7][8]*dq)
+           + (this->axis_[8][8]*du + this->origin_[8]);
+    }
+
+    /// Generic D-dimensional image-to-world transform for an integer pixel index.
+    /// @param ind    pixel index; the first D entries are read
+    /// @param coord  output; the first D entries are overwritten
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::image_to_world(const size_t* ind, coord_type* coord) const
+    {
+        for ( unsigned int out=0; out<D; ++out )
+        {
+            // accumulate directly in the output slot
+            coord_type& world = coord[out];
+            world = 0;
+
+            for ( unsigned int in=0; in<D; ++in )
+            {
+                world += this->axis_[in][out] * ( ind[in] * this->pixelSize_[in] );
+            }
+
+            world += this->origin_[out];
+        }
+    }
+
+    /// Converts an integer pixel index held in a vector to world coordinates.
+    /// @param ind    pixel index; expected to hold at least D entries
+    ///               (verified through GADGET_DEBUG_CHECK_THROW)
+    /// @param coord  receives the world coordinates; resized to D entries when shorter
+    template <typename T, unsigned int D> 
+    inline void hoNDImage<T, D>::image_to_world(const std::vector<size_t>& ind, std::vector<coord_type>& coord) const
+    {
+        // size is a member function and has to be called; without the
+        // parentheses the check is ill-formed wherever the macro is active
+        GADGET_DEBUG_CHECK_THROW(ind.size() >= D);
+
+        if ( coord.size() < D ) coord.resize(D);
+
+        this->image_to_world(&ind[0], &coord[0]);
+    }
+
+    /// 1D image-to-world transform for an integer pixel index.
+    /// Forwards to the coord_type overload so the formula (and its D==1 check)
+    /// lives in one place, as the size_t overloads for five and more dimensions do.
+    /// @param x   pixel index
+    /// @param cx  receives the world coordinate
+    template <typename T, unsigned int D> 
+    inline void hoNDImage<T, D>::image_to_world(size_t x, coord_type& cx) const
+    {
+        this->image_to_world(static_cast<coord_type>(x), cx);
+    }
+
+    /// 2D image-to-world transform for an integer pixel index.
+    /// Forwards to the coord_type overload so the formula (and its D==2 check)
+    /// lives in one place, as the size_t overloads for five and more dimensions do.
+    /// @param x,y    pixel index
+    /// @param cx,cy  receive the world coordinates
+    template <typename T, unsigned int D> 
+    inline void hoNDImage<T, D>::image_to_world(size_t x, size_t y, coord_type& cx, coord_type& cy) const
+    {
+        this->image_to_world(static_cast<coord_type>(x), static_cast<coord_type>(y), cx, cy);
+    }
+
+    /// 3D image-to-world transform for an integer pixel index.
+    /// Forwards to the coord_type overload so the formula (and its D==3 check)
+    /// lives in one place, as the size_t overloads for five and more dimensions do.
+    /// @param x,y,z     pixel index
+    /// @param cx,cy,cz  receive the world coordinates
+    template <typename T, unsigned int D> 
+    inline void hoNDImage<T, D>::image_to_world(size_t x, size_t y, size_t z, coord_type& cx, coord_type& cy, coord_type& cz) const
+    {
+        this->image_to_world(static_cast<coord_type>(x), static_cast<coord_type>(y), static_cast<coord_type>(z), cx, cy, cz);
+    }
+
+    /// 4D image-to-world transform for an integer pixel index.
+    /// Forwards to the coord_type overload so the formula (and its D==4 check)
+    /// lives in one place, as the size_t overloads for five and more dimensions do.
+    /// @param x,y,z,s       pixel index
+    /// @param cx,cy,cz,cs   receive the world coordinates
+    template <typename T, unsigned int D> 
+    inline void hoNDImage<T, D>::image_to_world(size_t x, size_t y, size_t z, size_t s, coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs) const
+    {
+        this->image_to_world(static_cast<coord_type>(x), static_cast<coord_type>(y), static_cast<coord_type>(z), static_cast<coord_type>(s), cx, cy, cz, cs);
+    }
+
+    /// 5D image-to-world transform for an integer pixel index: converts every
+    /// index to coord_type and forwards to the coord_type overload.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::image_to_world(size_t x, size_t y, size_t z, size_t s, size_t p,
+                            coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs, coord_type& cp) const
+    {
+        const coord_type fx = static_cast<coord_type>(x);
+        const coord_type fy = static_cast<coord_type>(y);
+        const coord_type fz = static_cast<coord_type>(z);
+        const coord_type fs = static_cast<coord_type>(s);
+        const coord_type fp = static_cast<coord_type>(p);
+
+        this->image_to_world(fx, fy, fz, fs, fp, cx, cy, cz, cs, cp);
+    }
+
+    /// 6D image-to-world transform for an integer pixel index: converts every
+    /// index to coord_type and forwards to the coord_type overload.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::image_to_world(size_t x, size_t y, size_t z, size_t s, size_t p, size_t r,
+                        coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs, coord_type& cp, coord_type& cr) const
+    {
+        const coord_type fx = static_cast<coord_type>(x);
+        const coord_type fy = static_cast<coord_type>(y);
+        const coord_type fz = static_cast<coord_type>(z);
+        const coord_type fs = static_cast<coord_type>(s);
+        const coord_type fp = static_cast<coord_type>(p);
+        const coord_type fr = static_cast<coord_type>(r);
+
+        this->image_to_world(fx, fy, fz, fs, fp, fr, cx, cy, cz, cs, cp, cr);
+    }
+
+    /// 7D image-to-world transform for an integer pixel index: converts every
+    /// index to coord_type and forwards to the coord_type overload.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::image_to_world(size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a,
+                        coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs, coord_type& cp, coord_type& cr, coord_type& ca) const
+    {
+        const coord_type fx = static_cast<coord_type>(x);
+        const coord_type fy = static_cast<coord_type>(y);
+        const coord_type fz = static_cast<coord_type>(z);
+        const coord_type fs = static_cast<coord_type>(s);
+        const coord_type fp = static_cast<coord_type>(p);
+        const coord_type fr = static_cast<coord_type>(r);
+        const coord_type fa = static_cast<coord_type>(a);
+
+        this->image_to_world(fx, fy, fz, fs, fp, fr, fa, cx, cy, cz, cs, cp, cr, ca);
+    }
+
+    /// 8D image-to-world transform for an integer pixel index: converts every
+    /// index to coord_type and forwards to the coord_type overload.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::image_to_world(size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q,
+                        coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs, coord_type& cp, coord_type& cr, coord_type& ca, coord_type& cq) const
+    {
+        const coord_type fx = static_cast<coord_type>(x);
+        const coord_type fy = static_cast<coord_type>(y);
+        const coord_type fz = static_cast<coord_type>(z);
+        const coord_type fs = static_cast<coord_type>(s);
+        const coord_type fp = static_cast<coord_type>(p);
+        const coord_type fr = static_cast<coord_type>(r);
+        const coord_type fa = static_cast<coord_type>(a);
+        const coord_type fq = static_cast<coord_type>(q);
+
+        this->image_to_world(fx, fy, fz, fs, fp, fr, fa, fq, cx, cy, cz, cs, cp, cr, ca, cq);
+    }
+
+    /// 9D image-to-world transform for an integer pixel index: converts every
+    /// index to coord_type and forwards to the coord_type overload.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::image_to_world(size_t x, size_t y, size_t z, size_t s, size_t p, size_t r, size_t a, size_t q, size_t u,
+                        coord_type& cx, coord_type& cy, coord_type& cz, coord_type& cs, coord_type& cp, coord_type& cr, coord_type& ca, coord_type& cq, coord_type& cu) const
+    {
+        const coord_type fx = static_cast<coord_type>(x);
+        const coord_type fy = static_cast<coord_type>(y);
+        const coord_type fz = static_cast<coord_type>(z);
+        const coord_type fs = static_cast<coord_type>(s);
+        const coord_type fp = static_cast<coord_type>(p);
+        const coord_type fr = static_cast<coord_type>(r);
+        const coord_type fa = static_cast<coord_type>(a);
+        const coord_type fq = static_cast<coord_type>(q);
+        const coord_type fu = static_cast<coord_type>(u);
+
+        this->image_to_world(fx, fy, fz, fs, fp, fr, fa, fq, fu, cx, cy, cz, cs, cp, cr, ca, cq, cu);
+    }
+
+    /// Builds the (D+1)x(D+1) image-to-world matrix as
+    /// translation * rotation * pixel-size scaling.
+    /// @param image2world  output; created here with D+1 rows and D+1 columns
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::image_to_world_matrix(hoMatrix<coord_type>& image2world) const
+    {
+        image2world.createMatrix(D+1, D+1);
+
+        // rotation part: entry (row, col) is component `row` of axis_[col]
+        hoMatrix<coord_type> rot(D+1, D+1);
+        rot.setIdentity();
+        for ( unsigned int row=0; row<D; row++ )
+        {
+            for ( unsigned int col=0; col<D; col++ )
+            {
+                rot(row, col) = this->axis_[col][row];
+            }
+        }
+
+        // scaling part carries the pixel sizes on its diagonal,
+        // translation part carries the origin in its last column
+        hoMatrix<coord_type> scale(D+1, D+1);
+        scale.setIdentity();
+
+        hoMatrix<coord_type> shift(D+1, D+1);
+        shift.setIdentity();
+
+        for ( unsigned int d=0; d<D; d++ )
+        {
+            scale(d, d) = this->pixelSize_[d];
+            shift(d, D) = this->origin_[d];
+        }
+
+        // first product goes into image2world (rotation and scaling operands)
+        Gadgetron::GeneralMatrixProduct(image2world, rot, false, scale, false);
+
+        // second product uses rot as its output buffer, since image2world is an
+        // operand here; the result is then copied into image2world
+        Gadgetron::GeneralMatrixProduct(rot, shift, false, image2world, false);
+        image2world = rot;
+    }
+
+    /// Sets origin_, axis_ and pixelSize_ from an image-to-world matrix.
+    /// The origin is the transform of the point (0,...,0,1). Axis ii is the
+    /// transform of the point with a 1 in slot ii, minus the origin; its abs()
+    /// (presumably its length) becomes the pixel size before it is normalized.
+    /// @param image2world  matrix expected to have D+1 rows and D+1 columns
+    template <typename T, unsigned int D> 
+    inline void hoNDImage<T, D>::set_image_to_world_matrix(const hoMatrix<coord_type>& image2world)
+    {
+        GADGET_DEBUG_CHECK_THROW(D+1==image2world.rows());
+        // cols must be called like rows(); without the parentheses the check is ill-formed
+        GADGET_DEBUG_CHECK_THROW(D+1==image2world.cols());
+
+        // origin
+        hoMatrix<coord_type> pt(D+1, 1);
+        // zero explicitly, as the loop below does, instead of relying on the
+        // constructor (its fill behaviour is not visible here)
+        memset(pt.get_data_ptr(), 0, sizeof(coord_type)*(D+1));
+        pt(D, 0) = 1.0;
+
+        hoMatrix<coord_type> res(D+1, 1);
+
+        Gadgetron::GeneralMatrixProduct(res, image2world, false, pt, false);
+        memcpy(this->origin_, res.begin(), sizeof(coord_type)*D);
+
+        // rotation
+        unsigned int ii, jj;
+        for ( ii=0; ii<D; ii++ )
+        {
+            memset(pt.get_data_ptr(), 0, sizeof(coord_type)*(D+1));
+            pt(D, 0) = 1.0;
+            pt(ii, 0) = 1.0;
+
+            Gadgetron::GeneralMatrixProduct(res, image2world, false, pt, false);
+            for ( jj=0; jj<D; jj++ )
+            {
+                this->axis_[ii][jj] = res(jj, 0) - this->origin_[jj];
+            }
+
+            this->pixelSize_[ii] = this->axis_[ii].abs();
+            // world_to_image() multiplies by pixelSize_reciprocal_, so it has to
+            // follow every change of pixelSize_ or it keeps using the old spacing
+            this->pixelSize_reciprocal_[ii] = coord_type(1.0)/this->pixelSize_[ii];
+            this->axis_[ii].normalize();
+        }
+    }
+
+    /// Generic D-dimensional world-to-image transform.
+    /// For every image dimension the offset from origin_ is combined with the
+    /// components of that dimension's axis and multiplied by the reciprocal pixel size.
+    /// @param coord  world coordinate; the first D entries are read
+    /// @param ind    output; the first D entries are overwritten
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::world_to_image(const coord_type* coord, coord_type* ind) const
+    {
+        for ( unsigned int out=0; out<D; ++out )
+        {
+            // accumulate directly in the output slot
+            coord_type& pixel = ind[out];
+            pixel = 0;
+
+            for ( unsigned int in=0; in<D; ++in )
+            {
+                pixel += this->axis_[out][in] * ( coord[in] - this->origin_[in] );
+            }
+
+            pixel *= this->pixelSize_reciprocal_[out];
+        }
+    }
+
+    /// Converts a world coordinate held in a vector to a continuous pixel position.
+    /// @param coord  world coordinate; expected to hold at least D entries
+    /// @param ind    receives the pixel position; resized to D entries when shorter
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::world_to_image(const std::vector<coord_type>& coord, std::vector<coord_type>& ind) const
+    {
+        GADGET_DEBUG_CHECK_THROW(coord.size()>=D);
+
+        if ( ind.size() < D )
+        {
+            ind.resize(D);
+        }
+
+        // pointers are taken only after the possible resize
+        const coord_type* world = &coord[0];
+        coord_type* pixel = &ind[0];
+        this->world_to_image(world, pixel);
+    }
+
+    /// 1D world-to-image transform.
+    /// @param cx  world coordinate
+    /// @param x   receives pixelSize_reciprocal_[0] * axis_[0][0] * (cx - origin_[0])
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::world_to_image(coord_type cx, coord_type& x) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==1);
+
+        // reciprocal and axis component are multiplied first, then the offset
+        x = (this->pixelSize_reciprocal_[0] * this->axis_[0][0]) * ( cx - this->origin_[0] );
+    }
+
+    /// 2D world-to-image transform.
+    /// The offset from origin_ is combined with each axis' components and
+    /// multiplied by that dimension's reciprocal pixel size.
+    /// @param cx,cy  world coordinates
+    /// @param x,y    receive the continuous pixel position
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::world_to_image(coord_type cx, coord_type cy, coord_type& x, coord_type& y) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==2);
+
+        const coord_type dx = cx - this->origin_[0];
+        const coord_type dy = cy - this->origin_[1];
+
+        x = this->pixelSize_reciprocal_[0]
+          * (this->axis_[0][0]*dx + this->axis_[0][1]*dy);
+
+        y = this->pixelSize_reciprocal_[1]
+          * (this->axis_[1][0]*dx + this->axis_[1][1]*dy);
+    }
+
+    /// 3D world-to-image transform.
+    /// The offset from origin_ is combined with each axis' components and
+    /// multiplied by that dimension's reciprocal pixel size.
+    /// @param cx,cy,cz  world coordinates
+    /// @param x,y,z     receive the continuous pixel position
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::world_to_image(coord_type cx, coord_type cy, coord_type cz, coord_type& x, coord_type& y, coord_type& z) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==3);
+
+        const coord_type dx = cx - this->origin_[0];
+        const coord_type dy = cy - this->origin_[1];
+        const coord_type dz = cz - this->origin_[2];
+
+        // terms are added left to right
+        x = this->pixelSize_reciprocal_[0]
+          * ((this->axis_[0][0]*dx + this->axis_[0][1]*dy) + this->axis_[0][2]*dz);
+
+        y = this->pixelSize_reciprocal_[1]
+          * ((this->axis_[1][0]*dx + this->axis_[1][1]*dy) + this->axis_[1][2]*dz);
+
+        z = this->pixelSize_reciprocal_[2]
+          * ((this->axis_[2][0]*dx + this->axis_[2][1]*dy) + this->axis_[2][2]*dz);
+    }
+
+    /// 4D world-to-image transform.
+    /// The offset from origin_ is combined with each axis' components (summed in
+    /// pairs) and multiplied by that dimension's reciprocal pixel size.
+    /// @param cx,cy,cz,cs  world coordinates
+    /// @param x,y,z,s      receive the continuous pixel position
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::world_to_image(coord_type cx, coord_type cy, coord_type cz, coord_type cs, coord_type& x, coord_type& y, coord_type& z, coord_type& s) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==4);
+
+        const coord_type dx = cx - this->origin_[0];
+        const coord_type dy = cy - this->origin_[1];
+        const coord_type dz = cz - this->origin_[2];
+        const coord_type ds = cs - this->origin_[3];
+
+        x = this->pixelSize_reciprocal_[0]
+          * ((this->axis_[0][0]*dx + this->axis_[0][1]*dy)
+           + (this->axis_[0][2]*dz + this->axis_[0][3]*ds));
+
+        y = this->pixelSize_reciprocal_[1]
+          * ((this->axis_[1][0]*dx + this->axis_[1][1]*dy)
+           + (this->axis_[1][2]*dz + this->axis_[1][3]*ds));
+
+        z = this->pixelSize_reciprocal_[2]
+          * ((this->axis_[2][0]*dx + this->axis_[2][1]*dy)
+           + (this->axis_[2][2]*dz + this->axis_[2][3]*ds));
+
+        s = this->pixelSize_reciprocal_[3]
+          * ((this->axis_[3][0]*dx + this->axis_[3][1]*dy)
+           + (this->axis_[3][2]*dz + this->axis_[3][3]*ds));
+    }
+
+    /// 5D world-to-image transform.
+    /// The offset from origin_ is combined with each axis' components (summed in
+    /// pairs, the fifth term added last) and multiplied by that dimension's
+    /// reciprocal pixel size.
+    /// @param cx,cy,cz,cs,cp  world coordinates
+    /// @param x,y,z,s,p       receive the continuous pixel position
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::world_to_image(coord_type cx, coord_type cy, coord_type cz, coord_type cs, coord_type cp, coord_type& x, coord_type& y, coord_type& z, coord_type& s, coord_type& p) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==5);
+
+        const coord_type dx = cx - this->origin_[0];
+        const coord_type dy = cy - this->origin_[1];
+        const coord_type dz = cz - this->origin_[2];
+        const coord_type ds = cs - this->origin_[3];
+        const coord_type dp = cp - this->origin_[4];
+
+        x = this->pixelSize_reciprocal_[0]
+          * ((this->axis_[0][0]*dx + this->axis_[0][1]*dy)
+           + (this->axis_[0][2]*dz + this->axis_[0][3]*ds)
+           + this->axis_[0][4]*dp);
+
+        y = this->pixelSize_reciprocal_[1]
+          * ((this->axis_[1][0]*dx + this->axis_[1][1]*dy)
+           + (this->axis_[1][2]*dz + this->axis_[1][3]*ds)
+           + this->axis_[1][4]*dp);
+
+        z = this->pixelSize_reciprocal_[2]
+          * ((this->axis_[2][0]*dx + this->axis_[2][1]*dy)
+           + (this->axis_[2][2]*dz + this->axis_[2][3]*ds)
+           + this->axis_[2][4]*dp);
+
+        s = this->pixelSize_reciprocal_[3]
+          * ((this->axis_[3][0]*dx + this->axis_[3][1]*dy)
+           + (this->axis_[3][2]*dz + this->axis_[3][3]*ds)
+           + this->axis_[3][4]*dp);
+
+        p = this->pixelSize_reciprocal_[4]
+          * ((this->axis_[4][0]*dx + this->axis_[4][1]*dy)
+           + (this->axis_[4][2]*dz + this->axis_[4][3]*ds)
+           + this->axis_[4][4]*dp);
+    }
+
+    /// 6D world-to-image transform.
+    /// The offset from origin_ is combined with each axis' components (summed in
+    /// pairs) and multiplied by that dimension's reciprocal pixel size.
+    /// @param cx,cy,cz,cs,cp,cr  world coordinates
+    /// @param x,y,z,s,p,r        receive the continuous pixel position
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::world_to_image(coord_type cx, coord_type cy, coord_type cz, coord_type cs, coord_type cp, coord_type cr, coord_type& x, coord_type& y, coord_type& z, coord_type& s, coord_type& p, coord_type& r) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==6);
+
+        const coord_type dx = cx - this->origin_[0];
+        const coord_type dy = cy - this->origin_[1];
+        const coord_type dz = cz - this->origin_[2];
+        const coord_type ds = cs - this->origin_[3];
+        const coord_type dp = cp - this->origin_[4];
+        const coord_type dr = cr - this->origin_[5];
+
+        x = this->pixelSize_reciprocal_[0]
+          * ((this->axis_[0][0]*dx + this->axis_[0][1]*dy)
+           + (this->axis_[0][2]*dz + this->axis_[0][3]*ds)
+           + (this->axis_[0][4]*dp + this->axis_[0][5]*dr));
+
+        y = this->pixelSize_reciprocal_[1]
+          * ((this->axis_[1][0]*dx + this->axis_[1][1]*dy)
+           + (this->axis_[1][2]*dz + this->axis_[1][3]*ds)
+           + (this->axis_[1][4]*dp + this->axis_[1][5]*dr));
+
+        z = this->pixelSize_reciprocal_[2]
+          * ((this->axis_[2][0]*dx + this->axis_[2][1]*dy)
+           + (this->axis_[2][2]*dz + this->axis_[2][3]*ds)
+           + (this->axis_[2][4]*dp + this->axis_[2][5]*dr));
+
+        s = this->pixelSize_reciprocal_[3]
+          * ((this->axis_[3][0]*dx + this->axis_[3][1]*dy)
+           + (this->axis_[3][2]*dz + this->axis_[3][3]*ds)
+           + (this->axis_[3][4]*dp + this->axis_[3][5]*dr));
+
+        p = this->pixelSize_reciprocal_[4]
+          * ((this->axis_[4][0]*dx + this->axis_[4][1]*dy)
+           + (this->axis_[4][2]*dz + this->axis_[4][3]*ds)
+           + (this->axis_[4][4]*dp + this->axis_[4][5]*dr));
+
+        r = this->pixelSize_reciprocal_[5]
+          * ((this->axis_[5][0]*dx + this->axis_[5][1]*dy)
+           + (this->axis_[5][2]*dz + this->axis_[5][3]*ds)
+           + (this->axis_[5][4]*dp + this->axis_[5][5]*dr));
+    }
+
+    /// Convert a 7D world coordinate (cx, cy, cz, cs, cp, cr, ca) into image coordinates (x, y, z, s, p, r, a).
+    /// Each output is the offset from origin_ combined with one row of axis_ and then
+    /// multiplied by the reciprocal pixel size of that dimension.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::world_to_image(coord_type cx, coord_type cy, coord_type cz, coord_type cs, coord_type cp, coord_type cr, coord_type ca, coord_type& x, coord_type& y, coord_type& z, coord_type& s, coord_type& p, coord_type& r, coord_type& a) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==7);
+
+        // offset of the world point from the image origin, one entry per dimension
+        const coord_type delta[7] =
+        {
+            cx - this->origin_[0], cy - this->origin_[1], cz - this->origin_[2],
+            cs - this->origin_[3], cp - this->origin_[4], cr - this->origin_[5],
+            ca - this->origin_[6]
+        };
+
+        // outputs in the same order in which they were assigned in the unrolled form
+        coord_type* const result[7] = { &x, &y, &z, &s, &p, &r, &a };
+
+        // three pairs followed by the single trailing term, exactly as before
+        for ( unsigned int row=0; row<7; row++ )
+        {
+            *result[row] = this->pixelSize_reciprocal_[row] * (
+                  (this->axis_[row][0]*delta[0] + this->axis_[row][1]*delta[1])
+                + (this->axis_[row][2]*delta[2] + this->axis_[row][3]*delta[3])
+                + (this->axis_[row][4]*delta[4] + this->axis_[row][5]*delta[5])
+                + this->axis_[row][6]*delta[6] );
+        }
+    }
+
+    /// Convert an 8D world coordinate (cx, cy, cz, cs, cp, cr, ca, cq) into image coordinates (x, y, z, s, p, r, a, q).
+    /// Each output is the offset from origin_ combined with one row of axis_ and then
+    /// multiplied by the reciprocal pixel size of that dimension.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::world_to_image(coord_type cx, coord_type cy, coord_type cz, coord_type cs, coord_type cp, coord_type cr, coord_type ca, coord_type cq, coord_type& x, coord_type& y, coord_type& z, coord_type& s, coord_type& p, coord_type& r, coord_type& a, coord_type& q) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==8);
+
+        // offset of the world point from the image origin, one entry per dimension
+        const coord_type delta[8] =
+        {
+            cx - this->origin_[0], cy - this->origin_[1], cz - this->origin_[2],
+            cs - this->origin_[3], cp - this->origin_[4], cr - this->origin_[5],
+            ca - this->origin_[6], cq - this->origin_[7]
+        };
+
+        // outputs in the same order in which they were assigned in the unrolled form
+        coord_type* const result[8] = { &x, &y, &z, &s, &p, &r, &a, &q };
+
+        // the products are added in the same four pairs as before, so rounding is unchanged
+        for ( unsigned int row=0; row<8; row++ )
+        {
+            *result[row] = this->pixelSize_reciprocal_[row] * (
+                  (this->axis_[row][0]*delta[0] + this->axis_[row][1]*delta[1])
+                + (this->axis_[row][2]*delta[2] + this->axis_[row][3]*delta[3])
+                + (this->axis_[row][4]*delta[4] + this->axis_[row][5]*delta[5])
+                + (this->axis_[row][6]*delta[6] + this->axis_[row][7]*delta[7]) );
+        }
+    }
+
+    /// Convert a 9D world coordinate (cx, cy, cz, cs, cp, cr, ca, cq, cu) into image coordinates (x, y, z, s, p, r, a, q, u).
+    /// Each output is the offset from origin_ combined with one row of axis_ and then
+    /// multiplied by the reciprocal pixel size of that dimension.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::world_to_image(coord_type cx, coord_type cy, coord_type cz, coord_type cs, coord_type cp, coord_type cr, coord_type ca, coord_type cq, coord_type cu, coord_type& x, coord_type& y, coord_type& z, coord_type& s, coord_type& p, coord_type& r, coord_type& a, coord_type& q, coord_type& u) const
+    {
+        GADGET_DEBUG_CHECK_THROW(D==9);
+
+        // offset of the world point from the image origin, one entry per dimension
+        const coord_type delta[9] =
+        {
+            cx - this->origin_[0], cy - this->origin_[1], cz - this->origin_[2],
+            cs - this->origin_[3], cp - this->origin_[4], cr - this->origin_[5],
+            ca - this->origin_[6], cq - this->origin_[7], cu - this->origin_[8]
+        };
+
+        // outputs in the same order in which they were assigned in the unrolled form
+        coord_type* const result[9] = { &x, &y, &z, &s, &p, &r, &a, &q, &u };
+
+        // four pairs followed by the single trailing term, exactly as before
+        for ( unsigned int row=0; row<9; row++ )
+        {
+            *result[row] = this->pixelSize_reciprocal_[row] * (
+                  (this->axis_[row][0]*delta[0] + this->axis_[row][1]*delta[1])
+                + (this->axis_[row][2]*delta[2] + this->axis_[row][3]*delta[3])
+                + (this->axis_[row][4]*delta[4] + this->axis_[row][5]*delta[5])
+                + (this->axis_[row][6]*delta[6] + this->axis_[row][7]*delta[7])
+                + this->axis_[row][8]*delta[8] );
+        }
+    }
+
+    /// Fill world2image with the (D+1)x(D+1) homogeneous world-to-image matrix.
+    /// Three matrices are built from the image geometry and multiplied together:
+    ///   rotation    : upper-left DxD block copied from axis_
+    ///   scaling     : diagonal taken from pixelSize_reciprocal_
+    ///   translation : last column set to -origin_
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::world_to_image_matrix(hoMatrix<coord_type>& world2image) const
+    {
+        // world to image matrix = inv(pixelSize_Scaling) * inv(rotation) * inv(translation)
+        world2image.createMatrix(D+1, D+1);
+
+        hoMatrix<coord_type> rotation(D+1, D+1);
+        rotation.setIdentity();
+
+        hoMatrix<coord_type> scaling(D+1, D+1);
+        scaling.setIdentity();
+
+        hoMatrix<coord_type> translation(D+1, D+1);
+        translation.setIdentity();
+
+        for ( unsigned int row=0; row<D; row++ )
+        {
+            for ( unsigned int col=0; col<D; col++ )
+            {
+                rotation(row, col) = this->axis_[row][col];
+            }
+
+            scaling(row, row) = this->pixelSize_reciprocal_[row];
+            translation(row, D) = -this->origin_[row];
+        }
+
+        // world2image <- rotation * translation (presumably first argument = product of the two operands)
+        Gadgetron::GeneralMatrixProduct(world2image, rotation, false, translation, false);
+
+        // rotation is reused as the output buffer for scaling * (rotation * translation)
+        Gadgetron::GeneralMatrixProduct(rotation, scaling, false, world2image, false);
+
+        world2image = rotation;
+    }
+
+    /// Placeholder for setting the image geometry from a world-to-image matrix.
+    /// The argument is ignored; the body only invokes GADGET_THROW with a
+    /// "not implemented" message.
+    template <typename T, unsigned int D> 
+    inline void hoNDImage<T, D>::set_world_to_image_matrix(const hoMatrix<coord_type>& world2image)
+    {
+        GADGET_THROW("This function is not implemented ... ");
+    }
+
+    /// Check whether the box that begins at index `start` and spans `size` elements
+    /// per dimension lies inside the image. Both the first corner (start) and the
+    /// last corner (start+size-1) must pass point_in_range().
+    template <typename T, unsigned int D>
+    inline bool hoNDImage<T, D>::in_image_region(const std::vector<size_t>& start, std::vector<size_t>& size)
+    {
+        GADGET_DEBUG_CHECK_THROW(start.size()>=D);
+        GADGET_DEBUG_CHECK_THROW(size.size()>=D);
+
+        if ( this->point_in_range(start) )
+        {
+            // index of the last element covered by the region in every dimension
+            std::vector<size_t> lastCorner(D);
+            for ( unsigned int dim=0; dim<D; dim++ )
+            {
+                lastCorner[dim] = start[dim]+size[dim]-1;
+            }
+
+            if ( this->point_in_range(lastCorner) ) return true;
+        }
+
+        return false;
+    }
+
+    /// Copy the region [start, start+size) of this image into `out`.
+    ///
+    /// \param start  index of the first element of the region (at least D entries)
+    /// \param size   extent of the region in every dimension (at least D entries)
+    /// \param out    receives the sub-image; it is re-created with dimensions `size`,
+    ///               inherits pixel size and axes from this image, and gets as origin
+    ///               the world coordinate of `start`
+    ///
+    /// If the region is not inside the image a warning is issued and `out` is left untouched.
+    template <typename T, unsigned int D>
+    void hoNDImage<T, D>::get_sub_image(const std::vector<size_t>& start, std::vector<size_t>& size, Self& out)
+    {
+        GADGET_DEBUG_CHECK_THROW(start.size()>=D);
+        GADGET_DEBUG_CHECK_THROW(size.size()>=D);
+
+        if ( !this->in_image_region(start, size) )
+        {
+            GADGET_WARN_MSG("Sub-image regin is not in the image ... ");
+            return;
+        }
+
+        out.create(size);
+
+        memcpy(out.pixelSize_, this->pixelSize_, sizeof(coord_type)*D);
+        memcpy(out.pixelSize_reciprocal_, this->pixelSize_reciprocal_, sizeof(coord_type)*D);
+
+        unsigned int ii;
+        for ( ii=0; ii<D; ii++ )
+        {
+            out.axis_[ii] = this->axis_[ii];
+        }
+
+        // The data are copied one line along the first dimension at a time; N is the number of such lines.
+        // A zero-length first dimension means there is nothing to copy (and must not be used as divisor).
+        // N is signed so that it compares cleanly with the signed OpenMP loop index.
+        long long N = 0;
+        if ( size[0] > 0 )
+        {
+            N = (long long)( out.get_number_of_elements() / size[0] );
+        }
+
+        long long t;
+
+        // every variable referenced inside the region must be listed because of default(none);
+        // `start` is read inside the loop and therefore has to be shared as well
+        #pragma omp parallel default(none) private(t) shared(N, size, start, out)
+        {
+            std::vector<size_t> indOut(D), ind(D);
+
+            #pragma omp for
+            for ( t=0; t<N; t++ )
+            {
+                // index of the first element of line t inside the sub-image (`out` is a reference, not a pointer)
+                out.calculate_index(t*size[0], indOut);
+
+                // shift by `start` to get the matching index inside this image
+                unsigned int dd;
+                for ( dd=0; dd<D; dd++ )
+                {
+                    ind[dd] = indOut[dd]+start[dd];
+                }
+
+                size_t offset = this->calculate_offset(ind);
+
+                memcpy(out.begin()+t*size[0], this->data_+offset, sizeof(T)*size[0]);
+            }
+        }
+
+        // the origin of the sub-image is the world position of its first element
+        std::vector<coord_type> origin_out(D);
+        this->image_to_world(start, origin_out);
+
+        memcpy(out.origin_, &origin_out[0], sizeof(coord_type)*D);
+    }
+
+    /// Serialize the image geometry and pixel data (without meta attributes) into a newly allocated buffer.
+    ///
+    /// Buffer layout, in order:
+    ///   unsigned int    number of dimensions (D)
+    ///   size_t     x D  dimensions
+    ///   coord_type x D  pixel sizes
+    ///   coord_type x D  origin
+    ///   coord_type x DxD axes, one axis after the other
+    ///   T x elements_   pixel data
+    ///
+    /// \param buf  in: NULL or a buffer allocated with new[] (it is released);
+    ///             out: buffer allocated with new[], to be released by the caller with delete[]
+    /// \param len  out: size of the buffer in bytes
+    /// \return     true on success, false if an exception was caught
+    template <typename T, unsigned int D>
+    bool hoNDImage<T, D>::serializeImage(char*& buf, size_t& len) const 
+    {
+        try
+        {
+            if ( buf != NULL )
+            {
+                delete[] buf;
+                // reset at once: if the allocation below throws, the caller must not be
+                // left with a pointer to memory that has already been released
+                buf = NULL;
+            }
+
+            // number of dimensions + dimension vector + pixel size + origin + axis + contents
+            len = sizeof(unsigned int) + sizeof(size_t)*D 
+                + sizeof(coord_type)*D + sizeof(coord_type)*D + sizeof(coord_type)*D*D 
+                + sizeof(T)*this->elements_;
+
+            buf = new char[len];
+            GADGET_CHECK_RETURN_FALSE(buf!=NULL);
+
+            unsigned int NDim=D;
+
+            size_t offset = 0;
+            memcpy(buf, &NDim, sizeof(unsigned int));
+            offset += sizeof(unsigned int);
+
+            if ( NDim > 0 )
+            {
+                memcpy(buf+offset, &((*dimensions_)[0]), sizeof(size_t)*D);
+                offset += sizeof(size_t)*D;
+
+                memcpy(buf+offset, this->pixelSize_, sizeof(coord_type)*D);
+                offset += sizeof(coord_type)*D;
+
+                memcpy(buf+offset, this->origin_, sizeof(coord_type)*D);
+                offset += sizeof(coord_type)*D;
+
+                unsigned int ii;
+                for ( ii=0; ii<D; ii++ )
+                {
+                    memcpy(buf+offset, this->axis_[ii].begin(), sizeof(coord_type)*D);
+                    offset += sizeof(coord_type)*D;
+                }
+
+                memcpy(buf+offset, this->data_, sizeof(T)*elements_);
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImage<T, D>::serializeImage(...) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Rebuild the image geometry and pixel data from a buffer written by serializeImage().
+    ///
+    /// \param buf  buffer to read from; its size is not checked here
+    /// \param len  out: number of bytes consumed from buf (only set when parsing succeeds)
+    /// \return     false if the stored number of dimensions differs from D or an exception was caught
+    template <typename T, unsigned int D>
+    bool hoNDImage<T, D>::deserializeImage(char* buf, size_t& len)
+    {
+        try
+        {
+            // read position inside buf; advanced after every field
+            const char* cursor = buf;
+
+            unsigned int NDim;
+            memcpy(&NDim, cursor, sizeof(unsigned int));
+            cursor += sizeof(unsigned int);
+
+            if ( NDim != D )
+            {
+                GADGET_ERROR_MSG("hoNDImage<T, D>::deserialize(...) : number of image dimensions does not match ... ");
+                return false;
+            }
+
+            if ( NDim == 0 )
+            {
+                this->clear();
+            }
+            else
+            {
+                std::vector<size_t> dimensions(NDim);
+                memcpy(&dimensions[0], cursor, sizeof(size_t)*D);
+                cursor += sizeof(size_t)*D;
+
+                // allocate the image before the geometry and the pixels are filled in
+                this->create(dimensions);
+
+                memcpy(this->pixelSize_, cursor, sizeof(coord_type)*D);
+                cursor += sizeof(coord_type)*D;
+
+                // the reciprocals are not stored in the buffer, they are derived from the pixel sizes
+                for ( unsigned int dim=0; dim<D; dim++ )
+                {
+                    this->pixelSize_reciprocal_[dim] = coord_type(1.0)/this->pixelSize_[dim];
+                }
+
+                memcpy(this->origin_, cursor, sizeof(coord_type)*D);
+                cursor += sizeof(coord_type)*D;
+
+                for ( unsigned int dim=0; dim<D; dim++ )
+                {
+                    memcpy(this->axis_[dim].begin(), cursor, sizeof(coord_type)*D);
+                    cursor += sizeof(coord_type)*D;
+                }
+
+                // copy the content
+                memcpy(this->data_, cursor, sizeof(T)*elements_);
+                cursor += sizeof(T)*elements_;
+            }
+
+            len = (size_t)(cursor - buf);
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImage<T, D>::deserializeImage(...) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Serialize the image together with its meta attributes into a newly allocated buffer.
+    ///
+    /// Buffer layout, in order:
+    ///   size_t  lenImage    length of the image block in bytes
+    ///   char x lenImage     image block as produced by serializeImage()
+    ///   size_t  lenAttrib   length of the attribute block in bytes
+    ///   char x lenAttrib    attribute text written by ISMRMRD::serialize, including the terminating '\0'
+    ///
+    /// Both length prefixes are stored as size_t, which is what deserialize() reads back.
+    /// The reported length uses the same width, so it always equals the number of bytes written.
+    ///
+    /// \param buf  in: NULL or a buffer allocated with new[] (it is released);
+    ///             out: buffer allocated with new[], to be released by the caller with delete[]
+    /// \param len  out: size of the buffer in bytes
+    /// \return     true on success, false if an exception was caught
+    template <typename T, unsigned int D>
+    bool hoNDImage<T, D>::serialize(char*& buf, size_t& len) const 
+    {
+        char* bufImage = NULL;
+
+        try
+        {
+            size_t lenImage(0);
+            GADGET_CHECK_THROW(this->serializeImage(bufImage, lenImage));
+
+            std::stringstream str;
+            ISMRMRD::serialize( const_cast<ISMRMRD::MetaContainer&>(attrib_), str);
+            std::string attribContent = str.str();
+
+            // the terminating '\0' is part of the stored attribute block
+            size_t lenAttrib = attribContent.length()+1;
+
+            len = sizeof(size_t) + lenImage + sizeof(size_t) + lenAttrib;
+
+            if ( buf != NULL )
+            {
+                delete [] buf;
+                buf = NULL;
+            }
+
+            buf = new char[len];
+            GADGET_CHECK_THROW(buf != NULL);
+
+            size_t offset = 0;
+            memcpy(buf, &lenImage, sizeof(size_t));
+            offset += sizeof(size_t);
+
+            memcpy(buf+offset, bufImage, lenImage);
+            offset += lenImage;
+
+            memcpy(buf+offset, &lenAttrib, sizeof(size_t));
+            offset += sizeof(size_t);
+
+            // c_str() is null terminated, so lenAttrib bytes can be copied straight from the string
+            memcpy(buf+offset, attribContent.c_str(), lenAttrib);
+            offset += lenAttrib;
+
+            delete [] bufImage;
+            bufImage = NULL;
+        }
+        catch(...)
+        {
+            if ( bufImage != NULL ) delete [] bufImage;
+
+            GADGET_ERROR_MSG("Errors happened in hoNDImage<T, D>::serialize(char*& buf, size_t& len) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Rebuild the image and its meta attributes from a buffer written by serialize().
+    /// The buffer is read as: size_t image length, image block, size_t attribute length, attribute text.
+    ///
+    /// \param buf  buffer to read from; its size is not checked here
+    /// \param len  neither read nor written by this function
+    /// \return     false if the image block could not be parsed or an exception was caught
+    template <typename T, unsigned int D>
+    bool hoNDImage<T, D>::deserialize(char* buf, size_t& len)
+    {
+        try
+        {
+            // read position inside buf; advanced after every field
+            char* cursor = buf;
+
+            size_t lenImage(0);
+            memcpy(&lenImage, cursor, sizeof(size_t));
+            cursor += sizeof(size_t);
+
+            // deserializeImage() stores the number of bytes it consumed in lenImage
+            GADGET_CHECK_RETURN_FALSE(this->deserializeImage(cursor, lenImage));
+            cursor += lenImage;
+
+            // the stored attribute length is read but not needed to parse the attribute text
+            unsigned long long lenAttrib(0);
+            memcpy(&lenAttrib, cursor, sizeof(size_t));
+            cursor += sizeof(size_t);
+
+            ISMRMRD::deserialize(cursor, attrib_);
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImage<T, D>::deserialize(char* buf, size_t& len) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Write a banner line followed by the output of printContent() to `os`.
+    template <typename T, unsigned int D>
+    void hoNDImage<T, D>::print(std::ostream& os) const
+    {
+        os << "-------------- Gagdgetron ND Image -------------" << std::endl;
+        this->printContent(os);
+    }
+
+    /// Write a description of the image to `os`: dimension count, size, element type,
+    /// memory footprint, pixel size, origin and axes, followed by the serialized meta attributes.
+    /// The stream is switched to fixed floating point notation and is left in that state.
+    template <typename T, unsigned int D>
+    inline void hoNDImage<T, D>::printContent(std::ostream& os) const
+    {
+        os.unsetf(std::ios::scientific);
+        os.setf(std::ios::fixed);
+
+        os << "Image dimension is : " << D << std::endl;
+
+        os << "Image size is : ";
+        for ( size_t dim=0; dim<D; dim++ )
+        {
+            os << (*dimensions_)[dim] << " ";
+        }
+        os << std::endl;
+
+        const int bytesPerElement = sizeof(T);
+        const std::string typeName(typeid(T).name());
+
+        os << "Image data type is : " << typeName << std::endl;
+        os << "Byte number for each element is : " << bytesPerElement << std::endl;
+        os << "Number of array size in bytes is : ";
+        os << elements_*bytesPerElement << std::endl;
+
+        os << "Pixel size is : ";
+        for ( size_t dim=0; dim<D; dim++ )
+        {
+            os << this->pixelSize_[dim] << " ";
+        }
+        os << std::endl;
+
+        os << "Origin is : ";
+        for ( size_t dim=0; dim<D; dim++ )
+        {
+            os << this->origin_[dim] << " ";
+        }
+        os << std::endl;
+
+        for ( size_t row=0; row<D; row++ )
+        {
+            os << "Axis " << row << " : [ ";
+            for ( size_t col=0; col<D; col++ )
+            {
+                os << this->axis_[row][col] << " ";
+            }
+            os << "] " << std::endl;
+        }
+
+        // NOTE(review): std::ends inserts a '\0' character into the stream - confirm this is intended
+        os << std::endl << std::ends;
+
+        ISMRMRD::serialize( const_cast<ISMRMRD::MetaContainer&>(this->attrib_), os);
+    }
+}
diff --git a/toolboxes/core/cpu/image/hoNDImageAttrib.h b/toolboxes/core/cpu/image/hoNDImageAttrib.h
new file mode 100644
index 0000000..55d0107
--- /dev/null
+++ b/toolboxes/core/cpu/image/hoNDImageAttrib.h
@@ -0,0 +1,329 @@
+/** \file       hoNDImageAttrib.h
+    \brief      N-dimensional image class for gadgetron with meta attributes
+
+                The serialize and deserialize function includes the meta attribute structure as well
+                The image data are first serialized, followed by the xml meta attribute representation
+
+    \author     Hui Xue
+*/
+
+#pragma once
+
+#include "hoNDImage.h"
+#include "hoNDMetaAttributes.h"
+
+namespace Gadgetron
+{
+    /// N-dimensional image that carries a GtImageAttribType meta attribute block
+    /// in addition to everything inherited from hoNDImage<T, D>.
+    /// All constructors forward their arguments to the matching hoNDImage constructor.
+    template <typename T, unsigned int D>
+    class hoNDImageAttrib : public hoNDImage<T, D>
+    {
+    public:
+
+        typedef hoNDImage<T, D> BaseClass;
+        typedef hoNDImageAttrib<T, D> Self;
+
+        typedef T element_type;
+        typedef T value_type;
+        // NOTE(review): fixed to float here instead of being taken from BaseClass - confirm both stay in sync
+        typedef float coord_type;
+
+        typedef typename BaseClass::a_axis_type a_axis_type;
+        typedef typename BaseClass::axis_type axis_type;
+
+        /// constructors
+        hoNDImageAttrib ();
+        hoNDImageAttrib (const std::vector<size_t>& dimensions);
+        hoNDImageAttrib (const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize);
+        hoNDImageAttrib (const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin);
+        hoNDImageAttrib (const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin, const axis_type& axis);
+
+        /// constructors taking the size of every dimension as a separate argument
+        hoNDImageAttrib(size_t len);
+        hoNDImageAttrib(size_t sx, size_t sy);
+        hoNDImageAttrib(size_t sx, size_t sy, size_t sz);
+        hoNDImageAttrib(size_t sx, size_t sy, size_t sz, size_t st);
+        hoNDImageAttrib(size_t sx, size_t sy, size_t sz, size_t st, size_t sp);
+        hoNDImageAttrib(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq);
+        hoNDImageAttrib(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr);
+        hoNDImageAttrib(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss);
+
+        /// attach memory constructors
+        hoNDImageAttrib (const std::vector<size_t>& dimensions, T* data, bool delete_data_on_destruct = false);
+        hoNDImageAttrib (const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, T* data, bool delete_data_on_destruct = false);
+        hoNDImageAttrib (const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin, T* data, bool delete_data_on_destruct = false);
+        hoNDImageAttrib (const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin, const axis_type& axis, T* data, bool delete_data_on_destruct = false);
+
+        /// attach memory constructors taking the size of every dimension as a separate argument
+        hoNDImageAttrib(size_t len, T* data, bool delete_data_on_destruct = false);
+        hoNDImageAttrib(size_t sx, size_t sy, T* data, bool delete_data_on_destruct = false);
+        hoNDImageAttrib(size_t sx, size_t sy, size_t sz, T* data, bool delete_data_on_destruct = false);
+        hoNDImageAttrib(size_t sx, size_t sy, size_t sz, size_t st, T* data, bool delete_data_on_destruct = false);
+        hoNDImageAttrib(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, T* data, bool delete_data_on_destruct = false);
+        hoNDImageAttrib(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, T* data, bool delete_data_on_destruct = false);
+        hoNDImageAttrib(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, T* data, bool delete_data_on_destruct = false);
+        hoNDImageAttrib(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss, T* data, bool delete_data_on_destruct = false);
+
+        /// conversion from a plain array and copy construction
+        hoNDImageAttrib(const hoNDArray<T>& a);
+        hoNDImageAttrib(const Self& a);
+
+        virtual ~hoNDImageAttrib();
+
+        /// meta attributes
+        // NOTE(review): hoNDImage<T, D> also uses a member named attrib_ in its own serialize()/deserialize();
+        // this declaration hides it inside hoNDImageAttrib - confirm that two attribute blocks are intended
+        GtImageAttribType attrib_;
+
+        /// serialize/deserialize
+        // NOTE(review): hoNDImage<T, D>::serialize is defined as a const member function; this one is
+        // not const, so it presumably hides rather than overrides the base version - confirm
+        virtual bool serialize(char*& buf, size_t& len);
+        virtual bool deserialize(char* buf, size_t& len);
+
+        /// print out the image information
+        virtual void print(std::ostream& os) const;
+        virtual void printContent(std::ostream& os) const;
+
+    protected:
+
+        // make the members of the dependent base class usable without this->
+        using BaseClass::dimensions_;
+        using BaseClass::offsetFactors_;
+        using BaseClass::pixelSize_;
+        using BaseClass::pixelSize_reciprocal_;
+        using BaseClass::origin_;
+        using BaseClass::axis_;
+        using BaseClass::data_;
+        using BaseClass::elements_;
+        using BaseClass::delete_data_on_destruct_;
+    };
+
+    /// Default constructor; forwards to the default constructor of BaseClass.
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib ()
+        : BaseClass()
+    {
+    }
+
+    /// Forwards the dimension vector to BaseClass.
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib (const std::vector<size_t>& dimensions)
+        : BaseClass(dimensions)
+    {
+    }
+
+    /// Forwards dimensions and pixel sizes to BaseClass.
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib (const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize)
+        : BaseClass(dimensions, pixelSize)
+    {
+    }
+
+    /// Forwards dimensions, pixel sizes and origin to BaseClass.
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib (const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin)
+        : BaseClass(dimensions, pixelSize, origin)
+    {
+    }
+
+    /// Forwards dimensions, pixel sizes, origin and axes to BaseClass.
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib (const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin, const axis_type& axis)
+        : BaseClass(dimensions, pixelSize, origin, axis)
+    {
+    }
+
+    /// Constructors taking one size argument per dimension (one to eight arguments).
+    /// Each of them forwards its arguments unchanged to the matching BaseClass constructor.
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib(size_t len)
+        : BaseClass(len)
+    {
+    }
+
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib(size_t sx, size_t sy)
+        : BaseClass(sx, sy)
+    {
+    }
+
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib(size_t sx, size_t sy, size_t sz)
+        : BaseClass(sx, sy, sz)
+    {
+    }
+
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib(size_t sx, size_t sy, size_t sz, size_t st)
+        : BaseClass(sx, sy, sz, st)
+    {
+    }
+
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib(size_t sx, size_t sy, size_t sz, size_t st, size_t sp)
+        : BaseClass(sx, sy, sz, st, sp)
+    {
+    }
+
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq)
+        : BaseClass(sx, sy, sz, st, sp, sq)
+    {
+    }
+
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr)
+        : BaseClass(sx, sy, sz, st, sp, sq, sr)
+    {
+    }
+
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss)
+        : BaseClass(sx, sy, sz, st, sp, sq, sr, ss)
+    {
+    }
+
+    /// Attach-memory constructors with vector based geometry.
+    /// `data` and `delete_data_on_destruct` are handed to BaseClass together with the geometry arguments.
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib (const std::vector<size_t>& dimensions, T* data, bool delete_data_on_destruct)
+        : BaseClass(dimensions, data, delete_data_on_destruct)
+    {
+    }
+
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib (const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, T* data, bool delete_data_on_destruct)
+        : BaseClass(dimensions, pixelSize, data, delete_data_on_destruct)
+    {
+    }
+
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib (const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin, T* data, bool delete_data_on_destruct)
+        : BaseClass(dimensions, pixelSize, origin, data, delete_data_on_destruct)
+    {
+    }
+
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib (const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin, const axis_type& axis, T* data, bool delete_data_on_destruct)
+        : BaseClass(dimensions, pixelSize, origin, axis, data, delete_data_on_destruct)
+    {
+    }
+
+    /// Attach-memory constructors taking one size argument per dimension (one to eight sizes).
+    /// The sizes, `data` and `delete_data_on_destruct` are forwarded unchanged to BaseClass.
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib(size_t len, T* data, bool delete_data_on_destruct)
+        : BaseClass(len, data, delete_data_on_destruct)
+    {
+    }
+
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib(size_t sx, size_t sy, T* data, bool delete_data_on_destruct)
+        : BaseClass(sx, sy, data, delete_data_on_destruct)
+    {
+    }
+
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib(size_t sx, size_t sy, size_t sz, T* data, bool delete_data_on_destruct)
+        : BaseClass(sx, sy, sz, data, delete_data_on_destruct)
+    {
+    }
+
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib(size_t sx, size_t sy, size_t sz, size_t st, T* data, bool delete_data_on_destruct)
+        : BaseClass(sx, sy, sz, st, data, delete_data_on_destruct)
+    {
+    }
+
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, T* data, bool delete_data_on_destruct)
+        : BaseClass(sx, sy, sz, st, sp, data, delete_data_on_destruct)
+    {
+    }
+
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, T* data, bool delete_data_on_destruct)
+        : BaseClass(sx, sy, sz, st, sp, sq, data, delete_data_on_destruct)
+    {
+    }
+
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, T* data, bool delete_data_on_destruct)
+        : BaseClass(sx, sy, sz, st, sp, sq, sr, data, delete_data_on_destruct)
+    {
+    }
+
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss, T* data, bool delete_data_on_destruct)
+        : BaseClass(sx, sy, sz, st, sp, sq, sr, ss, data, delete_data_on_destruct)
+    {
+    }
+
+    /// Construct from a plain hoNDArray by forwarding it to BaseClass;
+    /// the meta attributes are default constructed.
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib(const hoNDArray<T>& a)
+        : BaseClass(a)
+    {
+    }
+
+    /// Copy constructor: copies the image part through BaseClass and the meta attributes
+    /// through attrib_. Without the explicit attrib_ initializer the copy would start with
+    /// default constructed (empty) attributes, because a user-defined copy constructor
+    /// copies only what its initializer list names.
+    template <typename T, unsigned int D>
+    hoNDImageAttrib<T, D>::hoNDImageAttrib(const Self& a) : BaseClass(a), attrib_(a.attrib_)
+    {
+    }
+
+    /// Destructor; the body is empty, nothing is released explicitly at this level.
+    template <typename T, unsigned int D> 
+    hoNDImageAttrib<T, D>::~hoNDImageAttrib()
+    {
+    }
+
+    /// Serialize the image (through BaseClass::serialize) followed by the meta attributes
+    /// (through attrib_.serialize) into a newly allocated buffer.
+    ///
+    /// Buffer layout, in order:
+    ///   size_t  lenImage    length of the image block in bytes
+    ///   char x lenImage     image block
+    ///   size_t  lenAttrib   length of the attribute block in bytes
+    ///   char x lenAttrib    attribute block
+    ///
+    /// Both length prefixes are written as size_t, so the reported length is computed
+    /// with sizeof(size_t) as well and always equals the number of bytes written.
+    ///
+    /// \param buf  in: NULL or a buffer allocated with new[] (it is released);
+    ///             out: buffer allocated with new[], to be released by the caller with delete[]
+    /// \param len  out: size of the buffer in bytes
+    /// \return     true on success, false if an exception was caught
+    template <typename T, unsigned int D>
+    bool hoNDImageAttrib<T, D>::serialize(char*& buf, size_t& len) 
+    {
+        char* bufImage = NULL;
+        char* bufAttrib = NULL;
+
+        try
+        {
+            size_t lenImage(0);
+            GADGET_CHECK_THROW(BaseClass::serialize(bufImage, lenImage));
+
+            size_t lenAttrib(0);
+            GADGET_CHECK_THROW(attrib_.serialize(bufAttrib, lenAttrib));
+
+            len = sizeof(size_t) + lenImage + sizeof(size_t) + lenAttrib;
+
+            if ( buf != NULL )
+            {
+                delete [] buf;
+                buf = NULL;
+            }
+
+            buf = new char[len];
+            GADGET_CHECK_THROW(buf != NULL);
+
+            size_t offset = 0;
+            memcpy(buf, &lenImage, sizeof(size_t));
+            offset += sizeof(size_t);
+
+            memcpy(buf+offset, bufImage, lenImage);
+            offset += lenImage;
+
+            memcpy(buf+offset, &lenAttrib, sizeof(size_t));
+            offset += sizeof(size_t);
+
+            memcpy(buf+offset, bufAttrib, lenAttrib);
+            offset += lenAttrib;
+
+            // the intermediate buffers are no longer needed once their content has been copied
+            if ( bufImage != NULL ) delete [] bufImage;
+            if ( bufAttrib != NULL ) delete [] bufAttrib;
+        }
+        catch(...)
+        {
+            if ( bufImage != NULL ) delete [] bufImage;
+            if ( bufAttrib != NULL ) delete [] bufAttrib;
+
+            GADGET_ERROR_MSG("Errors happened in hoNDImageAttrib<T, D>::serialize(char*& buf, size_t& len) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Rebuild the image and the meta attributes from a buffer written by serialize().
+    /// The buffer is read as: size_t image length, image block, size_t attribute length, attribute block.
+    ///
+    /// \param buf  buffer to read from; its size is not checked here
+    /// \param len  neither read nor written by this function
+    /// \return     false if one of the two blocks could not be parsed or an exception was caught
+    template <typename T, unsigned int D>
+    bool hoNDImageAttrib<T, D>::deserialize(char* buf, size_t& len)
+    {
+        try
+        {
+            // read position inside buf; advanced after every field
+            char* cursor = buf;
+
+            size_t lenImage(0);
+            memcpy(&lenImage, cursor, sizeof(size_t));
+            cursor += sizeof(size_t);
+
+            GADGET_CHECK_RETURN_FALSE(BaseClass::deserialize(cursor, lenImage));
+            cursor += lenImage;
+
+            size_t lenAttrib(0);
+            memcpy(&lenAttrib, cursor, sizeof(size_t));
+            cursor += sizeof(size_t);
+
+            GADGET_CHECK_RETURN_FALSE(attrib_.deserialize(cursor, lenAttrib));
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageAttrib<T, D>::deserialize(char* buf, size_t& len) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Write a banner line followed by the output of printContent() to `os`.
+    template <typename T, unsigned int D>
+    inline void hoNDImageAttrib<T, D>::print(std::ostream& os) const
+    {
+        os << "-------------- Gagdgetron ND Image with meta attributes -------------" << std::endl;
+        this->printContent(os);
+    }
+
+    /// Print the image description produced by BaseClass::printContent(),
+    /// then the meta attributes through attrib_.print().
+    template <typename T, unsigned int D> 
+    inline void hoNDImageAttrib<T, D>::printContent(std::ostream& os) const
+    {
+        BaseClass::printContent(os);
+        attrib_.print(os);
+    }
+}
diff --git a/toolboxes/core/cpu/image/hoNDImageContainer2D.h b/toolboxes/core/cpu/image/hoNDImageContainer2D.h
new file mode 100644
index 0000000..e54b248
--- /dev/null
+++ b/toolboxes/core/cpu/image/hoNDImageContainer2D.h
@@ -0,0 +1,1223 @@
+/** \file       hoNDImageContainer2D.h
+    \brief      a container class to store a matrix of hoNDImages
+
+                The name "container2D" does not refer to 2D images. It means the container is organized as a 2D array in its storage logic.
+
+                Pointers to the images are stored in this container. The images themselves are deleted by the container if delete_data_on_destruct_==true.
+                The images are stored as a 2D array, but every row can have a different number of images (or columns). So it is not exactly an 
+                image matrix.
+
+    \author     Hui Xue
+*/
+
+#pragma once
+
+#include "hoNDImage.h"
+#include "hoNDArray_elemwise.h"
+
+namespace Gadgetron
+{
+
+    /// A ragged 2D container of image pointers: a vector of rows, where each row is a vector of ImageType*.
+    /// Rows may hold different numbers of images. When delete_data_on_destruct_ is true, clear()
+    /// (and therefore the destructor) deletes every non-NULL image stored in the container.
+    template <typename ImageType>
+    class hoNDImageContainer2D
+    {
+    public:
+
+        typedef hoNDImageContainer2D<ImageType> Self;
+
+        /// types forwarded from the stored image type
+        typedef typename ImageType::value_type value_type;
+        typedef typename ImageType::coord_type coord_type;
+        typedef typename ImageType::a_axis_type a_axis_type;
+        typedef typename ImageType::axis_type axis_type;
+
+        /// constructors
+        /// the copy constructor goes through operator=, so the copy shares the image pointers of a
+        hoNDImageContainer2D(bool delete_data_on_destruct=true);
+        hoNDImageContainer2D(const hoNDImageContainer2D<ImageType>& a);
+
+        /// shallow assignment: clears this container, copies the image pointers of rhs row by row
+        /// and sets delete_data_on_destruct_ to false so the shared images are not deleted here
+        Self& operator=(const Self& rhs);
+
+        /// calls clear()
+        virtual ~hoNDImageContainer2D();
+
+        /// create a container with images
+        /// col[r] is the number of images in row r
+        /// if createImage==true, default-constructed images are allocated and delete_data_on_destruct_ is set to true
+        /// if createImage==false, all image pointers are NULL and delete_data_on_destruct_ is set to false
+        bool create(const std::vector<size_t>& col, bool createImage=true);
+
+        /// create a container with images at certain sizes/pixel sizes/axis
+        /// the image will not be filled with zeros
+        /// delete_data_on_destruct_ is set to true by all of these overloads
+        bool create(const std::vector<size_t>& col, const std::vector<size_t>& dimensions);
+        bool create(const std::vector<size_t>& col, const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize);
+        bool create(const std::vector<size_t>& col, const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin);
+        bool create(const std::vector<size_t>& col, const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin, const axis_type& axis);
+
+        /// create a container from a chunk of memory
+        /// dim.size() must be ImageType::NDIM (one image) or ImageType::NDIM+1 (dim[NDIM] images)
+        /// e.g., a 3D memory chunk [RO E1 N] is used to allocate N [RO E1] images
+        /// the container will have 1 row and N columns
+        bool create(value_type* buf, const std::vector<size_t>& dim);
+
+        /// clear the matrix, if delete_data_on_destruct_==true, delete all stored images
+        bool clear();
+
+        /// copy from a container 2D, deep copy image content
+        /// if the row/column layout differs from a, this container is re-created with the layout of a first
+        bool copyFrom(const Self& a);
+
+        /// fill all images with zeros
+        bool fillWithZeros();
+
+        /// whether two containers have the same size
+        /// i.e. the same number of rows and the same number of columns in every row
+        template <typename ImageType2> 
+        bool dimensions_equal_container(const hoNDImageContainer2D<ImageType2>& a) const
+        {
+            if ( this->rows() != a.rows() ) return false;
+
+            unsigned int row;
+            for ( row=0; row<this->rows(); row++ )
+            {
+                if ( this->cols(row) != a.cols(row) )
+                {
+                    return false;
+                }
+            }
+
+            return true;
+        }
+
+        /// add one image to a row at end
+        /// the address of im is stored; the image is not copied
+        bool push_back(ImageType& im, size_t row);
+
+        /// add one image to a row at head
+        /// the address of im is stored; the image is not copied
+        bool push_front(ImageType& im, size_t row);
+
+        /// add one image to a row
+        /// the image is inserted before the image currently at col; col must be < cols(row)
+        bool insert(ImageType& im, size_t row, size_t col);
+
+        /// pop an image from a row end
+        bool pop_back(ImageType*& im, size_t row);
+
+        /// pop an image from a row head
+        bool pop_front(ImageType*& im, size_t row);
+
+        /// remove an image from the storage
+        /// the pointer is returned in im and the image is not deleted
+        bool remove(ImageType*& im, size_t row, size_t col);
+
+        /// if delete_data_on_destruct_==true, the image will be deleted
+        bool remove(size_t row, size_t col);
+
+        /// expand the container by certain number of rows
+        /// the new rows are appended empty
+        bool expand(size_t newRows);
+
+        /// insert one row
+        /// the row is inserted before the row currently at index row; row must be < rows()
+        bool insert(std::vector<ImageType*>& im_array, size_t row);
+
+        /// remove one row
+        /// the image pointers of the removed row are returned in im_array and the images are not deleted
+        bool remove(std::vector<ImageType*>& im_array, size_t row);
+        /// if delete_data_on_destruct_==true, the image will be deleted
+        bool remove(size_t row);
+
+        /// get image pointers
+        /// the stored pointer is dereferenced, so the entry at (row, col) must not be NULL
+        ImageType& get(size_t row, size_t col);
+        const ImageType& get(size_t row, size_t col) const;
+
+        ImageType& operator() (size_t row, size_t col);
+        const ImageType& operator() (size_t row, size_t col) const;
+
+        /// get one row
+        /// im_array receives a copy of the image pointers of the row
+        bool get(std::vector<ImageType*>& im_array, size_t row) const;
+
+        /// get number of all images in the container
+        size_t get_number_of_all_images();
+
+        /// get all images
+        /// the pointers are returned row after row
+        bool get_all_images(std::vector<ImageType*>& im_array);
+
+        /// set image pointer
+        /// the previous image at (row, col) is deleted if it is not NULL and delete_data_on_destruct_==true
+        bool set(ImageType* pImage, size_t row, size_t col);
+
+        /// convert one row to a hoNDArray
+        /// all images in this row should have the same dimensions; if not, return false
+        /// the output array has one more dimension than the images; its last dimension is the number of images in the row
+        bool to_NDArray(size_t row, hoNDArray<value_type>& a);
+
+        /// whether to delete the memory on destruction
+        bool delete_data_on_destruct() const;
+        void delete_data_on_destruct(bool d);
+
+        /// get number of row and column
+        /// cols() without argument returns the number of columns of every row
+        size_t rows() const;
+        size_t cols(size_t row) const;
+        std::vector<size_t> cols() const;
+
+        /// check whether all images in a row have the same dimensions/pixelSizes/axises
+        bool has_identical_dimensions(unsigned int row) const;
+        bool has_identical_pixel_size(unsigned int row) const;
+        bool has_identical_axis(unsigned int row) const;
+        bool has_identical_image_geometry(unsigned int row) const;
+
+        /// serialize/deserialize
+        virtual bool serialize(char*& buf, size_t& len) const;
+        virtual bool deserialize(char* buf, size_t& len);
+
+        /// print out the image container information
+        virtual void print(std::ostream& os) const;
+
+    protected:
+
+        /// image pointers, indexed as image_container_[row][col]
+        std::vector< std::vector<ImageType*> > image_container_;
+
+        /// if true, clear() deletes the stored images
+        bool delete_data_on_destruct_;
+    };
+
+    /// Build an empty container.
+    /// \param delete_data_on_destruct  whether clear()/destruction deletes the stored images
+    template <typename ImageType> 
+    hoNDImageContainer2D<ImageType>::hoNDImageContainer2D(bool delete_data_on_destruct)
+        : image_container_(), delete_data_on_destruct_(delete_data_on_destruct)
+    {
+    }
+
+    /// Copy constructor: delegates to operator=, so the new container shares the image
+    /// pointers of a and does not delete them.
+    template <typename ImageType> 
+    hoNDImageContainer2D<ImageType>::hoNDImageContainer2D(const hoNDImageContainer2D<ImageType>& a) : delete_data_on_destruct_(false)
+    {
+        this->operator=(a);
+    }
+
+    /// Shallow assignment: this container is cleared (deleting its own images if it owned them),
+    /// then receives copies of the image pointers of rhs. Afterwards delete_data_on_destruct_ is
+    /// false, so the shared images are not deleted through this container.
+    /// \param rhs  container whose image pointers are copied
+    /// \return     reference to this container
+    template <typename ImageType> 
+    hoNDImageContainer2D<ImageType>& hoNDImageContainer2D<ImageType>::operator=(const Self& rhs)
+    {
+        if ( this != &rhs )
+        {
+            this->clear();
+
+            const size_t numRows = rhs.rows();
+            for ( size_t r=0; r<numRows; r++ )
+            {
+                std::vector<ImageType*> rowImages;
+                rhs.get(rowImages, r);
+                this->image_container_.push_back(rowImages);
+            }
+
+            // the pointers are shared with rhs, so this container must not delete them
+            this->delete_data_on_destruct_ = false;
+        }
+
+        return *this;
+    }
+
+    /// Destructor: releases the container through clear(); the images are deleted only if
+    /// delete_data_on_destruct_ is true.
+    template <typename ImageType> 
+    hoNDImageContainer2D<ImageType>::~hoNDImageContainer2D()
+    {
+        clear();
+    }
+
+    /// Re-create the container with col.size() rows, row r holding col[r] entries.
+    /// \param col          number of images for every row
+    /// \param createImage  if true, every entry is a newly allocated default-constructed image and the
+    ///                     container deletes them on clear(); if false, all entries are NULL and nothing is deleted
+    /// \return             true on success; false if clearing fails or an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::create(const std::vector<size_t>& col, bool createImage)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(this->clear());
+
+            // the container owns the images exactly when it allocates them itself
+            this->delete_data_on_destruct(createImage);
+
+            const size_t numRows = col.size();
+            image_container_.resize(numRows);
+
+            for ( unsigned int r=0; r<numRows; r++ )
+            {
+                std::vector<ImageType*>& rowImages = image_container_[r];
+                rowImages.resize(col[r], NULL);
+
+                if ( !createImage ) continue;
+
+                for ( unsigned int c=0; c<col[r]; c++ )
+                {
+                    rowImages[c] = new ImageType();
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::create(const std::vector<size_t>& col) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Re-create the container and allocate every image with the given dimensions.
+    /// The container deletes these images on clear().
+    /// \param col         number of images for every row
+    /// \param dimensions  dimensions passed to the ImageType constructor
+    /// \return            true on success; false if clearing fails or an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::create(const std::vector<size_t>& col, const std::vector<size_t>& dimensions)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(this->clear());
+            this->delete_data_on_destruct(true);
+
+            const size_t numRows = col.size();
+            image_container_.resize(numRows);
+
+            for ( unsigned int r=0; r<numRows; r++ )
+            {
+                std::vector<ImageType*>& rowImages = image_container_[r];
+                rowImages.resize(col[r], NULL);
+
+                unsigned int c = 0;
+                while ( c < col[r] )
+                {
+                    rowImages[c] = new ImageType(dimensions);
+                    c++;
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::create(const std::vector<size_t>& col, const std::vector<size_t>& dimensions) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Re-create the container and allocate every image with the given dimensions and pixel sizes.
+    /// The container deletes these images on clear().
+    /// \param col         number of images for every row
+    /// \param dimensions  dimensions passed to the ImageType constructor
+    /// \param pixelSize   pixel sizes passed to the ImageType constructor
+    /// \return            true on success; false if clearing fails or an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::create(const std::vector<size_t>& col, const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(this->clear());
+            this->delete_data_on_destruct(true);
+
+            const size_t numRows = col.size();
+            image_container_.resize(numRows);
+
+            for ( unsigned int r=0; r<numRows; r++ )
+            {
+                std::vector<ImageType*>& rowImages = image_container_[r];
+                rowImages.resize(col[r], NULL);
+
+                unsigned int c = 0;
+                while ( c < col[r] )
+                {
+                    rowImages[c] = new ImageType(dimensions, pixelSize);
+                    c++;
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::create(const std::vector<size_t>& col, const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Re-create the container and allocate every image with the given dimensions, pixel sizes and origin.
+    /// The container deletes these images on clear().
+    /// \param col         number of images for every row
+    /// \param dimensions  dimensions passed to the ImageType constructor
+    /// \param pixelSize   pixel sizes passed to the ImageType constructor
+    /// \param origin      origin passed to the ImageType constructor
+    /// \return            true on success; false if clearing fails or an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::create(const std::vector<size_t>& col, const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(this->clear());
+            this->delete_data_on_destruct(true);
+
+            const size_t numRows = col.size();
+            image_container_.resize(numRows);
+
+            for ( unsigned int r=0; r<numRows; r++ )
+            {
+                std::vector<ImageType*>& rowImages = image_container_[r];
+                rowImages.resize(col[r], NULL);
+
+                unsigned int c = 0;
+                while ( c < col[r] )
+                {
+                    rowImages[c] = new ImageType(dimensions, pixelSize, origin);
+                    c++;
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::create(const std::vector<size_t>& col, const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Re-create the container and allocate every image with the given dimensions, pixel sizes, origin and axis.
+    /// The container deletes these images on clear().
+    /// \param col         number of images for every row
+    /// \param dimensions  dimensions passed to the ImageType constructor
+    /// \param pixelSize   pixel sizes passed to the ImageType constructor
+    /// \param origin      origin passed to the ImageType constructor
+    /// \param axis        axis passed to the ImageType constructor
+    /// \return            true on success; false if clearing fails or an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::create(const std::vector<size_t>& col, const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin, const axis_type& axis)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(this->clear());
+            this->delete_data_on_destruct(true);
+
+            const size_t numRows = col.size();
+            image_container_.resize(numRows);
+
+            for ( unsigned int r=0; r<numRows; r++ )
+            {
+                std::vector<ImageType*>& rowImages = image_container_[r];
+                rowImages.resize(col[r], NULL);
+
+                unsigned int c = 0;
+                while ( c < col[r] )
+                {
+                    rowImages[c] = new ImageType(dimensions, pixelSize, origin, axis);
+                    c++;
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::create(const std::vector<size_t>& col, const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin, const axis_type& axis) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Re-create the container as a single row of images laid over an existing memory chunk.
+    /// dim must have ImageType::NDIM entries (one image) or ImageType::NDIM+1 entries
+    /// (dim[NDIM] images stored one after another in buf).
+    /// Each image is created on its slice of buf with the last argument of ImageType::create set to false;
+    /// the image objects themselves are deleted by the container on clear().
+    /// \param buf  start of the memory chunk
+    /// \param dim  image dimensions, optionally followed by the number of images
+    /// \return     true on success; false if dim has an unsupported size, clearing fails or an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::create(value_type* buf, const std::vector<size_t>& dim)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE( (dim.size()==ImageType::NDIM) || (dim.size()==ImageType::NDIM+1) );
+
+            GADGET_CHECK_RETURN_FALSE(this->clear());
+            this->delete_data_on_destruct(true);
+
+            // split dim into the per-image dimensions and the number of images
+            std::vector<size_t> imageDim;
+            size_t numImages = 1;
+            if ( dim.size()==ImageType::NDIM )
+            {
+                imageDim = dim;
+            }
+            else
+            {
+                imageDim.assign(dim.begin(), dim.begin()+ImageType::NDIM);
+                numImages = dim[ImageType::NDIM];
+            }
+
+            image_container_.resize(1);
+            std::vector<ImageType*>& rowImages = image_container_[0];
+            rowImages.resize(numImages);
+
+            // number of pixels per image, i.e. the stride between consecutive images in buf
+            size_t pixelsPerImage = 1;
+            for ( unsigned int d=0; d<imageDim.size(); d++ )
+            {
+                pixelsPerImage *= imageDim[d];
+            }
+
+            for ( unsigned int c=0; c<numImages; c++ )
+            {
+                ImageType* img = new ImageType();
+                rowImages[c] = img;
+                img->create(imageDim, buf+c*pixelsPerImage, false);
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::create(value_type* buf, const std::vector<size_t>& dim) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Empty the container. If delete_data_on_destruct_ is true, every non-NULL image is deleted
+    /// first; otherwise only the pointers are dropped.
+    /// \return  true on success; false if an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::clear()
+    {
+        try
+        {
+            if ( delete_data_on_destruct_ )
+            {
+                const size_t numRows = this->rows();
+                for ( unsigned int r=0; r<numRows; r++ )
+                {
+                    const size_t numCols = this->cols(r);
+                    for ( unsigned int c=0; c<numCols; c++ )
+                    {
+                        ImageType*& slot = image_container_[r][c];
+                        if ( slot == NULL ) continue;
+
+                        delete slot;
+                        slot = NULL;
+                    }
+                }
+            }
+
+            image_container_.clear();
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::clear() ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Deep copy of another container: every image of a is copied into the image at the same
+    /// (row, col) of this container via copyImageInfoAndContent().
+    /// If the row/column layout differs, this container is first re-created with the layout of a
+    /// and then owns its newly allocated images.
+    /// \param a  container to copy from
+    /// \return   true on success; false if re-creation fails or an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::copyFrom(const Self& a)
+    {
+        try
+        {
+            const bool sameLayout = this->dimensions_equal_container(a);
+            if ( !sameLayout )
+            {
+                GADGET_CHECK_RETURN_FALSE(this->clear());
+                this->delete_data_on_destruct(true);
+
+                GADGET_CHECK_RETURN_FALSE(this->create(a.cols()));
+            }
+
+            const size_t numRows = this->rows();
+            for ( unsigned int r=0; r<numRows; r++ )
+            {
+                const size_t numCols = this->cols(r);
+                for ( unsigned int c=0; c<numCols; c++ )
+                {
+                    ImageType* dst = image_container_[r][c];
+                    dst->copyImageInfoAndContent(a(r, c));
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::copyFrom(const Self& a) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Set the memory of every stored image to zero bytes.
+    /// Every entry is dereferenced, so the container must not hold NULL pointers.
+    /// \return  true on success; false if an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::fillWithZeros()
+    {
+        try
+        {
+            const size_t numRows = this->rows();
+            for ( unsigned int r=0; r<numRows; r++ )
+            {
+                const size_t numCols = this->cols(r);
+                for ( unsigned int c=0; c<numCols; c++ )
+                {
+                    ImageType* img = image_container_[r][c];
+                    memset(img->begin(), 0, img->get_number_of_bytes());
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::fillWithZeros() ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Append the address of an image to the end of a row; the image is not copied.
+    /// \param im   image whose address is stored
+    /// \param row  target row; must be < rows()
+    /// \return     true on success; false if row is out of range or an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::push_back(ImageType& im, size_t row)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(row<this->rows());
+
+            std::vector<ImageType*>& rowImages = image_container_[row];
+            rowImages.push_back(&im);
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::push_back(ImageType& im, size_t row) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Insert the address of an image at the head of a row; the image is not copied.
+    /// \param im   image whose address is stored
+    /// \param row  target row; must be < rows()
+    /// \return     true on success; false if row is out of range or an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::push_front(ImageType& im, size_t row)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(row<this->rows());
+
+            std::vector<ImageType*>& rowImages = image_container_[row];
+            rowImages.insert(rowImages.begin(), &im);
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::push_front(ImageType& im, size_t row) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Insert the address of an image into a row, before the entry currently at col.
+    /// \param im   image whose address is stored
+    /// \param row  target row; must be < rows()
+    /// \param col  position to insert at; must be < cols(row)
+    /// \return     true on success; false if row/col is out of range or an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::insert(ImageType& im, size_t row, size_t col)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(row<this->rows());
+            GADGET_CHECK_RETURN_FALSE(col<this->cols(row));
+
+            std::vector<ImageType*>& rowImages = image_container_[row];
+            rowImages.insert(rowImages.begin()+col, &im);
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::insert(ImageType& im, size_t row, size_t col) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Remove the last image pointer of a row and hand it to the caller.
+    /// The image itself is not deleted here.
+    /// \param im   receives the removed image pointer; set to NULL if the row is empty
+    /// \param row  row to pop from; must be < rows()
+    /// \return     true on success (including an empty row); false if row is out of range or an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::pop_back(ImageType*& im, size_t row)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(row<this->rows());
+
+            // an empty row has nothing to pop: cols(row)-1 would wrap around and index out of bounds,
+            // so behave like pop_front() and report NULL
+            if ( this->cols(row) == 0 )
+            {
+                im = NULL;
+                return true;
+            }
+
+            im = image_container_[row].back();
+            image_container_[row].pop_back();
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::pop_back(ImageType*& im, size_t row) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Remove the first image pointer of a row and hand it to the caller.
+    /// The image itself is not deleted here.
+    /// \param im   receives the removed image pointer; set to NULL if the row is empty
+    /// \param row  row to pop from; must be < rows()
+    /// \return     true on success (including an empty row); false if row is out of range or an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::pop_front(ImageType*& im, size_t row)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(row<this->rows());
+
+            if ( this->cols(row) == 0 )
+            {
+                im = NULL;
+                return true;
+            }
+
+            // hand the pointer to the caller before it is erased; otherwise the image is lost
+            im = image_container_[row].front();
+            image_container_[row].erase(image_container_[row].begin());
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::pop_front(ImageType*& im, size_t row) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Take one image pointer out of the container and hand it to the caller.
+    /// The image itself is not deleted here.
+    /// \param im   receives the removed image pointer
+    /// \param row  row of the image; must be < rows()
+    /// \param col  column of the image; must be < cols(row)
+    /// \return     true on success; false if row/col is out of range or an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::remove(ImageType*& im, size_t row, size_t col)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(row<this->rows());
+            GADGET_CHECK_RETURN_FALSE(col<this->cols(row));
+
+            std::vector<ImageType*>& rowImages = image_container_[row];
+            im = rowImages[col];
+            rowImages.erase(rowImages.begin()+col);
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::remove(ImageType*& im, size_t row, size_t col) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Remove one entry from the container; the image is deleted only if delete_data_on_destruct_ is true.
+    /// \param row  row of the image; must be < rows()
+    /// \param col  column of the image; must be < cols(row)
+    /// \return     true on success; false if the removal fails or an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::remove(size_t row, size_t col)
+    {
+        try
+        {
+            ImageType* removed = NULL;
+            GADGET_CHECK_RETURN_FALSE(this->remove(removed, row, col));
+
+            if ( delete_data_on_destruct_ )
+            {
+                delete removed;
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::remove(size_t row, size_t col) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Append newRows empty rows to the container; nothing happens if newRows is 0.
+    /// \param newRows  number of rows to add
+    /// \return         true on success; false if an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::expand(size_t newRows)
+    {
+        try
+        {
+            const size_t currentRows = this->rows();
+            if ( newRows == 0 ) return true;
+
+            image_container_.resize(currentRows+newRows);
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::expand(size_t newRows) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Insert a whole row of image pointers before the row currently at index row.
+    /// \param im_array  image pointers of the new row (copied into the container)
+    /// \param row       position to insert at; must be < rows()
+    /// \return          true on success; false if row is out of range or an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::insert(std::vector<ImageType*>& im_array, size_t row)
+    {
+        try
+        {
+            const bool rowInRange = row<this->rows();
+            GADGET_CHECK_RETURN_FALSE(rowInRange);
+
+            image_container_.insert(image_container_.begin()+row, im_array);
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::insert(std::vector<ImageType*>& im_array, size_t row) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Take a whole row out of the container and hand its image pointers to the caller.
+    /// The images themselves are not deleted here.
+    /// \param im_array  receives the image pointers of the removed row
+    /// \param row       row to remove; must be < rows()
+    /// \return          true on success; false if row is out of range or an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::remove(std::vector<ImageType*>& im_array, size_t row)
+    {
+        try
+        {
+            const bool rowInRange = row<this->rows();
+            GADGET_CHECK_RETURN_FALSE(rowInRange);
+
+            im_array = image_container_[row];
+            image_container_.erase(image_container_.begin()+row);
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::remove(std::vector<ImageType*>& im_array, size_t row) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Remove a whole row from the container; its non-NULL images are deleted only if
+    /// delete_data_on_destruct_ is true.
+    /// \param row  row to remove; must be < rows()
+    /// \return     true on success; false if the removal fails or an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::remove(size_t row)
+    {
+        try
+        {
+            std::vector<ImageType*> removedRow;
+            GADGET_CHECK_RETURN_FALSE(this->remove(removedRow, row));
+
+            if( delete_data_on_destruct_ )
+            {
+                const size_t numImages = removedRow.size();
+                for ( unsigned int n=0; n<numImages; n++ )
+                {
+                    ImageType*& slot = removedRow[n];
+                    if ( slot == NULL ) continue;
+
+                    delete slot;
+                    slot = NULL;
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::remove(size_t row) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Access the image stored at (row, col).
+    /// The stored pointer is dereferenced, so the entry must not be NULL.
+    /// Index checks go through GADGET_DEBUG_CHECK_THROW.
+    template <typename ImageType> 
+    inline ImageType& hoNDImageContainer2D<ImageType>::get(size_t row, size_t col)
+    {
+        GADGET_DEBUG_CHECK_THROW(row<this->rows());
+        GADGET_DEBUG_CHECK_THROW(col<this->cols(row));
+
+        ImageType* const img = image_container_[row][col];
+        return *img;
+    }
+
+    /// Read-only access to the image stored at (row, col).
+    /// The stored pointer is dereferenced, so the entry must not be NULL.
+    /// Index checks go through GADGET_DEBUG_CHECK_THROW.
+    template <typename ImageType> 
+    inline const ImageType& hoNDImageContainer2D<ImageType>::get(size_t row, size_t col) const
+    {
+        GADGET_DEBUG_CHECK_THROW(row<this->rows());
+        GADGET_DEBUG_CHECK_THROW(col<this->cols(row));
+
+        const ImageType* const img = image_container_[row][col];
+        return *img;
+    }
+
+    /// Function-call form of get(row, col): access the image stored at (row, col).
+    /// The stored pointer is dereferenced, so the entry must not be NULL.
+    template <typename ImageType> 
+    inline ImageType& hoNDImageContainer2D<ImageType>::operator() (size_t row, size_t col)
+    {
+        GADGET_DEBUG_CHECK_THROW(row<this->rows());
+        GADGET_DEBUG_CHECK_THROW(col<this->cols(row));
+
+        std::vector<ImageType*>& rowImages = image_container_[row];
+        return *rowImages[col];
+    }
+
+    /// Function-call form of the const get(row, col): read-only access to the image stored at (row, col).
+    /// The stored pointer is dereferenced, so the entry must not be NULL.
+    template <typename ImageType> 
+    inline const ImageType& hoNDImageContainer2D<ImageType>::operator() (size_t row, size_t col) const
+    {
+        GADGET_DEBUG_CHECK_THROW(row<this->rows());
+        GADGET_DEBUG_CHECK_THROW(col<this->cols(row));
+
+        const std::vector<ImageType*>& rowImages = image_container_[row];
+        return *rowImages[col];
+    }
+
+    /// Copy the image pointers of one row into im_array; the images stay in the container.
+    /// \param im_array  receives the image pointers of the row
+    /// \param row       row to read; must be < rows()
+    /// \return          true on success; false if row is out of range or an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::get(std::vector<ImageType*>& im_array, size_t row) const
+    {
+        try
+        {
+            const bool rowInRange = row<this->rows();
+            GADGET_CHECK_RETURN_FALSE(rowInRange);
+
+            const std::vector<ImageType*>& rowImages = image_container_[row];
+            im_array = rowImages;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::get(std::vector<ImageType*>& im_array, size_t row) const ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Count the entries of all rows (NULL entries are counted as well).
+    /// \return  total number of entries; 0 for an empty container or if an exception is caught
+    template <typename ImageType> 
+    inline size_t hoNDImageContainer2D<ImageType>::get_number_of_all_images()
+    {
+        size_t num = 0;
+
+        try
+        {
+            const size_t row = this->rows();
+
+            // size_t index: matches the type of the bound, no narrowing comparison
+            for ( size_t r=0; r<row; r++ )
+            {
+                num += this->cols(r);
+            }
+        }
+        catch(...)
+        {
+            // the return type is a count, so report 0 rather than the boolean 'false'
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::get_number_of_all_images() ... ");
+            return 0;
+        }
+
+        return num;
+    }
+
+    /// Collect the image pointers of all rows into one flat array, row after row.
+    /// The images stay in the container; NULL entries are copied as NULL.
+    /// \param im_array  receives the image pointers; cleared first
+    /// \return          true on success (including an empty container); false if an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::get_all_images(std::vector<ImageType*>& im_array)
+    {
+        try
+        {
+            im_array.clear();
+
+            const size_t row = this->rows();
+            if ( row == 0 ) return true;
+
+            const size_t num = this->get_number_of_all_images();
+
+            im_array.resize(num, NULL);
+
+            // size_t indices: match the types of the bounds, no narrowing comparisons
+            size_t ind = 0;
+            for ( size_t r=0; r<row; r++ )
+            {
+                const size_t col = this->cols(r);
+                for ( size_t c=0; c<col; c++ )
+                {
+                    im_array[ind] = image_container_[r][c];
+                    ind++;
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::get_all_images(std::vector<ImageType*>& im_array) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Replace the image pointer stored at (row, col).
+    /// The previous image is deleted if it is not NULL and delete_data_on_destruct() is true.
+    /// \param pImage  new image pointer to store
+    /// \param row     row of the entry
+    /// \param col     column of the entry
+    /// \return        true once the pointer is stored
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::set(ImageType* pImage, size_t row, size_t col)
+    {
+        GADGET_DEBUG_CHECK_RETURN_FALSE(row<this->rows());
+        GADGET_DEBUG_CHECK_RETURN_FALSE(col<this->cols(row));
+
+        ImageType*& slot = image_container_[row][col];
+        if ( (slot != NULL) && this->delete_data_on_destruct() )
+        {
+            delete slot;
+        }
+
+        slot = pImage;
+
+        return true;
+    }
+
+    /// Copy all images of one row into a single array.
+    /// The output array has the dimensions of the images plus one trailing dimension equal to
+    /// the number of images in the row; image c is copied to offset c*number_of_elements.
+    /// An empty row leaves a untouched and returns true.
+    /// \param row  row to convert; must be < rows()
+    /// \param a    output array, (re)created by this call
+    /// \return     true on success; false if row is out of range, the images of the row do not
+    ///             share the same dimensions, or an exception is caught
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::to_NDArray(size_t row, hoNDArray<value_type>& a)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(row<this->rows());
+
+            const size_t numImages = this->cols(row);
+            if ( numImages == 0 ) return true;
+
+            GADGET_CHECK_RETURN_FALSE(this->has_identical_dimensions( (unsigned int)row));
+
+            // the first image defines the geometry of the whole row
+            ImageType* firstImage = image_container_[row][0];
+
+            std::vector<size_t> imageDim;
+            firstImage->get_dimensions(imageDim);
+
+            const size_t elementsPerImage = firstImage->get_number_of_elements();
+            const size_t bytesPerImage = firstImage->get_number_of_bytes();
+
+            // output dimensions: image dimensions followed by the number of images
+            std::vector<size_t> arrayDim(imageDim);
+            arrayDim.push_back(numImages);
+
+            a.create(arrayDim);
+
+            for ( unsigned int c=0; c<numImages; c++ )
+            {
+                memcpy(a.begin()+c*elementsPerImage, image_container_[row][c]->begin(), bytesPerImage);
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::to_NDArray(size_t row, hoNDArray<value_type>& a) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// \return  whether clear() deletes the stored images
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::delete_data_on_destruct() const
+    {
+        const bool ownsImages = delete_data_on_destruct_;
+        return ownsImages;
+    }
+
+    /// Choose whether clear() deletes the stored images.
+    /// \param d  true to let the container delete its images
+    template <typename ImageType> 
+    inline void hoNDImageContainer2D<ImageType>::delete_data_on_destruct(bool d)
+    {
+        delete_data_on_destruct_ = d;
+    }
+
+    /// \return  number of rows currently held by the container
+    template <typename ImageType> 
+    inline size_t hoNDImageContainer2D<ImageType>::rows() const
+    {
+        const size_t numRows = this->image_container_.size();
+        return numRows;
+    }
+
+    /// \param row  row to query; checked through GADGET_DEBUG_CHECK_THROW
+    /// \return     number of entries stored in that row
+    template <typename ImageType> 
+    inline size_t hoNDImageContainer2D<ImageType>::cols(size_t row) const
+    {
+        GADGET_DEBUG_CHECK_THROW(row<this->rows());
+
+        const std::vector<ImageType*>& rowImages = image_container_[row];
+        return rowImages.size();
+    }
+
+    /// \return  a vector with one entry per row, holding the number of entries stored in that row
+    template <typename ImageType> 
+    inline std::vector<size_t> hoNDImageContainer2D<ImageType>::cols() const
+    {
+        std::vector<size_t> counts(this->rows(), 0);
+
+        for ( unsigned int r=0; r<this->rows(); r++ )
+        {
+            counts[r] = this->cols(r);
+        }
+
+        return counts;
+    }
+
+    /// Check whether every image of a row has the same dimensions as the first image of that row.
+    /// \param row  row to check; must be < rows()
+    /// \return     true for an empty or single-image row, or when all images match; false otherwise
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::has_identical_dimensions(unsigned int row) const
+    {
+        GADGET_CHECK_RETURN_FALSE(row<this->rows());
+
+        const size_t numImages = this->cols(row);
+        if ( numImages == 0 ) return true;
+
+        // the first image is the reference every other image is compared against
+        ImageType* const reference = image_container_[row][0];
+        for ( unsigned int c=1; c<numImages; c++ )
+        {
+            ImageType* const candidate = image_container_[row][c];
+            if ( !reference->dimensions_equal( *candidate ) ) return false;
+        }
+
+        return true;
+    }
+
+    /// Check whether every image of a row has the same pixel size as the first image of that row.
+    /// \param row  row to check; must be < rows()
+    /// \return     true for an empty or single-image row, or when all images match; false otherwise
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::has_identical_pixel_size(unsigned int row) const
+    {
+        GADGET_CHECK_RETURN_FALSE(row<this->rows());
+
+        const size_t numImages = this->cols(row);
+        if ( numImages == 0 ) return true;
+
+        // the first image is the reference every other image is compared against
+        ImageType* const reference = image_container_[row][0];
+        for ( unsigned int c=1; c<numImages; c++ )
+        {
+            ImageType* const candidate = image_container_[row][c];
+            if ( !reference->pixel_size_equal( *candidate ) ) return false;
+        }
+
+        return true;
+    }
+
+    /// Check whether every image of a row has the same axis as the first image of that row.
+    /// \param row  row to check; must be < rows()
+    /// \return     true for an empty or single-image row, or when all images match; false otherwise
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::has_identical_axis(unsigned int row) const
+    {
+        GADGET_CHECK_RETURN_FALSE(row<this->rows());
+
+        const size_t numImages = this->cols(row);
+        if ( numImages == 0 ) return true;
+
+        // the first image is the reference every other image is compared against
+        ImageType* const reference = image_container_[row][0];
+        for ( unsigned int c=1; c<numImages; c++ )
+        {
+            ImageType* const candidate = image_container_[row][c];
+            if ( !reference->axis_equal( *candidate ) ) return false;
+        }
+
+        return true;
+    }
+
+    /// Checks whether all images of one row share the same image geometry, i.e. identical
+    /// dimensions, pixel size and axes.
+    /// Returns false if row is out of range or if any of the three per-row checks fails.
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::has_identical_image_geometry(unsigned int row) const
+    {
+        GADGET_CHECK_RETURN_FALSE(row<this->rows());
+
+        // The row index has to be forwarded: this function answers the question for the
+        // requested row only, exactly like the three helpers it is built from.
+        if ( !this->has_identical_dimensions(row) ) return false;
+        if ( !this->has_identical_pixel_size(row) ) return false;
+        if ( !this->has_identical_axis(row) ) return false;
+
+        return true;
+    }
+
+    /**
+     * Serializes all images of the container into one contiguous buffer.
+     *
+     * Buffer layout (every header field is a size_t):
+     *   - number of rows
+     *   - number of columns of row 0, row 1, ..., row n-1
+     *   - for every image in storage order (row by row): offset of its content from the start
+     *     of the buffer, followed by the length of its content in bytes
+     *   - the image contents, back to back, in the same storage order
+     * A NULL image is stored with length 0 and contributes no content bytes.
+     *
+     * @param[out] buf      receives a buffer allocated with new char[]; it is not freed by this function
+     * @param[out] totalLen receives the size of buf in bytes
+     * @return true on success; false if an exception was caught or the final size check failed
+     */
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::serialize(char*& buf, size_t& totalLen) const 
+    {
+        // Temporary per-image buffers produced by ImageType::serialize(). Declared outside the
+        // try block so that the catch handler can release whatever has been allocated so far.
+        std::vector< std::vector<char*> > bufIm;
+
+        try
+        {
+            const size_t numOfRows = this->rows();
+
+            // size of the header == offset at which the first image content starts
+            size_t offsetImage = sizeof(size_t) + numOfRows*sizeof(size_t);
+
+            std::vector<size_t> col(numOfRows);
+
+            std::vector< std::vector<size_t> > offset(numOfRows);
+            std::vector< std::vector<size_t> > len(numOfRows);
+            bufIm.resize(numOfRows);
+
+            size_t row, c;
+            for ( row=0; row<numOfRows; row++ )
+            {
+                col[row] = this->cols(row);
+                offset[row].resize(col[row], 0);
+                len[row].resize(col[row], 0);
+                bufIm[row].resize(col[row], NULL);
+
+                // one (offset, length) pair per image
+                offsetImage += sizeof(size_t)*col[row]*2;
+            }
+
+            totalLen = offsetImage;
+
+            // The offset is accumulated over all images in storage order. Deriving it from the
+            // previous column (offset[row][c-1]) is invalid for the first image of a row, and
+            // writing offset[0][0] is invalid when the container or its first row is empty.
+            size_t offsetCurr = offsetImage;
+
+            for ( row=0; row<numOfRows; row++ )
+            {
+                for ( c=0; c<col[row]; c++ )
+                {
+                    ImageType* im = image_container_[row][c];
+                    if ( im != NULL )
+                    {
+                        char* bufImCurr=NULL;
+                        size_t lenIm = 0;
+
+                        im->serialize(bufImCurr, lenIm);
+
+                        bufIm[row][c] = bufImCurr;
+                        len[row][c] = lenIm;
+                    }
+                    else
+                    {
+                        len[row][c] = 0;
+                    }
+
+                    offset[row][c] = offsetCurr;
+                    offsetCurr += len[row][c];
+
+                    totalLen += len[row][c];
+                }
+            }
+
+            buf = new char[totalLen];
+            GADGET_CHECK_RETURN_FALSE(buf!=NULL);
+
+            size_t offsetBuf = 0;
+
+            memcpy(buf+offsetBuf, &numOfRows, sizeof(size_t));
+            offsetBuf += sizeof(size_t);
+
+            // &col[0] must not be formed for an empty vector
+            if ( numOfRows > 0 )
+            {
+                memcpy(buf+offsetBuf, &col[0], sizeof(size_t)*numOfRows);
+                offsetBuf += sizeof(size_t)*numOfRows;
+            }
+
+            for ( row=0; row<numOfRows; row++ )
+            {
+                for ( c=0; c<col[row]; c++ )
+                {
+                    size_t v = offset[row][c];
+                    size_t lv = len[row][c];
+
+                    memcpy(buf+offsetBuf, &v, sizeof(size_t));
+                    offsetBuf += sizeof(size_t);
+
+                    memcpy(buf+offsetBuf, &lv, sizeof(size_t));
+                    offsetBuf += sizeof(size_t);
+                }
+            }
+
+            for ( row=0; row<numOfRows; row++ )
+            {
+                for ( c=0; c<col[row]; c++ )
+                {
+                    if ( bufIm[row][c] != NULL )
+                    {
+                        memcpy(buf+offsetBuf, bufIm[row][c], len[row][c]);
+                        offsetBuf += len[row][c];
+
+                        delete [] bufIm[row][c];
+                        bufIm[row][c] = NULL;
+                    }
+                }
+            }
+
+            // every byte announced in the header must have been written
+            GADGET_CHECK_RETURN_FALSE(totalLen == offsetBuf);
+        }
+        catch(...)
+        {
+            // release the per-image buffers that were not consumed yet (delete [] NULL is a no-op)
+            for ( size_t r=0; r<bufIm.size(); r++ )
+            {
+                for ( size_t k=0; k<bufIm[r].size(); k++ )
+                {
+                    delete [] bufIm[r][k];
+                    bufIm[r][k] = NULL;
+                }
+            }
+
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::serialize(char*& buf, size_t& len) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /**
+     * Rebuilds the container from a buffer: a row count, the column count of every row, one
+     * (offset, length) pair per image and, at the given offsets, the image contents.
+     *
+     * The container is cleared first and set to delete its data on destruction. Every image is
+     * created with new ImageType(); an image whose stored length is 0 is left default-constructed.
+     *
+     * @param[in] buf start of the serialized buffer
+     * @param     len not read or written by this function
+     * @return true on success; false if a check failed or an exception was caught
+     */
+    template <typename ImageType> 
+    inline bool hoNDImageContainer2D<ImageType>::deserialize(char* buf, size_t& len)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(this->clear());
+            this->delete_data_on_destruct(true);
+
+            // read position inside the header part of the buffer
+            const char* header = buf;
+
+            size_t rowCount(0);
+            memcpy(&rowCount, header, sizeof(size_t));
+            header += sizeof(size_t);
+
+            if ( rowCount == 0 ) return true;
+
+            image_container_.resize(rowCount);
+
+            std::vector<size_t> colCount(rowCount);
+            memcpy(&colCount[0], header, sizeof(size_t)*rowCount);
+            header += sizeof(size_t)*rowCount;
+
+            for ( size_t row=0; row<this->rows(); row++ )
+            {
+                image_container_[row].resize(colCount[row], NULL);
+
+                for ( size_t c=0; c<colCount[row]; c++ )
+                {
+                    // table entry of this image: offset of its content, then its length
+                    size_t offsetCurr, lenCurr;
+
+                    memcpy(&offsetCurr, header, sizeof(size_t));
+                    header += sizeof(size_t);
+
+                    memcpy(&lenCurr, header, sizeof(size_t));
+                    header += sizeof(size_t);
+
+                    image_container_[row][c] = new ImageType();
+                    GADGET_CHECK_RETURN_FALSE(image_container_[row][c]!=NULL);
+
+                    if ( lenCurr > 0 )
+                    {
+                        GADGET_CHECK_RETURN_FALSE(image_container_[row][c]->deserialize(buf+offsetCurr, lenCurr));
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoNDImageContainer2D<ImageType>::deserialize(char* buf, size_t& len) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Writes a human-readable summary of the container to os: the image type, the number of
+    /// rows, the number of images per row and the details of at most the first three non-NULL
+    /// images of every row. The stream is switched to fixed floating-point notation.
+    template <typename ImageType> 
+    inline void hoNDImageContainer2D<ImageType>::print(std::ostream& os) const
+    {
+        os.unsetf(std::ios::scientific);
+        os.setf(std::ios::fixed);
+
+        // only the leading images of a row are printed in detail
+        const size_t maxImagesPrinted = 3;
+
+        size_t r, c;
+
+        // banner kept at the same width as the separator lines below
+        os << "--------------Gadgetron Image Container 2D --------------" << std::endl;
+        os << "Image type is : " << std::string(typeid(ImageType).name()) << std::endl;
+        os << "Number of stored image rows is : " << this->rows() << std::endl;
+        for ( r=0; r<this->rows(); r++ )
+        {
+            os << "Row " << r << " has " << this->cols(r) << " images " << std::endl;
+        }
+        os << "---------------------------------------------------------" << std::endl;
+        for ( r=0; r<this->rows(); r++ )
+        {
+            os << "Row " << r << " : "<< std::endl;
+            os << "=========================================================" << std::endl;
+            for ( c=0; c<this->cols(r) && c<maxImagesPrinted; c++ )
+            {
+                if ( image_container_[r][c] != NULL )
+                {
+                    os << "--> Image " << c << " : "<< std::endl;
+                    image_container_[r][c]->print(os);
+                    os << "=========================================================" << std::endl;
+                }
+            }
+        }
+    }
+}
diff --git a/toolboxes/core/cpu/math/CMakeLists.txt b/toolboxes/core/cpu/math/CMakeLists.txt
new file mode 100644
index 0000000..677d270
--- /dev/null
+++ b/toolboxes/core/cpu/math/CMakeLists.txt
@@ -0,0 +1,76 @@
+# Build script for the gadgetron_toolbox_cpucore_math library.
+
+# On Windows, define the macro that cpucore_math_export.h checks to select dllexport.
+if (WIN32)
+    ADD_DEFINITIONS(-D__BUILD_GADGETRON_CPUCORE_MATH__)
+endif (WIN32)
+
+# Header search paths: the cpu core toolbox directories plus Armadillo and ISMRMRD.
+include_directories(
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/algorithm
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/hostutils
+    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
+    ${ARMADILLO_INCLUDE_DIRS} 
+    ${ISMRMRD_INCLUDE_DIR} )
+
+# MKL headers and library directories are only added when MKL was found.
+if (MKL_FOUND)
+    INCLUDE_DIRECTORIES( ${MKL_INCLUDE_DIR} )
+    LINK_DIRECTORIES( ${MKL_LIB_DIR} ${MKL_COMPILER_LIB_DIR} )
+endif (MKL_FOUND)
+
+# Headers that are always part of the library (hoNDArray_math_util.h is disabled).
+set(cpucore_math_header_files
+    cpucore_math_export.h
+    # hoNDArray_math_util.h
+    hoNDArray_math.h
+    hoNDImage_util.h
+    hoNDImage_util.hxx
+    hoNDImage_util_instantiate.hxx 
+    hoNDArray_linalg.h )
+
+# Sources that are always part of the library (hoNDArray_math_util.cpp is disabled).
+set(cpucore_math_src_files 
+    # hoNDArray_math_util.cpp
+    hoNDImage_util.cpp 
+    hoNDArray_linalg.cpp )
+
+# The reduction and element-wise files are only added when Armadillo was found.
+if (ARMADILLO_FOUND)
+
+    set(cpucore_math_header_files 
+        ${cpucore_math_header_files}
+        hoNDArray_reductions.h
+        hoArmadillo.h
+        hoNDArray_elemwise.h
+         )
+
+    set(cpucore_math_src_files 
+        ${cpucore_math_src_files} 
+        hoNDArray_reductions.cpp
+        hoNDArray_elemwise.cpp
+        )
+
+endif (ARMADILLO_FOUND)
+
+# Library target; LIBTYPE is defined outside this file.
+add_library(gadgetron_toolbox_cpucore_math ${LIBTYPE} ${cpucore_math_header_files} ${cpucore_math_src_files})
+set_target_properties(gadgetron_toolbox_cpucore_math PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+
+# Link against MKL when it was found, otherwise against BLAS and LAPACK.
+if (MKL_FOUND)
+    target_link_libraries(gadgetron_toolbox_cpucore_math 
+        gadgetron_toolbox_cpucore
+        ${ARMADILLO_LIBRARIES}
+        ${ISMRMRD_LIBRARIES}
+        ${FFTW3_LIBRARIES} 
+        ${MKL_LIBRARIES}
+    )
+else (MKL_FOUND)
+    target_link_libraries(gadgetron_toolbox_cpucore_math 
+        gadgetron_toolbox_cpucore
+        ${ARMADILLO_LIBRARIES}
+        ${ISMRMRD_LIBRARIES}
+        ${BLAS_LIBRARIES}
+        ${LAPACK_LIBRARIES}
+        ${FFTW3_LIBRARIES}
+    )
+endif (MKL_FOUND)
+
+# Install the library into lib and the headers into include (component "main").
+install(TARGETS gadgetron_toolbox_cpucore_math DESTINATION lib COMPONENT main)
+
+install(FILES 
+    ${cpucore_math_header_files}
+    DESTINATION include COMPONENT main)
diff --git a/toolboxes/core/cpu/math/cpucore_math_export.h b/toolboxes/core/cpu/math/cpucore_math_export.h
new file mode 100644
index 0000000..1ac090a
--- /dev/null
+++ b/toolboxes/core/cpu/math/cpucore_math_export.h
@@ -0,0 +1,22 @@
+/** \file cpucore_math_export.h
+    \brief Required definitions for Windows, importing/exporting dll symbols.
+
+    Defines EXPORTCPUCOREMATH, the decoration placed on the public symbols of the
+    cpucore math toolbox.
+*/
+
+#ifndef CPUCORE_MATH_EXPORT_H_
+#define CPUCORE_MATH_EXPORT_H_
+
+#if defined (WIN32)
+    // With BUILD_TOOLBOX_STATIC defined the macro expands to nothing.
+    #ifdef BUILD_TOOLBOX_STATIC
+        #define EXPORTCPUCOREMATH
+    #else
+        // dllexport when either of the two build macros is defined, dllimport otherwise.
+        #if defined (__BUILD_GADGETRON_CPUCORE_MATH__) || defined (gadgetron_toolbox_cpucore_math_EXPORTS)
+            #define EXPORTCPUCOREMATH __declspec(dllexport)
+        #else
+            #define EXPORTCPUCOREMATH __declspec(dllimport)
+        #endif
+    #endif
+#else
+// Without WIN32 the macro expands to nothing.
+#define EXPORTCPUCOREMATH
+#endif
+
+#endif /* CPUCORE_MATH_EXPORT_H_ */
diff --git a/toolboxes/core/cpu/math/hoArmadillo.h b/toolboxes/core/cpu/math/hoArmadillo.h
new file mode 100644
index 0000000..87d65bd
--- /dev/null
+++ b/toolboxes/core/cpu/math/hoArmadillo.h
@@ -0,0 +1,89 @@
+#pragma once
+#define ARMA_64BIT_WORD
+#include "hoNDArray.h"
+
+#ifdef USE_ARMADILLO
+
+#include <armadillo>
+
+/** \file hoArmadillo.h
+\brief Utilities to create an Armadillo matrix or column vector from an hoNDArray.
+
+Utilities to create an Armadillo matrix or column vector from an hoNDArray.
+A helper function that creates an hoNDArray from an Armadillo matrix or vector is deliberately omitted:
+The recommended approach to using Armadillo's functionality and providing an hoNDArray of the result is 
+1) create an hoNDArray to hold the result, 
+2) convert this array to an Armadillo matrix or vector using the utilities provided in this header,
+3) assign the desired Armadillo computation to this array.
+This approach ensures that the Gadgetron -- and not Armadillo -- is responsible for subsequent memory handling.
+We refer to hoNDArray_math.h for some specific examples on how to use this Armadillo interface.
+*/
+
+namespace Gadgetron{
+
+  /**
+   * @brief Creates an Armadillo matrix from a two-dimensional hoNDArray.
+   * @param[in] x Input array; it must have exactly two dimensions.
+   * @return An Armadillo matrix mapped to the data pointer of the hoNDArray, sized
+   *         get_size(0) x get_size(1).
+   * @throws std::runtime_error if the array does not have two dimensions.
+   */
+  template<class T> arma::Mat<typename stdType<T>::Type> as_arma_matrix( hoNDArray<T> *x )
+  {
+    typedef typename stdType<T>::Type ElemType;
+
+    const bool isTwoDimensional = ( x->get_number_of_dimensions() == 2 );
+    if( !isTwoDimensional )
+      throw std::runtime_error("Wrong number of dimensions. Cannot convert hoNDArray to matrix");
+
+    ElemType* data = (ElemType*) x->get_data_ptr();
+    return arma::Mat<ElemType>( data, x->get_size(0), x->get_size(1), false, true );
+  }
+
+  /**
+   * @brief Creates an Armadillo matrix from a two-dimensional constant hoNDArray.
+   * @param[in] x Input array; it must have exactly two dimensions.
+   * @return A constant Armadillo matrix mapped to the data pointer of the hoNDArray, sized
+   *         get_size(0) x get_size(1).
+   * @throws std::runtime_error if the array does not have two dimensions.
+   */
+  template<class T> const arma::Mat<typename stdType<T>::Type> as_arma_matrix( const hoNDArray<T> *x )
+  {
+    typedef typename stdType<T>::Type ElemType;
+
+    const bool isTwoDimensional = ( x->get_number_of_dimensions() == 2 );
+    if( !isTwoDimensional )
+      throw std::runtime_error("Wrong number of dimensions. Cannot convert hoNDArray to matrix");
+
+    ElemType* data = (ElemType*) x->get_data_ptr();
+    return arma::Mat<ElemType>( data, x->get_size(0), x->get_size(1), false, true );
+  }
+  
+  /**
+   * @brief Creates an Armadillo column vector from an arbitrary-dimensional hoNDArray.
+   * @param[in] x Input array.
+   * @return An Armadillo column vector mapped to the data pointer of the hoNDArray; its
+   *         length is the number of elements of the array.
+   */
+  template<class T> arma::Col<typename stdType<T>::Type > as_arma_col( hoNDArray<T> *x )
+  {
+    typedef typename stdType<T>::Type ElemType;
+
+    ElemType* data = (ElemType*) x->get_data_ptr();
+    return arma::Col<ElemType>( data, x->get_number_of_elements(), false, true );
+  }
+
+  /**
+   * @brief Creates an Armadillo column vector from an arbitrary-dimensional constant hoNDArray.
+   * @param[in] x Input array.
+   * @return A constant Armadillo column vector mapped to the data pointer of the hoNDArray;
+   *         its length is the number of elements of the array.
+   */
+  template<class T> const arma::Col<typename stdType<T>::Type > as_arma_col( const hoNDArray<T> *x )
+  {
+    typedef typename stdType<T>::Type ElemType;
+
+    ElemType* data = (ElemType*) x->get_data_ptr();
+    return arma::Col<ElemType>( data, x->get_number_of_elements(), false, true );
+  }
+
+  /**
+   * @brief Creates an Armadillo row vector from an arbitrary-dimensional hoNDArray.
+   * @param[in] x Input array.
+   * @return An Armadillo row vector mapped to the data pointer of the hoNDArray; its
+   *         length is the number of elements of the array.
+   */
+  template<class T> arma::Row<typename stdType<T>::Type > as_arma_row( hoNDArray<T> *x )
+  {
+    typedef typename stdType<T>::Type ElemType;
+
+    ElemType* data = (ElemType*) x->get_data_ptr();
+    return arma::Row<ElemType>( data, x->get_number_of_elements(), false, true );
+  }
+
+    /**
+     * @brief Creates an Armadillo row vector from an arbitrary-dimensional constant hoNDArray.
+     * @param[in] x Input array.
+     * @return A constant Armadillo row vector mapped to the data pointer of the hoNDArray;
+     *         its length is the number of elements of the array.
+     */
+    template<class T> const arma::Row<typename stdType<T>::Type > as_arma_row( const hoNDArray<T> *x )
+    {
+      typedef typename stdType<T>::Type ElemType;
+
+      ElemType* data = (ElemType*) x->get_data_ptr();
+      return arma::Row<ElemType>( data, x->get_number_of_elements(), false, true );
+    }
+}
+
+#endif // USE_ARMADILLO
diff --git a/toolboxes/core/cpu/math/hoNDArray_elemwise.cpp b/toolboxes/core/cpu/math/hoNDArray_elemwise.cpp
new file mode 100644
index 0000000..532555d
--- /dev/null
+++ b/toolboxes/core/cpu/math/hoNDArray_elemwise.cpp
@@ -0,0 +1,3158 @@
+#include "hoNDArray_elemwise.h"
+#include "hoNDArray_reductions.h"
+#include "complext.h"
+#include "hoArmadillo.h"
+
+#ifdef USE_OMP
+    #include <omp.h>
+#endif
+
+#ifndef lapack_int
+    #define lapack_int int
+#endif // lapack_int
+
+#ifndef lapack_complex_float
+    #define lapack_complex_float  std::complex<float> 
+#endif // lapack_complex_float
+
+#ifndef lapack_complex_double
+    #define lapack_complex_double  std::complex<double> 
+#endif // #ifndef lapack_complex_double
+
+// Element count above which the loops in this file enable their OpenMP parallel-for.
+// The replacement list is parenthesized so that the macro expands to a single value in
+// any expression (e.g. x/NumElementsUseThreading).
+#define NumElementsUseThreading (64*1024)
+
+namespace Gadgetron{
+
+    // --------------------------------------------------------------------------------
+
+    /// Element-wise sum of two float arrays: r[i] = x[i] + y[i] for i in [0, N).
+    /// The OpenMP parallel-for is only enabled when N exceeds NumElementsUseThreading.
+    inline void add(size_t N, const float* x, const float* y, float* r)
+    {
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, r, x, y) if(N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; ++i )
+        {
+            const float lhs = x[i];
+            const float rhs = y[i];
+            r[i] = lhs + rhs;
+        }
+    }
+
+    /// Element-wise sum of two double arrays: r[i] = x[i] + y[i] for i in [0, N).
+    /// The OpenMP parallel-for is only enabled when N exceeds NumElementsUseThreading.
+    inline void add(size_t N, const double* x, const double* y, double* r)
+    {
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, r, x, y) if(N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; ++i )
+        {
+            const double lhs = x[i];
+            const double rhs = y[i];
+            r[i] = lhs + rhs;
+        }
+    }
+
+    /// Element-wise sum of two single-precision complex arrays: r[i] = x[i] + y[i] for i in [0, N).
+    /// The result is stored through the (real, imaginary) float pair of r[i].
+    /// The OpenMP parallel-for is only enabled when N exceeds NumElementsUseThreading.
+    inline void add(size_t N, const  std::complex<float> * x, const  std::complex<float> * y,  std::complex<float> * r)
+    {
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, r, x, y) if(N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; ++i )
+        {
+            // both operands are read completely before anything is stored
+            const float xRe = x[i].real();
+            const float xIm = x[i].imag();
+            const float yRe = y[i].real();
+            const float yIm = y[i].imag();
+
+            float (&out)[2] = reinterpret_cast<float(&)[2]>(r[i]);
+            out[0] = xRe + yRe;
+            out[1] = xIm + yIm;
+        }
+    }
+
+    /// Element-wise sum of two double-precision complex arrays: r[i] = x[i] + y[i] for i in [0, N).
+    /// The result is stored through the (real, imaginary) double pair of r[i].
+    /// The OpenMP parallel-for is only enabled when N exceeds NumElementsUseThreading.
+    inline void add(size_t N, const  std::complex<double> * x, const  std::complex<double> * y,  std::complex<double> * r)
+    {
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, r, x, y) if(N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; ++i )
+        {
+            // both operands are read completely before anything is stored
+            const double xRe = x[i].real();
+            const double xIm = x[i].imag();
+            const double yRe = y[i].real();
+            const double yIm = y[i].imag();
+
+            double (&out)[2] = reinterpret_cast<double(&)[2]>(r[i]);
+            out[0] = xRe + yRe;
+            out[1] = xIm + yIm;
+        }
+    }
+
+    /// Element-wise sum of two complext arrays; the pointers are reinterpreted as
+    /// std::complex<T> pointers and forwarded to the matching add overload.
+    template <typename T> 
+    void add(size_t N, const  complext<T>* x, const complext<T>* y, complext<T>* r)
+    {
+        typedef std::complex<T> StdComplex;
+
+        const StdComplex* lhs = reinterpret_cast< const StdComplex* >(x);
+        const StdComplex* rhs = reinterpret_cast< const StdComplex* >(y);
+        StdComplex* out = reinterpret_cast< StdComplex* >(r);
+
+        add(N, lhs, rhs, out);
+    }
+
+    /// Element-wise sum of two arrays: r = x + y.
+    /// x and y are expected to hold the same number of elements (checked by
+    /// GADGET_DEBUG_CHECK_THROW). If r holds a different number of elements than x,
+    /// r is first assigned from x.
+    template <typename T> 
+    void add(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r)
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements());
+
+        const size_t numElements = x.get_number_of_elements();
+        if ( r.get_number_of_elements() != numElements ) r = x;
+
+        add(numElements, x.begin(), y.begin(), r.begin());
+    }
+
+    // Explicit instantiations of add(x, y, r) with identical element types for x, y and r:
+    // float, double, std::complex<float/double> and complext<float/double>.
+    template EXPORTCPUCOREMATH void add(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r);
+    template EXPORTCPUCOREMATH void add(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
+    template EXPORTCPUCOREMATH void add(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void add(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, hoNDArray< std::complex<double> >& r);
+    template EXPORTCPUCOREMATH void add(const hoNDArray< complext<float> >& x, const hoNDArray< complext<float> >& y, hoNDArray< complext<float> >& r);
+    template EXPORTCPUCOREMATH void add(const hoNDArray< complext<double> >& x, const hoNDArray< complext<double> >& y, hoNDArray< complext<double> >& r);
+
+    /// Adds a real array to a complex array: the real part of r[i] is x[i].real() + y[i],
+    /// the imaginary part of r[i] is x[i].imag(), for i in [0, N).
+    /// The OpenMP parallel-for is only enabled when N exceeds NumElementsUseThreading.
+    template <typename T> 
+    void add(size_t N, const std::complex<T>* x, const T* y, std::complex<T>* r)
+    {
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, r, x, y) if(N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; ++i )
+        {
+            // the complex operand is read completely before anything is stored
+            const T xRe = x[i].real();
+            const T xIm = x[i].imag();
+
+            T (&out)[2] = reinterpret_cast<T(&)[2]>(r[i]);
+            out[0] = xRe + y[i];
+            out[1] = xIm;
+        }
+    }
+
+    /// Adds a real array to a complext array; the complext pointers are reinterpreted as
+    /// std::complex<T> pointers and forwarded to the matching add overload.
+    template <typename T> 
+    void add(size_t N, const complext<T>* x, const T* y, complext<T>* r)
+    {
+        typedef std::complex<T> StdComplex;
+
+        const StdComplex* lhs = reinterpret_cast< const StdComplex* >(x);
+        StdComplex* out = reinterpret_cast< StdComplex* >(r);
+
+        add(N, lhs, y, out);
+    }
+
+    /// Adds a real array y to a std::complex array x: r = x + y.
+    /// x and y are expected to hold the same number of elements (checked by
+    /// GADGET_DEBUG_CHECK_THROW). If r holds a different number of elements than x,
+    /// r is first assigned from x.
+    template <typename T> 
+    void add(const hoNDArray< std::complex<T> >& x, const hoNDArray<T>& y, hoNDArray< std::complex<T> >& r)
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements());
+
+        if ( r.get_number_of_elements()!=x.get_number_of_elements()) r = x;
+
+        add(x.get_number_of_elements(), x.begin(), y.begin(), r.begin());
+    }
+
+    // Explicit instantiations of add(std::complex array, real array) for float and double.
+    template EXPORTCPUCOREMATH void add(const hoNDArray< std::complex<float> >& x, const hoNDArray< float >& y, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void add(const hoNDArray< std::complex<double> >& x, const hoNDArray< double >& y, hoNDArray< std::complex<double> >& r);
+
+    /// Adds a real array y to a complext array x: r = x + y.
+    /// x and y are expected to hold the same number of elements (checked by
+    /// GADGET_DEBUG_CHECK_THROW). If r holds a different number of elements than x,
+    /// r is first assigned from x.
+    template <typename T> 
+    void add(const hoNDArray< complext<T> >& x, const hoNDArray<T>& y, hoNDArray< complext<T> >& r)
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements());
+
+        if ( r.get_number_of_elements()!=x.get_number_of_elements()) r = x;
+
+        add(x.get_number_of_elements(), x.begin(), y.begin(), r.begin());
+    }
+
+    // Explicit instantiations of add(complext array, real array) for float and double.
+    template EXPORTCPUCOREMATH void add(const hoNDArray< complext<float> >& x, const hoNDArray< float >& y, hoNDArray< complext<float> >& r);
+    template EXPORTCPUCOREMATH void add(const hoNDArray< complext<double> >& x, const hoNDArray< double >& y, hoNDArray< complext<double> >& r);
+
+    // --------------------------------------------------------------------------------
+
+    /// Element-wise difference of two float arrays: r[i] = x[i] - y[i] for i in [0, N).
+    /// The OpenMP parallel-for is only enabled when N exceeds NumElementsUseThreading.
+    inline void subtract(size_t N, const float* x, const float* y, float* r)
+    {
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, r, x, y) if(N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; ++i )
+        {
+            const float lhs = x[i];
+            const float rhs = y[i];
+            r[i] = lhs - rhs;
+        }
+    }
+
+    /// Element-wise difference of two double arrays: r[i] = x[i] - y[i] for i in [0, N).
+    /// The OpenMP parallel-for is only enabled when N exceeds NumElementsUseThreading.
+    inline void subtract(size_t N, const double* x, const double* y, double* r)
+    {
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, r, x, y) if(N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; ++i )
+        {
+            const double lhs = x[i];
+            const double rhs = y[i];
+            r[i] = lhs - rhs;
+        }
+    }
+
+    /// Element-wise difference of two single-precision complex arrays: r[i] = x[i] - y[i] for i in [0, N).
+    /// The result is stored through the (real, imaginary) float pair of r[i].
+    /// The OpenMP parallel-for is only enabled when N exceeds NumElementsUseThreading.
+    inline void subtract(size_t N, const  std::complex<float> * x, const  std::complex<float> * y,  std::complex<float> * r)
+    {
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, r, x, y) if(N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; ++i )
+        {
+            // both operands are read completely before anything is stored
+            const float xRe = x[i].real();
+            const float xIm = x[i].imag();
+            const float yRe = y[i].real();
+            const float yIm = y[i].imag();
+
+            float (&out)[2] = reinterpret_cast<float(&)[2]>(r[i]);
+            out[0] = xRe - yRe;
+            out[1] = xIm - yIm;
+        }
+    }
+
+    /// Element-wise difference of two double-precision complex arrays: r[i] = x[i] - y[i] for i in [0, N).
+    /// The result is stored through the (real, imaginary) double pair of r[i].
+    /// The OpenMP parallel-for is only enabled when N exceeds NumElementsUseThreading.
+    inline void subtract(size_t N, const  std::complex<double> * x, const  std::complex<double> * y,  std::complex<double> * r)
+    {
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, r, x, y) if(N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; ++i )
+        {
+            // both operands are read completely before anything is stored
+            const double xRe = x[i].real();
+            const double xIm = x[i].imag();
+            const double yRe = y[i].real();
+            const double yIm = y[i].imag();
+
+            double (&out)[2] = reinterpret_cast<double(&)[2]>(r[i]);
+            out[0] = xRe - yRe;
+            out[1] = xIm - yIm;
+        }
+    }
+
+    /// Element-wise difference of two complext arrays; the pointers are reinterpreted as
+    /// std::complex<T> pointers and forwarded to the matching subtract overload.
+    template <typename T> 
+    void subtract(size_t N, const  complext<T>* x, const complext<T>* y, complext<T>* r)
+    {
+        typedef std::complex<T> StdComplex;
+
+        const StdComplex* lhs = reinterpret_cast< const StdComplex* >(x);
+        const StdComplex* rhs = reinterpret_cast< const StdComplex* >(y);
+        StdComplex* out = reinterpret_cast< StdComplex* >(r);
+
+        subtract(N, lhs, rhs, out);
+    }
+
+    /// Element-wise difference of two arrays: r = x - y.
+    /// x and y are expected to hold the same number of elements (checked by
+    /// GADGET_DEBUG_CHECK_THROW). If r holds a different number of elements than x,
+    /// r is first assigned from x.
+    template <typename T> 
+    void subtract(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r)
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements());
+
+        const size_t numElements = x.get_number_of_elements();
+        if ( r.get_number_of_elements() != numElements ) r = x;
+
+        subtract(numElements, x.begin(), y.begin(), r.begin());
+    }
+
+    // Explicit instantiations of subtract(x, y, r) with identical element types for x, y and r:
+    // float, double, std::complex<float/double> and complext<float/double>.
+    template EXPORTCPUCOREMATH void subtract(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r);
+    template EXPORTCPUCOREMATH void subtract(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
+    template EXPORTCPUCOREMATH void subtract(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void subtract(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, hoNDArray< std::complex<double> >& r);
+    template EXPORTCPUCOREMATH void subtract(const hoNDArray< complext<float> >& x, const hoNDArray< complext<float> >& y, hoNDArray< complext<float> >& r);
+    template EXPORTCPUCOREMATH void subtract(const hoNDArray< complext<double> >& x, const hoNDArray< complext<double> >& y, hoNDArray< complext<double> >& r);
+
+    /// Subtracts a real array from a complex array: the real part of r[i] is x[i].real() - y[i],
+    /// the imaginary part of r[i] is x[i].imag(), for i in [0, N).
+    /// The OpenMP parallel-for is only enabled when N exceeds NumElementsUseThreading.
+    template <typename T> 
+    void subtract(size_t N, const std::complex<T>* x, const T* y, std::complex<T>* r)
+    {
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, r, x, y) if(N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; ++i )
+        {
+            // the complex operand is read completely before anything is stored
+            const T xRe = x[i].real();
+            const T xIm = x[i].imag();
+
+            T (&out)[2] = reinterpret_cast<T(&)[2]>(r[i]);
+            out[0] = xRe - y[i];
+            out[1] = xIm;
+        }
+    }
+
+    /// Subtracts a real array from a complext array; the complext pointers are reinterpreted
+    /// as std::complex<T> pointers and forwarded to the matching subtract overload.
+    template <typename T> 
+    void subtract(size_t N, const complext<T>* x, const T* y, complext<T>* r)
+    {
+        typedef std::complex<T> StdComplex;
+
+        const StdComplex* lhs = reinterpret_cast< const StdComplex* >(x);
+        StdComplex* out = reinterpret_cast< StdComplex* >(r);
+
+        subtract(N, lhs, y, out);
+    }
+
+    /// Subtracts a real array y from a std::complex array x: r = x - y.
+    /// x and y are expected to hold the same number of elements (checked by
+    /// GADGET_DEBUG_CHECK_THROW). If r holds a different number of elements than x,
+    /// r is first assigned from x.
+    template <typename T> 
+    void subtract(const hoNDArray< std::complex<T> >& x, const hoNDArray<T>& y, hoNDArray< std::complex<T> >& r)
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements());
+
+        if ( r.get_number_of_elements()!=x.get_number_of_elements()) r = x;
+
+        subtract(x.get_number_of_elements(), x.begin(), y.begin(), r.begin());
+    }
+
+    // Explicit instantiations of subtract(std::complex array, real array) for float and double.
+    template EXPORTCPUCOREMATH void subtract(const hoNDArray< std::complex<float> >& x, const hoNDArray< float >& y, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void subtract(const hoNDArray< std::complex<double> >& x, const hoNDArray< double >& y, hoNDArray< std::complex<double> >& r);
+
+    /// Subtracts a real array y from a complext array x: r = x - y.
+    /// x and y are expected to hold the same number of elements (checked by
+    /// GADGET_DEBUG_CHECK_THROW). If r holds a different number of elements than x,
+    /// r is first assigned from x.
+    template <typename T> 
+    void subtract(const hoNDArray< complext<T> >& x, const hoNDArray<T>& y, hoNDArray< complext<T> >& r)
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements());
+
+        if ( r.get_number_of_elements()!=x.get_number_of_elements()) r = x;
+
+        subtract(x.get_number_of_elements(), x.begin(), y.begin(), r.begin());
+    }
+
+    // Explicit instantiations of subtract(complext array, real array) for float and double.
+    template EXPORTCPUCOREMATH void subtract(const hoNDArray< complext<float> >& x, const hoNDArray< float >& y, hoNDArray< complext<float> >& r);
+    template EXPORTCPUCOREMATH void subtract(const hoNDArray< complext<double> >& x, const hoNDArray< double >& y, hoNDArray< complext<double> >& r);
+
+    // --------------------------------------------------------------------------------
+
+    /// Generic element-wise product: r[i] = x[i] * y[i] for i in [0, N), using operator* of T.
+    /// The OpenMP parallel-for is only enabled when N exceeds NumElementsUseThreading.
+    template <typename T> 
+    inline void multiply(size_t N, const T* x, const T* y, T* r)
+    {
+        long long i;
+        #pragma omp parallel for default(none) private(i) shared(N, x, y, r) if (N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; ++i )
+        {
+            r[i] = x[i]*y[i];
+        }
+    }
+
+    /// Element-wise product of two single-precision complex arrays, r[i] = x[i] * y[i] for
+    /// i in [0, N), computed as (a+ib)(c+id) = (ac-bd) + i(ad+bc).
+    /// The OpenMP parallel-for is only enabled when N exceeds NumElementsUseThreading.
+    inline void multiply(size_t N, const  std::complex<float> * x, const  std::complex<float> * y,  std::complex<float> * r)
+    {
+        long long i;
+        #pragma omp parallel for default(none) private(i) shared(N, x, y, r) if (N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; ++i )
+        {
+            // all four components are read before anything is stored
+            const float xRe = x[i].real();
+            const float xIm = x[i].imag();
+            const float yRe = y[i].real();
+            const float yIm = y[i].imag();
+
+            float (&out)[2] = reinterpret_cast<float(&)[2]>(r[i]);
+            out[0] = xRe*yRe-xIm*yIm;
+            out[1] = xRe*yIm+xIm*yRe;
+        }
+    }
+
+    /// Element-wise product of two double-precision complex arrays, r[i] = x[i] * y[i] for
+    /// i in [0, N), computed as (a+ib)(c+id) = (ac-bd) + i(ad+bc).
+    /// The OpenMP parallel-for is only enabled when N exceeds NumElementsUseThreading.
+    inline void multiply(size_t N, const  std::complex<double> * x, const  std::complex<double> * y,  std::complex<double> * r)
+    {
+        long long i;
+        #pragma omp parallel for default(none) private(i) shared(N, x, y, r) if (N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; ++i )
+        {
+            // all four components are read before anything is stored
+            const double xRe = x[i].real();
+            const double xIm = x[i].imag();
+            const double yRe = y[i].real();
+            const double yIm = y[i].imag();
+
+            double (&out)[2] = reinterpret_cast<double(&)[2]>(r[i]);
+            out[0] = xRe*yRe-xIm*yIm;
+            out[1] = xRe*yIm+xIm*yRe;
+        }
+    }
+
+    /// Element-wise product of two arrays: r = x .* y.
+    /// x and y are expected to hold the same number of elements (checked by
+    /// GADGET_DEBUG_CHECK_THROW). If r holds a different number of elements than x,
+    /// r is first assigned from x.
+    template <typename T> 
+    void multiply(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r)
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements());
+
+        const size_t numElements = x.get_number_of_elements();
+        if ( r.get_number_of_elements() != numElements ) r = x;
+
+        multiply(numElements, x.begin(), y.begin(), r.begin());
+    }
+
+    // Explicit instantiations of multiply(x, y, r) with identical element types for x, y and r:
+    // float, double and std::complex<float/double>.
+    template EXPORTCPUCOREMATH void multiply(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r);
+    template EXPORTCPUCOREMATH void multiply(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
+    template EXPORTCPUCOREMATH void multiply(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void multiply(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, hoNDArray< std::complex<double> >& r);
+
+    /// Scales a complex array element-wise by a real array: both the real and the imaginary
+    /// part of x[i] are multiplied by y[i] and stored in r[i], for i in [0, N).
+    /// The OpenMP parallel-for is only enabled when N exceeds NumElementsUseThreading.
+    template <typename T> 
+    void multiply(size_t N, const std::complex<T>* x, const T* y, std::complex<T>* r)
+    {
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, r, x, y) if(N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; ++i )
+        {
+            // the complex operand is read completely before anything is stored
+            const T xRe = x[i].real();
+            const T xIm = x[i].imag();
+
+            T (&out)[2] = reinterpret_cast<T(&)[2]>(r[i]);
+            out[0] = xRe*y[i];
+            out[1] = xIm*y[i];
+        }
+    }
+
+    // complext overload: views the complext buffers as std::complex<T> and
+    // forwards to the std::complex<T> kernel.
+    template <typename T> 
+    void multiply(size_t N, const complext<T>* x, const T* y, complext<T>* r)
+    {
+        const std::complex<T>* xStd = reinterpret_cast< const std::complex<T>* >(x);
+        std::complex<T>* rStd = reinterpret_cast< std::complex<T>* >(r);
+        multiply(N, xStd, y, rStd);
+    }
+
+    // Element-wise product of a complex array with a real array: r = x .* y.
+    // r is assigned from x first when its element count differs from x's.
+    template <typename T> 
+    void multiply(const hoNDArray< std::complex<T> >& x, const hoNDArray<T>& y, hoNDArray< std::complex<T> >& r)
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements());
+
+        if ( r.get_number_of_elements()!=x.get_number_of_elements()) r = x;
+
+        size_t count = x.get_number_of_elements();
+        const std::complex<T>* src = x.begin();
+        const T* factor = y.begin();
+        std::complex<T>* dst = r.begin();
+
+        multiply(count, src, factor, dst );
+    }
+
+    template EXPORTCPUCOREMATH void multiply(const hoNDArray< std::complex<float> >& x, const hoNDArray< float >& y, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void multiply(const hoNDArray< std::complex<double> >& x, const hoNDArray< double >& y, hoNDArray< std::complex<double> >& r);
+
+    // Element-wise product of a complext array with a real array: r = x .* y.
+    // r is assigned from x first when its element count differs from x's.
+    template <typename T> 
+    void multiply(const hoNDArray< complext<T> >& x, const hoNDArray<T>& y, hoNDArray< complext<T> >& r)
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements());
+
+        if ( r.get_number_of_elements()!=x.get_number_of_elements()) r = x;
+
+        size_t count = x.get_number_of_elements();
+        const complext<T>* src = x.begin();
+        const T* factor = y.begin();
+        complext<T>* dst = r.begin();
+
+        multiply(count, src, factor, dst );
+    }
+
+    template EXPORTCPUCOREMATH void multiply(const hoNDArray< complext<float> >& x, const hoNDArray< float >& y, hoNDArray< complext<float> >& r);
+    template EXPORTCPUCOREMATH void multiply(const hoNDArray< complext<double> >& x, const hoNDArray< double >& y, hoNDArray< complext<double> >& r);
+
+    // --------------------------------------------------------------------------------
+
+    // Element-wise quotient r[i] = x[i] / y[i] for N values of type T.
+    template <typename T> 
+    inline void divide(size_t N, const T* x, const T* y, T* r)
+    {
+        long long i;
+        #pragma omp parallel for default(none) private(i) shared(N, x, y, r) if (N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; i++ )
+        {
+            r[i] = x[i]/y[i];
+        }
+    }
+
+    // Element-wise complex quotient r[i] = x[i] / y[i] in single precision,
+    // computed as x[i] * conj(y[i]) scaled by 1 / (yr^2 + yi^2).
+    inline void divide(size_t N, const  std::complex<float> * x, const  std::complex<float> * y,  std::complex<float> * r)
+    {
+        long long i;
+        #pragma omp parallel for default(none) private(i) shared(N, x, y, r) if (N>NumElementsUseThreading)
+        for (i = 0; i < (long long)N; i++)
+        {
+            const float xr = x[i].real();
+            const float xi = x[i].imag();
+            const float yr = y[i].real();
+            const float yi = y[i].imag();
+
+            // reciprocal of the squared magnitude of the divisor
+            const float invNorm = 1/(yr*yr+yi*yi);
+
+            float* out = reinterpret_cast<float*>(r + i);
+            out[0] = (xr*yr+xi*yi)*invNorm;
+            out[1] = (xi*yr-xr*yi)*invNorm;
+        }
+    }
+
+    // Element-wise complex quotient r[i] = x[i] / y[i] in double precision,
+    // computed as x[i] * conj(y[i]) scaled by 1 / (yr^2 + yi^2).
+    inline void divide(size_t N, const  std::complex<double> * x, const  std::complex<double> * y,  std::complex<double> * r)
+    {
+        long long i;
+        #pragma omp parallel for default(none) private(i) shared(N, x, y, r) if (N>NumElementsUseThreading)
+        for (i = 0; i < (long long)N; i++)
+        {
+            const double xr = x[i].real();
+            const double xi = x[i].imag();
+            const double yr = y[i].real();
+            const double yi = y[i].imag();
+
+            // reciprocal of the squared magnitude of the divisor
+            const double invNorm = 1/(yr*yr+yi*yi);
+
+            double* out = reinterpret_cast<double*>(r + i);
+            out[0] = (xr*yr+xi*yi)*invNorm;
+            out[1] = (xi*yr-xr*yi)*invNorm;
+        }
+    }
+
+    // Element-wise quotient of two arrays with the same element type: r = x ./ y.
+    // When r holds a different number of elements than x, r is assigned from x
+    // before the kernel runs.
+    template <typename T> 
+    void divide(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r)
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements());
+
+        const bool sizeMismatch = ( r.get_number_of_elements()!=x.get_number_of_elements() );
+        if ( sizeMismatch ) r = x;
+
+        divide(x.get_number_of_elements(), x.begin(), y.begin(), r.begin());
+    }
+
+    template EXPORTCPUCOREMATH void divide(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r);
+    template EXPORTCPUCOREMATH void divide(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
+    template EXPORTCPUCOREMATH void divide(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void divide(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, hoNDArray< std::complex<double> >& r);
+
+    // Divide each complex value by a real divisor: r[i] = x[i] / y[i].
+    // The reciprocal of y[i] is formed once and applied to both parts.
+    template <typename T> 
+    void divide(size_t N, const std::complex<T>* x, const T* y, std::complex<T>* r)
+    {
+        long long i;
+
+        #pragma omp parallel for private(i) shared(N, r, x, y) if(N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; ++i)
+        {
+            const T re = x[i].real();
+            const T im = x[i].imag();
+            const T invY = T(1)/y[i];
+
+            T* out = reinterpret_cast<T*>(r + i);
+            out[0] = re*invY;
+            out[1] = im*invY;
+        }
+    }
+
+    // complext overload: views the complext buffers as std::complex<T> and
+    // forwards to the std::complex<T> kernel.
+    template <typename T> 
+    void divide(size_t N, const complext<T>* x, const T* y, complext<T>* r)
+    {
+        const std::complex<T>* xStd = reinterpret_cast< const std::complex<T>* >(x);
+        std::complex<T>* rStd = reinterpret_cast< std::complex<T>* >(r);
+        divide(N, xStd, y, rStd);
+    }
+
+    // Element-wise quotient of a complex array by a real array: r = x ./ y.
+    // r is assigned from x first when its element count differs from x's.
+    template <typename T> 
+    void divide(const hoNDArray< std::complex<T> >& x, const hoNDArray<T>& y, hoNDArray< std::complex<T> >& r)
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements());
+
+        if ( r.get_number_of_elements()!=x.get_number_of_elements()) r = x;
+
+        size_t count = x.get_number_of_elements();
+        const std::complex<T>* src = x.begin();
+        const T* divisor = y.begin();
+        std::complex<T>* dst = r.begin();
+
+        divide(count, src, divisor, dst );
+    }
+
+    template EXPORTCPUCOREMATH void divide(const hoNDArray< std::complex<float> >& x, const hoNDArray< float >& y, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void divide(const hoNDArray< std::complex<double> >& x, const hoNDArray< double >& y, hoNDArray< std::complex<double> >& r);
+
+    // Element-wise quotient of a complext array by a real array: r = x ./ y.
+    // r is assigned from x first when its element count differs from x's.
+    template <typename T> 
+    void divide(const hoNDArray< complext<T> >& x, const hoNDArray<T>& y, hoNDArray< complext<T> >& r)
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements());
+
+        if ( r.get_number_of_elements()!=x.get_number_of_elements()) r = x;
+
+        size_t count = x.get_number_of_elements();
+        const complext<T>* src = x.begin();
+        const T* divisor = y.begin();
+        complext<T>* dst = r.begin();
+
+        divide(count, src, divisor, dst );
+    }
+
+    template EXPORTCPUCOREMATH void divide(const hoNDArray< complext<float> >& x, const hoNDArray< float >& y, hoNDArray< complext<float> >& r);
+    template EXPORTCPUCOREMATH void divide(const hoNDArray< complext<double> >& x, const hoNDArray< double >& y, hoNDArray< complext<double> >& r);
+
+    // --------------------------------------------------------------------------------
+
+    // Element-wise product with the conjugate of the second operand, single
+    // precision: r[i] = x[i] * conj(y[i]).
+    inline void multiplyConj(size_t N, const  std::complex<float> * x, const  std::complex<float> * y,  std::complex<float> * r)
+    {
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, x, y, r) if (N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; i++ )
+        {
+            const float xr = x[i].real();
+            const float xi = x[i].imag();
+            const float yr = y[i].real();
+            const float yi = y[i].imag();
+
+            float* out = reinterpret_cast<float*>(r + i);
+            out[0] = (xr*yr + xi*yi);
+            out[1] = (yr*xi - xr*yi);
+        }
+    }
+
+    // Element-wise product with the conjugate of the second operand, double
+    // precision: r[i] = x[i] * conj(y[i]).
+    inline void multiplyConj(size_t N, const  std::complex<double> * x, const  std::complex<double> * y,  std::complex<double> * r)
+    {
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, x, y, r) if (N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; i++ )
+        {
+            const double xr = x[i].real();
+            const double xi = x[i].imag();
+            const double yr = y[i].real();
+            const double yi = y[i].imag();
+
+            double* out = reinterpret_cast<double*>(r + i);
+            out[0] = (xr*yr + xi*yi);
+            out[1] = (yr*xi - xr*yi);
+        }
+    }
+
+    // Array front end for multiplyConj: r = x .* conj(y).
+    // When r holds a different number of elements than x, r is assigned from x
+    // before the kernel runs.
+    template <typename T> 
+    void multiplyConj(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r)
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements());
+
+        const bool sizeMismatch = ( r.get_number_of_elements()!=x.get_number_of_elements() );
+        if ( sizeMismatch ) r = x;
+
+        multiplyConj(x.get_number_of_elements(), x.begin(), y.begin(), r.begin());
+    }
+
+    template EXPORTCPUCOREMATH void multiplyConj(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void multiplyConj(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, hoNDArray< std::complex<double> >& r);
+
+    // --------------------------------------------------------------------------------
+
+    // Complex conjugate of N single-precision values: the real part is copied
+    // and the imaginary part is negated.
+    inline void conjugate(size_t N, const  std::complex<float> * x,  std::complex<float> * r)
+    {
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, x, r) if (N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; i++ )
+        {
+            const float* in = reinterpret_cast<const float*>(x + i);
+            float* out = reinterpret_cast<float*>(r + i);
+            out[0] = in[0];
+            out[1] = -(in[1]);
+        }
+    }
+
+    // Complex conjugate of N double-precision values: the real part is copied
+    // and the imaginary part is negated.
+    inline void conjugate(size_t N, const  std::complex<double> * x,  std::complex<double> * r)
+    {
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, x, r) if (N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; i++ )
+        {
+            const double* in = reinterpret_cast<const double*>(x + i);
+            double* out = reinterpret_cast<double*>(r + i);
+            out[0] = in[0];
+            out[1] = -(in[1]);
+        }
+    }
+
+    // Array front end for conjugate: r = conj(x).
+    // r is re-created with x's dimensions when the element counts differ.
+    template <typename T> 
+    void conjugate(const hoNDArray<T>& x, hoNDArray<T>& r)
+    {
+        const bool sizeMismatch = ( r.get_number_of_elements()!=x.get_number_of_elements() );
+        if ( sizeMismatch ) r.create(x.get_dimensions());
+
+        conjugate(x.get_number_of_elements(), x.begin(), r.begin());
+    }
+
+    template EXPORTCPUCOREMATH void conjugate(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void conjugate(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r);
+
+    // --------------------------------------------------------------------------------
+
+    // Adds machine epsilon (of T's real type) to every element whose absolute
+    // value is below that epsilon; other elements are left unchanged.
+    template <typename T> 
+    inline void addEpsilon(size_t N, T* x)
+    {
+        typedef typename realType<T>::Type real_type;
+        real_type eps = std::numeric_limits<real_type>::epsilon();
+
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, x, eps) if (N>NumElementsUseThreading)
+        for (i=0; i<(long long)N; i++ )
+        {
+            if ( std::abs(x[i]) < eps ) x[i] += eps;
+        }
+    }
+
+    // Single-precision complex variant: when |x[i]| is below float epsilon,
+    // epsilon is added to the real part only.
+    inline void addEpsilon(size_t N,  std::complex<float> * x)
+    {
+        const float eps = std::numeric_limits<float>::epsilon();
+
+        long long i;
+
+        #pragma omp parallel for private(i) if (N>NumElementsUseThreading)
+        for (i=0; i<(long long)N; i++ )
+        {
+            if ( std::abs(x[i]) < eps )
+            {
+                float* parts = reinterpret_cast<float*>(x + i);
+                parts[0] += eps;
+            }
+        }
+    }
+
+    // Double-precision complex variant: when |x[i]| is below double epsilon,
+    // epsilon is added to the real part only.
+    inline void addEpsilon(size_t N,  std::complex<double> * x)
+    {
+        const double eps = std::numeric_limits<double>::epsilon();
+
+        long long i;
+
+        #pragma omp parallel for private(i) if (N>NumElementsUseThreading)
+        for (i=0; i<(long long)N; i++ )
+        {
+            if ( std::abs(x[i]) < eps )
+            {
+                double* parts = reinterpret_cast<double*>(x + i);
+                parts[0] += eps;
+            }
+        }
+    }
+
+    // Array front end: applies the pointer-based addEpsilon to all elements of x in place.
+    template <typename T> 
+    void addEpsilon(hoNDArray<T>& x)
+    {
+        const size_t count = x.get_number_of_elements();
+        addEpsilon(count, x.begin());
+    }
+
+    template EXPORTCPUCOREMATH void addEpsilon(hoNDArray<float>& x);
+    template EXPORTCPUCOREMATH void addEpsilon(hoNDArray<double>& x);
+    template EXPORTCPUCOREMATH void addEpsilon(hoNDArray< std::complex<float> >& x);
+    template EXPORTCPUCOREMATH void addEpsilon(hoNDArray< std::complex<double> >& x);
+
+    // --------------------------------------------------------------------------------
+
+    // Element-wise std::arg: r[i] = arg(x[i]).
+    // r is re-created with x's dimensions when the element counts differ.
+    template <typename T> 
+    void argument(const hoNDArray<T>& x, hoNDArray<typename realType<T>::Type>& r)
+    {
+        typedef typename realType<T>::Type real_type;
+
+        if ( r.get_number_of_elements()!=x.get_number_of_elements()) r.create(x.get_dimensions());
+
+        size_t N = x.get_number_of_elements();
+        const T* in = x.begin();
+        real_type* out = r.begin();
+
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, in, out) if (N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; i++ )
+        {
+            out[i] = std::arg( in[i] );
+        }
+    }
+
+    template EXPORTCPUCOREMATH void argument(const hoNDArray< std::complex<float> >& x, hoNDArray<float>& r);
+    template EXPORTCPUCOREMATH void argument(const hoNDArray< std::complex<double> >& x, hoNDArray<double>& r);
+
+    // --------------------------------------------------------------------------------
+
+    // Element-wise reciprocal: r[i] = 1 / x[i].
+    // r is assigned from x first when its dimensions differ from x's.
+    template <typename T> 
+    void inv(const hoNDArray<T>& x, hoNDArray<T>& r)
+    {
+        if ( !r.dimensions_equal(&x) ) r = x;
+
+        size_t N = x.get_number_of_elements();
+        const T* in = x.begin();
+        T* out = r.begin();
+
+        // numerator shared by all iterations
+        T one(1.0);
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, in, out, one) if (N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; i++ )
+        {
+            out[i] = one/in[i];
+        }
+    }
+
+    template EXPORTCPUCOREMATH void inv(const hoNDArray<float>& x, hoNDArray<float>& r);
+    template EXPORTCPUCOREMATH void inv(const hoNDArray<double>& x, hoNDArray<double>& r);
+    template EXPORTCPUCOREMATH void inv(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void inv(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r);
+
+    // --------------------------------------------------------------------------------
+
+    // Generic magnitude kernel: r[i] = GT_ABS(x[i]) for N elements.
+    template <typename T> 
+    void abs(size_t N, const T* x, typename realType<T>::Type* r)
+    {
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, x, r) if (N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; i++ )
+        {
+            r[i]= GT_ABS(x[i]);
+        }
+    }
+
+    // Magnitude of N std::complex<float> values: r[i] = sqrt(re^2 + im^2).
+    inline void abs(size_t N, const  std::complex<float> * x, float* r)
+    {
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, x, r) if (N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; i++ )
+        {
+            const float re = x[i].real();
+            const float im = x[i].imag();
+            r[i]= std::sqrt( (re*re) + (im * im) );
+        }
+    }
+
+    // Magnitude of N std::complex<double> values: r[i] = sqrt(re^2 + im^2).
+    inline void abs(size_t N, const  std::complex<double> * x, double* r)
+    {
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, x, r) if (N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; i++ )
+        {
+            const double re = x[i].real();
+            const double im = x[i].imag();
+            r[i]= std::sqrt( (re*re) + (im * im) );
+        }
+    }
+
+    // Magnitude of N complext<float> values: r[i] = sqrt(re^2 + im^2).
+    void abs(size_t N, const complext<float> * x, float* r)
+    {
+        long long i;
+
+        #pragma omp parallel for private(i) shared(N, x, r) if (N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; i++ )
+        {
+            const float re = x[i].real();
+            const float im = x[i].imag();
+            r[i]= std::sqrt( (re*re) + (im * im) );
+        }
+    }
+
+    // Magnitude of N complext<double> values: r[i] = sqrt(re^2 + im^2).
+    void abs(size_t N, const complext<double> * x, double* r)
+    {
+        long long i;
+
+        #pragma omp parallel for private(i) shared(N, x, r) if (N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; i++ )
+        {
+            const double re = x[i].real();
+            const double im = x[i].imag();
+            r[i]= std::sqrt( (re*re) + (im * im) );
+        }
+    }
+
+    // Array front end for abs with a real-valued result: r = |x|.
+    // r is re-created with x's dimensions when the element counts differ.
+    template <typename T> 
+    void abs(const hoNDArray<T>& x, hoNDArray<typename realType<T>::Type>& r)
+    {
+        const bool sizeMismatch = ( r.get_number_of_elements()!=x.get_number_of_elements() );
+        if ( sizeMismatch ) r.create(x.get_dimensions());
+
+        abs(x.get_number_of_elements(), x.begin(), r.begin());
+    }
+
+    template EXPORTCPUCOREMATH void abs(const hoNDArray<float>& x, hoNDArray<float>& r);
+    template EXPORTCPUCOREMATH void abs(const hoNDArray<double>& x, hoNDArray<double>& r);
+    template EXPORTCPUCOREMATH void abs(const hoNDArray< std::complex<float> >& x, hoNDArray<float>& r);
+    template EXPORTCPUCOREMATH void abs(const hoNDArray< std::complex<double> >& x, hoNDArray<double>& r);
+    template EXPORTCPUCOREMATH void abs(const hoNDArray< complext<float> >& x, hoNDArray<float>& r);
+    template EXPORTCPUCOREMATH void abs(const hoNDArray< complext<double> >& x, hoNDArray<double>& r);
+
+    // Magnitude stored back as a complex value (single precision): the real part
+    // of r[i] receives sqrt(re^2 + im^2) and the imaginary part is set to 0.
+    // The catch-all handler invokes GADGET_THROW with a fixed message.
+    inline void abs(size_t N, const std::complex<float>* x, std::complex<float>* r)
+    {
+        try
+        {
+            long long i;
+
+            #pragma omp parallel for default(none) private(i) shared(N, x, r) if (N>NumElementsUseThreading)
+            for ( i=0; i<(long long)N; i++ )
+            {
+                const float re = x[i].real();
+                const float im = x[i].imag();
+
+                float* out = reinterpret_cast<float*>(r + i);
+                out[0] = std::sqrt( (re*re) + (im * im) );
+                out[1] = 0;
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Error happened in abs(size_t N, const std::complex<float>* x, std::complex<float>* r) ... ");
+        }
+    }
+
+    // Magnitude stored back as a complex value (double precision): the real part
+    // of r[i] receives sqrt(re^2 + im^2) and the imaginary part is set to 0.
+    // The catch-all handler invokes GADGET_THROW with a fixed message.
+    inline void abs(size_t N, const std::complex<double>* x, std::complex<double>* r)
+    {
+        try
+        {
+            long long i;
+
+            #pragma omp parallel for default(none) private(i) shared(N, x, r) if (N>NumElementsUseThreading)
+            for ( i=0; i<(long long)N; i++ )
+            {
+                const double re = x[i].real();
+                const double im = x[i].imag();
+
+                double* out = reinterpret_cast<double*>(r + i);
+                out[0] = std::sqrt( (re*re) + (im * im) );
+                out[1] = 0;
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Error happened in abs(size_t N, const std::complex<double>* x, std::complex<double>* r) ... ");
+        }
+    }
+
+    // Array front end for abs with a complex-valued result.
+    // r is re-created with x's dimensions when the element counts differ.
+    template <typename T> 
+    void abs(const hoNDArray< std::complex<T> >& x, hoNDArray< std::complex<T> >& r)
+    {
+        const bool sizeMismatch = ( r.get_number_of_elements()!=x.get_number_of_elements() );
+        if ( sizeMismatch ) r.create(x.get_dimensions());
+
+        abs(x.get_number_of_elements(), x.begin(), r.begin());
+    }
+
+    template EXPORTCPUCOREMATH void abs(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void abs(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r);
+
+    // Returns a newly allocated real-valued array holding abs of *x.
+    // Throws std::runtime_error when x is null.
+    template<class T> boost::shared_ptr< hoNDArray<typename realType<T>::Type> > abs( hoNDArray<T> *x )
+    {
+        typedef hoNDArray<typename realType<T>::Type> result_array;
+
+        if( x == 0x0 )
+        {
+            throw std::runtime_error("Gadgetron::abs(): Invalid input array");
+        }
+
+        boost::shared_ptr< result_array > result(new result_array());
+        result->create(x->get_dimensions());
+        abs(*x, *result);
+        return result;
+    }
+
+    // Replaces *x by its element-wise abs, using x as both input and output.
+    // Throws std::runtime_error when x is null.
+    template<class T> void abs_inplace( hoNDArray<T> *x )
+    {
+        if( x == 0x0 )
+        {
+            throw std::runtime_error("Gadgetron::abs_inplace(): Invalid input array");
+        }
+
+        abs(*x, *x);
+    }
+
+    // Returns a newly allocated real-valued array holding |x|^2, obtained by
+    // taking abs of *x and then multiplying the result by itself in place.
+    // Throws std::runtime_error when x is null.
+    template<class T> boost::shared_ptr< hoNDArray<typename realType<T>::Type> > abs_square( hoNDArray<T> *x )
+    {
+        typedef hoNDArray<typename realType<T>::Type> result_array;
+
+        if( x == 0x0 )
+        {
+            throw std::runtime_error("Gadgetron::abs_square(): Invalid input array");
+        }
+
+        boost::shared_ptr< result_array > result(new result_array());
+        result->create(x->get_dimensions());
+
+        abs(*x, *result);
+        multiply(*result, *result, *result);
+        return result;
+    }
+
+    // --------------------------------------------------------------------------------
+
+    // Element-wise square root via std::sqrt: r[i] = sqrt(x[i]).
+    // r is re-created with x's dimensions when the element counts differ.
+    template <typename T> 
+    void sqrt(const hoNDArray<T>& x, hoNDArray<T>& r)
+    {
+        if ( r.get_number_of_elements()!=x.get_number_of_elements()) r.create(x.get_dimensions());
+
+        size_t N = x.get_number_of_elements();
+        const T* in = x.begin();
+        T* out = r.begin();
+
+        long long i;
+        #pragma omp parallel for default(none) private(i) shared(N, in, out) if (N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; i++ )
+        {
+            out[i] = std::sqrt(in[i]);
+        }
+    }
+
+    // Element-wise square root for complext arrays via Gadgetron::sqrt.
+    // r is re-created with x's dimensions when the element counts differ.
+    template <typename T> 
+    void sqrt(const hoNDArray< complext<T> >& x, hoNDArray< complext<T> >& r)
+    {
+        if ( r.get_number_of_elements()!=x.get_number_of_elements()) r.create(x.get_dimensions());
+
+        size_t N = x.get_number_of_elements();
+        const complext<T>* in = x.begin();
+        complext<T>* out = r.begin();
+
+        long long i;
+        #pragma omp parallel for default(none) private(i) shared(N, in, out) if (N>NumElementsUseThreading)
+        for ( i=0; i<(long long)N; i++ )
+        {
+            out[i] = Gadgetron::sqrt(in[i]);
+        }
+    }
+
+    template EXPORTCPUCOREMATH void sqrt(const hoNDArray<float>& x, hoNDArray<float>& r);
+    template EXPORTCPUCOREMATH void sqrt(const hoNDArray<double>& x, hoNDArray<double>& r);
+    template EXPORTCPUCOREMATH void sqrt(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void sqrt(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r);
+    template EXPORTCPUCOREMATH void sqrt(const hoNDArray< complext<float> >& x, hoNDArray< complext<float> >& r);
+    template EXPORTCPUCOREMATH void sqrt(const hoNDArray< complext<double> >& x, hoNDArray< complext<double> >& r);
+
+    // Returns a newly allocated array holding the element-wise sqrt of *x.
+    // Throws std::runtime_error when x is null.
+    template<class T> boost::shared_ptr< hoNDArray<T> > sqrt( hoNDArray<T> *x )
+    {
+        if( x == 0x0 )
+        {
+            throw std::runtime_error("Gadgetron::sqrt(): Invalid input array");
+        }
+
+        boost::shared_ptr< hoNDArray<T> > out(new hoNDArray<T>());
+        out->create(x->get_dimensions());
+        sqrt(*x, *out);
+        return out;
+    }
+
+    // Replaces *x by its element-wise sqrt, using x as both input and output.
+    // Throws std::runtime_error when x is null.
+    template<class T> void sqrt_inplace( hoNDArray<T> *x )
+    {
+        if( x == 0x0 )
+        {
+            throw std::runtime_error("Gadgetron::sqrt_inplace(): Invalid input array");
+        }
+
+        sqrt(*x, *x);
+    }
+
+    // --------------------------------------------------------------------------------
+
+    // Returns a newly allocated array holding x .* x.
+    // Throws std::runtime_error when x is null.
+    // The earlier Armadillo form (aRes = arma::square(as_arma_col(x))) is
+    // superseded by the multiply() call.
+    template<class T> boost::shared_ptr< hoNDArray<T> > square( hoNDArray<T> *x )
+    {
+        if( x == 0x0 )
+        {
+            throw std::runtime_error("Gadgetron::square(): Invalid input array");
+        }
+
+        boost::shared_ptr< hoNDArray<T> > out(new hoNDArray<T>());
+        out->create(x->get_dimensions());
+        multiply(*x, *x, *out);
+        return out;
+    }
+
+    // Squares every element of *x in place via multiply(x, x, x).
+    // Throws std::runtime_error when x is null.
+    // The earlier Armadillo form (aRes = arma::square(aRes)) is superseded by
+    // the multiply() call.
+    template<class T> void square_inplace( hoNDArray<T> *x )
+    {
+        if( x == 0x0 )
+        {
+            throw std::runtime_error("Gadgetron::square_inplace(): Invalid input array");
+        }
+
+        multiply(*x, *x, *x);
+    }
+
+    // --------------------------------------------------------------------------------
+
+    // Returns a newly allocated array holding the element-wise reciprocal of *x,
+    // computed with inv().
+    // Throws std::runtime_error when x is null.
+    // The earlier Armadillo form (aRes = ones/as_arma_col(x)) is superseded by
+    // the inv() call.
+    template<class T> boost::shared_ptr< hoNDArray<T> > reciprocal( hoNDArray<T> *x )
+    {
+        if( x == 0x0 )
+        {
+            throw std::runtime_error("Gadgetron::reciprocal(): Invalid input array");
+        }
+
+        boost::shared_ptr< hoNDArray<T> > out(new hoNDArray<T>());
+        out->create(x->get_dimensions());
+        inv(*x, *out);
+        return out;
+    }
+
+    // Replaces every element of *x by its reciprocal via inv(x, x).
+    // Throws std::runtime_error when x is null.
+    // The earlier Armadillo form (aRes = ones/aRes) is superseded by the inv() call.
+    template<class T> void reciprocal_inplace( hoNDArray<T> *x )
+    {
+        if( x == 0x0 )
+        {
+            throw std::runtime_error("Gadgetron::reciprocal_inplace(): Invalid input array");
+        }
+
+        inv(*x, *x);
+    }
+
+    // Returns a newly allocated array holding 1/sqrt(x): sqrt() fills the result,
+    // then inv() is applied to the result in place.
+    // Throws std::runtime_error when x is null.
+    // The earlier Armadillo form (aRes = ones/arma::sqrt(as_arma_col(x))) is
+    // superseded by these two calls.
+    template<class T> boost::shared_ptr< hoNDArray<T> > reciprocal_sqrt( hoNDArray<T> *x )
+    {
+        if( x == 0x0 )
+        {
+            throw std::runtime_error("Gadgetron::reciprocal_sqrt(): Invalid input array");
+        }
+
+        boost::shared_ptr< hoNDArray<T> > out(new hoNDArray<T>());
+        out->create(x->get_dimensions());
+
+        sqrt(*x, *out);
+        inv(*out, *out);
+        return out;
+    }
+
+    // Replaces every element of *x by 1/sqrt(x): sqrt() in place, then inv() in place.
+    // Throws std::runtime_error when x is null.
+    // The earlier Armadillo form (aRes = ones/arma::sqrt(aRes)) is superseded by
+    // these two calls.
+    template<class T> void reciprocal_sqrt_inplace( hoNDArray<T> *x )
+    {
+        if( x == 0x0 )
+        {
+            throw std::runtime_error("Gadgetron::reciprocal_sqrt_inplace(): Invalid input array");
+        }
+
+        sqrt(*x, *x);
+        inv(*x, *x);
+    }
+
+    // --------------------------------------------------------------------------------
+
+    // Returns a newly allocated array in which each element is sgn() of the
+    // corresponding element of *x.
+    // Throws std::runtime_error when x is null.
+    template<class T> boost::shared_ptr< hoNDArray<T> > sgn( hoNDArray<T> *x )
+    {
+        if( x == 0x0 )
+        {
+            throw std::runtime_error("Gadgetron::sgn(): Invalid input array");
+        }
+
+        boost::shared_ptr< hoNDArray<T> > out( new hoNDArray<T>() );
+        out->create(x->get_dimensions());
+
+        T* dst = out->get_data_ptr();
+        T* src = x->get_data_ptr();
+        long long count = (long long)out->get_number_of_elements();
+#ifdef USE_OMP
+#pragma omp parallel for
+#endif
+        for( long long k = 0; k < count; k++ ){
+            dst[k] = sgn(src[k]);
+        }
+        return out;
+    }
+
+    // Replaces every element of *x by sgn() of that element.
+    // Throws std::runtime_error when x is null.
+    template<class T> void sgn_inplace( hoNDArray<T> *x )
+    {
+        if( x == 0x0 )
+        {
+            throw std::runtime_error("Gadgetron::sgn_inplace(): Invalid input array");
+        }
+
+        T* data = x->get_data_ptr();
+        long long count = (long long)x->get_number_of_elements();
+#ifdef USE_OMP
+#pragma omp parallel for
+#endif
+        for( long long k = 0; k < count; k++ )
+        {
+            data[k] = sgn(data[k]);
+        }
+    }
+
+    // --------------------------------------------------------------------------------
+
+    // Returns a newly allocated real-valued array with the dimensions of *x,
+    // filled through Armadillo's arma::real applied to as_arma_col(x).
+    // Throws std::runtime_error when x is null.
+    template<class T> boost::shared_ptr< hoNDArray<typename realType<T>::Type> > real( hoNDArray<T> *x )
+    {
+        if( x == 0x0 )
+            throw std::runtime_error("Gadgetron::real(): Invalid input array");
+
+        boost::shared_ptr< hoNDArray<typename realType<T>::Type> > result(new hoNDArray<typename realType<T>::Type>());
+        result->create(x->get_dimensions());
+        // NOTE(review): presumably aRes shares storage with *result (via as_arma_col),
+        // so assigning to aRes fills the returned array — confirm against as_arma_col.
+        arma::Col<typename realType<T>::Type> aRes = as_arma_col(result.get());
+        aRes = arma::real(as_arma_col(x));
+        return result;
+    }
+
+    // --------------------------------------------------------------------------------
+
+    // Returns a newly allocated real-valued array with the dimensions of *x,
+    // filled through Armadillo's arma::imag applied to as_arma_col(x).
+    // Throws std::runtime_error when x is null.
+    template<class T> boost::shared_ptr< hoNDArray<typename realType<T>::Type> > imag( hoNDArray<T> *x )
+    {
+        if( x == 0x0 )
+            throw std::runtime_error("Gadgetron::imag(): Invalid input array");
+
+        boost::shared_ptr< hoNDArray<typename realType<T>::Type> > result(new hoNDArray<typename realType<T>::Type>());
+        result->create(x->get_dimensions());
+        // NOTE(review): presumably aRes shares storage with *result (via as_arma_col),
+        // so assigning to aRes fills the returned array — confirm against as_arma_col.
+        arma::Col<typename realType<T>::Type> aRes = as_arma_col(result.get());
+        aRes = arma::imag(as_arma_col(x));
+        return result;
+    }
+
+    // --------------------------------------------------------------------------------
+
+    // Returns a newly allocated array with the dimensions of *x, filled through
+    // Armadillo's arma::conj applied to as_arma_col(x).
+    // Throws std::runtime_error when x is null.
+    template<class T> boost::shared_ptr< hoNDArray<T> > conj( hoNDArray<T> *x )
+    {
+        if( x == 0x0 )
+            throw std::runtime_error("Gadgetron::conj(): Invalid input array");
+
+        boost::shared_ptr< hoNDArray<T> > result(new hoNDArray<T>());
+        result->create(x->get_dimensions());
+        // NOTE(review): presumably aRes shares storage with *result (via as_arma_col),
+        // so assigning to aRes fills the returned array — confirm against as_arma_col.
+        arma::Col<typename stdType<T>::Type> aRes = as_arma_col(result.get());
+        aRes = arma::conj(as_arma_col(x));
+        return result;
+    }
+
+    // --------------------------------------------------------------------------------
+
+    // Builds a complex array of type T from a real-valued array: the real parts
+    // come from *x and the imaginary parts from a zero-filled column of the
+    // same length.
+    // Raises runtime_error through BOOST_THROW_EXCEPTION when x is null.
+    template<class T> boost::shared_ptr< hoNDArray<T> > real_to_complex( hoNDArray<typename realType<T>::Type> *x )
+    {
+        if( x == 0x0 )
+            BOOST_THROW_EXCEPTION(runtime_error("Gadgetron::real_to_complex(): Invalid input array"));
+
+        boost::shared_ptr< hoNDArray<T> > result(new hoNDArray<T>());
+        result->create(x->get_dimensions());
+        // NOTE(review): presumably aRes shares storage with *result (via as_arma_col),
+        // so assigning to aRes fills the returned array — confirm against as_arma_col.
+        arma::Col<typename stdType<T>::Type> aRes = as_arma_col(result.get());
+        // Two-argument Col constructor: first operand gives real parts, second the (zero) imaginary parts
+        aRes = arma::Col<typename stdType<T>::Type>(as_arma_col(x), arma::Col<typename realType<T>::Type>(x->get_number_of_elements()).zeros());
+        return result;
+    }
+
+    // Builds a newly allocated complex array whose n-th element is
+    // T(real->at(n), imag->at(n)); the result takes its dimensions from *real.
+    // Raises runtime_error through BOOST_THROW_EXCEPTION when either pointer is
+    // null or when the two inputs hold different numbers of elements.
+    template<class T> boost::shared_ptr< hoNDArray<T> > real_imag_to_complex( hoNDArray<typename realType<T>::Type>* real, hoNDArray<typename realType<T>::Type>* imag )
+    {
+        if( real==0x0 || imag==0x0 )
+        {
+            BOOST_THROW_EXCEPTION(runtime_error("Gadgetron::real_imag_to_complex(): Invalid input array"));
+        }
+
+        const size_t count = real->get_number_of_elements();
+        if( count != imag->get_number_of_elements() )
+        {
+            BOOST_THROW_EXCEPTION(runtime_error("Gadgetron::real_imag_to_complex(): Invalid input array"));
+        }
+
+        boost::shared_ptr< hoNDArray<T> > out(new hoNDArray<T>());
+        out->create(real->get_dimensions());
+
+        T* dst = out->begin();
+        for ( size_t k=0; k<count; k++ )
+        {
+            dst[k] = T(real->at(k), imag->at(k));
+        }
+
+        return out;
+    }
+
+    // --------------------------------------------------------------------------------
+
+    // Combines separate real and imaginary arrays into one complex array (reference interface).
+    // The dimensions of real and imag are compared with GADGET_CHECK_THROW; cplx is re-created
+    // with the dimensions of real whenever its current dimensions differ.
+    // Anything thrown inside the body is caught and reported through GADGET_THROW.
+    template<class T> 
+    void real_imag_to_complex(const hoNDArray<typename realType<T>::Type>& real, const hoNDArray<typename realType<T>::Type>& imag, hoNDArray<T>& cplx)
+    {
+        typedef typename realType<T>::Type real_type;
+
+        try
+        {
+            GADGET_CHECK_THROW(real.dimensions_equal(&imag));
+
+            if ( !cplx.dimensions_equal(&real) )
+                cplx.create(real.get_dimensions());
+
+            T* dst = cplx.begin();
+            const real_type* reSrc = real.begin();
+            const real_type* imSrc = imag.begin();
+
+            long long count = (long long)real.get_number_of_elements();
+
+            // the loop index is declared in the for statement, hence private to each thread
+            #pragma omp parallel for shared(count, dst, reSrc, imSrc)
+            for ( long long k=0; k<count; k++ )
+                dst[k] = T(reSrc[k], imSrc[k]);
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in real_imag_to_complex(...) ... ");
+        }
+    }
+
+    template EXPORTCPUCOREMATH void real_imag_to_complex(const hoNDArray<float>& real, const hoNDArray<float>& imag, hoNDArray< std::complex<float> >& cplx);
+    template EXPORTCPUCOREMATH void real_imag_to_complex(const hoNDArray<double>& real, const hoNDArray<double>& imag, hoNDArray< std::complex<double> >& cplx);
+
+    // --------------------------------------------------------------------------------
+
+    // Splits a complex array into its real and imaginary parts.
+    // real and imag are each re-created with cplx's dimensions when their dimensions differ.
+    // Anything thrown inside the body is caught and reported through GADGET_THROW.
+    template<class T> 
+    void complex_to_real_imag(const hoNDArray<T>& cplx, hoNDArray<typename realType<T>::Type>& real, hoNDArray<typename realType<T>::Type>& imag)
+    {
+        typedef typename realType<T>::Type real_type;
+
+        try
+        {
+            if ( !real.dimensions_equal(&cplx) )
+                real.create(cplx.get_dimensions());
+
+            if ( !imag.dimensions_equal(&cplx) )
+                imag.create(cplx.get_dimensions());
+
+            const T* src = cplx.begin();
+            real_type* reDst = real.begin();
+            real_type* imDst = imag.begin();
+
+            long long count = (long long)real.get_number_of_elements();
+
+            #pragma omp parallel for default(none) shared(count, src, reDst, imDst)
+            for ( long long k=0; k<count; k++ )
+            {
+                reDst[k] = src[k].real();
+                imDst[k] = src[k].imag();
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in complex_to_real_imag(...) ... ");
+        }
+    }
+
+    template EXPORTCPUCOREMATH void complex_to_real_imag(const hoNDArray< std::complex<float> >& cplx, hoNDArray<float>& real, hoNDArray<float>& imag);
+    template EXPORTCPUCOREMATH void complex_to_real_imag(const hoNDArray< std::complex<double> >& cplx, hoNDArray<double>& real, hoNDArray<double>& imag);
+
+    // Overload for input that is already real-valued (float): the input is copied into 'real'
+    // and 'imag' is set to zero. Both outputs are re-created with cplx's dimensions when their
+    // dimensions differ. Anything thrown inside is caught and reported through GADGET_THROW.
+    void complex_to_real_imag(const hoNDArray<float>& cplx, hoNDArray<float>& real, hoNDArray<float>& imag)
+    {
+        try
+        {
+            if ( !real.dimensions_equal(&cplx) )
+                real.create(cplx.get_dimensions());
+
+            if ( !imag.dimensions_equal(&cplx) )
+                imag.create(cplx.get_dimensions());
+
+            const float* src = cplx.begin();
+            float* reDst = real.begin();
+            float* imDst = imag.begin();
+
+            long long count = (long long)real.get_number_of_elements();
+
+            #pragma omp parallel for default(none) shared(count, src, reDst, imDst)
+            for ( long long k=0; k<count; k++ )
+            {
+                reDst[k] = src[k];
+                imDst[k] = 0;
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in complex_to_real_imag(...) ... ");
+        }
+    }
+
+    // Overload for input that is already real-valued (double): the input is copied into 'real'
+    // and 'imag' is set to zero. Both outputs are re-created with cplx's dimensions when their
+    // dimensions differ. Anything thrown inside is caught and reported through GADGET_THROW.
+    void complex_to_real_imag(const hoNDArray<double>& cplx, hoNDArray<double>& real, hoNDArray<double>& imag)
+    {
+        try
+        {
+            if ( !real.dimensions_equal(&cplx) )
+                real.create(cplx.get_dimensions());
+
+            if ( !imag.dimensions_equal(&cplx) )
+                imag.create(cplx.get_dimensions());
+
+            const double* src = cplx.begin();
+            double* reDst = real.begin();
+            double* imDst = imag.begin();
+
+            long long count = (long long)real.get_number_of_elements();
+
+            #pragma omp parallel for default(none) shared(count, src, reDst, imDst)
+            for ( long long k=0; k<count; k++ )
+            {
+                reDst[k] = src[k];
+                imDst[k] = 0;
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in complex_to_real_imag(...) ... ");
+        }
+    }
+
+    // --------------------------------------------------------------------------------
+
+    // Extracts the real part of every element of cplx into the real-valued array 'real'.
+    // 'real' is re-created with cplx's dimensions when its dimensions differ.
+    // Anything thrown inside the body is caught and reported through GADGET_THROW.
+    template<class T> 
+    void complex_to_real(const hoNDArray<T>& cplx, hoNDArray<typename realType<T>::Type>& real)
+    {
+        typedef typename realType<T>::Type real_type;
+
+        try
+        {
+            if ( !real.dimensions_equal(&cplx) )
+                real.create(cplx.get_dimensions());
+
+            const T* src = cplx.begin();
+            real_type* dst = real.begin();
+
+            long long count = (long long)real.get_number_of_elements();
+
+            #pragma omp parallel for default(none) shared(count, src, dst)
+            for ( long long k=0; k<count; k++ )
+                dst[k] = src[k].real();
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in complex_to_real(...) ... ");
+        }
+    }
+
+    template EXPORTCPUCOREMATH void complex_to_real(const hoNDArray< std::complex<float> >& cplx, hoNDArray<float>& real);
+    template EXPORTCPUCOREMATH void complex_to_real(const hoNDArray< std::complex<double> >& cplx, hoNDArray<double>& real);
+
+    // Writes into 'real' a complex array holding only the real part of cplx: element k becomes
+    // T(cplx[k].real(), 0). 'real' is re-created with cplx's dimensions when its dimensions differ.
+    // Anything thrown inside the body is caught and reported through GADGET_THROW.
+    template<class T> 
+    void complex_to_real(const hoNDArray<T>& cplx, hoNDArray<T>& real)
+    {
+        try
+        {
+            if ( !real.dimensions_equal(&cplx) )
+                real.create(cplx.get_dimensions());
+
+            const T* src = cplx.begin();
+            T* dst = real.begin();
+
+            long long count = (long long)real.get_number_of_elements();
+
+            #pragma omp parallel for shared(count, src, dst)
+            for ( long long k=0; k<count; k++ )
+                dst[k] = T(src[k].real(), 0);
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in complex_to_real(...) ... ");
+        }
+    }
+
+    template EXPORTCPUCOREMATH void complex_to_real(const hoNDArray< std::complex<float> >& cplx, hoNDArray< std::complex<float> >& real);
+    template EXPORTCPUCOREMATH void complex_to_real(const hoNDArray< std::complex<double> >& cplx, hoNDArray< std::complex<double> >& real);
+
+    // In-place variant: drops the imaginary part of every element of cplx, i.e. element k
+    // becomes T(cplx[k].real(), 0).
+    // Anything thrown inside the body is caught and reported through GADGET_THROW.
+    template<class T> 
+    void complex_to_real(hoNDArray<T>& cplx)
+    {
+        try
+        {
+            T* data = cplx.begin();
+            long long count = (long long)cplx.get_number_of_elements();
+
+            #pragma omp parallel for shared(count, data)
+            for ( long long k=0; k<count; k++ )
+                data[k] = T(data[k].real(), 0);
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in complex_to_real(...) ... ");
+        }
+    }
+
+    template EXPORTCPUCOREMATH void complex_to_real(hoNDArray< std::complex<float> >& cplx);
+    template EXPORTCPUCOREMATH void complex_to_real(hoNDArray< std::complex<double> >& cplx);
+
+    // --------------------------------------------------------------------------------
+
+    // Extracts the imaginary part of every element of cplx into the real-valued array 'imag'.
+    // 'imag' is re-created with cplx's dimensions when its dimensions differ.
+    // Anything thrown inside the body is caught and reported through GADGET_THROW.
+    template<class T> 
+    void complex_to_imag(const hoNDArray<T>& cplx, hoNDArray<typename realType<T>::Type>& imag)
+    {
+        typedef typename realType<T>::Type real_type;
+
+        try
+        {
+            if ( !imag.dimensions_equal(&cplx) )
+                imag.create(cplx.get_dimensions());
+
+            const T* src = cplx.begin();
+            real_type* dst = imag.begin();
+
+            long long count = (long long)imag.get_number_of_elements();
+
+            #pragma omp parallel for default(none) shared(count, src, dst)
+            for ( long long k=0; k<count; k++ )
+                dst[k] = src[k].imag();
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in complex_to_imag(...) ... ");
+        }
+    }
+
+    template EXPORTCPUCOREMATH void complex_to_imag(const hoNDArray< std::complex<float> >& cplx, hoNDArray<float>& imag);
+    template EXPORTCPUCOREMATH void complex_to_imag(const hoNDArray< std::complex<double> >& cplx, hoNDArray<double>& imag);
+
+    // Writes into 'imag' a complex array holding only the imaginary part of cplx: element k
+    // becomes T(0, cplx[k].imag()). 'imag' is re-created with cplx's dimensions when its
+    // dimensions differ. Anything thrown inside is caught and reported through GADGET_THROW.
+    template<class T> 
+    void complex_to_imag(const hoNDArray<T>& cplx, hoNDArray<T>& imag)
+    {
+        try
+        {
+            if ( !imag.dimensions_equal(&cplx) )
+                imag.create(cplx.get_dimensions());
+
+            const T* src = cplx.begin();
+            T* dst = imag.begin();
+
+            long long count = (long long)imag.get_number_of_elements();
+
+            #pragma omp parallel for shared(count, src, dst)
+            for ( long long k=0; k<count; k++ )
+                dst[k] = T(0, src[k].imag());
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in complex_to_imag(...) ... ");
+        }
+    }
+
+    template EXPORTCPUCOREMATH void complex_to_imag(const hoNDArray< std::complex<float> >& cplx, hoNDArray< std::complex<float> >& imag);
+    template EXPORTCPUCOREMATH void complex_to_imag(const hoNDArray< std::complex<double> >& cplx, hoNDArray< std::complex<double> >& imag);
+
+    // In-place variant: drops the real part of every element of cplx, i.e. element k becomes
+    // T(0, cplx[k].imag()). This matches the two-array complex_to_imag overload, which also
+    // writes T(0, imag).
+    // Anything thrown inside the body is caught and reported through GADGET_THROW.
+    template<class T> 
+    void complex_to_imag(hoNDArray<T>& cplx)
+    {
+        try
+        {
+            T* pRes = cplx.begin();
+
+            size_t N = cplx.get_number_of_elements();
+
+            long long n;
+            #pragma omp parallel for private(n) shared(N, pRes)
+            for ( n=0; n<(long long)N; n++ )
+            {
+                // Keep the imaginary component and zero the real one. The previous code kept
+                // the real component instead, which made this a duplicate of complex_to_real().
+                pRes[n] = T(0, pRes[n].imag());
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in complex_to_imag(...) ... ");
+        }
+    }
+
+    template EXPORTCPUCOREMATH void complex_to_imag(hoNDArray< std::complex<float> >& cplx);
+    template EXPORTCPUCOREMATH void complex_to_imag(hoNDArray< std::complex<double> >& cplx);
+
+    // --------------------------------------------------------------------------------
+
+    // Promotes a real-valued array to complex with zero imaginary part (reference interface).
+    // cplx is re-created with the dimensions of real when its dimensions differ; element k
+    // becomes T(real[k], 0).
+    // Anything thrown inside the body is caught and reported through GADGET_THROW.
+    template<class T> 
+    void real_to_complex(const hoNDArray<typename realType<T>::Type>& real, hoNDArray<T>& cplx)
+    {
+        typedef typename realType<T>::Type real_type;
+
+        try
+        {
+            if ( !cplx.dimensions_equal(&real) )
+                cplx.create(real.get_dimensions());
+
+            const real_type* src = real.begin();
+            T* dst = cplx.begin();
+
+            long long count = (long long)real.get_number_of_elements();
+
+            #pragma omp parallel for shared(count, dst, src)
+            for ( long long k=0; k<count; k++ )
+                dst[k] = T(src[k], 0);
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in real_to_complex(...) ... ");
+        }
+    }
+
+    template EXPORTCPUCOREMATH void real_to_complex(const hoNDArray< float >& real, hoNDArray< std::complex<float> >& cplx);
+    template EXPORTCPUCOREMATH void real_to_complex(const hoNDArray< double >& real, hoNDArray< std::complex<double> >& cplx);
+
+    // --------------------------------------------------------------------------------
+
+    // Sets every element of *x to val.
+    // The loop is an OpenMP parallel-for, enabled only when the element count exceeds
+    // NumElementsUseThreading. x is dereferenced without a null check.
+    template<typename T> void fill( hoNDArray<T>* x, T val)
+    {
+        T* data = x->begin();
+        size_t N = x->get_number_of_elements();
+
+        #pragma omp parallel for default(none) shared(N, data, val) if (N>NumElementsUseThreading)
+        for ( long long k=0; k<(long long)N; k++ )
+            data[k] = val;
+    }
+
+    template EXPORTCPUCOREMATH void fill( hoNDArray<float>* x, float val);
+    template EXPORTCPUCOREMATH void fill( hoNDArray<double>* x, double val);
+    template EXPORTCPUCOREMATH void fill( hoNDArray< std::complex<float> >* x,  std::complex<float>  val);
+    template EXPORTCPUCOREMATH void fill( hoNDArray< std::complex<double> >* x,  std::complex<double>  val);
+    template EXPORTCPUCOREMATH void fill( hoNDArray< complext<float> >* x,  complext<float>  val);
+    template EXPORTCPUCOREMATH void fill( hoNDArray< complext<double> >* x,  complext<double>  val);
+
+    // --------------------------------------------------------------------------------
+
+    // Reference overload of fill(): forwards to the pointer version with the address of x.
+    template<typename T> void fill( hoNDArray<T>& x, T val )
+    {
+        hoNDArray<T>* target = &x;
+        Gadgetron::fill( target, val );
+    }
+
+    template EXPORTCPUCOREMATH void fill( hoNDArray<float>& x, float val);
+    template EXPORTCPUCOREMATH void fill( hoNDArray<double>& x, double val);
+    template EXPORTCPUCOREMATH void fill( hoNDArray< std::complex<float> >& x,  std::complex<float>  val);
+    template EXPORTCPUCOREMATH void fill( hoNDArray< std::complex<double> >& x,  std::complex<double>  val);
+    template EXPORTCPUCOREMATH void fill( hoNDArray< complext<float> >& x,  complext<float>  val);
+    template EXPORTCPUCOREMATH void fill( hoNDArray< complext<double> >& x,  complext<double>  val);
+
+    // --------------------------------------------------------------------------------
+
+    //
+    // TODO:
+    // The clamp functions could (probably) be implemented much like we use Thrust for the device versions
+    // - i.e. using Armadillo's transform on the array.
+    // However, this requires a newer version of Armadillo than current Linux distributions provide...
+    //
+
+    // Unary functor used by clamp(): values below 'min' are replaced by 'min_val', values at or
+    // above 'max' are replaced by 'max_val', and everything else is returned unchanged.
+    template<typename T> struct hoNDA_clamp //: public thrust::unary_function<T,T>
+    {
+        hoNDA_clamp( T _min, T _max, T _min_val, T _max_val )
+            : min(_min), max(_max), min_val(_min_val), max_val(_max_val) {}
+
+        T operator()(const T &x) const
+        {
+            if( x < min )
+                return min_val;
+            return ( x >= max ) ? max_val : x;
+        }
+
+        T min, max;          // thresholds
+        T min_val, max_val;  // replacement values
+    };
+
+    // std::complex specialization: the comparison uses the real part only. Values that fall
+    // inside [min, max) are returned as a complex number built from the real part alone, so the
+    // imaginary part is discarded.
+    template<typename T> struct hoNDA_clamp< std::complex<T> > //: public thrust::unary_function< std::complex<T>, std::complex<T> >
+    {
+        hoNDA_clamp( T _min, T _max, std::complex<T> _min_val, std::complex<T> _max_val )
+            : min(_min), max(_max), min_val(_min_val), max_val(_max_val) {}
+
+        std::complex<T> operator()(const std::complex<T> &x) const
+        {
+            const T re = real(x);
+            if( re < min )
+                return min_val;
+            if( re >= max )
+                return max_val;
+            return std::complex<T>(re);
+        }
+
+        T min, max;                        // thresholds on the real part
+        std::complex<T> min_val, max_val;  // replacement values
+    };
+
+    // complext specialization: the comparison uses the real part only. Values that fall inside
+    // [min, max) are returned as a complext built from the real part alone.
+    template<typename T> struct hoNDA_clamp< complext<T> > //: public thrust::unary_function< complext<T>, complext<T> >
+    {
+        hoNDA_clamp( T _min, T _max, complext<T> _min_val, complext<T> _max_val )
+            : min(_min), max(_max), min_val(_min_val), max_val(_max_val) {}
+
+        complext<T> operator()(const complext<T> &x) const
+        {
+            const T re = real(x);
+            if( re < min )
+                return min_val;
+            if( re >= max )
+                return max_val;
+            return complext<T>(re);
+        }
+
+        T min, max;                    // thresholds on the real part
+        complext<T> min_val, max_val;  // replacement values
+    };
+
+    // Clamps *x in place with a hoNDA_clamp functor: elements below min become min_val and
+    // elements at or above max become max_val.
+    // Throws std::runtime_error when x is a null pointer.
+    template<class T> void clamp( hoNDArray<T> *x,
+        typename realType<T>::Type min, typename realType<T>::Type max, T min_val, T max_val )
+    {
+        if( !x )
+            throw std::runtime_error("Gadgetron::clamp(): Invalid input array");
+
+        std::transform(x->begin(), x->end(), x->begin(), hoNDA_clamp<T>(min, max, min_val, max_val));
+    }
+
+    // Convenience overload: clamps with the thresholds themselves as replacement values.
+    template<class T> void clamp( hoNDArray<T> *x, typename realType<T>::Type min, typename realType<T>::Type max )
+    {
+        const T lowValue = T(min);
+        const T highValue = T(max);
+        clamp(x, min, max, lowValue, highValue);
+    }
+
+    // Unary functor used by clamp_min(): returns 'min' for values below it, otherwise the value.
+    template<typename T> struct hoNDA_clamp_min //: public thrust::unary_function<T,T>
+    {
+        hoNDA_clamp_min( T _min ) : min(_min) {}
+
+        T operator()(const T &x) const
+        {
+            return ( x < min ) ? min : x;
+        }
+
+        T min;  // lower threshold
+    };
+
+    // std::complex specialization: compares the real part against 'min' and always returns a
+    // complex number built from a single real value, so the imaginary part is discarded.
+    template<typename T> struct hoNDA_clamp_min< std::complex<T> > //: public thrust::unary_function< std::complex<T>, std::complex<T> >
+    {
+        hoNDA_clamp_min( T _min ) : min(_min) {}
+
+        std::complex<T> operator()(const std::complex<T> &x) const
+        {
+            const T re = real(x);
+            return std::complex<T>( ( re < min ) ? min : re );
+        }
+
+        T min;  // lower threshold on the real part
+    };
+
+    // complext specialization: compares the real part against 'min' and always returns a
+    // complext built from a single real value.
+    template<typename T> struct hoNDA_clamp_min< complext<T> > //: public thrust::unary_function< complext<T>, complext<T> >
+    {
+        hoNDA_clamp_min( T _min ) : min(_min) {}
+
+        complext<T> operator()(const complext<T> &x) const
+        {
+            const T re = real(x);
+            return complext<T>( ( re < min ) ? min : re );
+        }
+
+        T min;  // lower threshold on the real part
+    };
+
+    // Applies hoNDA_clamp_min to every element of *x in place.
+    // Throws std::runtime_error when x is a null pointer.
+    template<class T> void clamp_min( hoNDArray<T> *x, typename realType<T>::Type min )
+    {
+        if( !x )
+            throw std::runtime_error("Gadgetron::clamp_min(): Invalid input array");
+
+        std::transform(x->begin(), x->end(), x->begin(), hoNDA_clamp_min<T>(min));
+    }
+
+    // Unary functor used by clamp_max(): returns 'max' for values above it, otherwise the value.
+    template<typename T> struct hoNDA_clamp_max //: public thrust::unary_function<T,T>
+    {
+        hoNDA_clamp_max( T _max ) : max(_max) {}
+
+        T operator()(const T &x) const
+        {
+            return ( x > max ) ? max : x;
+        }
+
+        T max;  // upper threshold
+    };
+
+    // std::complex specialization: compares the real part against 'max' and always returns a
+    // complex number built from a single real value, so the imaginary part is discarded.
+    template<typename T> struct hoNDA_clamp_max< std::complex<T> > //: public thrust::unary_function< std::complex<T>, std::complex<T> >
+    {
+        hoNDA_clamp_max( T _max ) : max(_max) {}
+
+        std::complex<T> operator()(const std::complex<T> &x) const
+        {
+            const T re = real(x);
+            return std::complex<T>( ( re > max ) ? max : re );
+        }
+
+        T max;  // upper threshold on the real part
+    };
+
+    // complext specialization: compares the real part against 'max' and always returns a
+    // complext built from a single real value.
+    template<typename T> struct hoNDA_clamp_max< complext<T> > //: public thrust::unary_function< complext<T>, complext<T> >
+    {
+        hoNDA_clamp_max( T _max ) : max(_max) {}
+
+        complext<T> operator()(const complext<T> &x) const
+        {
+            const T re = real(x);
+            return complext<T>( ( re > max ) ? max : re );
+        }
+
+        T max;  // upper threshold on the real part
+    };
+
+    // Applies hoNDA_clamp_max to every element of *x in place.
+    // Throws std::runtime_error when x is a null pointer.
+    template<class T> void clamp_max( hoNDArray<T> *x, typename realType<T>::Type max )
+    {
+        if( !x )
+            throw std::runtime_error("Gadgetron::clamp_max(): Invalid input array");
+
+        std::transform(x->begin(), x->end(), x->begin(), hoNDA_clamp_max<T>(max));
+    }
+
+    // Rescales *x in place by val / abs(x[amax(x)]), where amax(x) supplies the element index.
+    // Throws std::runtime_error when x is a null pointer.
+    // NOTE(review): there is no guard for abs(...) == 0, so the scale factor is then the result
+    // of a division by zero.
+    template<class T> void normalize( hoNDArray<T> *x, typename realType<T>::Type val )
+    {
+        if( !x )
+            throw std::runtime_error("Gadgetron::normalize(): Invalid input array");
+
+        size_t peakIndex = amax(x);
+        T peak = x->get_data_ptr()[peakIndex];
+        typename realType<T>::Type factor = val/abs(peak);
+        *x *= factor;
+    }
+
+    // Element-wise shrinkage: out[i] = (x[i]/|x[i]|) * max(|x[i]| - gamma, 0), with the factor
+    // x[i]/|x[i]| replaced by T(0) when |x[i]| <= 0.
+    // The result is written to *out, or back into *x when out is null.
+    // Throws std::runtime_error when x is a null pointer.
+    template<class T> void shrink1( hoNDArray<T> *x, typename realType<T>::Type gamma, hoNDArray<T> *out )
+    {
+        typedef typename realType<T>::Type real_type;
+
+        if( !x )
+            throw std::runtime_error("Gadgetron::shrink1(): Invalid input array");
+
+        T *src = x->get_data_ptr();
+        T *dst = out ? out->get_data_ptr() : src;
+        long long count = (long long)x->get_number_of_elements();
+
+#ifdef USE_OMP
+#pragma omp parallel for
+#endif
+        for( long long i = 0; i < count; i++ ) {
+            T value = src[i];
+            real_type magnitude = abs(value);
+            T direction = (magnitude <= real_type(0)) ? T(0) : value/magnitude;
+            dst[i] = direction*std::max(magnitude-gamma, real_type(0));
+        }
+    }
+
+    // Element-wise p-shrinkage: out[i] = (x[i]/|x[i]|) * max(|x[i]| - gamma*|x[i]|^(p-1), 0),
+    // with the factor x[i]/|x[i]| replaced by T(0) when |x[i]| <= 0.
+    // The result is written to *out, or back into *x when out is null.
+    // Throws std::runtime_error when x is a null pointer.
+    template<class T> void pshrink( hoNDArray<T> *x, typename realType<T>::Type gamma,typename realType<T>::Type p, hoNDArray<T> *out )
+    {
+        typedef typename realType<T>::Type real_type;
+
+        if( !x )
+            throw std::runtime_error("Gadgetron::pshrink(): Invalid input array");
+
+        T *src = x->get_data_ptr();
+        T *dst = out ? out->get_data_ptr() : src;
+        long long count = (long long)x->get_number_of_elements();
+
+#ifdef USE_OMP
+#pragma omp parallel for
+#endif
+        for( long long i = 0; i < count; i++ ) {
+            T value = src[i];
+            real_type magnitude = abs(value);
+            T direction = (magnitude <= real_type(0)) ? T(0) : value/magnitude;
+            dst[i] = direction*std::max(magnitude-gamma*std::pow(magnitude,p-1), real_type(0));
+        }
+    }
+
+    // Element-wise shrinkage with an externally supplied magnitude array _s:
+    // out[i] = _x[i]/s * (s - gamma) when s = _s[i] exceeds gamma, otherwise 0.
+    // The result is written to *out, or back into *_x when out is null.
+    // Throws std::runtime_error when _x or _s is a null pointer.
+    template<class T> void shrinkd ( hoNDArray<T> *_x, hoNDArray<typename realType<T>::Type> *_s, typename realType<T>::Type gamma, hoNDArray<T> *out )
+    {
+        typedef typename realType<T>::Type real_type;
+
+        if( !_x || !_s )
+            throw std::runtime_error("Gadgetron::shrinkd(): Invalid input array");
+
+        T *src = _x->get_data_ptr();
+        real_type *magnitudes = _s->get_data_ptr();
+        T *dst = out ? out->get_data_ptr() : src;
+        long long count = (long long)_x->get_number_of_elements();
+
+#ifdef USE_OMP
+#pragma omp parallel for
+#endif
+        for( long long i = 0; i < count; i++ ) {
+            real_type s = magnitudes[i];
+            if (s > gamma) {
+                T x = src[i];
+                dst[i] = x/s*(s-gamma);
+            }
+            else {
+                dst[i] = 0;
+            }
+        }
+    }
+
+    // Element-wise p-shrinkage with an externally supplied magnitude array _s:
+    // out[i] = _x[i]/s * max(s - gamma*s^(p-1), 0) with s = _s[i].
+    // The result is written to *out, or back into *_x when out is null.
+    // Throws std::runtime_error when _x or _s is a null pointer (_s is dereferenced for every
+    // element, so it is validated the same way shrinkd() validates it).
+    // NOTE(review): unlike shrinkd() there is no guard on s, so s == 0 divides by zero.
+    template<class T> void pshrinkd( hoNDArray<T> *_x, hoNDArray<typename realType<T>::Type> *_s, typename realType<T>::Type gamma,typename realType<T>::Type p, hoNDArray<T> *out )
+    {
+        if( _x == 0x0 || _s == 0x0 )
+            throw std::runtime_error("Gadgetron::pshrinkd(): Invalid input array");
+
+        T *outPtr = (out==0x0) ? _x->get_data_ptr() : out->get_data_ptr();
+
+#ifdef USE_OMP
+#pragma omp parallel for
+#endif
+        for( long long i = 0; i < (long long)_x->get_number_of_elements(); i++ )
+        {
+            T x = _x->get_data_ptr()[i];
+            typename realType<T>::Type s = _s->get_data_ptr()[i];
+            outPtr[i] = x/s*std::max(s-gamma*std::pow(s,p-1),typename realType<T>::Type(0));
+        }
+    }
+    // Private utility to verify array dimensions.
+    // It "replaces" NDArray::dimensions_equal() to support batch mode: x is accepted when its
+    // element count is a whole multiple of y's element count.
+    // An empty y is reported as incompatible instead of evaluating a modulo by zero.
+    // There is an identical function for all array instances (currently hoNDArray, cuNDArray, hoCuNDArray)
+    // !!! Remember to fix any bugs in all versions !!!
+    //
+    template<class T,class S> static bool compatible_dimensions( const hoNDArray<T> &x, const hoNDArray<S> &y )
+    {
+        const size_t batchElements = y.get_number_of_elements();
+        if( batchElements == 0 )
+            return false;
+
+        return ((x.get_number_of_elements()%batchElements)==0);
+    }
+
+    // --------------------------------------------------------------------------------
+
+    // r[k] = a*x[k] + y[k] for k in [0, N), single precision.
+    // OpenMP parallel-for, enabled only when N exceeds NumElementsUseThreading.
+    inline void axpy(float a, size_t N, const float* x, const float* y, float* r)
+    {
+        #pragma omp parallel for default(none) shared(N, r, a , x, y) if(N>NumElementsUseThreading)
+        for ( long long k=0; k<(long long)N; ++k )
+            r[k] = a*x[k] + y[k];
+    }
+
+    // r[k] = a*x[k] + y[k] for k in [0, N), double precision.
+    // OpenMP parallel-for, enabled only when N exceeds NumElementsUseThreading.
+    inline void axpy(double a, size_t N, const double* x, const double* y, double* r)
+    {
+        #pragma omp parallel for default(none) shared(N, r, a , x, y) if(N>NumElementsUseThreading)
+        for ( long long k=0; k<(long long)N; ++k )
+            r[k] = a*x[k] + y[k];
+    }
+
+    // r[k] = a*x[k] + y[k] for std::complex<float>, with the complex product written out in
+    // real arithmetic. Both inputs are read into locals before r[k] is written.
+    // OpenMP parallel-for, enabled only when N exceeds NumElementsUseThreading.
+    inline void axpy( std::complex<float>  a, size_t N, const  std::complex<float> * x, const  std::complex<float> * y,  std::complex<float> * r)
+    {
+        #pragma omp parallel for default(none) shared(N, r, a, x, y) if(N>NumElementsUseThreading)
+        for ( long long k=0; k<(long long)N; ++k )
+        {
+            const float re1 = x[k].real();
+            const float im1 = x[k].imag();
+
+            const float re2 = y[k].real();
+            const float im2 = y[k].imag();
+
+            const float ar = a.real();
+            const float ai = a.imag();
+
+            // write the result through the element's {re, im} array view
+            float (&parts)[2] = reinterpret_cast<float(&)[2]>(r[k]);
+            parts[0] = re2 + ar*re1 - ai*im1;
+            parts[1] = im2 + ar*im1 + ai*re1;
+        }
+    }
+
+    // r[k] = a*x[k] + y[k] for std::complex<double>, with the complex product written out in
+    // real arithmetic. Both inputs are read into locals before r[k] is written.
+    // OpenMP parallel-for, enabled only when N exceeds NumElementsUseThreading.
+    inline void axpy( std::complex<double>  a, size_t N, const  std::complex<double> * x, const  std::complex<double> * y,  std::complex<double> * r)
+    {
+        #pragma omp parallel for default(none) shared(N, r, a, x, y) if(N>NumElementsUseThreading)
+        for ( long long k=0; k<(long long)N; ++k )
+        {
+            const double re1 = x[k].real();
+            const double im1 = x[k].imag();
+
+            const double re2 = y[k].real();
+            const double im2 = y[k].imag();
+
+            const double ar = a.real();
+            const double ai = a.imag();
+
+            // write the result through the element's {re, im} array view
+            double (&parts)[2] = reinterpret_cast<double(&)[2]>(r[k]);
+            parts[0] = re2 + ar*re1 - ai*im1;
+            parts[1] = im2 + ar*im1 + ai*re1;
+        }
+    }
+
+    // r[k] = a*x[k] + y[k] for complext<T>, with the complex product written out in real
+    // arithmetic. Both inputs are read into locals before r[k] is written.
+    // OpenMP parallel-for, enabled only when N exceeds NumElementsUseThreading.
+    template <typename T> 
+    void axpy( complext<T>  a, size_t N, const  complext<T> * x, const  complext<T> * y,  complext<T> * r)
+    {
+        #pragma omp parallel for shared(N, r, a, x, y) if(N>NumElementsUseThreading)
+        for ( long long k=0; k<(long long)N; ++k )
+        {
+            const T re1 = x[k].real();
+            const T im1 = x[k].imag();
+
+            const T re2 = y[k].real();
+            const T im2 = y[k].imag();
+
+            const T ar = a.real();
+            const T ai = a.imag();
+
+            // write the result through the element reinterpreted as a T[2] pair
+            T (&parts)[2] = reinterpret_cast<T(&)[2]>(r[k]);
+            parts[0] = re2 + ar*re1 - ai*im1;
+            parts[1] = im2 + ar*im1 + ai*re1;
+        }
+    }
+
+    // Computes r = a*x + y element-wise on host arrays.
+    // x and y must hold the same number of elements (checked with GADGET_DEBUG_CHECK_THROW).
+    // When r holds a different number of elements than x, y is assigned to r first so that r has
+    // storage of the right size. r may be the same array as x or as y (in-place update).
+    template <typename T> 
+    void axpy(T a, const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r)
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements());
+
+        if ( r.get_number_of_elements() != x.get_number_of_elements() )
+        {
+            r = y;
+        }
+
+        // y is deliberately NOT copied into r beforehand: the element-wise kernel below writes
+        // every element of r as a*x[n] + y[n], so the copy is redundant, and when r aliases x it
+        // would overwrite x before it is read (yielding a*y + y instead of a*x + y).
+        axpy(a, x.get_number_of_elements(), x.begin(), y.begin(), r.begin());
+    }
+
+    template EXPORTCPUCOREMATH void axpy(float a, const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r);
+    template EXPORTCPUCOREMATH void axpy(double a, const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
+    template EXPORTCPUCOREMATH void axpy( std::complex<float>  a, const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void axpy( std::complex<double>  a, const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, hoNDArray< std::complex<double> >& r);
+    template EXPORTCPUCOREMATH void axpy( complext<float>  a, const hoNDArray< complext<float> >& x, const hoNDArray< complext<float> >& y, hoNDArray< complext<float> >& r);
+    template EXPORTCPUCOREMATH void axpy( complext<double>  a, const hoNDArray< complext<double> >& x, const hoNDArray< complext<double> >& y, hoNDArray< complext<double> >& r);
+
+    // Pointer interface: y = a*x + y, accumulated in place into *y.
+    // Throws std::runtime_error when either pointer is null or when the element counts differ.
+    template<class T> void axpy( T a, hoNDArray<T> *x, hoNDArray<T> *y )
+    {
+        if( !x || !y )
+            throw std::runtime_error("Gadgetron::axpy(): Invalid input array");
+
+        if( x->get_number_of_elements() != y->get_number_of_elements() )
+            throw std::runtime_error("Gadgetron::axpy(): Array sizes mismatch");
+
+        // *y serves as both the addend and the destination
+        hoNDArray<T>& accumulator = *y;
+        axpy(a, *x, accumulator, accumulator);
+    }
+
+    // --------------------------------------------------------------------------------
+
+    // Scales x[0..N) by a in place, single precision.
+    // OpenMP parallel-for, enabled only when N exceeds NumElementsUseThreading.
+    inline void scal(size_t N, float a, float* x)
+    {
+        #pragma omp parallel for default(none) shared(N, x, a) if (N>NumElementsUseThreading)
+        for (long long k = 0; k < (long long)N; ++k)
+            x[k] = x[k]*a;
+    }
+
+    // Scales x[0..N) by a in place, double precision.
+    // OpenMP parallel-for, enabled only when N exceeds NumElementsUseThreading.
+    inline void scal(size_t N, double a, double* x)
+    {
+        #pragma omp parallel for default(none) shared(N, x, a) if (N>NumElementsUseThreading)
+        for (long long k = 0; k < (long long)N; ++k)
+            x[k] = x[k]*a;
+    }
+
+    // Multiplies every element of x[0..N) by the complex scalar a in place (std::complex<float>),
+    // with the complex product written out in real arithmetic.
+    // OpenMP parallel-for, enabled only when N exceeds NumElementsUseThreading.
+    inline void scal(size_t N,  std::complex<float>  a,  std::complex<float> * x)
+    {
+        #pragma omp parallel for default(none) shared(N, x, a) if (N>NumElementsUseThreading)
+        for (long long k = 0; k < (long long)N; ++k)
+        {
+            // read both components before overwriting the element
+            const float re = x[k].real();
+            const float im = x[k].imag();
+
+            const float ar = a.real();
+            const float ai = a.imag();
+
+            float (&parts)[2] = reinterpret_cast<float(&)[2]>(x[k]);
+            parts[0] = re*ar-im*ai;
+            parts[1] = re*ai+im*ar;
+        }
+    }
+
+    // Multiplies every element of x[0..N) by the complex scalar a in place (std::complex<double>),
+    // with the complex product written out in real arithmetic.
+    // OpenMP parallel-for, enabled only when N exceeds NumElementsUseThreading.
+    inline void scal(size_t N,  std::complex<double>  a,  std::complex<double> * x)
+    {
+        #pragma omp parallel for default(none) shared(N, x, a) if (N>NumElementsUseThreading)
+        for (long long k = 0; k < (long long)N; ++k)
+        {
+            // read both components before overwriting the element
+            const double re = x[k].real();
+            const double im = x[k].imag();
+
+            const double ar = a.real();
+            const double ai = a.imag();
+
+            double (&parts)[2] = reinterpret_cast<double(&)[2]>(x[k]);
+            parts[0] = re*ar-im*ai;
+            parts[1] = re*ai+im*ar;
+        }
+    }
+
+    // Multiplies every element of x[0..N) by the complex scalar a in place (complext<float>),
+    // with the complex product written out in real arithmetic.
+    // OpenMP parallel-for, enabled only when N exceeds NumElementsUseThreading.
+    inline void scal(size_t N,  complext<float>  a,  complext<float> * x)
+    {
+        #pragma omp parallel for default(none) shared(N, x, a) if (N>NumElementsUseThreading)
+        for (long long k = 0; k < (long long)N; ++k)
+        {
+            // read both components before overwriting the element
+            const  complext<float> & elem = x[k];
+            const float re = elem.real();
+            const float im = elem.imag();
+
+            const float ar = a.real();
+            const float ai = a.imag();
+
+            float (&parts)[2] = reinterpret_cast<float(&)[2]>(x[k]);
+            parts[0] = re*ar-im*ai;
+            parts[1] = re*ai+im*ar;
+        }
+    }
+
+    // Multiplies every element of x[0..N) by the complex scalar a in place (complext<double>),
+    // with the complex product written out in real arithmetic.
+    // OpenMP parallel-for, enabled only when N exceeds NumElementsUseThreading.
+    inline void scal(size_t N,  complext<double>  a,  complext<double> * x)
+    {
+        #pragma omp parallel for default(none) shared(N, x, a) if (N>NumElementsUseThreading)
+        for (long long k = 0; k < (long long)N; ++k)
+        {
+            // read both components before overwriting the element
+            const  complext<double> & elem = x[k];
+            const double re = elem.real();
+            const double im = elem.imag();
+
+            const double ar = a.real();
+            const double ai = a.imag();
+
+            double (&parts)[2] = reinterpret_cast<double(&)[2]>(x[k]);
+            parts[0] = re*ar-im*ai;
+            parts[1] = re*ai+im*ar;
+        }
+    }
+
+    // Array front end: multiplies every element of x by a, in place, by
+    // handing the element count and data pointer to the pointer-based scal().
+    template <typename T> 
+    void scal(T a, hoNDArray<T>& x)
+    {
+        const size_t count = x.get_number_of_elements();
+        T* const first = x.begin();
+        scal(count, a, first);
+    }
+
+    // explicit instantiations for the real and complex element types
+    template EXPORTCPUCOREMATH void scal(float a, hoNDArray<float>& x);
+    template EXPORTCPUCOREMATH void scal(double a, hoNDArray<double>& x);
+    template EXPORTCPUCOREMATH void scal( std::complex<float>  a, hoNDArray< std::complex<float> >& x);
+    template EXPORTCPUCOREMATH void scal( std::complex<double>  a, hoNDArray< std::complex<double> >& x);
+    template EXPORTCPUCOREMATH void scal( complext<float>  a, hoNDArray< complext<float> >& x);
+    template EXPORTCPUCOREMATH void scal( complext<double>  a, hoNDArray< complext<double> >& x);
+
+    // --------------------------------------------------------------------------------
+
+    // Scales a single-precision complex buffer by a real factor in place:
+    // both the real and the imaginary part of x[i] are multiplied by a, for
+    // every i in [0, N). The OpenMP parallel-for is only enabled when N
+    // exceeds NumElementsUseThreading.
+    inline void scal(size_t N, float a,  std::complex<float> * x)
+    {
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, x, a) if (N>NumElementsUseThreading)
+        for (i = 0; i < (long long)N; i++)
+        {
+            const  std::complex<float> & cur = x[i];
+            const float xr = cur.real();
+            const float xi = cur.imag();
+
+            // the element is written through a two-scalar {real, imag} view
+            float (&out)[2] = reinterpret_cast<float(&)[2]>(x[i]);
+            out[0] = xr*a;
+            out[1] = xi*a;
+        }
+    }
+
+    // Scales a double-precision complex buffer by a real factor in place:
+    // both the real and the imaginary part of x[i] are multiplied by a, for
+    // every i in [0, N). The OpenMP parallel-for is only enabled when N
+    // exceeds NumElementsUseThreading.
+    inline void scal(size_t N, double a,  std::complex<double> * x)
+    {
+        long long i;
+
+        #pragma omp parallel for default(none) private(i) shared(N, x, a) if (N>NumElementsUseThreading)
+        for (i = 0; i < (long long)N; i++)
+        {
+            const  std::complex<double> & cur = x[i];
+            const double xr = cur.real();
+            const double xi = cur.imag();
+
+            // the element is written through a two-scalar {real, imag} view
+            double (&out)[2] = reinterpret_cast<double(&)[2]>(x[i]);
+            out[0] = xr*a;
+            out[1] = xi*a;
+        }
+    }
+
+    // Array front end: scales a std::complex<T> array by the real factor a,
+    // in place, via the pointer-based scal().
+    template <typename T> 
+    void scal(T a, hoNDArray< std::complex<T> >& x)
+    {
+        const size_t count = x.get_number_of_elements();
+        std::complex<T>* const first = x.begin();
+        scal(count, a, first);
+    }
+
+    // explicit instantiations: real scale factor applied to std::complex arrays
+    template EXPORTCPUCOREMATH void scal(float a, hoNDArray< std::complex<float> >& x);
+    template EXPORTCPUCOREMATH void scal(double a, hoNDArray< std::complex<double> >& x);
+
+    // Array front end: scales a complext<T> array by the real factor a,
+    // in place, via the pointer-based scal().
+    template <typename T> 
+    void scal(T a, hoNDArray< complext<T> >& x)
+    {
+        const size_t count = x.get_number_of_elements();
+        complext<T>* const first = x.begin();
+        scal(count, a, first);
+    }
+
+    // explicit instantiations: real scale factor applied to complext arrays
+    template EXPORTCPUCOREMATH void scal(float a, hoNDArray< complext<float> >& x);
+    template EXPORTCPUCOREMATH void scal(double a, hoNDArray< complext<double> >& x);
+
+    // --------------------------------------------------------------------------------
+
+    // 2D convolution with periodic (wrap-around) boundaries, applied
+    // independently to `num` consecutive RO x E1 images.
+    //
+    //   RO, E1   : size of one image; element (ro, e1) is stored at ro + e1*RO
+    //   num      : number of images stored back to back in x and z
+    //   x        : input, num*RO*E1 elements
+    //   kRO, kE1 : kernel size; y holds kRO*kE1 elements, element (ro, e1) at ro + e1*kRO
+    //   z        : output, num*RO*E1 elements, fully overwritten
+    //
+    // z must not alias x: neighbouring input samples are read while the
+    // output is being written.
+    // Any exception raised inside is reported through GADGET_THROW.
+    template<typename T> 
+    void conv2(size_t RO, size_t E1, size_t num, const T* x, size_t kRO, size_t kE1, const T* y, T* z)
+    {
+        try
+        {
+            long long halfKRO = (long long)(kRO/2);
+            long long halfKE1 = (long long)(kE1/2);
+
+            hoNDArray<T> flipY(2*halfKRO+1, 2*halfKE1+1);
+            T* pKer = flipY.begin();
+
+            long long n;
+            long long ro, e1;
+
+            // The padded kernel has 2*half+1 samples per axis. For an even kernel
+            // size that is one more than the kernel provides, so clear the whole
+            // buffer first; otherwise the unfilled row/column would enter the sum
+            // with whatever value the buffer happened to hold.
+            long long lenKer = (2*halfKRO+1)*(2*halfKE1+1);
+            for ( n=0; n<lenKer; n++ )
+            {
+                pKer[n] = 0;
+            }
+
+            // flip the kernel
+            for ( e1=0; e1<(long long)kE1; e1++ )
+            {
+                long long flip_e1 = 2*halfKE1 - e1;
+
+                for ( ro=0; ro<(long long)kRO; ro++ )
+                {
+                    long long flip_ro = 2*halfKRO - ro;
+
+                    flipY(flip_ro, flip_e1) = y[ro+e1*kRO];
+                }
+            }
+
+            // perform the convolution
+            #pragma omp parallel for default(none) private(n, ro, e1) shared(num, x, RO, E1, z, halfKRO, halfKE1, pKer)
+            for ( n=0; n<(long long)num; n++ )
+            {
+                const T* pX = x + n*RO*E1;
+                T* pZ = z + n*RO*E1;
+
+                long long kro, ke1, dro, de1;
+
+                for ( e1=0; e1<(long long)E1; e1++ )
+                {
+                    for ( ro=0; ro<(long long)RO; ro++ )
+                    {
+                        pZ[ro + e1*RO] = 0;
+                        for ( ke1=-halfKE1; ke1<=halfKE1; ke1++ )
+                        {
+                            // periodic wrap; loops (not a single step) so that a kernel
+                            // wider than the image still lands inside [0, E1)
+                            de1 = ke1 + e1;
+                            while ( de1 < 0 )
+                            {
+                                de1 += (long long)E1;
+                            }
+                            while ( de1 >= (long long)E1 )
+                            {
+                                de1 -= (long long)E1;
+                            }
+
+                            for ( kro=-halfKRO; kro<=halfKRO; kro++ )
+                            {
+                                dro = kro + ro;
+                                while ( dro < 0 )
+                                {
+                                    dro += (long long)RO;
+                                }
+                                while ( dro >= (long long)RO )
+                                {
+                                    dro -= (long long)RO;
+                                }
+
+                                pZ[ro + e1*RO] += pKer[ kro+halfKRO + (ke1+halfKE1) * (2*halfKRO+1) ] * pX[dro + de1*RO];
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors happened in conv2(size_t RO, size_t E1, size_t num, const T* x, size_t kRO, size_t kE1, const T* y, T* z) ... ");
+        }
+    }
+
+    // Array front end for the pointer-based conv2(): convolves every 2D slice
+    // (first two dimensions) of x with the kernel y and stores the result in z.
+    // If z does not have the dimensions of x it is first assigned from x, which
+    // gives it the right shape; its contents are then overwritten.
+    // An input with no elements in its first two dimensions is a no-op.
+    // Any exception raised inside is reported through GADGET_THROW.
+    template<typename T> 
+    void conv2(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& z)
+    {
+        try
+        {
+            if ( !z.dimensions_equal(&x) )
+            {
+                z = x;
+            }
+
+            long long RO = (long long) x.get_size(0);
+            long long E1 = (long long) x.get_size(1);
+
+            // nothing to convolve; returning here also keeps the division below
+            // from dividing by zero, which the catch(...) could not intercept
+            if ( RO*E1 == 0 )
+            {
+                return;
+            }
+
+            long long num = ((long long) x.get_number_of_elements()) / (RO*E1);
+
+            long long kRO = (long long) y.get_size(0);
+            long long kE1 = (long long) y.get_size(1);
+
+            conv2(RO, E1, num, x.begin(), kRO, kE1, y.begin(), z.begin());
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors happened in conv2(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& z) ... ");
+        }
+    }
+
+    // explicit instantiations for real and std::complex element types
+    template EXPORTCPUCOREMATH void conv2(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& z);
+    template EXPORTCPUCOREMATH void conv2(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& z);
+    template EXPORTCPUCOREMATH void conv2(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, hoNDArray< std::complex<float> >& z);
+    template EXPORTCPUCOREMATH void conv2(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, hoNDArray< std::complex<double> >& z);
+
+    // --------------------------------------------------------------------------------
+
+    // 3D convolution with periodic (wrap-around) boundaries, applied
+    // independently to `num` consecutive RO x E1 x E2 volumes.
+    //
+    //   RO, E1, E2    : size of one volume; element (ro, e1, e2) is stored at
+    //                   ro + e1*RO + e2*RO*E1
+    //   num           : number of volumes stored back to back in x and z
+    //   x             : input, num*RO*E1*E2 elements
+    //   kRO, kE1, kE2 : kernel size; y holds kRO*kE1*kE2 elements laid out like x
+    //   z             : output, num*RO*E1*E2 elements, fully overwritten
+    //
+    // z must not alias x: neighbouring input samples are read while the
+    // output is being written.
+    // The outer OpenMP loop over volumes is only enabled for num > 8; the loop
+    // over e2 carries its own parallel-for directive.
+    // Any exception raised inside is reported through GADGET_THROW.
+    template<typename T> 
+    void conv3(size_t RO, size_t E1, size_t E2, size_t num, const T* x, size_t kRO, size_t kE1, size_t kE2, const T* y, T* z)
+    {
+        try
+        {
+            long long halfKRO = (long long)(kRO/2);
+            long long halfKE1 = (long long)(kE1/2);
+            long long halfKE2 = (long long)(kE2/2);
+
+            hoNDArray<T> flipY(2*halfKRO+1, 2*halfKE1+1, 2*halfKE2+1);
+            T* pKer = flipY.begin();
+
+            long long n, e2;
+            long long ro, e1;
+
+            // The padded kernel has 2*half+1 samples per axis. For an even kernel
+            // size that is one more than the kernel provides, so clear the whole
+            // buffer first; otherwise the unfilled plane/row/column would enter
+            // the sum with whatever value the buffer happened to hold.
+            long long lenKer = (2*halfKRO+1)*(2*halfKE1+1)*(2*halfKE2+1);
+            for ( n=0; n<lenKer; n++ )
+            {
+                pKer[n] = 0;
+            }
+
+            // flip the kernel
+            for ( e2=0; e2<(long long)kE2; e2++ )
+            {
+                long long flip_e2 = 2*halfKE2 - e2;
+
+                for ( e1=0; e1<(long long)kE1; e1++ )
+                {
+                    long long flip_e1 = 2*halfKE1 - e1;
+
+                    for ( ro=0; ro<(long long)kRO; ro++ )
+                    {
+                        long long flip_ro = 2*halfKRO - ro;
+
+                        flipY(flip_ro, flip_e1, flip_e2) = y[ro+e1*kRO+e2*kRO*kE1];
+                    }
+                }
+            }
+
+            // perform the convolution
+            #pragma omp parallel for default(none) private(n) shared(num, x, RO, E1, E2, z, halfKRO, halfKE1, halfKE2, pKer) if ( num > 8 )
+            for ( n=0; n<(long long)num; n++ )
+            {
+                const T* pX = x + n*RO*E1*E2;
+                T* pZ = z + n*RO*E1*E2;
+
+                long long kro, ke1, ke2, dro, de1, de2;
+
+                #pragma omp parallel for default(none) private(ro, e1, e2, kro, ke1, ke2, dro, de1, de2) shared(pX, RO, E1, E2, pZ, halfKRO, halfKE1, halfKE2, pKer)
+                for ( e2=0; e2<(long long)E2; e2++ )
+                {
+                    for ( e1=0; e1<(long long)E1; e1++ )
+                    {
+                        for ( ro=0; ro<(long long)RO; ro++ )
+                        {
+                            pZ[ro + e1*RO + e2*RO*E1] = 0;
+                            for ( ke2=-halfKE2; ke2<=halfKE2; ke2++ )
+                            {
+                                // periodic wrap; loops (not a single step) so that a
+                                // kernel wider than the volume still lands inside [0, E2)
+                                de2 = ke2 + e2;
+                                while ( de2 < 0 )
+                                {
+                                    de2 += (long long)E2;
+                                }
+                                while ( de2 >= (long long)E2 )
+                                {
+                                    de2 -= (long long)E2;
+                                }
+
+                                for ( ke1=-halfKE1; ke1<=halfKE1; ke1++ )
+                                {
+                                    de1 = ke1 + e1;
+                                    while ( de1 < 0 )
+                                    {
+                                        de1 += (long long)E1;
+                                    }
+                                    while ( de1 >= (long long)E1 )
+                                    {
+                                        de1 -= (long long)E1;
+                                    }
+
+                                    for ( kro=-halfKRO; kro<=halfKRO; kro++ )
+                                    {
+                                        dro = kro + ro;
+                                        while ( dro < 0 )
+                                        {
+                                            dro += (long long)RO;
+                                        }
+                                        while ( dro >= (long long)RO )
+                                        {
+                                            dro -= (long long)RO;
+                                        }
+
+                                        pZ[ro + e1*RO + e2*RO*E1] += pKer[ kro+halfKRO + (ke1+halfKE1)*(2*halfKRO+1) + (ke2+halfKE2)*(2*halfKRO+1)*(2*halfKE1+1) ] * pX[dro + de1*RO + de2*RO*E1];
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors happened in conv3(size_t RO, size_t E1, size_t E2, size_t num, const T* x, size_t kRO, size_t kE1, size_t kE2, const T* y, T* z) ... ");
+        }
+    }
+
+    // Array front end for the pointer-based conv3(): convolves every 3D volume
+    // (first three dimensions) of x with the kernel y and stores the result in z.
+    // If z does not have the dimensions of x it is first assigned from x, which
+    // gives it the right shape; its contents are then overwritten.
+    // An input with no elements in its first three dimensions is a no-op.
+    // Any exception raised inside is reported through GADGET_THROW.
+    template<typename T> 
+    void conv3(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& z)
+    {
+        try
+        {
+            if ( !z.dimensions_equal(&x) )
+            {
+                z = x;
+            }
+
+            long long RO = (long long) x.get_size(0);
+            long long E1 = (long long) x.get_size(1);
+            long long E2 = (long long) x.get_size(2);
+
+            // nothing to convolve; returning here also keeps the division below
+            // from dividing by zero, which the catch(...) could not intercept
+            if ( RO*E1*E2 == 0 )
+            {
+                return;
+            }
+
+            long long num = ((long long)x.get_number_of_elements()) / (RO*E1*E2);
+
+            long long kRO = (long long) y.get_size(0);
+            long long kE1 = (long long) y.get_size(1);
+            long long kE2 = (long long) y.get_size(2);
+
+            conv3(RO, E1, E2, num, x.begin(), kRO, kE1, kE2, y.begin(), z.begin());
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors happened in conv3(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& z) ... ");
+        }
+    }
+
+    // explicit instantiations for real and std::complex element types
+    template EXPORTCPUCOREMATH void conv3(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& z);
+    template EXPORTCPUCOREMATH void conv3(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& z);
+    template EXPORTCPUCOREMATH void conv3(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, hoNDArray< std::complex<float> >& z);
+    template EXPORTCPUCOREMATH void conv3(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, hoNDArray< std::complex<double> >& z);
+
+    // --------------------------------------------------------------------------------
+
+    // In-place element-wise addition x += y. x is processed in consecutive
+    // batches of y's element count and y is added to each batch via add().
+    // Throws std::runtime_error when compatible_dimensions() rejects the pair.
+    template<class T> hoNDArray<T>& operator+= (hoNDArray<T> &x, const hoNDArray<T> &y)
+    {
+        if( !compatible_dimensions<T,T>(x,y) ){
+            throw std::runtime_error("hoNDArray::operator+=: Incompatible array dimensions");
+        }
+
+        const size_t batchLen = y.get_number_of_elements();
+        const size_t batchCount = x.get_number_of_elements()/batchLen;
+
+        for( size_t b=0; b<batchCount; b++ ){
+            // the batch is both the left operand and the destination
+            T* const dst = x.get_data_ptr()+b*batchLen;
+            add(batchLen, dst, y.get_data_ptr(), dst);
+        }
+        return x;
+    }
+
+    // In-place element-wise addition of a real array y to a std::complex
+    // array x. x is processed in consecutive batches of y's element count and
+    // y is added to each batch via add().
+    // Throws std::runtime_error when compatible_dimensions() rejects the pair.
+    template<class T> hoNDArray< std::complex<T> >& operator+= (hoNDArray< std::complex<T> > &x, const hoNDArray<T> &y)
+    {
+        if( !compatible_dimensions<std::complex<T>,T>(x,y) ){
+            throw std::runtime_error("hoNDArray::operator+=: Incompatible array dimensions");
+        }
+
+        const size_t batchLen = y.get_number_of_elements();
+        const size_t batchCount = x.get_number_of_elements()/batchLen;
+
+        for( size_t b=0; b<batchCount; b++ ){
+            // the batch is both the left operand and the destination
+            std::complex<T>* const dst = x.get_data_ptr()+b*batchLen;
+            add(batchLen, dst, y.get_data_ptr(), dst);
+        }
+        return x;
+    }
+
+    // In-place element-wise addition of a real array y to a complext array x.
+    // x is processed in consecutive batches of y's element count and y is
+    // added to each batch via add().
+    // Throws std::runtime_error when compatible_dimensions() rejects the pair.
+    template<class T> hoNDArray< complext<T> >& operator+= (hoNDArray< complext<T> > &x, const hoNDArray<T> &y)
+    {
+        if( !compatible_dimensions<complext<T>,T>(x,y) ){
+            throw std::runtime_error("hoNDArray::operator+=: Incompatible array dimensions");
+        }
+
+        const size_t batchLen = y.get_number_of_elements();
+        const size_t batchCount = x.get_number_of_elements()/batchLen;
+
+        for( size_t b=0; b<batchCount; b++ ){
+            // the batch is both the left operand and the destination
+            complext<T>* const dst = x.get_data_ptr()+b*batchLen;
+            add(batchLen, dst, y.get_data_ptr(), dst);
+        }
+        return x;
+    }
+
+    // Adds the scalar y to every element of x in place and returns x.
+    // The loop works on the raw data pointer obtained from x.begin(); the
+    // OpenMP parallel-for is only enabled when the element count exceeds
+    // NumElementsUseThreading.
+    template<class T> hoNDArray<T>& operator+= (hoNDArray<T> &x, const T &y)
+    {
+        long long n;
+
+        size_t N = x.get_number_of_elements();
+        T* px = x.begin();
+
+        // px (previously computed but unused) replaces per-element x[n] access
+        #pragma omp parallel for default(none) private(n) shared(N, px, y) if(N>NumElementsUseThreading)
+        for ( n=0; n<(long long)N; ++n)
+        {
+            px[n] += y;
+        }
+
+        return x;
+    }
+
+    // Adds the real scalar y to every element of the std::complex array x in
+    // place and returns x.
+    // The loop works on the raw data pointer obtained from x.begin(); the
+    // OpenMP parallel-for is only enabled when the element count exceeds
+    // NumElementsUseThreading.
+    template<class T> hoNDArray< std::complex<T> >& operator+= (hoNDArray< std::complex<T> > &x, const T &y)
+    {
+        long long n;
+
+        size_t N = x.get_number_of_elements();
+        std::complex<T>* px = x.begin();
+
+        // px (previously computed but unused) replaces per-element x[n] access
+        #pragma omp parallel for default(none) private(n) shared(N, px, y) if(N>NumElementsUseThreading)
+        for ( n=0; n<(long long)N; ++n)
+        {
+            px[n] += y;
+        }
+
+        return x;
+    }
+
+    // Adds the real scalar y to every element of the complext array x in
+    // place and returns x.
+    // The loop works on the raw data pointer obtained from x.begin(); the
+    // OpenMP parallel-for is only enabled when the element count exceeds
+    // NumElementsUseThreading.
+    template<class T> hoNDArray< complext<T> >& operator+= (hoNDArray< complext<T> > &x, const T &y)
+    {
+        long long n;
+
+        size_t N = x.get_number_of_elements();
+        complext<T>* px = x.begin();
+
+        // px (previously computed but unused) replaces per-element x[n] access
+        #pragma omp parallel for default(none) private(n) shared(N, px, y) if(N>NumElementsUseThreading)
+        for ( n=0; n<(long long)N; ++n)
+        {
+            px[n] += y;
+        }
+
+        return x;
+    }
+
+    // --------------------------------------------------------------------------------
+
+    // In-place element-wise subtraction x -= y. x is processed in consecutive
+    // batches of y's element count and y is subtracted from each batch via
+    // subtract().
+    // Throws std::runtime_error when compatible_dimensions() rejects the pair.
+    template<class T> hoNDArray<T>& operator-= (hoNDArray<T> &x, const hoNDArray<T> &y)
+    {
+        if( !compatible_dimensions<T,T>(x,y) ){
+            throw std::runtime_error("hoNDArray::operator-=: Incompatible array dimensions");
+        }
+
+        const size_t batchLen = y.get_number_of_elements();
+        const size_t batchCount = x.get_number_of_elements()/batchLen;
+
+        for( size_t b=0; b<batchCount; b++ ){
+            // the batch is both the left operand and the destination
+            T* const dst = x.get_data_ptr()+b*batchLen;
+            subtract(batchLen, dst, y.get_data_ptr(), dst);
+        }
+        return x;
+    }
+
+    // In-place element-wise subtraction of a real array y from a std::complex
+    // array x. x is processed in consecutive batches of y's element count and
+    // y is subtracted from each batch via subtract().
+    // Throws std::runtime_error when compatible_dimensions() rejects the pair.
+    template<class T> hoNDArray< std::complex<T> >& operator-= (hoNDArray< std::complex<T> > &x, const hoNDArray<T> &y)
+    {
+        if( !compatible_dimensions<std::complex<T>,T>(x,y) ){
+            throw std::runtime_error("hoNDArray::operator-=: Incompatible array dimensions");
+        }
+
+        const size_t batchLen = y.get_number_of_elements();
+        const size_t batchCount = x.get_number_of_elements()/batchLen;
+
+        for( size_t b=0; b<batchCount; b++ ){
+            // the batch is both the left operand and the destination
+            std::complex<T>* const dst = x.get_data_ptr()+b*batchLen;
+            subtract(batchLen, dst, y.get_data_ptr(), dst);
+        }
+        return x;
+    }
+
+    // In-place element-wise subtraction of a real array y from a complext
+    // array x. x is processed in consecutive batches of y's element count and
+    // y is subtracted from each batch via subtract().
+    // Throws std::runtime_error when compatible_dimensions() rejects the pair.
+    template<class T> hoNDArray< complext<T> >& operator-= (hoNDArray< complext<T> > &x, const hoNDArray<T> &y)
+    {
+        if( !compatible_dimensions<complext<T>,T>(x,y) ){
+            throw std::runtime_error("hoNDArray::operator-=: Incompatible array dimensions");
+        }
+
+        const size_t batchLen = y.get_number_of_elements();
+        const size_t batchCount = x.get_number_of_elements()/batchLen;
+
+        for( size_t b=0; b<batchCount; b++ ){
+            // the batch is both the left operand and the destination
+            complext<T>* const dst = x.get_data_ptr()+b*batchLen;
+            subtract(batchLen, dst, y.get_data_ptr(), dst);
+        }
+        return x;
+    }
+
+    // Subtracts the scalar y from every element of x in place and returns x.
+    // The loop works on the raw data pointer obtained from x.begin(); the
+    // OpenMP parallel-for is only enabled when the element count exceeds
+    // NumElementsUseThreading.
+    template<class T> hoNDArray<T>& operator-= (hoNDArray<T> &x, const T &y)
+    {
+        long long n;
+
+        size_t N = x.get_number_of_elements();
+        T* px = x.begin();
+
+        // px (previously computed but unused) replaces per-element x[n] access
+        #pragma omp parallel for default(none) private(n) shared(N, px, y) if(N>NumElementsUseThreading)
+        for ( n=0; n<(long long)N; ++n)
+        {
+            px[n] -= y;
+        }
+
+        return x;
+    }
+
+    // Subtracts the real scalar y from every element of the std::complex
+    // array x in place and returns x.
+    // The loop works on the raw data pointer obtained from x.begin(); the
+    // OpenMP parallel-for is only enabled when the element count exceeds
+    // NumElementsUseThreading.
+    template<class T> hoNDArray< std::complex<T> >& operator-= (hoNDArray< std::complex<T> > &x, const T &y)
+    {
+        long long n;
+
+        size_t N = x.get_number_of_elements();
+        std::complex<T>* px = x.begin();
+
+        // px (previously computed but unused) replaces per-element x[n] access
+        #pragma omp parallel for default(none) private(n) shared(N, px, y) if(N>NumElementsUseThreading)
+        for ( n=0; n<(long long)N; ++n)
+        {
+            px[n] -= y;
+        }
+
+        return x;
+    }
+
+    // Subtracts the real scalar y from every element of the complext array x
+    // in place and returns x.
+    // The loop works on the raw data pointer obtained from x.begin(); the
+    // OpenMP parallel-for is only enabled when the element count exceeds
+    // NumElementsUseThreading.
+    template<class T> hoNDArray< complext<T> >& operator-= (hoNDArray< complext<T> > &x, const T &y)
+    {
+        long long n;
+
+        size_t N = x.get_number_of_elements();
+        complext<T>* px = x.begin();
+
+        // px (previously computed but unused) replaces per-element x[n] access
+        #pragma omp parallel for default(none) private(n) shared(N, px, y) if(N>NumElementsUseThreading)
+        for ( n=0; n<(long long)N; ++n)
+        {
+            px[n] -= y;
+        }
+
+        return x;
+    }
+
+    // --------------------------------------------------------------------------------
+
+    // In-place element-wise multiplication x *= y. x is processed in
+    // consecutive batches of y's element count and each batch is multiplied
+    // by y via multiply().
+    // Throws std::runtime_error when compatible_dimensions() rejects the pair.
+    template<class T> hoNDArray<T>& operator*= (hoNDArray<T> &x, const hoNDArray<T> &y)
+    {
+        if( !compatible_dimensions<T,T>(x,y) ){
+            throw std::runtime_error("hoNDArray::operator*=: Incompatible array dimensions");
+        }
+
+        const size_t batchLen = y.get_number_of_elements();
+        const size_t batchCount = x.get_number_of_elements()/batchLen;
+
+        for( size_t b=0; b<batchCount; b++ ){
+            // the batch is both the left operand and the destination
+            T* const dst = x.get_data_ptr()+b*batchLen;
+            multiply(batchLen, dst, y.get_data_ptr(), dst);
+        }
+        return x;
+    }
+
+    // In-place element-wise multiplication of a std::complex array x by a
+    // real array y. x is processed in consecutive batches of y's element
+    // count and each batch is multiplied by y via multiply().
+    // Throws std::runtime_error when compatible_dimensions() rejects the pair.
+    template<class T> hoNDArray< std::complex<T> >& operator*= (hoNDArray< std::complex<T> > &x, const hoNDArray<T> &y)
+    {
+        if( !compatible_dimensions<std::complex<T>,T>(x,y) ){
+            throw std::runtime_error("hoNDArray::operator*=: Incompatible array dimensions");
+        }
+
+        const size_t batchLen = y.get_number_of_elements();
+        const size_t batchCount = x.get_number_of_elements()/batchLen;
+
+        for( size_t b=0; b<batchCount; b++ ){
+            // the batch is both the left operand and the destination
+            std::complex<T>* const dst = x.get_data_ptr()+b*batchLen;
+            multiply(batchLen, dst, y.get_data_ptr(), dst);
+        }
+        return x;
+    }
+
+    // In-place element-wise multiplication of a complext array x by a real
+    // array y. x is processed in consecutive batches of y's element count and
+    // each batch is multiplied by y via multiply().
+    // Throws std::runtime_error when compatible_dimensions() rejects the pair.
+    template<class T> hoNDArray< complext<T> >& operator*= (hoNDArray< complext<T> > &x, const hoNDArray<T> &y)
+    {
+        if( !compatible_dimensions<complext<T>,T>(x,y) ){
+            throw std::runtime_error("hoNDArray::operator*=: Incompatible array dimensions");
+        }
+
+        const size_t batchLen = y.get_number_of_elements();
+        const size_t batchCount = x.get_number_of_elements()/batchLen;
+
+        for( size_t b=0; b<batchCount; b++ ){
+            // the batch is both the left operand and the destination
+            complext<T>* const dst = x.get_data_ptr()+b*batchLen;
+            multiply(batchLen, dst, y.get_data_ptr(), dst);
+        }
+        return x;
+    }
+
+    // Multiplies every element of x by the scalar y in place, delegating the
+    // work to scal(), and returns x.
+    template<class T> hoNDArray<T>& operator*= (hoNDArray<T> &x, const T &y)
+    {
+        const size_t count = x.get_number_of_elements();
+        T* const first = x.begin();
+        scal(count, y, first);
+
+        return x;
+    }
+
+    // Multiplies every element of the std::complex array x by the real
+    // scalar y in place, delegating the work to scal(), and returns x.
+    template<class T> hoNDArray< std::complex<T> >& operator*= (hoNDArray< std::complex<T> > &x, const T &y)
+    {
+        const size_t count = x.get_number_of_elements();
+        std::complex<T>* const first = x.begin();
+        scal(count, y, first);
+
+        return x;
+    }
+
+    // Multiplies every element of the complext array x by the real scalar y
+    // in place and returns x. The data pointer is reinterpreted as
+    // std::complex<T>* so that the std::complex scal() overload does the work.
+    template<class T> hoNDArray< complext<T> >& operator*= (hoNDArray< complext<T> > &x, const T &y)
+    {
+        const size_t count = x.get_number_of_elements();
+        std::complex<T>* const first = reinterpret_cast< std::complex<T>* >(x.begin());
+        scal(count, y, first);
+        return x;
+    }
+
+    // --------------------------------------------------------------------------------
+
+    // In-place element-wise division x /= y. x is processed in consecutive
+    // batches of y's element count and each batch is divided by y via divide().
+    // Throws std::runtime_error when compatible_dimensions() rejects the pair.
+    template<class T> hoNDArray<T>& operator/= (hoNDArray<T> &x, const hoNDArray<T> &y)
+    {
+        if( !compatible_dimensions<T,T>(x,y) ){
+            throw std::runtime_error("hoNDArray::operator/=: Incompatible array dimensions");
+        }
+
+        const size_t batchCount = x.get_number_of_elements()/y.get_number_of_elements();
+
+        for( size_t b=0; b<batchCount; b++ ){
+            const size_t batchLen = y.get_number_of_elements();
+            // the batch is both the numerator and the destination
+            T* const dst = x.get_data_ptr()+b*batchLen;
+            divide(batchLen, dst, y.get_data_ptr(), dst);
+        }
+        return x;
+    }
+
+    // In-place element-wise division of a std::complex array x by a real
+    // array y. x is processed in consecutive batches of y's element count and
+    // each batch is divided by y via divide().
+    // Throws std::runtime_error when compatible_dimensions() rejects the pair.
+    template<class T> hoNDArray< std::complex<T> >& operator/= (hoNDArray< std::complex<T> > &x, const hoNDArray<T> &y)
+    {
+        if( !compatible_dimensions<std::complex<T>,T>(x,y) ){
+            throw std::runtime_error("hoNDArray::operator/=: Incompatible array dimensions");
+        }
+
+        const size_t batchCount = x.get_number_of_elements()/y.get_number_of_elements();
+
+        for( size_t b=0; b<batchCount; b++ ){
+            const size_t batchLen = y.get_number_of_elements();
+            // the batch is both the numerator and the destination
+            std::complex<T>* const dst = x.get_data_ptr()+b*batchLen;
+            divide(batchLen, dst, y.get_data_ptr(), dst);
+        }
+        return x;
+    }
+
+    // In-place element-wise division of a complext array x by a real array y.
+    // x is processed in consecutive batches of y's element count and each
+    // batch is divided by y via divide().
+    // Throws std::runtime_error when compatible_dimensions() rejects the pair.
+    template<class T> hoNDArray< complext<T> >& operator/= (hoNDArray< complext<T> > &x, const hoNDArray<T> &y)
+    {
+        if( !compatible_dimensions<complext<T>,T>(x,y) ){
+            throw std::runtime_error("hoNDArray::operator/=: Incompatible array dimensions");
+        }
+
+        const size_t batchCount = x.get_number_of_elements()/y.get_number_of_elements();
+
+        for( size_t b=0; b<batchCount; b++ ){
+            const size_t batchLen = y.get_number_of_elements();
+            // the batch is both the numerator and the destination
+            complext<T>* const dst = x.get_data_ptr()+b*batchLen;
+            divide(batchLen, dst, y.get_data_ptr(), dst);
+        }
+        return x;
+    }
+
+    // Divides every element of x by the scalar y in place and returns x.
+    // Implemented as a multiplication by the reciprocal T(1)/y through scal().
+    template<class T> hoNDArray<T>& operator/= (hoNDArray<T> &x, const T &y)
+    {
+        T reciprocal = T(1)/y;
+
+        const size_t count = x.get_number_of_elements();
+        T* const first = x.begin();
+        scal(count, reciprocal, first);
+
+        return x;
+    }
+
+    // Divides every element of the std::complex array x by the real scalar y
+    // in place and returns x. Implemented as a multiplication by the
+    // reciprocal T(1)/y through scal().
+    template<class T> hoNDArray< std::complex<T> >& operator/= (hoNDArray< std::complex<T> > &x, const T &y)
+    {
+        T reciprocal = T(1)/y;
+
+        const size_t count = x.get_number_of_elements();
+        std::complex<T>* const first = x.begin();
+        scal(count, reciprocal, first);
+
+        return x;
+    }
+
+    // Divides every element of the complext array x by the real scalar y in
+    // place and returns x. Implemented as a multiplication by the reciprocal
+    // T(1)/y; the data pointer is reinterpreted as std::complex<T>* so that
+    // the std::complex scal() overload does the work.
+    template<class T> hoNDArray< complext<T> >& operator/= (hoNDArray< complext<T> > &x, const T &y)
+    {
+        T reciprocal = T(1)/y;
+
+        const size_t count = x.get_number_of_elements();
+        std::complex<T>* const first = reinterpret_cast< std::complex<T>* >(x.begin());
+        scal(count, reciprocal, first);
+
+        return x;
+    }
+
+    // --------------------------------------------------------------------------------
+
+    //
+    // Instantiation
+    //
+    // Explicit template instantiations of the element-wise operations, one
+    // group per element type. Each is tagged with EXPORTCPUCOREMATH
+    // (presumably the library's symbol-export macro -- confirm in the export header).
+    //
+
+    // float: functions returning a new array come as boost::shared_ptr,
+    // the *_inplace variants modify their argument
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > abs<float>( hoNDArray<float>* );
+    template EXPORTCPUCOREMATH void abs_inplace<float>( hoNDArray<float>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > abs_square<float>( hoNDArray<float>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > sqrt<float>( hoNDArray<float>* );
+    template EXPORTCPUCOREMATH void sqrt_inplace<float>( hoNDArray<float>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > square<float>( hoNDArray<float>* );
+    template EXPORTCPUCOREMATH void square_inplace<float>( hoNDArray<float>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > reciprocal<float>( hoNDArray<float>* );
+    template EXPORTCPUCOREMATH void reciprocal_inplace<float>( hoNDArray<float>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > reciprocal_sqrt<float>( hoNDArray<float>* );
+    template EXPORTCPUCOREMATH void reciprocal_sqrt_inplace<float>( hoNDArray<float>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > sgn<float>( hoNDArray<float>* );
+    template EXPORTCPUCOREMATH void sgn_inplace<float>( hoNDArray<float>* );
+    template EXPORTCPUCOREMATH void clamp<float>( hoNDArray<float>*, float, float );
+    template EXPORTCPUCOREMATH void clamp_min<float>( hoNDArray<float>*, float );
+    template EXPORTCPUCOREMATH void clamp_max<float>( hoNDArray<float>*, float );
+    template EXPORTCPUCOREMATH void normalize<float>( hoNDArray<float>*, float );
+    template EXPORTCPUCOREMATH void shrink1<float>( hoNDArray<float>*, float, hoNDArray<float>* );
+    template EXPORTCPUCOREMATH void pshrink<float>( hoNDArray<float>*, float,float, hoNDArray<float>* );
+    template EXPORTCPUCOREMATH void shrinkd<float> ( hoNDArray<float>*, hoNDArray<float>*, float, hoNDArray<float>* );
+    template EXPORTCPUCOREMATH void pshrinkd<float> ( hoNDArray<float>*, hoNDArray<float>*, float, float, hoNDArray<float>* );
+    // Element-wise functions, T = double (same set as instantiated for float):
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > abs<double>( hoNDArray<double>* );
+    template EXPORTCPUCOREMATH void abs_inplace<double>( hoNDArray<double>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > abs_square<double>( hoNDArray<double>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > sqrt<double>( hoNDArray<double>* );
+    template EXPORTCPUCOREMATH void sqrt_inplace<double>( hoNDArray<double>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > square<double>( hoNDArray<double>* );
+    template EXPORTCPUCOREMATH void square_inplace<double>( hoNDArray<double>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > reciprocal<double>( hoNDArray<double>* );
+    template EXPORTCPUCOREMATH void reciprocal_inplace<double>( hoNDArray<double>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > reciprocal_sqrt<double>( hoNDArray<double>* );
+    template EXPORTCPUCOREMATH void reciprocal_sqrt_inplace<double>( hoNDArray<double>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > sgn<double>( hoNDArray<double>* );
+    template EXPORTCPUCOREMATH void sgn_inplace<double>( hoNDArray<double>* );
+    template EXPORTCPUCOREMATH void clamp<double>( hoNDArray<double>*, double, double );
+    template EXPORTCPUCOREMATH void clamp_min<double>( hoNDArray<double>*, double );
+    template EXPORTCPUCOREMATH void clamp_max<double>( hoNDArray<double>*, double );
+    template EXPORTCPUCOREMATH void normalize<double>( hoNDArray<double>*, double );
+    template EXPORTCPUCOREMATH void shrink1<double>( hoNDArray<double>*, double, hoNDArray<double>* );
+    template EXPORTCPUCOREMATH void pshrink<double>( hoNDArray<double>*, double,double, hoNDArray<double>* );
+    template EXPORTCPUCOREMATH void shrinkd<double> ( hoNDArray<double>*, hoNDArray<double>*, double, hoNDArray<double>* );
+    template EXPORTCPUCOREMATH void pshrinkd<double> ( hoNDArray<double>*, hoNDArray<double>*, double, double, hoNDArray<double>* );
+    // Element-wise functions, T = std::complex<float>. abs/abs_square return real (float) arrays; abs_inplace, sgn and sgn_inplace are not instantiated for this type.
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > abs< std::complex<float> >( hoNDArray< std::complex<float> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > abs_square< std::complex<float> >( hoNDArray< std::complex<float> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<float> > > sqrt< std::complex<float> >( hoNDArray< std::complex<float> >* );
+    template EXPORTCPUCOREMATH void sqrt_inplace< std::complex<float> >( hoNDArray< std::complex<float> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<float> > > square< std::complex<float> >( hoNDArray< std::complex<float> >* );
+    template EXPORTCPUCOREMATH void square_inplace< std::complex<float> >( hoNDArray< std::complex<float> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<float> > > reciprocal< std::complex<float> >( hoNDArray< std::complex<float> >* );
+    template EXPORTCPUCOREMATH void reciprocal_inplace< std::complex<float> >( hoNDArray< std::complex<float> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<float> > > reciprocal_sqrt< std::complex<float> >( hoNDArray< std::complex<float> >* );
+    template EXPORTCPUCOREMATH void reciprocal_sqrt_inplace< std::complex<float> >( hoNDArray< std::complex<float> >* );
+    // clamp/normalize/shrink family for std::complex<float>; the scalar arguments are real-valued (float):
+    template EXPORTCPUCOREMATH void clamp< std::complex<float> >( hoNDArray< std::complex<float> >*, float, float );
+    template EXPORTCPUCOREMATH void clamp_min< std::complex<float> >( hoNDArray< std::complex<float> >*, float );
+    template EXPORTCPUCOREMATH void clamp_max<std::complex<float> >( hoNDArray< std::complex<float> >*, float );
+    template EXPORTCPUCOREMATH void normalize< std::complex<float> >( hoNDArray< std::complex<float> >*, float );
+    template EXPORTCPUCOREMATH void shrink1< std::complex<float> >( hoNDArray< std::complex<float> >*, float, hoNDArray< std::complex<float> >* );
+    template EXPORTCPUCOREMATH void pshrink< std::complex<float> >( hoNDArray< std::complex<float> >*, float,float, hoNDArray< std::complex<float> >* );
+    template EXPORTCPUCOREMATH void shrinkd< std::complex<float> > ( hoNDArray< std::complex<float> >*, hoNDArray<float>*, float, hoNDArray< std::complex<float> >* );
+    template EXPORTCPUCOREMATH void pshrinkd< std::complex<float> > ( hoNDArray< std::complex<float> >*, hoNDArray<float>*, float, float, hoNDArray< std::complex<float> >* );
+    // Element-wise functions, T = std::complex<double>. abs/abs_square return real (double) arrays; scalar arguments are double.
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > abs< std::complex<double> >( hoNDArray< std::complex<double> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > abs_square< std::complex<double> >( hoNDArray< std::complex<double> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<double> > > sqrt< std::complex<double> >( hoNDArray< std::complex<double> >* );
+    template EXPORTCPUCOREMATH void sqrt_inplace< std::complex<double> >( hoNDArray< std::complex<double> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<double> > > square< std::complex<double> >( hoNDArray< std::complex<double> >* );
+    template EXPORTCPUCOREMATH void square_inplace< std::complex<double> >( hoNDArray< std::complex<double> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<double> > > reciprocal< std::complex<double> >( hoNDArray< std::complex<double> >* );
+    template EXPORTCPUCOREMATH void reciprocal_inplace< std::complex<double> >( hoNDArray< std::complex<double> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<double> > > reciprocal_sqrt< std::complex<double> >( hoNDArray< std::complex<double> >* );
+    template EXPORTCPUCOREMATH void reciprocal_sqrt_inplace< std::complex<double> >( hoNDArray< std::complex<double> >* );
+    template EXPORTCPUCOREMATH void clamp< std::complex<double> >( hoNDArray< std::complex<double> >*, double, double );
+    template EXPORTCPUCOREMATH void clamp_min< std::complex<double> >( hoNDArray< std::complex<double> >*, double );
+    template EXPORTCPUCOREMATH void clamp_max<std::complex<double> >( hoNDArray< std::complex<double> >*, double );
+    template EXPORTCPUCOREMATH void normalize< std::complex<double> >( hoNDArray< std::complex<double> >*, double );
+    template EXPORTCPUCOREMATH void shrink1< std::complex<double> >( hoNDArray< std::complex<double> >*, double, hoNDArray< std::complex<double> >* );
+    template EXPORTCPUCOREMATH void pshrink< std::complex<double> >( hoNDArray< std::complex<double> >*, double,double, hoNDArray< std::complex<double> >* );
+    template EXPORTCPUCOREMATH void shrinkd< std::complex<double> > ( hoNDArray< std::complex<double> >*, hoNDArray<double>*, double, hoNDArray< std::complex<double> >* );
+    template EXPORTCPUCOREMATH void pshrinkd< std::complex<double> > ( hoNDArray< std::complex<double> >*, hoNDArray<double>*, double, double, hoNDArray< std::complex<double> >* );
+    // Element-wise functions, T = complext<float>. abs/abs_square return real (float) arrays; scalar arguments are float.
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > abs< complext<float> >( hoNDArray< complext<float> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > abs_square< complext<float> >( hoNDArray< complext<float> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< complext<float> > > sqrt< complext<float> >( hoNDArray< complext<float> >* );
+    template EXPORTCPUCOREMATH void sqrt_inplace< complext<float> >( hoNDArray< complext<float> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< complext<float> > > square< complext<float> >( hoNDArray< complext<float> >* );
+    template EXPORTCPUCOREMATH void square_inplace< complext<float> >( hoNDArray< complext<float> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< complext<float> > > reciprocal< complext<float> >( hoNDArray< complext<float> >* );
+    template EXPORTCPUCOREMATH void reciprocal_inplace< complext<float> >( hoNDArray< complext<float> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< complext<float> > > reciprocal_sqrt< complext<float> >( hoNDArray< complext<float> >* );
+    template EXPORTCPUCOREMATH void reciprocal_sqrt_inplace< complext<float> >( hoNDArray< complext<float> >* );
+    template EXPORTCPUCOREMATH void clamp< complext<float> >( hoNDArray< complext<float> >*, float, float );
+    template EXPORTCPUCOREMATH void clamp_min< complext<float> >( hoNDArray< complext<float> >*, float );
+    template EXPORTCPUCOREMATH void clamp_max<complext<float> >( hoNDArray< complext<float> >*, float );
+    template EXPORTCPUCOREMATH void normalize< complext<float> >( hoNDArray< complext<float> >*, float );
+    template EXPORTCPUCOREMATH void shrink1< complext<float> >( hoNDArray< complext<float> >*, float, hoNDArray< complext<float> >* );
+    template EXPORTCPUCOREMATH void pshrink< complext<float> >( hoNDArray< complext<float> >*, float,float, hoNDArray< complext<float> >* );
+    template EXPORTCPUCOREMATH void shrinkd< complext<float> > ( hoNDArray< complext<float> >*, hoNDArray<float>*, float, hoNDArray< complext<float> >* );
+    template EXPORTCPUCOREMATH void pshrinkd< complext<float> > ( hoNDArray< complext<float> >*, hoNDArray<float>*, float, float, hoNDArray< complext<float> >* );
+    // Element-wise functions, T = complext<double>. abs/abs_square return real (double) arrays; scalar arguments are double.
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > abs< complext<double> >( hoNDArray< complext<double> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > abs_square< complext<double> >( hoNDArray< complext<double> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< complext<double> > > sqrt< complext<double> >( hoNDArray< complext<double> >* );
+    template EXPORTCPUCOREMATH void sqrt_inplace< complext<double> >( hoNDArray< complext<double> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< complext<double> > > square< complext<double> >( hoNDArray< complext<double> >* );
+    template EXPORTCPUCOREMATH void square_inplace< complext<double> >( hoNDArray< complext<double> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< complext<double> > > reciprocal< complext<double> >( hoNDArray< complext<double> >* );
+    template EXPORTCPUCOREMATH void reciprocal_inplace< complext<double> >( hoNDArray< complext<double> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< complext<double> > > reciprocal_sqrt< complext<double> >( hoNDArray< complext<double> >* );
+    template EXPORTCPUCOREMATH void reciprocal_sqrt_inplace< complext<double> >( hoNDArray< complext<double> >* );
+    template EXPORTCPUCOREMATH void clamp< complext<double> >( hoNDArray< complext<double> >*, double, double );
+    template EXPORTCPUCOREMATH void clamp_min< complext<double> >( hoNDArray< complext<double> >*, double );
+    template EXPORTCPUCOREMATH void clamp_max<complext<double> >( hoNDArray< complext<double> >*, double );
+    template EXPORTCPUCOREMATH void normalize< complext<double> >( hoNDArray< complext<double> >*, double );
+    template EXPORTCPUCOREMATH void shrink1< complext<double> >( hoNDArray< complext<double> >*, double, hoNDArray< complext<double> >* );
+    template EXPORTCPUCOREMATH void pshrink< complext<double> >( hoNDArray< complext<double> >*, double,double, hoNDArray< complext<double> >* );
+    template EXPORTCPUCOREMATH void shrinkd< complext<double> > ( hoNDArray< complext<double> >*, hoNDArray<double>*, double, hoNDArray< complext<double> >* );
+    template EXPORTCPUCOREMATH void pshrinkd< complext<double> > ( hoNDArray< complext<double> >*, hoNDArray<double>*, double, double, hoNDArray< complext<double> >* );
+    // Single precision: real_to_complex / real_imag_to_complex producing std::complex<float> arrays from float arrays.
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<float> > > real_to_complex< std::complex<float> >( hoNDArray<float>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<float> > > real_imag_to_complex< std::complex<float> >( hoNDArray<float>*, hoNDArray<float>* );
+    // Same two conversions producing float_complext arrays, followed by real/imag/conj for float, std::complex<float> and float_complext.
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float_complext> > real_to_complex<float_complext>( hoNDArray<float>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float_complext> > real_imag_to_complex<float_complext>( hoNDArray<float>*, hoNDArray<float>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > real<float>( hoNDArray<float>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > real<std::complex<float> >( hoNDArray< std::complex<float> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > real<float_complext>( hoNDArray<float_complext>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > imag<float>( hoNDArray<float>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > imag<std::complex<float> >( hoNDArray< std::complex<float> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > imag<float_complext>( hoNDArray<float_complext>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float> > conj<float>( hoNDArray<float>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<std::complex<float> > > conj<std::complex<float> >( hoNDArray<std::complex<float> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<float_complext> > conj<float_complext>( hoNDArray<float_complext>* );
+
+    // Double precision: real_to_complex / real_imag_to_complex producing std::complex<double> arrays from double arrays.
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<double> > > real_to_complex< std::complex<double> >( hoNDArray<double>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray< std::complex<double> > > real_imag_to_complex< std::complex<double> >( hoNDArray<double>*, hoNDArray<double>* );
+    // Same two conversions producing double_complext arrays, followed by real/imag/conj for double, std::complex<double> and double_complext.
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double_complext> > real_to_complex<double_complext>( hoNDArray<double>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double_complext> > real_imag_to_complex<double_complext>( hoNDArray<double>*, hoNDArray<double>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > real<double>( hoNDArray<double>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > real<std::complex<double> >( hoNDArray< std::complex<double> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > real<double_complext>( hoNDArray<double_complext>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > imag<std::complex<double> >( hoNDArray< std::complex<double> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > imag<double>( hoNDArray<double>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > imag<double_complext>( hoNDArray<double_complext>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double> > conj<double>( hoNDArray<double>* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<std::complex<double> > > conj<std::complex<double> >( hoNDArray<std::complex<double> >* );
+    template EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<double_complext> > conj<double_complext>( hoNDArray<double_complext>* );
+    template EXPORTCPUCOREMATH hoNDArray<float>& operator+=<float>(hoNDArray<float>&, const hoNDArray<float>&); // compound-assignment operators start here: float array with float array / float scalar
+    template EXPORTCPUCOREMATH hoNDArray<float>& operator+=<float>(hoNDArray<float>&, const float&);
+    template EXPORTCPUCOREMATH hoNDArray<float>& operator-=<float>(hoNDArray<float>&, const hoNDArray<float>&);
+    template EXPORTCPUCOREMATH hoNDArray<float>& operator-=<float>(hoNDArray<float>&, const float&);
+    template EXPORTCPUCOREMATH hoNDArray<float>& operator*=<float>(hoNDArray<float>&, const hoNDArray<float>&);
+    template EXPORTCPUCOREMATH hoNDArray<float>& operator*=<float>(hoNDArray<float>&, const float&);
+    template EXPORTCPUCOREMATH hoNDArray<float>& operator/=<float>(hoNDArray<float>&, const hoNDArray<float>&);
+    template EXPORTCPUCOREMATH hoNDArray<float>& operator/=<float>(hoNDArray<float>&, const float&);
+    // Compound-assignment operators: double array with double array / double scalar.
+    template EXPORTCPUCOREMATH hoNDArray<double>& operator+=<double>(hoNDArray<double>&, const hoNDArray<double>&);
+    template EXPORTCPUCOREMATH hoNDArray<double>& operator+=<double>(hoNDArray<double>&, const double&);
+    template EXPORTCPUCOREMATH hoNDArray<double>& operator-=<double>(hoNDArray<double>&, const hoNDArray<double>&);
+    template EXPORTCPUCOREMATH hoNDArray<double>& operator-=<double>(hoNDArray<double>&, const double&);
+    template EXPORTCPUCOREMATH hoNDArray<double>& operator*=<double>(hoNDArray<double>&, const hoNDArray<double>&);
+    template EXPORTCPUCOREMATH hoNDArray<double>& operator*=<double>(hoNDArray<double>&, const double&);
+    template EXPORTCPUCOREMATH hoNDArray<double>& operator/=<double>(hoNDArray<double>&, const hoNDArray<double>&);
+    template EXPORTCPUCOREMATH hoNDArray<double>& operator/=<double>(hoNDArray<double>&, const double&);
+    // Compound-assignment operators: std::complex<float> array with std::complex<float> array / scalar.
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator+=< std::complex<float> >
+        (hoNDArray< std::complex<float> >&, const hoNDArray< std::complex<float> >&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator+=< std::complex<float> >
+        (hoNDArray< std::complex<float> >&, const std::complex<float>&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator-=< std::complex<float> >
+        (hoNDArray< std::complex<float> >&, const hoNDArray< std::complex<float> >&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator-=< std::complex<float> >
+        (hoNDArray< std::complex<float> >&, const std::complex<float>&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator*=< std::complex<float> >
+        (hoNDArray< std::complex<float> >&, const hoNDArray< std::complex<float> >&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator*=< std::complex<float> >
+        (hoNDArray< std::complex<float> >&, const std::complex<float>&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator/=< std::complex<float> >
+        (hoNDArray< std::complex<float> >&, const hoNDArray< std::complex<float> >&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator/=< std::complex<float> >
+        (hoNDArray< std::complex<float> >&, const std::complex<float>&);
+    // Compound-assignment operators: complext<float> array with complext<float> array / scalar.
+    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator+=< complext<float> >
+        (hoNDArray< complext<float> >&, const hoNDArray< complext<float> >&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator+=< complext<float> >
+        (hoNDArray< complext<float> >&, const complext<float>&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator-=< complext<float> >
+        (hoNDArray< complext<float> >&, const hoNDArray< complext<float> >&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator-=< complext<float> >
+        (hoNDArray< complext<float> >&, const complext<float>&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator*=< complext<float> >
+        (hoNDArray< complext<float> >&, const hoNDArray< complext<float> >&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator*=< complext<float> >
+        (hoNDArray< complext<float> >&, const complext<float>&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator/=< complext<float> >
+        (hoNDArray< complext<float> >&, const hoNDArray< complext<float> >&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator/=< complext<float> >
+        (hoNDArray< complext<float> >&, const complext<float>&);
+    // Mixed-type operators, single precision: std::complex<float> array with float array.
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator+=<float>(hoNDArray< std::complex<float> >&, const hoNDArray<float>&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator-=<float>(hoNDArray< std::complex<float> >&, const hoNDArray<float>&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator*=<float>(hoNDArray< std::complex<float> >&, const hoNDArray<float>&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator/=<float>(hoNDArray< std::complex<float> >&, const hoNDArray<float>&);
+    // Mixed-type operators, single precision: complext<float> array with float array.
+    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator+=<float>(hoNDArray< complext<float> >&, const hoNDArray<float>&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator-=<float>(hoNDArray< complext<float> >&, const hoNDArray<float>&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator*=<float>(hoNDArray< complext<float> >&, const hoNDArray<float>&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator/=<float>(hoNDArray< complext<float> >&, const hoNDArray<float>&);
+    // Mixed-type operators, single precision: std::complex<float> array with float scalar.
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator+=<float>(hoNDArray< std::complex<float> >&, const float&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator-=<float>(hoNDArray< std::complex<float> >&, const float&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator*=<float>(hoNDArray< std::complex<float> >&, const float&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<float> >& operator/=<float>(hoNDArray< std::complex<float> >&, const float&);
+    // Mixed-type operators, single precision: complext<float> array with float scalar.
+    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator+=<float>(hoNDArray< complext<float> >&, const float&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator-=<float>(hoNDArray< complext<float> >&, const float&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator*=<float>(hoNDArray< complext<float> >&, const float&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<float> >& operator/=<float>(hoNDArray< complext<float> >&, const float&);
+    // Compound-assignment operators: std::complex<double> array with std::complex<double> array / scalar.
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator+=< std::complex<double> >
+        (hoNDArray< std::complex<double> >&, const hoNDArray< std::complex<double> >&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator+=< std::complex<double> >
+        (hoNDArray< std::complex<double> >&, const std::complex<double>&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator-=< std::complex<double> >
+        (hoNDArray< std::complex<double> >&, const hoNDArray< std::complex<double> >&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator-=< std::complex<double> >
+        (hoNDArray< std::complex<double> >&, const std::complex<double>&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator*=< std::complex<double> >
+        (hoNDArray< std::complex<double> >&, const hoNDArray< std::complex<double> >&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator*=< std::complex<double> >
+        (hoNDArray< std::complex<double> >&, const std::complex<double>&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator/=< std::complex<double> >
+        (hoNDArray< std::complex<double> >&, const hoNDArray< std::complex<double> >&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator/=< std::complex<double> >
+        (hoNDArray< std::complex<double> >&, const std::complex<double>&);
+    // Compound-assignment operators: complext<double> array with complext<double> array / scalar.
+    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator+=< complext<double> >
+        (hoNDArray< complext<double> >&, const hoNDArray< complext<double> >&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator+=< complext<double> >
+        (hoNDArray< complext<double> >&, const complext<double>&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator-=< complext<double> >
+        (hoNDArray< complext<double> >&, const hoNDArray< complext<double> >&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator-=< complext<double> >
+        (hoNDArray< complext<double> >&, const complext<double>&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator*=< complext<double> >
+        (hoNDArray< complext<double> >&, const hoNDArray< complext<double> >&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator*=< complext<double> >
+        (hoNDArray< complext<double> >&, const complext<double>&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator/=< complext<double> >
+        (hoNDArray< complext<double> >&, const hoNDArray< complext<double> >&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator/=< complext<double> >
+        (hoNDArray< complext<double> >&, const complext<double>&);
+    // Mixed-type operators, double precision: std::complex<double> array with double array.
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator+=<double>(hoNDArray< std::complex<double> >&, const hoNDArray<double>&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator-=<double>(hoNDArray< std::complex<double> >&, const hoNDArray<double>&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator*=<double>(hoNDArray< std::complex<double> >&, const hoNDArray<double>&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator/=<double>(hoNDArray< std::complex<double> >&, const hoNDArray<double>&);
+    // Mixed-type operators, double precision: complext<double> array with double array.
+    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator+=<double>(hoNDArray< complext<double> >&, const hoNDArray<double>&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator-=<double>(hoNDArray< complext<double> >&, const hoNDArray<double>&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator*=<double>(hoNDArray< complext<double> >&, const hoNDArray<double>&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator/=<double>(hoNDArray< complext<double> >&, const hoNDArray<double>&);
+    // Mixed-type operators, double precision: std::complex<double> array with double scalar.
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator+=<double>(hoNDArray< std::complex<double> >&, const double&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator-=<double>(hoNDArray< std::complex<double> >&, const double&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator*=<double>(hoNDArray< std::complex<double> >&, const double&);
+    template EXPORTCPUCOREMATH hoNDArray< std::complex<double> >& operator/=<double>(hoNDArray< std::complex<double> >&, const double&);
+    // Mixed-type operators, double precision: complext<double> array with double scalar.
+    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator+=<double>(hoNDArray< complext<double> >&, const double&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator-=<double>(hoNDArray< complext<double> >&, const double&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator*=<double>(hoNDArray< complext<double> >&, const double&);
+    template EXPORTCPUCOREMATH hoNDArray< complext<double> >& operator/=<double>(hoNDArray< complext<double> >&, const double&);
+
+    // axpy: one instantiation per element type (float, double, std::complex<float|double>, complext<float|double>); the scalar has the array's element type.
+    template EXPORTCPUCOREMATH void axpy<float>( float, hoNDArray<float>*, hoNDArray<float>* );
+    template EXPORTCPUCOREMATH void axpy<double>( double, hoNDArray<double>*, hoNDArray<double>* );
+    template EXPORTCPUCOREMATH void axpy< std::complex<float> >( std::complex<float> , hoNDArray< std::complex<float> >*, hoNDArray< std::complex<float> >* );
+    template EXPORTCPUCOREMATH void axpy< std::complex<double> >( std::complex<double> , hoNDArray< std::complex<double> >*, hoNDArray< std::complex<double> >* );
+    template EXPORTCPUCOREMATH void axpy< complext<float> >( complext<float> , hoNDArray< complext<float> >*, hoNDArray< complext<float> >* );
+    template EXPORTCPUCOREMATH void axpy< complext<double> >( complext<double> , hoNDArray< complext<double> >*, hoNDArray< complext<double> >* );
+
+}
diff --git a/toolboxes/core/cpu/math/hoNDArray_elemwise.h b/toolboxes/core/cpu/math/hoNDArray_elemwise.h
new file mode 100644
index 0000000..2224bb9
--- /dev/null
+++ b/toolboxes/core/cpu/math/hoNDArray_elemwise.h
@@ -0,0 +1,641 @@
+/** \file   hoNDArray_elemwise.h
+    \brief  Element-wise math operations on the hoNDArray class.
+
+    hoNDArray_elemwise.h defines element-wise array operations on the hoNDArray class.
+    Many of the provided functions come in two flavours:
+    1) A function that returns a smart pointer to a new array holding the result of the element-wise operation, and
+
+    2) A function that performs an in-place element-wise computation, replacing the input array.
+    When both versions are available the in-place version is suffixed _inplace.
+    Some functions (clear, fill, clamp, clamp_min, clamp_max, normalize, shrink1, shrinkd) are only provided as in-place operations,
+    and they do not carry the _inplace suffix in order to keep user code compact.
+    A few functions return a different type than their input array
+    (abs on complex data, real, imag, real_to_complex) and consequently are not offered as in-place operations.
+    The functions provided in hoNDArray_elemwise are deliberately placed outside the NDArray derived classes
+    - to allow the NDArray classes to be lightweight header only data containers for both the cpu and gpu instances
+    - to allow for external library optimized implementations of the element-wise functions without adding such dependencies to the core data container
+    The present cpu implementation is based on Armadillo (whenever suitable functions are available).
+    The implementation is purposely split into a header and underlying implementation (.cpp) 
+    as this allows specific instantiation of the supported template types.     
+    The supported types are float, double, std::complex<float>, std::complex<double>, 
+    Gadgetron::complext<float> and Gadgetron::complext<double> -- with some deliberate omissions.
+
+    3) Many functions are also reimplemented using the Intel MKL, if it is available, to speed up the computation.
+ */
+
+#pragma once
+
+#include "hoNDArray.h"
+#include "cpucore_math_export.h"
+
+#include "GadgetronCommon.h"
+#include <complex>
+
+namespace Gadgetron{
+
+/**
+* @brief add two vectors of values, r = x + y
+  support in-place computation, e.g. x==r or y==r
+*/
+template <typename T> EXPORTCPUCOREMATH 
+void add(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r);
+
+template <typename T> EXPORTCPUCOREMATH 
+void add(const hoNDArray< std::complex<T> >& x, const hoNDArray<T>& y, hoNDArray< std::complex<T> >& r);
+
+template <typename T> EXPORTCPUCOREMATH 
+void add(const hoNDArray< complext<T> >& x, const hoNDArray<T>& y, hoNDArray< complext<T> >& r);
+
+/**
+* @brief subtract two vectors of values, r = x - y
+  support in-place computation, e.g. x==r
+*/
+template <typename T> EXPORTCPUCOREMATH 
+void subtract(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r);
+
+template <typename T> EXPORTCPUCOREMATH 
+void subtract(const hoNDArray< std::complex<T> >& x, const hoNDArray<T>& y, hoNDArray< std::complex<T> >& r);
+
+template <typename T> EXPORTCPUCOREMATH 
+void subtract(const hoNDArray< complext<T> >& x, const hoNDArray<T>& y, hoNDArray< complext<T> >& r);
+
+/**
+* @brief multiply two vectors of values, r = x * y
+  support in-place computation, e.g. x==r or y==r
+*/
+template <typename T> EXPORTCPUCOREMATH 
+void multiply(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r);
+
+template <typename T> EXPORTCPUCOREMATH 
+void multiply(const hoNDArray< std::complex<T> >& x, const hoNDArray<T>& y, hoNDArray< std::complex<T> >& r);
+
+template <typename T> EXPORTCPUCOREMATH 
+void multiply(const hoNDArray< complext<T> >& x, const hoNDArray<T>& y, hoNDArray< complext<T> >& r);
+
+/**
+* @brief divide two vectors of values, r = x / y
+  support in-place computation, e.g. x==r
+  no check for y==0
+*/
+template <typename T> EXPORTCPUCOREMATH 
+void divide(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r);
+
+template <typename T> EXPORTCPUCOREMATH 
+void divide(const hoNDArray< std::complex<T> >& x, const hoNDArray<T>& y, hoNDArray< std::complex<T> >& r);
+
+template <typename T> EXPORTCPUCOREMATH 
+void divide(const hoNDArray< complext<T> >& x, const hoNDArray<T>& y, hoNDArray< complext<T> >& r);
+
+/**
+* @brief r = x * conj(y)
+*/
+template <typename T> EXPORTCPUCOREMATH 
+void multiplyConj(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r);
+
+/**
+* @brief r = conj(x)
+*/
+template <typename T> EXPORTCPUCOREMATH 
+void conjugate(const hoNDArray<T>& x, hoNDArray<T>& r);
+
+/**
+* @brief if abs(x) is smaller than epsilon for its numeric type
+add epsilon to this x
+*/
+template <typename T> EXPORTCPUCOREMATH 
+void addEpsilon(hoNDArray<T>& x);
+
+/**
+* @brief r = angle(x)
+*/
+template <typename T> EXPORTCPUCOREMATH 
+void argument(const hoNDArray<T>& x, hoNDArray<typename realType<T>::Type>& r);
+
+/**
+* @brief r = 1/x
+*/
+template <typename T> EXPORTCPUCOREMATH 
+void inv(const hoNDArray<T>& x, hoNDArray<T>& r);
+
+/**
+ * @brief Calculates the element-wise absolute values (l2 norm) of the array entries
+ * @param[in] x Input array.
+ * @return A new array containing the element-wise absolute values of the input.
+ */
+template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<typename realType<T>::Type> > abs( hoNDArray<T> *x );
+template <typename T> EXPORTCPUCOREMATH void abs(const hoNDArray<T>& x, hoNDArray<typename realType<T>::Type>& r);
+template <typename T> EXPORTCPUCOREMATH void abs(const hoNDArray< std::complex<T> >& x, hoNDArray< std::complex<T> >& r);
+
+/**
+ * @brief Calculates the element-wise absolute values (l2 norm) of the array entries (in place).
+ * @param[in,out] x Input and output array.
+ */
+template<class T> EXPORTCPUCOREMATH void abs_inplace( hoNDArray<T> *x );
+
+/**
+ * @brief Calculates the element-wise squared absolute values of the array entries
+ * @param[in] x Input array.
+ * @return A new array containing the element-wise squared absolute values of the input.
+ */
+template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<typename realType<T>::Type> > abs_square( hoNDArray<T> *x );
+
+/**
+ * @brief Calculates the element-wise sqrt of the array entries.
+ * @param[in] x Input array.
+ * @return A new array containing the element-wise sqrt of the input.
+ */
+template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<T> > sqrt( hoNDArray<T> *x );
+
+template <typename T> EXPORTCPUCOREMATH void sqrt(const hoNDArray<T>& x, hoNDArray<T>& r);
+
+/**
+ * @brief Calculates the element-wise sqrt of the array entries (in place).
+ * @param[in,out] x Input and output array.
+ */
+template<class T> EXPORTCPUCOREMATH void sqrt_inplace( hoNDArray<T> *x );
+
+/**
+ * @brief Calculates the element-wise square of the array entries.
+ * @param[in] x Input array.
+ * @return A new array containing the element-wise square of the input.
+ *
+ * For real numbers this function is equivalent to abs_square().
+ * For complex arrays abs_square() and square() differ however.
+ */
+template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<T> > square( hoNDArray<T> *x );
+
+/**
+ * @brief Calculates the element-wise square of the array entries (in place).
+ * @param[in,out] x Input and output array.
+ */
+template<class T> EXPORTCPUCOREMATH void square_inplace( hoNDArray<T> *x );
+
+/**
+ * @brief Calculates the element-wise reciprocal of the array entries.
+ * @param[in] x Input array.
+ * @return A new array containing the element-wise reciprocal of the input.
+ */
+template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<T> > reciprocal( hoNDArray<T> *x );
+
+/**
+ * @brief Calculates the element-wise reciprocal of the array entries (in place).
+ * @param[in,out] x Input and output array.
+ */
+template<class T> EXPORTCPUCOREMATH void reciprocal_inplace( hoNDArray<T> *x );
+
+/**
+ * @brief Calculates the element-wise reciprocal sqrt of the array entries.
+ * @param[in] x Input array.
+ * @return A new array containing the element-wise reciprocal sqrt of the input.
+ */
+template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<T> > reciprocal_sqrt( hoNDArray<T> *x );
+
+/**
+ * @brief Calculates the element-wise reciprocal sqrt of the array entries (in place).
+ * @param[in,out] x Input and output array.
+ */
+template<class T> EXPORTCPUCOREMATH void reciprocal_sqrt_inplace( hoNDArray<T> *x );
+
+/**
+ * @brief Calculates the elementwise signum function on the array.
+ * @param[in] x Input array.
+ * @return A new array containing the element-wise sgn of the input.
+ */
+template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<T> > sgn( hoNDArray<T> *x );
+
+/**
+ * @brief Calculates the elementwise signum function on the array (in place).
+ * @param[in,out] x Input and output array.
+ */
+template<class T> EXPORTCPUCOREMATH void sgn_inplace( hoNDArray<T> *x );
+
+/**
+ * @brief Extract the real component from a complex array.
+ * @param[in] x Input array.
+ * @return A new array of the real component of the complex array.
+ */
+template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<typename realType<T>::Type> > real( hoNDArray<T> *x );
+
+/**
+ * @brief Extract the imaginary component from a complex array.
+ * @param[in] x Input array.
+ * @return A new array of the imaginary component of the complex array.
+ */
+template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<typename realType<T>::Type> > imag( hoNDArray<T> *x );
+
+/**
+ * @brief Create a new array of the complex conjugate of the input array. For real arrays a copy of the input array is returned.
+ * @param[in] x Input array.
+ * @return A new array of the complex conjugate of the input array.
+ */
+template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<T> > conj( hoNDArray<T> *x );
+
+/**
+ * @brief Construct a complex array from a real array.
+ * @param[in] x Input array.
+ * @return A new complex array containing the input array in the real component and zeros in the imaginary component.
+ */
+template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<T> >
+real_to_complex( hoNDArray<typename realType<T>::Type> *x );
+
+template<class T> EXPORTCPUCOREMATH boost::shared_ptr< hoNDArray<T> >
+real_imag_to_complex( hoNDArray<typename realType<T>::Type> *real, hoNDArray<typename realType<T>::Type>* imag);
+
+/**
+* @brief real and imag to complex
+*/
+template<class T> EXPORTCPUCOREMATH 
+void real_imag_to_complex(const hoNDArray<typename realType<T>::Type>& real, const hoNDArray<typename realType<T>::Type>& imag, hoNDArray<T>& cplx);
+
+/**
+* @brief complex to real and imag
+*/
+template<class T> EXPORTCPUCOREMATH 
+void complex_to_real_imag(const hoNDArray<T>& cplx, hoNDArray<typename realType<T>::Type>& real, hoNDArray<typename realType<T>::Type>& imag);
+
+template<class T> EXPORTCPUCOREMATH 
+void complex_to_real_imag(const hoNDArray<T>& cplx, hoNDArray<T>& real, hoNDArray<T>& imag);
+
+/**
+* @brief get the real part of complex
+*/
+template<class T> EXPORTCPUCOREMATH 
+void complex_to_real(const hoNDArray<T>& cplx, hoNDArray<typename realType<T>::Type>& real);
+
+template<class T> EXPORTCPUCOREMATH 
+void complex_to_real(const hoNDArray<T>& cplx, hoNDArray<T>& real);
+
+template<class T> EXPORTCPUCOREMATH // export macro added to match the other complex_to_real/complex_to_imag overloads
+void complex_to_real(hoNDArray<T>& cplx); // single-argument overload; presumably converts cplx in place -- TODO confirm against the .cpp
+
+/**
+* @brief get the imag part of complex
+*/
+template<class T> EXPORTCPUCOREMATH 
+void complex_to_imag(const hoNDArray<T>& cplx, hoNDArray<typename realType<T>::Type>& imag);
+
+template<class T> EXPORTCPUCOREMATH 
+void complex_to_imag(const hoNDArray<T>& cplx, hoNDArray<T>& imag);
+
+template<class T> EXPORTCPUCOREMATH 
+void complex_to_imag(hoNDArray<T>& cplx);
+
+/**
+* @brief get complex array whose real part is the input and imag part is zero
+*/
+template<class T> EXPORTCPUCOREMATH 
+void real_to_complex(const hoNDArray<typename realType<T>::Type>& real, hoNDArray<T>& cplx);
+
+/**
+ * @brief Zero-fills the array in place by overwriting its buffer with 0-bytes (memset).
+ * @param[in,out] x Array to clear (the pointer is dereferenced unchecked); empty arrays are left untouched.
+ */
+template<class T> void clear( hoNDArray<T>* x )
+{
+    // Guard clause: nothing to overwrite when the array holds no elements.
+    if ( !(x->get_number_of_elements() > 0) ) return;
+
+    memset( x->get_data_ptr(), 0, sizeof(T)*x->get_number_of_elements() );
+}
+
+template<class T> void clear( hoNDArray<T>& x )
+{
+    // Reference overload: zero the raw buffer byte-wise; an empty array is skipped.
+    if ( !(x.get_number_of_elements() > 0) ) return;
+
+    memset( x.get_data_ptr(), 0, sizeof(T)*x.get_number_of_elements() );
+}
+
+/**
+ * @brief Fills the array with a user provided constant value (in place).
+ * @param[in,out] x Input and output array.
+ * @param[in] val Fill value.
+ */
+template <typename T> EXPORTCPUCOREMATH void fill( hoNDArray<T>* x, T val);
+template <typename T> EXPORTCPUCOREMATH void fill( hoNDArray<T>& x, T val );
+
+/**
+ * @brief Clamps all values in the array to the minimum and maximum values specified (in place).
+ * @param[in,out] x Input and output array.
+ * @param[in] min minimum value.
+ * @param[in] max maximum value.
+ * @param[in] min_val value to which everything below the minimum will be set
+ * @param[in] max_val value to which everything above the maximum will be set
+ */
+template<class T> EXPORTCPUCOREMATH void clamp( hoNDArray<T> *x, typename realType<T>::Type min, typename realType<T>::Type max, T min_val, T max_val );
+
+/**
+ * @brief Clamps all values in the array to the minimum and maximum values specified (in place).
+ * @param[in,out] x Input and output array.
+ * @param[in] min minimum value.
+ * @param[in] max maximum value.
+ */
+template<class T> EXPORTCPUCOREMATH void clamp( hoNDArray<T> *x, typename realType<T>::Type min, typename realType<T>::Type max );
+
+/**
+ * @brief Clamps all values in the array to a minimum value allowed (in place).
+ * @param[in,out] x Input and output array.
+ * @param[in] min Minimum value.
+ */
+template<class T> EXPORTCPUCOREMATH void clamp_min( hoNDArray<T> *x, typename realType<T>::Type min );
+
+/**
+ * @brief Clamps all values in the array to a maximum value allowed (in place).
+ * @param[in,out] x Input and output array.
+ * @param[in] max Maximum value.
+ */
+template<class T> EXPORTCPUCOREMATH void clamp_max( hoNDArray<T> *x, typename realType<T>::Type max );
+
+/**
+ * @brief In place normalization (scaling) to a new maximum absolute array value val.
+ * @param[in,out] x Input and output array.
+ * @param[in] val New maximum absolute array value (according to the l2-norm)
+ */
+template<class T> EXPORTCPUCOREMATH void normalize( hoNDArray<T> *x, typename realType<T>::Type val = typename realType<T>::Type(1) );
+
+/**
+ * @brief Shrinkage (soft thresholding), i.e. shrink(x,gamma) = x/abs(x)*max(abs(x)-gamma,0).
+ * @param[out] out Output array. Can be 0x0 in which case an in place transform is performed.
+ * @param[in,out] x Input array (and output array if out == 0x0).
+ * @param[in] gamma Shrinkage control parameter
+ */
+template<class T> EXPORTCPUCOREMATH void shrink1( hoNDArray<T> *x, typename realType<T>::Type gamma, hoNDArray<T> *out = 0x0 );
+
+/**
+ * @brief In place p-shrinkage (soft thresholding), i.e. pshrink(x,gamma,p) = x/abs(x)*max(abs(x)-gamma*abs(x)^(p-1),0).
+ * @param[out] out Output array. Can be 0x0 in which case an in place transform is performed.
+ * @param[in,out] x Input array (and output array if out == 0x0).
+ * @param[in] gamma Shrinkage control parameter
+ * @param[in] p p value of the shrinkage. Should be less than 1 and more than 0.
+ */
+template<class T> EXPORTCPUCOREMATH void pshrink( hoNDArray<T> *x, typename realType<T>::Type gamma,typename realType<T>::Type p, hoNDArray<T> *out = 0x0 );
+
+/**
+ * @brief Shrinkage (soft thresholding, multi-dimensional), i.e. shrink(x,gamma,s) = x/s*max(s-gamma,0).
+ * @param[out] out Output array. Can be 0x0 in which case an in place transform is performed.
+ * @param[in,out] x Input array (and output array if out == 0x0).
+ * @param[in] s Input array, normalization.
+ * @param[in] gamma Shrinkage control parameter
+ */
+template<class T> EXPORTCPUCOREMATH void shrinkd ( hoNDArray<T> *x, hoNDArray<typename realType<T>::Type> *s, typename realType<T>::Type gamma, hoNDArray<T> *out = 0x0 );
+
+/**
+ * @brief In place p-shrinkage (soft thresholding, multi-dimensional), i.e. pshrink(x,s,gamma,p) = x/s*max(s-gamma*s^(p-1),0).
+ * @param[out] out Output array. Can be 0x0 in which case an in place transform is performed.
+ * @param[in,out] x Input array (and output array if out == 0x0).
+ * @param[in] gamma Shrinkage control parameter
+ * @param[in] p p value of the shrinkage. Should be less than 1 and more than 0.
+ */
+template<class T> EXPORTCPUCOREMATH void pshrinkd ( hoNDArray<T> *x, hoNDArray<typename realType<T>::Type> *s, typename realType<T>::Type gamma,typename realType<T>::Type p, hoNDArray<T> *out = 0x0 );
+
+/**
+ * @brief Implementation of element-wise operator+= on two hoNDArrays.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input array.
+
+ * Let y be an n-dimensional array.
+ * Then the sizes of the first n array dimensions must match between x and y.
+ * If x contains further dimensions the operator is batched across those dimensions.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray<T>& operator+= (hoNDArray<T> &x, const hoNDArray<T> &y);
+
+/**
+ * @brief Implementation of element-wise operator+= on a hoNDArray with a scalar value.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input scalar.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray<T>& operator+= (hoNDArray<T> &x, const T &y);
+
+/**
+ * @brief Implementation of element-wise operator+= on two hoNDArrays.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input array.
+
+ * Let y be an n-dimensional array.
+ * Then the sizes of the first n array dimensions must match between x and y.
+ * If x contains further dimensions the operator is batched across those dimensions.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray< std::complex<T> >& operator+= (hoNDArray< std::complex<T> > &x, const hoNDArray<T> &y);
+
+/**
+ * @brief Implementation of element-wise operator+= on a hoNDArray with a scalar value.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input scalar.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray< std::complex<T> >& operator+= (hoNDArray< std::complex<T> >&x, const T &y);
+
+/**
+ * @brief Implementation of element-wise operator+= on two hoNDArrays.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input array.
+
+ * Let y be an n-dimensional array.
+ * Then the sizes of the first n array dimensions must match between x and y.
+ * If x contains further dimensions the operator is batched across those dimensions.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray< complext<T> >& operator+= (hoNDArray< complext<T> > &x, const hoNDArray<T> &y);
+
+/**
+ * @brief Implementation of element-wise operator+= on a hoNDArray with a scalar value.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input scalar.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray< complext<T> >& operator+= (hoNDArray< complext<T> >&x, const T &y);
+
+/**
+ * @brief Implementation of element-wise operator-= on two hoNDArrays.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input array.
+
+ * Let y be an n-dimensional array.
+ * Then the sizes of the first n array dimensions must match between x and y.
+ * If x contains further dimensions the operator is batched across those dimensions.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray<T>& operator-= (hoNDArray<T> &x, const hoNDArray<T> &y);
+
+/**
+ * @brief Implementation of element-wise operator-= on a hoNDArray with a scalar value.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input scalar.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray<T>& operator-= (hoNDArray<T> &x, const T &y);
+
+/**
+ * @brief Implementation of element-wise operator-= on two hoNDArrays.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input array.
+
+ * Let y be an n-dimensional array.
+ * Then the sizes of the first n array dimensions must match between x and y.
+ * If x contains further dimensions the operator is batched across those dimensions.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray< std::complex<T> >& operator-= (hoNDArray< std::complex<T > > &x, const hoNDArray<T> &y);
+
+/**
+ * @brief Implementation of element-wise operator-= on a hoNDArray with a scalar value.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input scalar.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray< std::complex<T> >& operator-= (hoNDArray< std::complex<T> >&x, const T &y);
+
+/**
+ * @brief Implementation of element-wise operator-= on two hoNDArrays.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input array.
+
+ * Let y be an n-dimensional array.
+ * Then the sizes of the first n array dimensions must match between x and y.
+ * If x contains further dimensions the operator is batched across those dimensions.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray< complext<T> >& operator-= (hoNDArray< complext<T > > &x, const hoNDArray<T> &y);
+
+/**
+ * @brief Implementation of element-wise operator-= on a hoNDArray with a scalar value.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input scalar.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray< complext<T> >& operator-= (hoNDArray< complext<T> >&x, const T &y);
+
+/**
+ * @brief Implementation of element-wise operator*= on two hoNDArrays.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input array.
+
+ * Let y be an n-dimensional array.
+ * Then the sizes of the first n array dimensions must match between x and y.
+ * If x contains further dimensions the operator is batched across those dimensions.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray<T>& operator*= (hoNDArray<T> &x, const hoNDArray<T> &y);
+
+/**
+ * @brief Implementation of element-wise operator*= on a hoNDArray with a scalar value.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input scalar.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray<T>& operator*= (hoNDArray<T> &x, const T &y);
+
+/**
+ * @brief Implementation of element-wise operator*= on two hoNDArrays.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input array.
+
+ * Let y be an n-dimensional array.
+ * Then the sizes of the first n array dimensions must match between x and y.
+ * If x contains further dimensions the operator is batched across those dimensions.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray< std::complex<T> >& operator*= (hoNDArray< std::complex<T> > &x, const hoNDArray<T> &y);
+
+/**
+ * @brief Implementation of element-wise operator*= on a hoNDArray with a scalar value.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input scalar.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray< std::complex<T> >& operator*= (hoNDArray< std::complex<T> > &x, const T &y);
+
+/**
+ * @brief Implementation of element-wise operator*= on two hoNDArrays.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input array.
+
+ * Let y be an n-dimensional array.
+ * Then the sizes of the first n array dimensions must match between x and y.
+ * If x contains further dimensions the operator is batched across those dimensions.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray< complext<T> >& operator*= (hoNDArray< complext<T> > &x, const hoNDArray<T> &y);
+
+/**
+ * @brief Implementation of element-wise operator*= on a hoNDArray with a scalar value.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input scalar.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray< complext<T> >& operator*= (hoNDArray< complext<T> > &x, const T &y);
+
+/**
+ * @brief Implementation of element-wise operator/= on two hoNDArrays.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input array.
+
+ * Let y be an n-dimensional array.
+ * Then the sizes of the first n array dimensions must match between x and y.
+ * If x contains further dimensions the operator is batched across those dimensions.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray<T>& operator/= (hoNDArray<T> &x, const hoNDArray<T> &y);
+
+/**
+ * @brief Implementation of element-wise operator/= on a hoNDArray with a scalar value.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input scalar.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray<T>& operator/= (hoNDArray<T> &x, const T &y);
+
+/**
+ * @brief Implementation of element-wise operator/= on two hoNDArrays.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input array.
+
+ * Let y be an n-dimensional array.
+ * Then the sizes of the first n array dimensions must match between x and y.
+ * If x contains further dimensions the operator is batched across those dimensions.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray< std::complex<T> >& operator/= (hoNDArray< std::complex<T> > &x, const hoNDArray<T> &y);
+
+/**
+ * @brief Implementation of element-wise operator/= on a hoNDArray with a scalar value.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input scalar.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray< std::complex<T> >& operator/= (hoNDArray< std::complex<T> > &x, const T &y);
+
+/**
+ * @brief Implementation of element-wise operator/= on two hoNDArrays.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input array.
+
+ * Let y be an n-dimensional array.
+ * Then the sizes of the first n array dimensions must match between x and y.
+ * If x contains further dimensions the operator is batched across those dimensions.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray< complext<T> >& operator/= (hoNDArray< complext<T> > &x, const hoNDArray<T> &y);
+
+/**
+ * @brief Implementation of element-wise operator/= on a hoNDArray with a scalar value.
+ * @param[in,out] x Input and output array.
+ * @param[in] y Input scalar.
+ */
+template<class T> EXPORTCPUCOREMATH hoNDArray< complext<T> >& operator/= (hoNDArray< complext<T> > &x, const T &y);
+
+/**
+ * @brief Calculates y = a*x+y in which x and y are considered as vectors
+ * @param[in] a Scalar value
+ * @param[in] x Array
+ * @param[in,out] y Array
+ */
+template<class T> EXPORTCPUCOREMATH void axpy( T a, hoNDArray<T> *x, hoNDArray<T> *y );
+
+/**
+* @brief compute r = a*x + y
+*/
+template <typename T> EXPORTCPUCOREMATH void axpy(T a, const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r);
+
+/**
+* @brief compute x *= a
+*/
+template <typename T> EXPORTCPUCOREMATH void scal(T a, hoNDArray<T>& x);
+template <typename T> EXPORTCPUCOREMATH void scal(T a, hoNDArray< std::complex<T> >& x);
+template <typename T> EXPORTCPUCOREMATH void scal(T a, hoNDArray< complext<T> >& x);
+
+/**
+* @brief 2D convolution
+            x: input data, y: convolution kernel, z: output; each 2D slice is convolved
+*/
+template <typename T> EXPORTCPUCOREMATH 
+void conv2(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& z);
+
+/**
+* @brief 3D convolution
+            x: input data, y: convolution kernel, z: output; each 3D volume is convolved
+*/
+template <typename T> EXPORTCPUCOREMATH 
+void conv3(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& z);
+
+}
diff --git a/toolboxes/core/cpu/math/hoNDArray_linalg.cpp b/toolboxes/core/cpu/math/hoNDArray_linalg.cpp
new file mode 100644
index 0000000..f3f4599
--- /dev/null
+++ b/toolboxes/core/cpu/math/hoNDArray_linalg.cpp
@@ -0,0 +1,1949 @@
+
+#include "hoNDArray_linalg.h"
+#include "hoNDArray_elemwise.h"
+#include "hoNDArray_reductions.h"
+
+#ifndef lapack_complex_float
+    #define lapack_complex_float  std::complex<float> 
+#endif // lapack_complex_float
+
+#ifndef lapack_complex_double
+    #define lapack_complex_double  std::complex<double> 
+#endif // #ifndef lapack_complex_double
+
+extern "C" void sgemm_(const char *transa, const char *transb, const lapack_int *m, const lapack_int *n, const lapack_int *k,
+            const float *alpha, const float *a, const lapack_int *lda, const float *b, const lapack_int *ldb,
+            const float *beta, float *c, const lapack_int *ldc);
+
+extern "C" void dgemm_(const char *transa, const char *transb, const lapack_int *m, const lapack_int *n, const lapack_int *k,
+            const double *alpha, const double *a, const lapack_int *lda, const double *b, const lapack_int *ldb,
+            const double *beta, double *c, const lapack_int *ldc);
+
+extern "C" void cgemm_(const char *transa, const char *transb, const lapack_int *m, const lapack_int *n, const lapack_int *k,
+                    const lapack_complex_float *alpha, const lapack_complex_float *a, const lapack_int *lda,
+                    const lapack_complex_float *b, const lapack_int *ldb, const lapack_complex_float *beta,
+                    lapack_complex_float *c, const lapack_int *ldc);
+
+extern "C" void zgemm_(const char *transa, const char *transb, const lapack_int *m, const lapack_int *n, const lapack_int *k,
+            const lapack_complex_double *alpha, const lapack_complex_double *a, const lapack_int *lda,
+            const lapack_complex_double *b, const lapack_int *ldb, const lapack_complex_double *beta,
+            lapack_complex_double *c, const lapack_int *ldc);
+
+extern "C" void ssyrk_( const char* uplo, const char *trans, const lapack_int *n, const lapack_int *k, const float *alpha, const float *a, const lapack_int *lda, const float *beta, float *c, const lapack_int *ldc);
+extern "C" void dsyrk_( const char* uplo, const char *trans, const lapack_int *n, const lapack_int *k, const double *alpha, const double *a, const lapack_int *lda, const double *beta, double *c, const lapack_int *ldc);
+extern "C" void csyrk_( const char* uplo, const char *trans, const lapack_int *n, const lapack_int *k, const lapack_complex_float *alpha, const lapack_complex_float *a, const lapack_int *lda, const lapack_complex_float *beta, lapack_complex_float *c, const lapack_int *ldc);
+extern "C" void zsyrk_( const char* uplo, const char *trans, const lapack_int *n, const lapack_int *k, const lapack_complex_double *alpha, const lapack_complex_double *a, const lapack_int *lda, const lapack_complex_double *beta, lapack_complex_double *c, const lapack_int *ldc);
+
+extern "C" void cherk_( const char* uplo, const char *trans, const lapack_int *n, const lapack_int *k, const lapack_complex_float *alpha, const lapack_complex_float *a, const lapack_int *lda, const lapack_complex_float *beta, lapack_complex_float *c, const lapack_int *ldc);
+extern "C" void zherk_( const char* uplo, const char *trans, const lapack_int *n, const lapack_int *k, const lapack_complex_double *alpha, const lapack_complex_double *a, const lapack_int *lda, const lapack_complex_double *beta, lapack_complex_double *c, const lapack_int *ldc);
+
+extern "C" void spotrf_( const char* uplo, const lapack_int* n, float* a, const lapack_int* lda, lapack_int* info );
+extern "C" void dpotrf_( const char* uplo, const lapack_int* n, double* a, const lapack_int* lda, lapack_int* info );
+extern "C" void cpotrf_( const char* uplo, const lapack_int* n, lapack_complex_float* a, const lapack_int* lda, lapack_int* info );
+extern "C" void zpotrf_( const char* uplo, const lapack_int* n, lapack_complex_double* a, const lapack_int* lda, lapack_int* info );
+
+extern "C" void ssyev_( const char* jobz, const char* uplo, const lapack_int* n, float* a,
+        const lapack_int* lda, float* w, float* work, const lapack_int* lwork,
+        lapack_int* info );
+
+extern "C" void dsyev_( const char* jobz, const char* uplo, const lapack_int* n, double* a,
+        const lapack_int* lda, double* w, double* work, const lapack_int* lwork,
+        lapack_int* info );
+
+extern "C" void cheev_( const char* jobz, const char* uplo, const lapack_int* n,
+        lapack_complex_float* a, const lapack_int* lda, float* w, lapack_complex_float* work,
+        const lapack_int* lwork, float* rwork, lapack_int* info );
+
+extern "C" void zheev_( const char* jobz, const char* uplo, const lapack_int* n,
+        lapack_complex_double* a, const lapack_int* lda, double* w,
+        lapack_complex_double* work, const lapack_int* lwork, double* rwork,
+        lapack_int* info );
+
+extern "C" void spotrf_( const char* uplo, const lapack_int* n, float* a, const lapack_int* lda,
+        lapack_int* info );
+
+extern "C" void spotri_( const char* uplo, const lapack_int* n, float* a, const lapack_int* lda,
+        lapack_int* info );
+
+extern "C" void dpotrf_( const char* uplo, const lapack_int* n, double* a,
+        const lapack_int* lda, lapack_int* info );
+
+extern "C" void dpotri_( const char* uplo, const lapack_int* n, double* a,
+        const lapack_int* lda, lapack_int* info );
+
+extern "C" void cpotrf_( const char* uplo, const lapack_int* n, lapack_complex_float* a,
+        const lapack_int* lda, lapack_int* info );
+
+extern "C" void cpotri_( const char* uplo, const lapack_int* n, lapack_complex_float* a,
+        const lapack_int* lda, lapack_int* info );
+
+extern "C" void zpotrf_( const char* uplo, const lapack_int* n, lapack_complex_double* a,
+        const lapack_int* lda, lapack_int* info );
+
+extern "C" void zpotri_( const char* uplo, const lapack_int* n, lapack_complex_double* a,
+        const lapack_int* lda, lapack_int* info );
+
+extern "C" void strtri_( const char* uplo, const char* diag, const lapack_int* n, float* a,
+        const lapack_int* lda, lapack_int* info );
+
+extern "C" void dtrtri_( const char* uplo, const char* diag, const lapack_int* n, double* a,
+        const lapack_int* lda, lapack_int* info );
+
+extern "C" void ctrtri_( const char* uplo, const char* diag, const lapack_int* n,
+        lapack_complex_float* a, const lapack_int* lda, lapack_int* info );
+
+extern "C" void ztrtri_( const char* uplo, const char* diag, const lapack_int* n,
+        lapack_complex_double* a, const lapack_int* lda, lapack_int* info );
+
+extern "C" void sposv_( const char* uplo, const lapack_int* n, const lapack_int* nrhs, float* a,
+        const lapack_int* lda, float* b, const lapack_int* ldb, lapack_int* info );
+
+extern "C" void dposv_( const char* uplo, const lapack_int* n, const lapack_int* nrhs,
+        double* a, const lapack_int* lda, double* b, const lapack_int* ldb,
+        lapack_int* info );
+
+extern "C" void cposv_( const char* uplo, const lapack_int* n, const lapack_int* nrhs,
+        lapack_complex_float* a, const lapack_int* lda, lapack_complex_float* b,
+        const lapack_int* ldb, lapack_int* info );
+
+extern "C" void zposv_( const char* uplo, const lapack_int* n, const lapack_int* nrhs,
+        lapack_complex_double* a, const lapack_int* lda, lapack_complex_double* b,
+        const lapack_int* ldb, lapack_int* info );
+
+extern "C" void sgesv_( const lapack_int* n, const lapack_int* nrhs, float* a,
+        const lapack_int* lda, lapack_int* ipiv, float* b, const lapack_int* ldb, lapack_int* info );
+
+extern "C" void dgesv_( const lapack_int* n, const lapack_int* nrhs, double* a,
+        const lapack_int* lda, lapack_int* ipiv, double* b, const lapack_int* ldb, lapack_int* info );
+
+extern "C" void cgesv_( const lapack_int* n, const lapack_int* nrhs, lapack_complex_float* a,
+        const lapack_int* lda, lapack_int* ipiv, lapack_complex_float* b, const lapack_int* ldb, lapack_int* info );
+
+extern "C" void zgesv_( const lapack_int* n, const lapack_int* nrhs, lapack_complex_double* a,
+        const lapack_int* lda, lapack_int* ipiv, lapack_complex_double* b, const lapack_int* ldb, lapack_int* info );
+
+extern "C" void ssysv_( const char* uplo, const lapack_int* n, const lapack_int* nrhs, float* a,
+        const lapack_int* lda, lapack_int* ipiv, float* b, const lapack_int* ldb, float* work, lapack_int* lwork, lapack_int* info );
+
+extern "C" void dsysv_( const char* uplo, const lapack_int* n, const lapack_int* nrhs, double* a,
+        const lapack_int* lda, lapack_int* ipiv, double* b, const lapack_int* ldb, double* work, lapack_int* lwork, lapack_int* info );
+
+extern "C" void chesv_( const char* uplo, const lapack_int* n, const lapack_int* nrhs, lapack_complex_float* a,
+        const lapack_int* lda, lapack_int* ipiv, lapack_complex_float* b, const lapack_int* ldb, lapack_complex_float* work, lapack_int* lwork, lapack_int* info );
+
+extern "C" void zhesv_( const char* uplo, const lapack_int* n, const lapack_int* nrhs, lapack_complex_double* a,
+        const lapack_int* lda, lapack_int* ipiv, lapack_complex_double* b, const lapack_int* ldb, lapack_complex_double* work, lapack_int* lwork,  lapack_int* info );
+
+extern "C" void sgetrf_( const lapack_int* m, const lapack_int* n, float* a, const lapack_int* lda,
+        lapack_int* ipiv, lapack_int* info );
+
+extern "C" void dgetrf_( const lapack_int* m, const lapack_int* n, double* a,
+        const lapack_int* lda, lapack_int* ipiv, lapack_int* info );
+
+extern "C" void cgetrf_( const lapack_int* m, const lapack_int* n, lapack_complex_float* a,
+        const lapack_int* lda, lapack_int* ipiv, lapack_int* info );
+
+extern "C" void zgetrf_( const lapack_int* m, const lapack_int* n, lapack_complex_double* a,
+        const lapack_int* lda, lapack_int* ipiv, lapack_int* info );
+
+extern "C" void sgetri_( const lapack_int* n, float* a, const lapack_int* lda,
+        const lapack_int* ipiv, float* work, const lapack_int* lwork,
+        lapack_int* info );
+
+extern "C" void dgetri_( const lapack_int* n, double* a, const lapack_int* lda,
+        const lapack_int* ipiv, double* work, const lapack_int* lwork,
+        lapack_int* info );
+
+extern "C" void cgetri_( const lapack_int* n, lapack_complex_float* a, const lapack_int* lda,
+        const lapack_int* ipiv, lapack_complex_float* work, const lapack_int* lwork,
+        lapack_int* info );
+
+extern "C" void zgetri_( const lapack_int* n, lapack_complex_double* a, const lapack_int* lda,
+        const lapack_int* ipiv, lapack_complex_double* work, const lapack_int* lwork,
+        lapack_int* info );
+
+namespace Gadgetron
+{
+
+// The following matrix computations call BLAS/LAPACK routines and are compiled when either USE_MKL or USE_LAPACK is defined
+#if defined(USE_MKL) || defined(USE_LAPACK)
+
+/**
+ * @brief Single-precision complex matrix product C = A * B, computed with BLAS cgemm.
+ *
+ * Both operands are passed to cgemm with the 'N' flag (no transpose). Dimension 0 of
+ * every array is used both as its row count and as the leading dimension given to BLAS.
+ *
+ * @param[out] C Result; re-created as M x N when its first two sizes differ from that shape.
+ * @param[in] A Left operand (M x K).
+ * @param[in] B Right operand (K x N).
+ *
+ * C, A and B must be three distinct objects and A.get_size(1) must equal B.get_size(0);
+ * any failure inside the body is reported through GADGET_THROW.
+ */
+void gemm(hoNDArray< std::complex<float> >& C, const hoNDArray< std::complex<float> >& A, const hoNDArray< std::complex<float> >& B)
+{
+    typedef std::complex<float> ValueType;
+    try
+    {
+        // The product cannot be formed in place: all three arrays must be different objects.
+        GADGET_CHECK_THROW( (&C!=&A) && (&C!=&B) && (&A!=&B) );
+
+        lapack_int strideA = static_cast<lapack_int>(A.get_size(0));
+        lapack_int strideB = static_cast<lapack_int>(B.get_size(0));
+        const ValueType* srcA = A.begin();
+        const ValueType* srcB = B.begin();
+
+        lapack_int rows   = static_cast<lapack_int>(A.get_size(0));
+        lapack_int inner  = static_cast<lapack_int>(A.get_size(1));
+        lapack_int innerB = static_cast<lapack_int>(B.get_size(0));
+        lapack_int cols   = static_cast<lapack_int>(B.get_size(1));
+
+        GADGET_CHECK_THROW(inner==innerB);
+
+        // Only reallocate the output when its shape is not already rows x cols.
+        const bool shapeMatches = (C.get_size(0)==rows) && (C.get_size(1)==cols);
+        if ( !shapeMatches )
+        {
+            C.create(rows, cols);
+        }
+
+        ValueType* dst = C.begin();
+        lapack_int strideC = static_cast<lapack_int>(C.get_size(0));
+
+        // alpha = 1 and beta = 0: C is overwritten with the plain product.
+        ValueType one(1), zero(0);
+        const char opA = 'N';
+        const char opB = 'N';
+
+        cgemm_(&opA, &opB, &rows, &cols, &inner,
+               reinterpret_cast<lapack_complex_float*>(&one),
+               reinterpret_cast<const lapack_complex_float*>(srcA), &strideA,
+               reinterpret_cast<const lapack_complex_float*>(srcB), &strideB,
+               reinterpret_cast<lapack_complex_float*>(&zero),
+               reinterpret_cast<lapack_complex_float*>(dst), &strideC);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in gemm(hoNDArray< std::complex<float> >& C, const hoNDArray< std::complex<float> >& A, const hoNDArray< std::complex<float> >& B) ...");
+    }
+}
+
+/**
+ * @brief Single-precision real matrix product C = op(A) * op(B), computed with BLAS sgemm.
+ *
+ * Dimension 0 of every array is handed to BLAS as its leading dimension.
+ *
+ * @param[out] C Result; re-created as M x N when its first two sizes differ from that shape.
+ * @param[in] A Left operand.
+ * @param[in] transA When true A is passed with the 'T' flag, otherwise with 'N'.
+ * @param[in] B Right operand.
+ * @param[in] transB When true B is passed with the 'T' flag, otherwise with 'N'.
+ *
+ * C, A and B must be three distinct objects and the inner dimensions of op(A) and op(B)
+ * must agree; any failure inside the body is reported through GADGET_THROW.
+ */
+template<> EXPORTCPUCOREMATH
+void gemm(hoNDArray<float>& C, const hoNDArray<float>& A, bool transA, const hoNDArray<float>& B, bool transB)
+{
+    try
+    {
+        typedef float ValueType;
+
+        // The product cannot be formed in place: all three arrays must be different objects.
+        GADGET_CHECK_THROW( (&C!=&A) && (&C!=&B) && (&A!=&B) );
+
+        // Leading dimensions are the stored first dimension, independent of the transpose flags.
+        lapack_int strideA = static_cast<lapack_int>(A.get_size(0));
+        lapack_int strideB = static_cast<lapack_int>(B.get_size(0));
+        const ValueType* srcA = A.begin();
+        const ValueType* srcB = B.begin();
+
+        // op(A) is rows x inner, op(B) is innerB x cols.
+        lapack_int rows   = static_cast<lapack_int>( A.get_size(transA ? 1 : 0) );
+        lapack_int inner  = static_cast<lapack_int>( A.get_size(transA ? 0 : 1) );
+        lapack_int innerB = static_cast<lapack_int>( B.get_size(transB ? 1 : 0) );
+        lapack_int cols   = static_cast<lapack_int>( B.get_size(transB ? 0 : 1) );
+
+        GADGET_CHECK_THROW(inner==innerB);
+
+        const bool shapeMatches = (C.get_size(0)==rows) && (C.get_size(1)==cols);
+        if ( !shapeMatches )
+        {
+            C.create(rows, cols);
+        }
+
+        ValueType* dst = C.begin();
+        lapack_int strideC = static_cast<lapack_int>(C.get_size(0));
+
+        // alpha = 1 and beta = 0: C is overwritten with the plain product.
+        const float one = 1;
+        const float zero = 0;
+
+        const char opA = transA ? 'T' : 'N';
+        const char opB = transB ? 'T' : 'N';
+
+        sgemm_(&opA, &opB, &rows, &cols, &inner, &one, srcA, &strideA, srcB, &strideB, &zero, dst, &strideC);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in gemm(hoNDArray<float>& C, const hoNDArray<float>& A, bool transA, const hoNDArray<float>& B, bool transB) ...");
+    }
+}
+
+/**
+ * @brief Double-precision real matrix product C = op(A) * op(B), computed with BLAS dgemm.
+ *
+ * Dimension 0 of every array is handed to BLAS as its leading dimension.
+ *
+ * @param[out] C Result; re-created as M x N when its first two sizes differ from that shape.
+ * @param[in] A Left operand.
+ * @param[in] transA When true A is passed with the 'T' flag, otherwise with 'N'.
+ * @param[in] B Right operand.
+ * @param[in] transB When true B is passed with the 'T' flag, otherwise with 'N'.
+ *
+ * C, A and B must be three distinct objects and the inner dimensions of op(A) and op(B)
+ * must agree; any failure inside the body is reported through GADGET_THROW.
+ */
+template<> EXPORTCPUCOREMATH
+void gemm(hoNDArray<double>& C, const hoNDArray<double>& A, bool transA, const hoNDArray<double>& B, bool transB)
+{
+    try
+    {
+        typedef double ValueType;
+
+        // The product cannot be formed in place: all three arrays must be different objects.
+        GADGET_CHECK_THROW( (&C!=&A) && (&C!=&B) && (&A!=&B) );
+
+        // Leading dimensions are the stored first dimension, independent of the transpose flags.
+        lapack_int strideA = static_cast<lapack_int>(A.get_size(0));
+        lapack_int strideB = static_cast<lapack_int>(B.get_size(0));
+        const ValueType* srcA = A.begin();
+        const ValueType* srcB = B.begin();
+
+        // op(A) is rows x inner, op(B) is innerB x cols.
+        lapack_int rows   = static_cast<lapack_int>( A.get_size(transA ? 1 : 0) );
+        lapack_int inner  = static_cast<lapack_int>( A.get_size(transA ? 0 : 1) );
+        lapack_int innerB = static_cast<lapack_int>( B.get_size(transB ? 1 : 0) );
+        lapack_int cols   = static_cast<lapack_int>( B.get_size(transB ? 0 : 1) );
+
+        GADGET_CHECK_THROW(inner==innerB);
+
+        const bool shapeMatches = (C.get_size(0)==rows) && (C.get_size(1)==cols);
+        if ( !shapeMatches )
+        {
+            C.create(rows, cols);
+        }
+
+        ValueType* dst = C.begin();
+        lapack_int strideC = static_cast<lapack_int>(C.get_size(0));
+
+        // alpha = 1 and beta = 0: C is overwritten with the plain product.
+        const double one = 1;
+        const double zero = 0;
+
+        const char opA = transA ? 'T' : 'N';
+        const char opB = transB ? 'T' : 'N';
+
+        dgemm_(&opA, &opB, &rows, &cols, &inner, &one, srcA, &strideA, srcB, &strideB, &zero, dst, &strideC);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in gemm(hoNDArray<double>& C, const hoNDArray<double>& A, bool transA, const hoNDArray<double>& B, bool transB) ...");
+    }
+}
+
+/**
+ * @brief Single-precision complex matrix product C = op(A) * op(B), computed with BLAS cgemm.
+ *
+ * Dimension 0 of every array is handed to BLAS as its leading dimension.
+ *
+ * @param[out] C Result; re-created as M x N when its first two sizes differ from that shape.
+ * @param[in] A Left operand.
+ * @param[in] transA When true A is passed with the 'C' flag (conjugate transpose in BLAS), otherwise 'N'.
+ * @param[in] B Right operand.
+ * @param[in] transB When true B is passed with the 'C' flag (conjugate transpose in BLAS), otherwise 'N'.
+ *
+ * C, A and B must be three distinct objects and the inner dimensions of op(A) and op(B)
+ * must agree; any failure inside the body is reported through GADGET_THROW.
+ */
+template<> EXPORTCPUCOREMATH
+void gemm(hoNDArray< std::complex<float> >& C, const hoNDArray< std::complex<float> >& A, bool transA, const hoNDArray< std::complex<float> >& B, bool transB)
+{
+    try
+    {
+        typedef std::complex<float> ValueType;
+
+        // The product cannot be formed in place: all three arrays must be different objects.
+        GADGET_CHECK_THROW( (&C!=&A) && (&C!=&B) && (&A!=&B) );
+
+        // Leading dimensions are the stored first dimension, independent of the transpose flags.
+        lapack_int strideA = static_cast<lapack_int>(A.get_size(0));
+        lapack_int strideB = static_cast<lapack_int>(B.get_size(0));
+        const ValueType* srcA = A.begin();
+        const ValueType* srcB = B.begin();
+
+        // op(A) is rows x inner, op(B) is innerB x cols.
+        lapack_int rows   = static_cast<lapack_int>( A.get_size(transA ? 1 : 0) );
+        lapack_int inner  = static_cast<lapack_int>( A.get_size(transA ? 0 : 1) );
+        lapack_int innerB = static_cast<lapack_int>( B.get_size(transB ? 1 : 0) );
+        lapack_int cols   = static_cast<lapack_int>( B.get_size(transB ? 0 : 1) );
+
+        GADGET_CHECK_THROW(inner==innerB);
+
+        const bool shapeMatches = (C.get_size(0)==rows) && (C.get_size(1)==cols);
+        if ( !shapeMatches )
+        {
+            C.create(rows, cols);
+        }
+
+        ValueType* dst = C.begin();
+        lapack_int strideC = static_cast<lapack_int>(C.get_size(0));
+
+        // alpha = 1 and beta = 0: C is overwritten with the plain product.
+        ValueType one(1), zero(0);
+
+        const char opA = transA ? 'C' : 'N';
+        const char opB = transB ? 'C' : 'N';
+
+        cgemm_(&opA, &opB, &rows, &cols, &inner,
+               reinterpret_cast<lapack_complex_float*>(&one),
+               reinterpret_cast<const lapack_complex_float*>(srcA), &strideA,
+               reinterpret_cast<const lapack_complex_float*>(srcB), &strideB,
+               reinterpret_cast<lapack_complex_float*>(&zero),
+               reinterpret_cast<lapack_complex_float*>(dst), &strideC);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in gemm(hoNDArray< std::complex<float> >& C, const hoNDArray< std::complex<float> >& A, bool transA, const hoNDArray< std::complex<float> >& B, bool transB) ...");
+    }
+}
+
+/**
+ * @brief gemm for complext<float> arrays: forwards to the std::complex<float> specialization.
+ *
+ * The three arrays are reinterpreted as hoNDArray< std::complex<float> > without copying.
+ * NOTE(review): this presumes complext<float> and std::complex<float> share the same
+ * memory layout - confirm against the complext definition.
+ *
+ * @param[out] C Result array.
+ * @param[in] A Left operand.
+ * @param[in] transA Forwarded unchanged.
+ * @param[in] B Right operand.
+ * @param[in] transB Forwarded unchanged.
+ */
+template<> EXPORTCPUCOREMATH
+void gemm(hoNDArray< complext<float> >& C, const hoNDArray< complext<float> >& A, bool transA, const hoNDArray< complext<float> >& B, bool transB)
+{
+    try
+    {
+        typedef hoNDArray< std::complex<float> > StdComplexArray;
+
+        StdComplexArray& product = reinterpret_cast<StdComplexArray&>(C);
+        const StdComplexArray& lhs = reinterpret_cast<const StdComplexArray&>(A);
+        const StdComplexArray& rhs = reinterpret_cast<const StdComplexArray&>(B);
+
+        gemm(product, lhs, transA, rhs, transB);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in gemm(hoNDArray< complext<float> >& C, const hoNDArray< complext<float> >& A, bool transA, const hoNDArray< complext<float> >& B, bool transB) ...");
+    }
+}
+
+/**
+ * @brief Double-precision complex matrix product C = op(A) * op(B), computed with BLAS zgemm.
+ *
+ * Dimension 0 of every array is handed to BLAS as its leading dimension.
+ *
+ * @param[out] C Result; re-created as M x N when its first two sizes differ from that shape.
+ * @param[in] A Left operand.
+ * @param[in] transA When true A is passed with the 'C' flag (conjugate transpose in BLAS), otherwise 'N'.
+ * @param[in] B Right operand.
+ * @param[in] transB When true B is passed with the 'C' flag (conjugate transpose in BLAS), otherwise 'N'.
+ *
+ * C, A and B must be three distinct objects and the inner dimensions of op(A) and op(B)
+ * must agree; any failure inside the body is reported through GADGET_THROW.
+ */
+template<> EXPORTCPUCOREMATH 
+void gemm(hoNDArray< std::complex<double> >& C, const hoNDArray< std::complex<double> >& A, bool transA, const hoNDArray< std::complex<double> >& B, bool transB)
+{
+    try
+    {
+        typedef  std::complex<double>  T;
+
+        // The product cannot be formed in place: all three arrays must be different objects.
+        GADGET_CHECK_THROW( (&C!=&A) && (&C!=&B) && (&A!=&B) );
+
+        // Leading dimensions are the stored first dimension, independent of the transpose flags.
+        lapack_int lda = static_cast<lapack_int>(A.get_size(0));
+        lapack_int ldb = static_cast<lapack_int>(B.get_size(0));
+        const T* pA = A.begin();
+        const T* pB = B.begin();
+
+        // op(A) is M x K, op(B) is K2 x N.
+        lapack_int M  = static_cast<lapack_int>( A.get_size(transA ? 1 : 0) );
+        lapack_int K  = static_cast<lapack_int>( A.get_size(transA ? 0 : 1) );
+        lapack_int K2 = static_cast<lapack_int>( B.get_size(transB ? 1 : 0) );
+        lapack_int N  = static_cast<lapack_int>( B.get_size(transB ? 0 : 1) );
+
+        GADGET_CHECK_THROW(K==K2);
+        if ( (C.get_size(0)!=M) || (C.get_size(1)!=N) )
+        {
+            C.create(M, N);
+        }
+
+        T* pC = C.begin();
+        lapack_int ldc = static_cast<lapack_int>(C.get_size(0));
+
+        // alpha = 1 and beta = 0: C is overwritten with the plain product.
+        T alpha(1), beta(0);
+
+        const char TA = transA ? 'C' : 'N';
+        const char TB = transB ? 'C' : 'N';
+
+        zgemm_(&TA, &TB, &M, &N, &K,
+               reinterpret_cast<lapack_complex_double*>(&alpha),
+               reinterpret_cast<const lapack_complex_double*>(pA), &lda,
+               reinterpret_cast<const lapack_complex_double*>(pB), &ldb,
+               reinterpret_cast<lapack_complex_double*>(&beta),
+               reinterpret_cast<lapack_complex_double*>(pC), &ldc);
+    }
+    catch(...)
+    {
+        // The message names this (double-precision) overload so failures are attributed correctly.
+        GADGET_THROW("Errors in gemm(hoNDArray< std::complex<double> >& C, const hoNDArray< std::complex<double> >& A, bool transA, const hoNDArray< std::complex<double> >& B, bool transB) ...");
+    }
+}
+
+/**
+ * @brief gemm for complext<double> arrays: forwards to the std::complex<double> specialization.
+ *
+ * The three arrays are reinterpreted as hoNDArray< std::complex<double> > without copying.
+ * NOTE(review): this presumes complext<double> and std::complex<double> share the same
+ * memory layout - confirm against the complext definition.
+ *
+ * @param[out] C Result array.
+ * @param[in] A Left operand.
+ * @param[in] transA Forwarded unchanged.
+ * @param[in] B Right operand.
+ * @param[in] transB Forwarded unchanged.
+ */
+template<> EXPORTCPUCOREMATH
+void gemm(hoNDArray< complext<double> >& C, const hoNDArray< complext<double> >& A, bool transA, const hoNDArray< complext<double> >& B, bool transB)
+{
+    try
+    {
+        typedef hoNDArray< std::complex<double> > StdComplexArray;
+
+        StdComplexArray& product = reinterpret_cast<StdComplexArray&>(C);
+        const StdComplexArray& lhs = reinterpret_cast<const StdComplexArray&>(A);
+        const StdComplexArray& rhs = reinterpret_cast<const StdComplexArray&>(B);
+
+        gemm(product, lhs, transA, rhs, transB);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in gemm(hoNDArray< complext<double> >& C, const hoNDArray< complext<double> >& A, bool transA, const hoNDArray< complext<double> >& B, bool transB) ...");
+    }
+}
+
+/// ------------------------------------------------------------------------------------
+
+/**
+ * @brief Single-precision symmetric rank-k update through BLAS ssyrk (alpha = 1, beta = 0).
+ *
+ * @param[out] C Result; re-created as M x M when its first two sizes differ from that shape.
+ * @param[in] A Input matrix; dimension 0 is used as its leading dimension.
+ * @param[in] uplo Forwarded unchanged to ssyrk as its triangle selector.
+ * @param[in] isATA When true A is passed with the 'T' flag and M = A.get_size(1);
+ *                  otherwise the flag is 'N' and M = A.get_size(0).
+ *
+ * A and C must be different objects; any failure inside the body is reported through GADGET_THROW.
+ */
+template<> EXPORTCPUCOREMATH 
+void syrk(hoNDArray<float>& C, const hoNDArray<float>& A, char uplo, bool isATA)
+{
+    try
+    {
+        typedef float T;
+
+        // The update cannot be formed in place.
+        GADGET_CHECK_THROW( (&A!=&C) );
+
+        lapack_int lda = static_cast<lapack_int>(A.get_size(0));
+        const T* pA = A.begin();
+
+        // M is the order of C, K the dimension that is summed over.
+        lapack_int M = static_cast<lapack_int>( A.get_size(isATA ? 1 : 0) );
+        lapack_int K = static_cast<lapack_int>( A.get_size(isATA ? 0 : 1) );
+
+        if ( (C.get_size(0)!=M) || (C.get_size(1)!=M) )
+        {
+            C.create(M, M);
+        }
+
+        T* pC = C.begin();
+        lapack_int ldc = static_cast<lapack_int>(C.get_size(0));
+
+        const float alpha(1), beta(0);
+        const char TA = isATA ? 'T' : 'N';
+
+        ssyrk_(&uplo, &TA, &M, &K, &alpha, pA, &lda, &beta, pC, &ldc);
+    }
+    catch(...)
+    {
+        // Report the public function name (syrk), matching the other specializations.
+        GADGET_THROW("Errors in syrk(hoNDArray<float>& C, const hoNDArray<float>& A, char uplo, bool isATA) ...");
+    }
+}
+
+/**
+ * @brief Double-precision symmetric rank-k update through BLAS dsyrk (alpha = 1, beta = 0).
+ *
+ * @param[out] C Result; re-created as M x M when its first two sizes differ from that shape.
+ * @param[in] A Input matrix; dimension 0 is used as its leading dimension.
+ * @param[in] uplo Forwarded unchanged to dsyrk as its triangle selector.
+ * @param[in] isATA When true A is passed with the 'T' flag and M = A.get_size(1);
+ *                  otherwise the flag is 'N' and M = A.get_size(0).
+ *
+ * A and C must be different objects; any failure inside the body is reported through GADGET_THROW.
+ */
+template<> EXPORTCPUCOREMATH
+void syrk(hoNDArray<double>& C, const hoNDArray<double>& A, char uplo, bool isATA)
+{
+    try
+    {
+        typedef double ValueType;
+
+        // The update cannot be formed in place.
+        GADGET_CHECK_THROW( (&A!=&C) );
+
+        lapack_int strideA = static_cast<lapack_int>(A.get_size(0));
+        const ValueType* src = A.begin();
+
+        // order is the size of the square result, depth the dimension that is summed over.
+        lapack_int order = static_cast<lapack_int>( A.get_size(isATA ? 1 : 0) );
+        lapack_int depth = static_cast<lapack_int>( A.get_size(isATA ? 0 : 1) );
+
+        const bool shapeMatches = (C.get_size(0)==order) && (C.get_size(1)==order);
+        if ( !shapeMatches )
+        {
+            C.create(order, order);
+        }
+
+        ValueType* dst = C.begin();
+        lapack_int strideC = static_cast<lapack_int>(C.get_size(0));
+
+        const double one = 1;
+        const double zero = 0;
+        const char opA = isATA ? 'T' : 'N';
+
+        dsyrk_(&uplo, &opA, &order, &depth, &one, src, &strideA, &zero, dst, &strideC);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in syrk(hoNDArray<double>& C, const hoNDArray<double>& A, char uplo, bool isATA) ...");
+    }
+}
+
+/**
+ * @brief Single-precision complex symmetric rank-k update through BLAS csyrk (alpha = 1, beta = 0).
+ *
+ * @param[out] C Result; re-created as N x N when its first two sizes differ from that shape,
+ *               in line with the other syrk/herk specializations in this file.
+ * @param[in] A Input matrix; dimension 0 is used as its leading dimension.
+ * @param[in] uplo Forwarded unchanged to csyrk as its triangle selector.
+ * @param[in] isATA When true A is passed with the 'T' flag and N = A.get_size(1);
+ *                  otherwise the flag is 'N' and N = A.get_size(0).
+ *
+ * A and C must be different objects; any failure inside the body is reported through GADGET_THROW.
+ */
+template<> EXPORTCPUCOREMATH 
+void syrk(hoNDArray< std::complex<float> >& C, const hoNDArray< std::complex<float> >& A, char uplo, bool isATA)
+{
+    try
+    {
+        typedef  std::complex<float>  T;
+
+        // The update cannot be formed in place.
+        GADGET_CHECK_THROW( (&A!=&C) );
+
+        char TA;
+
+        lapack_int lda = (lapack_int)A.get_size(0);
+        const T* pA = A.begin(); 
+
+        // N is the order of C, K the dimension that is summed over.
+        lapack_int N = (lapack_int)A.get_size(0);
+        lapack_int K = (lapack_int)A.get_size(1);
+        if ( isATA )
+        { 
+            N = (lapack_int)A.get_size(1);
+            K = (lapack_int)A.get_size(0);
+        }
+
+        // beta is 0, so the previous contents of C are never read and the output
+        // can be allocated here instead of rejecting a mis-sized array.
+        if ( (C.get_size(0)!=N) || (C.get_size(1)!=N) )
+        {
+            C.create(N, N);
+        }
+
+        T* pC = C.begin();
+        lapack_int ldc = (lapack_int)C.get_size(0);
+
+        lapack_complex_float alpha(1), beta(0);
+
+        if ( isATA )
+        {
+            TA = 'T';
+        }
+        else
+        {
+            TA = 'N';
+        }
+
+        csyrk_(&uplo, &TA, &N, &K, &alpha, pA, &lda, &beta, pC, &ldc);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in syrk(hoNDArray< std::complex<float> >& C, const hoNDArray< std::complex<float> >& A, char uplo, bool isATA) ...");
+    }
+}
+
+/**
+ * @brief syrk for complext<float> arrays: forwards to the std::complex<float> specialization.
+ *
+ * Both arrays are reinterpreted as hoNDArray< std::complex<float> > without copying.
+ * NOTE(review): this presumes complext<float> and std::complex<float> share the same
+ * memory layout - confirm against the complext definition.
+ *
+ * @param[out] C Result array.
+ * @param[in] A Input matrix.
+ * @param[in] uplo Forwarded unchanged.
+ * @param[in] isATA Forwarded unchanged.
+ */
+template<> EXPORTCPUCOREMATH
+void syrk(hoNDArray< complext<float> >& C, const hoNDArray< complext<float> >& A, char uplo, bool isATA)
+{
+    try
+    {
+        typedef hoNDArray< std::complex<float> > StdComplexArray;
+
+        StdComplexArray& result = reinterpret_cast<StdComplexArray&>(C);
+        const StdComplexArray& input = reinterpret_cast<const StdComplexArray&>(A);
+
+        syrk(result, input, uplo, isATA);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in syrk(hoNDArray< complext<float> >& C, const hoNDArray< complext<float> >& A, char uplo, bool isATA) ...");
+    }
+}
+
+/**
+ * @brief Double-precision complex symmetric rank-k update through BLAS zsyrk (alpha = 1, beta = 0).
+ *
+ * @param[out] C Result; re-created as M x M when its first two sizes differ from that shape.
+ * @param[in] A Input matrix; dimension 0 is used as its leading dimension.
+ * @param[in] uplo Forwarded unchanged to zsyrk as its triangle selector.
+ * @param[in] isATA When true A is passed with the 'T' flag and M = A.get_size(1);
+ *                  otherwise the flag is 'N' and M = A.get_size(0).
+ *
+ * A and C must be different objects; any failure inside the body is reported through GADGET_THROW.
+ */
+template<> EXPORTCPUCOREMATH
+void syrk(hoNDArray< std::complex<double> >& C, const hoNDArray< std::complex<double> >& A, char uplo, bool isATA)
+{
+    try
+    {
+        typedef std::complex<double> ValueType;
+
+        // The update cannot be formed in place.
+        GADGET_CHECK_THROW( (&A!=&C) );
+
+        lapack_int strideA = static_cast<lapack_int>(A.get_size(0));
+        const ValueType* src = A.begin();
+
+        // order is the size of the square result, depth the dimension that is summed over.
+        lapack_int order = static_cast<lapack_int>( A.get_size(isATA ? 1 : 0) );
+        lapack_int depth = static_cast<lapack_int>( A.get_size(isATA ? 0 : 1) );
+
+        const bool shapeMatches = (C.get_size(0)==order) && (C.get_size(1)==order);
+        if ( !shapeMatches )
+        {
+            C.create(order, order);
+        }
+
+        ValueType* dst = C.begin();
+        lapack_int strideC = static_cast<lapack_int>(C.get_size(0));
+
+        lapack_complex_double one(1), zero(0);
+        const char opA = isATA ? 'T' : 'N';
+
+        zsyrk_(&uplo, &opA, &order, &depth, &one, src, &strideA, &zero, dst, &strideC);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in syrk(hoNDArray< std::complex<double> >& C, const hoNDArray< std::complex<double> >& A, char uplo, bool isATA) ...");
+    }
+}
+
+/**
+ * @brief syrk for complext<double> arrays: forwards to the std::complex<double> specialization.
+ *
+ * Both arrays are reinterpreted as hoNDArray< std::complex<double> > without copying.
+ * NOTE(review): this presumes complext<double> and std::complex<double> share the same
+ * memory layout - confirm against the complext definition.
+ *
+ * @param[out] C Result array.
+ * @param[in] A Input matrix.
+ * @param[in] uplo Forwarded unchanged.
+ * @param[in] isATA Forwarded unchanged.
+ */
+template<> EXPORTCPUCOREMATH
+void syrk(hoNDArray< complext<double> >& C, const hoNDArray< complext<double> >& A, char uplo, bool isATA)
+{
+    try
+    {
+        typedef hoNDArray< std::complex<double> > StdComplexArray;
+
+        StdComplexArray& result = reinterpret_cast<StdComplexArray&>(C);
+        const StdComplexArray& input = reinterpret_cast<const StdComplexArray&>(A);
+
+        syrk(result, input, uplo, isATA);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in syrk(hoNDArray< complext<double> >& C, const hoNDArray< complext<double> >& A, char uplo, bool isATA) ...");
+    }
+}
+
+/// ------------------------------------------------------------------------------------
+
+/**
+ * @brief herk for real single-precision data: delegates to syrk.
+ * @param[out] C Result array, passed through to syrk.
+ * @param[in] A Input matrix, passed through to syrk.
+ * @param[in] uplo Passed through to syrk.
+ * @param[in] isAHA Passed to syrk as its isATA flag.
+ */
+template<> EXPORTCPUCOREMATH
+void herk(hoNDArray<float>& C, const hoNDArray<float>& A, char uplo, bool isAHA)
+{
+    // The real-valued case is handled entirely by syrk; isAHA takes the place of isATA.
+    const bool isATA = isAHA;
+    syrk(C, A, uplo, isATA);
+}
+
+/**
+ * @brief herk for real double-precision data: delegates to syrk.
+ * @param[out] C Result array, passed through to syrk.
+ * @param[in] A Input matrix, passed through to syrk.
+ * @param[in] uplo Passed through to syrk.
+ * @param[in] isAHA Passed to syrk as its isATA flag.
+ */
+template<> EXPORTCPUCOREMATH
+void herk(hoNDArray<double>& C, const hoNDArray<double>& A, char uplo, bool isAHA)
+{
+    // The real-valued case is handled entirely by syrk; isAHA takes the place of isATA.
+    const bool isATA = isAHA;
+    syrk(C, A, uplo, isATA);
+}
+
+/**
+ * @brief Single-precision complex Hermitian rank-k update through BLAS cherk (alpha = 1, beta = 0).
+ *
+ * @param[out] C Result; re-created as N x N when its first two sizes differ from that shape.
+ * @param[in] A Input matrix; dimension 0 is used as its leading dimension.
+ * @param[in] uplo Forwarded unchanged to cherk as its triangle selector.
+ * @param[in] isAHA When true A is passed with the 'C' flag and N = A.get_size(1);
+ *                  otherwise the flag is 'N' and N = A.get_size(0).
+ *
+ * A and C must be different objects; any failure inside the body is reported through GADGET_THROW.
+ *
+ * NOTE(review): alpha and beta are passed as complex values to match the cherk_ prototype
+ * declared in this file, while the BLAS reference documents them as real scalars - confirm
+ * the prototype against the BLAS/MKL headers in use.
+ */
+template<> EXPORTCPUCOREMATH
+void herk(hoNDArray< std::complex<float> >& C, const hoNDArray< std::complex<float> >& A, char uplo, bool isAHA)
+{
+    try
+    {
+        typedef std::complex<float> ValueType;
+
+        // The update cannot be formed in place.
+        GADGET_CHECK_THROW( (&A!=&C) );
+
+        lapack_int strideA = static_cast<lapack_int>(A.get_size(0));
+        const ValueType* src = A.begin();
+
+        // order is the size of the square result, depth the dimension that is summed over.
+        lapack_int order = static_cast<lapack_int>( A.get_size(isAHA ? 1 : 0) );
+        lapack_int depth = static_cast<lapack_int>( A.get_size(isAHA ? 0 : 1) );
+
+        const bool shapeMatches = (C.get_size(0)==order) && (C.get_size(1)==order);
+        if ( !shapeMatches )
+        {
+            C.create(order, order);
+        }
+
+        ValueType* dst = C.begin();
+        lapack_int strideC = static_cast<lapack_int>(C.get_size(0));
+
+        lapack_complex_float one(1), zero(0);
+        const char opA = isAHA ? 'C' : 'N';
+
+        cherk_(&uplo, &opA, &order, &depth, &one, src, &strideA, &zero, dst, &strideC);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in herk(hoNDArray< std::complex<float> >& C, const hoNDArray< std::complex<float> >& A, char uplo, bool isAHA) ...");
+    }
+}
+
+/**
+ * @brief herk for complext<float> arrays: forwards to the std::complex<float> specialization.
+ *
+ * Both arrays are reinterpreted as hoNDArray< std::complex<float> > without copying.
+ * NOTE(review): this presumes complext<float> and std::complex<float> share the same
+ * memory layout - confirm against the complext definition.
+ *
+ * @param[out] C Result array.
+ * @param[in] A Input matrix.
+ * @param[in] uplo Forwarded unchanged.
+ * @param[in] isATA Forwarded unchanged as the last argument of the std::complex herk.
+ */
+template<> EXPORTCPUCOREMATH
+void herk(hoNDArray< complext<float> >& C, const hoNDArray< complext<float> >& A, char uplo, bool isATA)
+{
+    try
+    {
+        typedef hoNDArray< std::complex<float> > StdComplexArray;
+
+        StdComplexArray& result = reinterpret_cast<StdComplexArray&>(C);
+        const StdComplexArray& input = reinterpret_cast<const StdComplexArray&>(A);
+
+        herk(result, input, uplo, isATA);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in herk(hoNDArray< complext<float> >& C, const hoNDArray< complext<float> >& A, char uplo, bool isATA) ...");
+    }
+}
+
+/**
+ * @brief Double-precision complex Hermitian rank-k update through BLAS zherk (alpha = 1, beta = 0).
+ *
+ * @param[out] C Result; re-created as N x N when its first two sizes differ from that shape.
+ * @param[in] A Input matrix; dimension 0 is used as its leading dimension.
+ * @param[in] uplo Forwarded unchanged to zherk as its triangle selector.
+ * @param[in] isAHA When true A is passed with the 'C' flag and N = A.get_size(1);
+ *                  otherwise the flag is 'N' and N = A.get_size(0).
+ *
+ * A and C must be different objects; any failure inside the body is reported through GADGET_THROW.
+ *
+ * NOTE(review): alpha and beta are passed as complex values to match the zherk_ prototype
+ * declared in this file, while the BLAS reference documents them as real scalars - confirm
+ * the prototype against the BLAS/MKL headers in use.
+ */
+template<> EXPORTCPUCOREMATH
+void herk(hoNDArray< std::complex<double> >& C, const hoNDArray< std::complex<double> >& A, char uplo, bool isAHA)
+{
+    try
+    {
+        typedef std::complex<double> ValueType;
+
+        // The update cannot be formed in place.
+        GADGET_CHECK_THROW( (&A!=&C) );
+
+        lapack_int strideA = static_cast<lapack_int>(A.get_size(0));
+        const ValueType* src = A.begin();
+
+        // order is the size of the square result, depth the dimension that is summed over.
+        lapack_int order = static_cast<lapack_int>( A.get_size(isAHA ? 1 : 0) );
+        lapack_int depth = static_cast<lapack_int>( A.get_size(isAHA ? 0 : 1) );
+
+        const bool shapeMatches = (C.get_size(0)==order) && (C.get_size(1)==order);
+        if ( !shapeMatches )
+        {
+            C.create(order, order);
+        }
+
+        ValueType* dst = C.begin();
+        lapack_int strideC = static_cast<lapack_int>(C.get_size(0));
+
+        lapack_complex_double one(1), zero(0);
+        const char opA = isAHA ? 'C' : 'N';
+
+        zherk_(&uplo, &opA, &order, &depth, &one, src, &strideA, &zero, dst, &strideC);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in herk(hoNDArray< std::complex<double> >& C, const hoNDArray< std::complex<double> >& A, char uplo, bool isAHA) ...");
+    }
+}
+
+/**
+ * @brief herk for complext<double> arrays: forwards to the std::complex<double> specialization.
+ *
+ * Both arrays are reinterpreted as hoNDArray< std::complex<double> > without copying.
+ * NOTE(review): this presumes complext<double> and std::complex<double> share the same
+ * memory layout - confirm against the complext definition.
+ *
+ * @param[out] C Result array.
+ * @param[in] A Input matrix.
+ * @param[in] uplo Forwarded unchanged.
+ * @param[in] isATA Forwarded unchanged as the last argument of the std::complex herk.
+ */
+template<> EXPORTCPUCOREMATH
+void herk(hoNDArray< complext<double> >& C, const hoNDArray< complext<double> >& A, char uplo, bool isATA)
+{
+    try
+    {
+        typedef hoNDArray< std::complex<double> > StdComplexArray;
+
+        StdComplexArray& result = reinterpret_cast<StdComplexArray&>(C);
+        const StdComplexArray& input = reinterpret_cast<const StdComplexArray&>(A);
+
+        herk(result, input, uplo, isATA);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in herk(hoNDArray< complext<double> >& C, const hoNDArray< complext<double> >& A, char uplo, bool isATA) ...");
+    }
+}
+
+/// ------------------------------------------------------------------------------------
+
+/**
+ * @brief In-place factorization of a square matrix with LAPACK ?potrf (Cholesky).
+ *
+ * The routine is chosen at run time from T: spotrf (float), dpotrf (double),
+ * cpotrf (std::complex<float> / complext<float>) or zpotrf
+ * (std::complex<double> / complext<double>). After a successful call the strict
+ * triangle opposite to the requested one is set to zero.
+ *
+ * @param[in,out] A Square matrix; overwritten with the factor. An empty array returns immediately.
+ * @param[in] uplo Forwarded to ?potrf. 'U' zeroes the strict lower triangle afterwards;
+ *                 any other value zeroes the strict upper triangle.
+ *
+ * A non-square input, an unsupported T or a non-zero LAPACK info value is reported
+ * through GADGET_THROW.
+ */
+template<typename T> 
+void potrf(hoNDArray<T>& A, char uplo)
+{
+    try
+    {
+        if( A.get_number_of_elements()==0 ) return;
+        GADGET_CHECK_THROW(A.get_size(0)==A.get_size(1));
+
+        lapack_int info;
+        lapack_int n = (lapack_int)(A.get_size(0));
+        T* pA = A.begin();
+        lapack_int lda = (lapack_int)(A.get_size(0));
+
+        if ( typeid(T)==typeid(float) )
+        {
+            spotrf_(&uplo, &n, reinterpret_cast<float*>(pA), &lda, &info);
+        }
+        else if ( typeid(T)==typeid(double) )
+        {
+            dpotrf_(&uplo, &n, reinterpret_cast<double*>(pA), &lda, &info);
+        }
+        else if ( (typeid(T)==typeid( std::complex<float> )) || (typeid(T)==typeid( complext<float> )) )
+        {
+            cpotrf_(&uplo, &n, reinterpret_cast<lapack_complex_float*>(pA), &lda, &info);
+        }
+        // Double-precision complex: the test must name std::complex<double>; comparing against
+        // std::complex<float> here made that element type fall through to "unsupported type".
+        else if ( (typeid(T)==typeid( std::complex<double> )) || (typeid(T)==typeid( complext<double> )) )
+        {
+            zpotrf_(&uplo, &n, reinterpret_cast<lapack_complex_double*>(pA), &lda, &info);
+        }
+        else
+        {
+            GADGET_THROW("potrf : unsupported type ... ");
+        }
+
+        // info != 0 signals that LAPACK could not complete the factorization.
+        GADGET_CHECK_THROW(info==0);
+
+        // Element (r, c) is stored at pA[r + c*n].
+        if ( uplo == 'U' )
+        {
+            // Upper factor requested: clear everything strictly below the diagonal.
+            size_t r, c;
+            for (c=0; c<n; c++)
+            {
+                for (r=c+1; r<n; r++)
+                {
+                    pA[r + c*n] = 0;
+                }
+            }
+        }
+        else
+        {
+            // Otherwise clear everything strictly above the diagonal.
+            size_t r, c;
+            for (r=0; r<n; r++)
+            {
+                for (c=r+1; c<n; c++)
+                {
+                    pA[r + c*n] = 0;
+                }
+            }
+        }
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in potrf(hoNDArray<T>& A, char uplo) ...");
+    }
+}
+
+template EXPORTCPUCOREMATH void potrf(hoNDArray<float>& A, char uplo);
+template EXPORTCPUCOREMATH void potrf(hoNDArray<double>& A, char uplo);
+template EXPORTCPUCOREMATH void potrf(hoNDArray< std::complex<float> >& A, char uplo);
+template EXPORTCPUCOREMATH void potrf(hoNDArray< complext<float> >& A, char uplo);
+template EXPORTCPUCOREMATH void potrf(hoNDArray< std::complex<double> >& A, char uplo);
+template EXPORTCPUCOREMATH void potrf(hoNDArray< complext<double> >& A, char uplo);
+
+/// ------------------------------------------------------------------------------------
+
+/// Eigen-decomposition of a symmetric (real T) or Hermitian (complex T) matrix through
+/// LAPACK ssyev_/dsyev_/cheev_/zheev_, chosen from the element type T.
+///
+/// The LAPACK call is made with jobz='V' and uplo='L'.
+///
+/// @param A          square M x M matrix (get_size(0) must equal get_size(1)); handed to
+///                   LAPACK as the in/out matrix argument, so its content is overwritten
+/// @param eigenValue receives the eigenvalues; re-created as an M x 1 array unless its
+///                   first two sizes already are M and 1
+///
+/// A non-square A, an unsupported T or a LAPACK info != 0 is reported through GADGET_THROW.
+template<typename T> 
+void heev(hoNDArray<T>& A, hoNDArray<typename realType<T>::Type>& eigenValue)
+{
+    try
+    {
+        lapack_int M = (lapack_int)A.get_size(0);
+        GADGET_CHECK_THROW(A.get_size(1) == M);
+
+        if ( (eigenValue.get_size(0)!=M) || (eigenValue.get_size(1)!=1) )
+        {
+            eigenValue.create(M, 1);
+        }
+
+        lapack_int info = 0;
+        char jobz = 'V';
+        char uplo = 'L';
+        T* pA = A.begin();
+        typename realType<T>::Type* pEV = eigenValue.begin();
+
+        // Workspace length. LAPACK requires lwork >= max(1, 3*M-1) for ?syev and
+        // lwork >= max(1, 2*M-1) for ?heev. M*M satisfies the complex bound for every
+        // M >= 1, but falls short of the real one when M is 1 or 2, so take the larger
+        // of M*M and 3*M-1.
+        lapack_int lwork = M*M;
+        if ( lwork < 3*M-1 )
+        {
+            lwork = 3*M-1;
+        }
+
+        if ( typeid(T)==typeid(float) )
+        {
+            hoNDArray<float> work(lwork);
+            ssyev_(&jobz, &uplo, &M, reinterpret_cast<float*>(pA), &M, reinterpret_cast<float*>(pEV), work.begin(), &lwork, &info);
+        }
+        else if ( typeid(T)==typeid(double) )
+        {
+            hoNDArray<double> work(lwork);
+            dsyev_(&jobz, &uplo, &M, reinterpret_cast<double*>(pA), &M, reinterpret_cast<double*>(pEV), work.begin(), &lwork, &info);
+        }
+        else if ( (typeid(T)==typeid( std::complex<float> )) || (typeid(T)==typeid( complext<float> )) )
+        {
+            hoNDArray< std::complex<float> > work(lwork);
+            hoNDArray<float> rwork(3*M);
+            cheev_(&jobz, &uplo, &M, reinterpret_cast<lapack_complex_float*>(pA), &M, reinterpret_cast<float*>(pEV), reinterpret_cast<lapack_complex_float*>(work.begin()), &lwork, rwork.begin(), &info);
+        }
+        // double precision complex: this test must name std::complex<double>, otherwise
+        // that type ends up in the "unsupported type" branch below
+        else if ( (typeid(T)==typeid( std::complex<double> )) || (typeid(T)==typeid( complext<double> )) )
+        {
+            hoNDArray< std::complex<double> > work(lwork);
+            hoNDArray<double> rwork(3*M);
+            zheev_(&jobz, &uplo, &M, reinterpret_cast<lapack_complex_double*>(pA), &M, reinterpret_cast<double*>(pEV), reinterpret_cast<lapack_complex_double*>(work.begin()), &lwork, rwork.begin(), &info);
+        }
+        else
+        {
+            GADGET_THROW("heev : unsupported type ... ");
+        }
+
+        GADGET_CHECK_THROW(info==0);
+    }
+    catch (...)
+    {
+        GADGET_THROW("Errors in heev(hoNDArray<T>& A, hoNDArray<typename realType<T>::Type>& eigenValue) ... ");
+    }
+}
+
+template EXPORTCPUCOREMATH void heev(hoNDArray<float>& A, hoNDArray<float>& eigenValue);
+template EXPORTCPUCOREMATH void heev(hoNDArray<double>& A, hoNDArray<double>& eigenValue);
+template EXPORTCPUCOREMATH void heev(hoNDArray< std::complex<float> >& A, hoNDArray<float>& eigenValue);
+template EXPORTCPUCOREMATH void heev(hoNDArray< complext<float> >& A, hoNDArray<float>& eigenValue);
+template EXPORTCPUCOREMATH void heev(hoNDArray< std::complex<double> >& A, hoNDArray<double>& eigenValue);
+template EXPORTCPUCOREMATH void heev(hoNDArray< complext<double> >& A, hoNDArray<double>& eigenValue);
+
+/// heev overload that hands the eigenvalues back in a complex-valued array.
+///
+/// The decomposition is delegated to the heev overload with a real-valued eigenvalue
+/// array, run on a temporary M x 1 array; that temporary is then passed to
+/// eigenValue.copyFrom().
+///
+/// @param A          matrix whose get_size(0) must equal get_size(1); passed on to the delegated call
+/// @param eigenValue re-created as M x 1 unless its first two sizes already are M and 1,
+///                   then filled from the temporary array
+///
+/// Failures are reported through GADGET_THROW.
+template<typename T> 
+void heev(hoNDArray< std::complex<T> >& A, hoNDArray< std::complex<T> >& eigenValue)
+{
+    typedef typename realType<T>::Type RealType;
+
+    try
+    {
+        long long M = (long long)A.get_size(0);
+        GADGET_CHECK_THROW(A.get_size(1) == M);
+
+        const bool alreadyColumn = (eigenValue.get_size(0)==M) && (eigenValue.get_size(1)==1);
+        if ( !alreadyColumn )
+        {
+            eigenValue.create(M, 1);
+        }
+
+        // real-valued staging array for the delegated decomposition
+        hoNDArray<RealType> realEigenValue(M, 1);
+        heev(A, realEigenValue);
+
+        eigenValue.copyFrom(realEigenValue);
+    }
+    catch (...)
+    {
+        GADGET_THROW("Errors in heev(hoNDArray< std::complex<T> >& A, hoNDArray< std::complex<T> >& eigenValue) ... ");
+    }
+}
+
+template EXPORTCPUCOREMATH void heev(hoNDArray< std::complex<float> >& A, hoNDArray< std::complex<float> >& eigenValue);
+template EXPORTCPUCOREMATH void heev(hoNDArray< std::complex<double> >& A, hoNDArray< std::complex<double> >& eigenValue);
+
+/// ------------------------------------------------------------------------------------
+
+/// In-place inverse of a square matrix computed in two LAPACK steps: ?potrf
+/// (Cholesky factorization) followed by ?potri (inverse from that factorization).
+///
+/// Both calls use uplo='L'. The routine pair is chosen from the element type T;
+/// complext<> arrays use the routines of the matching std::complex<> type.
+///
+/// @param A matrix to invert, overwritten by LAPACK; get_size(0) must equal get_size(1).
+///          An array without elements is left untouched.
+///          NOTE(review): with uplo='L' LAPACK is expected to write only the lower
+///          triangle of the inverse - confirm before relying on the upper triangle.
+///
+/// A non-square A, an unsupported T or info != 0 after either step is reported
+/// through GADGET_THROW.
+template<typename T> 
+void potri(hoNDArray<T>& A)
+{
+    try
+    {
+        if( A.get_number_of_elements()==0 ) return;
+        GADGET_CHECK_THROW(A.get_size(0)==A.get_size(1));
+
+        lapack_int info = 0;
+        char uplo = 'L';
+        lapack_int n = (lapack_int)A.get_size(0);
+        T* pA = A.begin();
+        lapack_int lda = (lapack_int)A.get_size(0);
+
+        if ( typeid(T)==typeid(float) )
+        {
+            spotrf_(&uplo, &n, reinterpret_cast<float*>(pA), &lda, &info);
+            GADGET_CHECK_THROW(info==0);
+
+            spotri_(&uplo, &n, reinterpret_cast<float*>(pA), &lda, &info);
+            GADGET_CHECK_THROW(info==0);
+        }
+        else if ( typeid(T)==typeid(double) )
+        {
+            dpotrf_(&uplo, &n, reinterpret_cast<double*>(pA), &lda, &info);
+            GADGET_CHECK_THROW(info==0);
+
+            dpotri_(&uplo, &n, reinterpret_cast<double*>(pA), &lda, &info);
+            GADGET_CHECK_THROW(info==0);
+        }
+        else if ( (typeid(T)==typeid( std::complex<float> )) || (typeid(T)==typeid( complext<float> )) )
+        {
+            cpotrf_(&uplo, &n, reinterpret_cast<lapack_complex_float*>(pA), &lda, &info);
+            GADGET_CHECK_THROW(info==0);
+
+            cpotri_(&uplo, &n, reinterpret_cast<lapack_complex_float*>(pA), &lda, &info);
+            GADGET_CHECK_THROW(info==0);
+        }
+        // double precision complex: this test must name std::complex<double>, otherwise
+        // that type ends up in the "unsupported type" branch below
+        else if ( (typeid(T)==typeid( std::complex<double> )) || (typeid(T)==typeid( complext<double> )) )
+        {
+            zpotrf_(&uplo, &n, reinterpret_cast<lapack_complex_double*>(pA), &lda, &info);
+            GADGET_CHECK_THROW(info==0);
+
+            zpotri_(&uplo, &n, reinterpret_cast<lapack_complex_double*>(pA), &lda, &info);
+            GADGET_CHECK_THROW(info==0);
+        }
+        else
+        {
+            GADGET_THROW("potri : unsupported type ... ");
+        }
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in potri(hoNDArray<T>& A) ...");
+    }
+}
+
+template EXPORTCPUCOREMATH void potri(hoNDArray<float>& A);
+template EXPORTCPUCOREMATH void potri(hoNDArray<double>& A);
+template EXPORTCPUCOREMATH void potri(hoNDArray< std::complex<float> >& A);
+template EXPORTCPUCOREMATH void potri(hoNDArray< complext<float> >& A);
+template EXPORTCPUCOREMATH void potri(hoNDArray< std::complex<double> >& A);
+template EXPORTCPUCOREMATH void potri(hoNDArray< complext<double> >& A);
+
+/// ------------------------------------------------------------------------------------
+
+/// In-place inverse of a triangular matrix through LAPACK ?trtri.
+///
+/// The call uses diag='N'. The routine (strtri_/dtrtri_/ctrtri_/ztrtri_) is chosen from
+/// the element type T; complext<> arrays use the routine of the matching std::complex<> type.
+///
+/// @param A    matrix to invert, overwritten by LAPACK; get_size(0) must equal get_size(1).
+///             An array without elements is left untouched.
+/// @param uplo forwarded to LAPACK unchanged to select the stored triangle
+///
+/// A non-square A, an unsupported T or a LAPACK info != 0 is reported through GADGET_THROW.
+template<typename T> 
+void trtri(hoNDArray<T>& A, char uplo)
+{
+    try
+    {
+        if( A.get_number_of_elements()==0 ) return;
+        GADGET_CHECK_THROW(A.get_size(0)==A.get_size(1));
+
+        lapack_int info = 0;
+        char diag = 'N';
+        lapack_int n = (lapack_int)A.get_size(0);
+        T* pA = A.begin();
+        lapack_int lda = (lapack_int)A.get_size(0);
+
+        if ( typeid(T)==typeid(float) )
+        {
+            strtri_(&uplo, &diag, &n, reinterpret_cast<float*>(pA), &lda, &info);
+        }
+        else if ( typeid(T)==typeid(double) )
+        {
+            dtrtri_(&uplo, &diag, &n, reinterpret_cast<double*>(pA), &lda, &info);
+        }
+        else if ( (typeid(T)==typeid( std::complex<float> )) || (typeid(T)==typeid( complext<float> )) )
+        {
+            ctrtri_(&uplo, &diag, &n, reinterpret_cast<lapack_complex_float*>(pA), &lda, &info);
+        }
+        // double precision complex: this test must name std::complex<double>, otherwise
+        // that type ends up in the "unsupported type" branch below
+        else if ( (typeid(T)==typeid( std::complex<double> )) || (typeid(T)==typeid( complext<double> )) )
+        {
+            ztrtri_(&uplo, &diag, &n, reinterpret_cast<lapack_complex_double*>(pA), &lda, &info);
+        }
+        else
+        {
+            GADGET_THROW("trtri : unsupported type ... ");
+        }
+
+        GADGET_CHECK_THROW(info==0);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in trtri(hoNDArray<T>& A, char uplo) ...");
+    }
+}
+
+template EXPORTCPUCOREMATH void trtri(hoNDArray<float>& A, char uplo);
+template EXPORTCPUCOREMATH void trtri(hoNDArray<double>& A, char uplo);
+template EXPORTCPUCOREMATH void trtri(hoNDArray< std::complex<float> >& A, char uplo);
+template EXPORTCPUCOREMATH void trtri(hoNDArray< complext<float> >& A, char uplo);
+template EXPORTCPUCOREMATH void trtri(hoNDArray< std::complex<double> >& A, char uplo);
+template EXPORTCPUCOREMATH void trtri(hoNDArray< complext<double> >& A, char uplo);
+
+/// ------------------------------------------------------------------------------------
+
+/// Solves the linear system A*x = b through LAPACK ?posv (the driver for symmetric /
+/// Hermitian positive definite matrices), called with uplo='L'.
+///
+/// The routine (sposv_/dposv_/cposv_/zposv_) is chosen from the element type T;
+/// complext<> arrays use the routine of the matching std::complex<> type.
+///
+/// @param A system matrix, handed to LAPACK as in/out argument (overwritten)
+/// @param b right-hand side(s), one per column; get_size(0) must equal A.get_size(0).
+///          Handed to LAPACK as in/out argument (overwritten)
+///
+/// Returns without doing anything when A or b has no elements. A size mismatch, an
+/// unsupported T or a LAPACK info != 0 is reported through GADGET_THROW.
+template<typename T>
+void posv(hoNDArray<T>& A, hoNDArray<T>& b)
+{
+    try
+    {
+        if( (A.get_number_of_elements()==0) || (b.get_number_of_elements()==0) ) return;
+        GADGET_CHECK_THROW(A.get_size(0)==b.get_size(0));
+
+        lapack_int info;
+        char triangle = 'L';
+
+        // dimensions and leading dimensions as LAPACK expects them
+        lapack_int order = (lapack_int)A.get_size(0);
+        lapack_int numRhs = (lapack_int)b.get_size(1);
+        lapack_int leadA = (lapack_int)A.get_size(0);
+        lapack_int leadB = (lapack_int)b.get_size(0);
+
+        T* matrixData = A.begin();
+        T* rhsData = b.begin();
+
+        // classify the element type once, then dispatch
+        const std::type_info& elemType = typeid(T);
+        const bool isComplexSingle = (elemType==typeid( std::complex<float> )) || (elemType==typeid( complext<float> ));
+        const bool isComplexDouble = (elemType==typeid( std::complex<double> )) || (elemType==typeid( complext<double> ));
+
+        if ( elemType==typeid(float) )
+        {
+            sposv_(&triangle, &order, &numRhs, reinterpret_cast<float*>(matrixData), &leadA, reinterpret_cast<float*>(rhsData), &leadB, &info);
+        }
+        else if ( elemType==typeid(double) )
+        {
+            dposv_(&triangle, &order, &numRhs, reinterpret_cast<double*>(matrixData), &leadA, reinterpret_cast<double*>(rhsData), &leadB, &info);
+        }
+        else if ( isComplexSingle )
+        {
+            cposv_(&triangle, &order, &numRhs, reinterpret_cast<lapack_complex_float*>(matrixData), &leadA, reinterpret_cast<lapack_complex_float*>(rhsData), &leadB, &info);
+        }
+        else if ( isComplexDouble )
+        {
+            zposv_(&triangle, &order, &numRhs, reinterpret_cast<lapack_complex_double*>(matrixData), &leadA, reinterpret_cast<lapack_complex_double*>(rhsData), &leadB, &info);
+        }
+        else
+        {
+            GADGET_THROW("posv : unsupported type ... ");
+        }
+
+        GADGET_CHECK_THROW(info==0);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in posv(hoNDArray<T>& A, hoNDArray<T>& b) ...");
+    }
+}
+
+template EXPORTCPUCOREMATH void posv(hoNDArray<float>& A, hoNDArray<float>& b);
+template EXPORTCPUCOREMATH void posv(hoNDArray<double>& A, hoNDArray<double>& b);
+template EXPORTCPUCOREMATH void posv(hoNDArray< std::complex<float> >& A, hoNDArray< std::complex<float> >& b);
+template EXPORTCPUCOREMATH void posv(hoNDArray< complext<float> >& A, hoNDArray< complext<float> >& b);
+template EXPORTCPUCOREMATH void posv(hoNDArray< std::complex<double> >& A, hoNDArray< std::complex<double> >& b);
+template EXPORTCPUCOREMATH void posv(hoNDArray< complext<double> >& A, hoNDArray< complext<double> >& b);
+
+/// ------------------------------------------------------------------------------------
+
+/// Solves A*x = b for single precision real data through LAPACK ssysv_ (uplo='L').
+///
+/// @param A system matrix, handed to LAPACK as in/out argument (overwritten)
+/// @param b right-hand side(s), one per column; get_size(0) must equal A.get_size(0).
+///          Handed to LAPACK as in/out argument (overwritten)
+///
+/// Returns without doing anything when A or b has no elements. A size mismatch or a
+/// LAPACK info != 0 is reported through GADGET_THROW.
+template<> EXPORTCPUCOREMATH
+void hesv(hoNDArray< float >& A, hoNDArray< float >& b)
+{
+    try
+    {
+        if( (A.get_number_of_elements()==0) || (b.get_number_of_elements()==0) ) return;
+        GADGET_CHECK_THROW(A.get_size(0)==b.get_size(0));
+
+        lapack_int info = 0;
+        char triangle = 'L';
+
+        lapack_int order = (lapack_int)A.get_size(0);
+        lapack_int numRhs = (lapack_int)b.get_size(1);
+        lapack_int leadA = (lapack_int)A.get_size(0);
+        lapack_int leadB = (lapack_int)b.get_size(0);
+
+        float* matrixData = A.begin();
+        float* rhsData = b.begin();
+
+        // pivot index buffer, one entry per row, cleared before use
+        hoNDArray<lapack_int> pivots(order);
+        Gadgetron::clear(pivots);
+
+        // LAPACK scratch buffer of order*order elements, cleared before use
+        lapack_int scratchLen = order*order;
+        hoNDArray<float> scratch(scratchLen);
+        Gadgetron::clear(scratch);
+
+        ssysv_(&triangle, &order, &numRhs, matrixData, &leadA, pivots.begin(), rhsData, &leadB, scratch.begin(), &scratchLen, &info);
+
+        GADGET_CHECK_THROW(info==0);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in hesv(hoNDArray< float >& A, hoNDArray< float >& b) ...");
+    }
+}
+
+/// Solves A*x = b for double precision real data through LAPACK dsysv_ (uplo='L').
+///
+/// @param A system matrix, handed to LAPACK as in/out argument (overwritten)
+/// @param b right-hand side(s), one per column; get_size(0) must equal A.get_size(0).
+///          Handed to LAPACK as in/out argument (overwritten)
+///
+/// Returns without doing anything when A or b has no elements. A size mismatch or a
+/// LAPACK info != 0 is reported through GADGET_THROW.
+template<> EXPORTCPUCOREMATH
+void hesv(hoNDArray< double >& A, hoNDArray< double >& b)
+{
+    try
+    {
+        if( (A.get_number_of_elements()==0) || (b.get_number_of_elements()==0) ) return;
+        GADGET_CHECK_THROW(A.get_size(0)==b.get_size(0));
+
+        lapack_int info = 0;
+        char triangle = 'L';
+
+        lapack_int order = (lapack_int)A.get_size(0);
+        lapack_int numRhs = (lapack_int)b.get_size(1);
+        lapack_int leadA = (lapack_int)A.get_size(0);
+        lapack_int leadB = (lapack_int)b.get_size(0);
+
+        double* matrixData = A.begin();
+        double* rhsData = b.begin();
+
+        // pivot index buffer, one entry per row, cleared before use
+        hoNDArray<lapack_int> pivots(order);
+        Gadgetron::clear(pivots);
+
+        // LAPACK scratch buffer of order*order elements, cleared before use
+        lapack_int scratchLen = order*order;
+        hoNDArray<double> scratch(scratchLen);
+        Gadgetron::clear(scratch);
+
+        dsysv_(&triangle, &order, &numRhs, matrixData, &leadA, pivots.begin(), rhsData, &leadB, scratch.begin(), &scratchLen, &info);
+
+        GADGET_CHECK_THROW(info==0);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in hesv(hoNDArray< double >& A, hoNDArray< double >& b) ...");
+    }
+}
+
+/// Solves A*x = b for single precision complex data through LAPACK chesv_ (uplo='L').
+///
+/// @param A system matrix, handed to LAPACK as in/out argument (overwritten)
+/// @param b right-hand side(s), one per column; get_size(0) must equal A.get_size(0).
+///          Handed to LAPACK as in/out argument (overwritten)
+///
+/// Returns without doing anything when A or b has no elements. A size mismatch or a
+/// LAPACK info != 0 is reported through GADGET_THROW.
+template<> EXPORTCPUCOREMATH
+void hesv(hoNDArray< std::complex<float> >& A, hoNDArray< std::complex<float> >& b)
+{
+    typedef std::complex<float> ValueType;
+
+    try
+    {
+        if( (A.get_number_of_elements()==0) || (b.get_number_of_elements()==0) ) return;
+        GADGET_CHECK_THROW(A.get_size(0)==b.get_size(0));
+
+        lapack_int info = 0;
+        char triangle = 'L';
+
+        lapack_int order = (lapack_int)A.get_size(0);
+        lapack_int numRhs = (lapack_int)b.get_size(1);
+        lapack_int leadA = (lapack_int)A.get_size(0);
+        lapack_int leadB = (lapack_int)b.get_size(0);
+
+        // LAPACK views of the caller's buffers
+        lapack_complex_float* matrixData = reinterpret_cast<lapack_complex_float*>(A.begin());
+        lapack_complex_float* rhsData = reinterpret_cast<lapack_complex_float*>(b.begin());
+
+        // pivot index buffer, one entry per row, cleared before use
+        hoNDArray<lapack_int> pivots(order);
+        Gadgetron::clear(pivots);
+
+        // LAPACK scratch buffer of order*order elements, cleared before use
+        lapack_int scratchLen = order*order;
+        hoNDArray<ValueType> scratch(scratchLen);
+        Gadgetron::clear(scratch);
+
+        chesv_(&triangle, &order, &numRhs, matrixData, &leadA, pivots.begin(), rhsData, &leadB, reinterpret_cast<lapack_complex_float*>(scratch.begin()), &scratchLen, &info);
+
+        GADGET_CHECK_THROW(info==0);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in hesv(hoNDArray< std::complex<float> >& A, hoNDArray< std::complex<float> >& b) ...");
+    }
+}
+
+/// hesv overload for complext<float> arrays.
+/// Both arrays are reinterpreted as hoNDArray< std::complex<float> > and the call is
+/// forwarded to the hesv overload taking that type.
+/// NOTE(review): relies on complext<float> and std::complex<float> sharing one memory layout - confirm.
+template<> EXPORTCPUCOREMATH
+void hesv(hoNDArray< complext<float> >& A, hoNDArray< complext<float> >& b)
+{
+    typedef hoNDArray< std::complex<float> > StdComplexArray;
+
+    try
+    {
+        StdComplexArray& stdA = reinterpret_cast<StdComplexArray&>(A);
+        StdComplexArray& stdB = reinterpret_cast<StdComplexArray&>(b);
+
+        hesv(stdA, stdB);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in hesv(hoNDArray< complext<float> >& A, hoNDArray< complext<float> >& b) ...");
+    }
+}
+
+/// Solves A*x = b for double precision complex data through LAPACK zhesv_ (uplo='L').
+///
+/// @param A system matrix, handed to LAPACK as in/out argument (overwritten)
+/// @param b right-hand side(s), one per column; get_size(0) must equal A.get_size(0).
+///          Handed to LAPACK as in/out argument (overwritten)
+///
+/// Returns without doing anything when A or b has no elements. A size mismatch or a
+/// LAPACK info != 0 is reported through GADGET_THROW.
+template<> EXPORTCPUCOREMATH
+void hesv(hoNDArray< std::complex<double> >& A, hoNDArray< std::complex<double> >& b)
+{
+    typedef std::complex<double> ValueType;
+
+    try
+    {
+        if( (A.get_number_of_elements()==0) || (b.get_number_of_elements()==0) ) return;
+        GADGET_CHECK_THROW(A.get_size(0)==b.get_size(0));
+
+        lapack_int info = 0;
+        char triangle = 'L';
+
+        lapack_int order = (lapack_int)A.get_size(0);
+        lapack_int numRhs = (lapack_int)b.get_size(1);
+        lapack_int leadA = (lapack_int)A.get_size(0);
+        lapack_int leadB = (lapack_int)b.get_size(0);
+
+        // LAPACK views of the caller's buffers
+        lapack_complex_double* matrixData = reinterpret_cast<lapack_complex_double*>(A.begin());
+        lapack_complex_double* rhsData = reinterpret_cast<lapack_complex_double*>(b.begin());
+
+        // pivot index buffer, one entry per row, cleared before use
+        hoNDArray<lapack_int> pivots(order);
+        Gadgetron::clear(pivots);
+
+        // LAPACK scratch buffer of order*order elements, cleared before use
+        lapack_int scratchLen = order*order;
+        hoNDArray<ValueType> scratch(scratchLen);
+        Gadgetron::clear(scratch);
+
+        zhesv_(&triangle, &order, &numRhs, matrixData, &leadA, pivots.begin(), rhsData, &leadB, reinterpret_cast<lapack_complex_double*>(scratch.begin()), &scratchLen, &info);
+
+        GADGET_CHECK_THROW(info==0);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in hesv(hoNDArray< std::complex<double> >& A, hoNDArray< std::complex<double> >& b) ...");
+    }
+}
+
+/// hesv overload for complext<double> arrays.
+/// Both arrays are reinterpreted as hoNDArray< std::complex<double> > and the call is
+/// forwarded to the hesv overload taking that type.
+/// NOTE(review): relies on complext<double> and std::complex<double> sharing one memory layout - confirm.
+template<> EXPORTCPUCOREMATH
+void hesv(hoNDArray< complext<double> >& A, hoNDArray< complext<double> >& b)
+{
+    typedef hoNDArray< std::complex<double> > StdComplexArray;
+
+    try
+    {
+        StdComplexArray& stdA = reinterpret_cast<StdComplexArray&>(A);
+        StdComplexArray& stdB = reinterpret_cast<StdComplexArray&>(b);
+
+        hesv(stdA, stdB);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in hesv(hoNDArray< complext<double> >& A, hoNDArray< complext<double> >& b) ...");
+    }
+}
+
+/// ------------------------------------------------------------------------------------
+
+/// Solves the general linear system A*x = b for single precision real data through
+/// LAPACK sgesv_ (LU factorization with pivoting followed by the solve).
+///
+/// @param A system matrix, handed to LAPACK as in/out argument (overwritten)
+/// @param b right-hand side(s), one per column; get_size(0) must equal A.get_size(0).
+///          Handed to LAPACK as in/out argument (overwritten)
+///
+/// Returns without doing anything when A or b has no elements. A size mismatch or a
+/// LAPACK info != 0 is reported through GADGET_THROW.
+template<> EXPORTCPUCOREMATH
+void gesv(hoNDArray<float>& A, hoNDArray<float>& b)
+{
+    typedef float T;
+
+    try
+    {
+        if( A.get_number_of_elements()==0 ) return;
+        if( b.get_number_of_elements()==0 ) return;
+        GADGET_CHECK_THROW(A.get_size(0)==b.get_size(0));
+
+        lapack_int info(0);
+        lapack_int n = (lapack_int)A.get_size(0);
+        lapack_int nrhs = (lapack_int)b.get_size(1);
+        T* pA = A.begin();
+        lapack_int lda = (lapack_int)A.get_size(0);
+        T* pB = b.begin();
+        // leading dimension of b is its row count (size 0), not the number of
+        // right-hand sides; LAPACK rejects ldb < max(1, n)
+        lapack_int ldb = (lapack_int)b.get_size(0);
+
+        // pivot index buffer, one entry per row, cleared before use
+        hoNDArray<lapack_int> work(n);
+        Gadgetron::clear(work);
+        lapack_int* ipiv = work.begin();
+
+        sgesv_(&n, &nrhs, reinterpret_cast<float*>(pA), &lda, ipiv, reinterpret_cast<float*>(pB), &ldb, &info);
+
+        GADGET_CHECK_THROW(info==0);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in gesv(hoNDArray<float>& A, hoNDArray<float>& b) ...");
+    }
+}
+
+/// Solves the general linear system A*x = b for double precision real data through
+/// LAPACK dgesv_.
+///
+/// @param A system matrix, handed to LAPACK as in/out argument (overwritten)
+/// @param b right-hand side(s), one per column; get_size(0) must equal A.get_size(0).
+///          Handed to LAPACK as in/out argument (overwritten)
+///
+/// Returns without doing anything when A or b has no elements. A size mismatch or a
+/// LAPACK info != 0 is reported through GADGET_THROW.
+template<> EXPORTCPUCOREMATH
+void gesv(hoNDArray<double>& A, hoNDArray<double>& b)
+{
+    try
+    {
+        if( (A.get_number_of_elements()==0) || (b.get_number_of_elements()==0) ) return;
+        GADGET_CHECK_THROW(A.get_size(0)==b.get_size(0));
+
+        lapack_int info = 0;
+
+        lapack_int order = (lapack_int)A.get_size(0);
+        lapack_int numRhs = (lapack_int)b.get_size(1);
+        lapack_int leadA = (lapack_int)A.get_size(0);
+        lapack_int leadB = (lapack_int)b.get_size(0);
+
+        double* matrixData = A.begin();
+        double* rhsData = b.begin();
+
+        // pivot index buffer, one entry per row, cleared before use
+        hoNDArray<lapack_int> pivots(order);
+        Gadgetron::clear(pivots);
+
+        dgesv_(&order, &numRhs, matrixData, &leadA, pivots.begin(), rhsData, &leadB, &info);
+
+        GADGET_CHECK_THROW(info==0);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in gesv(hoNDArray<double>& A, hoNDArray<double>& b) ...");
+    }
+}
+
+/// Solves the general linear system A*x = b for single precision complex data through
+/// LAPACK cgesv_.
+///
+/// @param A system matrix, handed to LAPACK as in/out argument (overwritten)
+/// @param b right-hand side(s), one per column; get_size(0) must equal A.get_size(0).
+///          Handed to LAPACK as in/out argument (overwritten)
+///
+/// Returns without doing anything when A or b has no elements. A size mismatch or a
+/// LAPACK info != 0 is reported through GADGET_THROW.
+template<> EXPORTCPUCOREMATH
+void gesv(hoNDArray< std::complex<float> >& A, hoNDArray< std::complex<float> >& b)
+{
+    try
+    {
+        if( (A.get_number_of_elements()==0) || (b.get_number_of_elements()==0) ) return;
+        GADGET_CHECK_THROW(A.get_size(0)==b.get_size(0));
+
+        lapack_int info = 0;
+
+        lapack_int order = (lapack_int)A.get_size(0);
+        lapack_int numRhs = (lapack_int)b.get_size(1);
+        lapack_int leadA = (lapack_int)A.get_size(0);
+        lapack_int leadB = (lapack_int)b.get_size(0);
+
+        // LAPACK views of the caller's buffers
+        lapack_complex_float* matrixData = reinterpret_cast<lapack_complex_float*>(A.begin());
+        lapack_complex_float* rhsData = reinterpret_cast<lapack_complex_float*>(b.begin());
+
+        // pivot index buffer, one entry per row, cleared before use
+        hoNDArray<lapack_int> pivots(order);
+        Gadgetron::clear(pivots);
+
+        cgesv_(&order, &numRhs, matrixData, &leadA, pivots.begin(), rhsData, &leadB, &info);
+
+        GADGET_CHECK_THROW(info==0);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in gesv(hoNDArray< std::complex<float> >& A, hoNDArray< std::complex<float> >& b) ...");
+    }
+}
+
+/// gesv overload for complext<float> arrays.
+/// Both arrays are reinterpreted as hoNDArray< std::complex<float> > and the call is
+/// forwarded to the gesv overload taking that type.
+/// NOTE(review): relies on complext<float> and std::complex<float> sharing one memory layout - confirm.
+template<> EXPORTCPUCOREMATH
+void gesv(hoNDArray< complext<float> >& A, hoNDArray< complext<float> >& b)
+{
+    typedef hoNDArray< std::complex<float> > StdComplexArray;
+
+    try
+    {
+        StdComplexArray& stdA = reinterpret_cast<StdComplexArray&>(A);
+        StdComplexArray& stdB = reinterpret_cast<StdComplexArray&>(b);
+
+        gesv(stdA, stdB);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in gesv(hoNDArray< complext<float> >& A, hoNDArray< complext<float> >& b) ...");
+    }
+}
+
+/// Solves the general linear system A*x = b for double precision complex data through
+/// LAPACK zgesv_.
+///
+/// @param A system matrix, handed to LAPACK as in/out argument (overwritten)
+/// @param b right-hand side(s), one per column; get_size(0) must equal A.get_size(0).
+///          Handed to LAPACK as in/out argument (overwritten)
+///
+/// Returns without doing anything when A or b has no elements. A size mismatch or a
+/// LAPACK info != 0 is reported through GADGET_THROW.
+template<> EXPORTCPUCOREMATH
+void gesv(hoNDArray< std::complex<double> >& A, hoNDArray< std::complex<double> >& b)
+{
+    try
+    {
+        if( (A.get_number_of_elements()==0) || (b.get_number_of_elements()==0) ) return;
+        GADGET_CHECK_THROW(A.get_size(0)==b.get_size(0));
+
+        lapack_int info = 0;
+
+        lapack_int order = (lapack_int)A.get_size(0);
+        lapack_int numRhs = (lapack_int)b.get_size(1);
+        lapack_int leadA = (lapack_int)A.get_size(0);
+        lapack_int leadB = (lapack_int)b.get_size(0);
+
+        // LAPACK views of the caller's buffers
+        lapack_complex_double* matrixData = reinterpret_cast<lapack_complex_double*>(A.begin());
+        lapack_complex_double* rhsData = reinterpret_cast<lapack_complex_double*>(b.begin());
+
+        // pivot index buffer, one entry per row, cleared before use
+        hoNDArray<lapack_int> pivots(order);
+        Gadgetron::clear(pivots);
+
+        zgesv_(&order, &numRhs, matrixData, &leadA, pivots.begin(), rhsData, &leadB, &info);
+
+        GADGET_CHECK_THROW(info==0);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in gesv(hoNDArray< std::complex<double> >& A, hoNDArray< std::complex<double> >& b) ...");
+    }
+}
+
+/// gesv overload for complext<double> arrays.
+/// Both arrays are reinterpreted as hoNDArray< std::complex<double> > and the call is
+/// forwarded to the gesv overload taking that type.
+/// NOTE(review): relies on complext<double> and std::complex<double> sharing one memory layout - confirm.
+template<> EXPORTCPUCOREMATH
+void gesv(hoNDArray< complext<double> >& A, hoNDArray< complext<double> >& b)
+{
+    typedef hoNDArray< std::complex<double> > StdComplexArray;
+
+    try
+    {
+        StdComplexArray& stdA = reinterpret_cast<StdComplexArray&>(A);
+        StdComplexArray& stdB = reinterpret_cast<StdComplexArray&>(b);
+
+        gesv(stdA, stdB);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in gesv(hoNDArray< complext<double> >& A, hoNDArray< complext<double> >& b) ...");
+    }
+}
+
+/// ------------------------------------------------------------------------------------
+
+/// LU factorization of a general m-by-n matrix through LAPACK ?getrf.
+/// Used by the general matrix inversion (getri) in this file.
+///
+/// The routine (sgetrf_/dgetrf_/cgetrf_/zgetrf_) is chosen from the element type T;
+/// complext<> arrays use the routine of the matching std::complex<> type.
+///
+/// @param A    matrix to factorize, handed to LAPACK as in/out argument (overwritten).
+///             An array without elements is left untouched, and ipiv with it.
+/// @param ipiv re-created with GT_MIN(m, n) entries and filled by LAPACK with the pivot indices
+///
+/// An unsupported T or a LAPACK info != 0 is reported through GADGET_THROW.
+template<typename T> 
+void getrf(hoNDArray<T>& A, hoNDArray<lapack_int>& ipiv)
+{
+    try
+    {
+        if( A.get_number_of_elements()==0 ) return;
+
+        lapack_int info;
+        lapack_int m = (lapack_int)A.get_size(0);
+        lapack_int n = (lapack_int)A.get_size(1);
+        lapack_int leadA = (lapack_int)A.get_size(0);
+
+        T* matrixData = A.begin();
+
+        // one pivot entry per row/column of the smaller dimension
+        ipiv.create( GT_MIN(m, n) );
+        lapack_int* pivotData = ipiv.begin();
+
+        // classify the element type once, then dispatch
+        const std::type_info& elemType = typeid(T);
+        const bool isComplexSingle = (elemType==typeid( std::complex<float> )) || (elemType==typeid( complext<float> ));
+        const bool isComplexDouble = (elemType==typeid( std::complex<double> )) || (elemType==typeid( complext<double> ));
+
+        if ( elemType==typeid(float) )
+        {
+            sgetrf_(&m, &n, reinterpret_cast<float*>(matrixData), &leadA, pivotData, &info);
+        }
+        else if ( elemType==typeid(double) )
+        {
+            dgetrf_(&m, &n, reinterpret_cast<double*>(matrixData), &leadA, pivotData, &info);
+        }
+        else if ( isComplexSingle )
+        {
+            cgetrf_(&m, &n, reinterpret_cast<lapack_complex_float*>(matrixData), &leadA, pivotData, &info);
+        }
+        else if ( isComplexDouble )
+        {
+            zgetrf_(&m, &n, reinterpret_cast<lapack_complex_double*>(matrixData), &leadA, pivotData, &info);
+        }
+        else
+        {
+            GADGET_THROW("getrf : unsupported type ... ");
+        }
+
+        GADGET_CHECK_THROW(info==0);
+    }
+    catch(...)
+    {
+        GADGET_THROW("Errors in getrf(hoNDArray<T>& A, hoNDArray<T>& ipiv) ...");
+    }
+}
+
+template EXPORTCPUCOREMATH void getrf(hoNDArray<float>& A, hoNDArray<lapack_int>& ipiv);
+template EXPORTCPUCOREMATH void getrf(hoNDArray<double>& A, hoNDArray<lapack_int>& ipiv);
+template EXPORTCPUCOREMATH void getrf(hoNDArray< std::complex<float> >& A, hoNDArray<lapack_int>& ipiv);
+template EXPORTCPUCOREMATH void getrf(hoNDArray< complext<float> >& A, hoNDArray<lapack_int>& ipiv);
+template EXPORTCPUCOREMATH void getrf(hoNDArray< std::complex<double> >& A, hoNDArray<lapack_int>& ipiv);
+template EXPORTCPUCOREMATH void getrf(hoNDArray< complext<double> >& A, hoNDArray<lapack_int>& ipiv);
+
+/// ------------------------------------------------------------------------------------
+
+/// Inverts a general square matrix in place.
+/// A is LU-factored by getrf(A, ipiv) and the factors are then passed to the Fortran
+/// LAPACK routine matching T (sgetri_, dgetri_, cgetri_ or zgetri_), which operates
+/// directly on A's buffer.
+/// An empty A is left untouched. Anything thrown inside the try block (the m==n check,
+/// getrf, an unsupported T, the final info==0 check) is caught by the catch-all
+/// handler, which raises the generic "Errors in getri" exception instead.
+template<typename T> 
+void getri(hoNDArray<T>& A)
+{
+    try
+    {
+        if( A.get_number_of_elements()==0 ) return;
+
+        lapack_int info;
+        lapack_int m = (lapack_int)A.get_size(0);
+        lapack_int n = (lapack_int)A.get_size(1);
+        // only square matrices are accepted
+        GADGET_CHECK_THROW(m==n);
+
+        T* pA = A.begin();
+        lapack_int lda = (lapack_int)A.get_size(0);
+
+        // factor A first; ipiv is handed to getrf empty and read back below
+        hoNDArray<lapack_int> ipiv;
+        getrf(A, ipiv);
+
+        lapack_int* pIPIV = ipiv.begin();
+
+        // every branch below allocates an m-by-m work array, so lwork is m*m elements
+        lapack_int lwork = m*m;
+
+        // The Fortran symbols are called directly. T is selected at run time through
+        // typeid, so all four branches are compiled for every instantiation; the
+        // reinterpret_casts are what lets the branches that are not taken type-check.
+        // complext<float> / complext<double> arrays are handed to the same routines as
+        // their std::complex counterparts.
+
+        if ( typeid(T)==typeid(float) )
+        {
+            // scratch buffer handed to sgetri_ as its work array
+            hoNDArray<float> work(m, m);
+            sgetri_(&m, reinterpret_cast<float*>(pA), &lda, reinterpret_cast<lapack_int*>(pIPIV), work.begin(), &lwork, &info);
+        }
+        else if ( typeid(T)==typeid(double) )
+        {
+            hoNDArray<double> work(m, m);
+            dgetri_(&m, reinterpret_cast<double*>(pA), &lda, reinterpret_cast<lapack_int*>(pIPIV), work.begin(), &lwork, &info);
+        }
+        else if ( (typeid(T)==typeid( std::complex<float> )) || (typeid(T)==typeid( complext<float> )) )
+        {
+            // std::complex<float> scratch serves both complex float element types
+            hoNDArray< std::complex<float> > work(m, m);
+            cgetri_(&m, reinterpret_cast<lapack_complex_float*>(pA), &lda, reinterpret_cast<lapack_int*>(pIPIV), reinterpret_cast<lapack_complex_float*>(work.begin()), &lwork, &info);
+        }
+        else if ( (typeid(T)==typeid( std::complex<double> )) || (typeid(T)==typeid( complext<double> )) )
+        {
+            // std::complex<double> scratch serves both complex double element types
+            hoNDArray< std::complex<double> > work(m, m);
+            zgetri_(&m, reinterpret_cast<lapack_complex_double*>(pA), &lda, reinterpret_cast<lapack_int*>(pIPIV), reinterpret_cast<lapack_complex_double*>(work.begin()), &lwork, &info);
+        }
+        else
+        {
+            // no LAPACK routine is wired up here for any other element type
+            GADGET_THROW("getri : unsupported type ... ");
+        }
+
+        // a non-zero info from the LAPACK call is treated as failure
+        GADGET_CHECK_THROW(info==0);
+    }
+    catch(...)
+    {
+        // NOTE(review): the caught exception is discarded; only this generic message is raised
+        GADGET_THROW("Errors in getri(hoNDArray<T>& A) ...");
+    }
+}
+
+template EXPORTCPUCOREMATH void getri(hoNDArray<float>& A);
+template EXPORTCPUCOREMATH void getri(hoNDArray<double>& A);
+template EXPORTCPUCOREMATH void getri(hoNDArray< std::complex<float> >& A);
+template EXPORTCPUCOREMATH void getri(hoNDArray< complext<float> >& A);
+template EXPORTCPUCOREMATH void getri(hoNDArray< std::complex<double> >& A);
+template EXPORTCPUCOREMATH void getri(hoNDArray< complext<double> >& A);
+
+/// ------------------------------------------------------------------------------------
+
+/// Solve A*x = b with Tikhonov regularization via the normal equations.
+/// AHA (built from A by herk) gets value = lamda*trA/col added to the magnitude of each
+/// diagonal entry, x is initialised to gemm(x, A, true, b, false), and posv solves in place.
+/// If posv throws, the right-hand side is rebuilt and hesv, then gesv, are tried;
+/// only a gesv failure is rethrown to the caller.
+/// \param A      system matrix; A.get_size(0) must equal b.get_size(0)
+/// \param b      right-hand side(s)
+/// \param x      output, created here as A.get_size(1) by b.get_size(1)
+/// \param lamda  regularization weight, relative to the mean diagonal magnitude of AHA
+template<typename T>
+void SolveLinearSystem_Tikhonov(hoNDArray<T>& A, hoNDArray<T>& b, hoNDArray<T>& x, double lamda)
+{
+    GADGET_CHECK_THROW(b.get_size(0)==A.get_size(0));
+
+    hoNDArray<T> AHA(A.get_size(1), A.get_size(1));
+    Gadgetron::clear(AHA);
+
+    char uplo = 'L';
+    bool isAHA = true;
+    herk(AHA, A, uplo, isAHA);
+
+    x.create(A.get_size(1), b.get_size(1));
+    gemm(x, A, true, b, false);
+
+    // apply the Tikhonov regularization
+    // Ideally, the regularization would be lamda*maxEigenValue.
+    // However, computing the maximal eigenvalue is computationally intensive.
+    // A natural alternative is to use the trace of the AHA matrix, which is the sum of all eigenvalues.
+    // Since all eigenvalues are positive, lamda*maxEigenValue is only ~10-20% different from lamda*sum(all eigenvalues)
+    // for more information, refer to:
+    // Tikhonov A.N., Goncharsky A.V., Stepanov V.V., Yagola A.G., 1995,
+    // Numerical Methods for the Solution of Ill-Posed Problems, Kluwer Academic Publishers.
+
+    size_t col = AHA.get_size(0);
+    size_t c;
+
+    double trA = abs(AHA(0, 0));
+    for ( c=1; c<col; c++ )
+    {
+        trA += abs( AHA(c, c) );
+    }
+
+    double value = trA*lamda/col;
+    for ( c=0; c<col; c++ )
+    {
+        AHA(c,c) = T( (typename realType<T>::Type)( abs( AHA(c, c) ) + value ) );
+    }
+
+    // if the data is properly SNR unit scaled, the minimal eigen value of AHA will be around 4.0 (real and imag have noise sigma being ~1.0)
+    // When the mean diagonal magnitude is below that, AHA and x are multiplied by the same
+    // factor. The factor is kept so the fallback paths can rescale a rebuilt right-hand side.
+    // NOTE(review): trA==0 makes scalingFactor infinite - confirm whether callers can hit this.
+    bool scaled = false;
+    typename realType<T>::Type scalingFactor = (typename realType<T>::Type)(1);
+    if ( trA/col < 4.0 )
+    {
+        scalingFactor = (typename realType<T>::Type)(col*4.0/trA);
+        GADGET_MSG("SolveLinearSystem_Tikhonov - trA is too small : " << trA << " for matrix order : " << col);
+        GADGET_MSG("SolveLinearSystem_Tikhonov - scale the AHA and x by " << scalingFactor);
+        Gadgetron::scal( scalingFactor, AHA);
+        Gadgetron::scal( scalingFactor, x);
+        scaled = true;
+    }
+
+    try
+    {
+        posv(AHA, x);
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("posv failed in SolveLinearSystem_Tikhonov(... ) ... ");
+        GADGET_MSG("A = " << Gadgetron::norm2(A));
+        GADGET_MSG("b = " << Gadgetron::norm2(b));
+        GADGET_MSG("AHA = " << Gadgetron::norm2(AHA));
+        GADGET_MSG("trA = " << trA);
+        GADGET_MSG("x = " << Gadgetron::norm2(x));
+
+        // rebuild the right-hand side; it must carry the same scaling as AHA
+        // NOTE(review): AHA is passed to posv by non-const reference - confirm it is still intact here.
+        gemm(x, A, true, b, false);
+        if ( scaled ) Gadgetron::scal( scalingFactor, x);
+        GADGET_MSG("SolveLinearSystem_Tikhonov - x = " << Gadgetron::norm2(x));
+
+        try
+        {
+            hesv(AHA, x);
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("hesv failed in SolveLinearSystem_Tikhonov(... ) ... ");
+
+            gemm(x, A, true, b, false);
+            if ( scaled ) Gadgetron::scal( scalingFactor, x);
+            GADGET_MSG("SolveLinearSystem_Tikhonov - x = " << Gadgetron::norm2(x));
+
+            try
+            {
+                gesv(AHA, x);
+            }
+            catch(...)
+            {
+                GADGET_ERROR_MSG("gesv failed in SolveLinearSystem_Tikhonov(... ) ... ");
+                throw;
+            }
+        }
+    }
+}
+
+template EXPORTCPUCOREMATH void SolveLinearSystem_Tikhonov(hoNDArray<float>& A, hoNDArray<float>& b, hoNDArray<float>& x, double lamda);
+template EXPORTCPUCOREMATH void SolveLinearSystem_Tikhonov(hoNDArray<double>& A, hoNDArray<double>& b, hoNDArray<double>& x, double lamda);
+template EXPORTCPUCOREMATH void SolveLinearSystem_Tikhonov(hoNDArray< std::complex<float> >& A, hoNDArray< std::complex<float> >& b, hoNDArray< std::complex<float> >& x, double lamda);
+template EXPORTCPUCOREMATH void SolveLinearSystem_Tikhonov(hoNDArray< complext<float> >& A, hoNDArray< complext<float> >& b, hoNDArray< complext<float> >& x, double lamda);
+template EXPORTCPUCOREMATH void SolveLinearSystem_Tikhonov(hoNDArray< std::complex<double> >& A, hoNDArray< std::complex<double> >& b, hoNDArray< std::complex<double> >& x, double lamda);
+template EXPORTCPUCOREMATH void SolveLinearSystem_Tikhonov(hoNDArray< complext<double> >& A, hoNDArray< complext<double> >& b, hoNDArray< complext<double> >& x, double lamda);
+
+#endif // defined(USE_MKL) || defined(USE_LAPACK)
+
+}
diff --git a/toolboxes/core/cpu/math/hoNDArray_linalg.h b/toolboxes/core/cpu/math/hoNDArray_linalg.h
new file mode 100644
index 0000000..52db92f
--- /dev/null
+++ b/toolboxes/core/cpu/math/hoNDArray_linalg.h
@@ -0,0 +1,90 @@
+
+#pragma once
+
+#include "cpucore_math_export.h"
+
+#ifdef USE_ARMADILLO
+    #include "hoArmadillo.h"
+#endif // USE_ARMADILLO
+
+#ifndef lapack_int
+    #define lapack_int int
+#endif // lapack_int
+
+/// ----------------------------------------------------------------------
+/// the Fortran interfaces of the LAPACK and BLAS functions are called
+/// ----------------------------------------------------------------------
+
+namespace Gadgetron
+{
+
+// the following matrix computations call the Fortran LAPACK/BLAS routines
+
+/// C = A*B for complex float
+EXPORTCPUCOREMATH void gemm(hoNDArray< std::complex<float> >& C, const hoNDArray< std::complex<float> >& A, const hoNDArray< std::complex<float> >& B);
+/// if transA==true, C = A'*B
+/// if transB==true, C=A*B'
+/// if both are true, C=A'*B'
+template<typename T> EXPORTCPUCOREMATH
+void gemm(hoNDArray<T>& C, 
+        const hoNDArray<T>& A, bool transA, 
+        const hoNDArray<T>& B, bool transB);
+
+/// perform a symmetric rank-k update (no conjugation).
+template<typename T> EXPORTCPUCOREMATH 
+void syrk(hoNDArray<T>& C, const hoNDArray<T>& A, char uplo, bool isATA);
+
+/// perform a Hermitian rank-k update.
+template<typename T> EXPORTCPUCOREMATH 
+void herk(hoNDArray<T>& C, const hoNDArray<T>& A, char uplo, bool isAHA);
+
+/// compute the Cholesky factorization of a real symmetric positive definite matrix A
+template<typename T> EXPORTCPUCOREMATH 
+void potrf(hoNDArray<T>& A, char uplo);
+
+/// compute all eigenvalues and eigenvectors of a Hermitian matrix A
+template<typename T> EXPORTCPUCOREMATH 
+void heev(hoNDArray<T>& A, hoNDArray<typename realType<T>::Type>& eigenValue);
+
+template<typename T> EXPORTCPUCOREMATH
+void heev(hoNDArray< std::complex<T> >& A, hoNDArray<  std::complex<T> >& eigenValue);
+
+/// compute inverse of a symmetric (Hermitian) positive-definite matrix A
+template<typename T> EXPORTCPUCOREMATH 
+void potri(hoNDArray<T>& A);
+
+/// compute the inverse of a triangular matrix A
+template<typename T> EXPORTCPUCOREMATH 
+void trtri(hoNDArray<T>& A, char uplo);
+
+/// solve Ax=b, a symmetric or Hermitian positive-definite matrix A and multiple right-hand sides b
+/// b is replaced with x
+template<typename T> EXPORTCPUCOREMATH
+void posv(hoNDArray<T>& A, hoNDArray<T>& b);
+
+/// solve Ax=b, a square symmetric / hermitian matrix A and multiple right-hand sides b
+/// for float and double, A is a symmetric matrix
+/// for complex type, A is a hermitian matrix
+/// b is replaced with x
+template<typename T> EXPORTCPUCOREMATH
+void hesv(hoNDArray<T>& A, hoNDArray<T>& b);
+
+/// solve Ax=b, a square matrix A and multiple right-hand sides b
+/// b is replaced with x
+template<typename T> EXPORTCPUCOREMATH
+void gesv(hoNDArray<T>& A, hoNDArray<T>& b);
+
+/// solve Ax=b with Tikhonov regularization
+template<typename T> EXPORTCPUCOREMATH
+void SolveLinearSystem_Tikhonov(hoNDArray<T>& A, hoNDArray<T>& b, hoNDArray<T>& x, double lamda);
+
+/// Computes the LU factorization of a general m-by-n matrix
+/// this function is called by general matrix inversion
+template<typename T> EXPORTCPUCOREMATH 
+void getrf(hoNDArray<T>& A, hoNDArray<lapack_int>& ipiv);
+
+/// Computes the inverse of an LU-factored general matrix
+template<typename T> EXPORTCPUCOREMATH 
+void getri(hoNDArray<T>& A);
+
+}
diff --git a/toolboxes/core/cpu/math/hoNDArray_math.h b/toolboxes/core/cpu/math/hoNDArray_math.h
new file mode 100644
index 0000000..dda0d0c
--- /dev/null
+++ b/toolboxes/core/cpu/math/hoNDArray_math.h
@@ -0,0 +1,4 @@
+#pragma once
+
+#include "hoNDArray_elemwise.h"
+#include "hoNDArray_reductions.h"
diff --git a/toolboxes/core/cpu/math/hoNDArray_math_util.cpp b/toolboxes/core/cpu/math/hoNDArray_math_util.cpp
new file mode 100644
index 0000000..a5631d5
--- /dev/null
+++ b/toolboxes/core/cpu/math/hoNDArray_math_util.cpp
@@ -0,0 +1,2178 @@
+#include "hoNDArray_math_util.h"
+
+#ifndef lapack_int
+    #define lapack_int int
+#endif // lapack_int
+
+#ifndef lapack_complex_float
+    #define lapack_complex_float  std::complex<float> 
+#endif // lapack_complex_float
+
+#ifndef lapack_complex_double
+    #define lapack_complex_double  std::complex<double> 
+#endif // #ifndef lapack_complex_double
+
+//Declaration of BLAS and LAPACK routines
+extern "C"
+{
+    /// Finds the index of the element with the maximal absolute value.
+    lapack_int isamax_(lapack_int* N, float* x, lapack_int* incx);
+    lapack_int idamax_(lapack_int* N, double* x, lapack_int* incx);
+    lapack_int icamax_(lapack_int* N, lapack_complex_float* x, lapack_int* incx);
+    lapack_int izamax_(lapack_int* N, lapack_complex_double* x, lapack_int* incx);
+}
+
+#define NumElementsUseThreading (64*1024) // element-count threshold used by the OpenMP if() clauses in this file
+
+namespace Gadgetron
+{
+    // --------------------------------------------------------------------------------
+
+    /// r[k] = x[k] + y[k] for k in [0, N); the OpenMP if() clause enables threading only when N>NumElementsUseThreading
+    inline void add(size_t N, const float* x, const float* y, float* r)
+    {
+        long long k;
+        #pragma omp parallel for default(none) private(k) shared(N, r, x, y) if(N>NumElementsUseThreading)
+        for ( k=0; k<(long long)N; k++ )
+        {
+            *(r+k) = x[k] + y[k];
+        }
+    }
+
+    /// r[k] = x[k] + y[k] for k in [0, N); the OpenMP if() clause enables threading only when N>NumElementsUseThreading
+    inline void add(size_t N, const double* x, const double* y, double* r)
+    {
+        long long k;
+        #pragma omp parallel for default(none) private(k) shared(N, r, x, y) if(N>NumElementsUseThreading)
+        for ( k=0; k<(long long)N; k++ )
+        {
+            *(r+k) = x[k] + y[k];
+        }
+    }
+
+    /// Element-wise sum of std::complex<float> buffers: r[k] = x[k] + y[k] for k in [0, N).
+    /// Both sums are formed before r[k] is written, through the float[2] view of the element.
+    /// The OpenMP if() clause enables threading only when N>NumElementsUseThreading.
+    inline void add(size_t N, const  std::complex<float> * x, const  std::complex<float> * y,  std::complex<float> * r)
+    {
+        long long k;
+        #pragma omp parallel for default(none) private(k) shared(N, r, x, y) if(N>NumElementsUseThreading)
+        for ( k=0; k<(long long)N; k++ )
+        {
+            const std::complex<float>& lhs = x[k];
+            const std::complex<float>& rhs = y[k];
+            const float sumRe = lhs.real() + rhs.real();
+            const float sumIm = lhs.imag() + rhs.imag();
+
+            float (&out)[2] = reinterpret_cast<float(&)[2]>(r[k]);
+            out[0] = sumRe;
+            out[1] = sumIm;
+        }
+    }
+
+    /// Element-wise sum of std::complex<double> buffers: r[k] = x[k] + y[k] for k in [0, N).
+    /// Both sums are formed before r[k] is written, through the double[2] view of the element.
+    /// The OpenMP if() clause enables threading only when N>NumElementsUseThreading.
+    inline void add(size_t N, const  std::complex<double> * x, const  std::complex<double> * y,  std::complex<double> * r)
+    {
+        long long k;
+        #pragma omp parallel for default(none) private(k) shared(N, r, x, y) if(N>NumElementsUseThreading)
+        for ( k=0; k<(long long)N; k++ )
+        {
+            const std::complex<double>& lhs = x[k];
+            const std::complex<double>& rhs = y[k];
+            const double sumRe = lhs.real() + rhs.real();
+            const double sumIm = lhs.imag() + rhs.imag();
+
+            double (&out)[2] = reinterpret_cast<double(&)[2]>(r[k]);
+            out[0] = sumRe;
+            out[1] = sumIm;
+        }
+    }
+
+    /// r = x + y, element by element. x and y must hold the same number of elements
+    /// (checked through GADGET_DEBUG_CHECK_THROW). When r's element count differs from x's,
+    /// r is re-created with x's dimensions; x's data is no longer copied into r first.
+    template <typename T> 
+    void add(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r)
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements());
+        if ( r.get_number_of_elements()!=x.get_number_of_elements()) r.create(x.get_dimensions());
+
+        add(x.get_number_of_elements(), x.begin(), y.begin(), r.begin());
+    }
+
+    template EXPORTCPUCOREMATH void add(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r);
+    template EXPORTCPUCOREMATH void add(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
+    template EXPORTCPUCOREMATH void add(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void add(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, hoNDArray< std::complex<double> >& r);
+
+    // --------------------------------------------------------------------------------
+
+    /// r[k] = x[k] - y[k] for k in [0, N); the OpenMP if() clause enables threading only when N>NumElementsUseThreading
+    inline void subtract(size_t N, const float* x, const float* y, float* r)
+    {
+        long long k;
+        #pragma omp parallel for default(none) private(k) shared(N, r, x, y) if(N>NumElementsUseThreading)
+        for ( k=0; k<(long long)N; k++ )
+        {
+            *(r+k) = x[k] - y[k];
+        }
+    }
+
+    /// r[k] = x[k] - y[k] for k in [0, N); the OpenMP if() clause enables threading only when N>NumElementsUseThreading
+    inline void subtract(size_t N, const double* x, const double* y, double* r)
+    {
+        long long k;
+        #pragma omp parallel for default(none) private(k) shared(N, r, x, y) if(N>NumElementsUseThreading)
+        for ( k=0; k<(long long)N; k++ )
+        {
+            *(r+k) = x[k] - y[k];
+        }
+    }
+
+    /// Element-wise difference of std::complex<float> buffers: r[k] = x[k] - y[k], k in [0, N).
+    /// Both differences are formed before r[k] is written, through its float[2] view.
+    /// The OpenMP if() clause enables threading only when N>NumElementsUseThreading.
+    inline void subtract(size_t N, const  std::complex<float> * x, const  std::complex<float> * y,  std::complex<float> * r)
+    {
+        long long k;
+        #pragma omp parallel for default(none) private(k) shared(N, r, x, y) if(N>NumElementsUseThreading)
+        for ( k=0; k<(long long)N; k++ )
+        {
+            const std::complex<float>& lhs = x[k];
+            const std::complex<float>& rhs = y[k];
+            const float diffRe = lhs.real() - rhs.real();
+            const float diffIm = lhs.imag() - rhs.imag();
+
+            float (&out)[2] = reinterpret_cast<float(&)[2]>(r[k]);
+            out[0] = diffRe;
+            out[1] = diffIm;
+        }
+    }
+
+    /// Element-wise difference of std::complex<double> buffers: r[k] = x[k] - y[k], k in [0, N).
+    /// Both differences are formed before r[k] is written, through its double[2] view.
+    /// The OpenMP if() clause enables threading only when N>NumElementsUseThreading.
+    inline void subtract(size_t N, const  std::complex<double> * x, const  std::complex<double> * y,  std::complex<double> * r)
+    {
+        long long k;
+        #pragma omp parallel for default(none) private(k) shared(N, r, x, y) if(N>NumElementsUseThreading)
+        for ( k=0; k<(long long)N; k++ )
+        {
+            const std::complex<double>& lhs = x[k];
+            const std::complex<double>& rhs = y[k];
+            const double diffRe = lhs.real() - rhs.real();
+            const double diffIm = lhs.imag() - rhs.imag();
+
+            double (&out)[2] = reinterpret_cast<double(&)[2]>(r[k]);
+            out[0] = diffRe;
+            out[1] = diffIm;
+        }
+    }
+
+    /// r = x - y, element by element. x and y must hold the same number of elements
+    /// (checked through GADGET_DEBUG_CHECK_THROW). When r's element count differs from x's,
+    /// r is re-created with x's dimensions; x's data is no longer copied into r first.
+    template <typename T> 
+    void subtract(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r)
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements());
+        if ( r.get_number_of_elements()!=x.get_number_of_elements()) r.create(x.get_dimensions());
+
+        subtract(x.get_number_of_elements(), x.begin(), y.begin(), r.begin());
+    }
+
+    template EXPORTCPUCOREMATH void subtract(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r);
+    template EXPORTCPUCOREMATH void subtract(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
+    template EXPORTCPUCOREMATH void subtract(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void subtract(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, hoNDArray< std::complex<double> >& r);
+
+    // --------------------------------------------------------------------------------
+
+    /// Generic element-wise product: r[k] = x[k] * y[k] for k in [0, N), using T's operator*.
+    /// The OpenMP if() clause enables threading only when N>NumElementsUseThreading.
+    template <typename T> 
+    inline void multiply(size_t N, const T* x, const T* y, T* r)
+    {
+        long long k;
+        #pragma omp parallel for default(none) private(k) shared(N, x, y, r) if (N>NumElementsUseThreading)
+        for ( k=0; k<(long long)N; ++k )
+        {
+            r[k] = x[k]*y[k];
+        }
+    }
+
+    /// Element-wise complex product for std::complex<float>: r[k] = x[k] * y[k], k in [0, N).
+    /// Both operands are copied into locals before r[k] is written through its float[2] view.
+    /// The OpenMP if() clause enables threading only when N>NumElementsUseThreading.
+    inline void multiply(size_t N, const  std::complex<float> * x, const  std::complex<float> * y,  std::complex<float> * r)
+    {
+        long long k;
+        #pragma omp parallel for default(none) private(k) shared(N, x, y, r) if (N>NumElementsUseThreading)
+        for (k = 0; k < (long long)N; ++k)
+        {
+            const float xr = x[k].real(), xi = x[k].imag();
+            const float yr = y[k].real(), yi = y[k].imag();
+
+            float (&out)[2] = reinterpret_cast<float(&)[2]>(r[k]);
+            out[0] = xr*yr-xi*yi;
+            out[1] = xr*yi+xi*yr;
+        }
+    }
+
+    /// Element-wise complex product for std::complex<double>: r[k] = x[k] * y[k], k in [0, N).
+    /// Both operands are copied into locals before r[k] is written through its double[2] view.
+    /// The OpenMP if() clause enables threading only when N>NumElementsUseThreading.
+    inline void multiply(size_t N, const  std::complex<double> * x, const  std::complex<double> * y,  std::complex<double> * r)
+    {
+        long long k;
+        #pragma omp parallel for default(none) private(k) shared(N, x, y, r) if (N>NumElementsUseThreading)
+        for (k = 0; k < (long long)N; ++k)
+        {
+            const double xr = x[k].real(), xi = x[k].imag();
+            const double yr = y[k].real(), yi = y[k].imag();
+
+            double (&out)[2] = reinterpret_cast<double(&)[2]>(r[k]);
+            out[0] = xr*yr-xi*yi;
+            out[1] = xr*yi+xi*yr;
+        }
+    }
+
+    /// r = x .* y, element by element. x and y must hold the same number of elements
+    /// (checked through GADGET_DEBUG_CHECK_THROW). When r's element count differs from x's,
+    /// r is re-created with x's dimensions; x's data is no longer copied into r first.
+    template <typename T> 
+    void multiply(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r)
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements());
+        if ( r.get_number_of_elements()!=x.get_number_of_elements()) r.create(x.get_dimensions());
+
+        multiply(x.get_number_of_elements(), x.begin(), y.begin(), r.begin());
+    }
+
+    template EXPORTCPUCOREMATH void multiply(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r);
+    template EXPORTCPUCOREMATH void multiply(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
+    template EXPORTCPUCOREMATH void multiply(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void multiply(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, hoNDArray< std::complex<double> >& r);
+
+    // --------------------------------------------------------------------------------
+
+    /// Generic element-wise quotient: r[k] = x[k] / y[k] for k in [0, N), using T's operator/.
+    /// The OpenMP if() clause enables threading only when N>NumElementsUseThreading.
+    template <typename T> 
+    inline void divide(size_t N, const T* x, const T* y, T* r)
+    {
+        long long k;
+        #pragma omp parallel for default(none) private(k) shared(N, x, y, r) if (N>NumElementsUseThreading)
+        for ( k=0; k<(long long)N; ++k )
+        {
+            r[k] = x[k]/y[k];
+        }
+    }
+
+    /// Element-wise complex quotient for std::complex<float>: r[k] = x[k] / y[k], k in [0, N).
+    /// Computed as x[k]*conj(y[k]) multiplied by 1/(|y[k]|^2); no guard against y[k]==0.
+    /// The OpenMP if() clause enables threading only when N>NumElementsUseThreading.
+    inline void divide(size_t N, const  std::complex<float> * x, const  std::complex<float> * y,  std::complex<float> * r)
+    {
+        long long k;
+        #pragma omp parallel for default(none) private(k) shared(N, x, y, r) if (N>NumElementsUseThreading)
+        for (k = 0; k < (long long)N; ++k)
+        {
+            const float xr = x[k].real(), xi = x[k].imag();
+            const float yr = y[k].real(), yi = y[k].imag();
+
+            const float invNorm = 1/(yr*yr+yi*yi);
+
+            float (&out)[2] = reinterpret_cast<float(&)[2]>(r[k]);
+            out[0] = (xr*yr+xi*yi)*invNorm;
+            out[1] = (xi*yr-xr*yi)*invNorm;
+        }
+    }
+
+    /// Element-wise complex quotient for std::complex<double>: r[k] = x[k] / y[k], k in [0, N).
+    /// Computed as x[k]*conj(y[k]) multiplied by 1/(|y[k]|^2); no guard against y[k]==0.
+    /// The OpenMP if() clause enables threading only when N>NumElementsUseThreading.
+    inline void divide(size_t N, const  std::complex<double> * x, const  std::complex<double> * y,  std::complex<double> * r)
+    {
+        long long k;
+        #pragma omp parallel for default(none) private(k) shared(N, x, y, r) if (N>NumElementsUseThreading)
+        for (k = 0; k < (long long)N; ++k)
+        {
+            const double xr = x[k].real(), xi = x[k].imag();
+            const double yr = y[k].real(), yi = y[k].imag();
+
+            const double invNorm = 1/(yr*yr+yi*yi);
+
+            double (&out)[2] = reinterpret_cast<double(&)[2]>(r[k]);
+            out[0] = (xr*yr+xi*yi)*invNorm;
+            out[1] = (xi*yr-xr*yi)*invNorm;
+        }
+    }
+
+    /// r = x ./ y, element by element. x and y must hold the same number of elements
+    /// (checked through GADGET_DEBUG_CHECK_THROW). When r's element count differs from x's,
+    /// r is re-created with x's dimensions; x's data is no longer copied into r first.
+    template <typename T> 
+    void divide(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r)
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements());
+        if ( r.get_number_of_elements()!=x.get_number_of_elements()) r.create(x.get_dimensions());
+
+        divide(x.get_number_of_elements(), x.begin(), y.begin(), r.begin());
+    }
+
+    template EXPORTCPUCOREMATH void divide(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r);
+    template EXPORTCPUCOREMATH void divide(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
+    template EXPORTCPUCOREMATH void divide(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void divide(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, hoNDArray< std::complex<double> >& r);
+
+    // --------------------------------------------------------------------------------
+
+    /// r[k] = std::sqrt(x[k]) for every element of x.
+    /// r is re-created with x's dimensions whenever its element count differs from x's.
+    /// The OpenMP if() clause enables threading only when N>NumElementsUseThreading.
+    template <typename T> 
+    void sqrt(const hoNDArray<T>& x, hoNDArray<T>& r)
+    {
+        if ( r.get_number_of_elements()!=x.get_number_of_elements()) r.create(x.get_dimensions());
+
+        size_t N = x.get_number_of_elements();
+        const T* src = x.begin();
+        T* dst = r.begin();
+
+        long long k;
+        #pragma omp parallel for default(none) private(k) shared(N, src, dst) if (N>NumElementsUseThreading)
+        for ( k=0; k<(long long)N; ++k )
+        {
+            dst[k] = std::sqrt(src[k]);
+        }
+    }
+
+    template EXPORTCPUCOREMATH void sqrt(const hoNDArray<float>& x, hoNDArray<float>& r);
+    template EXPORTCPUCOREMATH void sqrt(const hoNDArray<double>& x, hoNDArray<double>& r);
+    template EXPORTCPUCOREMATH void sqrt(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void sqrt(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r);
+
+    // --------------------------------------------------------------------------------
+
+    /// Finds the element of x with the smallest magnitude.
+    /// On return ind is the index of the first such element and r its value. For an empty
+    /// array ind is set to 0 and r is left unchanged.
+    /// Every magnitude, including the first element's, is computed with std::abs.
+    template <typename T> 
+    void minAbsolute(const hoNDArray<T>& x, T& r, size_t& ind)
+    {
+        size_t N = x.get_number_of_elements();
+        const T* pX = x.begin();
+
+        ind = 0;
+        if ( N == 0 ) return;
+
+        typename realType<T>::Type v = std::abs(pX[0]);
+        typename realType<T>::Type v2;
+
+        for ( size_t n=1; n<N; n++ )
+        {
+            v2 = std::abs(pX[n]);
+            if ( v2 < v )
+            {
+                v = v2;
+                ind = n;
+            }
+        }
+        r = pX[ind];
+    }
+
+    template EXPORTCPUCOREMATH void minAbsolute(const hoNDArray<float>& x, float& r, size_t& ind);
+    template EXPORTCPUCOREMATH void minAbsolute(const hoNDArray<double>& x, double& r, size_t& ind);
+    template EXPORTCPUCOREMATH void minAbsolute(const hoNDArray< std::complex<float> >& x,  std::complex<float> & r, size_t& ind);
+    template EXPORTCPUCOREMATH void minAbsolute(const hoNDArray< std::complex<double> >& x,  std::complex<double> & r, size_t& ind);
+
+    // --------------------------------------------------------------------------------
+
+    /// Finds the element of x with the largest magnitude.
+    /// On return ind is the index of the first such element and r its value. For an empty
+    /// array ind is set to 0 and r is left unchanged.
+    /// Every magnitude, including the first element's, is computed with std::abs.
+    template <typename T> 
+    void maxAbsolute(const hoNDArray<T>& x, T& r, size_t& ind)
+    {
+        size_t N = x.get_number_of_elements();
+        const T* pX = x.begin();
+
+        ind = 0;
+        if ( N == 0 ) return;
+
+        typename realType<T>::Type v = std::abs(pX[0]);
+        typename realType<T>::Type v2;
+
+        for ( size_t n=1; n<N; n++ )
+        {
+            v2 = std::abs(pX[n]);
+            if ( v2 > v )
+            {
+                v = v2;
+                ind = n;
+            }
+        }
+        r = pX[ind];
+    }
+
+    template EXPORTCPUCOREMATH void maxAbsolute(const hoNDArray<float>& x, float& r, size_t& ind);
+    template EXPORTCPUCOREMATH void maxAbsolute(const hoNDArray<double>& x, double& r, size_t& ind);
+    template EXPORTCPUCOREMATH void maxAbsolute(const hoNDArray< std::complex<float> >& x,  std::complex<float> & r, size_t& ind);
+    template EXPORTCPUCOREMATH void maxAbsolute(const hoNDArray< std::complex<double> >& x,  std::complex<double> & r, size_t& ind);
+
+    // --------------------------------------------------------------------------------
+
+    /// r[k] = x[k] * conj(y[k]) for std::complex<float> buffers, k in [0, N).
+    /// The OpenMP if() clause enables threading only when N>NumElementsUseThreading.
+    inline void multiplyConj(size_t N, const  std::complex<float> * x, const  std::complex<float> * y,  std::complex<float> * r)
+    {
+        long long k;
+        #pragma omp parallel for default(none) private(k) shared(N, x, y, r) if (N>NumElementsUseThreading)
+        for ( k=0; k<(long long)N; ++k )
+        {
+            const float xr = x[k].real(), xi = x[k].imag();
+            const float yr = y[k].real(), yi = y[k].imag();
+
+            float (&out)[2] = reinterpret_cast<float(&)[2]>(r[k]);
+            out[0] = (xr*yr + xi*yi);
+            out[1] = (yr*xi - xr*yi);
+        }
+    }
+
+    /// r[k] = x[k] * conj(y[k]) for std::complex<double> buffers, k in [0, N).
+    /// The OpenMP if() clause enables threading only when N>NumElementsUseThreading.
+    inline void multiplyConj(size_t N, const  std::complex<double> * x, const  std::complex<double> * y,  std::complex<double> * r)
+    {
+        long long k;
+        #pragma omp parallel for default(none) private(k) shared(N, x, y, r) if (N>NumElementsUseThreading)
+        for ( k=0; k<(long long)N; ++k )
+        {
+            const double xr = x[k].real(), xi = x[k].imag();
+            const double yr = y[k].real(), yi = y[k].imag();
+
+            double (&out)[2] = reinterpret_cast<double(&)[2]>(r[k]);
+            out[0] = (xr*yr + xi*yi);
+            out[1] = (yr*xi - xr*yi);
+        }
+    }
+
+    /// r = x .* conj(y), element by element. x and y must hold the same number of elements
+    /// (checked through GADGET_DEBUG_CHECK_THROW). When r's element count differs from x's,
+    /// r is re-created with x's dimensions; x's data is no longer copied into r first.
+    template <typename T> 
+    void multiplyConj(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r)
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements());
+        if ( r.get_number_of_elements()!=x.get_number_of_elements()) r.create(x.get_dimensions());
+
+        multiplyConj(x.get_number_of_elements(), x.begin(), y.begin(), r.begin());
+    }
+
+    template EXPORTCPUCOREMATH void multiplyConj(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void multiplyConj(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, hoNDArray< std::complex<double> >& r);
+
+    // --------------------------------------------------------------------------------
+
+    /// r[k] = conj(x[k]) for k in [0, N): the real part is copied, the imaginary part negated.
+    inline void conjugate(size_t N, const  std::complex<float> * x,  std::complex<float> * r)
+    {
+        long long k;
+        #pragma omp parallel for default(none) private(k) shared(N, x, r) if (N>NumElementsUseThreading)
+        for ( k=0; k<(long long)N; ++k )
+        {
+            reinterpret_cast<float(&)[2]>(r[k])[0] = x[k].real();
+            reinterpret_cast<float(&)[2]>(r[k])[1] = -x[k].imag();
+        }
+    }
+
+    // Complex conjugate, double precision: r[k] = conj(x[k]) for k in [0, N).
+    // The loop is threaded with OpenMP only when N exceeds NumElementsUseThreading.
+    inline void conjugate(size_t N, const  std::complex<double> * x,  std::complex<double> * r)
+    {
+        long long k;
+
+        #pragma omp parallel for default(none) private(k) shared(N, x, r) if (N>NumElementsUseThreading)
+        for ( k=0; k<(long long)N; k++ )
+        {
+            // access both elements as {real, imag} pairs
+            const double (&src)[2] = reinterpret_cast< const double(&)[2]>(x[k]);
+            double (&dst)[2] = reinterpret_cast<double(&)[2]>(r[k]);
+
+            dst[0] = src[0];
+            dst[1] = -(src[1]);
+        }
+    }
+
+    // Array-level conjugate: r = conj(x).
+    // r is (re)created with x's dimensions when its element count differs from x's.
+    template <typename T> 
+    void conjugate(const hoNDArray<T>& x, hoNDArray<T>& r)
+    {
+        const bool sameLength = ( r.get_number_of_elements()==x.get_number_of_elements() );
+        if ( !sameLength )
+        {
+            r.create(x.get_dimensions());
+        }
+
+        conjugate(x.get_number_of_elements(),
+                  x.begin(),
+                  r.begin());
+    }
+
+    // explicit instantiations for the supported complex element types
+    template EXPORTCPUCOREMATH void conjugate(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void conjugate(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r);
+
+    // --------------------------------------------------------------------------------
+
+    // Adds machine epsilon (of T's real type) to every element of x whose
+    // magnitude is below that epsilon; other elements are left untouched.
+    // The loop is threaded with OpenMP only when N exceeds NumElementsUseThreading.
+    template <typename T> 
+    inline void addEpsilon(size_t N, T* x)
+    {
+        typedef typename realType<T>::Type real_value_type;
+
+        real_value_type eps = std::numeric_limits<real_value_type>::epsilon();
+
+        long long k;
+
+        #pragma omp parallel for default(none) private(k) shared(N, x, eps) if (N>NumElementsUseThreading)
+        for (k=0; k<(long long)N; k++ )
+        {
+            const bool tooSmall = ( std::abs(x[k]) < eps );
+            if ( tooSmall )
+            {
+                x[k] += eps;
+            }
+        }
+    }
+
+    // complex<float> overload: when |x[k]| is below float epsilon, epsilon is
+    // added to the real part only (the imaginary part is not modified).
+    // The loop is threaded with OpenMP only when N exceeds NumElementsUseThreading.
+    inline void addEpsilon(size_t N,  std::complex<float> * x)
+    {
+        const float eps = std::numeric_limits<float>::epsilon();
+
+        long long k;
+
+        #pragma omp parallel for private(k) if (N>NumElementsUseThreading)
+        for (k=0; k<(long long)N; k++ )
+        {
+            const bool tooSmall = ( std::abs(x[k]) < eps );
+            if ( tooSmall )
+            {
+                // element viewed as {real, imag}; index 0 is the real part
+                float (&parts)[2] = reinterpret_cast<float(&)[2]>(x[k]);
+                parts[0] += eps;
+            }
+        }
+    }
+
+    // complex<double> overload: when |x[k]| is below double epsilon, epsilon is
+    // added to the real part only (the imaginary part is not modified).
+    // The loop is threaded with OpenMP only when N exceeds NumElementsUseThreading.
+    inline void addEpsilon(size_t N,  std::complex<double> * x)
+    {
+        const double eps = std::numeric_limits<double>::epsilon();
+
+        long long k;
+
+        #pragma omp parallel for private(k) if (N>NumElementsUseThreading)
+        for (k=0; k<(long long)N; k++ )
+        {
+            const bool tooSmall = ( std::abs(x[k]) < eps );
+            if ( tooSmall )
+            {
+                // element viewed as {real, imag}; index 0 is the real part
+                double (&parts)[2] = reinterpret_cast<double(&)[2]>(x[k]);
+                parts[0] += eps;
+            }
+        }
+    }
+
+    // Array-level addEpsilon: applies the pointer-based addEpsilon in place
+    // to all elements of x.
+    template <typename T> 
+    void addEpsilon(hoNDArray<T>& x)
+    {
+        addEpsilon(
+            x.get_number_of_elements(),
+            x.begin() );
+    }
+
+    // explicit instantiations for the supported element types
+    template EXPORTCPUCOREMATH void addEpsilon(hoNDArray<float>& x);
+    template EXPORTCPUCOREMATH void addEpsilon(hoNDArray<double>& x);
+    template EXPORTCPUCOREMATH void addEpsilon(hoNDArray< std::complex<float> >& x);
+    template EXPORTCPUCOREMATH void addEpsilon(hoNDArray< std::complex<double> >& x);
+
+    // --------------------------------------------------------------------------------
+
+    // Euclidean (L2) norm of a float vector: r = sqrt( sum_k x[k]^2 ).
+    // The sum is accumulated in float via an OpenMP reduction; threading is
+    // enabled only when N exceeds NumElementsUseThreading.
+    inline void norm2(size_t N, const float* x, float& r)
+    {
+        long long k;
+
+        float sqSum(0);
+
+        #pragma omp parallel for private(k) reduction(+:sqSum) if (N>NumElementsUseThreading)
+        for (k = 0; k < (long long)N; k++)
+        {
+            const float v = x[k];
+            sqSum += ( v*v );
+        }
+
+        r = std::sqrt(sqSum);
+    }
+
+    // Euclidean (L2) norm of a double vector: r = sqrt( sum_k x[k]^2 ).
+    // The sum is accumulated in double via an OpenMP reduction; threading is
+    // enabled only when N exceeds NumElementsUseThreading.
+    inline void norm2(size_t N, const double* x, double& r)
+    {
+        long long k;
+
+        double sqSum(0);
+
+        #pragma omp parallel for private(k) reduction(+:sqSum) if (N>NumElementsUseThreading)
+        for (k = 0; k < (long long)N; k++)
+        {
+            const double v = x[k];
+            sqSum += ( v*v );
+        }
+
+        r = std::sqrt(sqSum);
+    }
+
+    // Euclidean (L2) norm of a complex<float> vector:
+    // r = sqrt( sum_k (re_k^2 + im_k^2) ).
+    // The sum is accumulated in float via an OpenMP reduction; threading is
+    // enabled only when N exceeds NumElementsUseThreading.
+    inline void norm2(size_t N, const  std::complex<float> * x, float& r)
+    {
+        long long k;
+
+        float sqSum(0);
+
+        #pragma omp parallel for private(k) reduction(+:sqSum) if (N>NumElementsUseThreading)
+        for (k = 0; k < (long long)N; k++)
+        {
+            const float re = x[k].real();
+            const float im = x[k].imag();
+            sqSum += ( (re*re) + (im * im) );
+        }
+
+        r = std::sqrt(sqSum);
+    }
+
+    // Euclidean (L2) norm of a complex<double> vector:
+    // r = sqrt( sum_k (re_k^2 + im_k^2) ).
+    // The sum is accumulated in double via an OpenMP reduction; threading is
+    // enabled only when N exceeds NumElementsUseThreading.
+    inline void norm2(size_t N, const  std::complex<double> * x, double& r)
+    {
+        long long k;
+
+        double sqSum(0);
+
+        #pragma omp parallel for private(k) reduction(+:sqSum) if (N>NumElementsUseThreading)
+        for (k = 0; k < (long long)N; k++)
+        {
+            const double re = x[k].real();
+            const double im = x[k].imag();
+            sqSum += ( (re*re) + (im * im) );
+        }
+
+        r = std::sqrt(sqSum);
+    }
+
+    // Array-level L2 norm: forwards x's element count and data pointer to the
+    // pointer-based norm2 and stores the result in r.
+    template <typename T> 
+    void norm2(const hoNDArray<T>& x, typename realType<T>::Type& r)
+    {
+        norm2(
+            x.get_number_of_elements(),
+            x.begin(),
+            r );
+    }
+
+    // explicit instantiations for the supported element types
+    template EXPORTCPUCOREMATH void norm2(const hoNDArray<float>& x, float& r);
+    template EXPORTCPUCOREMATH void norm2(const hoNDArray<double>& x, double& r);
+    template EXPORTCPUCOREMATH void norm2(const hoNDArray< std::complex<float> >& x, float& r);
+    template EXPORTCPUCOREMATH void norm2(const hoNDArray< std::complex<double> >& x, double& r);
+
+    // Value-returning convenience form of norm2: returns the L2 norm of x.
+    template <typename T> inline 
+    typename realType<T>::Type norm2(const hoNDArray<T>& x)
+    {
+        typename realType<T>::Type result;
+        norm2(x, result);   // filled in by the out-parameter overload
+        return result;
+    }
+
+    // explicit instantiations for the supported element types
+    template EXPORTCPUCOREMATH float norm2(const hoNDArray<float>& x);
+    template EXPORTCPUCOREMATH double norm2(const hoNDArray<double>& x);
+    template EXPORTCPUCOREMATH float norm2(const hoNDArray< std::complex<float> >& x);
+    template EXPORTCPUCOREMATH double norm2(const hoNDArray< std::complex<double> >& x);
+
+    // --------------------------------------------------------------------------------
+
+    // L1 norm for generic element types: r = sum_k GT_ABS(x[k]).
+    // The sum is accumulated via an OpenMP reduction; threading is enabled only
+    // when N exceeds NumElementsUseThreading.
+    template <typename T> inline 
+    void norm1(size_t N, const T* x, typename realType<T>::Type& r)
+    {
+        long long k;
+
+        typename realType<T>::Type absSum(0);
+
+        #pragma omp parallel for private(k) reduction(+:absSum) if (N>NumElementsUseThreading)
+        for (k=0; k<(long long)N; k++)
+        {
+            const T& elem = x[k];
+            absSum += GT_ABS(elem);
+        }
+
+        r = absSum;
+    }
+
+    // L1 norm of a complex<float> vector: r = sum_k sqrt(re_k^2 + im_k^2).
+    // The sum is accumulated in float via an OpenMP reduction; threading is
+    // enabled only when N exceeds NumElementsUseThreading.
+    inline void norm1(size_t N, const  std::complex<float> * x, float& r)
+    {
+        long long k;
+        float magSum = 0.0f;
+
+        #pragma omp parallel for private(k) reduction(+:magSum) if (N>NumElementsUseThreading)
+        for (k = 0; k < (long long)N; k++)
+        {
+            const float re = x[k].real();
+            const float im = x[k].imag();
+            magSum += std::sqrt( (re*re) + (im * im) );
+        }
+
+        r = magSum;
+    }
+
+    // L1 norm of a complex<double> vector: r = sum_k sqrt(re_k^2 + im_k^2).
+    // The sum is accumulated in double via an OpenMP reduction; threading is
+    // enabled only when N exceeds NumElementsUseThreading.
+    inline void norm1(size_t N, const  std::complex<double> * x, double& r)
+    {
+        long long k;
+        double magSum = 0.0;
+
+        #pragma omp parallel for private(k) reduction(+:magSum) if (N>NumElementsUseThreading)
+        for (k = 0; k < (long long)N; k++)
+        {
+            const double re = x[k].real();
+            const double im = x[k].imag();
+            magSum += std::sqrt( (re*re) + (im * im) );
+        }
+
+        r = magSum;
+    }
+
+    // Array-level L1 norm: forwards x's element count and data pointer to the
+    // pointer-based norm1 and stores the result in r.
+    template <typename T> 
+    void norm1(const hoNDArray<T>& x, typename realType<T>::Type& r)
+    {
+        norm1(
+            x.get_number_of_elements(),
+            x.begin(),
+            r );
+    }
+
+    // explicit instantiations for the supported element types
+    template EXPORTCPUCOREMATH void norm1(const hoNDArray<float>& x, float& r);
+    template EXPORTCPUCOREMATH void norm1(const hoNDArray<double>& x, double& r);
+    template EXPORTCPUCOREMATH void norm1(const hoNDArray< std::complex<float> >& x, float& r);
+    template EXPORTCPUCOREMATH void norm1(const hoNDArray< std::complex<double> >& x, double& r);
+
+    // Value-returning convenience form of norm1: returns the L1 norm of x.
+    template <typename T> inline 
+    typename realType<T>::Type norm1(const hoNDArray<T>& x)
+    {
+        typename realType<T>::Type result;
+        norm1(x, result);   // filled in by the out-parameter overload
+        return result;
+    }
+
+    // explicit instantiations for the supported element types
+    template EXPORTCPUCOREMATH float norm1(const hoNDArray<float>& x);
+    template EXPORTCPUCOREMATH double norm1(const hoNDArray<double>& x);
+    template EXPORTCPUCOREMATH float norm1(const hoNDArray< std::complex<float> >& x);
+    template EXPORTCPUCOREMATH double norm1(const hoNDArray< std::complex<double> >& x);
+
+    // --------------------------------------------------------------------------------
+
+    // Conjugated dot product, single precision:
+    //   r = sum_n x[n] * conj(y[n])
+    // (note that the second argument, y, is the one being conjugated).
+    // Real and imaginary parts are accumulated in two float partial sums.
+    // The loop is threaded with OpenMP only when N exceeds NumElementsUseThreading.
+    inline void dotc(size_t N, const  std::complex<float> * x, const  std::complex<float> * y,  std::complex<float> & r)
+    {
+        long long n;
+
+        float sa(0), sb(0);
+
+        // Both accumulators must be reduction variables: a variable that is not
+        // listed would be shared between threads and updated concurrently.
+        #pragma omp parallel for private(n) reduction(+:sa) reduction(+:sb) if (N>NumElementsUseThreading)
+        for (n = 0; n < (long long)N; n++)
+        {
+            const float a = x[n].real();
+            const float b = x[n].imag();
+            const float c = y[n].real();
+            const float d = y[n].imag();
+
+            sa += (a*c + b*d);   // real part of x[n]*conj(y[n])
+            sb += (c*b - a*d);   // imaginary part of x[n]*conj(y[n])
+        }
+
+        // store through the {real, imag} view of the result
+        reinterpret_cast<float(&)[2]>(r)[0] = sa;
+        reinterpret_cast<float(&)[2]>(r)[1] = sb;
+    }
+
+    // Conjugated dot product, double precision:
+    //   r = sum_n x[n] * conj(y[n])
+    // (note that the second argument, y, is the one being conjugated).
+    // Real and imaginary parts are accumulated in two double partial sums.
+    // The loop is threaded with OpenMP only when N exceeds NumElementsUseThreading.
+    inline void dotc(size_t N, const  std::complex<double> * x, const  std::complex<double> * y,  std::complex<double> & r)
+    {
+        long long n;
+
+        double sa(0), sb(0);
+
+        // Both accumulators must be reduction variables: a variable that is not
+        // listed would be shared between threads and updated concurrently.
+        #pragma omp parallel for private(n) reduction(+:sa) reduction(+:sb) if (N>NumElementsUseThreading)
+        for (n = 0; n < (long long)N; n++)
+        {
+            const double a = x[n].real();
+            const double b = x[n].imag();
+            const double c = y[n].real();
+            const double d = y[n].imag();
+
+            sa += (a*c + b*d);   // real part of x[n]*conj(y[n])
+            sb += (c*b - a*d);   // imaginary part of x[n]*conj(y[n])
+        }
+
+        // store through the {real, imag} view of the result
+        reinterpret_cast<double(&)[2]>(r)[0] = sa;
+        reinterpret_cast<double(&)[2]>(r)[1] = sb;
+    }
+
+    // Array-level dotc: r = sum over elements of x .* conj(y).
+    // x and y must hold the same number of elements (GADGET_DEBUG_CHECK_THROW).
+    template <typename T> 
+    void dotc(const hoNDArray<T>& x, const hoNDArray<T>& y, T& r)
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements());
+
+        dotc(
+            x.get_number_of_elements(),
+            x.begin(),
+            y.begin(),
+            r );
+    }
+
+    // explicit instantiations for the supported complex element types
+    template EXPORTCPUCOREMATH void dotc(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y,  std::complex<float> & r);
+    template EXPORTCPUCOREMATH void dotc(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y,  std::complex<double> & r);
+
+    // Value-returning convenience form of dotc.
+    template <typename T> 
+    T dotc(const hoNDArray<T>& x, const hoNDArray<T>& y)
+    {
+        T result;
+        dotc(x, y, result);   // filled in by the out-parameter overload
+        return result;
+    }
+
+    // explicit instantiations for the supported complex element types
+    template EXPORTCPUCOREMATH std::complex<float> dotc(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y);
+    template EXPORTCPUCOREMATH std::complex<double> dotc(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y);
+
+    // --------------------------------------------------------------------------------
+
+    // Dot product of two float vectors: r = sum_k x[k]*y[k].
+    // The sum is accumulated in float via an OpenMP reduction; threading is
+    // enabled only when N exceeds NumElementsUseThreading.
+    inline void dotu(size_t N, const float* x, const float* y, float& r)
+    {
+        long long k;
+
+        float acc(0);
+
+        #pragma omp parallel for private(k) reduction(+:acc) if (N>NumElementsUseThreading)
+        for (k=0; k<(long long)N; k++)
+        {
+            acc += x[k]*y[k];
+        }
+
+        r = acc;
+    }
+
+    // Dot product of two double vectors: r = sum_k x[k]*y[k].
+    // The sum is accumulated in double via an OpenMP reduction; threading is
+    // enabled only when N exceeds NumElementsUseThreading.
+    inline void dotu(size_t N, const double* x, const double* y, double& r)
+    {
+        long long k;
+
+        double acc(0);
+
+        #pragma omp parallel for private(k) reduction(+:acc) if (N>NumElementsUseThreading)
+        for (k=0; k<(long long)N; k++)
+        {
+            acc += x[k]*y[k];
+        }
+
+        r = acc;
+    }
+
+    // Unconjugated complex dot product, single precision:
+    //   r = sum_n x[n] * y[n]
+    // Real and imaginary parts are accumulated in two float reduction variables.
+    // The loop is threaded with OpenMP only when N exceeds NumElementsUseThreading.
+    inline void dotu(size_t N, const  std::complex<float> * x, const  std::complex<float> * y,  std::complex<float> & r)
+    {
+        long long n;
+
+        float sa(0), sb(0);
+        #pragma omp parallel for private(n) reduction(+:sa) reduction(+:sb) if (N>NumElementsUseThreading)
+        for (n = 0; n < (long long)N; n++)
+        {
+            const float a = x[n].real();
+            const float b = x[n].imag();
+            const float c = y[n].real();
+            const float d = y[n].imag();
+
+            sa += (a*c - b*d);   // real part of x[n]*y[n]
+            sb += (c*b + a*d);   // imaginary part of x[n]*y[n]
+        }
+
+        // store through the {real, imag} view of the result
+        reinterpret_cast<float(&)[2]>(r)[0] = sa;
+        reinterpret_cast<float(&)[2]>(r)[1] = sb;
+    }
+
+    // Unconjugated complex dot product, double precision:
+    //   r = sum_n x[n] * y[n]
+    // Real and imaginary parts are accumulated in two double reduction variables.
+    // The loop is threaded with OpenMP only when N exceeds NumElementsUseThreading.
+    inline void dotu(size_t N, const  std::complex<double> * x, const  std::complex<double> * y,  std::complex<double> & r)
+    {
+        long long n;
+
+        double sa(0), sb(0);
+        #pragma omp parallel for private(n) reduction(+:sa) reduction(+:sb) if (N>NumElementsUseThreading)
+        for (n = 0; n < (long long)N; n++)
+        {
+            const double a = x[n].real();
+            const double b = x[n].imag();
+            const double c = y[n].real();
+            const double d = y[n].imag();
+
+            sa += (a*c - b*d);   // real part of x[n]*y[n]
+            sb += (c*b + a*d);   // imaginary part of x[n]*y[n]
+        }
+
+        // store through the {real, imag} view of the result
+        reinterpret_cast<double(&)[2]>(r)[0] = sa;
+        reinterpret_cast<double(&)[2]>(r)[1] = sb;
+    }
+
+    // Array-level dotu: r = sum over elements of x .* y (no conjugation).
+    // x and y must hold the same number of elements (GADGET_DEBUG_CHECK_THROW).
+    template <typename T> 
+    void dotu(const hoNDArray<T>& x, const hoNDArray<T>& y, T& r)
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements());
+
+        dotu(
+            x.get_number_of_elements(),
+            x.begin(),
+            y.begin(),
+            r );
+    }
+
+    // explicit instantiations for the supported element types
+    template EXPORTCPUCOREMATH void dotu(const hoNDArray<float>& x, const hoNDArray<float>& y, float& r);
+    template EXPORTCPUCOREMATH void dotu(const hoNDArray<double>& x, const hoNDArray<double>& y, double& r);
+    template EXPORTCPUCOREMATH void dotu(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, std::complex<float>& r);
+    template EXPORTCPUCOREMATH void dotu(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, std::complex<double>& r);
+
+    // Value-returning convenience form of dotu.
+    template <typename T> 
+    T dotu(const hoNDArray<T>& x, const hoNDArray<T>& y)
+    {
+        T result = 0;
+        dotu(x, y, result);   // overwritten by the out-parameter overload
+        return result;
+    }
+
+    // explicit instantiations for the supported element types
+    template EXPORTCPUCOREMATH float dotu(const hoNDArray<float>& x, const hoNDArray<float>& y);
+    template EXPORTCPUCOREMATH double dotu(const hoNDArray<double>& x, const hoNDArray<double>& y);
+    template EXPORTCPUCOREMATH  std::complex<float>  dotu(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y);
+    template EXPORTCPUCOREMATH  std::complex<double>  dotu(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y);
+
+    // --------------------------------------------------------------------------------
+
+    // Element-wise magnitude for generic element types: r[k] = GT_ABS(x[k]).
+    // The loop is threaded with OpenMP only when N exceeds NumElementsUseThreading.
+    template <typename T> 
+    void absolute(size_t N, const T* x, typename realType<T>::Type* r)
+    {
+        long long k;
+
+        #pragma omp parallel for default(none) private(k) shared(N, x, r) if (N>NumElementsUseThreading)
+        for ( k=0; k<(long long)N; k++ )
+        {
+            r[k]= GT_ABS(x[k]);
+        }
+    }
+
+    // Element-wise magnitude of complex<float> values into a float array:
+    // r[k] = sqrt(re_k^2 + im_k^2).
+    // The loop is threaded with OpenMP only when N exceeds NumElementsUseThreading.
+    inline void absolute(size_t N, const  std::complex<float> * x, float* r)
+    {
+        long long k;
+
+        #pragma omp parallel for default(none) private(k) shared(N, x, r) if (N>NumElementsUseThreading)
+        for ( k=0; k<(long long)N; k++ )
+        {
+            const float re = x[k].real();
+            const float im = x[k].imag();
+            r[k]= std::sqrt( (re*re) + (im * im) );
+        }
+    }
+
+    // Element-wise magnitude of complex<double> values into a double array:
+    // r[k] = sqrt(re_k^2 + im_k^2).
+    // The loop is threaded with OpenMP only when N exceeds NumElementsUseThreading.
+    inline void absolute(size_t N, const  std::complex<double> * x, double* r)
+    {
+        long long k;
+
+        #pragma omp parallel for default(none) private(k) shared(N, x, r) if (N>NumElementsUseThreading)
+        for ( k=0; k<(long long)N; k++ )
+        {
+            const double re = x[k].real();
+            const double im = x[k].imag();
+            r[k]= std::sqrt( (re*re) + (im * im) );
+        }
+    }
+
+    // Array-level magnitude with a real-valued result array: r = |x|.
+    // r is (re)created with x's dimensions when its element count differs from x's.
+    template <typename T> 
+    void absolute(const hoNDArray<T>& x, hoNDArray<typename realType<T>::Type>& r)
+    {
+        const bool sameLength = ( r.get_number_of_elements()==x.get_number_of_elements() );
+        if ( !sameLength )
+        {
+            r.create(x.get_dimensions());
+        }
+
+        absolute(x.get_number_of_elements(),
+                 x.begin(),
+                 r.begin());
+    }
+
+    // explicit instantiations for the supported element types
+    template EXPORTCPUCOREMATH void absolute(const hoNDArray<float>& x, hoNDArray<float>& r);
+    template EXPORTCPUCOREMATH void absolute(const hoNDArray<double>& x, hoNDArray<double>& r);
+    template EXPORTCPUCOREMATH void absolute(const hoNDArray< std::complex<float> >& x, hoNDArray<float>& r);
+    template EXPORTCPUCOREMATH void absolute(const hoNDArray< std::complex<double> >& x, hoNDArray<double>& r);
+
+    // --------------------------------------------------------------------------------
+
+    // Element-wise magnitude of complex<float> values stored back as complex:
+    // real(r[k]) = sqrt(re_k^2 + im_k^2), imag(r[k]) = 0.
+    // The loop is threaded with OpenMP only when N exceeds NumElementsUseThreading.
+    // Any exception caught around the loop is rethrown through GADGET_THROW.
+    inline void absolute(size_t N, const std::complex<float>* x, std::complex<float>* r)
+    {
+        try
+        {
+            long long k;
+
+            #pragma omp parallel for default(none) private(k) shared(N, x, r) if (N>NumElementsUseThreading)
+            for ( k=0; k<(long long)N; k++ )
+            {
+                const float re = x[k].real();
+                const float im = x[k].imag();
+
+                // view the output element as {real, imag}
+                float (&out)[2] = reinterpret_cast<float(&)[2]>(r[k]);
+                out[0] = std::sqrt( (re*re) + (im * im) );
+                out[1] = 0;
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Error happened in absolute(size_t N, const std::complex<float>* x, std::complex<float>* r) ... ");
+        }
+    }
+
+    // Element-wise magnitude of complex<double> values stored back as complex:
+    // real(r[k]) = sqrt(re_k^2 + im_k^2), imag(r[k]) = 0.
+    // The loop is threaded with OpenMP only when N exceeds NumElementsUseThreading.
+    // Any exception caught around the loop is rethrown through GADGET_THROW.
+    inline void absolute(size_t N, const std::complex<double>* x, std::complex<double>* r)
+    {
+        try
+        {
+            long long k;
+
+            #pragma omp parallel for default(none) private(k) shared(N, x, r) if (N>NumElementsUseThreading)
+            for ( k=0; k<(long long)N; k++ )
+            {
+                const double re = x[k].real();
+                const double im = x[k].imag();
+
+                // view the output element as {real, imag}
+                double (&out)[2] = reinterpret_cast<double(&)[2]>(r[k]);
+                out[0] = std::sqrt( (re*re) + (im * im) );
+                out[1] = 0;
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Error happened in absolute(size_t N, const std::complex<double>* x, std::complex<double>* r) ... ");
+        }
+    }
+
+    // Array-level magnitude with a complex-valued result array: the magnitude
+    // goes into the real part of r (see the pointer-based complex overloads).
+    // r is (re)created with x's dimensions when its element count differs from x's.
+    template <typename T> 
+    void absolute(const hoNDArray< std::complex<T> >& x, hoNDArray< std::complex<T> >& r)
+    {
+        const bool sameLength = ( r.get_number_of_elements()==x.get_number_of_elements() );
+        if ( !sameLength )
+        {
+            r.create(x.get_dimensions());
+        }
+
+        absolute(x.get_number_of_elements(),
+                 x.begin(),
+                 r.begin());
+    }
+
+    // explicit instantiations for the supported complex element types
+    template EXPORTCPUCOREMATH void absolute(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void absolute(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r);
+
+    // --------------------------------------------------------------------------------
+
+    // Element-wise phase angle: r[k] = std::arg(x[k]).
+    // r is (re)created with x's dimensions when its element count differs from x's.
+    // The loop is threaded with OpenMP only when the element count exceeds
+    // NumElementsUseThreading.
+    template <typename T> 
+    void argument(const hoNDArray<T>& x, hoNDArray<typename realType<T>::Type>& r)
+    {
+        const bool sameLength = ( r.get_number_of_elements()==x.get_number_of_elements() );
+        if ( !sameLength )
+        {
+            r.create(x.get_dimensions());
+        }
+
+        size_t N = x.get_number_of_elements();
+        const T* src = x.begin();
+        typename realType<T>::Type* dst = r.begin();
+
+        long long k;
+
+        #pragma omp parallel for default(none) private(k) shared(N, src, dst) if (N>NumElementsUseThreading)
+        for ( k=0; k<(long long)N; k++ )
+        {
+            dst[k] = std::arg( src[k] );
+        }
+    }
+
+    // explicit instantiations for the supported complex element types
+    template EXPORTCPUCOREMATH void argument(const hoNDArray< std::complex<float> >& x, hoNDArray<float>& r);
+    template EXPORTCPUCOREMATH void argument(const hoNDArray< std::complex<double> >& x, hoNDArray<double>& r);
+
+    // --------------------------------------------------------------------------------
+
+    // Element-wise reciprocal: r[k] = 1 / x[k].
+    // When r's dimensions differ from x's, r is first overwritten with a copy of x.
+    // No check for zero elements is made here.
+    // The loop is threaded with OpenMP only when the element count exceeds
+    // NumElementsUseThreading.
+    template <typename T> 
+    void inv(const hoNDArray<T>& x, hoNDArray<T>& r)
+    {
+        const bool sameDims = r.dimensions_equal(&x);
+        if ( !sameDims )
+        {
+            r = x;
+        }
+
+        size_t N = x.get_number_of_elements();
+        const T* src = x.begin();
+        T* dst = r.begin();
+
+        T one(1.0);
+        long long k;
+
+        #pragma omp parallel for default(none) private(k) shared(N, src, dst, one) if (N>NumElementsUseThreading)
+        for ( k=0; k<(long long)N; k++ )
+        {
+            dst[k] = one/src[k];
+        }
+    }
+
+    // explicit instantiations for the supported element types
+    template EXPORTCPUCOREMATH void inv(const hoNDArray<float>& x, hoNDArray<float>& r);
+    template EXPORTCPUCOREMATH void inv(const hoNDArray<double>& x, hoNDArray<double>& r);
+    template EXPORTCPUCOREMATH void inv(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void inv(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r);
+
+    // --------------------------------------------------------------------------------
+
+    // 2D convolution with wrap-around (periodic) boundaries, applied
+    // independently to each of `num` consecutive RO-by-E1 images.
+    //
+    //   x   : input, num images of RO*E1 elements each (RO is the fastest index)
+    //   y   : kernel of kRO*kE1 elements (kRO is the fastest index)
+    //   z   : output, same layout as x; must not alias x, since every output
+    //         element reads a neighbourhood of x
+    //
+    // The kernel is flipped into a padded (2*(kRO/2)+1) x (2*(kE1/2)+1) buffer.
+    // Neighbour indices are wrapped at most once, so the kernel half-widths are
+    // assumed not to exceed RO / E1 -- NOTE(review): not checked here.
+    // Any exception is rethrown through GADGET_THROW.
+    template<typename T> 
+    void conv2(size_t RO, size_t E1, size_t num, const T* x, size_t kRO, size_t kE1, const T* y, T* z)
+    {
+        try
+        {
+            long long halfKRO = (long long)(kRO/2);
+            long long halfKE1 = (long long)(kE1/2);
+
+            hoNDArray<T> flipY(2*halfKRO+1, 2*halfKE1+1);
+            T* pKer = flipY.begin();
+
+            // For an even kernel extent the padded buffer has one row/column the
+            // flip loop below never writes but the convolution reads. Zero the
+            // buffer explicitly so those taps contribute nothing; this does not
+            // rely on how hoNDArray initializes its memory (not visible here).
+            const long long kerLen = (2*halfKRO+1)*(2*halfKE1+1);
+            for ( long long k=0; k<kerLen; k++ )
+            {
+                pKer[k] = 0;
+            }
+
+            long long n;
+            long long ro, e1;
+
+            // flip the kernel
+            for ( e1=0; e1<(long long)kE1; e1++ )
+            {
+                long long flip_e1 = 2*halfKE1 - e1;
+
+                for ( ro=0; ro<(long long)kRO; ro++ )
+                {
+                    long long flip_ro = 2*halfKRO - ro;
+
+                    flipY(flip_ro, flip_e1) = y[ro+e1*kRO];
+                }
+            }
+
+            // perform the convolution, one image per outer iteration
+            #pragma omp parallel for default(none) private(n, ro, e1) shared(num, x, RO, E1, z, halfKRO, halfKE1, pKer)
+            for ( n=0; n<(long long)num; n++ )
+            {
+                const T* pX = x + n*RO*E1;
+                T* pZ = z + n*RO*E1;
+
+                long long kro, ke1, dro, de1;
+
+                for ( e1=0; e1<(long long)E1; e1++ )
+                {
+                    for ( ro=0; ro<(long long)RO; ro++ )
+                    {
+                        pZ[ro + e1*RO] = 0;
+                        for ( ke1=-halfKE1; ke1<=halfKE1; ke1++ )
+                        {
+                            // wrap the E1 neighbour index into [0, E1)
+                            de1 = ke1 + e1;
+                            if ( de1 < 0 )
+                            {
+                                de1 += E1;
+                            }
+                            else if ( de1 >= (long long)E1 )
+                            {
+                                de1 -= E1;
+                            }
+
+                            for ( kro=-halfKRO; kro<=halfKRO; kro++ )
+                            {
+                                // wrap the RO neighbour index into [0, RO)
+                                dro = kro + ro;
+                                if ( dro < 0 )
+                                {
+                                    dro += RO;
+                                }
+                                else if ( dro >= (long long)RO )
+                                {
+                                    dro -= RO;
+                                }
+
+                                pZ[ro + e1*RO] += pKer[ kro+halfKRO + (ke1+halfKE1) * (2*halfKRO+1) ] * pX[dro + de1*RO];
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors happened in conv2(size_t RO, size_t E1, size_t num, const T* x, size_t kRO, size_t kE1, const T* y, T* z) ... ");
+        }
+    }
+
+    // Array-level 2D convolution: the first two dimensions of x are convolved
+    // with the first two dimensions of the kernel y; all remaining elements of x
+    // are treated as a stack of independent images.
+    // When z's dimensions differ from x's, z is first overwritten with a copy of x.
+    // Any exception is rethrown through GADGET_THROW.
+    template<typename T> 
+    void conv2(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& z)
+    {
+        try
+        {
+            const bool sameDims = z.dimensions_equal(&x);
+            if ( !sameDims )
+            {
+                z = x;
+            }
+
+            // image extent and number of stacked images
+            long long imgRO = (long long) x.get_size(0);
+            long long imgE1 = (long long) x.get_size(1);
+            long long imgCount = ((long long) x.get_number_of_elements()) / (imgRO*imgE1);
+
+            // kernel extent
+            long long kerRO = (long long) y.get_size(0);
+            long long kerE1 = (long long) y.get_size(1);
+
+            conv2(imgRO, imgE1, imgCount, x.begin(), kerRO, kerE1, y.begin(), z.begin());
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors happened in conv2(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& z) ... ");
+        }
+    }
+
+    // explicit instantiations for the supported element types
+    template EXPORTCPUCOREMATH void conv2(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& z);
+    template EXPORTCPUCOREMATH void conv2(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& z);
+    template EXPORTCPUCOREMATH void conv2(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, hoNDArray< std::complex<float> >& z);
+    template EXPORTCPUCOREMATH void conv2(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, hoNDArray< std::complex<double> >& z);
+
+    // --------------------------------------------------------------------------------
+
+    // 3D convolution with wrap-around (periodic) boundaries, applied
+    // independently to each of `num` consecutive RO-by-E1-by-E2 volumes.
+    //
+    //   x   : input, num volumes of RO*E1*E2 elements each (RO is the fastest index)
+    //   y   : kernel of kRO*kE1*kE2 elements (kRO is the fastest index)
+    //   z   : output, same layout as x; must not alias x, since every output
+    //         element reads a neighbourhood of x
+    //
+    // The kernel is flipped into a padded buffer of odd extent in every
+    // dimension. Neighbour indices are wrapped at most once, so the kernel
+    // half-widths are assumed not to exceed RO / E1 / E2 -- NOTE(review): not
+    // checked here.
+    // Any exception is rethrown through GADGET_THROW.
+    template<typename T> 
+    void conv3(size_t RO, size_t E1, size_t E2, size_t num, const T* x, size_t kRO, size_t kE1, size_t kE2, const T* y, T* z)
+    {
+        try
+        {
+            long long halfKRO = (long long)(kRO/2);
+            long long halfKE1 = (long long)(kE1/2);
+            long long halfKE2 = (long long)(kE2/2);
+
+            hoNDArray<T> flipY(2*halfKRO+1, 2*halfKE1+1, 2*halfKE2+1);
+            T* pKer = flipY.begin();
+
+            // For an even kernel extent the padded buffer has one plane/row/column
+            // the flip loop below never writes but the convolution reads. Zero the
+            // buffer explicitly so those taps contribute nothing; this does not
+            // rely on how hoNDArray initializes its memory (not visible here).
+            const long long kerLen = (2*halfKRO+1)*(2*halfKE1+1)*(2*halfKE2+1);
+            for ( long long k=0; k<kerLen; k++ )
+            {
+                pKer[k] = 0;
+            }
+
+            long long n, e2;
+            long long ro, e1;
+
+            // flip the kernel
+            for ( e2=0; e2<(long long)kE2; e2++ )
+            {
+                long long flip_e2 = 2*halfKE2 - e2;
+
+                for ( e1=0; e1<(long long)kE1; e1++ )
+                {
+                    long long flip_e1 = 2*halfKE1 - e1;
+
+                    for ( ro=0; ro<(long long)kRO; ro++ )
+                    {
+                        long long flip_ro = 2*halfKRO - ro;
+
+                        flipY(flip_ro, flip_e1, flip_e2) = y[ro+e1*kRO+e2*kRO*kE1];
+                    }
+                }
+            }
+
+            // perform the convolution; the outer loop over volumes is threaded
+            // only when there are more than 8 volumes
+            #pragma omp parallel for default(none) private(n) shared(num, x, RO, E1, E2, z, halfKRO, halfKE1, halfKE2, pKer) if ( num > 8 )
+            for ( n=0; n<(long long)num; n++ )
+            {
+                const T* pX = x + n*RO*E1*E2;
+                T* pZ = z + n*RO*E1*E2;
+
+                long long kro, ke1, ke2, dro, de1, de2;
+
+                // second-level worksharing over the E2 slices of one volume
+                #pragma omp parallel for default(none) private(ro, e1, e2, kro, ke1, ke2, dro, de1, de2) shared(pX, RO, E1, E2, pZ, halfKRO, halfKE1, halfKE2, pKer)
+                for ( e2=0; e2<(long long)E2; e2++ )
+                {
+                    for ( e1=0; e1<(long long)E1; e1++ )
+                    {
+                        for ( ro=0; ro<(long long)RO; ro++ )
+                        {
+                            pZ[ro + e1*RO + e2*RO*E1] = 0;
+                            for ( ke2=-halfKE2; ke2<=halfKE2; ke2++ )
+                            {
+                                // wrap the E2 neighbour index into [0, E2)
+                                de2 = ke2 + e2;
+                                if ( de2 < 0 )
+                                {
+                                    de2 += E2;
+                                }
+                                else if ( de2 >= (long long)E2 )
+                                {
+                                    de2 -= E2;
+                                }
+
+                                for ( ke1=-halfKE1; ke1<=halfKE1; ke1++ )
+                                {
+                                    // wrap the E1 neighbour index into [0, E1)
+                                    de1 = ke1 + e1;
+                                    if ( de1 < 0 )
+                                    {
+                                        de1 += E1;
+                                    }
+                                    else if ( de1 >= (long long)E1 )
+                                    {
+                                        de1 -= E1;
+                                    }
+
+                                    for ( kro=-halfKRO; kro<=halfKRO; kro++ )
+                                    {
+                                        // wrap the RO neighbour index into [0, RO)
+                                        dro = kro + ro;
+                                        if ( dro < 0 )
+                                        {
+                                            dro += RO;
+                                        }
+                                        else if ( dro >= (long long)RO )
+                                        {
+                                            dro -= RO;
+                                        }
+
+                                        pZ[ro + e1*RO + e2*RO*E1] += pKer[ kro+halfKRO + (ke1+halfKE1)*(2*halfKRO+1) + (ke2+halfKE2)*(2*halfKRO+1)*(2*halfKE1+1) ] * pX[dro + de1*RO + de2*RO*E1];
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors happened in conv3(size_t RO, size_t E1, size_t E2, size_t num, const T* x, size_t kRO, size_t kE1, size_t kE2, const T* y, T* z) ... ");
+        }
+    }
+
+    // Array-level 3D convolution: the first three dimensions of x are convolved
+    // with the first three dimensions of the kernel y; all remaining elements of
+    // x are treated as a stack of independent volumes.
+    // When z's dimensions differ from x's, z is first overwritten with a copy of x.
+    // Any exception is rethrown through GADGET_THROW.
+    template<typename T> 
+    void conv3(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& z)
+    {
+        try
+        {
+            const bool sameDims = z.dimensions_equal(&x);
+            if ( !sameDims )
+            {
+                z = x;
+            }
+
+            // volume extent and number of stacked volumes
+            long long volRO = (long long) x.get_size(0);
+            long long volE1 = (long long) x.get_size(1);
+            long long volE2 = (long long) x.get_size(2);
+            long long volCount = ((long long)x.get_number_of_elements()) / (volRO*volE1*volE2);
+
+            // kernel extent
+            long long kerRO = (long long) y.get_size(0);
+            long long kerE1 = (long long) y.get_size(1);
+            long long kerE2 = (long long) y.get_size(2);
+
+            conv3(volRO, volE1, volE2, volCount, x.begin(), kerRO, kerE1, kerE2, y.begin(), z.begin());
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors happened in conv3(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& z) ... ");
+        }
+    }
+
+    // explicit instantiations for the supported element types
+    template EXPORTCPUCOREMATH void conv3(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& z);
+    template EXPORTCPUCOREMATH void conv3(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& z);
+    template EXPORTCPUCOREMATH void conv3(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, hoNDArray< std::complex<float> >& z);
+    template EXPORTCPUCOREMATH void conv3(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, hoNDArray< std::complex<double> >& z);
+
+    // --------------------------------------------------------------------------------
+
+    // Scaled vector addition, single precision: r[k] = a*x[k] + y[k].
+    // The loop is threaded with OpenMP only when N exceeds NumElementsUseThreading.
+    inline void axpy(float a, size_t N, const float* x, const float* y, float* r)
+    {
+        long long k;
+
+        #pragma omp parallel for default(none) private(k) shared(N, r, a , x, y) if(N>NumElementsUseThreading)
+        for ( k=0; k<(long long)N; ++k)
+        {
+            r[k] = a*x[k] + y[k];
+        }
+    }
+
+    /// r[n] = a*x[n] + y[n] for every n in [0, N), double precision.
+    /// The loop is an OpenMP parallel for that is only threaded when N exceeds
+    /// NumElementsUseThreading.
+    inline void axpy(double a, size_t N, const double* x, const double* y, double* r)
+    {
+        // the loop index is declared in the for statement, which makes it private per thread
+        #pragma omp parallel for default(none) shared(N, r, a , x, y) if(N>NumElementsUseThreading)
+        for ( long long idx=0; idx<(long long)N; ++idx )
+        {
+            r[idx] = a*x[idx] + y[idx];
+        }
+    }
+
+    /// r[n] = a*x[n] + y[n] for every n in [0, N), single precision complex.
+    /// The complex product is expanded by hand into its real and imaginary parts.
+    /// The loop is an OpenMP parallel for that is only threaded when N exceeds
+    /// NumElementsUseThreading.
+    inline void axpy( std::complex<float>  a, size_t N, const  std::complex<float> * x, const  std::complex<float> * y,  std::complex<float> * r)
+    {
+        #pragma omp parallel for default(none) shared(N, r, a, x, y) if(N>NumElementsUseThreading)
+        for ( long long idx=0; idx<(long long)N; ++idx )
+        {
+            // all inputs are read into locals before r[idx] is written
+            const float xRe = x[idx].real();
+            const float xIm = x[idx].imag();
+
+            const float yRe = y[idx].real();
+            const float yIm = y[idx].imag();
+
+            const float aRe = a.real();
+            const float aIm = a.imag();
+
+            r[idx] = std::complex<float>(yRe + aRe*xRe - aIm*xIm, yIm + aRe*xIm + aIm*xRe);
+        }
+    }
+
+    /// r[n] = a*x[n] + y[n] for every n in [0, N), double precision complex.
+    /// The complex product is expanded by hand into its real and imaginary parts.
+    /// The loop is an OpenMP parallel for that is only threaded when N exceeds
+    /// NumElementsUseThreading.
+    inline void axpy( std::complex<double>  a, size_t N, const  std::complex<double> * x, const  std::complex<double> * y,  std::complex<double> * r)
+    {
+        #pragma omp parallel for default(none) shared(N, r, a, x, y) if(N>NumElementsUseThreading)
+        for ( long long idx=0; idx<(long long)N; ++idx )
+        {
+            // all inputs are read into locals before r[idx] is written
+            const double xRe = x[idx].real();
+            const double xIm = x[idx].imag();
+
+            const double yRe = y[idx].real();
+            const double yIm = y[idx].imag();
+
+            const double aRe = a.real();
+            const double aIm = a.imag();
+
+            r[idx] = std::complex<double>(yRe + aRe*xRe - aIm*xIm, yIm + aRe*xIm + aIm*xRe);
+        }
+    }
+
+    /// Array front end for axpy: r = a*x + y, element by element.
+    /// x and y must hold the same number of elements (checked by
+    /// GADGET_DEBUG_CHECK_THROW). r is assigned from y when its element count differs
+    /// from that of x, which gives it a matching size; otherwise r is used as it is.
+    /// r may be the same array as x or y: the pointer overload reads x[n] and y[n]
+    /// before it writes r[n].
+    template <typename T> 
+    void axpy(T a, const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r)
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements());
+
+        // Only the size of r matters: every element is overwritten by the call below.
+        // y must not be copied into an r of matching size, because that would destroy
+        // the input whenever r is the same array as x.
+        if ( r.get_number_of_elements() != x.get_number_of_elements() )
+        {
+            r = y;
+        }
+
+        axpy(a, x.get_number_of_elements(), x.begin(), y.begin(), r.begin());
+    }
+
+    template EXPORTCPUCOREMATH void axpy(float a, const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r);
+    template EXPORTCPUCOREMATH void axpy(double a, const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
+    template EXPORTCPUCOREMATH void axpy( std::complex<float>  a, const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, hoNDArray< std::complex<float> >& r);
+    template EXPORTCPUCOREMATH void axpy( std::complex<double>  a, const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, hoNDArray< std::complex<double> >& r);
+
+    // --------------------------------------------------------------------------------
+
+    /// Scale the N values at x in place by a (single precision).
+    /// The loop is an OpenMP parallel for that is only threaded when N exceeds
+    /// NumElementsUseThreading.
+    inline void scal(size_t N, float a, float* x)
+    {
+        #pragma omp parallel for default(none) shared(N, x, a) if (N>NumElementsUseThreading)
+        for (long long idx = 0; idx < (long long)N; idx++)
+        {
+            x[idx] = x[idx]*a;
+        }
+    }
+
+    /// Scale the N values at x in place by a (double precision).
+    /// The loop is an OpenMP parallel for that is only threaded when N exceeds
+    /// NumElementsUseThreading.
+    inline void scal(size_t N, double a, double* x)
+    {
+        #pragma omp parallel for default(none) shared(N, x, a) if (N>NumElementsUseThreading)
+        for (long long idx = 0; idx < (long long)N; idx++)
+        {
+            x[idx] = x[idx]*a;
+        }
+    }
+
+    /// Multiply the N complex values at x in place by the complex scalar a
+    /// (single precision). The product is expanded by hand into real and imaginary parts.
+    /// The loop is an OpenMP parallel for that is only threaded when N exceeds
+    /// NumElementsUseThreading.
+    inline void scal(size_t N,  std::complex<float>  a,  std::complex<float> * x)
+    {
+        #pragma omp parallel for default(none) shared(N, x, a) if (N>NumElementsUseThreading)
+        for (long long idx = 0; idx < (long long)N; idx++)
+        {
+            // both parts are read before x[idx] is overwritten with the product
+            const float xRe = x[idx].real();
+            const float xIm = x[idx].imag();
+
+            const float aRe = a.real();
+            const float aIm = a.imag();
+
+            x[idx] = std::complex<float>(xRe*aRe-xIm*aIm, xRe*aIm+xIm*aRe);
+        }
+    }
+
+    /// Multiply the N complex values at x in place by the complex scalar a
+    /// (double precision). The product is expanded by hand into real and imaginary parts.
+    /// The loop is an OpenMP parallel for that is only threaded when N exceeds
+    /// NumElementsUseThreading.
+    inline void scal(size_t N,  std::complex<double>  a,  std::complex<double> * x)
+    {
+        #pragma omp parallel for default(none) shared(N, x, a) if (N>NumElementsUseThreading)
+        for (long long idx = 0; idx < (long long)N; idx++)
+        {
+            // both parts are read before x[idx] is overwritten with the product
+            const double xRe = x[idx].real();
+            const double xIm = x[idx].imag();
+
+            const double aRe = a.real();
+            const double aIm = a.imag();
+
+            x[idx] = std::complex<double>(xRe*aRe-xIm*aIm, xRe*aIm+xIm*aRe);
+        }
+    }
+
+    /// Array front end for scal: multiplies every element of x in place by a, where the
+    /// scalar and the elements share the type T.
+    template <typename T> 
+    void scal(T a, hoNDArray<T>& x)
+    {
+        const size_t count = x.get_number_of_elements();
+        T* const data = x.begin();
+
+        scal(count, a, data);
+    }
+
+    template EXPORTCPUCOREMATH void scal(float a, hoNDArray<float>& x);
+    template EXPORTCPUCOREMATH void scal(double a, hoNDArray<double>& x);
+    template EXPORTCPUCOREMATH void scal( std::complex<float>  a, hoNDArray< std::complex<float> >& x);
+    template EXPORTCPUCOREMATH void scal( std::complex<double>  a, hoNDArray< std::complex<double> >& x);
+
+    // --------------------------------------------------------------------------------
+
+    /// Multiply the N complex values at x in place by the real scalar a (single
+    /// precision); real and imaginary parts are scaled separately.
+    /// The loop is an OpenMP parallel for that is only threaded when N exceeds
+    /// NumElementsUseThreading.
+    inline void scal(size_t N, float a,  std::complex<float> * x)
+    {
+        #pragma omp parallel for default(none) shared(N, x, a) if (N>NumElementsUseThreading)
+        for (long long idx = 0; idx < (long long)N; idx++)
+        {
+            const float xRe = x[idx].real();
+            const float xIm = x[idx].imag();
+
+            x[idx] = std::complex<float>(xRe*a, xIm*a);
+        }
+    }
+
+    /// Multiply the N complex values at x in place by the real scalar a (double
+    /// precision); real and imaginary parts are scaled separately.
+    /// The loop is an OpenMP parallel for that is only threaded when N exceeds
+    /// NumElementsUseThreading.
+    inline void scal(size_t N, double a,  std::complex<double> * x)
+    {
+        #pragma omp parallel for default(none) shared(N, x, a) if (N>NumElementsUseThreading)
+        for (long long idx = 0; idx < (long long)N; idx++)
+        {
+            const double xRe = x[idx].real();
+            const double xIm = x[idx].imag();
+
+            x[idx] = std::complex<double>(xRe*a, xIm*a);
+        }
+    }
+
+    /// Array front end for scal with a real scalar and a complex array: multiplies every
+    /// element of x in place by a.
+    template <typename T> 
+    void scal(T a, hoNDArray< std::complex<T> >& x)
+    {
+        const size_t count = x.get_number_of_elements();
+        std::complex<T>* const data = x.begin();
+
+        scal(count, a, data);
+    }
+
+    template EXPORTCPUCOREMATH void scal(float a, hoNDArray< std::complex<float> >& x);
+    template EXPORTCPUCOREMATH void scal(double a, hoNDArray< std::complex<double> >& x);
+
+    // --------------------------------------------------------------------------------
+
+    /// Comparator passed to std::sort by sort() when ascending order is requested.
+    /// Returns true only when a is strictly smaller than b, so smaller values are placed
+    /// first. Equal values compare false: std::sort requires a strict weak ordering, and
+    /// a comparison that is true for equal values gives undefined behaviour.
+    template <typename T> 
+    struct hoCompAscending
+    {
+        bool operator() (T a, T b) const { return (a<b); }
+    };
+
+    /// Comparator passed to std::sort by sort() when descending order is requested.
+    /// Returns true only when a is strictly larger than b, so larger values are placed
+    /// first. Equal values compare false, as the strict weak ordering required by
+    /// std::sort demands.
+    template <typename T> 
+    struct hoCompDescending
+    {
+        bool operator() (T a, T b) const { return (a>b); }
+    };
+
+    /// Copy the N values at x into r (skipped when both point to the same buffer) and
+    /// sort r in place with std::sort. hoCompAscending is the comparator when
+    /// isascending is true, hoCompDescending otherwise.
+    template <typename T> 
+    void sort(size_t N, const T* x, T* r, bool isascending)
+    {
+        if ( x != r )
+        {
+            memcpy(r, x, sizeof(T)*N);
+        }
+
+        T* const last = r+N;
+
+        if ( !isascending )
+        {
+            hoCompDescending<T> descending;
+            std::sort(r, last, descending);
+            return;
+        }
+
+        hoCompAscending<T> ascending;
+        std::sort(r, last, ascending);
+    }
+
+    /// Array front end for sort: r receives the values of x in sorted order.
+    /// When r is a different array than x, the values are first brought into r, by
+    /// assignment when the element counts differ and by memcpy otherwise. The pointer
+    /// overload is then run in place on r, so the data is copied only once.
+    /// isascending selects the comparator used by the pointer overload.
+    template <typename T> 
+    void sort(const hoNDArray<T>& x, hoNDArray<T>& r, bool isascending)
+    {
+        if ( &r != &x )
+        {
+            if ( r.get_number_of_elements()!=x.get_number_of_elements())
+            {
+                r = x;
+            }
+            else
+            {
+                memcpy(r.begin(), x.begin(), x.get_number_of_bytes());
+            }
+        }
+
+        // r already holds the values; passing it as both source and destination makes
+        // the pointer overload skip its own copy
+        T* const data = r.begin();
+        const T* const source = data;
+        sort(r.get_number_of_elements(), source, data, isascending);
+    }
+
+    template EXPORTCPUCOREMATH void sort(const hoNDArray<float>& x, hoNDArray<float>& r, bool isascending);
+    template EXPORTCPUCOREMATH void sort(const hoNDArray<double>& x, hoNDArray<double>& r, bool isascending);
+
+// --------------------------------------------------------------------------------
+
+    /// Set every element of the array pointed to by x to val.
+    /// The loop is an OpenMP parallel for that is only threaded when the element count
+    /// exceeds NumElementsUseThreading. x is dereferenced without a null check.
+    template<typename T> void fill( hoNDArray<T>* x, T val)
+    {
+        T* data = x->begin();
+        size_t count = x->get_number_of_elements();
+
+        #pragma omp parallel for default(none) shared(count, data, val) if (count>NumElementsUseThreading)
+        for ( long long idx=0; idx<(long long)count; idx++ )
+        {
+            data[idx] = val;
+        }
+    }
+
+    template EXPORTCPUCOREMATH void fill( hoNDArray<float>* x, float val);
+    template EXPORTCPUCOREMATH void fill( hoNDArray<double>* x, double val);
+    template EXPORTCPUCOREMATH void fill( hoNDArray< std::complex<float> >* x,  std::complex<float>  val);
+    template EXPORTCPUCOREMATH void fill( hoNDArray< std::complex<double> >* x,  std::complex<double>  val);
+
+    // --------------------------------------------------------------------------------
+
+    /// Reference flavour of fill: sets every element of x to val by forwarding the
+    /// address of x to the pointer overload Gadgetron::fill.
+    template<typename T> void fill( hoNDArray<T>& x, T val )
+    {
+        Gadgetron::fill( &x, val);
+    }
+
+    template EXPORTCPUCOREMATH void fill( hoNDArray<float>& x, float val);
+    template EXPORTCPUCOREMATH void fill( hoNDArray<double>& x, double val);
+    template EXPORTCPUCOREMATH void fill( hoNDArray< std::complex<float> >& x,  std::complex<float>  val);
+    template EXPORTCPUCOREMATH void fill( hoNDArray< std::complex<double> >& x,  std::complex<double>  val);
+
+    // --------------------------------------------------------------------------------
+
+    /// Store in r the sum of GT_ABS(x[i]) over the N values at x (single precision).
+    /// The sum is an OpenMP reduction that is only threaded when N exceeds
+    /// NumElementsUseThreading.
+    inline void asum(size_t N, const float* x, float& r)
+    {
+        float total = 0;
+
+        #pragma omp parallel for reduction(+:total) if (N>NumElementsUseThreading)
+        for (long long idx = 0; idx < (long long)N; idx++)
+        {
+            total += GT_ABS(x[idx]);
+        }
+
+        r = total;
+    }
+
+    /// Store in r the sum of GT_ABS(x[i]) over the N values at x (double precision).
+    /// The sum is an OpenMP reduction that is only threaded when N exceeds
+    /// NumElementsUseThreading.
+    inline void asum(size_t N, const double* x, double& r)
+    {
+        double total = 0;
+
+        #pragma omp parallel for reduction(+:total) if (N>NumElementsUseThreading)
+        for (long long idx = 0; idx < (long long)N; idx++)
+        {
+            total += GT_ABS(x[idx]);
+        }
+
+        r = total;
+    }
+
+    /// Store in r the sum of GT_ABS(real) + GT_ABS(imag) over the N complex values at x
+    /// (single precision). This adds the parts separately; it is not the sum of moduli.
+    /// The sum is an OpenMP reduction that is only threaded when N exceeds
+    /// NumElementsUseThreading.
+    inline void asum(size_t N, const  std::complex<float> * x, float& r)
+    {
+        float total = 0;
+
+        #pragma omp parallel for reduction(+:total) if (N>NumElementsUseThreading)
+        for (long long idx = 0; idx < (long long)N; idx++)
+        {
+            const float partRe = x[idx].real();
+            const float partIm = x[idx].imag();
+            total += ( GT_ABS(partRe) + GT_ABS(partIm) );
+        }
+
+        r = total;
+    }
+
+    /// Store in r the sum of GT_ABS(real) + GT_ABS(imag) over the N complex values at x
+    /// (double precision). This adds the parts separately; it is not the sum of moduli.
+    /// The sum is an OpenMP reduction that is only threaded when N exceeds
+    /// NumElementsUseThreading.
+    inline void asum(size_t N, const  std::complex<double> * x, double& r)
+    {
+        double total = 0;
+
+        #pragma omp parallel for reduction(+:total) if (N>NumElementsUseThreading)
+        for (long long idx = 0; idx < (long long)N; idx++)
+        {
+            const double partRe = x[idx].real();
+            const double partIm = x[idx].imag();
+            total += ( GT_ABS(partRe) + GT_ABS(partIm) );
+        }
+
+        r = total;
+    }
+
+    /// Array front end for asum: forwards the element count and data pointer of x to the
+    /// pointer overload, which stores the result in r. r has the real type associated
+    /// with T through realType<T>::Type.
+    template<class T> void asum(const hoNDArray<T>& x, typename realType<T>::Type& r)
+    {
+        asum(x.get_number_of_elements(), x.begin(), r);
+    }
+
+    template EXPORTCPUCOREMATH void asum( const hoNDArray<float>& x, float& r);
+    template EXPORTCPUCOREMATH void asum( const hoNDArray<double>& x, double& r);
+    template EXPORTCPUCOREMATH void asum( const hoNDArray< std::complex<float> >& x, float& r);
+    template EXPORTCPUCOREMATH void asum( const hoNDArray< std::complex<double> >& x, double& r);
+
+    /// Value-returning flavour of asum: runs the out-parameter overload on x and returns
+    /// what it produced.
+    template<class T> typename realType<T>::Type asum(const hoNDArray<T>& x)
+    {
+        typedef typename realType<T>::Type real_type;
+
+        real_type total;
+        asum(x, total);
+        return total;
+    }
+
+    template EXPORTCPUCOREMATH float asum( const hoNDArray<float>& x);
+    template EXPORTCPUCOREMATH double asum( const hoNDArray<double>& x);
+    template EXPORTCPUCOREMATH float asum( const hoNDArray< std::complex<float> >& x);
+    template EXPORTCPUCOREMATH double asum( const hoNDArray< std::complex<double> >& x);
+
+    // --------------------------------------------------------------------------------
+
+    /// Return what the BLAS routine isamax_ reports for the N floats at x with stride 1.
+    /// N is narrowed to lapack_int before the call.
+    /// NOTE(review): the reference BLAS i?amax routines report a 1-based position;
+    /// confirm whether callers expect a 0-based index.
+    inline size_t amax(size_t N, const float* x)
+    {
+        lapack_int stride = 1;
+        lapack_int count = (lapack_int)(N);
+
+        // the BLAS prototype takes a non-const pointer
+        return isamax_(&count, const_cast<float*>(x), &stride);
+    }
+
+    /// Return what the BLAS routine idamax_ reports for the N doubles at x with stride 1.
+    /// N is narrowed to lapack_int before the call.
+    /// NOTE(review): the reference BLAS i?amax routines report a 1-based position;
+    /// confirm whether callers expect a 0-based index.
+    inline size_t amax(size_t N, const double* x)
+    {
+        lapack_int stride = 1;
+        lapack_int count = (lapack_int)(N);
+
+        // the BLAS prototype takes a non-const pointer
+        return idamax_(&count, const_cast<double*>(x), &stride);
+    }
+
+    /// Return what the BLAS routine icamax_ reports for the N single precision complex
+    /// values at x with stride 1. N is narrowed to lapack_int before the call.
+    /// NOTE(review): the reference BLAS i?amax routines report a 1-based position;
+    /// confirm whether callers expect a 0-based index.
+    inline size_t amax(size_t N, const  std::complex<float> * x)
+    {
+        lapack_int stride = 1;
+        lapack_int count = (lapack_int)(N);
+
+        // the cast drops const and converts to the pointer type of the BLAS prototype
+        return icamax_(&count, (lapack_complex_float*)(x), &stride);
+    }
+
+    /// Return what the BLAS routine izamax_ reports for the N double precision complex
+    /// values at x with stride 1. N is narrowed to lapack_int before the call.
+    /// NOTE(review): the reference BLAS i?amax routines report a 1-based position;
+    /// confirm whether callers expect a 0-based index.
+    inline size_t amax(size_t N, const  std::complex<double> * x)
+    {
+        lapack_int stride = 1;
+        lapack_int count = (lapack_int)(N);
+
+        // the cast drops const and converts to the pointer type of the BLAS prototype
+        return izamax_(&count, (lapack_complex_double*)(x), &stride);
+    }
+
+    /// Array front end for amax: forwards the element count and data pointer of x to the
+    /// pointer overload for T and returns its result unchanged.
+    template<class T> size_t amax(const hoNDArray<T>& x)
+    {
+        return amax(x.get_number_of_elements(), x.begin());
+    }
+
+    template EXPORTCPUCOREMATH size_t amax( const hoNDArray<float>& x);
+    template EXPORTCPUCOREMATH size_t amax( const hoNDArray<double>& x);
+    template EXPORTCPUCOREMATH size_t amax( const hoNDArray< std::complex<float> >& x);
+    template EXPORTCPUCOREMATH size_t amax( const hoNDArray< std::complex<double> >& x);
+
+    // --------------------------------------------------------------------------------
+
+    /// Build the complex array cplx from separate real and imaginary arrays:
+    /// cplx[n] = T(real[n], imag[n]).
+    /// real and imag must have equal dimensions (GADGET_CHECK_THROW). cplx is re-created
+    /// with the dimensions of real when its own differ.
+    /// Anything thrown inside is caught and reported again through GADGET_THROW.
+    template<class T> 
+    void real_imag_to_complex(const hoNDArray<typename realType<T>::Type>& real, const hoNDArray<typename realType<T>::Type>& imag, hoNDArray<T>& cplx)
+    {
+        typedef typename realType<T>::Type real_type;
+
+        try
+        {
+            GADGET_CHECK_THROW(real.dimensions_equal(&imag));
+
+            if ( !cplx.dimensions_equal(&real) ) cplx.create(real.get_dimensions());
+
+            T* dst = cplx.begin();
+            const real_type* srcRe = real.begin();
+            const real_type* srcIm = imag.begin();
+            size_t count = real.get_number_of_elements();
+
+            #pragma omp parallel for shared(count, dst, srcRe, srcIm)
+            for ( long long idx=0; idx<(long long)count; idx++ )
+            {
+                dst[idx] = T(srcRe[idx], srcIm[idx]);
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in real_imag_to_complex(...) ... ");
+        }
+    }
+
+    template EXPORTCPUCOREMATH void real_imag_to_complex(const hoNDArray<float>& real, const hoNDArray<float>& imag, hoNDArray< std::complex<float> >& cplx);
+    template EXPORTCPUCOREMATH void real_imag_to_complex(const hoNDArray<double>& real, const hoNDArray<double>& imag, hoNDArray< std::complex<double> >& cplx);
+
+    // --------------------------------------------------------------------------------
+
+    /// Split the complex array cplx into its parts: real[n] = cplx[n].real() and
+    /// imag[n] = cplx[n].imag(). Either output is re-created with the dimensions of cplx
+    /// when its own differ.
+    /// Anything thrown inside is caught and reported again through GADGET_THROW.
+    template<class T> 
+    void complex_to_real_imag(const hoNDArray<T>& cplx, hoNDArray<typename realType<T>::Type>& real, hoNDArray<typename realType<T>::Type>& imag)
+    {
+        typedef typename realType<T>::Type real_type;
+
+        try
+        {
+            if ( !real.dimensions_equal(&cplx) ) real.create(cplx.get_dimensions());
+            if ( !imag.dimensions_equal(&cplx) ) imag.create(cplx.get_dimensions());
+
+            const T* src = cplx.begin();
+            real_type* dstRe = real.begin();
+            real_type* dstIm = imag.begin();
+            size_t count = real.get_number_of_elements();
+
+            #pragma omp parallel for default(none) shared(count, src, dstRe, dstIm)
+            for ( long long idx=0; idx<(long long)count; idx++ )
+            {
+                dstRe[idx] = src[idx].real();
+                dstIm[idx] = src[idx].imag();
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in complex_to_real_imag(...) ... ");
+        }
+    }
+
+    template EXPORTCPUCOREMATH void complex_to_real_imag(const hoNDArray< std::complex<float> >& cplx, hoNDArray<float>& real, hoNDArray<float>& imag);
+    template EXPORTCPUCOREMATH void complex_to_real_imag(const hoNDArray< std::complex<double> >& cplx, hoNDArray<double>& real, hoNDArray<double>& imag);
+
+    /// Specialization for input that is already real (float): real receives a copy of
+    /// cplx and imag is set to zero. Either output is re-created with the dimensions of
+    /// cplx when its own differ.
+    /// Anything thrown inside is caught and reported again through GADGET_THROW.
+    template <> EXPORTCPUCOREMATH
+    void complex_to_real_imag(const hoNDArray<float>& cplx, hoNDArray<float>& real, hoNDArray<float>& imag)
+    {
+        try
+        {
+            if ( !real.dimensions_equal(&cplx) ) real.create(cplx.get_dimensions());
+            if ( !imag.dimensions_equal(&cplx) ) imag.create(cplx.get_dimensions());
+
+            const float* src = cplx.begin();
+            float* dstRe = real.begin();
+            float* dstIm = imag.begin();
+            size_t count = real.get_number_of_elements();
+
+            #pragma omp parallel for default(none) shared(count, src, dstRe, dstIm)
+            for ( long long idx=0; idx<(long long)count; idx++ )
+            {
+                dstRe[idx] = src[idx];
+                dstIm[idx] = 0;
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in complex_to_real_imag(...) ... ");
+        }
+    }
+
+    /// Specialization for input that is already real (double): real receives a copy of
+    /// cplx and imag is set to zero. Either output is re-created with the dimensions of
+    /// cplx when its own differ.
+    /// Anything thrown inside is caught and reported again through GADGET_THROW.
+    template<> EXPORTCPUCOREMATH 
+    void complex_to_real_imag(const hoNDArray<double>& cplx, hoNDArray<double>& real, hoNDArray<double>& imag)
+    {
+        try
+        {
+            if ( !real.dimensions_equal(&cplx) ) real.create(cplx.get_dimensions());
+            if ( !imag.dimensions_equal(&cplx) ) imag.create(cplx.get_dimensions());
+
+            const double* src = cplx.begin();
+            double* dstRe = real.begin();
+            double* dstIm = imag.begin();
+            size_t count = real.get_number_of_elements();
+
+            #pragma omp parallel for default(none) shared(count, src, dstRe, dstIm)
+            for ( long long idx=0; idx<(long long)count; idx++ )
+            {
+                dstRe[idx] = src[idx];
+                dstIm[idx] = 0;
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in complex_to_real_imag(...) ... ");
+        }
+    }
+
+    // --------------------------------------------------------------------------------
+
+    /// Extract the real part of every element: real[n] = cplx[n].real(), stored in an
+    /// array of the real type associated with T. real is re-created with the dimensions
+    /// of cplx when its own differ.
+    /// Anything thrown inside is caught and reported again through GADGET_THROW.
+    template<class T> 
+    void complex_to_real(const hoNDArray<T>& cplx, hoNDArray<typename realType<T>::Type>& real)
+    {
+        typedef typename realType<T>::Type real_type;
+
+        try
+        {
+            if ( !real.dimensions_equal(&cplx) ) real.create(cplx.get_dimensions());
+
+            const T* src = cplx.begin();
+            real_type* dst = real.begin();
+            size_t count = real.get_number_of_elements();
+
+            #pragma omp parallel for default(none) shared(count, src, dst)
+            for ( long long idx=0; idx<(long long)count; idx++ )
+            {
+                dst[idx] = src[idx].real();
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in complex_to_real(...) ... ");
+        }
+    }
+
+    template EXPORTCPUCOREMATH void complex_to_real(const hoNDArray< std::complex<float> >& cplx, hoNDArray<float>& real);
+    template EXPORTCPUCOREMATH void complex_to_real(const hoNDArray< std::complex<double> >& cplx, hoNDArray<double>& real);
+
+    /// Keep only the real part of every element while staying complex:
+    /// real[n] = T(cplx[n].real(), 0). real is re-created with the dimensions of cplx
+    /// when its own differ.
+    /// Anything thrown inside is caught and reported again through GADGET_THROW.
+    template<class T> 
+    void complex_to_real(const hoNDArray<T>& cplx, hoNDArray<T>& real)
+    {
+        try
+        {
+            if ( !real.dimensions_equal(&cplx) ) real.create(cplx.get_dimensions());
+
+            const T* src = cplx.begin();
+            T* dst = real.begin();
+            size_t count = real.get_number_of_elements();
+
+            #pragma omp parallel for shared(count, src, dst)
+            for ( long long idx=0; idx<(long long)count; idx++ )
+            {
+                dst[idx] = T(src[idx].real(), 0);
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in complex_to_real(...) ... ");
+        }
+    }
+
+    template EXPORTCPUCOREMATH void complex_to_real(const hoNDArray< std::complex<float> >& cplx, hoNDArray< std::complex<float> >& real);
+    template EXPORTCPUCOREMATH void complex_to_real(const hoNDArray< std::complex<double> >& cplx, hoNDArray< std::complex<double> >& real);
+
+    /// In-place flavour: every element of cplx is replaced by T(its real part, 0), which
+    /// zeroes the imaginary part.
+    /// Anything thrown inside is caught and reported again through GADGET_THROW.
+    template<class T> 
+    void complex_to_real(hoNDArray<T>& cplx)
+    {
+        try
+        {
+            T* data = cplx.begin();
+            size_t count = cplx.get_number_of_elements();
+
+            #pragma omp parallel for shared(count, data)
+            for ( long long idx=0; idx<(long long)count; idx++ )
+            {
+                data[idx] = T(data[idx].real(), 0);
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in complex_to_real(...) ... ");
+        }
+    }
+
+    template EXPORTCPUCOREMATH void complex_to_real(hoNDArray< std::complex<float> >& cplx);
+    template EXPORTCPUCOREMATH void complex_to_real(hoNDArray< std::complex<double> >& cplx);
+
+    // --------------------------------------------------------------------------------
+
+    /// Extract the imaginary part of every element: imag[n] = cplx[n].imag(), stored in
+    /// an array of the real type associated with T. imag is re-created with the
+    /// dimensions of cplx when its own differ.
+    /// Anything thrown inside is caught and reported again through GADGET_THROW.
+    template<class T> 
+    void complex_to_imag(const hoNDArray<T>& cplx, hoNDArray<typename realType<T>::Type>& imag)
+    {
+        typedef typename realType<T>::Type real_type;
+
+        try
+        {
+            if ( !imag.dimensions_equal(&cplx) ) imag.create(cplx.get_dimensions());
+
+            const T* src = cplx.begin();
+            real_type* dst = imag.begin();
+            size_t count = imag.get_number_of_elements();
+
+            #pragma omp parallel for default(none) shared(count, src, dst)
+            for ( long long idx=0; idx<(long long)count; idx++ )
+            {
+                dst[idx] = src[idx].imag();
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in complex_to_imag(...) ... ");
+        }
+    }
+
+    template EXPORTCPUCOREMATH void complex_to_imag(const hoNDArray< std::complex<float> >& cplx, hoNDArray<float>& imag);
+    template EXPORTCPUCOREMATH void complex_to_imag(const hoNDArray< std::complex<double> >& cplx, hoNDArray<double>& imag);
+
+    /// Keep only the imaginary part of every element while staying complex:
+    /// imag[n] = T(0, cplx[n].imag()). imag is re-created with the dimensions of cplx
+    /// when its own differ.
+    /// Anything thrown inside is caught and reported again through GADGET_THROW.
+    template<class T> 
+    void complex_to_imag(const hoNDArray<T>& cplx, hoNDArray<T>& imag)
+    {
+        try
+        {
+            if ( !imag.dimensions_equal(&cplx) ) imag.create(cplx.get_dimensions());
+
+            const T* src = cplx.begin();
+            T* dst = imag.begin();
+            size_t count = imag.get_number_of_elements();
+
+            #pragma omp parallel for shared(count, src, dst)
+            for ( long long idx=0; idx<(long long)count; idx++ )
+            {
+                dst[idx] = T(0, src[idx].imag());
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in complex_to_imag(...) ... ");
+        }
+    }
+
+    template EXPORTCPUCOREMATH void complex_to_imag(const hoNDArray< std::complex<float> >& cplx, hoNDArray< std::complex<float> >& imag);
+    template EXPORTCPUCOREMATH void complex_to_imag(const hoNDArray< std::complex<double> >& cplx, hoNDArray< std::complex<double> >& imag);
+
+    /// In-place flavour: every element of cplx is replaced by T(0, its imaginary part),
+    /// which zeroes the real part. This matches the result the two-array overload
+    /// complex_to_imag(cplx, imag) writes into its complex output.
+    /// Anything thrown inside is caught and reported again through GADGET_THROW.
+    template<class T> 
+    void complex_to_imag(hoNDArray<T>& cplx)
+    {
+        try
+        {
+            T* pRes = cplx.begin();
+
+            size_t N = cplx.get_number_of_elements();
+
+            long long n;
+            #pragma omp parallel for private(n) shared(N, pRes)
+            for ( n=0; n<(long long)N; n++ )
+            {
+                // keep the imaginary part; keeping the real part here would make this a
+                // duplicate of complex_to_real
+                pRes[n] = T( 0, pRes[n].imag());
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in complex_to_imag(...) ... ");
+        }
+    }
+
+    template EXPORTCPUCOREMATH void complex_to_imag(hoNDArray< std::complex<float> >& cplx);
+    template EXPORTCPUCOREMATH void complex_to_imag(hoNDArray< std::complex<double> >& cplx);
+
+    // --------------------------------------------------------------------------------
+
+    /// Promote a real array to complex: cplx[n] = T(real[n], 0). cplx is re-created with
+    /// the dimensions of real when its own differ.
+    /// Anything thrown inside is caught and reported again through GADGET_THROW.
+    template<class T> 
+    void real_to_complex(const hoNDArray<typename realType<T>::Type>& real, hoNDArray<T>& cplx)
+    {
+        typedef typename realType<T>::Type real_type;
+
+        try
+        {
+            if ( !cplx.dimensions_equal(&real) ) cplx.create(real.get_dimensions());
+
+            const real_type* src = real.begin();
+            T* dst = cplx.begin();
+            size_t count = real.get_number_of_elements();
+
+            #pragma omp parallel for shared(count, dst, src)
+            for ( long long idx=0; idx<(long long)count; idx++ )
+            {
+                dst[idx] = T(src[idx], 0);
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in real_to_complex(...) ... ");
+        }
+    }
+
+    template EXPORTCPUCOREMATH void real_to_complex(const hoNDArray< float >& real, hoNDArray< std::complex<float> >& cplx);
+    template EXPORTCPUCOREMATH void real_to_complex(const hoNDArray< double >& real, hoNDArray< std::complex<double> >& cplx);
+
+    // --------------------------------------------------------------------------------
+
+    /// Store in v the smallest element of a, found by a linear scan with operator<.
+    /// An empty array has no smallest element: the GADGET_CHECK_THROW on the element
+    /// count rejects it, and the failure is reported through GADGET_THROW like any other
+    /// error raised inside. v is left untouched in that case.
+    template <class T>
+    void minValue(const hoNDArray<T>& a, T& v)
+    {
+        typedef T ValueType;
+
+        try
+        {
+            size_t n = a.get_number_of_elements();
+
+            // reading pA[0] below is only valid when there is at least one element
+            GADGET_CHECK_THROW(n > 0);
+
+            const ValueType* pA = a.begin();
+            v = pA[0];
+
+            size_t ii;
+            for (ii=1; ii<n; ii++)
+            {
+                if (pA[ii]<v) v = pA[ii];
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in minValue(const hoNDArray<T>& a, T& v) ... ");
+        }
+    }
+
+    template EXPORTCPUCOREMATH void minValue(const hoNDArray<float>& a, float& v);
+    template EXPORTCPUCOREMATH void minValue(const hoNDArray<double>& a, double& v);
+
+    /// Store in v the largest element of a, found by a linear scan with operator>.
+    /// An empty array has no largest element: the GADGET_CHECK_THROW on the element
+    /// count rejects it, and the failure is reported through GADGET_THROW like any other
+    /// error raised inside. v is left untouched in that case.
+    template <class T>
+    void maxValue(const hoNDArray<T>& a, T& v)
+    {
+        typedef T ValueType;
+
+        try
+        {
+            size_t n = a.get_number_of_elements();
+
+            // reading pA[0] below is only valid when there is at least one element
+            GADGET_CHECK_THROW(n > 0);
+
+            const ValueType* pA = a.begin();
+            v = pA[0];
+
+            size_t ii;
+            for (ii=1; ii<n; ii++)
+            {
+                if (pA[ii]>v) v = pA[ii];
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in maxValue(const hoNDArray<T>& a, T& v) ... ");
+        }
+    }
+
+    template EXPORTCPUCOREMATH void maxValue(const hoNDArray<float>& a, float& v);
+    template EXPORTCPUCOREMATH void maxValue(const hoNDArray<double>& a, double& v);
+
+    // --------------------------------------------------------------------------------
+}
diff --git a/toolboxes/core/cpu/math/hoNDArray_math_util.h b/toolboxes/core/cpu/math/hoNDArray_math_util.h
new file mode 100644
index 0000000..64d7acd
--- /dev/null
+++ b/toolboxes/core/cpu/math/hoNDArray_math_util.h
@@ -0,0 +1,27 @@
+/** \file  hoNDArray_math_util.h
+    \brief math functions for hoNDArray and hoNDImage not using armadillo
+*/
+
+#pragma once
+
+#include "hoNDArray.h"
+#include "ho2DArray.h"
+#include "ho3DArray.h"
+#include "ho4DArray.h"
+#include "ho5DArray.h"
+#include "ho6DArray.h"
+#include "ho7DArray.h"
+#include "hoNDImage.h"
+
+#include "complext.h"
+#include "cpucore_math_export.h"
+#include "GadgetronCommon.h"
+#include <complex>
+
+namespace Gadgetron
+{
+
+    
+
+    
+}
diff --git a/toolboxes/core/cpu/math/hoNDArray_reductions.cpp b/toolboxes/core/cpu/math/hoNDArray_reductions.cpp
new file mode 100644
index 0000000..12c3a6d
--- /dev/null
+++ b/toolboxes/core/cpu/math/hoNDArray_reductions.cpp
@@ -0,0 +1,933 @@
+#include "hoNDArray_reductions.h"
+#include "hoArmadillo.h"
+
+#ifndef lapack_int
+    #define lapack_int int
+#endif // lapack_int
+
+#ifndef lapack_complex_float
+    #define lapack_complex_float  std::complex<float> 
+#endif // lapack_complex_float
+
+#ifndef lapack_complex_double
+    #define lapack_complex_double  std::complex<double> 
+#endif // #ifndef lapack_complex_double
+
+#define NumElementsUseThreading (64*1024) // element count above which the OpenMP loops in this file run in parallel; parenthesised so the macro expands as one value
+
+//Declaration of BLAS and LAPACK routines
+extern "C"
+{
+    /// Finds the index of the element with the maximal absolute value.
+    lapack_int isamax_(lapack_int* N, float* x, lapack_int* incx);
+    lapack_int idamax_(lapack_int* N, double* x, lapack_int* incx);
+    lapack_int icamax_(lapack_int* N, lapack_complex_float* x, lapack_int* incx);
+    lapack_int izamax_(lapack_int* N, lapack_complex_double* x, lapack_int* incx);
+}
+
+namespace Gadgetron{
+
+    // --------------------------------------------------------------------------------
+
+    template<class REAL> REAL max(hoNDArray<REAL>* data){ // returns the largest element reported by Armadillo for *data
+        return as_arma_col(data).max(); // data is handed to as_arma_col without a null check
+    }
+
+    // --------------------------------------------------------------------------------
+
+    template<class REAL> REAL min(hoNDArray<REAL>* data){ // returns the smallest element reported by Armadillo for *data
+        return as_arma_col(data).min(); // data is handed to as_arma_col without a null check
+    }
+
+    // --------------------------------------------------------------------------------
+
+    template<class T> T mean(hoNDArray<T>* data){ // returns arma::mean of the column produced by as_arma_col(data)
+        return (typename stdType<T>::Type) arma::mean(as_arma_col(data)); // result passes through stdType<T>::Type before converting to T
+    }
+
+    // --------------------------------------------------------------------------------
+
+    template<class T> T sum(hoNDArray<T>* data){ // returns arma::sum of the column produced by as_arma_col(data)
+        return (typename stdType<T>::Type) arma::sum(as_arma_col(data)); // result passes through stdType<T>::Type before converting to T
+    }
+
+    // --------------------------------------------------------------------------------
+
+    template<class T> T stddev(hoNDArray<T>* data){ // returns arma::stddev of the column produced by as_arma_col(data)
+        return (typename stdType<T>::Type) arma::stddev(as_arma_col(data)); // arma::stddev is called with its default normalisation argument
+    }
+
+    // --------------------------------------------------------------------------------
+
+    template<class T> T dot( hoNDArray<T> *x, hoNDArray<T> *y, bool cc )
+    {
+        typedef typename stdType<T>::Type StdT; // element type used on the Armadillo side
+        if( !x || !y )
+            throw std::runtime_error("Gadgetron::dot(): Invalid input array");
+        // both operands must hold the same number of elements
+        if( y->get_number_of_elements() != x->get_number_of_elements() )
+            throw std::runtime_error("Gadgetron::dot(): Array sizes mismatch");
+
+        arma::Col<StdT> lhs = as_arma_col(x), rhs = as_arma_col(y);
+        StdT res = cc ? arma::cdot(lhs,rhs) : arma::dot(lhs,rhs); // cc selects arma::cdot over arma::dot
+        return *((T*)(&res)); // the StdT result is read back through a T pointer
+    }
+
+    // --------------------------------------------------------------------------------
+
+    inline void asum(size_t N, const float* x, float& r)
+    {
+        // r = sum over k of GT_ABS(x[k]); the loop is parallelised only when N exceeds NumElementsUseThreading
+        long long k;
+        float total = 0;
+        #pragma omp parallel for private(k) reduction(+:total) if (N>NumElementsUseThreading)
+        for (k = 0; k < (long long)N; ++k)
+        {
+            total += GT_ABS(x[k]);
+        }
+        r = total;
+    }
+
+    inline void asum(size_t N, const double* x, double& r)
+    {
+        // r = sum over k of GT_ABS(x[k]); the loop is parallelised only when N exceeds NumElementsUseThreading
+        long long k;
+        double total = 0;
+        #pragma omp parallel for private(k) reduction(+:total) if (N>NumElementsUseThreading)
+        for (k = 0; k < (long long)N; ++k)
+        {
+            total += GT_ABS(x[k]);
+        }
+        r = total;
+    }
+
+    inline void asum(size_t N, const  std::complex<float> * x, float& r)
+    {
+        // r = sum over k of ( GT_ABS(real part) + GT_ABS(imaginary part) ) of x[k];
+        // this is not the sum of complex moduli.
+        long long k;
+        float total = 0;
+        #pragma omp parallel for private(k) reduction(+:total) if (N>NumElementsUseThreading)
+        for (k = 0; k < (long long)N; ++k)
+        {
+            const float re = x[k].real();
+            const float im = x[k].imag();
+            total += ( GT_ABS(re) + GT_ABS(im) );
+        }
+        r = total;
+    }
+
+    inline void asum(size_t N, const  std::complex<double> * x, double& r)
+    {
+        // r = sum over k of ( GT_ABS(real part) + GT_ABS(imaginary part) ) of x[k];
+        // this is not the sum of complex moduli.
+        long long k;
+        double total = 0;
+        #pragma omp parallel for private(k) reduction(+:total) if (N>NumElementsUseThreading)
+        for (k = 0; k < (long long)N; ++k)
+        {
+            const double re = x[k].real();
+            const double im = x[k].imag();
+            total += ( GT_ABS(re) + GT_ABS(im) );
+        }
+        r = total;
+    }
+
+    template<class T> void asum(const hoNDArray<T>& x, typename realType<T>::Type& r) // array front end for the pointer overloads
+    {
+        asum(x.get_number_of_elements(), x.begin(), r); // overload resolution on x.begin() selects the kernel
+    }
+
+    template EXPORTCPUCOREMATH void asum( const hoNDArray<float>& x, float& r);
+    template EXPORTCPUCOREMATH void asum( const hoNDArray<double>& x, double& r);
+    template EXPORTCPUCOREMATH void asum( const hoNDArray< std::complex<float> >& x, float& r);
+    template EXPORTCPUCOREMATH void asum( const hoNDArray< std::complex<double> >& x, double& r);
+
+    template<class T> typename realType<T>::Type asum(const hoNDArray<T>& x) // value-returning form of asum(x, r)
+    {
+        typename realType<T>::Type r; // assigned by the call below
+        asum(x, r);
+        return r;
+    }
+
+    template EXPORTCPUCOREMATH float asum( const hoNDArray<float>& x);
+    template EXPORTCPUCOREMATH double asum( const hoNDArray<double>& x);
+    template EXPORTCPUCOREMATH float asum( const hoNDArray< std::complex<float> >& x);
+    template EXPORTCPUCOREMATH double asum( const hoNDArray< std::complex<double> >& x);
+
+    template<class T> typename realType<T>::Type asum( hoNDArray<T> *x )
+    {
+        // 1-norm of *x as computed by arma::norm(.,1); a null pointer is rejected first.
+        typedef typename realType<T>::Type realT;
+        if( !x )
+            throw std::runtime_error("Gadgetron::asum(): Invalid input array");
+        arma::Col<typename stdType<T>::Type> column = as_arma_col(x);
+        return realT(arma::norm(column,1));
+    }
+
+    template<class T> T asum( hoNDArray< std::complex<T> > *x ) // sum of |re| + |im| over a std::complex array
+    {
+        if( x == 0x0 ) // a null array pointer is reported as an error
+            throw std::runtime_error("Gadgetron::asum(): Invalid input array");
+        // per element the vector holds |re| + |im|; arma::norm(.,1) is then taken of that vector
+        return arma::norm(arma::abs(real(as_arma_col(x)))+arma::abs(imag(as_arma_col(x))),1);
+    }
+
+    template<class T> T asum( hoNDArray< complext<T> > *x ) // sum of |re| + |im| over a complext array
+    {
+        if( x == 0x0 ) // a null array pointer is reported as an error
+            throw std::runtime_error("Gadgetron::asum(): Invalid input array");
+        // per element the vector holds |re| + |im|; arma::norm(.,1) is then taken of that vector
+        return arma::norm(arma::abs(real(as_arma_col(x)))+arma::abs(imag(as_arma_col(x))),1);
+    }
+
+    // --------------------------------------------------------------------------------
+
+    template <typename T> inline 
+    void norm1(size_t N, const T* x, typename realType<T>::Type& r)
+    {
+        // Generic l1 norm: r = sum over k of GT_ABS(x[k]).
+        typedef typename realType<T>::Type RealT;
+
+        long long k;
+        RealT acc(0);
+        #pragma omp parallel for private(k) reduction(+:acc) if (N>NumElementsUseThreading)
+        for (k=0; k<(long long)N; ++k)
+        {
+            acc += GT_ABS(x[k]);
+        }
+
+        r = acc;
+    }
+
+    inline void norm1(size_t N, const  std::complex<float> * x, float& r)
+    {
+        // l1 norm of a complex vector: r = sum over k of the modulus sqrt(re^2 + im^2) of x[k].
+        long long k;
+        float total = 0.0f;
+        #pragma omp parallel for private(k) reduction(+:total) if (N>NumElementsUseThreading)
+        for (k = 0; k < (long long)N; ++k)
+        {
+            const float re = x[k].real();
+            const float im = x[k].imag();
+            total += std::sqrt( (re*re) + (im * im) );
+        }
+
+        r = total;
+    }
+
+    inline void norm1(size_t N, const  complext<float> * x, float& r) // complext<float> front end
+    {
+        norm1(N, (std::complex<float> *)x, r); // C-style cast reinterprets the buffer as std::complex<float> (and drops const); assumes both types share a layout — TODO confirm
+    }
+
+    inline void norm1(size_t N, const  std::complex<double> * x, double& r)
+    {
+        // l1 norm of a complex vector: r = sum over k of the modulus sqrt(re^2 + im^2) of x[k].
+        long long k;
+        double total = 0.0;
+        #pragma omp parallel for private(k) reduction(+:total) if (N>NumElementsUseThreading)
+        for (k = 0; k < (long long)N; ++k)
+        {
+            const double re = x[k].real();
+            const double im = x[k].imag();
+            total += std::sqrt( (re*re) + (im * im) );
+        }
+
+        r = total;
+    }
+
+    inline void norm1(size_t N, const  complext<double> * x, double& r) // complext<double> front end
+    {
+        norm1(N, (std::complex<double> *)x, r); // C-style cast reinterprets the buffer as std::complex<double> (and drops const); assumes both types share a layout — TODO confirm
+    }
+
+    template <typename T> 
+    void norm1(const hoNDArray<T>& x, typename realType<T>::Type& r) // array front end for the pointer overloads
+    {
+        norm1(x.get_number_of_elements(), x.begin(), r); // overload resolution on x.begin() selects the kernel
+    }
+
+    template EXPORTCPUCOREMATH void norm1(const hoNDArray<float>& x, float& r);
+    template EXPORTCPUCOREMATH void norm1(const hoNDArray<double>& x, double& r);
+    template EXPORTCPUCOREMATH void norm1(const hoNDArray< std::complex<float> >& x, float& r);
+    template EXPORTCPUCOREMATH void norm1(const hoNDArray< complext<float> >& x, float& r);
+    template EXPORTCPUCOREMATH void norm1(const hoNDArray< std::complex<double> >& x, double& r);
+    template EXPORTCPUCOREMATH void norm1(const hoNDArray< complext<double> >& x, double& r);
+
+    template <typename T> inline 
+    typename realType<T>::Type norm1(const hoNDArray<T>& x) // value-returning form of norm1(x, r)
+    {
+        typename realType<T>::Type r; // assigned by the call below
+        norm1(x, r);
+        return r;
+    }
+
+    template EXPORTCPUCOREMATH float norm1(const hoNDArray<float>& x);
+    template EXPORTCPUCOREMATH double norm1(const hoNDArray<double>& x);
+    template EXPORTCPUCOREMATH float norm1(const hoNDArray< std::complex<float> >& x);
+    template EXPORTCPUCOREMATH float norm1(const hoNDArray< complext<float> >& x);
+    template EXPORTCPUCOREMATH double norm1(const hoNDArray< std::complex<double> >& x);
+    template EXPORTCPUCOREMATH double norm1(const hoNDArray< complext<double> >& x);
+
+    template<class T> typename realType<T>::Type nrm1( hoNDArray<T> *x )
+    {
+        // Pointer front end for norm1(): rejects a null array, then forwards *x.
+        if( x == 0x0 )
+            throw std::runtime_error("Gadgetron::nrm1(): Invalid input array");
+        // The earlier Armadillo implementation, kept for reference:
+        //   arma::Col<typename stdType<T>::Type> xM = as_arma_col(x);
+        //   return realT(arma::norm(xM,1));   (realT = realType<T>::Type)
+
+        return norm1(*x);
+    }
+
+    // --------------------------------------------------------------------------------
+
+    inline void norm2(size_t N, const float* x, float& r)
+    {
+        // Euclidean norm: r = sqrt( sum over k of x[k]^2 ).
+        long long k;
+        float squares = 0;
+
+        #pragma omp parallel for private(k) reduction(+:squares) if (N>NumElementsUseThreading)
+        for (k = 0; k < (long long)N; ++k)
+        {
+            const float value = x[k];
+            squares += ( value*value );
+        }
+
+        r = std::sqrt(squares);
+    }
+
+    inline void norm2(size_t N, const double* x, double& r)
+    {
+        // Euclidean norm: r = sqrt( sum over k of x[k]^2 ).
+        long long k;
+        double squares = 0;
+
+        #pragma omp parallel for private(k) reduction(+:squares) if (N>NumElementsUseThreading)
+        for (k = 0; k < (long long)N; ++k)
+        {
+            const double value = x[k];
+            squares += ( value*value );
+        }
+
+        r = std::sqrt(squares);
+    }
+
+    inline void norm2(size_t N, const  std::complex<float> * x, float& r)
+    {
+        // Euclidean norm of a complex vector:
+        // r = sqrt( sum over k of (re^2 + im^2) of x[k] ).
+        long long k;
+        float squares = 0;
+
+        #pragma omp parallel for private(k) reduction(+:squares) if (N>NumElementsUseThreading)
+        for (k = 0; k < (long long)N; ++k)
+        {
+            const float re = x[k].real();
+            const float im = x[k].imag();
+            squares += ( (re*re) + (im * im) );
+        }
+
+        r = std::sqrt(squares);
+    }
+
+    inline void norm2(size_t N, const  complext<float> * x, float& r) // complext<float> front end
+    {
+        norm2(N, (std::complex<float> *)x, r); // C-style cast reinterprets the buffer as std::complex<float> (and drops const); assumes both types share a layout — TODO confirm
+    }
+
+    inline void norm2(size_t N, const  std::complex<double> * x, double& r)
+    {
+        // Euclidean norm of a complex vector:
+        // r = sqrt( sum over k of (re^2 + im^2) of x[k] ).
+        long long k;
+        double squares = 0;
+
+        #pragma omp parallel for private(k) reduction(+:squares) if (N>NumElementsUseThreading)
+        for (k = 0; k < (long long)N; ++k)
+        {
+            const double re = x[k].real();
+            const double im = x[k].imag();
+            squares += ( (re*re) + (im * im) );
+        }
+
+        r = std::sqrt(squares);
+    }
+
+    inline void norm2(size_t N, const  complext<double> * x, double& r) // complext<double> front end
+    {
+        norm2(N, (std::complex<double> *)x, r); // C-style cast reinterprets the buffer as std::complex<double> (and drops const); assumes both types share a layout — TODO confirm
+    }
+
+    template <typename T> 
+    void norm2(const hoNDArray<T>& x, typename realType<T>::Type& r) // array front end for the pointer overloads
+    {
+        norm2(x.get_number_of_elements(), x.begin(), r); // overload resolution on x.begin() selects the kernel
+    }
+
+    template EXPORTCPUCOREMATH void norm2(const hoNDArray<float>& x, float& r);
+    template EXPORTCPUCOREMATH void norm2(const hoNDArray<double>& x, double& r);
+    template EXPORTCPUCOREMATH void norm2(const hoNDArray< std::complex<float> >& x, float& r);
+    template EXPORTCPUCOREMATH void norm2(const hoNDArray< complext<float> >& x, float& r);
+    template EXPORTCPUCOREMATH void norm2(const hoNDArray< std::complex<double> >& x, double& r);
+    template EXPORTCPUCOREMATH void norm2(const hoNDArray< complext<double> >& x, double& r);
+
+    template <typename T> inline 
+    typename realType<T>::Type norm2(const hoNDArray<T>& x) // value-returning form of norm2(x, r)
+    {
+        typename realType<T>::Type r; // assigned by the call below
+        norm2(x, r);
+        return r;
+    }
+
+    template EXPORTCPUCOREMATH float norm2(const hoNDArray<float>& x);
+    template EXPORTCPUCOREMATH double norm2(const hoNDArray<double>& x);
+    template EXPORTCPUCOREMATH float norm2(const hoNDArray< std::complex<float> >& x);
+    template EXPORTCPUCOREMATH float norm2(const hoNDArray< complext<float> >& x);
+    template EXPORTCPUCOREMATH double norm2(const hoNDArray< std::complex<double> >& x);
+    template EXPORTCPUCOREMATH double norm2(const hoNDArray< complext<double> >& x);
+
+    template<class T> typename realType<T>::Type nrm2( hoNDArray<T> *x )
+    {
+        // Pointer front end for norm2(): a null array is rejected, anything else is forwarded.
+        // (This replaced an Armadillo version that returned arma::norm(as_arma_col(x),2).)
+        if( !x )
+        {
+            throw std::runtime_error("Gadgetron::nrm2(): Invalid input array");
+        }
+
+        return norm2(*x);
+    }
+
+    // --------------------------------------------------------------------------------
+
+    template <typename T> 
+    void minAbsolute(const hoNDArray<T>& x, T& r, size_t& ind)
+    {
+        // Finds the entry of x with the smallest magnitude: r receives the entry, ind its position.
+        // An empty array sets ind to 0 and leaves r untouched.
+        const size_t N = x.get_number_of_elements();
+        const T* pX = x.begin();
+
+        ind = 0;
+        if ( N == 0 ) return;
+
+        // std::abs is used for the seed as well as inside the loop: an unqualified abs()
+        // call is free to resolve to a different overload than the one applied to the rest.
+        typename realType<T>::Type v = std::abs(pX[0]);
+        typename realType<T>::Type v2;
+        for ( size_t n=1; n<N; n++ )
+        {
+            v2 = std::abs(pX[n]);
+            if ( v2 < v ) // strict comparison keeps the first of several equal magnitudes
+            {
+                v = v2;
+                ind = n;
+            }
+        }
+
+        r = pX[ind];
+    }
+
+    template EXPORTCPUCOREMATH void minAbsolute(const hoNDArray<float>& x, float& r, size_t& ind);
+    template EXPORTCPUCOREMATH void minAbsolute(const hoNDArray<double>& x, double& r, size_t& ind);
+    template EXPORTCPUCOREMATH void minAbsolute(const hoNDArray< std::complex<float> >& x,  std::complex<float> & r, size_t& ind);
+    template EXPORTCPUCOREMATH void minAbsolute(const hoNDArray< std::complex<double> >& x,  std::complex<double> & r, size_t& ind);
+
+    template<class T> size_t amin( hoNDArray<T> *x )
+    {
+        // Position of the entry of *x with the smallest absolute value, as reported by Armadillo.
+        typedef typename realType<T>::Type realT;
+        if( !x )
+            throw std::runtime_error("Gadgetron::amin(): Invalid input array");
+        arma::Col<realT> magnitudes = arma::abs(as_arma_col(x));
+        arma::uword where;
+        magnitudes.min(where); // only the position is needed; the minimum value itself is discarded
+        return where;
+    }
+
+    template<class T> size_t amin( hoNDArray< std::complex<T> > *x )
+    {
+        // Position of the entry minimising |re| + |im| (not the complex modulus).
+        if( !x )
+            throw std::runtime_error("Gadgetron::amin(): Invalid input array");
+        arma::Col<T> magnitudes = arma::abs(real(as_arma_col(x)))+arma::abs(imag(as_arma_col(x)));
+        arma::uword where;
+        magnitudes.min(where); // value discarded, position kept
+        return where;
+    }
+
+    template<class T> size_t amin( hoNDArray< complext<T> > *x )
+    {
+        // Position of the entry minimising |re| + |im| (not the complex modulus).
+        if( !x )
+            throw std::runtime_error("Gadgetron::amin(): Invalid input array");
+        arma::Col<T> magnitudes = arma::abs(real(as_arma_col(x)))+arma::abs(imag(as_arma_col(x)));
+        arma::uword where;
+        magnitudes.min(where); // value discarded, position kept
+        return where;
+    }
+
+    // --------------------------------------------------------------------------------
+
+    template <typename T> 
+    void maxAbsolute(const hoNDArray<T>& x, T& r, size_t& ind)
+    {
+        // Finds the entry of x with the largest magnitude: r receives the entry, ind its position.
+        // An empty array sets ind to 0 and leaves r untouched.
+        const size_t N = x.get_number_of_elements();
+        const T* pX = x.begin();
+
+        ind = 0;
+        if ( N == 0 ) return;
+
+        // std::abs is used for the seed as well as inside the loop: an unqualified abs()
+        // call is free to resolve to a different overload than the one applied to the rest.
+        typename realType<T>::Type v = std::abs(pX[0]);
+        typename realType<T>::Type v2;
+        for ( size_t n=1; n<N; n++ )
+        {
+            v2 = std::abs(pX[n]);
+            if ( v2 > v ) // strict comparison keeps the first of several equal magnitudes
+            {
+                v = v2;
+                ind = n;
+            }
+        }
+
+        r = pX[ind];
+    }
+
+    template EXPORTCPUCOREMATH void maxAbsolute(const hoNDArray<float>& x, float& r, size_t& ind);
+    template EXPORTCPUCOREMATH void maxAbsolute(const hoNDArray<double>& x, double& r, size_t& ind);
+    template EXPORTCPUCOREMATH void maxAbsolute(const hoNDArray< std::complex<float> >& x,  std::complex<float> & r, size_t& ind);
+    template EXPORTCPUCOREMATH void maxAbsolute(const hoNDArray< std::complex<double> >& x,  std::complex<double> & r, size_t& ind);
+
+    // --------------------------------------------------------------------------------
+
+    inline size_t amax(size_t N, const float* x)
+    {
+        // Thin wrapper around BLAS isamax_ (unit stride); the routine's result is returned unchanged.
+        lapack_int stride = 1;
+        lapack_int count = (lapack_int)(N); // size_t narrowed to lapack_int for the Fortran interface
+        return isamax_(&count, (float*)(x), &stride); // NOTE(review): reference BLAS reports a 1-based position — confirm callers expect that
+    }
+
+    inline size_t amax(size_t N, const double* x)
+    {
+        // Thin wrapper around BLAS idamax_ (unit stride); the routine's result is returned unchanged.
+        lapack_int stride = 1;
+        lapack_int count = (lapack_int)(N); // size_t narrowed to lapack_int for the Fortran interface
+        return idamax_(&count, (double*)(x), &stride); // NOTE(review): reference BLAS reports a 1-based position — confirm callers expect that
+    }
+
+    inline size_t amax(size_t N, const  std::complex<float> * x)
+    {
+        // Thin wrapper around BLAS icamax_ (unit stride); the routine's result is returned unchanged.
+        lapack_int stride = 1;
+        lapack_int count = (lapack_int)(N); // size_t narrowed to lapack_int for the Fortran interface
+        return icamax_(&count, (lapack_complex_float*)(x), &stride); // NOTE(review): reference BLAS reports a 1-based position — confirm callers expect that
+    }
+
+    inline size_t amax(size_t N, const  std::complex<double> * x)
+    {
+        // Thin wrapper around BLAS izamax_ (unit stride); the routine's result is returned unchanged.
+        lapack_int stride = 1;
+        lapack_int count = (lapack_int)(N); // size_t narrowed to lapack_int for the Fortran interface
+        return izamax_(&count, (lapack_complex_double*)(x), &stride); // NOTE(review): reference BLAS reports a 1-based position — confirm callers expect that
+    }
+
+    template<class T> size_t amax(const hoNDArray<T>& x) // array front end for the BLAS-backed pointer overloads
+    {
+        return amax(x.get_number_of_elements(), x.begin()); // the selected overload's result is returned unchanged
+    }
+
+    template EXPORTCPUCOREMATH size_t amax( const hoNDArray<float>& x);
+    template EXPORTCPUCOREMATH size_t amax( const hoNDArray<double>& x);
+    template EXPORTCPUCOREMATH size_t amax( const hoNDArray< std::complex<float> >& x);
+    template EXPORTCPUCOREMATH size_t amax( const hoNDArray< std::complex<double> >& x);
+
+    template<class T> size_t amax( hoNDArray<T> *x )
+    {
+        // Position of the entry of *x with the largest absolute value, as reported by Armadillo.
+        typedef typename realType<T>::Type realT;
+        if( !x )
+            throw std::runtime_error("Gadgetron::amax(): Invalid input array");
+        arma::Col<realT> magnitudes = arma::abs(as_arma_col(x));
+        arma::uword where;
+        magnitudes.max(where); // only the position is needed; the maximum value itself is discarded
+        return where;
+    }
+
+    template<class T> size_t amax( hoNDArray< std::complex<T> > *x )
+    {
+        // Position of the entry maximising |re| + |im| (not the complex modulus).
+        if( !x )
+            throw std::runtime_error("Gadgetron::amax(): Invalid input array");
+        arma::Col<T> magnitudes = arma::abs(real(as_arma_col(x)))+arma::abs(imag(as_arma_col(x)));
+        arma::uword where;
+        magnitudes.max(where); // value discarded, position kept
+        return where;
+    }
+
+    template<class T> size_t amax( hoNDArray< complext<T> > *x )
+    {
+        // Position of the entry maximising |re| + |im| (not the complex modulus).
+        if( !x )
+            throw std::runtime_error("Gadgetron::amax(): Invalid input array");
+        arma::Col<T> magnitudes = arma::abs(real(as_arma_col(x)))+arma::abs(imag(as_arma_col(x)));
+        arma::uword where;
+        magnitudes.max(where); // value discarded, position kept
+        return where;
+    }
+
+    // --------------------------------------------------------------------------------
+
+    template EXPORTCPUCOREMATH float max(hoNDArray<float>*);
+    template EXPORTCPUCOREMATH float min(hoNDArray<float>*);
+    template EXPORTCPUCOREMATH float mean(hoNDArray<float>*);
+    template EXPORTCPUCOREMATH float sum(hoNDArray<float>*);
+    template EXPORTCPUCOREMATH float stddev(hoNDArray<float>*);
+
+    template EXPORTCPUCOREMATH double max(hoNDArray<double>*);
+    template EXPORTCPUCOREMATH double min(hoNDArray<double>*);
+    template EXPORTCPUCOREMATH double mean(hoNDArray<double>*);
+    template EXPORTCPUCOREMATH double sum(hoNDArray<double>*);
+    template EXPORTCPUCOREMATH double stddev(hoNDArray<double>*);
+
+    template EXPORTCPUCOREMATH complext<double> mean(hoNDArray<complext<double> >*);
+    template EXPORTCPUCOREMATH complext<double> sum(hoNDArray<complext<double> >*);
+
+    template EXPORTCPUCOREMATH complext<float> mean(hoNDArray<complext<float> >*);
+    template EXPORTCPUCOREMATH complext<float> sum(hoNDArray<complext<float> >*);
+
+
+    template EXPORTCPUCOREMATH float dot<float>( hoNDArray<float>*, hoNDArray<float>*, bool );
+    template EXPORTCPUCOREMATH float asum<float>( hoNDArray<float>* );
+    template EXPORTCPUCOREMATH float nrm2<float>( hoNDArray<float>* );
+
+    template EXPORTCPUCOREMATH size_t amin<float>( hoNDArray<float>* );
+    template EXPORTCPUCOREMATH size_t amax<float>( hoNDArray<float>* );
+
+    template EXPORTCPUCOREMATH double dot<double>( hoNDArray<double>*, hoNDArray<double>*, bool );
+    template EXPORTCPUCOREMATH double asum<double>( hoNDArray<double>* );
+    template EXPORTCPUCOREMATH double nrm2<double>( hoNDArray<double>* );
+
+    template EXPORTCPUCOREMATH size_t amin<double>( hoNDArray<double>* );
+    template EXPORTCPUCOREMATH size_t amax<double>( hoNDArray<double>* );
+
+    template EXPORTCPUCOREMATH std::complex<float> dot< std::complex<float> >( hoNDArray< std::complex<float> >*, hoNDArray< std::complex<float> >*, bool );
+    template EXPORTCPUCOREMATH float asum<float>( hoNDArray< std::complex<float> >* );
+    template EXPORTCPUCOREMATH float nrm2< std::complex<float> >( hoNDArray< std::complex<float> >* );
+    template EXPORTCPUCOREMATH float nrm1< std::complex<float> >( hoNDArray< std::complex<float> >* );
+    template EXPORTCPUCOREMATH size_t amin<float>( hoNDArray< std::complex<float> >* );
+    template EXPORTCPUCOREMATH size_t amax<float>( hoNDArray< std::complex<float> >* );
+
+    template EXPORTCPUCOREMATH std::complex<double> dot< std::complex<double> >( hoNDArray< std::complex<double> >*, hoNDArray< std::complex<double> >*, bool );
+    template EXPORTCPUCOREMATH double asum<double>( hoNDArray< std::complex<double> >* );
+    template EXPORTCPUCOREMATH double nrm2< std::complex<double> >( hoNDArray< std::complex<double> >* );
+    template EXPORTCPUCOREMATH double nrm1< std::complex<double> >( hoNDArray< std::complex<double> >* );
+    template EXPORTCPUCOREMATH size_t amin<double>( hoNDArray< std::complex<double> >* );
+    template EXPORTCPUCOREMATH size_t amax<double>( hoNDArray< std::complex<double> >* );
+
+    template EXPORTCPUCOREMATH complext<float> dot< complext<float> >( hoNDArray< complext<float> >*, hoNDArray< complext<float> >*, bool );
+    template EXPORTCPUCOREMATH float asum<float>( hoNDArray< complext<float> >* );
+    template EXPORTCPUCOREMATH float nrm2< complext<float> >( hoNDArray< complext<float> >* );
+    template EXPORTCPUCOREMATH float nrm1< complext<float> >( hoNDArray< complext<float> >* );
+    template EXPORTCPUCOREMATH size_t amin<float>( hoNDArray< complext<float> >* );
+    template EXPORTCPUCOREMATH size_t amax<float>( hoNDArray< complext<float> >* );
+
+    template EXPORTCPUCOREMATH complext<double> dot< complext<double> >( hoNDArray< complext<double> >*, hoNDArray< complext<double> >*, bool );
+    template EXPORTCPUCOREMATH double asum<double>( hoNDArray< complext<double> >* );
+    template EXPORTCPUCOREMATH double nrm2< complext<double> >( hoNDArray< complext<double> >* );
+    template EXPORTCPUCOREMATH double nrm1< complext<double> >( hoNDArray< complext<double> >* );
+    template EXPORTCPUCOREMATH size_t amin<double>( hoNDArray< complext<double> >* );
+    template EXPORTCPUCOREMATH size_t amax<double>( hoNDArray< complext<double> >* );
+    // --------------------------------------------------------------------------------
+
+    inline void dotc(size_t N, const  std::complex<float> * x, const  std::complex<float> * y,  std::complex<float> & r)
+    {
+        // Computes r = sum over n of x[n] * conj(y[n]), i.e. the conjugate falls on y.
+        // NOTE(review): BLAS ?dotc conjugates its first argument instead — confirm the intended convention.
+        long long n;
+
+        float sa(0), sb(0); // sa: real part of the result, sb: imaginary part
+        // sb is updated by every iteration just like sa, so it needs its own reduction clause.
+        #pragma omp parallel for private(n) reduction(+:sa) reduction(+:sb) if (N>NumElementsUseThreading)
+        for (n = 0; n < (long long)N; n++)
+        {
+            const float a = x[n].real();
+            const float b = x[n].imag();
+            const float c = y[n].real();
+            const float d = y[n].imag();
+
+            sa += (a*c + b*d);
+            sb += (c*b - a*d);
+        }
+
+        reinterpret_cast<float(&)[2]>(r)[0] = sa;
+        reinterpret_cast<float(&)[2]>(r)[1] = sb;
+    }
+
+    inline void dotc(size_t N, const  std::complex<double> * x, const  std::complex<double> * y,  std::complex<double> & r)
+    {
+        // Computes r = sum over n of x[n] * conj(y[n]), i.e. the conjugate falls on y.
+        // NOTE(review): BLAS ?dotc conjugates its first argument instead — confirm the intended convention.
+        long long n;
+
+        double sa(0), sb(0); // sa: real part of the result, sb: imaginary part
+        // sb is updated by every iteration just like sa, so it needs its own reduction clause.
+        #pragma omp parallel for private(n) reduction(+:sa) reduction(+:sb) if (N>NumElementsUseThreading)
+        for (n = 0; n < (long long)N; n++)
+        {
+            const double a = x[n].real();
+            const double b = x[n].imag();
+            const double c = y[n].real();
+            const double d = y[n].imag();
+
+            sa += (a*c + b*d);
+            sb += (c*b - a*d);
+        }
+
+        reinterpret_cast<double(&)[2]>(r)[0] = sa;
+        reinterpret_cast<double(&)[2]>(r)[1] = sb;
+    }
+
+    template <typename T> 
+    void dotc(const hoNDArray<T>& x, const hoNDArray<T>& y, T& r) // array front end for the pointer overloads of dotc
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements()); // size check; presumably active in debug builds only — TODO confirm
+        dotc(x.get_number_of_elements(), x.begin(), y.begin(), r); // the element count is taken from x
+    }
+
+    template EXPORTCPUCOREMATH void dotc(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y,  std::complex<float> & r);
+    template EXPORTCPUCOREMATH void dotc(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y,  std::complex<double> & r);
+
+    template <typename T> 
+    T dotc(const hoNDArray<T>& x, const hoNDArray<T>& y) // value-returning form of dotc(x, y, r)
+    {
+        T r; // assigned by the call below
+        dotc(x, y, r);
+        return r;
+    }
+
+    template EXPORTCPUCOREMATH std::complex<float> dotc(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y);
+    template EXPORTCPUCOREMATH std::complex<double> dotc(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y);
+
+    // --------------------------------------------------------------------------------
+
+    inline void dotu(size_t N, const float* x, const float* y, float& r)
+    {
+        // Real dot product: r = sum over k of x[k]*y[k].
+        long long k;
+        float acc = 0;
+
+        #pragma omp parallel for private(k) reduction(+:acc) if (N>NumElementsUseThreading)
+        for (k=0; k<(long long)N; ++k)
+        {
+            acc += x[k]*y[k];
+        }
+
+        r = acc;
+    }
+
+    inline void dotu(size_t N, const double* x, const double* y, double& r)
+    {
+        // Real dot product: r = sum over k of x[k]*y[k].
+        long long k;
+        double acc = 0;
+
+        #pragma omp parallel for private(k) reduction(+:acc) if (N>NumElementsUseThreading)
+        for (k=0; k<(long long)N; ++k)
+        {
+            acc += x[k]*y[k];
+        }
+
+        r = acc;
+    }
+
+    inline void dotu(size_t N, const  std::complex<float> * x, const  std::complex<float> * y,  std::complex<float> & r)
+    {
+        // Unconjugated dot product: r = sum over k of x[k] * y[k].
+        // The real and imaginary parts are accumulated as two separate float reductions.
+        long long k;
+        float re_acc = 0, im_acc = 0;
+
+        #pragma omp parallel for private(k) reduction(+:re_acc) reduction(+:im_acc) if (N>NumElementsUseThreading)
+        for (k = 0; k < (long long)N; ++k)
+        {
+            const float xr = x[k].real();
+            const float xi = x[k].imag();
+            const float yr = y[k].real();
+            const float yi = y[k].imag();
+
+            re_acc += (xr*yr - xi*yi);
+            im_acc += (yr*xi + xr*yi);
+        }
+
+        reinterpret_cast<float(&)[2]>(r)[0] = re_acc;
+        reinterpret_cast<float(&)[2]>(r)[1] = im_acc;
+    }
+
+    inline void dotu(size_t N, const  std::complex<double> * x, const  std::complex<double> * y,  std::complex<double> & r)
+    {
+        // Unconjugated dot product: r = sum over k of x[k] * y[k].
+        // The real and imaginary parts are accumulated as two separate double reductions.
+        long long k;
+        double re_acc = 0, im_acc = 0;
+
+        #pragma omp parallel for private(k) reduction(+:re_acc) reduction(+:im_acc) if (N>NumElementsUseThreading)
+        for (k = 0; k < (long long)N; ++k)
+        {
+            const double xr = x[k].real();
+            const double xi = x[k].imag();
+            const double yr = y[k].real();
+            const double yi = y[k].imag();
+
+            re_acc += (xr*yr - xi*yi);
+            im_acc += (yr*xi + xr*yi);
+        }
+
+        reinterpret_cast<double(&)[2]>(r)[0] = re_acc;
+        reinterpret_cast<double(&)[2]>(r)[1] = im_acc;
+    }
+
+    template <typename T> 
+    void dotu(const hoNDArray<T>& x, const hoNDArray<T>& y, T& r) // array front end for the pointer overloads of dotu
+    {
+        GADGET_DEBUG_CHECK_THROW(x.get_number_of_elements()==y.get_number_of_elements()); // size check; presumably active in debug builds only — TODO confirm
+        dotu(x.get_number_of_elements(), x.begin(), y.begin(), r); // the element count is taken from x
+    }
+
+    template EXPORTCPUCOREMATH void dotu(const hoNDArray<float>& x, const hoNDArray<float>& y, float& r);
+    template EXPORTCPUCOREMATH void dotu(const hoNDArray<double>& x, const hoNDArray<double>& y, double& r);
+    template EXPORTCPUCOREMATH void dotu(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, std::complex<float>& r);
+    template EXPORTCPUCOREMATH void dotu(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, std::complex<double>& r);
+
+    template <typename T> 
+    T dotu(const hoNDArray<T>& x, const hoNDArray<T>& y) // value-returning form of dotu(x, y, r)
+    {
+        T r = 0; // overwritten by the call below
+        dotu(x, y, r);
+        return r;
+    }
+
+    template EXPORTCPUCOREMATH float dotu(const hoNDArray<float>& x, const hoNDArray<float>& y);
+    template EXPORTCPUCOREMATH double dotu(const hoNDArray<double>& x, const hoNDArray<double>& y);
+    template EXPORTCPUCOREMATH  std::complex<float>  dotu(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y);
+    template EXPORTCPUCOREMATH  std::complex<double>  dotu(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y);
+
+    // --------------------------------------------------------------------------------
+
+    template <typename T> // comparator handed to std::sort by sort() when isascending is true
+    struct hoCompAscending // NOTE(review): a>b puts larger values first, i.e. a descending result — confirm the naming
+    {   // Strict '>' rather than '>=': std::sort requires comp(a,a)==false, otherwise its behaviour is undefined.
+        bool operator() (T a, T b) const { return (a>b); }
+    };
+
+    template <typename T> // comparator handed to std::sort by sort() when isascending is false
+    struct hoCompDescending // NOTE(review): a<b orders smaller values first, i.e. an ascending result — confirm the naming
+    {
+        bool operator() (T a, T b) { return (a<b); }
+    };
+
+    template <typename T> 
+    void sort(size_t N, const T* x, T* r, bool isascending)
+    {
+        // Sorts the N values of x into r with std::sort; r may be the same buffer as x.
+        // isascending only selects which comparator functor is handed to std::sort.
+        if ( x != r )
+        {
+            memcpy(r, x, sizeof(T)*N); // sorting then happens in place on the copy
+        }
+        T* const last = r + N;
+        if ( !isascending )
+        {
+            std::sort(r, last, hoCompDescending<T>());
+        }
+        else
+        {
+            std::sort(r, last, hoCompAscending<T>());
+        }
+    }
+
+    template <typename T> 
+    void sort(const hoNDArray<T>& x, hoNDArray<T>& r, bool isascending)
+    {
+        // Sorts the elements of x into r; passing the same array for both sorts it in place.
+        // r only has to be made large enough here. The pointer-based sort() called below
+        // copies x into r itself whenever the two buffers differ, so copying the data at
+        // this level as well would move every element twice.
+        if ( &r != &x )
+        {
+            if ( r.get_number_of_elements()!=x.get_number_of_elements())
+            {
+                r = x; // presumably reallocates r to x's size — hoNDArray::operator= is not visible here
+            }
+        }
+
+        sort(x.get_number_of_elements(), x.begin(), r.begin(), isascending);
+    }
+
+    template EXPORTCPUCOREMATH void sort(const hoNDArray<float>& x, hoNDArray<float>& r, bool isascending);
+    template EXPORTCPUCOREMATH void sort(const hoNDArray<double>& x, hoNDArray<double>& r, bool isascending);
+
+    // --------------------------------------------------------------------------------
+
+    template <class T>
+    void minValue(const hoNDArray<T>& a, T& v)
+    {
+        // Writes the smallest element of a into v. An empty array leaves v untouched.
+
+        try
+        {
+            const T* pA = a.begin();
+            const size_t n = a.get_number_of_elements();
+            if ( n == 0 ) return; // nothing to scan; reading pA[0] would be out of bounds
+
+            v = pA[0];
+            for (size_t ii=1; ii<n; ii++)
+            {
+                if (pA[ii]<v) v = pA[ii];
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in minValue(const hoNDArray<T>& a, T& v) ... ");
+        }
+    }
+
+    template EXPORTCPUCOREMATH void minValue(const hoNDArray<float>& a, float& v);
+    template EXPORTCPUCOREMATH void minValue(const hoNDArray<double>& a, double& v);
+
+    template <class T>
+    void maxValue(const hoNDArray<T>& a, T& v)
+    {
+        // Writes the largest element of a into v. An empty array leaves v untouched.
+
+        try
+        {
+            const T* pA = a.begin();
+            const size_t n = a.get_number_of_elements();
+            if ( n == 0 ) return; // nothing to scan; reading pA[0] would be out of bounds
+
+            v = pA[0];
+            for (size_t ii=1; ii<n; ii++)
+            {
+                if (pA[ii]>v) v = pA[ii];
+            }
+        }
+        catch(...)
+        {
+            GADGET_THROW("Errors in maxValue(const hoNDArray<T>& a, T& v) ... ");
+        }
+    }
+
+    template EXPORTCPUCOREMATH void maxValue(const hoNDArray<float>& a, float& v);
+    template EXPORTCPUCOREMATH void maxValue(const hoNDArray<double>& a, double& v);
+
+    // --------------------------------------------------------------------------------
+}
diff --git a/toolboxes/core/cpu/math/hoNDArray_reductions.h b/toolboxes/core/cpu/math/hoNDArray_reductions.h
new file mode 100644
index 0000000..639623d
--- /dev/null
+++ b/toolboxes/core/cpu/math/hoNDArray_reductions.h
@@ -0,0 +1,203 @@
+#pragma once
+
+#include "hoNDArray.h"
+#include "cpucore_math_export.h"
+
+#ifdef max
+    #undef max
+#endif // max
+
+#ifdef min
+    #undef min
+#endif // min
+
+namespace Gadgetron{
+
+    /***
+    * Finds the maximum element of the array
+    */
+    template<class REAL> EXPORTCPUCOREMATH REAL max(hoNDArray<REAL>* data);
+
+    /***
+    * Finds the minimum element of the array
+    */
+    template<class REAL> EXPORTCPUCOREMATH REAL min(hoNDArray<REAL>* data);
+
+    /***
+    * Finds the mean of the array
+    */
+    template<class T> EXPORTCPUCOREMATH T mean(hoNDArray<T>* data);
+
+    /***
+    * Calculates the sum of the array
+    */
+    template<class T> EXPORTCPUCOREMATH T sum(hoNDArray<T>* data);
+
+    /***
+    * Calculates the std of the array
+    */
+    template<class T> EXPORTCPUCOREMATH T stddev(hoNDArray<T>* data);
+
+    /**
+    * @brief Calculates the dot product of two arrays (as vectors).
+    * @param[in] x Array 1. For complex arrays the complex conjugate of x is used when cc is true.
+    * @param[in] y Array 2.
+    * @param[in] cc Specifies whether to use the complex conjugate of x (when applicable).
+    * @return The dot product of x and y
+    */
+    template<class T> EXPORTCPUCOREMATH T dot( hoNDArray<T> *x, hoNDArray<T> *y, bool cc = true );
+
+    /**
+    * @brief Calculates the sum of the l1-norms of the array entries
+    * @param[in] x Input array
+    * @return The l1-norm of the array
+    */
+    template<class T> EXPORTCPUCOREMATH typename realType<T>::Type asum( hoNDArray<T> *x );
+    template<class T> EXPORTCPUCOREMATH void asum(const hoNDArray<T>& x, typename realType<T>::Type& r);
+    template<class T> EXPORTCPUCOREMATH typename realType<T>::Type asum(const hoNDArray<T>& x);
+
+    /**
+    * @brief Calculates the sum of the l1-norms of the array entries
+    * @param[in] x Input array
+    * @return The l1-norm of the array
+    */
+    template<class T> EXPORTCPUCOREMATH T asum( hoNDArray< std::complex<T> > *x );
+
+    /**
+    * @brief Calculates the sum of the l1-norms of the array entries
+    * @param[in] x Input array
+    * @return The l1-norm of the array
+    */
+    template<class T> EXPORTCPUCOREMATH T asum( hoNDArray< complext<T> > *x );
+
+    /**
+    * @brief Calculates the l2-norm of the array (as a vector)
+    * @param[in] x Input array
+    * @return The l2-norm of the array
+    */
+    template<class T> EXPORTCPUCOREMATH typename realType<T>::Type nrm2( hoNDArray<T> *x );
+
+    /**
+    * @brief Calculates the l1-norm of the array (as a vector)
+    * @param[in] x Input array
+    * @return The l1-norm of the array
+    */
+    template<class T> EXPORTCPUCOREMATH typename realType<T>::Type nrm1( hoNDArray<T> *x );
+
+    /**
+    * @brief Returns the index of the array element with the smallest absolute value (l1 norm)
+    * @param[in] x Input data
+    * @return The array index corresponding to the smallest element in the array (0-indexing)
+    */
+    template<class T> EXPORTCPUCOREMATH size_t amin( hoNDArray<T> *x );
+
+    /**
+    * @brief Returns the index of the array element with the smallest absolute value (l1 norm)
+    * @param[in] x Input data
+    * @return The array index corresponding to the smallest element in the array (0-indexing)
+    */
+    template<class T> EXPORTCPUCOREMATH size_t amin( hoNDArray< std::complex<T> > *x );
+
+    /**
+    * @brief Returns the index of the array element with the smallest absolute value (l1 norm)
+    * @param[in] x Input data
+    * @return The array index corresponding to the smallest element in the array (0-indexing)
+    */
+    template<class T> EXPORTCPUCOREMATH size_t amin( hoNDArray< complext<T> > *x );
+
+    /**
+    * @brief Returns the index of the array element with the largest absolute value (l1-norm)
+    * @param[in] x Input data
+    * @return The array index corresponding to the largest element in the array (0-indexing)
+    */
+    template<class T> EXPORTCPUCOREMATH size_t amax( hoNDArray<T> *x );
+
+    /**
+    * @brief Returns the index of the array element with the largest absolute value (l1-norm)
+    * @param[in] x Input data
+    * @return The array index corresponding to the largest element in the array (0-indexing)
+    */
+    template<class T> EXPORTCPUCOREMATH size_t amax( hoNDArray< std::complex<T> > *x );
+
+    /**
+    * @brief Returns the index of the array element with the largest absolute value (l1-norm)
+    * @param[in] x Input data
+    * @return The array index corresponding to the largest element in the array (0-indexing)
+    */
+    template<class T> EXPORTCPUCOREMATH size_t amax( hoNDArray< complext<T> > *x );
+
+    /**
+    * @brief ind = min(abs(x(:)))
+    find the minimal absolute value of x and its position index ind
+    r = x[ind], not abs(x[ind])
+    */
+    template <typename T> EXPORTCPUCOREMATH 
+    void minAbsolute(const hoNDArray<T>& x, T& r, size_t& ind);
+
+    /**
+    * @brief ind = max(abs(x(:)))
+    find the maximal absolute value of x and its position index ind
+    r = x[ind], not abs(x[ind])
+    */
+    template <typename T> EXPORTCPUCOREMATH 
+    void maxAbsolute(const hoNDArray<T>& x, T& r, size_t& ind);
+
+    /**
+    * @brief r = norm(x(:), 2)
+    compute L2 norm of x
+    */
+    template <typename T> EXPORTCPUCOREMATH 
+    void norm2(const hoNDArray<T>& x, typename realType<T>::Type& r);
+
+    template <typename T> EXPORTCPUCOREMATH 
+    typename realType<T>::Type norm2(const hoNDArray<T>& x);
+
+    /**
+    * @brief r = norm(x(:), 1)
+    compute L1 norm of x = sum( abs(x(:)) )
+    */
+    template <typename T> EXPORTCPUCOREMATH 
+    void norm1(const hoNDArray<T>& x, typename realType<T>::Type& r);
+
+    template <typename T> EXPORTCPUCOREMATH 
+    typename realType<T>::Type norm1(const hoNDArray<T>& x);
+
+    /**
+    * @brief dot product of conj(x) and y
+    r = conj(x) dot y
+    */
+    template <typename T> EXPORTCPUCOREMATH 
+    void dotc(const hoNDArray<T>& x, const hoNDArray<T>& y, T& r);
+
+    template <typename T> EXPORTCPUCOREMATH 
+    T dotc(const hoNDArray<T>& x, const hoNDArray<T>& y);
+
+    /**
+    * @brief dot product of x and y
+    r = x dot y
+    */
+    template <typename T> EXPORTCPUCOREMATH 
+    void dotu(const hoNDArray<T>& x, const hoNDArray<T>& y, T& r);
+
+    template <typename T> EXPORTCPUCOREMATH 
+    T dotu(const hoNDArray<T>& x, const hoNDArray<T>& y);
+
+    /**
+    * @brief sort the ND array
+    */
+    template <typename T> EXPORTCPUCOREMATH void sort(const hoNDArray<T>& x, hoNDArray<T>& r, bool isascending);
+
+    /**
+    * @brief finds the index of the element with the maximal absolute value.
+    */
+    template<class T> EXPORTCPUCOREMATH size_t amax(const hoNDArray<T>& x);
+
+    /**
+    * @brief get the min and max value from an array (only for float and double type)
+    */
+    template <class T> EXPORTCPUCOREMATH 
+    void minValue(const hoNDArray<T>& a, T& v);
+
+    template <class T> EXPORTCPUCOREMATH 
+    void maxValue(const hoNDArray<T>& a, T& v);
+}
diff --git a/toolboxes/core/cpu/math/hoNDImage_util.cpp b/toolboxes/core/cpu/math/hoNDImage_util.cpp
new file mode 100644
index 0000000..1801791
--- /dev/null
+++ b/toolboxes/core/cpu/math/hoNDImage_util.cpp
@@ -0,0 +1,877 @@
+/** \file   hoNDImage_util.cpp
+    \brief  operations on the hoNDImage class.
+*/
+
+#include "hoNDImage_util.h"
+#include "hoNDBoundaryHandler.h"
+
+namespace Gadgetron
+{
+
+/**
+    \brief  Computes the finite-difference gradient of an image along every dimension.
+
+    gx[ii] receives the difference of x along dimension ii, for ii = 0..D-1. Any gx[ii] whose
+    dimensions differ from x is first overwritten by assigning x to it. Where both neighbours
+    along dimension ii exist the value is x[i+1] - x[i-1] (not divided by two); where one
+    neighbour would fall outside the image it is replaced by the sample itself, which gives a
+    one-sided difference. D = 1, 2 and 3 have dedicated code paths; any other D goes through a
+    generic index-based path.
+
+    \param[in]  x   input image
+    \param[out] gx  array of D images receiving the per-dimension differences
+    \return true on success; false (after logging an error) if any exception was caught
+
+    NOTE(review): the D==1 and D==2 paths, and the x direction of the D==3 path, access
+    element 1 and element size-2 unconditionally, so they appear to assume at least two
+    samples along those dimensions -- confirm against callers.
+*/
+template<class T, unsigned int D> 
+bool gradient(const hoNDImage<T, D>& x, hoNDImage<T, D> gx[])
+{
+    try
+    {
+        // make sure every output image has the dimensions of the input
+        unsigned int ii;
+        for ( ii=0; ii<D; ii++ )
+        {
+            if ( !gx[ii].dimensions_equal(x) )
+            {
+                gx[ii] = x;
+            }
+        }
+
+        if ( D == 1 )
+        {
+            long long sx = (long long)x.get_size(0);
+            const T* pX = x.begin();
+            T* pGx = gx[0].begin();
+
+            // from here on, the local loop index x shadows the image parameter x
+            long long x;
+
+            // #pragma omp parallel for default(none) private(x) shared(sx, pX, pGx)
+            for ( x=1; x<sx-1; x++ )
+            {
+                pGx[x] = pX[x+1] - pX[x-1];
+            }
+
+            // one-sided differences at the two ends
+            pGx[0] = pX[1] - pX[0];
+            pGx[sx-1] = pX[sx-1] - pX[sx-2];
+        }
+        else if ( D == 2 )
+        {
+            long long sx = (long long)x.get_size(0);
+            long long sy = (long long)x.get_size(1);
+
+            const T* pX = x.begin();
+            T* pGx = gx[0].begin();
+            T* pGy = gx[1].begin();
+
+            // from here on, the local loop index x shadows the image parameter x
+            long long x, y;
+
+            // interior pixels: both neighbours exist along x and y
+            // #pragma omp parallel for default(none) private(x, y) shared(sx, sy, pX, pGx, pGy)
+            for ( y=1; y<sy-1; y++ )
+            {
+                for ( x=1; x<sx-1; x++ )
+                {
+                    size_t offset = x + y*sx;
+
+                    pGx[offset] = pX[offset+1] - pX[offset-1];
+                    pGy[offset] = pX[offset+sx] - pX[offset-sx];
+                }
+            }
+
+            // first row (y = 0) and last row (y = sy-1), corners excluded
+            // #pragma omp parallel for default(none) private(x) shared(sx, sy, pX, pGx, pGy)
+            for ( x=1; x<sx-1; x++ )
+            {
+                pGx[x] = pX[x+1] - pX[x-1];
+
+                size_t offset = x + (sy-1)*sx;
+                pGx[offset] = pX[offset+1] - pX[offset-1];
+
+                pGy[x] = pX[x+sx] - pX[x];
+                pGy[x + (sy-1)*sx] = pX[x + (sy-1)*sx] - pX[x + (sy-2)*sx];
+            }
+
+            // first column (x = 0) and last column (x = sx-1), corners excluded
+            // #pragma omp parallel for default(none) private(y) shared(sx, sy, pX, pGx, pGy)
+            for ( y=1; y<sy-1; y++ )
+            {
+                size_t offset = y*sx;
+                pGy[offset] = pX[offset+sx] - pX[offset-sx];
+
+                pGx[offset] = pX[offset+1] - pX[offset];
+
+                offset = sx-1 + y*sx;
+                pGy[offset] = pX[offset+sx] - pX[offset-sx];
+
+                pGx[offset] = pX[offset] - pX[offset-1];
+            }
+
+            // the four corners: one-sided differences along both x and y
+            pGx[0] = pX[1]-pX[0];
+            pGx[sx-1] = pX[sx-1]-pX[sx-2];
+            pGx[(sy-1)*sx] = pX[(sy-1)*sx+1]-pX[(sy-1)*sx];
+            pGx[sx*sy-1] = pX[sx*sy-1]-pX[sx*sy-2];
+
+            pGy[0] = pX[sx]-pX[0];
+            pGy[sx-1] = pX[2*sx-1]-pX[sx-1];
+            pGy[(sy-1)*sx] = pX[(sy-1)*sx] - pX[(sy-2)*sx];
+            pGy[sx*sy-1] = pX[sx*sy-1] - pX[sx*sy-1-sx];
+        }
+        else if ( D == 3 )
+        {
+            long long sx = (long long)x.get_size(0);
+            long long sy = (long long)x.get_size(1);
+            long long sz = (long long)x.get_size(2);
+
+            const T* pX = x.begin();
+            T* pGx = gx[0].begin();
+            T* pGy = gx[1].begin();
+            T* pGz = gx[2].begin();
+
+            // from here on, the local loop index x shadows the image parameter x
+            long long x, y, z;
+
+            #pragma omp parallel default(none) private(x, y, z) shared(sx, sy, sz, pX, pGx, pGy, pGz)
+            {
+                // declared inside the parallel region, so each thread has its own copies
+                long long z_positive, z_negative, y_positive, y_negative;
+                size_t offset, offset_z_positive, offset_z_negative, offset_y_positive, offset_y_negative;
+
+                #pragma omp for 
+                for ( z=0; z<sz; z++ )
+                {
+                    // neighbour slices along z, clamped to the valid range [0, sz-1]
+                    z_positive = z+1;
+                    z_positive = (z_positive==sz) ? sz-1 : z_positive;
+
+                    z_negative = z-1;
+                    z_negative = (z_negative==-1) ? 0 : z_negative;
+
+                    for ( y=0; y<sy; y++ )
+                    {
+
+                        // neighbour rows along y, clamped to the valid range [0, sy-1]
+                        y_positive = y+1;
+                        y_positive = (y_positive==sy) ? sy-1 : y_positive;
+
+                        y_negative = y-1;
+                        y_negative = (y_negative==-1) ? 0 : y_negative;
+
+                        // linear offsets of the first sample (x = 0) of the current row and of its y/z neighbours
+                        offset = y*sx + z*sx*sy;
+
+                        offset_z_positive = y*sx + z_positive*sx*sy;
+                        offset_z_negative = y*sx + z_negative*sx*sy;
+
+                        offset_y_positive = y_positive*sx + z*sx*sy;
+                        offset_y_negative = y_negative*sx + z*sx*sy;
+
+                        for ( x=1; x<sx-1; x++ )
+                        {
+                            pGx[offset+x] = pX[offset+x+1] - pX[offset+x-1];
+                            pGy[offset+x] = pX[offset_y_positive+x] - pX[offset_y_negative+x];
+                            pGz[offset+x] = pX[offset_z_positive+x] - pX[offset_z_negative+x];
+                        }
+
+                        // x = 0
+                        pGx[offset] = pX[offset+1] - pX[offset];
+                        pGy[offset] = pX[offset_y_positive] - pX[offset_y_negative];
+                        pGz[offset] = pX[offset_z_positive] - pX[offset_z_negative];
+
+                        // x = sx-1
+                        pGx[offset+sx-1] = pX[offset+sx-1] - pX[offset+sx-2];
+                        pGy[offset+sx-1] = pX[offset_y_positive+sx-1] - pX[offset_y_negative+sx-1];
+                        pGz[offset+sx-1] = pX[offset_z_positive+sx-1] - pX[offset_z_negative+sx-1];
+                    }
+                }
+            }
+        }
+        else
+        {
+            // generic path: walk every element through its N-dimensional index
+            size_t N = x.get_number_of_elements();
+
+            long long n;
+
+            std::vector<size_t> dim(D);
+            x.get_dimensions(dim);
+
+            #pragma omp parallel default(none) private(n) shared(N, dim, x, gx)
+            {
+                // declared inside the parallel region, so each thread has its own copies
+                size_t ind[D];
+                size_t ind_positive[D];
+                size_t ind_negative[D];
+                bool inside = true;
+                unsigned int ii;
+
+                #pragma omp for 
+                for ( n=0; n<(long long)N; n++ )
+                {
+                    x.calculate_index(n, ind);
+
+                    // an element is "inside" when it is not on the first or last position of any dimension
+                    inside = true;
+                    for ( ii=0; ii<D; ii++ )
+                    {
+                        if ( ind[ii]==0 || ind[ii]==dim[ii]-1 )
+                        {
+                            inside = false;
+                            break;
+                        }
+                    }
+
+                    if ( inside )
+                    {
+                        // both neighbours exist along every dimension: no clamping required
+                        for ( ii=0; ii<D; ii++ )
+                        {
+                            memcpy(ind_positive, ind, sizeof(size_t)*D);
+                            memcpy(ind_negative, ind, sizeof(size_t)*D);
+
+                            ind_positive[ii] = ind[ii] + 1;
+                            ind_negative[ii] = ind[ii] - 1;
+
+                            gx[ii](n) = x(ind_positive) - x(ind_negative);
+                        }
+                    }
+                    else
+                    {
+                        // border element: clamp the neighbour indices to the valid range
+                        for ( ii=0; ii<D; ii++ )
+                        {
+                            memcpy(ind_positive, ind, sizeof(size_t)*D);
+                            memcpy(ind_negative, ind, sizeof(size_t)*D);
+
+                            ind_positive[ii] = ind[ii] + 1;
+                            ind_positive[ii] = (ind_positive[ii]==dim[ii]) ? dim[ii]-1 : ind_positive[ii];
+
+                            // ind_negative is size_t: 0 - 1 wraps around, and the comparison below
+                            // matches because -1 is converted to the same wrapped unsigned value
+                            ind_negative[ii] = ind[ii] - 1;
+                            ind_negative[ii] = (ind_negative[ii]==-1) ? 0 : ind_negative[ii];
+
+                            gx[ii](n) = x(ind_positive) - x(ind_negative);
+                        }
+                    }
+                }
+            }
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors happened in gradient(const hoNDImage<T, D>& x, hoNDImage<T, D> gx[D]) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/**
+    \brief  Builds a sampled 1D Gaussian kernel.
+
+    The kernel has N = 2*ceil(kerWidthInUnitOfSigma*sigma/deltaKer) + 1 samples, centred on
+    sample N/2 and spaced deltaKer apart. Each sample is exp(-(ii*deltaKer)^2/(2*sigma^2)); the
+    kernel is then scaled by 1/(sqrt(2*pi*sigma^2) * sum of samples) through Gadgetron::scal.
+
+    \param[in]  sigma                   standard deviation of the Gaussian; must be positive
+    \param[in]  kerWidthInUnitOfSigma   half width of the kernel, expressed in multiples of sigma
+    \param[in]  deltaKer                sampling step of the kernel; must be positive
+    \param[out] ker                     receives the kernel (re-created with N elements)
+    \return true on success; false (after logging an error) for non-positive sigma/deltaKer
+            or if any exception was caught
+*/
+template <typename T> 
+bool gaussianKernel(T sigma, double kerWidthInUnitOfSigma, double deltaKer, hoNDArray<T>& ker)
+{
+    try
+    {
+        // both values are used as divisors below; the negated form also rejects NaN
+        if ( !(sigma > 0) || !(deltaKer > 0) )
+        {
+            GADGET_ERROR_MSG("gaussianKernel requires sigma > 0 and deltaKer > 0 ... ");
+            return false;
+        }
+
+        const long long N  =  (long long)(2*std::ceil(kerWidthInUnitOfSigma*sigma/deltaKer) + 1);
+        const long long halfN = N/2;
+
+        ker.create(N);
+
+        T kerSum = 0;
+
+        // exponent scale: (deltaKer^2) / (2*sigma^2)
+        const T D = (T)( (deltaKer*deltaKer)/(2*sigma*sigma) );
+
+        long long ii;
+        for ( ii=-halfN; ii<=halfN; ii++ )
+        {
+            ker(ii+halfN) = exp( -(ii*ii*D) );
+            kerSum += ker(ii+halfN);
+        }
+
+        // the previous literal 3.141592653579 had dropped two digits of pi
+        T GNorm = (T)(1/std::sqrt(2*3.14159265358979323846*sigma*sigma));
+        GNorm /= kerSum;
+
+        Gadgetron::scal(GNorm, ker);
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors happened in gaussianKernel(T sigma, double kerWidthInUnitOfSigma, double deltaKer, hoNDArray<T>& ker) ... ");
+        return false;
+    }
+    return true;
+}
+
+// As is well known in computer vision, the Gaussian filter can be implemented as the recursive DERICHE filter;
+// therefore, the computation cost is independent of sigma
+// [1] Deriche, R., 1992, Recursively implementing the Gaussian and its derivatives: Proceedings of the 2nd International Conference on Image Processing, Singapore, p. 263-267.
+// [2] http://en.wikipedia.org/wiki/Deriche_edge_detector gives details about this filter
+// this implementation is based on this webpage
+
+/**
+    \brief  Smooths one line of N samples in place with the recursive Deriche filter.
+
+    \param[in,out]  pData   first sample of the line; sample ii is read from and written to pData[ii*stride]
+    \param[in]      N       number of samples in the line; nothing is done when N is 0
+    \param[in]      mem     scratch buffer with room for at least 2*N elements
+                            (first N: left-to-right pass, next N: right-to-left pass)
+    \param[in]      sigma   smoothing width; values below 1e-6 are raised to 1e-6
+    \param[in]      offset  distance between consecutive samples in pData; 0 is treated as 1 (contiguous)
+*/
+template <class T, class T2>
+inline void DericheSmoothing(T* pData, size_t N, T* mem, T2 sigma, size_t offset=0)
+{
+    typedef typename realType<T>::Type real_type;
+
+    // an empty line would otherwise index reverse[N-1] with N-1 wrapped around
+    if ( N == 0 ) return;
+
+    if ( sigma < 1e-6 ) sigma = (T2)(1e-6);
+
+    // following the note of http://en.wikipedia.org/wiki/Deriche_edge_detector
+
+    real_type alpha = (real_type)(1.4105/sigma); // this value 1.4105 is from equation 37 of ref [1]
+    real_type e_alpha = (real_type)( exp( (double)(-alpha) ) );
+    real_type e_alpha_sqr = e_alpha*e_alpha;
+    real_type k = ( (1-e_alpha)*(1-e_alpha) ) / ( 1 + 2*alpha*e_alpha - e_alpha_sqr );
+
+    real_type a1 = k;
+    real_type a2 = k * e_alpha * (alpha-1);
+    real_type a3 = k * e_alpha * (alpha+1);
+    real_type a4 = -k * e_alpha_sqr;
+
+    real_type b1 = 2 * e_alpha;
+    real_type b2 = -e_alpha_sqr;
+
+    // compute the left to right filtering and the right to left filtering
+    // for the speed, just use the zero boundary condition
+    // TODO: try out other boundary conditions
+    T* forward = mem;
+    T* reverse = mem + N;
+
+    // the contiguous (offset == 0) and strided cases share one code path
+    const size_t stride = ( offset == 0 ) ? 1 : offset;
+
+    forward[0] = a1 * pData[0];
+    reverse[N-1] = 0;
+
+    size_t ii;
+
+    if ( N > 1 )
+    {
+        forward[1] = a1 * pData[stride] + a2*pData[0] + b1 * forward[0];
+        reverse[N-2] = a3 * pData[(N-1)*stride] + b1 * reverse[N-1];
+
+        for ( ii=2; ii<N; ii++ )
+        {
+            forward[ii] = (a1*pData[ii*stride] + a2*pData[(ii-1)*stride]) + (b1*forward[ii-1] + b2*forward[ii-2]);
+            reverse[N-1-ii] = (a3*pData[(N-ii)*stride] + a4*pData[(N-ii+1)*stride]) + (b1*reverse[N-ii] + b2*reverse[N-ii+1]);
+        }
+    }
+
+    // write the result back for every N >= 1; the strided case used to skip this
+    // step when N == 1 and so left a single-sample line unfiltered
+    for ( ii=0; ii<N; ii++ )
+    {
+        pData[ii*stride] = forward[ii] + reverse[ii];
+    }
+}
+
+template<class ArrayType, class T2> 
+bool filterGaussian(ArrayType& img, T2 sigma[], typename ArrayType::value_type* mem)
+{
+    try
+    {
+        typedef typename ArrayType::value_type T;
+
+        size_t D = img.get_number_of_dimensions();
+
+        if ( D == 1 )
+        {
+            if ( sigma[0] > 0 )
+            {
+                size_t sx = img.get_size(0);
+
+                bool allocate = false;
+                if ( mem == NULL )
+                {
+                    mem = new T[2*sx];
+                    allocate = true;
+                }
+
+                Gadgetron::DericheSmoothing(img.begin(), sx, mem, sigma[0]);
+
+                if ( allocate ) delete [] mem;
+            }
+        }
+        else if ( D == 2 )
+        {
+            long long sx = (long long)img.get_size(0);
+            long long sy = (long long)img.get_size(1);
+
+            T* pData = img.begin();
+
+            long long x, y;
+
+            if ( mem != NULL )
+            {
+                if ( sigma[0] > 0 )
+                {
+                    // filter along x
+                    {
+                        for ( y=0; y<sy; y++ )
+                        {
+                            Gadgetron::DericheSmoothing(pData+y*sx, sx, mem, sigma[0]);
+                        }
+                    }
+                }
+
+                if ( sigma[1] > 0 )
+                {
+                    // filter along y
+                    {
+                        for ( x=0; x<sx; x++ )
+                        {
+                            Gadgetron::DericheSmoothing(pData+x, sy, mem, sigma[1], sx);
+                        }
+                    }
+                }
+            }
+            else
+            {
+                if ( sigma[0] > 0 )
+                {
+                    // filter along x
+                    // #pragma omp parallel default(none) private(y) shared(sx, sy, pData, sigma)
+                    {
+                        T* mem = new T[2*sx];
+
+                        // #pragma omp for 
+                        for ( y=0; y<sy; y++ )
+                        {
+                            Gadgetron::DericheSmoothing(pData+y*sx, sx, mem, sigma[0]);
+                        }
+
+                        delete [] mem;
+                    }
+                }
+
+                if ( sigma[1] > 0 )
+                {
+                    // filter along y
+                    //#pragma omp parallel default(none) private(x) shared(sx, sy, pData, sigma)
+                    {
+                        T* mem = new T[2*sy];
+
+                        // #pragma omp for 
+                        for ( x=0; x<sx; x++ )
+                        {
+                            Gadgetron::DericheSmoothing(pData+x, sy, mem, sigma[1], sx);
+                        }
+
+                        delete [] mem;
+                    }
+                }
+            }
+        }
+        else if ( D == 3 )
+        {
+            long long sx = (long long)img.get_size(0);
+            long long sy = (long long)img.get_size(1);
+            long long sz = (long long)img.get_size(2);
+
+            T* pData = img.begin();
+
+            long long x, y, z;
+
+            if ( sigma[0] > 0 )
+            {
+                // filter along x
+                #pragma omp parallel default(none) private(y, z) shared(sx, sy, sz, pData, sigma)
+                {
+                    T* mem = new T[2*sx];
+
+                    #pragma omp for 
+                    for ( z=0; z<sz; z++ )
+                    {
+                        for ( y=0; y<sy; y++ )
+                        {
+                            Gadgetron::DericheSmoothing(pData+y*sx+z*sx*sy, sx, mem, sigma[0]);
+                        }
+                    }
+
+                    delete [] mem;
+                }
+            }
+
+            if ( sigma[1] > 0 )
+            {
+                // filter along y
+                #pragma omp parallel default(none) private(x, y, z) shared(sx, sy, sz, pData, sigma)
+                {
+                    T* buf = new T[3*sy];
+                    T* mem = buf + sy;
+
+                    #pragma omp for 
+                    for ( z=0; z<sz; z++ )
+                    {
+                        for ( x=0; x<sx; x++ )
+                        {
+                            size_t offset = x + z*sx*sy;
+
+                            for ( y=0; y<sy; y++ )
+                            {
+                                buf[y] = pData[offset + y*sx];
+                            }
+
+                            Gadgetron::DericheSmoothing(buf, sy, mem, sigma[1]);
+
+                            for ( y=0; y<sy; y++ )
+                            {
+                                pData[offset + y*sx] = buf[y];
+                            }
+                        }
+                    }
+
+                    delete [] buf;
+                }
+            }
+
+            if ( sigma[2] > 0 )
+            {
+                // filter along z
+                #pragma omp parallel default(none) private(x, y, z) shared(sx, sy, sz, pData, sigma)
+                {
+                    T* buf = new T[3*sz];
+                    T* mem = buf + sz;
+
+                    #pragma omp for 
+                    for ( y=0; y<sy; y++ )
+                    {
+                        for ( x=0; x<sx; x++ )
+                        {
+                            size_t offset = x + y*sx;
+
+                            for ( z=0; z<sz; z++ )
+                            {
+                                buf[z] = pData[offset + z*sx*sy];
+                            }
+
+                            Gadgetron::DericheSmoothing(buf, sz, mem, sigma[2]);
+
+                            for ( z=0; z<sz; z++ )
+                            {
+                                pData[offset + z*sx*sy] = buf[z];
+                            }
+                        }
+                    }
+
+                    delete [] buf;
+                }
+            }
+        }
+        else if ( D == 4 )
+        {
+            long long sx = (long long)img.get_size(0);
+            long long sy = (long long)img.get_size(1);
+            long long sz = (long long)img.get_size(2);
+            long long st = (long long)img.get_size(3);
+
+            T* pData = img.begin();
+
+            long long x, y, z, t;
+
+            if ( sigma[0] > 0 )
+            {
+                // filter along x
+                #pragma omp parallel default(none) private(y, z, t) shared(sx, sy, sz, st, pData, sigma)
+                {
+                    T* mem = new T[2*sx];
+
+                    #pragma omp for 
+                    for ( t=0; t<st; t++ )
+                    {
+                        for ( z=0; z<sz; z++ )
+                        {
+                            for ( y=0; y<sy; y++ )
+                            {
+                                Gadgetron::DericheSmoothing(pData+y*sx+z*sx*sy+t*sx*sy*sz, sx, mem, sigma[0]);
+                            }
+                        }
+                    }
+
+                    delete [] mem;
+                }
+            }
+
+            if ( sigma[1] > 0 )
+            {
+                // filter along y
+                #pragma omp parallel default(none) private(x, y, z, t) shared(sx, sy, sz, st, pData, sigma)
+                {
+                    T* buf = new T[3*sy];
+                    T* mem = buf + sy;
+
+                    #pragma omp for 
+                    for ( t=0; t<st; t++ )
+                    {
+                        for ( z=0; z<sz; z++ )
+                        {
+                            for ( x=0; x<sx; x++ )
+                            {
+                                size_t offset = x + z*sx*sy + t*sx*sy*sz;
+
+                                for ( y=0; y<sy; y++ )
+                                {
+                                    buf[y] = pData[offset + y*sx];
+                                }
+
+                                Gadgetron::DericheSmoothing(buf, sy, mem, sigma[1]);
+
+                                for ( y=0; y<sy; y++ )
+                                {
+                                    pData[offset + y*sx] = buf[y];
+                                }
+                            }
+                        }
+                    }
+
+                    delete [] buf;
+                }
+            }
+
+            if ( sigma[2] > 0 )
+            {
+                // filter along z
+                #pragma omp parallel default(none) private(x, y, z, t) shared(sx, sy, sz, st, pData, sigma)
+                {
+                    T* buf = new T[3*sz];
+                    T* mem = buf + sz;
+
+                    #pragma omp for 
+                    for ( t=0; t<st; t++ )
+                    {
+                        for ( y=0; y<sy; y++ )
+                        {
+                            for ( x=0; x<sx; x++ )
+                            {
+                                size_t offset = x + y*sx + t*sx*sy*sz;
+
+                                for ( z=0; z<sz; z++ )
+                                {
+                                    buf[z] = pData[offset + z*sx*sy];
+                                }
+
+                                Gadgetron::DericheSmoothing(buf, sz, mem, sigma[2]);
+
+                                for ( z=0; z<sz; z++ )
+                                {
+                                    pData[offset + z*sx*sy] = buf[z];
+                                }
+                            }
+                        }
+                    }
+
+                    delete [] buf;
+                }
+            }
+
+            if ( sigma[3] > 0 )
+            {
+                // filter along t
+                #pragma omp parallel default(none) private(x, y, z, t) shared(sx, sy, sz, st, pData, sigma)
+                {
+                    T* buf = new T[3*st];
+                    T* mem = buf + st;
+
+                    #pragma omp for 
+                    for ( z=0; z<sz; z++ )
+                    {
+                        for ( y=0; y<sy; y++ )
+                        {
+                            for ( x=0; x<sx; x++ )
+                            {
+                                size_t offset = x + y*sx + z*sx*sy;
+
+                                for ( t=0; t<st; t++ )
+                                {
+                                    buf[t] = pData[offset + t*sx*sy*sz];
+                                }
+
+                                Gadgetron::DericheSmoothing(buf, st, mem, sigma[3]);
+
+                                for ( t=0; t<st; t++ )
+                                {
+                                    pData[offset + t*sx*sy*sz] = buf[t];
+                                }
+                            }
+                        }
+                    }
+
+                    delete [] buf;
+                }
+            }
+        }
+        else
+        {
+            std::vector<long long> dim(D);
+
+            unsigned int ii;
+            for ( ii=0; ii<D; ii++ )
+            {
+                dim[ii] = (long long)img.get_size(ii);
+            }
+
+            T* pData = img.begin();
+
+            long long N = (long long)img.get_number_of_elements();
+
+            std::vector<size_t> offsetFactor(D);
+            img.get_offset_factor(offsetFactor);
+
+            // filter along every dimension
+            for ( ii=0; ii<D; ii++ )
+            {
+                if ( sigma[ii] > 0 )
+                {
+                    long long num = N/dim[ii];
+
+                    long long n;
+
+                    if ( ii == 0 )
+                    {
+                        #pragma omp parallel default(none) private(n) shared(num, dim, pData, sigma)
+                        {
+                            T* mem = new T[ 2*dim[0] ];
+
+                            #pragma omp for 
+                            for ( n=0; n<num; n++ )
+                            {
+                                Gadgetron::DericheSmoothing(pData+n*dim[0], dim[0], mem, sigma[0]);
+                            }
+
+                            delete [] mem;
+                        }
+                    }
+                    else
+                    {
+                        std::vector<size_t> dimCurr(D-1);
+
+                        unsigned int jj;
+                        for ( jj=0; jj<D; jj++ )
+                        {
+                            if ( jj < ii )
+                            {
+                                dimCurr[jj] = dim[jj];
+                            }
+
+                            if ( jj > ii )
+                            {
+                                dimCurr[jj-1] = dim[jj];
+                            }
+                        }
+
+                        std::vector<size_t> offsetFactorCurr(D-1);
+                        NDArray<T>::calculate_offset_factors(dimCurr, offsetFactorCurr);
+
+                        #pragma omp parallel default(none) private(n) shared(D, num, dim, img, pData, sigma, ii, offsetFactor, offsetFactorCurr)
+                        {
+                            T* buf = new T[ 3*dim[ii] ];
+                            T* mem = buf + dim[ii];
+
+                            std::vector<size_t> ind(D);
+                            std::vector<size_t> indCurr(D-1);
+
+                            std::vector<size_t> offset(dim[ii]);
+
+                            #pragma omp for 
+                            for ( n=0; n<num; n++ )
+                            {
+                                NDArray<T>::calculate_index(n, offsetFactorCurr, indCurr);
+
+                                unsigned int jj;
+                                for ( jj=0; jj<D; jj++ )
+                                {
+                                    if ( jj < ii )
+                                    {
+                                        ind[jj] = indCurr[jj];
+                                    }
+
+                                    if ( jj > ii )
+                                    {
+                                        ind[jj] = indCurr[jj-1];
+                                    }
+                                }
+
+                                ind[ii] = 0;
+                                offset[0] = img.calculate_offset(ind);
+                                buf[0] = pData[ offset[0] ];
+
+                                long long d;
+                                for ( d=1; d<dim[ii]; d++ )
+                                {
+                                    offset[d] = offset[d-1] + offsetFactor[ii];
+                                    buf[d] = pData[ offset[d] ];
+                                }
+
+                                Gadgetron::DericheSmoothing(buf, dim[ii], mem, sigma[ii]);
+
+                                for ( d=0; d<dim[ii]; d++ )
+                                {
+                                    pData[ offset[d] ] = buf[d];
+                                }
+                            }
+
+                            delete [] buf;
+                        }
+                    }
+                }
+            }
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors happened in filterGaussian(const hoNDImage<T, D>& x, T sigma[], typename ArrayType::value_type* mem) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+template EXPORTCPUCOREMATH bool filterGaussian(hoNDArray<float>& img, float sigma[], float* mem); // explicit instantiations: real-valued hoNDArray, float or double sigma
+template EXPORTCPUCOREMATH bool filterGaussian(hoNDArray<float>& img, double sigma[], float* mem);
+template EXPORTCPUCOREMATH bool filterGaussian(hoNDArray<double>& img, double sigma[], double* mem);
+template EXPORTCPUCOREMATH bool filterGaussian(hoNDArray<double>& img, float sigma[], double* mem);
+// explicit instantiations: complex-valued hoNDArray, float or double sigma
+template EXPORTCPUCOREMATH bool filterGaussian(hoNDArray< std::complex<float> >& img, float sigma[],  std::complex<float> * mem);
+template EXPORTCPUCOREMATH bool filterGaussian(hoNDArray< std::complex<double> >& img, double sigma[],  std::complex<double> * mem);
+template EXPORTCPUCOREMATH bool filterGaussian(hoNDArray< std::complex<float> >& img, double sigma[],  std::complex<float> * mem);
+template EXPORTCPUCOREMATH bool filterGaussian(hoNDArray< std::complex<double> >& img, float sigma[],  std::complex<double> * mem);
+// explicit instantiations: real-valued ho2DArray
+template EXPORTCPUCOREMATH bool filterGaussian(ho2DArray<float>& img, float sigma[], float* mem);
+template EXPORTCPUCOREMATH bool filterGaussian(ho2DArray<float>& img, double sigma[], float* mem);
+template EXPORTCPUCOREMATH bool filterGaussian(ho2DArray<double>& img, float sigma[], double* mem);
+template EXPORTCPUCOREMATH bool filterGaussian(ho2DArray<double>& img, double sigma[], double* mem);
+// explicit instantiations: real-valued hoMatrix
+template EXPORTCPUCOREMATH bool filterGaussian(hoMatrix<float>& img, float sigma[], float* mem);
+template EXPORTCPUCOREMATH bool filterGaussian(hoMatrix<float>& img, double sigma[], float* mem);
+template EXPORTCPUCOREMATH bool filterGaussian(hoMatrix<double>& img, double sigma[], double* mem);
+template EXPORTCPUCOREMATH bool filterGaussian(hoMatrix<double>& img, float sigma[], double* mem);
+// explicit instantiations: gaussianKernel for float and double
+template EXPORTCPUCOREMATH bool gaussianKernel(float sigma, double kerWidthInUnitOfSigma, double deltaKer, hoNDArray<float>& ker);
+template EXPORTCPUCOREMATH bool gaussianKernel(double sigma, double kerWidthInUnitOfSigma, double deltaKer, hoNDArray<double>& ker);
+// hoNDImage_util_instantiate.hxx is included once per DimImage value 1..9; presumably it instantiates the hoNDImage<T, DimImage> utilities -- confirm against that file
+#define DimImage 1
+#include "hoNDImage_util_instantiate.hxx"
+#undef DimImage
+
+#define DimImage 2
+#include "hoNDImage_util_instantiate.hxx"
+#undef DimImage
+
+#define DimImage 3
+#include "hoNDImage_util_instantiate.hxx"
+#undef DimImage
+
+#define DimImage 4
+#include "hoNDImage_util_instantiate.hxx"
+#undef DimImage
+
+#define DimImage 5
+#include "hoNDImage_util_instantiate.hxx"
+#undef DimImage
+
+#define DimImage 6
+#include "hoNDImage_util_instantiate.hxx"
+#undef DimImage
+
+#define DimImage 7
+#include "hoNDImage_util_instantiate.hxx"
+#undef DimImage
+
+#define DimImage 8
+#include "hoNDImage_util_instantiate.hxx"
+#undef DimImage
+
+#define DimImage 9
+#include "hoNDImage_util_instantiate.hxx"
+#undef DimImage
+
+}
diff --git a/toolboxes/core/cpu/math/hoNDImage_util.h b/toolboxes/core/cpu/math/hoNDImage_util.h
new file mode 100644
index 0000000..d561ca3
--- /dev/null
+++ b/toolboxes/core/cpu/math/hoNDImage_util.h
@@ -0,0 +1,76 @@
+/** \file hoNDImage_util.h
+\brief math operations on the hoNDImage class.
+*/
+
+#pragma once
+
+#include "ho2DArray.h"
+#include "ho3DArray.h"
+#include "ho4DArray.h"
+#include "ho5DArray.h"
+#include "ho6DArray.h"
+#include "ho7DArray.h"
+#include "hoNDImage.h"
+#include "cpucore_math_export.h"
+
+#include "GadgetronCommon.h"
+#include <complex>
+
+#include "hoNDArray_reductions.h"
+#include "hoNDArray_elemwise.h"
+#include "hoNDInterpolator.h"
+
+namespace Gadgetron
+{
+    /// compute the gradient of an ND image
+    /// central differences are computed, with the border-value boundary condition
+    template<class T, unsigned int D> EXPORTCPUCOREMATH bool gradient(const hoNDImage<T, D>& x, hoNDImage<T, D> gx[]);
+
+    /// compute a gaussian kernel
+    template<class T> EXPORTCPUCOREMATH bool gaussianKernel(T sigma, double kerWidthInUnitOfSigma, double deltaKer, hoNDArray<T>& ker);
+
+    /// perform gaussian filtering along every dimension; x is filtered in place
+    /// sigma[] holds one value per dimension, in units of pixels
+    template<class ArrayType, class T2> EXPORTCPUCOREMATH bool filterGaussian(ArrayType& x, T2 sigma[], typename ArrayType::value_type* mem=NULL);
+
+    /// perform median filter
+    /// w[] is the window size along each dimension (w[i]/2 samples are taken on either side of the pixel)
+    template<class ArrayType> bool filterMedian(const ArrayType& img, size_t w[], ArrayType& img_out);
+
+    /// downsample the image by a ratio
+    /// new image size = image size / ratio, truncated to an integer, per dimension
+    /// e.g., if ratio = 2, downsample by 2
+    template<typename T, typename InterpolatorType, unsigned int D> 
+    bool downsampleImage(const hoNDImage<T, D>& in, InterpolatorType& interp, hoNDImage<T, D>& out, float ratio[]);
+
+    /// upsample the image by a ratio
+    /// new image size = image size * ratio, truncated to an integer, per dimension
+    /// e.g., if ratio = 2, upsample by 2
+    template<typename T, typename InterpolatorType, unsigned int D> 
+    bool upsampleImage(const hoNDImage<T, D>& in, InterpolatorType& interp, hoNDImage<T, D>& out, float ratio[]);
+
+    /// resample the image to a specific image size
+    /// input and output images occupy the same spatial region
+    /// the pixel sizes of the output image are adjusted accordingly
+    template<typename T, typename InterpolatorType, unsigned int D> 
+    bool resampleImage(const hoNDImage<T, D>& in, InterpolatorType& interp, const std::vector<size_t>& dim_out, hoNDImage<T, D>& out);
+
+    /// halve the image size; each output pixel averages the input pixel at twice its index with its two neighbors along every dimension
+    template<typename T, typename BoundaryHandlerType, unsigned int D> 
+    bool downsampleImageBy2WithAveraging(const hoNDImage<T, D>& in, BoundaryHandlerType& bh, hoNDImage<T, D>& out);
+
+    /// expand image size by 2 with linear interpolation; out is not allocated here, only its pixel size, origin and axis are set
+    template<typename T, typename BoundaryHandlerType, unsigned int D> 
+    bool expandImageBy2(const hoNDImage<T, D>& in, BoundaryHandlerType& bh, hoNDImage<T, D>& out);
+
+    /// filter the image along the first dimension using a 1D kernel
+    template<class ArrayType> bool filter1D(const ArrayType& img, const hoNDArray<typename realType<typename ArrayType::value_type>::Type>& ker, GT_BOUNDARY_CONDITION bh, ArrayType& img_out);
+
+    /**
+    * @brief r = correlation_coefficient(a, b); r is left at -1 when sqrt(var_sum_a*var_sum_b) is not > 0
+    */
+    template <typename T, unsigned int D> 
+    bool corrCoef(const hoNDImage<T, D>& a, const hoNDImage<T, D>& b, T& r);
+}
+
+#include "hoNDImage_util.hxx"
diff --git a/toolboxes/core/cpu/math/hoNDImage_util.hxx b/toolboxes/core/cpu/math/hoNDImage_util.hxx
new file mode 100644
index 0000000..df0c74a
--- /dev/null
+++ b/toolboxes/core/cpu/math/hoNDImage_util.hxx
@@ -0,0 +1,1020 @@
+/** \file   hoNDImage_util.hxx
+    \brief  operations on the hoNDImage class.
+*/
+
+namespace Gadgetron
+{
+    template <typename T, unsigned int D> 
+    bool corrCoef(const hoNDImage<T, D>& a, const hoNDImage<T, D>& b, T& r) // r = correlation coefficient of a and b over all pixels
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(a.dimensions_equal(&b)); // a and b must have identical dimensions
+
+            r = -1; // value left in r when the coefficient is not computed (p <= 0 below)
+
+            T ma, mb; // mean values of a and b
+            ma = Gadgetron::mean( const_cast< hoNDImage<T, D>* >(&a) ); // const_cast: presumably mean() takes a non-const pointer -- confirm
+            mb = Gadgetron::mean( const_cast< hoNDImage<T, D>* >(&b) );
+
+            size_t N = a.get_number_of_elements();
+
+            const T* pA = a.begin();
+            const T* pB = b.begin();
+
+            size_t n;
+
+            double x(0), y(0), z(0); // x, y: sums of squared deviations of a and b; z: sum of cross products
+            for ( n=0; n<N; n++ )
+            {
+                x += (pA[n]-ma)*(pA[n]-ma);
+                y += (pB[n]-mb)*(pB[n]-mb);
+                z += (pA[n]-ma)*(pB[n]-mb);
+            }
+
+            double p = std::sqrt(x*y);
+            if ( p > 0 ) // skip the division when either sum of squared deviations is zero
+            {
+                r = (T)(z/p);
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in corrCoef(const hoNDImage<T, D>& a, const hoNDImage<T, D>& b, T& r) ... ");
+            return false; // any exception is reported as failure
+        }
+
+        return true;
+    }
+
+    template<typename T, typename InterpolatorType, unsigned int D> 
+    bool downsampleImage(const hoNDImage<T, D>& in, InterpolatorType& interp, hoNDImage<T, D>& out, float ratio[]) // shrink each dimension by ratio[ii], then resample
+    {
+        try
+        {
+            std::vector<size_t> dim(D);
+            in.get_dimensions(dim);
+
+            std::vector<size_t> dim_out(D); // output size per dimension
+
+            unsigned int ii;
+            for ( ii=0; ii<D; ii++ )
+            {
+                dim_out[ii] = (size_t)(dim[ii]/ratio[ii]); // the cast truncates the quotient toward zero
+            }
+
+            return Gadgetron::resampleImage(in, interp, dim_out, out); // the actual resampling is delegated
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in downsampleImage(const hoNDImage<T, D>& in, InterpolatorType& interp, hoNDImage<T, D>& out, float ratio[]) ... ");
+            return false;
+        }
+
+        return true; // not reached: both the try and the catch path return
+    }
+
+    template<typename T, typename InterpolatorType, unsigned int D> 
+    bool upsampleImage(const hoNDImage<T, D>& in, InterpolatorType& interp, hoNDImage<T, D>& out, float ratio[]) // grow each dimension by ratio[ii], then resample
+    {
+        try
+        {
+            std::vector<size_t> dim(D);
+            in.get_dimensions(dim);
+
+            std::vector<size_t> dim_out(D); // output size per dimension
+
+            unsigned int ii;
+            for ( ii=0; ii<D; ii++ )
+            {
+                dim_out[ii] = (size_t)(dim[ii]*ratio[ii]); // the cast truncates the product toward zero
+            }
+
+            return Gadgetron::resampleImage(in, interp, dim_out, out); // the actual resampling is delegated
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in upsampleImage(const hoNDImage<T, D>& in, InterpolatorType& interp, hoNDImage<T, D>& out, float ratio[]) ... ");
+            return false;
+        }
+
+        return true; // not reached: both the try and the catch path return
+    }
+
+    template<typename T, typename InterpolatorType, unsigned int D> 
+    bool resampleImage(const hoNDImage<T, D>& in, InterpolatorType& interp, const std::vector<size_t>& dim_out, hoNDImage<T, D>& out) // resample 'in' onto a grid of size dim_out; 'out' is (re)created here
+    {
+        try
+        {
+            typedef typename hoNDImage<T, D>::coord_type coord_type;
+
+            /// get the coordinate parameters of the input image
+            std::vector<size_t> dim;
+            in.get_dimensions(dim);
+
+            std::vector<coord_type> pixelSize;
+            in.get_pixel_size(pixelSize);
+
+            std::vector<coord_type> origin;
+            in.get_origin(origin);
+
+            typename hoNDImage<T, D>::axis_type axis;
+            in.get_axis(axis);
+
+            /// compute new pixel sizes so that first-to-last sample distance is preserved
+            std::vector<coord_type> pixelSize_out(D);
+
+            unsigned int ii;
+            for ( ii=0; ii<D; ii++ )
+            {
+                if ( dim_out[ii] > 1 )
+                {
+                    pixelSize_out[ii] = (dim[ii]-1)*pixelSize[ii] / (dim_out[ii]-1); // NOTE(review): dim[ii]-1 is unsigned and wraps if dim[ii] == 0
+                }
+                else
+                {
+                    pixelSize_out[ii] = (dim[ii]-1)*pixelSize[ii]; // dim_out[ii] <= 1: avoid dividing by zero, use the whole input extent
+                }
+            }
+
+            /// set up the out image; origin and axis are copied from the input
+            out.create(dim_out, pixelSize_out, origin, axis);
+
+            /// set up the interpolator to read from the input image
+            interp.setArray( const_cast< hoNDImage<T, D>& >(in) );
+
+            /// compute the out image
+
+            size_t N = out.get_number_of_elements(); // only read by the generic branch at the end
+
+            if ( D == 2 ) // D is a template parameter; 2D, 3D and 4D get dedicated loops
+            {
+                long long ox = (long long)dim_out[0];
+                long long oy = (long long)dim_out[1];
+
+                long long x, y;
+
+                #pragma omp parallel default(none) private(x, y) shared(N, ox, oy, in, out, interp)
+                {
+                    coord_type px, py, ix_in, iy_in; // declared inside the parallel region, so one set per thread
+
+                    #pragma omp for 
+                    for ( y=0; y<oy; y++ )
+                    {
+                        for ( x=0; x<ox; x++ )
+                        {
+                            out.image_to_world( (size_t)x, (size_t)y, px, py); // output pixel index -> world position
+
+                            in.world_to_image(px, py, ix_in, iy_in); // world position -> input image coordinate
+
+                            out( (size_t)(x+y*ox) ) = interp(ix_in, iy_in); // interpolated input value, stored at the linear offset
+                        }
+                    }
+                }
+            }
+            else if ( D == 3 )
+            {
+                long long ox = (long long)dim_out[0];
+                long long oy = (long long)dim_out[1];
+                long long oz = (long long)dim_out[2];
+
+                long long x, y, z;
+
+                #pragma omp parallel default(none) private(x, y, z) shared(N, ox, oy, oz, in, out, interp)
+                {
+                    coord_type ix_in, iy_in, iz_in;
+                    coord_type px, py, pz;
+
+                    #pragma omp for 
+                    for ( z=0; z<oz; z++ )
+                    {
+                        for ( y=0; y<oy; y++ )
+                        {
+                            size_t offset = y*ox + z*ox*oy; // linear offset of the first pixel of row (y, z)
+
+                            for ( x=0; x<ox; x++ )
+                            {
+                                out.image_to_world( (size_t)x, (size_t)y, (size_t)z, px, py, pz); // output pixel index -> world position
+
+                                in.world_to_image(px, py, pz, ix_in, iy_in, iz_in); // world position -> input image coordinate
+
+                                out( (size_t)(x+offset) ) = interp(ix_in, iy_in, iz_in);
+                            }
+                        }
+                    }
+                }
+            }
+            else if ( D == 4 )
+            {
+                long long ox = (long long)dim_out[0];
+                long long oy = (long long)dim_out[1];
+                long long oz = (long long)dim_out[2];
+                long long ot = (long long)dim_out[3];
+
+                long long x, y, z, t;
+
+                #pragma omp parallel default(none) private(x, y, z, t) shared(N, ox, oy, oz, ot, in, out, interp)
+                {
+                    coord_type ix_in, iy_in, iz_in, it_in;
+                    coord_type px, py, pz, pt;
+
+                    #pragma omp for 
+                    for ( t=0; t<ot; t++ )
+                    {
+                        for ( z=0; z<oz; z++ )
+                        {
+                            for ( y=0; y<oy; y++ )
+                            {
+                                size_t offset = y*ox + z*ox*oy + t*ox*oy*oz; // linear offset of the first pixel of row (y, z, t)
+
+                                for ( x=0; x<ox; x++ )
+                                {
+                                    out.image_to_world( (size_t)x, (size_t)y, (size_t)z, (size_t)t, px, py, pz, pt); // output pixel index -> world position
+
+                                    in.world_to_image(px, py, pz, pt, ix_in, iy_in, iz_in, it_in); // world position -> input image coordinate
+
+                                    out( (size_t)(x+offset) ) = interp(ix_in, iy_in, iz_in, it_in);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            else // generic path for every other D: iterate over the linear pixel index
+            {
+                long long n;
+
+                #pragma omp parallel default(none) private(n) shared(N, in, out, interp)
+                {
+                    std::vector<size_t> ind_o(D); // output pixel index (one scratch vector per thread)
+                    std::vector<coord_type> ind_i(D); // matching input image coordinate
+
+                    std::vector<coord_type> pos(D); // world position of the output pixel
+
+                    #pragma omp for 
+                    for ( n=0; n<N; n++ ) // NOTE(review): n is long long while N is size_t, so the comparison mixes signedness
+                    {
+                        out.calculate_index(n, ind_o); // linear offset -> N-D index
+                        out.image_to_world(ind_o, pos);
+
+                        in.world_to_image(pos, ind_i);
+
+                        out(n) = interp(ind_i);
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in resampleImage(const hoNDImage<T, D>& in, InterpolatorType& interp, hoNDImage<T, D>& out, size_t size_out[D]) ... "); // NOTE(review): text names size_t size_out[D], but the signature takes const std::vector<size_t>& dim_out
+            return false;
+        }
+
+        return true;
+    }
+
+    template<typename T, typename BoundaryHandlerType, unsigned int D> 
+    bool downsampleImageBy2WithAveraging(const hoNDImage<T, D>& in, BoundaryHandlerType& bh, hoNDImage<T, D>& out) // halve every dimension; each output pixel averages 2*D+1 input samples read through bh
+    {
+        try
+        {
+            typedef typename hoNDImage<T, D>::coord_type coord_type;
+
+            bh.setArray( const_cast< hoNDImage<T, D>& >(in) ); // all input samples below are read through the boundary handler
+
+            /// get the coordinate parameters of the input image
+            std::vector<size_t> dim;
+            in.get_dimensions(dim);
+
+            std::vector<coord_type> pixelSize;
+            in.get_pixel_size(pixelSize);
+
+            std::vector<coord_type> origin;
+            in.get_origin(origin);
+
+            typename hoNDImage<T, D>::axis_type axis;
+            in.get_axis(axis);
+
+            /// compute out image size and pixel size
+            std::vector<size_t> dim_out(D);
+            std::vector<coord_type> pixelSize_out(D);
+
+            unsigned int ii;
+            for ( ii=0; ii<D; ii++ )
+            {
+                dim_out[ii] = (dim[ii] >> 1); // integer division by 2, so an odd trailing sample is dropped
+                pixelSize_out[ii] = 2*pixelSize[ii];
+            }
+
+            out.create(dim_out, pixelSize_out, origin, axis); // origin and axis are copied from the input
+
+            if ( D == 2 ) // D is a template parameter; 2D, 3D and 4D get dedicated loops
+            {
+                gt_index_type sx = (gt_index_type)dim_out[0];
+                gt_index_type sy = (gt_index_type)dim_out[1];
+
+                T weight = 1.0/5; // center + 2 neighbors in each of 2 dimensions = 5 samples
+
+                gt_index_type x, y;
+
+                #pragma omp parallel for default(none) private(x, y) shared(sx, sy, bh, out)
+                for ( y=0; y<sy; y++ )
+                {
+                    gt_index_type iy = y<<1; // input index = 2 * output index
+
+                    for ( x=0; x<sx; x++ )
+                    {
+                        gt_index_type ix = x<<1;
+                        out( (size_t)(x+y*sx) ) = bh(ix, iy) + ( bh(ix+1, iy) + bh(ix-1, iy) ) + ( bh(ix, iy+1) + bh(ix, iy-1) ); // ix-1 / iy-1 is -1 on the first column / row, so bh must accept out-of-range indices
+                    }
+                }
+
+                Gadgetron::scal(weight, out); // presumably scales out by weight, turning the sums into averages -- confirm
+            }
+            else if ( D == 3 )
+            {
+                gt_index_type sx = (gt_index_type)dim_out[0];
+                gt_index_type sy = (gt_index_type)dim_out[1];
+                gt_index_type sz = (gt_index_type)dim_out[2];
+
+                T weight = 1.0/7; // center + 2 neighbors in each of 3 dimensions = 7 samples
+
+                gt_index_type x, y, z;
+
+                #pragma omp parallel for default(none) private(x, y, z) shared(sx, sy, sz, bh, out)
+                for ( z=0; z<sz; z++ )
+                {
+                    gt_index_type iz = z<<1; // input index = 2 * output index
+
+                    for ( y=0; y<sy; y++ )
+                    {
+                        gt_index_type iy = y<<1;
+
+                        size_t offset = y*sx + z*sx*sy; // linear offset of the first output pixel of row (y, z)
+
+                        for ( x=0; x<sx; x++ )
+                        {
+                            gt_index_type ix = x<<1;
+
+                            out( (size_t)(x+offset) ) = bh(ix, iy, iz) 
+                                        + ( bh(ix+1, iy, iz) + bh(ix-1, iy, iz) ) 
+                                        + ( bh(ix, iy+1, iz) + bh(ix, iy-1, iz) )
+                                        + ( bh(ix, iy, iz+1) + bh(ix, iy, iz-1) );
+                        }
+                    }
+                }
+
+                Gadgetron::scal(weight, out); // presumably scales out by weight, turning the sums into averages -- confirm
+            }
+            else if ( D == 4 )
+            {
+                gt_index_type sx = (gt_index_type)dim_out[0];
+                gt_index_type sy = (gt_index_type)dim_out[1];
+                gt_index_type sz = (gt_index_type)dim_out[2];
+                gt_index_type st = (gt_index_type)dim_out[3];
+
+                T weight = 1.0/9; // center + 2 neighbors in each of 4 dimensions = 9 samples
+
+                gt_index_type x, y, z, t;
+
+                #pragma omp parallel for default(none) private(x, y, z, t) shared(sx, sy, sz, st, bh, out)
+                for ( t=0; t<st; t++ )
+                {
+                    gt_index_type it = t<<1; // input index = 2 * output index
+
+                    for ( z=0; z<sz; z++ )
+                    {
+                        gt_index_type iz = z<<1;
+
+                        for ( y=0; y<sy; y++ )
+                        {
+                            gt_index_type iy = y<<1;
+
+                            size_t offset = y*sx + z*sx*sy + t*sx*sy*sz; // linear offset of the first output pixel of row (y, z, t)
+
+                            for ( x=0; x<sx; x++ )
+                            {
+                                gt_index_type ix = x<<1;
+
+                                out( (size_t)(x+offset) ) = bh(ix, iy, iz, it) 
+                                            + ( bh(ix+1, iy, iz, it) + bh(ix-1, iy, iz, it) ) 
+                                            + ( bh(ix, iy+1, iz, it) + bh(ix, iy-1, iz, it) )
+                                            + ( bh(ix, iy, iz+1, it) + bh(ix, iy, iz-1, it) )
+                                            + ( bh(ix, iy, iz, it+1) + bh(ix, iy, iz, it-1) );
+                            }
+                        }
+                    }
+                }
+
+                Gadgetron::scal(weight, out); // presumably scales out by weight, turning the sums into averages -- confirm
+            }
+            else // generic path for every other D
+            {
+                T weight = 1.0/(2*D+1); // center + 2 neighbors per dimension
+
+                gt_index_type N = out.get_number_of_elements();
+
+                gt_index_type n;
+
+                #pragma omp parallel default(none) private(n) shared(N, bh, out, dim_out)
+                {
+                    std::vector<size_t> ind_out(D); // output pixel index (one scratch vector per thread)
+                    std::vector<gt_index_type> ind_in(D); // input index handed to bh
+
+                    #pragma omp for 
+                    for ( n=0; n<N; n++ )
+                    {
+                        out.calculate_index(n, ind_out); // linear offset -> N-D index
+
+                        unsigned int ii;
+                        for ( ii=0; ii<D; ii++ )
+                        {
+                            ind_in[ii] = ind_out[ii]<<1; // input index = 2 * output index
+                        }
+
+                        T v = bh(ind_in); // center sample
+
+                        for ( ii=0; ii<D; ii++ )
+                        {
+                            ind_in[ii]++; // +1 neighbor along dimension ii
+                            v += bh(ind_in);
+
+                            ind_in[ii]--;
+                            ind_in[ii]--; // -1 neighbor along dimension ii
+                            v += bh(ind_in);
+
+                            ind_in[ii]++; // restore the center index before moving to the next dimension
+                        }
+
+                        out(n) = v;
+                    }
+                }
+
+                Gadgetron::scal(weight, out); // presumably scales out by weight, turning the sums into averages -- confirm
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in downsampleImageBy2WithAveraging(const hoNDImage<T, D>& in, hoNDImage<T, D>& out) ... "); // NOTE(review): the text omits the bh parameter of the actual signature
+            return false;
+        }
+
+        return true;
+    }
+
+    template<typename T, typename BoundaryHandlerType, unsigned int D> 
+    bool expandImageBy2(const hoNDImage<T, D>& in, BoundaryHandlerType& bh, hoNDImage<T, D>& out) // fill 'out' with a 2x linearly interpolated copy of 'in'; 'out' is not allocated here
+    {
+        try
+        {
+            typedef typename hoNDImage<T, D>::coord_type coord_type;
+
+            bh.setArray( const_cast< hoNDImage<T, D>& >(in) ); // input samples are read through the boundary handler
+
+            /// get the coordinate parameters of the input image
+            std::vector<size_t> dim;
+            in.get_dimensions(dim);
+
+            std::vector<coord_type> pixelSize;
+            in.get_pixel_size(pixelSize);
+
+            std::vector<coord_type> origin;
+            in.get_origin(origin);
+
+            typename hoNDImage<T, D>::axis_type axis;
+            in.get_axis(axis);
+
+            /// compute out pixel size (half of the input pixel size)
+            std::vector<coord_type> pixelSize_out(D);
+
+            unsigned int ii;
+            for ( ii=0; ii<D; ii++ )
+            {
+                pixelSize_out[ii] = pixelSize[ii]* (coord_type)0.5;
+            }
+
+            out.set_pixel_size(pixelSize_out); // NOTE(review): out is never created here; assumes the caller sized it to at least 2*dim per dimension -- confirm
+            out.set_origin(origin);
+            out.set_axis(axis);
+
+            if ( D == 2 ) // D is a template parameter; 2D and 3D get dedicated loops
+            {
+                gt_index_type sx = (gt_index_type)dim[0];
+                gt_index_type sy = (gt_index_type)dim[1];
+
+                gt_index_type x, y;
+
+                #pragma omp parallel for default(none) private(x, y) shared(sx, sy, bh, out)
+                for ( y=0; y<sy; y++ )
+                {
+                    size_t oy = y<<1; // output index = 2 * input index
+
+                    for ( x=0; x<sx; x++ )
+                    {
+                        size_t ox = x<<1;
+
+                        T p00 = bh(x, y); // the input pixel and its +1 neighbors; x+1 / y+1 is past the end on the last column / row, so bh must accept that
+                        T p10 = bh(x+1, y);
+                        T p01 = bh(x, y+1);
+                        T p11 = bh(x+1, y+1);
+
+                        out( ox, oy ) = p00; // each input pixel fills a 2x2 output block
+                        out( ox+1, oy ) = 0.5*(p00 + p10);
+                        out( ox, oy+1 ) = 0.5*(p00 + p01);
+                        out( ox+1, oy+1 ) = 0.25*(p00+p10+p01+p11);
+                    }
+                }
+
+                // if out has odd sizes, the last column / row was not written above
+                gt_index_type sx_out = (gt_index_type)out.get_size(0);
+                gt_index_type sy_out = (gt_index_type)out.get_size(1);
+
+                if ( (2*sx) < sx_out )
+                {
+                    for ( y=0; y<sy_out; y++ )
+                    {
+                        size_t offset = y*sx_out + sx_out-1; // last pixel of row y
+                        out(offset) = out(offset-1); // replicate the previous column
+                    }
+                }
+
+                if ( (2*sy) < sy_out )
+                {
+                    memcpy(out.begin()+(sy_out-1)*sx_out, out.begin()+(sy_out-2)*sx_out, sizeof(T)*sx_out); // copy the second-to-last row into the last row
+                }
+            }
+            else if ( D == 3 )
+            {
+                gt_index_type sx = (gt_index_type)dim[0];
+                gt_index_type sy = (gt_index_type)dim[1];
+                gt_index_type sz = (gt_index_type)dim[2];
+
+                gt_index_type x, y, z;
+
+                #pragma omp parallel for default(none) private(x, y, z) shared(sx, sy, sz, bh, out)
+                for ( z=0; z<sz; z++ )
+                {
+                    size_t oz = z<<1; // output index = 2 * input index
+
+                    for ( y=0; y<sy; y++ )
+                    {
+                        size_t oy = y<<1;
+
+                        for ( x=0; x<sx; x++ )
+                        {
+                            size_t ox = x<<1;
+
+                            T p000 = bh(x, y, z); // the 8 corners of the input cell starting at (x, y, z)
+                            T p100 = bh(x+1, y, z);
+                            T p010 = bh(x, y+1, z);
+                            T p110 = bh(x+1, y+1, z);
+
+                            T p001 = bh(x, y, z+1);
+                            T p101 = bh(x+1, y, z+1);
+                            T p011 = bh(x, y+1, z+1);
+                            T p111 = bh(x+1, y+1, z+1);
+
+                            out( ox, oy, oz ) = p000; // each input pixel fills a 2x2x2 output block
+                            out( ox+1, oy, oz ) = 0.5*(p000 + p100);
+                            out( ox, oy+1, oz ) = 0.5*(p000 + p010);
+                            out( ox+1, oy+1, oz ) = 0.25*(p000+p100+p010+p110);
+
+                            out( ox, oy, oz+1 ) = 0.5*(p000 + p001);
+                            out( ox+1, oy, oz+1 ) = 0.25*(p000 + p100 + p001 + p101);
+                            out( ox, oy+1, oz+1 ) = 0.25*(p000 + p010 + p001 + p011);
+                            out( ox+1, oy+1, oz+1 ) = 0.125*(p000+p100+p010+p110+p001+p101+p011+p111);
+                        }
+                    }
+                }
+
+                // if out has odd sizes, the last column / row / slice was not written above
+                gt_index_type sx_out = (gt_index_type)out.get_size(0);
+                gt_index_type sy_out = (gt_index_type)out.get_size(1);
+                gt_index_type sz_out = (gt_index_type)out.get_size(2);
+
+                if ( (2*sx) < sx_out )
+                {
+                    for ( z=0; z<sz_out; z++ )
+                    {
+                        for ( y=0; y<sy_out; y++ )
+                        {
+                            size_t offset = y*sx_out + z*sx_out*sy_out; // linear offset of the first pixel of row (y, z)
+
+                            out( size_t(sx_out-1+offset) ) = out( size_t(sx_out-2+offset) ); // replicate the previous column
+                        }
+                    }
+                }
+
+                if ( (2*sy) < sy_out )
+                {
+                    for ( z=0; z<sz_out; z++ )
+                    {
+                        size_t offset = z*sx_out*sy_out + (sy_out-1)*sx_out; // linear offset of the last row of slice z
+
+                        for ( x=0; x<sx_out; x++ )
+                        {
+                            out( (size_t)(x+offset) ) = out( (size_t)(x+offset-sx_out) ); // replicate the previous row
+                        }
+                    }
+                }
+
+                if ( (2*sz) < sz_out )
+                {
+                    memcpy(out.begin()+(sz_out-1)*sx_out*sy_out, out.begin()+(sz_out-2)*sx_out*sy_out, sizeof(T)*sx_out*sy_out); // copy the second-to-last slice into the last slice
+                }
+            }
+            else // generic path for every other D: linear interpolation at half-index positions
+            {
+                hoNDInterpolatorLinear<hoNDImage<T, D> > interp(const_cast< hoNDImage<T, D>& >(in), bh);
+
+                gt_index_type N = (gt_index_type)(out.get_number_of_elements());
+
+                gt_index_type n;
+
+                #pragma omp parallel default(none) private(n) shared(N, bh, in, out, interp)
+                {
+                    std::vector<size_t> ind_out(D); // output pixel index (one scratch vector per thread)
+                    std::vector<coord_type> ind_in(D); // matching input image coordinate
+
+                    #pragma omp for 
+                    for ( n=0; n<N; n++ )
+                    {
+                        out.calculate_index(n, ind_out); // linear offset -> N-D index
+
+                        unsigned int ii;
+                        for ( ii=0; ii<D; ii++ )
+                        {
+                            ind_in[ii] = (coord_type)(ind_out[ii]*0.5); // output index k maps to input coordinate k/2
+                        }
+
+                        out( (size_t)(n) ) = interp(ind_in);
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in expandImageBy2(const hoNDImage<T, D>& in, BoundaryHandlerType& bh, hoNDImage<T, D>& out) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /**
+    * \brief Median filter for 1D, 2D and 3D arrays.
+    *
+    * img_out is first overwritten with a copy of img. For 1D, 2D and 3D inputs every
+    * element is then replaced by the median of the window centred on it. The window
+    * spans 2*(w[d]/2)+1 samples along dimension d, and window positions that fall
+    * outside the array are clamped to the nearest valid index, so all borders
+    * (including the lateral ones) are filtered the same way.
+    *
+    * \param img      input array; its value_type must be orderable with operator<
+    * \param w        window widths, one per dimension; only the first D entries are read,
+    *                 where D is img.get_number_of_dimensions()
+    * \param img_out  receives the result; for any other D it is left as a copy of img
+    * \return true on success (also when D is not 1, 2 or 3);
+    *         false, after logging, if an exception was caught
+    *
+    * NOTE(review): img and img_out are assumed to be distinct objects; if they alias,
+    * the window reads would see already-filtered values - confirm against callers.
+    */
+    template<class ArrayType> 
+    bool filterMedian(const ArrayType& img, size_t w[], ArrayType& img_out)
+    {
+        try
+        {
+            typedef typename ArrayType::value_type T;
+
+            size_t D = img.get_number_of_dimensions();
+
+            img_out = img;
+
+            // Only 1D, 2D and 3D inputs are filtered; anything else stays a plain copy.
+            if ( D < 1 || D > 3 )
+            {
+                return true;
+            }
+
+            // Handle every supported case as a 3D volume: unused dimensions get size 1
+            // and a zero half-width, so w[] is never read beyond its first D entries.
+            long long half[3] = { 0, 0, 0 };
+            long long dim[3] = { 1, 1, 1 };
+
+            size_t d;
+            for ( d=0; d<D; d++ )
+            {
+                half[d] = (long long)(w[d]/2);
+                dim[d] = (long long)img.get_size(d);
+            }
+
+            long long halfX = half[0];
+            long long halfY = half[1];
+            long long halfZ = half[2];
+
+            long long sx = dim[0];
+            long long sy = dim[1];
+            long long sz = dim[2];
+
+            // N is zero whenever any dimension is empty, in which case the loop below
+            // never runs and the divisions by sx and sx*sy are never evaluated.
+            long long N = sx*sy*sz;
+
+            long long winLen = (2*halfX+1)*(2*halfY+1)*(2*halfZ+1);
+            long long medianInd = winLen/2;
+
+            const T* pImg = img.begin();
+            T* pImgOut = img_out.begin();
+
+            long long n, x, y, z, tx, ty, tz, hx, hy, hz;
+
+            #pragma omp parallel default(none) private(n, x, y, z, tx, ty, tz, hx, hy, hz) shared(halfX, halfY, halfZ, sx, sy, sz, N, winLen, medianInd, pImg, pImgOut)
+            {
+                // per-thread scratch buffer holding one window
+                std::vector<T> buf(winLen);
+
+                #pragma omp for 
+                for ( n=0; n<N; n++ )
+                {
+                    // recover the coordinates of the linear index n
+                    x = n % sx;
+                    y = (n / sx) % sy;
+                    z = n / (sx*sy);
+
+                    size_t ind(0);
+                    for ( hz=-halfZ; hz<=halfZ; hz++ )
+                    {
+                        tz = z + hz;
+                        if ( tz < 0 ) tz = 0;
+                        if ( tz > sz-1 ) tz = sz-1;
+
+                        for ( hy=-halfY; hy<=halfY; hy++ )
+                        {
+                            ty = y + hy;
+                            if ( ty < 0 ) ty = 0;
+                            if ( ty > sy-1 ) ty = sy-1;
+
+                            for ( hx=-halfX; hx<=halfX; hx++ )
+                            {
+                                tx = x + hx;
+                                if ( tx < 0 ) tx = 0;
+                                if ( tx > sx-1 ) tx = sx-1;
+
+                                buf[ind++] = pImg[tx + ty*sx + tz*sx*sy];
+                            }
+                        }
+                    }
+
+                    // Only the element at medianInd is needed, so a full sort is unnecessary.
+                    std::nth_element(buf.begin(), buf.begin()+medianInd, buf.end());
+
+                    pImgOut[n] = buf[medianInd];
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in filterMedian(const ArrayType& img, size_t w[], ArrayType& img_out) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /**
+    * \brief Filters every line along the first dimension of img with the 1D kernel ker.
+    *
+    * img_out is first assigned a copy of img. The array is then treated as
+    * get_number_of_elements()/get_size(0) consecutive lines of length get_size(0),
+    * and each output sample k of a line is computed as
+    *     sum over j in [0, kerLen) of  line(k + j - kerLen/2) * ker[kerLen - 1 - j]
+    * where reads of the line go through the boundary handler selected by bh.
+    *
+    * \param img      input array
+    * \param ker      real-valued kernel; its length is taken from ker.get_size(0)
+    * \param bh       boundary condition; values other than FIXEDVALUE, PERIODIC and
+    *                 MIRROR select the border-value handler
+    * \param img_out  receives the filtered array
+    * \return true on success; false, after logging, if an exception was caught
+    */
+    template<class ArrayType> 
+    bool filter1D(const ArrayType& img, const hoNDArray<typename realType<typename ArrayType::value_type>::Type>& ker, GT_BOUNDARY_CONDITION bh, ArrayType& img_out)
+    {
+        try
+        {
+            typedef typename ArrayType::value_type T;
+            typedef typename realType<T>::Type real_value_type;
+
+            long long RO = (long long)img.get_size(0);
+            long long num = (long long)(img.get_number_of_elements()/RO);
+
+            img_out = img;
+
+            long long kerLen = (long long)ker.get_size(0);
+            long long kerHalfLen = kerLen/2;
+
+            const real_value_type* pKer = ker.begin();
+
+            long long line;
+
+            #pragma omp parallel default(none) private(line) shared(bh, num, RO, img, img_out, kerLen, kerHalfLen, pKer)
+            {
+                // Each thread owns one handler of every kind and points at the requested one.
+                hoNDBoundaryHandlerFixedValue<ArrayType> fixedValueHandler;
+                hoNDBoundaryHandlerBorderValue<ArrayType> borderValueHandler;
+                hoNDBoundaryHandlerPeriodic<ArrayType> periodicHandler;
+                hoNDBoundaryHandlerMirror<ArrayType> mirrorHandler;
+
+                // The border-value handler doubles as the fallback for unrecognised values of bh.
+                hoNDBoundaryHandler<ArrayType>* handler = &borderValueHandler;
+
+                if ( bh == GT_BOUNDARY_CONDITION_FIXEDVALUE )
+                {
+                    handler = &fixedValueHandler;
+                }
+                else if ( bh == GT_BOUNDARY_CONDITION_PERIODIC )
+                {
+                    handler = &periodicHandler;
+                }
+                else if ( bh == GT_BOUNDARY_CONDITION_MIRROR )
+                {
+                    handler = &mirrorHandler;
+                }
+
+                #pragma omp for 
+                for ( line=0; line<num; line++ )
+                {
+                    const long long lineOffset = line*RO;
+
+                    // Wrap the current input and output lines as 1D arrays over the existing buffers.
+                    ArrayType src(RO, const_cast<T*>(img.begin()+lineOffset));
+                    handler->setArray(src);
+
+                    ArrayType dst(RO, img_out.begin()+lineOffset);
+
+                    for ( long long pos=0; pos<RO; pos++ )
+                    {
+                        T acc = 0;
+
+                        // Walk the samples forwards and the kernel backwards, in the same
+                        // order as an ascending loop over the kernel taps would.
+                        long long sample = pos - kerHalfLen;
+                        for ( long long tap=kerLen-1; tap>=0; tap-- )
+                        {
+                            acc += (*handler)(sample) * pKer[tap];
+                            sample++;
+                        }
+
+                        dst(pos) = acc;
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in filter1D(const ArrayType& img, const hoNDArray<T>& ker, GT_BOUNDARY_CONDITION bh, ArrayType& img_out) ... ");
+            return false;
+        }
+
+        return true;
+    }
+}
diff --git a/toolboxes/core/cpu/math/hoNDImage_util_instantiate.hxx b/toolboxes/core/cpu/math/hoNDImage_util_instantiate.hxx
new file mode 100644
index 0000000..fb05753
--- /dev/null
+++ b/toolboxes/core/cpu/math/hoNDImage_util_instantiate.hxx
@@ -0,0 +1,15 @@
+// Explicit instantiations of filterGaussian for real-valued hoNDImage<T, DimImage>. NOTE(review): DimImage and EXPORTCPUCOREMATH are not defined in this file; presumably the including file provides them - confirm.
+template EXPORTCPUCOREMATH bool filterGaussian(hoNDImage<float, DimImage>& img, float sigma[], float* mem);
+template EXPORTCPUCOREMATH bool filterGaussian(hoNDImage<double, DimImage>& img, double sigma[], double* mem);
+template EXPORTCPUCOREMATH bool filterGaussian(hoNDImage<float, DimImage>& img, double sigma[], float* mem);
+template EXPORTCPUCOREMATH bool filterGaussian(hoNDImage<double, DimImage>& img, float sigma[], double* mem);
+// filterGaussian for complex-valued images; sigma stays real-valued (float or double) while mem uses the image's complex type.
+template EXPORTCPUCOREMATH bool filterGaussian(hoNDImage< std::complex<float> , DimImage>& img, float sigma[],  std::complex<float> * mem);
+template EXPORTCPUCOREMATH bool filterGaussian(hoNDImage< std::complex<double> , DimImage>& img, double sigma[],  std::complex<double> * mem);
+template EXPORTCPUCOREMATH bool filterGaussian(hoNDImage< std::complex<float> , DimImage>& img, double sigma[],  std::complex<float> * mem);
+template EXPORTCPUCOREMATH bool filterGaussian(hoNDImage< std::complex<double> , DimImage>& img, float sigma[],  std::complex<double> * mem);
+// gradient for real- and complex-valued images; gx is an array of images with the same element type as x.
+template EXPORTCPUCOREMATH bool gradient(const hoNDImage<float, DimImage>& x, hoNDImage<float, DimImage> gx[]);
+template EXPORTCPUCOREMATH bool gradient(const hoNDImage<double, DimImage>& x, hoNDImage<double, DimImage> gx[]);
+template EXPORTCPUCOREMATH bool gradient(const hoNDImage< std::complex<float> , DimImage>& x, hoNDImage< std::complex<float> , DimImage> gx[]);
+template EXPORTCPUCOREMATH bool gradient(const hoNDImage< std::complex<double> , DimImage>& x, hoNDImage< std::complex<double> , DimImage> gx[]);
diff --git a/toolboxes/core/gpu/CMakeLists.txt b/toolboxes/core/gpu/CMakeLists.txt
index 740091f..03254d8 100644
--- a/toolboxes/core/gpu/CMakeLists.txt
+++ b/toolboxes/core/gpu/CMakeLists.txt
@@ -3,19 +3,16 @@ if (WIN32)
 endif (WIN32)
 
 if(WIN32)
-link_directories(${Boost_LIBRARY_DIRS})
+  link_directories(${Boost_LIBRARY_DIRS})
 endif(WIN32)
 
-find_package(CULA REQUIRED)
-
 include_directories( 
   ${CUDA_INCLUDE_DIRS}
-  ${CULA_INCLUDE_DIR}
   ${CMAKE_SOURCE_DIR}/toolboxes/core
   ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
 )
 
-cuda_add_library(gpucore SHARED 
+cuda_add_library(gadgetron_toolbox_gpucore SHARED 
     check_CUDA.h
     CUBLASContextProvider.h
     cudaDeviceManager.h
@@ -25,15 +22,12 @@ cuda_add_library(gpucore SHARED
     cuNDArray_operators.h
     cuNDArray_utils.h
     cuNDArray_reductions.h
-    cuNDFFT.h
-    cuNDFFT.cpp
     GadgetronCuException.h
     gpucore_export.h
     GPUTimer.h
     hoCuNDArray.h
     hoCuNDArray_blas.h
     hoCuNDArray_elemwise.h
-    hoCuNDArray_operators.h
     hoCuNDArray_utils.h
     radial_utilities.h
     real_utilities_device.h
@@ -49,15 +43,18 @@ cuda_add_library(gpucore SHARED
     cudaDeviceManager.cpp
   )
 
-target_link_libraries(gpucore cpucore 
+set_target_properties(gadgetron_toolbox_gpucore PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+
+target_link_libraries(gadgetron_toolbox_gpucore 
+  gadgetron_toolbox_cpucore 
   ${Boost_LIBRARIES}
   ${CUDA_LIBRARIES} 
   ${CUDA_CUFFT_LIBRARIES} 
   ${CUDA_CUBLAS_LIBRARIES} 
-  ${CULA_LIBRARIES}
+  ${MKL_LIBRARIES}
   )
 
-install(TARGETS gpucore DESTINATION lib)
+install(TARGETS gadgetron_toolbox_gpucore DESTINATION lib COMPONENT main)
 
 install(FILES
   gpucore_export.h
@@ -70,12 +67,10 @@ install(FILES
   cuNDArray_reductions.h
   hoCuNDArray.h
   hoCuNDArray_blas.h
-  hoCuNDArray_operators.h
   hoCuNDArray_elemwise.h
   hoCuNDArray_utils.h
   hoCuNDArray_math.h
   GPUTimer.h				
-  cuNDFFT.h
   GadgetronCuException.h
   radial_utilities.h
   real_utilities_device.h
@@ -83,4 +78,4 @@ install(FILES
   cudaDeviceManager.h
   CUBLASContextProvider.h
   setup_grid.h
-  DESTINATION include)
+  DESTINATION include COMPONENT main)
diff --git a/toolboxes/core/gpu/CUBLASContextProvider.cpp b/toolboxes/core/gpu/CUBLASContextProvider.cpp
index 4d1e38e..9a8c702 100644
--- a/toolboxes/core/gpu/CUBLASContextProvider.cpp
+++ b/toolboxes/core/gpu/CUBLASContextProvider.cpp
@@ -1,13 +1,10 @@
-/*
- * CUBLASContextProvider.cpp
- *
- *  Created on: Mar 22, 2012
- *      Author: Michael S. Hansen
- */
-
 #include "CUBLASContextProvider.h"
+
 #include <cuda_runtime_api.h>
+
+#ifdef _WITH_CULA_SUPPORT
 #include <cula_lapack_device.h>
+#endif
 
 
 CUBLASContextProvider* CUBLASContextProvider::instance()
@@ -24,7 +21,10 @@ CUBLASContextProvider::~CUBLASContextProvider()
 		if (cudaSetDevice(it->first)!= cudaSuccess) {
 		    std::cerr << "Error: unable to set CUDA device." << std::endl;
 		}
+
+#ifdef _WITH_CULA_SUPPORT
 		culaShutdown();
+#endif
 
 		cublasDestroy_v2(it->second);
 		it++;
@@ -90,12 +90,14 @@ cublasHandle_t* CUBLASContextProvider::getCublasHandle(int device_no)
 
 	handles_[device_no] = handle;
 
+#ifdef _WITH_CULA_SUPPORT
 	culaStatus s;
 	s = culaInitialize();
 	if(s != culaNoError) {
 		std::cerr << "CUBLASContextProvider: failed to initialize CULA" << std::endl;
 		return 0;
 	}
+#endif
 
 	if (current_device_no != device_no) {
 		//We must switch context back
diff --git a/toolboxes/core/gpu/GPUTimer.h b/toolboxes/core/gpu/GPUTimer.h
index 16128cd..a2cb182 100644
--- a/toolboxes/core/gpu/GPUTimer.h
+++ b/toolboxes/core/gpu/GPUTimer.h
@@ -13,63 +13,61 @@
 
 namespace Gadgetron{
 
-  class GPUTimer
-  {
-  public:
-
-    GPUTimer() : name_("GPUTimer"), timing_in_destruction_(true)
-    {
-      start();
-    }
-
-    GPUTimer(bool timing) : name_("GPUTimer"), timing_in_destruction_(timing)
+    class GPUTimer
     {
-      if ( timing_in_destruction_ )
+    public:
+        GPUTimer() : name_("GPUTimer"), timing_in_destruction_(true)
         {
-          start();
+            start();
         }
-    }
 
-    GPUTimer(const char* name) : name_(name), timing_in_destruction_(true)
-    {
-      start();
-    }
+        GPUTimer(bool timing) : name_("GPUTimer"), timing_in_destruction_(timing)
+        {
+            if ( timing_in_destruction_ )
+            {
+                start();
+            }
+        }
 
-    virtual ~GPUTimer() 
-    {
-      if ( timing_in_destruction_ )
+        GPUTimer(const char* name) : name_(name), timing_in_destruction_(true)
         {
-          stop();
+            start();
         }
-    }
 
-    virtual void start()
-    {
-      cudaEventCreate(&start_event_);
-      cudaEventCreate(&stop_event_);
-      cudaEventRecord( start_event_, 0 );
-    }
+        virtual ~GPUTimer() 
+        {
+            if ( timing_in_destruction_ )
+            {
+                stop();
+            }
+        }
 
-    virtual void stop()
-    {
-      float time;
-      cudaEventRecord( stop_event_, 0 );
-      cudaEventSynchronize( stop_event_ );
-      cudaEventElapsedTime( &time, start_event_, stop_event_ );
-      cudaEventDestroy( start_event_ );
-      cudaEventDestroy( stop_event_ );
+        virtual void start()
+        {
+            cudaEventCreate(&start_event_);
+            cudaEventCreate(&stop_event_);
+            cudaEventRecord( start_event_, 0 );
+        }
 
-      std::cout << name_ << ": " << time << " ms" << std::endl; std::cout.flush();
-    }
+        virtual void stop()
+        {
+            float time;
+            cudaEventRecord( stop_event_, 0 );
+            cudaEventSynchronize( stop_event_ );
+            cudaEventElapsedTime( &time, start_event_, stop_event_ );
+            cudaEventDestroy( start_event_ );
+            cudaEventDestroy( stop_event_ );
+
+            std::cout << name_ << ": " << time << " ms" << std::endl; std::cout.flush();
+        }
 
-    void set_timing_in_destruction(bool timing) { timing_in_destruction_ = timing; }
+        void set_timing_in_destruction(bool timing) { timing_in_destruction_ = timing; }
 
-    cudaEvent_t start_event_;
-    cudaEvent_t stop_event_;
+        cudaEvent_t start_event_;
+        cudaEvent_t stop_event_;
 
-    std::string name_;
-    bool timing_in_destruction_;
-  };
+        std::string name_;
+        bool timing_in_destruction_;
+    };
 }
-
 #endif //__GPUTIMER_H
diff --git a/toolboxes/core/gpu/cuNDArray.h b/toolboxes/core/gpu/cuNDArray.h
index 143e4a1..b8fb088 100644
--- a/toolboxes/core/gpu/cuNDArray.h
+++ b/toolboxes/core/gpu/cuNDArray.h
@@ -1,5 +1,5 @@
 /** \file cuNDArray.h
-    \brief GPU-based N-dimensional array (data container)
+\brief GPU-based N-dimensional array (data container)
 */
 
 #ifndef CUNDARRAY_H
@@ -11,7 +11,7 @@
 #include "complext.h"
 #include "GadgetronCuException.h"
 #include "check_CUDA.h"
-
+#include "hoCuNDArray.h"
 #include <boost/shared_ptr.hpp>
 #include <cuda.h>
 #include <cuda_runtime_api.h>
@@ -19,683 +19,809 @@
 
 namespace Gadgetron{
 
-  template <class T> class cuNDArray : public NDArray<T>
-  {
+    template <typename T> class cuNDArray : public NDArray<T>
+    {
+
+    public:
+
+        // Constructors
+        //
+
+        cuNDArray();
+        cuNDArray(const cuNDArray<T> &a);
+        cuNDArray(const cuNDArray<T> *a);
+        cuNDArray(const hoNDArray<T> &a);
+        cuNDArray(hoNDArray<T> *a);
+
+        cuNDArray(std::vector<size_t> *dimensions);
+        cuNDArray(std::vector<size_t> *dimensions, int device_no);
+        cuNDArray(std::vector<size_t> *dimensions, T* data, bool delete_data_on_destruct = false);
+
+        cuNDArray(std::vector<size_t> &dimensions);
+        cuNDArray(std::vector<size_t> &dimensions, int device_no);
+        cuNDArray(std::vector<size_t> &dimensions, T* data, bool delete_data_on_destruct = false);
+
+        cuNDArray(boost::shared_ptr<std::vector<size_t> > dimensions);
+        cuNDArray(boost::shared_ptr<std::vector<size_t> > dimensions, int device_no);
+        cuNDArray(boost::shared_ptr<std::vector<size_t> > dimensions, T* data, bool delete_data_on_destruct = false);
+
+        cuNDArray(size_t len);
+        cuNDArray(size_t sx, size_t sy);
+        cuNDArray(size_t sx, size_t sy, size_t sz);
+        cuNDArray(size_t sx, size_t sy, size_t sz, size_t st);
+        cuNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp);
+        cuNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq);
+        cuNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr);
+        cuNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss);
+
+        // Destructor
+        virtual ~cuNDArray();
+
+        // Assignment operator
+        cuNDArray<T>& operator=(const cuNDArray<T>& rhs);
+        cuNDArray<T>& operator=(const hoNDArray<T>& rhs);
+
+        virtual void create(std::vector<size_t> *dimensions);
+        virtual void create(std::vector<size_t> *dimensions, int device_no);
+        virtual void create(std::vector<size_t> *dimensions, T* data, bool delete_data_on_destruct = false);
+
+        virtual void create(std::vector<size_t> &dimensions);
+        virtual void create(std::vector<size_t> &dimensions, int device_no);
+        virtual void create(std::vector<size_t> &dimensions, T* data, bool delete_data_on_destruct = false);
 
-  public:
+        virtual void create(boost::shared_ptr<std::vector<size_t> > dimensions);
+        virtual void create(boost::shared_ptr<std::vector<size_t> > dimensions, int device_no);
+        virtual void create(boost::shared_ptr<std::vector<size_t> > dimensions, T* data, bool delete_data_on_destruct = false);
 
-    // Constructors
-    //
+        virtual void create(size_t len);
+        virtual void create(size_t sx, size_t sy);
+        virtual void create(size_t sx, size_t sy, size_t sz);
+        virtual void create(size_t sx, size_t sy, size_t sz, size_t st);
+        virtual void create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp);
+        virtual void create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq);
+        virtual void create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr);
+        virtual void create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss);
 
-    cuNDArray() : NDArray<T>::NDArray() 
+        virtual boost::shared_ptr< hoNDArray<T> > to_host() const;
+        virtual void to_host( hoNDArray<T> *out ) const;
+
+        virtual void set_device(int device);
+        int get_device();
+
+        thrust::device_ptr<T> get_device_ptr();
+        thrust::device_ptr<T> begin();
+        thrust::device_ptr<T> end();
+
+        T at( size_t idx );
+        T operator[]( size_t idx );
+
+
+    protected:
+
+        int device_; 
+
+        virtual void allocate_memory();
+        virtual void deallocate_memory();
+    };
+
+    template <typename T> 
+    cuNDArray<T>::cuNDArray() : NDArray<T>::NDArray() 
     { 
-      cudaGetDevice(&this->device_); 
-    }
-
-    cuNDArray(const cuNDArray<T> &a) : NDArray<T>::NDArray() 
-    {
-      cudaGetDevice(&this->device_);
-      this->data_ = 0;
-      this->dimensions_ = a.get_dimensions();
-      allocate_memory();
-      if (a.device_ == this->device_) {
-        CUDA_CALL(cudaMemcpy(this->data_, a.data_, this->elements_*sizeof(T), cudaMemcpyDeviceToDevice));
-      } else {
-        //This memory is on a different device, we must move it.
-        cudaSetDevice(a.device_);
-        boost::shared_ptr< hoNDArray<T> > tmp = a.to_host();
-        cudaSetDevice(this->device_);
-        cudaError_t err = cudaMemcpy(this->data_, tmp->get_data_ptr(), this->elements_*sizeof(T), cudaMemcpyHostToDevice);
-        if (err !=cudaSuccess) {
-          deallocate_memory();
-          this->data_ = 0;
-          this->dimensions_->clear();
-          throw cuda_error(err);
-        }
-      }
-    }
-
-    cuNDArray(const cuNDArray<T> *a) : NDArray<T>::NDArray() 
-    {
-      cudaGetDevice(&this->device_);
-      this->data_ = 0;
-      this->dimensions_ = a->get_dimensions();
-      allocate_memory();
-      if (a->device_ == this->device_) {
-        CUDA_CALL(cudaMemcpy(this->data_, a->data_, this->elements_*sizeof(T), cudaMemcpyDeviceToDevice));
-      } else {
-        //This memory is on a different device, we must move it.
-        cudaSetDevice(a->device_);
-        boost::shared_ptr< hoNDArray<T> > tmp = a->to_host();
-        cudaSetDevice(this->device_);
-        cudaError_t err = cudaMemcpy(this->data_, tmp->get_data_ptr(), this->elements_*sizeof(T), cudaMemcpyHostToDevice);
-        if (err !=cudaSuccess) {
-          deallocate_memory();
-          this->data_ = 0;
-          this->dimensions_->clear();
-          throw cuda_error(err);
-        }
-      }
-    }
-
-    cuNDArray(const hoNDArray<T> &a) : NDArray<T>::NDArray() 
-    {
-      cudaGetDevice(&this->device_);
-      this->dimensions_ = a.get_dimensions();
-      allocate_memory();
-      if (cudaMemcpy(this->data_, a.get_data_ptr(), this->elements_*sizeof(T), cudaMemcpyHostToDevice) != cudaSuccess) {
-        deallocate_memory();
+        cudaGetDevice(&this->device_); 
+    }
+
+    template <typename T> 
+    cuNDArray<T>::cuNDArray(const cuNDArray<T> &a) : NDArray<T>::NDArray() 
+    {
+        cudaGetDevice(&this->device_);
         this->data_ = 0;
-        this->dimensions_->clear();
-      }
+        this->dimensions_ = a.get_dimensions();
+        allocate_memory();
+        if (a.device_ == this->device_) {
+            CUDA_CALL(cudaMemcpy(this->data_, a.data_, this->elements_*sizeof(T), cudaMemcpyDeviceToDevice));
+        } else {
+            //This memory is on a different device, we must move it.
+            cudaSetDevice(a.device_);
+            boost::shared_ptr< hoNDArray<T> > tmp = a.to_host();
+            cudaSetDevice(this->device_);
+            cudaError_t err = cudaMemcpy(this->data_, tmp->get_data_ptr(), this->elements_*sizeof(T), cudaMemcpyHostToDevice);
+            if (err !=cudaSuccess) {
+                deallocate_memory();
+                this->data_ = 0;
+                this->dimensions_->clear();
+                throw cuda_error(err);
+            }
+        }
     }
 
-    cuNDArray(hoNDArray<T> *a) : NDArray<T>::NDArray() 
+    template <typename T> 
+    cuNDArray<T>::cuNDArray(const cuNDArray<T> *a) : NDArray<T>::NDArray() 
     {
-      cudaGetDevice(&this->device_);
-      this->dimensions_ = a->get_dimensions();
-      allocate_memory();
-      if (cudaMemcpy(this->data_, a->get_data_ptr(), this->elements_*sizeof(T), cudaMemcpyHostToDevice) != cudaSuccess) {
-        deallocate_memory();
+        cudaGetDevice(&this->device_);
         this->data_ = 0;
-        this->dimensions_->clear();
-      }
+        this->dimensions_ = a->get_dimensions();
+        allocate_memory();
+        if (a->device_ == this->device_) {
+            CUDA_CALL(cudaMemcpy(this->data_, a->data_, this->elements_*sizeof(T), cudaMemcpyDeviceToDevice));
+        } else {
+            //This memory is on a different device, we must move it.
+            cudaSetDevice(a->device_);
+            boost::shared_ptr< hoNDArray<T> > tmp = a->to_host();
+            cudaSetDevice(this->device_);
+            cudaError_t err = cudaMemcpy(this->data_, tmp->get_data_ptr(), this->elements_*sizeof(T), cudaMemcpyHostToDevice);
+            if (err !=cudaSuccess) {
+                deallocate_memory();
+                this->data_ = 0;
+                this->dimensions_->clear();
+                throw cuda_error(err);
+            }
+        }
+    }
+
+    template <typename T> 
+    cuNDArray<T>::cuNDArray(const hoNDArray<T> &a) : NDArray<T>::NDArray() 
+    {
+        cudaGetDevice(&this->device_);
+        this->dimensions_ = a.get_dimensions();
+        allocate_memory();
+        if (cudaMemcpy(this->data_, a.get_data_ptr(), this->elements_*sizeof(T), cudaMemcpyHostToDevice) != cudaSuccess) {
+            deallocate_memory();
+            this->data_ = 0;
+            this->dimensions_->clear();
+        }
+    }
+
+    template <typename T> 
+    cuNDArray<T>::cuNDArray(hoNDArray<T> *a) : NDArray<T>::NDArray() 
+    {
+        cudaGetDevice(&this->device_);
+        this->dimensions_ = a->get_dimensions();
+        allocate_memory();
+        if (cudaMemcpy(this->data_, a->get_data_ptr(), this->elements_*sizeof(T), cudaMemcpyHostToDevice) != cudaSuccess) {
+            deallocate_memory();
+            this->data_ = 0;
+            this->dimensions_->clear();
+        }
     }
 
-    cuNDArray(std::vector<size_t> *dimensions) : NDArray<T>::NDArray() 
+    template <typename T> 
+    cuNDArray<T>::cuNDArray(std::vector<size_t> *dimensions) : NDArray<T>::NDArray() 
     {
-      cudaGetDevice(&this->device_);
-      create(dimensions);
+        cudaGetDevice(&this->device_);
+        create(dimensions);
     }
 
-    cuNDArray(std::vector<size_t> *dimensions, int device_no) : NDArray<T>::NDArray() 
+    template <typename T> 
+    cuNDArray<T>::cuNDArray(std::vector<size_t> *dimensions, int device_no) : NDArray<T>::NDArray() 
     {
-      cudaGetDevice(&this->device_);
-      create(dimensions,device_no);
+        cudaGetDevice(&this->device_);
+        create(dimensions,device_no);
     }
 
-    cuNDArray(std::vector<size_t> *dimensions, T* data, bool delete_data_on_destruct = false) : NDArray<T>::NDArray()
+    template <typename T> 
+    cuNDArray<T>::cuNDArray(std::vector<size_t> *dimensions, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
     {
-      cudaGetDevice(&this->device_);
-      create(dimensions,data,delete_data_on_destruct);
+        cudaGetDevice(&this->device_);
+        create(dimensions,data,delete_data_on_destruct);
     }
 
-    cuNDArray(std::vector<size_t> &dimensions) : NDArray<T>::NDArray() 
+    template <typename T> 
+    cuNDArray<T>::cuNDArray(std::vector<size_t> &dimensions) : NDArray<T>::NDArray() 
     {
-      cudaGetDevice(&this->device_);
-      create(dimensions);
+        cudaGetDevice(&this->device_);
+        create(dimensions);
     }
 
-    cuNDArray(std::vector<size_t> &dimensions, int device_no) : NDArray<T>::NDArray() 
+    template <typename T> 
+    cuNDArray<T>::cuNDArray(std::vector<size_t> &dimensions, int device_no) : NDArray<T>::NDArray() 
     {
-      cudaGetDevice(&this->device_);
-      create(dimensions,device_no);
+        cudaGetDevice(&this->device_);
+        create(dimensions,device_no);
     }
 
-    cuNDArray(std::vector<size_t> &dimensions, T* data, bool delete_data_on_destruct = false) : NDArray<T>::NDArray()
+    template <typename T> 
+    cuNDArray<T>::cuNDArray(std::vector<size_t> &dimensions, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
     {
-      cudaGetDevice(&this->device_);
-      create(dimensions,data,delete_data_on_destruct);
+        cudaGetDevice(&this->device_);
+        create(dimensions,data,delete_data_on_destruct);
     }
 
-    cuNDArray(boost::shared_ptr<std::vector<size_t> > dimensions) : NDArray<T>::NDArray()
+    template <typename T> 
+    cuNDArray<T>::cuNDArray(boost::shared_ptr<std::vector<size_t> > dimensions) : NDArray<T>::NDArray()
     {
-      cudaGetDevice(&this->device_);
-      create(dimensions.get());
+        cudaGetDevice(&this->device_);
+        create(dimensions.get());
     }
 
-    cuNDArray(boost::shared_ptr<std::vector<size_t> > dimensions, int device_no) : NDArray<T>::NDArray()
+    template <typename T> 
+    cuNDArray<T>::cuNDArray(boost::shared_ptr<std::vector<size_t> > dimensions, int device_no) : NDArray<T>::NDArray()
     {
-      cudaGetDevice(&this->device_);
-      create(dimensions.get(),device_no);
+        cudaGetDevice(&this->device_);
+        create(dimensions.get(),device_no);
     }
 
-    cuNDArray(boost::shared_ptr<std::vector<size_t> > dimensions, T* data, bool delete_data_on_destruct = false) : NDArray<T>::NDArray()
+    template <typename T> 
+    cuNDArray<T>::cuNDArray(boost::shared_ptr<std::vector<size_t> > dimensions, T* data, bool delete_data_on_destruct) : NDArray<T>::NDArray()
     {
-      cudaGetDevice(&this->device_);
-      create(dimensions.get(),data,delete_data_on_destruct);
+        cudaGetDevice(&this->device_);
+        create(dimensions.get(),data,delete_data_on_destruct);
     }
 
-    cuNDArray(size_t len)
+    template <typename T> 
+    cuNDArray<T>::cuNDArray(size_t len)
     {
-      std::vector<size_t> dim(1);
-      dim[0] = len;
-      cudaGetDevice(&this->device_);
-      create(dim);
+        std::vector<size_t> dim(1);
+        dim[0] = len;
+        cudaGetDevice(&this->device_);
+        create(dim);
     }
 
-    cuNDArray(size_t sx, size_t sy)
+    template <typename T> 
+    cuNDArray<T>::cuNDArray(size_t sx, size_t sy)
     {
-      std::vector<size_t> dim(2);
-      dim[0] = sx;
-      dim[1] = sy;
-      cudaGetDevice(&this->device_);
-      create(dim);
+        std::vector<size_t> dim(2);
+        dim[0] = sx;
+        dim[1] = sy;
+        cudaGetDevice(&this->device_);
+        create(dim);
     }
 
-    cuNDArray(size_t sx, size_t sy, size_t sz)
+    template <typename T> 
+    cuNDArray<T>::cuNDArray(size_t sx, size_t sy, size_t sz)
     {
-      std::vector<size_t> dim(3);
-      dim[0] = sx;
-      dim[1] = sy;
-      dim[2] = sz;
-      cudaGetDevice(&this->device_);
-      create(dim);
+        std::vector<size_t> dim(3);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        cudaGetDevice(&this->device_);
+        create(dim);
     }
 
-    cuNDArray(size_t sx, size_t sy, size_t sz, size_t st)
+    template <typename T> 
+    cuNDArray<T>::cuNDArray(size_t sx, size_t sy, size_t sz, size_t st)
     {
-      std::vector<size_t> dim(4);
-      dim[0] = sx;
-      dim[1] = sy;
-      dim[2] = sz;
-      dim[3] = st;
-      cudaGetDevice(&this->device_);
-      create(dim);
+        std::vector<size_t> dim(4);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        cudaGetDevice(&this->device_);
+        create(dim);
     }
 
-    cuNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp)
+    template <typename T> 
+    cuNDArray<T>::cuNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp)
     {
-      std::vector<size_t> dim(5);
-      dim[0] = sx;
-      dim[1] = sy;
-      dim[2] = sz;
-      dim[3] = st;
-      dim[4] = sp;
-      cudaGetDevice(&this->device_);
-      create(dim);
+        std::vector<size_t> dim(5);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        cudaGetDevice(&this->device_);
+        create(dim);
     }
 
-    cuNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq)
+    template <typename T> 
+    cuNDArray<T>::cuNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq)
     {
-      std::vector<size_t> dim(6);
-      dim[0] = sx;
-      dim[1] = sy;
-      dim[2] = sz;
-      dim[3] = st;
-      dim[4] = sp;
-      dim[5] = sq;
-      cudaGetDevice(&this->device_);
-      create(dim);
+        std::vector<size_t> dim(6);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        dim[5] = sq;
+        cudaGetDevice(&this->device_);
+        create(dim);
     }
 
-    cuNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr)
+    template <typename T> 
+    cuNDArray<T>::cuNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr)
     {
-      std::vector<size_t> dim(7);
-      dim[0] = sx;
-      dim[1] = sy;
-      dim[2] = sz;
-      dim[3] = st;
-      dim[4] = sp;
-      dim[5] = sq;
-      dim[6] = sr;
-      cudaGetDevice(&this->device_);
-      create(dim);
+        std::vector<size_t> dim(7);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        dim[5] = sq;
+        dim[6] = sr;
+        cudaGetDevice(&this->device_);
+        create(dim);
     }
 
-    cuNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss)
+    template <typename T> 
+    cuNDArray<T>::cuNDArray(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss)
     {
-      std::vector<size_t> dim(8);
-      dim[0] = sx;
-      dim[1] = sy;
-      dim[2] = sz;
-      dim[3] = st;
-      dim[4] = sp;
-      dim[5] = sq;
-      dim[6] = sr;
-      dim[7] = ss;
-      cudaGetDevice(&this->device_);
-      create(dim);
+        std::vector<size_t> dim(8);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        dim[5] = sq;
+        dim[6] = sr;
+        dim[7] = ss;
+        cudaGetDevice(&this->device_);
+        create(dim);
     }
 
-    // Destructor
-    virtual ~cuNDArray()
+    template <typename T> 
+    cuNDArray<T>:: ~cuNDArray()
     { 
-      if (this->delete_data_on_destruct_) 
-        deallocate_memory();  
-    }
-
-    // Assignment operator
-    cuNDArray<T>& operator=(const cuNDArray<T>& rhs)
-    {
-      int cur_device; 
-      CUDA_CALL(cudaGetDevice(&cur_device));
-      bool dimensions_match = this->dimensions_equal(&rhs);
-      if (dimensions_match && (rhs.device_ == cur_device) && (cur_device == this->device_)) {
-        CUDA_CALL(cudaMemcpy(this->data_, rhs.data_, this->elements_*sizeof(T), cudaMemcpyDeviceToDevice));
-      }
-      else {
-        CUDA_CALL(cudaSetDevice(this->device_));
-        if( !dimensions_match ){
-          if(!this->delete_data_on_destruct_){
-            throw std::runtime_error("Array dimensions mismatch in cuNDArray::operator=. Cannot change dimensions of non-destructable array.");
-          }
-          deallocate_memory();
-          this->elements_ = rhs.elements_;
-          this->dimensions_ = rhs.get_dimensions();
-          allocate_memory();
-        }
-        if (this->device_ == rhs.device_) {
-          if (cudaMemcpy(this->data_, rhs.data_, this->elements_*sizeof(T), cudaMemcpyDeviceToDevice) !=cudaSuccess) {	    
-            cudaSetDevice(cur_device);
-            throw cuda_error("cuNDArray::operator=: failed to copy data (2)");
-          }
-        } else {
-          if( cudaSetDevice(rhs.device_) != cudaSuccess) {
-            cudaSetDevice(cur_device);
-            throw cuda_error("cuNDArray::operator=: unable to set device no (2)");
-          }
-          boost::shared_ptr< hoNDArray<T> > tmp = rhs.to_host();
-          if( cudaSetDevice(this->device_) != cudaSuccess) {
-            cudaSetDevice(cur_device);
-            throw cuda_error("cuNDArray::operator=: unable to set device no (3)");
-          }
-          if (cudaMemcpy(this->data_, tmp->get_data_ptr(), this->elements_*sizeof(T), cudaMemcpyHostToDevice) != cudaSuccess) {
-            cudaSetDevice(cur_device);
-            throw cuda_error("cuNDArray::operator=: failed to copy data (3)");
-          }
+        if (this->delete_data_on_destruct_) 
+            deallocate_memory();  
+    }
+
+    template <typename T> 
+    cuNDArray<T>& cuNDArray<T>::operator=(const cuNDArray<T>& rhs)
+    {
+        int cur_device; 
+        CUDA_CALL(cudaGetDevice(&cur_device));
+        bool dimensions_match = this->dimensions_equal(&rhs);
+        if (dimensions_match && (rhs.device_ == cur_device) && (cur_device == this->device_)) {
+            CUDA_CALL(cudaMemcpy(this->data_, rhs.data_, this->elements_*sizeof(T), cudaMemcpyDeviceToDevice));
         }
-        if( cudaSetDevice(cur_device) != cudaSuccess) {
-          throw cuda_error("cuNDArray::operator=: unable to restore to current device");
-        }
-      }
-      return *this;
-    }
-
-    cuNDArray<T>& operator=(const hoNDArray<T>& rhs)
-    {
-      int cur_device; 
-      CUDA_CALL(cudaGetDevice(&cur_device));
-      bool dimensions_match = this->dimensions_equal(&rhs);
-      if (dimensions_match && (cur_device == this->device_)) {
-        CUDA_CALL(cudaMemcpy(this->get_data_ptr(), rhs.get_data_ptr(), this->get_number_of_elements()*sizeof(T), cudaMemcpyHostToDevice));
-      }
-      else {
-        CUDA_CALL(cudaSetDevice(this->device_));
-        if( !dimensions_match ){
-          if (!this->delete_data_on_destruct_){
-            throw std::runtime_error("Array dimensions mismatch in cuNDArray::operator=. Cannot change dimensions of non-destructable array.");}          deallocate_memory();
-          this->elements_ = rhs.get_number_of_elements();
-          this->dimensions_ = rhs.get_dimensions();
-          allocate_memory();
-        }
-        if (cudaMemcpy(this->get_data_ptr(), rhs.get_data_ptr(), this->get_number_of_elements()*sizeof(T),
-                       cudaMemcpyHostToDevice) !=cudaSuccess) {
-          cudaSetDevice(cur_device);
-          throw cuda_error("cuNDArray::operator=: failed to copy data (1)");
+        else {
+            CUDA_CALL(cudaSetDevice(this->device_));
+            if( !dimensions_match ){
+                deallocate_memory();
+                this->elements_ = rhs.elements_;
+                this->dimensions_ = rhs.get_dimensions();
+                allocate_memory();
+            }
+            if (this->device_ == rhs.device_) {
+                if (cudaMemcpy(this->data_, rhs.data_, this->elements_*sizeof(T), cudaMemcpyDeviceToDevice) !=cudaSuccess) {	    
+                    cudaSetDevice(cur_device);
+                    throw cuda_error("cuNDArray::operator=: failed to copy data (2)");
+                }
+            } else {
+                if( cudaSetDevice(rhs.device_) != cudaSuccess) {
+                    cudaSetDevice(cur_device);
+                    throw cuda_error("cuNDArray::operator=: unable to set device no (2)");
+                }
+                boost::shared_ptr< hoNDArray<T> > tmp = rhs.to_host();
+                if( cudaSetDevice(this->device_) != cudaSuccess) {
+                    cudaSetDevice(cur_device);
+                    throw cuda_error("cuNDArray::operator=: unable to set device no (3)");
+                }
+                if (cudaMemcpy(this->data_, tmp->get_data_ptr(), this->elements_*sizeof(T), cudaMemcpyHostToDevice) != cudaSuccess) {
+                    cudaSetDevice(cur_device);
+                    throw cuda_error("cuNDArray::operator=: failed to copy data (3)");
+                }
+            }
+            if( cudaSetDevice(cur_device) != cudaSuccess) {
+                throw cuda_error("cuNDArray::operator=: unable to restore to current device");
+            }
         }
-        if( cudaSetDevice(cur_device) != cudaSuccess) {
-          throw cuda_error("cuNDArray::operator=: unable to restore to current device");
+        return *this;
+    }
+
+    template <typename T> 
+    cuNDArray<T>& cuNDArray<T>::operator=(const hoNDArray<T>& rhs)
+    {
+        int cur_device; 
+        CUDA_CALL(cudaGetDevice(&cur_device));
+        bool dimensions_match = this->dimensions_equal(&rhs);
+        if (dimensions_match && (cur_device == this->device_)) {
+            CUDA_CALL(cudaMemcpy(this->get_data_ptr(), rhs.get_data_ptr(), this->get_number_of_elements()*sizeof(T), cudaMemcpyHostToDevice));
+        }
+        else {
+            CUDA_CALL(cudaSetDevice(this->device_));
+            if( !dimensions_match ){
+                deallocate_memory();
+                this->elements_ = rhs.get_number_of_elements();
+                this->dimensions_ = rhs.get_dimensions();
+                allocate_memory();
+            }
+            if (cudaMemcpy(this->get_data_ptr(), rhs.get_data_ptr(), this->get_number_of_elements()*sizeof(T),
+                cudaMemcpyHostToDevice) !=cudaSuccess) {
+                    cudaSetDevice(cur_device);
+                    throw cuda_error("cuNDArray::operator=: failed to copy data (1)");
+            }
+            if( cudaSetDevice(cur_device) != cudaSuccess) {
+                throw cuda_error("cuNDArray::operator=: unable to restore to current device");
+            }
         }
-      }
-      return *this;
+        return *this;
     }
 
-    virtual void create(std::vector<size_t> *dimensions)
+    template <typename T> 
+    inline void cuNDArray<T>::create(std::vector<size_t> *dimensions)
     {
-      if ( this->dimensions_equal(dimensions) )
+        if ( this->dimensions_equal(dimensions) )
         {
-          return;
+            return;
         }
 
-      return NDArray<T>::create(dimensions);
+        return NDArray<T>::create(dimensions);
     }
 
-    virtual void create(std::vector<size_t> *dimensions, int device_no)
+    template <typename T> 
+    inline void cuNDArray<T>::create(std::vector<size_t> *dimensions, int device_no)
     {
-      if (device_no < 0){
-        throw cuda_error("cuNDArray::create: illegal device no");
-      }
+        if (device_no < 0){
+            throw cuda_error("cuNDArray::create: illegal device no");
+        }
 
-      if ( this->dimensions_equal(dimensions) && this->device_==device_no )
+        if ( this->dimensions_equal(dimensions) && this->device_==device_no )
         {
-          return;
+            return;
         }
 
-      this->device_ = device_no; 
-      NDArray<T>::create(dimensions);
+        this->device_ = device_no; 
+        NDArray<T>::create(dimensions);
     }
 
-    virtual void create(std::vector<size_t> *dimensions, T* data, bool delete_data_on_destruct = false)
+    template <typename T> 
+    void cuNDArray<T>::create(std::vector<size_t> *dimensions, T* data, bool delete_data_on_destruct)
     {
-      if (!data) {
-        throw std::runtime_error("cuNDArray::create: 0x0 pointer provided");
-      }
+        if (!data) {
+            throw std::runtime_error("cuNDArray::create: 0x0 pointer provided");
+        }
 
-      int tmp_device; 
-      if( cudaGetDevice(&tmp_device) != cudaSuccess) {
-        throw cuda_error("cuNDArray::create: Unable to query for device");
-      }
+        int tmp_device; 
+        if( cudaGetDevice(&tmp_device) != cudaSuccess) {
+            throw cuda_error("cuNDArray::create: Unable to query for device");
+        }
 
-      cudaDeviceProp deviceProp; 
-      if( cudaGetDeviceProperties( &deviceProp, tmp_device) != cudaSuccess) {
-        throw cuda_error("cuNDArray::create: Unable to query device properties");
-      }
+        cudaDeviceProp deviceProp; 
+        if( cudaGetDeviceProperties( &deviceProp, tmp_device) != cudaSuccess) {
+            throw cuda_error("cuNDArray::create: Unable to query device properties");
+        }
 
-      if (deviceProp.unifiedAddressing) {
-        cudaPointerAttributes attrib;
-        if (cudaPointerGetAttributes(&attrib, data) != cudaSuccess) {
-          CHECK_FOR_CUDA_ERROR();
-          throw cuda_error("cuNDArray::create: Unable to determine attributes of pointer");
+        if (deviceProp.unifiedAddressing) {
+            cudaPointerAttributes attrib;
+            if (cudaPointerGetAttributes(&attrib, data) != cudaSuccess) {
+                CHECK_FOR_CUDA_ERROR();
+                throw cuda_error("cuNDArray::create: Unable to determine attributes of pointer");
+            }
+            this->device_ = attrib.device;
+        } else {
+            this->device_ = tmp_device;
         }
-        this->device_ = attrib.device;
-      } else {
-        this->device_ = tmp_device;
-      }
 
-      NDArray<T>::create(dimensions, data, delete_data_on_destruct);
+        NDArray<T>::create(dimensions, data, delete_data_on_destruct);
     }
 
-    virtual void create(std::vector<size_t> &dimensions)
+    template <typename T> 
+    inline void cuNDArray<T>::create(std::vector<size_t> &dimensions)
     {
-      if ( this->dimensions_equal(&dimensions) )
+        if ( this->dimensions_equal(&dimensions) )
         {
-          return;
+            return;
         }
 
-      return NDArray<T>::create(dimensions);
+        return NDArray<T>::create(dimensions);
     }
 
-    virtual void create(std::vector<size_t> &dimensions, int device_no)
+    template <typename T> 
+    inline void cuNDArray<T>::create(std::vector<size_t> &dimensions, int device_no)
     {
-      if (device_no < 0){
-        throw cuda_error("cuNDArray::create: illegal device no");
-      }
+        if (device_no < 0){
+            throw cuda_error("cuNDArray::create: illegal device no");
+        }
 
-      if ( this->dimensions_equal(&dimensions) && this->device_==device_no )
+        if ( this->dimensions_equal(&dimensions) && this->device_==device_no )
         {
-          return;
+            return;
         }
 
-      this->device_ = device_no; 
-      NDArray<T>::create(dimensions);
+        this->device_ = device_no; 
+        NDArray<T>::create(dimensions);
     }
 
-    virtual void create(std::vector<size_t> &dimensions, T* data, bool delete_data_on_destruct = false)
+    template <typename T> 
+    inline void cuNDArray<T>::create(std::vector<size_t> &dimensions, T* data, bool delete_data_on_destruct)
     {
-      if (!data) {
-        throw std::runtime_error("cuNDArray::create: 0x0 pointer provided");
-      }
+        if (!data) {
+            throw std::runtime_error("cuNDArray::create: 0x0 pointer provided");
+        }
 
-      int tmp_device; 
-      if( cudaGetDevice(&tmp_device) != cudaSuccess) {
-        throw cuda_error("cuNDArray::create: Unable to query for device");
-      }
+        int tmp_device; 
+        if( cudaGetDevice(&tmp_device) != cudaSuccess) {
+            throw cuda_error("cuNDArray::create: Unable to query for device");
+        }
 
-      cudaDeviceProp deviceProp;
-      if( cudaGetDeviceProperties( &deviceProp, tmp_device) != cudaSuccess) {
-        throw cuda_error("cuNDArray::create: Unable to query device properties");
-      }
+        cudaDeviceProp deviceProp;
+        if( cudaGetDeviceProperties( &deviceProp, tmp_device) != cudaSuccess) {
+            throw cuda_error("cuNDArray::create: Unable to query device properties");
+        }
 
-      if (deviceProp.unifiedAddressing) {
-        cudaPointerAttributes attrib;
-        if (cudaPointerGetAttributes(&attrib, data) != cudaSuccess) {
-          CHECK_FOR_CUDA_ERROR();
-          throw cuda_error("cuNDArray::create: Unable to determine attributes of pointer");
+        if (deviceProp.unifiedAddressing) {
+            cudaPointerAttributes attrib;
+            if (cudaPointerGetAttributes(&attrib, data) != cudaSuccess) {
+                CHECK_FOR_CUDA_ERROR();
+                throw cuda_error("cuNDArray::create: Unable to determine attributes of pointer");
+            }
+            this->device_ = attrib.device;
+        } else {
+            this->device_ = tmp_device;
         }
-        this->device_ = attrib.device;
-      } else {
-        this->device_ = tmp_device;
-      }
 
-      NDArray<T>::create(dimensions, data, delete_data_on_destruct);
+        NDArray<T>::create(dimensions, data, delete_data_on_destruct);
     }
 
-    virtual void create(boost::shared_ptr<std::vector<size_t> > dimensions){
-      this->create(dimensions.get());
+    template <typename T> 
+    inline void cuNDArray<T>::create(boost::shared_ptr<std::vector<size_t> > dimensions){
+        this->create(dimensions.get());
     }
 
-    virtual void create(boost::shared_ptr<std::vector<size_t> > dimensions, int device_no){
-      this->create(dimensions.get(),device_no);
+    template <typename T> 
+    inline void cuNDArray<T>::create(boost::shared_ptr<std::vector<size_t> > dimensions, int device_no){
+        this->create(dimensions.get(),device_no);
     }
 
-    virtual void create(boost::shared_ptr<std::vector<size_t> > dimensions, T* data, bool delete_data_on_destruct = false){
-      this->create(dimensions.get(), data, delete_data_on_destruct);
+    template <typename T> 
+    inline void cuNDArray<T>::create(boost::shared_ptr<std::vector<size_t> > dimensions, T* data, bool delete_data_on_destruct){
+        this->create(dimensions.get(), data, delete_data_on_destruct);
     }
 
-    virtual void create(size_t len)
+    template <typename T> 
+    inline void cuNDArray<T>::create(size_t len)
     {
-      std::vector<size_t> dim(1);
-      dim[0] = len;
-      this->create(dim);
+        std::vector<size_t> dim(1);
+        dim[0] = len;
+        this->create(dim);
     }
 
-    virtual void create(size_t sx, size_t sy)
+    template <typename T> 
+    inline void cuNDArray<T>::create(size_t sx, size_t sy)
     {
-      std::vector<size_t> dim(2);
-      dim[0] = sx;
-      dim[1] = sy;
-      this->create(dim);
+        std::vector<size_t> dim(2);
+        dim[0] = sx;
+        dim[1] = sy;
+        this->create(dim);
     }
 
-    virtual void create(size_t sx, size_t sy, size_t sz)
+    template <typename T> 
+    inline void cuNDArray<T>::create(size_t sx, size_t sy, size_t sz)
     {
-      std::vector<size_t> dim(3);
-      dim[0] = sx;
-      dim[1] = sy;
-      dim[2] = sz;
-      this->create(dim);
+        std::vector<size_t> dim(3);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        this->create(dim);
     }
 
-    virtual void create(size_t sx, size_t sy, size_t sz, size_t st)
+    template <typename T> 
+    inline void cuNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st)
     {
-      std::vector<size_t> dim(4);
-      dim[0] = sx;
-      dim[1] = sy;
-      dim[2] = sz;
-      dim[3] = st;
-      this->create(dim);
+        std::vector<size_t> dim(4);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        this->create(dim);
     }
 
-    virtual void create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp)
+    template <typename T> 
+    inline void cuNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp)
     {
-      std::vector<size_t> dim(5);
-      dim[0] = sx;
-      dim[1] = sy;
-      dim[2] = sz;
-      dim[3] = st;
-      dim[4] = sp;
-      this->create(dim);
+        std::vector<size_t> dim(5);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        this->create(dim);
     }
 
-    virtual void create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq)
+    template <typename T> 
+    inline void cuNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq)
     {
-      std::vector<size_t> dim(6);
-      dim[0] = sx;
-      dim[1] = sy;
-      dim[2] = sz;
-      dim[3] = st;
-      dim[4] = sp;
-      dim[5] = sq;
-      this->create(dim);
+        std::vector<size_t> dim(6);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        dim[5] = sq;
+        this->create(dim);
     }
 
-    virtual void create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr)
+    template <typename T> 
+    inline void cuNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr)
     {
-      std::vector<size_t> dim(7);
-      dim[0] = sx;
-      dim[1] = sy;
-      dim[2] = sz;
-      dim[3] = st;
-      dim[4] = sp;
-      dim[5] = sq;
-      dim[6] = sr;
-      this->create(dim);
+        std::vector<size_t> dim(7);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        dim[5] = sq;
+        dim[6] = sr;
+        this->create(dim);
     }
 
-    virtual void create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss)
+    template <typename T> 
+    inline void cuNDArray<T>::create(size_t sx, size_t sy, size_t sz, size_t st, size_t sp, size_t sq, size_t sr, size_t ss)
     {
-      std::vector<size_t> dim(8);
-      dim[0] = sx;
-      dim[1] = sy;
-      dim[2] = sz;
-      dim[3] = st;
-      dim[4] = sp;
-      dim[5] = sq;
-      dim[6] = sr;
-      dim[7] = ss;
-      this->create(dim);
+        std::vector<size_t> dim(8);
+        dim[0] = sx;
+        dim[1] = sy;
+        dim[2] = sz;
+        dim[3] = st;
+        dim[4] = sp;
+        dim[5] = sq;
+        dim[6] = sr;
+        dim[7] = ss;
+        this->create(dim);
     }
 
-    virtual boost::shared_ptr< hoNDArray<T> > to_host() const
+    template <typename T> 
+    inline boost::shared_ptr< hoNDArray<T> > cuNDArray<T>::to_host() const
     {
-      boost::shared_ptr< hoNDArray<T> > ret(new hoNDArray<T>(this->dimensions_.get()));
-      if (cudaMemcpy(ret->get_data_ptr(), this->data_, this->elements_*sizeof(T), cudaMemcpyDeviceToHost) != cudaSuccess) {
-        throw cuda_error("cuNDArray::to_host(): failed to copy memory from device");
-      }
+        boost::shared_ptr< hoNDArray<T> > ret(new hoNDArray<T>(this->dimensions_.get()));
+        if (cudaMemcpy(ret->get_data_ptr(), this->data_, this->elements_*sizeof(T), cudaMemcpyDeviceToHost) != cudaSuccess) {
+            throw cuda_error("cuNDArray::to_host(): failed to copy memory from device");
+        }
 
-      return ret;
+        return ret;
     }
 
-    virtual void to_host( hoNDArray<T> *out ) const 
+    template <typename T> 
+    inline void cuNDArray<T>::to_host( hoNDArray<T> *out ) const 
     {
-      if( !out ){
-        throw std::runtime_error("cuNDArray::to_host(): illegal array passed.");
-      }
+        if( !out ){
+            throw std::runtime_error("cuNDArray::to_host(): illegal array passed.");
+        }
 
-      if( out->get_number_of_elements() != this->get_number_of_elements() ){	
-        out->create( this->get_dimensions().get());
-      }
+        if( out->get_number_of_elements() != this->get_number_of_elements() ){	
+            out->create( this->get_dimensions().get());
+        }
 
-      if( cudaMemcpy( out->get_data_ptr(), this->data_, this->elements_*sizeof(T), cudaMemcpyDeviceToHost) != cudaSuccess) {
-        throw cuda_error("cuNDArray::to_host(): failed to copy memory from device");
-      }
+        if( cudaMemcpy( out->get_data_ptr(), this->data_, this->elements_*sizeof(T), cudaMemcpyDeviceToHost) != cudaSuccess) {
+            throw cuda_error("cuNDArray::to_host(): failed to copy memory from device");
+        }
     }
 
-    virtual void set_device(int device)
+    template <typename T> 
+    inline void cuNDArray<T>::set_device(int device)
     {
-      if( device_ == device )
-        return;
+        if( device_ == device )
+            return;
 
-      int cur_device;
-      if( cudaGetDevice(&cur_device) != cudaSuccess) {
-        throw cuda_error("cuNDArray::set_device: unable to get device no");
-      }
+        int cur_device;
+        if( cudaGetDevice(&cur_device) != cudaSuccess) {
+            throw cuda_error("cuNDArray::set_device: unable to get device no");
+        }
 
-      if( cur_device != device_ && cudaSetDevice(device_) != cudaSuccess) {
-        throw cuda_error("cuNDArray::set_device: unable to set device no");
-      }
+        if( cur_device != device_ && cudaSetDevice(device_) != cudaSuccess) {
+            throw cuda_error("cuNDArray::set_device: unable to set device no");
+        }
 
-      boost::shared_ptr< hoNDArray<T> > tmp = to_host();
-      deallocate_memory();
-      if( cudaSetDevice(device) != cudaSuccess) {
-        cudaSetDevice(cur_device);
-        throw cuda_error("cuNDArray::set_device: unable to set device no (2)");
-      }
+        boost::shared_ptr< hoNDArray<T> > tmp = to_host();
+        deallocate_memory();
+        if( cudaSetDevice(device) != cudaSuccess) {
+            cudaSetDevice(cur_device);
+            throw cuda_error("cuNDArray::set_device: unable to set device no (2)");
+        }
 
-      device_ = device;
-      allocate_memory();
-      if (cudaMemcpy(this->data_, tmp->get_data_ptr(), this->elements_*sizeof(T), cudaMemcpyHostToDevice) != cudaSuccess) {
-        cudaSetDevice(cur_device);
-        throw cuda_error("cuNDArray::set_device: failed to copy data");
-      }
+        device_ = device;
+        allocate_memory();
+        if (cudaMemcpy(this->data_, tmp->get_data_ptr(), this->elements_*sizeof(T), cudaMemcpyHostToDevice) != cudaSuccess) {
+            cudaSetDevice(cur_device);
+            throw cuda_error("cuNDArray::set_device: failed to copy data");
+        }
 
-      if( cudaSetDevice(cur_device) != cudaSuccess) {
-        throw cuda_error("cuNDArray::set_device: unable to restore device to current device");
-      }
+        if( cudaSetDevice(cur_device) != cudaSuccess) {
+            throw cuda_error("cuNDArray::set_device: unable to restore device to current device");
+        }
     }
 
-    inline int get_device() { return device_; }
+    template <typename T> 
+    inline int cuNDArray<T>::get_device() { return device_; }
 
-    thrust::device_ptr<T> get_device_ptr(){
-      return thrust::device_ptr<T>(this->data_);
+    template <typename T> 
+    inline thrust::device_ptr<T> cuNDArray<T>::get_device_ptr()
+    {
+        return thrust::device_ptr<T>(this->data_);
     }
 
-    thrust::device_ptr<T> begin(){
-      return thrust::device_ptr<T>(this->data_);
+    template <typename T> 
+    inline thrust::device_ptr<T> cuNDArray<T>::begin()
+    {
+        return thrust::device_ptr<T>(this->data_);
     }
 
-    thrust::device_ptr<T> end(){
-      return thrust::device_ptr<T>(this->data_)+this->get_number_of_elements();
+    template <typename T> 
+    inline thrust::device_ptr<T> cuNDArray<T>::end()
+    {
+        return thrust::device_ptr<T>(this->data_)+this->get_number_of_elements();
     }
 
-    T at( size_t idx ){
-      if( idx >= this->get_number_of_elements() ){
-        throw std::runtime_error("cuNDArray::at(): index out of range.");
-      }
-      T res;
-      CUDA_CALL(cudaMemcpy(&res, &this->get_data_ptr()[idx], sizeof(T), cudaMemcpyDeviceToHost));
-      return res;
+    template <typename T> 
+    inline T cuNDArray<T>::at( size_t idx )
+    {
+        if( idx >= this->get_number_of_elements() ){
+            throw std::runtime_error("cuNDArray::at(): index out of range.");
+        }
+        T res;
+        CUDA_CALL(cudaMemcpy(&res, &this->get_data_ptr()[idx], sizeof(T), cudaMemcpyDeviceToHost));
+        return res;
     }
 
-    T operator[]( size_t idx ){
-      if( idx >= this->get_number_of_elements() ){
-        throw std::runtime_error("cuNDArray::operator[]: index out of range.");
-      }
-      T res;
-      CUDA_CALL(cudaMemcpy(&res, &this->get_data_ptr()[idx], sizeof(T), cudaMemcpyDeviceToHost));
-      return res;
+    template <typename T> 
+    inline T cuNDArray<T>::operator[]( size_t idx )
+    {
+        if( idx >= this->get_number_of_elements() ){
+            throw std::runtime_error("cuNDArray::operator[]: index out of range.");
+        }
+        T res;
+        CUDA_CALL(cudaMemcpy(&res, &this->get_data_ptr()[idx], sizeof(T), cudaMemcpyDeviceToHost));
+        return res;
     }
 
-  protected:
-
-    int device_; 
-
-    virtual void allocate_memory()
+    template <typename T> 
+    void cuNDArray<T>::allocate_memory()
     {
-      deallocate_memory();
-      this->elements_ = 1;
+        deallocate_memory();
 
-      if (this->dimensions_->empty())
-        throw std::runtime_error("cuNDArray::allocate_memory() : dimensions is empty.");
-      
-      for (size_t i = 0; i < this->dimensions_->size(); i++) {
-        this->elements_ *= (*this->dimensions_)[i];
-      } 
-      
-      size_t size = this->elements_ * sizeof(T);
+        this->elements_ = 1;
+        if (this->dimensions_->empty())
+            throw std::runtime_error("cuNDArray::allocate_memory() : dimensions is empty.");
+        for (size_t i = 0; i < this->dimensions_->size(); i++) {
+            this->elements_ *= (*this->dimensions_)[i];
+        } 
 
-      int device_no_old;
-      if (cudaGetDevice(&device_no_old) != cudaSuccess) {
-        throw cuda_error("cuNDArray::allocate_memory: unable to get device no");
-      }
+        size_t size = this->elements_ * sizeof(T);
 
-      if (device_ != device_no_old) {
-        if (cudaSetDevice(device_) != cudaSuccess) {
-          throw cuda_error("cuNDArray::allocate_memory: unable to set device no");
+        int device_no_old;
+        if (cudaGetDevice(&device_no_old) != cudaSuccess) {
+            throw cuda_error("cuNDArray::allocate_memory: unable to get device no");
         }
-      }
 
-      if (cudaMalloc((void**) &this->data_,size) != cudaSuccess) {
-        size_t free = 0, total = 0;
-        cudaMemGetInfo(&free, &total);
-        std::stringstream err("cuNDArray::allocate_memory() : Error allocating CUDA memory");
-        err << "CUDA Memory: " << free << " (" << total << ")";
+        if (device_ != device_no_old) {
+            if (cudaSetDevice(device_) != cudaSuccess) {
+                throw cuda_error("cuNDArray::allocate_memory: unable to set device no");
+            }
+        }
 
-        err << "   memory requested: " << size << "( ";
-        for (size_t i = 0; i < this->dimensions_->size(); i++) {
-          std::cerr << (*this->dimensions_)[i] << " ";
+        if (cudaMalloc((void**) &this->data_,size) != cudaSuccess) {
+            size_t free = 0, total = 0;
+            cudaMemGetInfo(&free, &total);
+            // Start from an empty stream: text passed to the stringstream constructor is overwritten by the first <<.
+            std::stringstream err;
+            err << "cuNDArray::allocate_memory() : Error allocating CUDA memory. CUDA Memory: " << free << " (" << total << ")";
+            err << "   memory requested: " << size << "( ";
+            for (size_t i = 0; i < this->dimensions_->size(); i++) {
+                err << (*this->dimensions_)[i] << " "; // dimensions belong in the exception text, not on std::cerr
+            }
+            err << ")";
+            this->data_ = 0;
+            throw std::runtime_error(err.str());
         }
-        err << ")";
-        this->data_ = 0;
-        throw std::runtime_error(err.str());
-      }
 
-      if (device_ != device_no_old) {
-        if (cudaSetDevice(device_no_old) != cudaSuccess) {
-          throw cuda_error("cuNDArray::allocate_memory: unable to restore device no");
+        if (device_ != device_no_old) {
+            if (cudaSetDevice(device_no_old) != cudaSuccess) {
+                throw cuda_error("cuNDArray::allocate_memory: unable to restore device no");
+            }
         }
-      }
     }
 
-    virtual void deallocate_memory()
+    template <typename T> 
+    void cuNDArray<T>::deallocate_memory()
     {
-      if (this->data_) {
+        if (this->data_) {
 
-        int device_no_old;
-        CUDA_CALL(cudaGetDevice(&device_no_old));
-        if (device_ != device_no_old) {
-          CUDA_CALL(cudaSetDevice(device_));
-        }
+            int device_no_old;
+            CUDA_CALL(cudaGetDevice(&device_no_old));
+            if (device_ != device_no_old) {
+                CUDA_CALL(cudaSetDevice(device_));
+            }
 
-        CUDA_CALL(cudaFree(this->data_));
-        if (device_ != device_no_old) {
-          CUDA_CALL(cudaSetDevice(device_no_old));
+            CUDA_CALL(cudaFree(this->data_));
+            if (device_ != device_no_old) {
+                CUDA_CALL(cudaSetDevice(device_no_old));
+            }
+            this->data_ = 0;
         }
-        this->data_ = 0;
-      }
     }
-  };
+
+
+
 }
 
 #endif //CUNDARRAY_H
diff --git a/toolboxes/core/gpu/cuNDArray_blas.h b/toolboxes/core/gpu/cuNDArray_blas.h
index ed2cd6f..018786f 100644
--- a/toolboxes/core/gpu/cuNDArray_blas.h
+++ b/toolboxes/core/gpu/cuNDArray_blas.h
@@ -32,6 +32,7 @@ namespace Gadgetron{
    * @brief Gets the index of the index of the element with minimum absolute
    * @param x Input data
    * @return index of absolute minimum values
+   * @details Note that this returns the C-style index and NOT the Fortran index.
    */
   template<class T> EXPORTGPUCORE size_t amin( cuNDArray<T> *x );
   
diff --git a/toolboxes/core/gpu/cuNDArray_elemwise.cu b/toolboxes/core/gpu/cuNDArray_elemwise.cu
index 9456f3e..cd32347 100644
--- a/toolboxes/core/gpu/cuNDArray_elemwise.cu
+++ b/toolboxes/core/gpu/cuNDArray_elemwise.cu
@@ -278,6 +278,40 @@ Gadgetron::real_to_complex( cuNDArray<typename realType<T>::Type> *x )
   return result;
 }
 
+template <typename T,typename T2> struct cuNDA_convert_to : public thrust::unary_function<T,T2>
+{
+  __device__ T2 operator()(T &x) const {return T2(x);}
+};
+
+template <typename T,typename T2> struct cuNDA_convert_to<complext<T>,complext<T2> > : public thrust::unary_function<complext<T>,complext<T2> >
+{
+  __device__ complext<T2> operator()(complext<T> &x) const {return complext<T2>(x.vec[0],x.vec[1]);}
+};
+
+template<class T, class T2> boost::shared_ptr< cuNDArray<T2> >
+Gadgetron::convert_to( cuNDArray<T> *x )
+{
+  if( x == 0x0 )
+    throw std::runtime_error("Gadgetron::convert_to(): Invalid input array");
+
+  boost::shared_ptr< cuNDArray<T2> > result(new cuNDArray<T2>());
+  result->create(x->get_dimensions());
+  thrust::device_ptr<T2> resPtr = result->get_device_ptr();
+  thrust::device_ptr<T> xPtr = x->get_device_ptr();
+  thrust::transform(xPtr,xPtr+x->get_number_of_elements(),resPtr,cuNDA_convert_to<T,T2>());
+  return result;
+}
+
+template<class T, class T2> void
+Gadgetron::convert_to( cuNDArray<T> *x ,cuNDArray<T2> * y)
+{
+  if( x == 0x0 || y == 0x0 || !x->dimensions_equal(y)) // reject null arrays before y is dereferenced below
+    throw std::runtime_error("Gadgetron::convert_to(): Invalid input array");
+  thrust::device_ptr<T2> resPtr = y->get_device_ptr();
+  thrust::device_ptr<T> xPtr = x->get_device_ptr();
+  thrust::transform(xPtr,xPtr+x->get_number_of_elements(),resPtr,cuNDA_convert_to<T,T2>());
+}
+
 template<class T> void Gadgetron::clear( cuNDArray<T> *x )
 {
   if( x == 0x0 )
@@ -529,6 +563,7 @@ template EXPORTGPUCORE void Gadgetron::sgn_inplace<float>( cuNDArray<float>* );
 template EXPORTGPUCORE void Gadgetron::clear<float>( cuNDArray<float>* );
 template EXPORTGPUCORE void Gadgetron::fill<float>( cuNDArray<float>*, float );
 template EXPORTGPUCORE void Gadgetron::clamp<float>( cuNDArray<float>*, float, float );
+template EXPORTGPUCORE void Gadgetron::clamp<float>( cuNDArray<float>*, float, float, float,float );
 template EXPORTGPUCORE void Gadgetron::clamp_min<float>( cuNDArray<float>*, float );
 template EXPORTGPUCORE void Gadgetron::clamp_max<float>( cuNDArray<float>*, float );
 template EXPORTGPUCORE void Gadgetron::normalize<float>( cuNDArray<float>*, float );
@@ -553,6 +588,7 @@ template EXPORTGPUCORE void Gadgetron::sgn_inplace<double>( cuNDArray<double>* )
 template EXPORTGPUCORE void Gadgetron::clear<double>( cuNDArray<double>* );
 template EXPORTGPUCORE void Gadgetron::fill<double>( cuNDArray<double>*, double );
 template EXPORTGPUCORE void Gadgetron::clamp<double>( cuNDArray<double>*, double, double );
+template EXPORTGPUCORE void Gadgetron::clamp<double>( cuNDArray<double>*, double, double, double, double );
 template EXPORTGPUCORE void Gadgetron::clamp_min<double>( cuNDArray<double>*, double );
 template EXPORTGPUCORE void Gadgetron::clamp_max<double>( cuNDArray<double>*, double );
 template EXPORTGPUCORE void Gadgetron::normalize<double>( cuNDArray<double>*, double );
@@ -654,3 +690,13 @@ template EXPORTGPUCORE boost::shared_ptr< cuNDArray<double> > Gadgetron::real<do
 template EXPORTGPUCORE boost::shared_ptr< cuNDArray<double> > Gadgetron::imag<double_complext>( cuNDArray<double_complext>* );
 template EXPORTGPUCORE boost::shared_ptr< cuNDArray<double_complext> > Gadgetron::conj<double_complext>( cuNDArray<double_complext>* );
 template EXPORTGPUCORE boost::shared_ptr< cuNDArray<double_complext> > Gadgetron::real_to_complex<double_complext>( cuNDArray<double>* );
+
+template boost::shared_ptr< cuNDArray<double> > Gadgetron::convert_to<float,double>( cuNDArray<float>* );
+template boost::shared_ptr< cuNDArray<float> > Gadgetron::convert_to<double,float>( cuNDArray<double>* );
+template boost::shared_ptr< cuNDArray<double_complext> > Gadgetron::convert_to<float_complext,double_complext>( cuNDArray<float_complext>* );
+template boost::shared_ptr< cuNDArray<float_complext> > Gadgetron::convert_to<double_complext,float_complext>( cuNDArray<double_complext>* );
+
+template void Gadgetron::convert_to<float,double>( cuNDArray<float>*,cuNDArray<double>* );
+template void Gadgetron::convert_to<double,float>( cuNDArray<double>*, cuNDArray<float>* );
+template void Gadgetron::convert_to<float_complext,double_complext>( cuNDArray<float_complext>*,cuNDArray<double_complext>*  );
+template void Gadgetron::convert_to<double_complext,float_complext>( cuNDArray<double_complext>*, cuNDArray<float_complext>*);
diff --git a/toolboxes/core/gpu/cuNDArray_elemwise.h b/toolboxes/core/gpu/cuNDArray_elemwise.h
index 772b408..6be2c9e 100644
--- a/toolboxes/core/gpu/cuNDArray_elemwise.h
+++ b/toolboxes/core/gpu/cuNDArray_elemwise.h
@@ -146,12 +146,26 @@ namespace Gadgetron{
    */
   template<class T> EXPORTGPUCORE boost::shared_ptr< cuNDArray<T> > real_to_complex( cuNDArray<typename realType<T>::Type> *x );
   
+  /**
+   * Converts array from type T to type T2
+   * @param[in] x Input array
+   * @return A copy of x with the type T2
+   */
+  template<class T,class T2> EXPORTGPUCORE boost::shared_ptr< cuNDArray<T2> > convert_to( cuNDArray<T> *x );
+
+  /**
+   * Converts array from type T to type T2. Input and output array must be same size.
+   * @param[in] x Input array
+   * @param[out] y Output array, will contain a copy of x with type T2
+   */
+  template<class T,class T2> EXPORTGPUCORE void convert_to( cuNDArray<T> *x, cuNDArray<T2> *y );
+
   //
   // From hereon the functions are all in-place although without the _inplace suffix...
   //
 
   /**
-   * @brief Clears the array to all zeros ( in place). Faster than fill.
+   * @brief Clears the array to all zeros (in place). Faster than fill.
    * @param[in,out] x Input and output array.
    */
   template<class T> EXPORTGPUCORE void clear( cuNDArray<T> *x );
diff --git a/toolboxes/core/gpu/cuNDArray_fileio.h b/toolboxes/core/gpu/cuNDArray_fileio.h
new file mode 100644
index 0000000..d6c5e36
--- /dev/null
+++ b/toolboxes/core/gpu/cuNDArray_fileio.h
@@ -0,0 +1,9 @@
+#pragma once
+
+namespace Gadgetron{
+template<class T> void write_nd_array(cuNDArray<T>* array, std::string s){
+	write_nd_array(array->to_host().get(),s.c_str());
+}
+
+
+}
diff --git a/toolboxes/core/gpu/cuNDArray_math.h b/toolboxes/core/gpu/cuNDArray_math.h
index 3141f5f..bb8e90d 100644
--- a/toolboxes/core/gpu/cuNDArray_math.h
+++ b/toolboxes/core/gpu/cuNDArray_math.h
@@ -2,5 +2,6 @@
 
 #include "cuNDArray_operators.h"
 #include "cuNDArray_elemwise.h"
+#include "cuNDArray_reductions.h"
 #include "cuNDArray_blas.h"
 #include "cuNDArray_utils.h"
diff --git a/toolboxes/core/gpu/cuNDArray_operators.cu b/toolboxes/core/gpu/cuNDArray_operators.cu
index b84cf28..243ef3f 100644
--- a/toolboxes/core/gpu/cuNDArray_operators.cu
+++ b/toolboxes/core/gpu/cuNDArray_operators.cu
@@ -34,7 +34,7 @@ namespace Gadgetron{
   // This transform support batch mode when the number of elements in x is a multiple of the number of elements in y
   //
   template<class T,class S,class F>  
-  void equals_transform(cuNDArray<T> &x, cuNDArray<S> &y){
+  static void equals_transform(cuNDArray<T> &x, cuNDArray<S> &y){
     if (x.dimensions_equal(&y)){
       thrust::transform(x.begin(), x.end(), y.begin(), x.begin(), F());
     } else if (compatible_dimensions(x,y))
@@ -72,89 +72,89 @@ namespace Gadgetron{
     __device__ complext<T> operator()(const complext<T> &x, const T &y) const {return x/y;}
   };
 
-  template<class T> cuNDArray<T>& operator+= (cuNDArray<T> &x, cuNDArray<T> &y){
+  template<class T> cuNDArray<typename boost::enable_if<enable_operators<T>, T >::type > & operator+= (cuNDArray<T> &x, cuNDArray<T> &y){
     equals_transform< T,T,thrust::plus<T> >(x,y);
     return x;
   }
 
-  template<class T> cuNDArray<T>& operator+= (cuNDArray<T> &x , T y){
+  template<class T> cuNDArray<typename boost::enable_if<enable_operators<T>, T >::type > & operator+= (cuNDArray<T> &x , T y){
     thrust::constant_iterator<T> iter(y);
     thrust::transform(x.begin(), x.end(), iter, x.begin(), thrust::plus<T>());
     return x;
   }
 
-  template<class T> cuNDArray< complext<T> >& operator+= (cuNDArray< complext<T> > &x , cuNDArray<T> &y){
+  template<class T> cuNDArray<complext<typename boost::enable_if<enable_operators<T>, T >::type > >& operator+= (cuNDArray< complext<T> > &x , cuNDArray<T> &y){
     equals_transform< complext<T>,T,cuNDA_plus<T> >(x,y);
     return x;
   }
 
-  template<class T> cuNDArray< complext<T> >& operator+= (cuNDArray<complext<T> > &x , T y){
+  template<class T> cuNDArray<complext<typename boost::enable_if<enable_operators<T>, T >::type > >& operator+= (cuNDArray<complext<T> > &x , T y){
     thrust::constant_iterator<T> iter(y);
     thrust::transform(x.begin(), x.end(), iter, x.begin(), cuNDA_plus<T>());
     return x;
   }
 
-  template<class T> cuNDArray<T>& operator-= (cuNDArray<T> & x , cuNDArray<T> & y){
+  template<class T> cuNDArray<typename boost::enable_if<enable_operators<T>, T >::type >& operator-= (cuNDArray<T> & x , cuNDArray<T> & y){
     equals_transform< T,T,thrust::minus<T> >(x,y);
     return x;
   }
 
-  template<class T> cuNDArray<T>& operator-= (cuNDArray<T> &x , T y){
+  template<class T> cuNDArray<typename boost::enable_if<enable_operators<T>, T >::type >& operator-= (cuNDArray<T> &x , T y){
     thrust::constant_iterator<T> iter(y);
     thrust::transform(x.begin(), x.end(), iter, x.begin(), thrust::minus<T>());
     return x;
   }
 
-  template<class T> cuNDArray< complext<T> >& operator-= (cuNDArray< complext<T> > &x , cuNDArray<T> &y){
+  template<class T> cuNDArray<complext<typename boost::enable_if<enable_operators<T>, T >::type > >& operator-= (cuNDArray< complext<T> > &x , cuNDArray<T> &y){
     equals_transform< complext<T>,T,cuNDA_minus<T> >(x,y);
     return x;
   }
 
-  template<class T> cuNDArray< complext<T> >& operator-= (cuNDArray<complext<T> > &x , T y){
+  template<class T> cuNDArray<complext<typename boost::enable_if<enable_operators<T>, T >::type > >& operator-= (cuNDArray<complext<T> > &x , T y){
     thrust::constant_iterator<T> iter(y);
     thrust::transform(x.begin(), x.end(), iter, x.begin(), cuNDA_minus<T>());
     return x;
   }
 
-  template<class T> cuNDArray<T>& operator*= (cuNDArray<T> &x , cuNDArray<T> &y){
+  template<class T> cuNDArray<typename boost::enable_if<enable_operators<T>, T >::type >& operator*= (cuNDArray<T> &x , cuNDArray<T> &y){
     equals_transform< T,T,thrust::multiplies<T> >(x,y);
     return x;
   }
 
-  template<class T> cuNDArray<T>& operator*= (cuNDArray<T> &x , T y){
+  template<class T> cuNDArray<typename boost::enable_if<enable_operators<T>, T >::type >& operator*= (cuNDArray<T> &x , T y){
     thrust::constant_iterator<T> iter(y);
     thrust::transform(x.begin(), x.end(), iter, x.begin(), thrust::multiplies<T>());
     return x;
   }
 
-  template<class T> cuNDArray< complext<T> >& operator*= (cuNDArray< complext<T> > &x , cuNDArray<T> &y){
+  template<class T> cuNDArray<complext<typename boost::enable_if<enable_operators<T>, T >::type > >& operator*= (cuNDArray< complext<T> > &x , cuNDArray<T> &y){
     equals_transform< complext<T>,T,cuNDA_multiply<T> >(x,y);
     return x;
   }
 
-  template<class T> cuNDArray< complext<T> >& operator*= (cuNDArray<complext<T> > &x , T y){
+  template<class T> cuNDArray<complext<typename boost::enable_if<enable_operators<T>, T >::type > >& operator*= (cuNDArray<complext<T> > &x , T y){
     thrust::constant_iterator<T> iter(y);
     thrust::transform(x.begin(), x.end(), iter, x.begin(), cuNDA_multiply<T>());
     return x;
   }
 
-  template<class T> cuNDArray<T>& operator/= (cuNDArray<T> &x , cuNDArray<T> &y){
+  template<class T> cuNDArray<typename boost::enable_if<enable_operators<T>, T >::type >& operator/= (cuNDArray<T> &x , cuNDArray<T> &y){
     equals_transform< T,T,thrust::divides<T> >(x,y);
     return x;
   }
 
-  template<class T> cuNDArray<T>& operator/= (cuNDArray<T> &x , T y){
+  template<class T> cuNDArray<typename boost::enable_if<enable_operators<T>, T >::type >& operator/= (cuNDArray<T> &x , T y){
     thrust::constant_iterator<T> iter(y);
     thrust::transform(x.begin(), x.end(), iter, x.begin(), thrust::divides<T>());
     return x;
   }
 
-  template<class T> cuNDArray< complext<T> >& operator/= (cuNDArray< complext<T> > &x , cuNDArray<T> &y){
+  template<class T> cuNDArray<complext<typename boost::enable_if<enable_operators<T>, T >::type > >& operator/= (cuNDArray< complext<T> > &x , cuNDArray<T> &y){
     equals_transform< complext<T>,T,cuNDA_divide<T> >(x,y);
     return x;
   }
 
-  template<class T> cuNDArray< complext<T> >& operator/= (cuNDArray<complext<T> > &x , T y){
+  template<class T> cuNDArray<complext<typename boost::enable_if<enable_operators<T>, T >::type > >& operator/= (cuNDArray<complext<T> > &x , T y){
     thrust::constant_iterator<T> iter(y);
     thrust::transform(x.begin(), x.end(), iter, x.begin(), cuNDA_divide<T>());
     return x;
diff --git a/toolboxes/core/gpu/cuNDArray_operators.h b/toolboxes/core/gpu/cuNDArray_operators.h
index 356c1a0..fb486a9 100644
--- a/toolboxes/core/gpu/cuNDArray_operators.h
+++ b/toolboxes/core/gpu/cuNDArray_operators.h
@@ -18,6 +18,7 @@
 
 #include "cuNDArray.h"
 #include "gpucore_export.h"
+#include "Gadgetron_enable_types.h"
 
 namespace Gadgetron {
 
@@ -30,14 +31,14 @@ namespace Gadgetron {
    * Then the sizes of the first n array dimensions must match between x and y.
    * If x contains further dimensions the operator is batched across those dimensions.
    */
-  template<class T> EXPORTGPUCORE cuNDArray<T>& operator+= (cuNDArray<T> &x, cuNDArray<T> &y);
+  template<class T> EXPORTGPUCORE cuNDArray<typename boost::enable_if<enable_operators<T>, T >::type > & operator+= (cuNDArray<T> &x, cuNDArray<T> &y);
   
   /**
    * @brief Implementation of element-wise operator+= on a cuNDArray with a scalar value.
    * @param[in,out] x Input and output array.
    * @param[in] y Input scalar.
    */
-  template<class T> EXPORTGPUCORE cuNDArray<T>& operator+= (cuNDArray<T> &x, T y );
+  template<class T> EXPORTGPUCORE cuNDArray<typename boost::enable_if<enable_operators<T>, T >::type > & operator+= (cuNDArray<T> &x, T y );
     
   /**
    * @brief Implementation of element-wise operator+= on two cuNDArrays.
@@ -48,14 +49,14 @@ namespace Gadgetron {
    * Then the sizes of the first n array dimensions must match between x and y.
    * If x contains further dimensions the operator is batched across those dimensions.
    */
-  template<class T> EXPORTGPUCORE cuNDArray< complext<T> >& operator+= (cuNDArray<complext<T> > &x, cuNDArray<T> &y);
+  template<class T> EXPORTGPUCORE cuNDArray<complext<typename boost::enable_if<enable_operators<T>, T >::type > > & operator+= (cuNDArray<complext<T> > &x, cuNDArray<T> &y);
 
   /**
    * @brief Implementation of element-wise operator+= on a cuNDArray with a scalar value.
    * @param[in,out] x Input and output array.
    * @param[in] y Input scalar.
    */
-  template<class T> EXPORTGPUCORE cuNDArray< complext<T> >& operator+= (cuNDArray<complext<T> > &x, T y );
+  template<class T> EXPORTGPUCORE cuNDArray<complext< typename boost::enable_if<enable_operators<T>, T >::type > > & operator+= (cuNDArray<complext<T> > &x, T y );
 
   /**
    * @brief Implementation of element-wise operator-= on two cuNDArrays.
@@ -66,14 +67,14 @@ namespace Gadgetron {
    * Then the sizes of the first n array dimensions must match between x and y.
    * If x contains further dimensions the operator is batched across those dimensions.
    */
-  template<class T> EXPORTGPUCORE cuNDArray<T>& operator-= (cuNDArray<T> &x, cuNDArray<T> &y);
+  template<class T> EXPORTGPUCORE cuNDArray<typename boost::enable_if<enable_operators<T>, T >::type > & operator-= (cuNDArray<T> &x, cuNDArray<T> &y);
   
   /**
    * @brief Implementation of element-wise operator-= on a cuNDArray with a scalar value.
    * @param[in,out] x Input and output array.
    * @param[in] y Input scalar.
    */
-  template<class T> EXPORTGPUCORE cuNDArray<T>& operator-= (cuNDArray<T> &x, T y );
+  template<class T> EXPORTGPUCORE cuNDArray<typename boost::enable_if<enable_operators<T>, T >::type > & operator-= (cuNDArray<T> &x, T y );
     
   /**
    * @brief Implementation of element-wise operator-= on two cuNDArrays.
@@ -84,14 +85,14 @@ namespace Gadgetron {
    * Then the sizes of the first n array dimensions must match between x and y.
    * If x contains further dimensions the operator is batched across those dimensions.
    */
-  template<class T> EXPORTGPUCORE cuNDArray< complext<T> >& operator-= (cuNDArray<complext<T> > &x, cuNDArray<T> &y);
+  template<class T> EXPORTGPUCORE cuNDArray<complext< typename boost::enable_if<enable_operators<T>, T >::type > > & operator-= (cuNDArray<complext<T> > &x, cuNDArray<T> &y);
 
   /**
    * @brief Implementation of element-wise operator-= on a cuNDArray with a scalar value.
    * @param[in,out] x Input and output array.
    * @param[in] y Input scalar.
    */
-  template<class T> EXPORTGPUCORE cuNDArray< complext<T> >& operator-= (cuNDArray<complext<T> > &x, T y );
+  template<class T> EXPORTGPUCORE cuNDArray<complext< typename boost::enable_if<enable_operators<T>, T >::type > > & operator-= (cuNDArray<complext<T> > &x, T y );
 
   /**
    * @brief Implementation of element-wise operator*= on two cuNDArrays.
@@ -102,14 +103,14 @@ namespace Gadgetron {
    * Then the sizes of the first n array dimensions must match between x and y.
    * If x contains further dimensions the operator is batched across those dimensions.
    */
-  template<class T> EXPORTGPUCORE cuNDArray<T>& operator*= (cuNDArray<T> &x, cuNDArray<T> &y);
+  template<class T> EXPORTGPUCORE cuNDArray< typename boost::enable_if<enable_operators<T>, T >::type >  & operator*= (cuNDArray<T> &x, cuNDArray<T> &y);
   
   /**
    * @brief Implementation of element-wise operator*= on a cuNDArray with a scalar value.
    * @param[in,out] x Input and output array.
    * @param[in] y Input scalar.
    */
-  template<class T> EXPORTGPUCORE cuNDArray<T>& operator*= (cuNDArray<T> &x, T y );
+  template<class T> EXPORTGPUCORE cuNDArray< typename boost::enable_if<enable_operators<T>, T >::type >  & operator*= (cuNDArray<T> &x, T y );
     
   /**
    * @brief Implementation of element-wise operator*= on two cuNDArrays.
@@ -120,14 +121,14 @@ namespace Gadgetron {
    * Then the sizes of the first n array dimensions must match between x and y.
    * If x contains further dimensions the operator is batched across those dimensions.
    */
-  template<class T> EXPORTGPUCORE cuNDArray< complext<T> >& operator*= (cuNDArray<complext<T> > &x, cuNDArray<T> &y);
+  template<class T> EXPORTGPUCORE cuNDArray<complext< typename boost::enable_if<enable_operators<T>, T >::type > > & operator*= (cuNDArray<complext<T> > &x, cuNDArray<T> &y);
 
   /**
    * @brief Implementation of element-wise operator*= on a cuNDArray with a scalar value.
    * @param[in,out] x Input and output array.
    * @param[in] y Input scalar.
    */
-  template<class T> EXPORTGPUCORE cuNDArray< complext<T> >& operator*= (cuNDArray<complext<T> > &x, T y );
+  template<class T> EXPORTGPUCORE cuNDArray<complext< typename boost::enable_if<enable_operators<T>, T >::type > > & operator*= (cuNDArray<complext<T> > &x, T y );
 
   /**
    * @brief Implementation of element-wise operator/= on two cuNDArrays.
@@ -138,14 +139,14 @@ namespace Gadgetron {
    * Then the sizes of the first n array dimensions must match between x and y.
    * If x contains further dimensions the operator is batched across those dimensions.
    */
-  template<class T> EXPORTGPUCORE cuNDArray<T>& operator/= (cuNDArray<T> &x, cuNDArray<T> &y);
+  template<class T> EXPORTGPUCORE cuNDArray< typename boost::enable_if<enable_operators<T>, T >::type >  & operator/= (cuNDArray<T> &x, cuNDArray<T> &y);
   
   /**
    * @brief Implementation of element-wise operator/= on a cuNDArray with a scalar value.
    * @param[in,out] x Input and output array.
    * @param[in] y Input scalar.
    */
-  template<class T> EXPORTGPUCORE cuNDArray<T>& operator/= (cuNDArray<T> &x, T y );
+  template<class T> EXPORTGPUCORE cuNDArray< typename boost::enable_if<enable_operators<T>, T >::type > & operator/= (cuNDArray<T> &x, T y );
     
   /**
    * @brief Implementation of element-wise operator/= on two cuNDArrays.
@@ -156,12 +157,12 @@ namespace Gadgetron {
    * Then the sizes of the first n array dimensions must match between x and y.
    * If x contains further dimensions the operator is batched across those dimensions.
    */
-  template<class T> EXPORTGPUCORE cuNDArray< complext<T> >& operator/= (cuNDArray<complext<T> > &x, cuNDArray<T> &y);
+  template<class T> EXPORTGPUCORE cuNDArray<complext< typename boost::enable_if<enable_operators<T>, T >::type > >  & operator/= (cuNDArray<complext<T> > &x, cuNDArray<T> &y);
 
   /**
    * @brief Implementation of element-wise operator/= on a cuNDArray with a scalar value.
    * @param[in,out] x Input and output array.
    * @param[in] y Input scalar.
    */
-  template<class T> EXPORTGPUCORE cuNDArray< complext<T> >& operator/= (cuNDArray<complext<T> > &x, T y );
+  template<class T> EXPORTGPUCORE cuNDArray<complext< typename boost::enable_if<enable_operators<T>, T >::type > >  & operator/= (cuNDArray<complext<T> > &x, T y );
 }
diff --git a/toolboxes/core/gpu/cuNDArray_utils.cu b/toolboxes/core/gpu/cuNDArray_utils.cu
index 735dafb..3c097a1 100644
--- a/toolboxes/core/gpu/cuNDArray_utils.cu
+++ b/toolboxes/core/gpu/cuNDArray_utils.cu
@@ -9,10 +9,10 @@
 namespace Gadgetron {
 
   template <class T> 
-  __global__ void cuNDArray_permute_kernel( T* in, T* out, 
+  __global__ void cuNDArray_permute_kernel(const  T*  __restrict__ in, T* __restrict__ out,
                                             unsigned int ndim,
-                                            unsigned int* dims,
-                                            unsigned int* strides_out,
+                                            const unsigned int* __restrict__ dims,
+                                            const unsigned int* __restrict__ strides_out,
                                             unsigned int elements,
                                             int shift_mode)
   {
@@ -231,7 +231,7 @@ namespace Gadgetron {
   //
   template<class T> 
   __global__ void expand_kernel( 
-                                T *in, T *out, 
+                                const T * __restrict__ in, T * __restrict__ out,
                                 unsigned int number_of_elements_in, unsigned int number_of_elements_out, unsigned int new_dim_size )
   {
     const unsigned int idx = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x+threadIdx.x;    
@@ -269,7 +269,7 @@ namespace Gadgetron {
   // Crop
   template<class T, unsigned int D> __global__ void crop_kernel
   ( vector_td<unsigned int,D> offset, vector_td<unsigned int,D> matrix_size_in, vector_td<unsigned int,D> matrix_size_out,
-    T *in, T *out, unsigned int num_batches, unsigned int num_elements )
+    const T * __restrict__ in, T * __restrict__ out, unsigned int num_batches, unsigned int num_elements )
   {
     const unsigned int idx = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x+threadIdx.x;
     const unsigned int frame_offset = idx/num_elements;
@@ -342,7 +342,7 @@ namespace Gadgetron {
   // Expand and zero fill
   template<class T, unsigned int D> 
   __global__ void pad_kernel( vector_td<unsigned int,D> matrix_size_in, vector_td<unsigned int,D> matrix_size_out,
-                              T *in, T *out, unsigned int number_of_batches, unsigned int num_elements, T val )
+                              const T * __restrict__ in, T * __restrict__ out, unsigned int number_of_batches, unsigned int num_elements, T val )
   {
     const unsigned int idx = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x+threadIdx.x;
     const unsigned int frame_offset = idx/num_elements;
@@ -512,8 +512,8 @@ namespace Gadgetron {
   upsample_kernel( typename uintd<D>::Type matrix_size_in,
                    typename uintd<D>::Type matrix_size_out,
                    unsigned int num_batches,
-                   T *image_in,
-                   T *image_out )
+                   const T * __restrict__ image_in,
+                   T * __restrict__ image_out )
   {
     typedef typename realType<T>::Type REAL;
     
@@ -606,8 +606,8 @@ namespace Gadgetron {
   downsample_kernel( typename intd<D>::Type matrix_size_in,
                      typename intd<D>::Type matrix_size_out,
                      int num_batches,
-                     T *image_in,
-                     T *image_out )
+                     const T * __restrict__ image_in,
+                     T * __restrict__ image_out )
   {
     typedef typename realType<T>::Type REAL;
     
diff --git a/toolboxes/core/gpu/cuNDArray_utils.h b/toolboxes/core/gpu/cuNDArray_utils.h
index ebe49e9..84c001f 100644
--- a/toolboxes/core/gpu/cuNDArray_utils.h
+++ b/toolboxes/core/gpu/cuNDArray_utils.h
@@ -1,3 +1,6 @@
+/**
+ * @file cuNDArray_utils.h
+ */
 #pragma once
 
 #include "cuNDArray.h"
@@ -6,55 +9,120 @@
 
 namespace Gadgetron{
 
-  template<class T> EXPORTGPUCORE boost::shared_ptr< cuNDArray<T> >
-  shift_dim( cuNDArray<T> *in, int shift );
-
-  template<class T> EXPORTGPUCORE void
-  shift_dim( cuNDArray<T> *in, cuNDArray<T> *out, int shift );
-  
-  template<class T> EXPORTGPUCORE boost::shared_ptr< cuNDArray<T> >
-  permute( cuNDArray<T> *in, std::vector<size_t> *dim_order, int shift_mode = 0 );
-  
-  template<class T> EXPORTGPUCORE void
-  permute( cuNDArray<T> *in, cuNDArray<T> *out, std::vector<size_t> *dim_order, int shift_mode = 0 );
-
-  template<class T, unsigned int D> EXPORTGPUCORE boost::shared_ptr< cuNDArray<T> >
-  crop( typename uint64d<D>::Type crop_offset, typename uint64d<D>::Type crop_size, cuNDArray<T> *in );
-
-  template<class T, unsigned int D> EXPORTGPUCORE
-  void crop( typename uint64d<D>::Type crop_offset, cuNDArray<T> *in, cuNDArray<T> *out );
-  
-  template<class T, unsigned int D> EXPORTGPUCORE boost::shared_ptr< cuNDArray<T> >
-  pad( typename uint64d<D>::Type size, cuNDArray<T> *in, T val = T(0) );
-
-  template<class T, unsigned int D> EXPORTGPUCORE
-  void pad( cuNDArray<T> *in, cuNDArray<T> *out, T val = T(0) );
-  
-  template<class T, unsigned int D> EXPORTGPUCORE
-  void fill_border( typename uint64d<D>::Type matrix_size, cuNDArray<T> *image, T val = T(0) );
-
-  /***
-   * @brief Fills the image with a given value outside a radius from the center
-   * @param radius
-   * @param in_out
-   * @param val
-   */
-  template<class T, unsigned int D>
-  void fill_border( typename realType<T>::Type radius, cuNDArray<T> *in_out, T val= T(0) );
-
-  // Expand array to new dimension
-  template<class T> EXPORTGPUCORE boost::shared_ptr<cuNDArray<T> > 
-  expand(cuNDArray<T> *data, size_t added_dim_size );
-  
-  template<class T, unsigned int D> EXPORTGPUCORE 
-  boost::shared_ptr< cuNDArray<T> > upsample( cuNDArray<T>* in );
-
-  template<class T, unsigned int D> EXPORTGPUCORE
-  void upsample( cuNDArray<T> *in, cuNDArray<T> *out );
-
-  template<class T, unsigned int D> EXPORTGPUCORE 
-  boost::shared_ptr< cuNDArray<T> > downsample( cuNDArray<T>* in );
-
-  template<class T, unsigned int D> EXPORTGPUCORE
-  void downsample( cuNDArray<T> *in, cuNDArray<T> *out );
+/**
+ * @brief Cyclically shifts the order of the array dimensions
+ */
+template<class T> EXPORTGPUCORE boost::shared_ptr< cuNDArray<T> >
+shift_dim( cuNDArray<T> *in, int shift );
+/**
+ * @brief Cyclically shifts the order of the array dimensions
+ */
+template<class T> EXPORTGPUCORE void
+shift_dim( cuNDArray<T> *in, cuNDArray<T> *out, int shift );
+
+/**
+ * @brief Permutes the array dimensions following the specified dimension order
+ */
+template<class T> EXPORTGPUCORE boost::shared_ptr< cuNDArray<T> >
+permute( cuNDArray<T> *in, std::vector<size_t> *dim_order, int shift_mode = 0 );
+
+/**
+ * @brief Permutes the array dimensions following the specified dimension order
+ */
+template<class T> EXPORTGPUCORE void
+permute( cuNDArray<T> *in, cuNDArray<T> *out, std::vector<size_t> *dim_order, int shift_mode = 0 );
+
+/**
+ * @brief Creates a cropped version of the array
+ * @param[in] crop_offset Offset of the corner of the crop region
+ * @param[in] crop_size Size of the output array
+ * @param[in] in Array to crop
+ */
+template<class T, unsigned int D> EXPORTGPUCORE boost::shared_ptr< cuNDArray<T> >
+crop( typename uint64d<D>::Type crop_offset, typename uint64d<D>::Type crop_size, cuNDArray<T> *in );
+
+/**
+ * @brief Creates a cropped version of the array
+ * @param[in] crop_offset Offset of the corner of the crop region
+ * @param[in] in Array to crop
+ * @param[out] out Array into which the cropped array is placed
+ */
+template<class T, unsigned int D> EXPORTGPUCORE
+void crop( typename uint64d<D>::Type crop_offset, cuNDArray<T> *in, cuNDArray<T> *out );
+
+/**
+ * @brief Creates a padded version of the array
+ * @param[in] size Size of the output array
+ * @param[in] in Array to pad
+ * @param[in] val Numerical value of the padding
+ */
+template<class T, unsigned int D> EXPORTGPUCORE boost::shared_ptr< cuNDArray<T> >
+pad( typename uint64d<D>::Type size, cuNDArray<T> *in, T val = T(0) );
+
+
+/**
+ * @brief Creates a padded version of the array
+ * @param[in] in Array to pad
+ * @param[out] out Output array
+ * @param[in] val Numerical value of the padding
+ */
+template<class T, unsigned int D> EXPORTGPUCORE
+void pad( cuNDArray<T> *in, cuNDArray<T> *out, T val = T(0) );
+
+/**
+ * @brief Fills the image with a given value outside a box
+ * @param[in] matrix_size Box size
+ * @param[in,out] image Array to fill
+ * @param[in] val Fill value
+ */
+template<class T, unsigned int D> EXPORTGPUCORE
+void fill_border( typename uint64d<D>::Type matrix_size, cuNDArray<T> *image, T val = T(0) );
+
+/**
+ * @brief Fills the image with a given value outside a radius from the center
+ * @param[in] radius Radius of the circle
+ * @param[in,out] in_out Array to fill
+ * @param[in] val Fill value
+ */
+template<class T, unsigned int D>
+void fill_border( typename realType<T>::Type radius, cuNDArray<T> *in_out, T val= T(0) );
+
+// Expand array to new dimension
+/**
+ * @brief Creates a new array, expanded into an additional dimension
+ * @param[in] data Input data
+ * @param[in] added_dim_size Size of the new dimension
+ */
+template<class T> EXPORTGPUCORE boost::shared_ptr<cuNDArray<T> >
+expand(cuNDArray<T> *data, size_t added_dim_size );
+
+/**
+ * @brief Creates an array of 2 times the size, created via linear interpolation
+ * @param[in] in Array to upsample
+ */
+template<class T, unsigned int D> EXPORTGPUCORE
+boost::shared_ptr< cuNDArray<T> > upsample( cuNDArray<T>* in );
+
+/**
+ * @brief Creates an array of 2 times the size, created via linear interpolation
+ * @param[in] in Array to upsample
+ * @param[out] out Output array
+ */
+template<class T, unsigned int D> EXPORTGPUCORE
+void upsample( cuNDArray<T> *in, cuNDArray<T> *out );
+
+/**
+ * @brief Creates an array of half the size, created via linear interpolation
+ * @param[in] in Array to downsample
+ */
+template<class T, unsigned int D> EXPORTGPUCORE
+boost::shared_ptr< cuNDArray<T> > downsample( cuNDArray<T>* in );
+
+/**
+ * @brief Creates an array of half the size, created via linear interpolation
+ * @param[in] in Array to downsample
+ * @param[out] out Output Array
+ */
+template<class T, unsigned int D> EXPORTGPUCORE
+void downsample( cuNDArray<T> *in, cuNDArray<T> *out );
 }
diff --git a/toolboxes/core/gpu/cuNDFFT.cpp b/toolboxes/core/gpu/cuNDFFT.cpp
deleted file mode 100644
index c09b9dd..0000000
--- a/toolboxes/core/gpu/cuNDFFT.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
-#include "cuNDFFT.h"
-#include "vector_td.h"
-#include "cuNDArray.h"
-#include "cuNDArray_utils.h"
-#include "cuNDArray_operators.h"
-
-#include <cufft.h>
-#include <cuComplex.h>
-#include <sstream>
-
-namespace Gadgetron{
-
-
-	template<class T> cuNDFFT<T>* cuNDFFT<T>::instance(){
-		if (!__instance)
-			__instance = new cuNDFFT<T>;
-		return __instance;
-	}
-
-	template<class T> cuNDFFT<T>* cuNDFFT<T>::__instance = NULL;
-  template<class T> cufftType_t get_transform_type();
-  template<> cufftType_t get_transform_type<float>() { return CUFFT_C2C; }
-  template<> cufftType_t get_transform_type<double>() { return CUFFT_Z2Z; }
-  
-  template<class T> cufftResult_t cuNDA_FFT_execute( cufftHandle plan, cuNDArray< complext<T> > *in_out, int direction );
-  
-  template<> cufftResult_t cuNDA_FFT_execute<float>( cufftHandle plan, cuNDArray<float_complext> *in_out, int direction ){
-    return cufftExecC2C(plan, (cuFloatComplex*)in_out->get_data_ptr(), (cuFloatComplex*)in_out->get_data_ptr(), direction); }
-
-  template<> cufftResult_t cuNDA_FFT_execute<double>( cufftHandle plan, cuNDArray<double_complext> *in_out, int direction ){
-    return cufftExecZ2Z(plan, (cuDoubleComplex*)in_out->get_data_ptr(), (cuDoubleComplex*)in_out->get_data_ptr(), direction); }
-  
-  template<class T> void
-  cuNDFFT<T>::fft_int( cuNDArray< complext<T> > *input, std::vector<size_t> *dims_to_transform, int direction, bool do_scale )
-  {
-    std::vector<size_t> new_dim_order;
-    std::vector<size_t> reverse_dim_order;
-    std::vector<size_t> dims;
-    std::vector<size_t> dim_count(input->get_number_of_dimensions(),0);
-    
-    size_t array_ndim = input->get_number_of_dimensions();
-    boost::shared_ptr< std::vector<size_t> > array_dims = input->get_dimensions();
-    
-    dims = std::vector<size_t>(dims_to_transform->size(),0);
-    for (size_t i = 0; i < dims_to_transform->size(); i++) {
-      if ((*dims_to_transform)[i] >= array_ndim) {
-    	std::stringstream ss;
-    	ss << "cuNDFFT::fft Invalid dimensions specified for transform " << (*dims_to_transform)[i] << "max " << array_ndim;
-	throw std::runtime_error(ss.str());;
-      }
-      if (dim_count[(*dims_to_transform)[i]] > 0) {
-	throw std::runtime_error("cuNDFFT::fft Invalid dimensions (duplicates) specified for transform");;
-      }
-      dim_count[(*dims_to_transform)[i]]++;
-      dims[dims_to_transform->size()-1-i] = (*array_dims)[(*dims_to_transform)[i]];
-    }
-    
-    new_dim_order = *dims_to_transform;
-    for (size_t i = 0; i < array_ndim; i++) {
-      if (!dim_count[i]) new_dim_order.push_back(i);
-    }
-    
-    reverse_dim_order = std::vector<size_t>(array_ndim,0);
-    for (size_t i = 0; i < array_ndim; i++) {
-      reverse_dim_order[new_dim_order[i]] = i;
-    }
-    
-    size_t ndim = dims.size();
-    size_t batches = 0;
-    size_t elements_in_ft = 1;
-    for (size_t i = 0; i < dims.size(); i++) 
-      elements_in_ft *= dims[i];
-    batches = input->get_number_of_elements() / elements_in_ft;
-    
-    cufftHandle plan;
-    cufftResult ftres;
-    
-    std::vector<int> int_dims;
-    for( unsigned int i=0; i<dims.size(); i++ )
-      int_dims.push_back((int)dims[i]);
-
-    ftres = cufftPlanMany(&plan,ndim,&int_dims[0], &int_dims[0], 1, elements_in_ft, &int_dims[0], 1, elements_in_ft, get_transform_type<T>(), batches);
-    if (ftres != CUFFT_SUCCESS) {
-      std::stringstream ss;
-      ss << "cuNDFFT FFT plan failed: " << ftres;
-      throw std::runtime_error(ss.str());;
-    }
-    
-    //IFFTSHIFT
-    *input = *permute(input,&new_dim_order,-1);
-    
-    if( cuNDA_FFT_execute<T>( plan, input, direction ) != CUFFT_SUCCESS ) {
-      throw std::runtime_error("cuNDFFT FFT execute failed");;
-    }
-    
-    ftres = cufftDestroy( plan );
-    if (ftres != CUFFT_SUCCESS) {
-      std::stringstream ss;
-      ss << "cuNDFFT FFT plan destroy failed: " << ftres;
-      throw std::runtime_error(ss.str());;
-    }
-    
-    if (do_scale) {
-      *input /= T(elements_in_ft);
-    }
-    
-    //FFTSHIFT 
-    *input = *permute(input,&reverse_dim_order,1);
-  }
-  
-  template<class T> void
-  cuNDFFT<T>::fft( cuNDArray< complext<T> > *input, std::vector<size_t> *dims_to_transform )
-  {
-    fft_int(input, dims_to_transform, CUFFT_FORWARD, false);
-  }
-  
-  template<class T> void
-  cuNDFFT<T>::ifft( cuNDArray< complext<T> > *input, std::vector<size_t> *dims_to_transform, bool do_scale )
-  {
-    fft_int(input, dims_to_transform, CUFFT_INVERSE, do_scale);
-  }
-  
-  template<class T> void
-  cuNDFFT<T>::fft( cuNDArray< complext<T> > *input, unsigned int dim_to_transform )
-  {
-    std::vector<size_t> dims(1,dim_to_transform);
-    fft_int(input, &dims, CUFFT_FORWARD, false);
-  }
-  
-  template<class T> void
-  cuNDFFT<T>::ifft( cuNDArray< complext<T> > *input, unsigned int dim_to_transform, bool do_scale )
-  {
-    std::vector<size_t> dims(1,dim_to_transform);
-    fft_int(input, &dims, CUFFT_INVERSE, do_scale);
-  }
-  
-  template<class T> void
-  cuNDFFT<T>::fft( cuNDArray< complext<T> > *input )
-  {
-    std::vector<size_t> dims(input->get_number_of_dimensions(),0);
-    for (size_t i = 0; i < dims.size(); i++) dims[i] = i;
-    fft_int(input, &dims, CUFFT_FORWARD, false);
-  }
-  
-  template<class T> void
-  cuNDFFT<T>::ifft( cuNDArray<complext<T> > *input, bool do_scale )
-  {
-    std::vector<size_t> dims(input->get_number_of_dimensions(),0);
-    for (size_t i = 0; i < dims.size(); i++) dims[i] = i;
-    fft_int(input, &dims, CUFFT_INVERSE, do_scale);
-  }
-  
-  // Instantiation
-  template class EXPORTGPUCORE cuNDFFT<float>;
-  template class EXPORTGPUCORE cuNDFFT<double>;
-}
diff --git a/toolboxes/core/gpu/cuNDFFT.h b/toolboxes/core/gpu/cuNDFFT.h
deleted file mode 100644
index 71a9bfd..0000000
--- a/toolboxes/core/gpu/cuNDFFT.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/** \file cuNDFFT.h
-    \brief Wrapper of the CUFFT library for ndarrays of type Gadgetron::complext.
- */
-
-#ifndef CUFFT_H
-#define CUFFT_H
-#pragma once
-
-#include "cuNDArray.h"
-#include "gpucore_export.h"
-
-namespace Gadgetron{
-
-/** \class cuNDFFT
-      \brief Wrapper of the CUFFT library for ndarrays of type complext.
-
-      Wrapper of the CUFFT library for ndarrays of type complext<REAL>.
-      The class' template type is a REAL, ie. float or double.
- */
-template<class T> class EXPORTGPUCORE cuNDFFT
-{
-
-public:
-
-
-
-	static cuNDFFT<T>* instance();
-
-
-	void fft ( cuNDArray<complext<T> > *input, std::vector<size_t> *dims_to_transform );
-	void ifft( cuNDArray<complext<T> > *input, std::vector<size_t> *dims_to_transform, bool do_scale = true );
-
-	void fft ( cuNDArray<complext<T> > *input, unsigned int dim_to_transform);
-	void ifft( cuNDArray<complext<T> > *input, unsigned int dim_to_transform, bool do_scale = true );
-
-	void fft ( cuNDArray<complext<T> > *input );
-	void ifft( cuNDArray<complext<T> > *input, bool do_scale = true );
-
-protected:
-	void fft_int( cuNDArray<complext<T> > *input, std::vector<size_t> *dims_to_transform, int direction, bool do_scale = true );
-
-	cuNDFFT() {}
-	virtual ~cuNDFFT() {}
-	static cuNDFFT<T>* __instance;
-
-};
-}
-
-#endif
diff --git a/toolboxes/core/gpu/hoCuNDArray_math.h b/toolboxes/core/gpu/hoCuNDArray_math.h
index 36ae858..671eb07 100644
--- a/toolboxes/core/gpu/hoCuNDArray_math.h
+++ b/toolboxes/core/gpu/hoCuNDArray_math.h
@@ -2,5 +2,4 @@
 
 #include "hoCuNDArray_blas.h"
 #include "hoCuNDArray_elemwise.h"
-#include "hoCuNDArray_operators.h"
 #include "hoCuNDArray_utils.h"
diff --git a/toolboxes/core/gpu/hoCuNDArray_operators.h b/toolboxes/core/gpu/hoCuNDArray_operators.h
deleted file mode 100644
index d2daa11..0000000
--- a/toolboxes/core/gpu/hoCuNDArray_operators.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/**
- * \file hoCuNDArray_operators.h
- * \brief Operators on the hoCuNDArray class. For now just delegates everything to hoNDArray operators.
- */
-
-#pragma once
-
-#include "hoNDArray_operators.h"
-
diff --git a/toolboxes/core/gpu/radial_utilities.cu b/toolboxes/core/gpu/radial_utilities.cu
index 945dce4..f113a85 100644
--- a/toolboxes/core/gpu/radial_utilities.cu
+++ b/toolboxes/core/gpu/radial_utilities.cu
@@ -149,10 +149,10 @@ namespace Gadgetron{
   compute_radial_neighbors( REAL sample_idx_on_profile, REAL angular_offset, REAL alpha, 
                             REAL one_over_radial_oversampling_factor, REAL one_over_num_profiles,
                             REAL bias, REAL samples_per_profile, REAL profile, REAL num_profiles,
-                            typename reald<REAL,2>::Type *p1, typename reald<REAL,2>::Type *p2, 
-                            typename reald<REAL,2>::Type *p3, typename reald<REAL,2>::Type *p4,
-                            typename reald<REAL,2>::Type *p5, typename reald<REAL,2>::Type *p6, 
-                            typename reald<REAL,2>::Type *p7, typename reald<REAL,2>::Type *p8  )
+                            typename reald<REAL,2>::Type * __restrict__ p1, typename reald<REAL,2>::Type * __restrict__ p2,
+                            typename reald<REAL,2>::Type * __restrict__ p3, typename reald<REAL,2>::Type * __restrict__ p4,
+                            typename reald<REAL,2>::Type * __restrict__ p5, typename reald<REAL,2>::Type * __restrict__ p6,
+                            typename reald<REAL,2>::Type * __restrict__ p7, typename reald<REAL,2>::Type * __restrict__ p8  )
   {
     // The sample positions (scales) can be either of the _local_ indices 'sample_idx_on_profile' or 'samples_per_projection'-'sample_idx_on_profile'
     // Beware of "skewness" around the origin, i.e. +1 sample one one side
diff --git a/toolboxes/core/vector_td.h b/toolboxes/core/vector_td.h
index 0205a85..4840df9 100644
--- a/toolboxes/core/vector_td.h
+++ b/toolboxes/core/vector_td.h
@@ -14,6 +14,12 @@
 
 #include "core_defines.h"
 
+#include <stdlib.h> // for size_t
+
+#ifdef max
+#undef max
+#endif // max
+
 namespace Gadgetron{
 
   template<class T, unsigned int D> class vector_td
@@ -21,7 +27,9 @@ namespace Gadgetron{
   public:
 
     T vec[D];
-     __inline__ __host__ __device__ vector_td(const vector_td & other){
+    __inline__ __host__ __device__ vector_td(){};
+
+    __inline__ __host__ __device__ vector_td(const vector_td & other){
        	for (unsigned int i = 0; i < D; i++)
            	vec[i] = other[i];
         }
diff --git a/toolboxes/core/vector_td_utilities.h b/toolboxes/core/vector_td_utilities.h
index 1380290..503efca 100644
--- a/toolboxes/core/vector_td_utilities.h
+++ b/toolboxes/core/vector_td_utilities.h
@@ -19,6 +19,7 @@
 #include <vector>
 #include <iostream>
 #include <algorithm>
+#include <cmath>
 
 #ifdef max
 #undef max
@@ -30,8 +31,8 @@
 
 #ifndef __CUDA_ARCH__ // workaround for nvcc
 using std::ceil;  
-using std::abs;   
 using std::floor; 
+using std::abs;   
 using std::sqrt;
 #endif
 
@@ -298,7 +299,7 @@ namespace Gadgetron{
   }
 
   template<class T, unsigned int D> inline
-  vector_td<T,D> from_std_vector( std::vector<T> _vector )
+  vector_td<T,D> from_std_vector( std::vector<T> &_vector )
   {
     vector_td<T,D> out;
     for( unsigned int i=0; i<D; i++ ){
diff --git a/toolboxes/ct/CMakeLists.txt b/toolboxes/ct/CMakeLists.txt
new file mode 100644
index 0000000..fdc9262
--- /dev/null
+++ b/toolboxes/ct/CMakeLists.txt
@@ -0,0 +1,3 @@
+IF(CUDA_FOUND)
+  add_subdirectory(xray)
+ENDIF(CUDA_FOUND)
diff --git a/toolboxes/ct/xray/CMakeLists.txt b/toolboxes/ct/xray/CMakeLists.txt
new file mode 100644
index 0000000..a4a2f34
--- /dev/null
+++ b/toolboxes/ct/xray/CMakeLists.txt
@@ -0,0 +1,3 @@
+if(CUDA_FOUND AND HDF5_FOUND)
+  add_subdirectory(gpu)
+endif()
diff --git a/toolboxes/ct/xray/gpu/CBCT_acquisition.h b/toolboxes/ct/xray/gpu/CBCT_acquisition.h
new file mode 100644
index 0000000..88d0278
--- /dev/null
+++ b/toolboxes/ct/xray/gpu/CBCT_acquisition.h
@@ -0,0 +1,298 @@
+/**
+ * Data format for CBCT acquisition (data and geometry)
+ **/
+
+#pragma once
+
+#include "vector_td_io.h"
+#include "hoNDArray.h"
+#include "hoNDArray_utils.h"
+
+#include <hdf5.h>
+#include <hdf5_hl.h>
+#include <vector>
+#include <sstream>
+#include <stdexcept>
+#include <boost/shared_ptr.hpp>
+
+namespace Gadgetron{
+
+class CBCT_geometry
+{
+public:
+	// Creates an empty geometry: SAD, SDD and FOV are zero, no angles or offsets.
+	CBCT_geometry() {
+		SAD_ = 0.0f;
+		SDD_ = 0.0f;
+		FOV_ = floatd2(0.0f);
+	}
+
+	~CBCT_geometry() {}
+
+	inline void set_SAD( float SAD ) { SAD_ = SAD; }
+	inline float get_SAD() { return SAD_; }
+
+	inline void set_SDD( float SDD ) { SDD_ = SDD; }
+	inline float get_SDD() { return SDD_; }
+
+	inline void set_FOV( floatd2 v ) { FOV_ = v; }
+	inline floatd2 get_FOV() { return FOV_; }
+
+	inline void set_angles( std::vector<float> &angles ) { angles_ = angles; }
+	inline std::vector<float>& get_angles() { return angles_; }
+
+	inline void set_offsets( std::vector<floatd2> &offsets ) { offsets_ = offsets; }
+	inline std::vector<floatd2>& get_offsets() { return offsets_; }
+
+	// Basic output support: writes a short summary of the geometry to os
+	//
+
+	void print( std::ostream& os )
+	{
+		os << "------------ GEOMETRY ------------" << std::endl;
+		if (angles_.size() == 0)
+			os << "Angles: " << "EMPTY" << std::endl;
+		else {
+			// Emit the label once, then the first and last angle and the angle count
+			os << "Angles: " << angles_.front() << " ... " << angles_.back()
+			   << ", number of angles: " << angles_.size() << std::endl;
+		}
+
+		if (offsets_.size() == 0)
+			os << "Offsets: " << "EMPTY" << std::endl;
+		else {
+			os << "Offsets: contains " << offsets_.size() << " elements" << std::endl;
+		}
+
+		os << "SDD: " << SDD_ << "mm" << std::endl;
+		os << "SAD: " << SAD_ << "mm" << std::endl;
+		os << "FOV: " << FOV_ << "mm" << std::endl;
+		os << "----------------------------------" << std::endl;
+	}
+	// Stores the geometry (format version 2) as top-level datasets of the open HDF5 file file_id
+	void save( hid_t file_id )
+	{
+		{
+			unsigned int dataformat_version=2;
+			hsize_t dims[1] = {1};
+			H5LTmake_dataset(file_id, "/geometry_dataformat_version", 1, dims, H5T_NATIVE_UINT, &dataformat_version);
+		}
+		{
+			hsize_t dims[1] = {1};
+			H5LTmake_dataset(file_id, "/SAD", 1, dims, H5T_NATIVE_FLOAT, &SAD_);
+		}
+		{
+			hsize_t dims[1] = {1};
+			H5LTmake_dataset(file_id, "/SDD", 1, dims, H5T_NATIVE_FLOAT, &SDD_);
+		}
+		{
+			hsize_t dims[1] = {2};
+			H5LTmake_dataset(file_id, "/FOV", 1, dims, H5T_NATIVE_FLOAT, &FOV_);
+		}
+		{
+			hsize_t dims[1] = {angles_.size()};
+			H5LTmake_dataset(file_id, "/angles", 1, dims, H5T_NATIVE_FLOAT, &angles_[0]);
+		}
+		{
+			std::vector<float> offsetx, offsety;
+			for( unsigned int i=0; i<offsets_.size(); i++ ){
+				floatd2 offset = offsets_[i];
+				offsetx.push_back(offset[0]);
+				offsety.push_back(offset[1]);
+			}
+			hsize_t dims[1] = {offsets_.size()};
+			H5LTmake_dataset(file_id, "/offsetx", 1, dims, H5T_NATIVE_FLOAT, &offsetx[0]);
+			H5LTmake_dataset(file_id, "/offsety", 1, dims, H5T_NATIVE_FLOAT, &offsety[0]);
+		}
+	}
+
+protected:
+
+	float SAD_;
+	float SDD_;
+	floatd2 FOV_;
+	std::vector<float> angles_;
+	std::vector<floatd2> offsets_;
+};
+
+class CBCT_acquisition {
+
+public:
+
+	CBCT_acquisition() {}
+
+	CBCT_acquisition( boost::shared_ptr< hoNDArray<float> > projections,
+			boost::shared_ptr<CBCT_geometry> geometry )
+	{
+		geometry_ = geometry;
+		projections_ = projections;
+	}
+
+	virtual ~CBCT_acquisition() {}
+
+	inline void set_geometry( boost::shared_ptr<CBCT_geometry> geometry ) {
+		geometry_ = geometry;
+	}
+
+	inline boost::shared_ptr<CBCT_geometry> get_geometry() {
+		return geometry_; }
+
+	inline void set_projections( boost::shared_ptr< hoNDArray<float> > projections ) {
+		projections_ = projections;
+	}
+
+	inline boost::shared_ptr< hoNDArray<float> > get_projections() {
+		return projections_;
+	}
+	// Replaces the projections by the result of num_downsamples successive downsample<float,2> passes
+	void downsample( unsigned int num_downsamples )
+	{
+		for (unsigned int k = 0; k < num_downsamples; k++)
+			projections_ = Gadgetron::downsample<float,2>(projections_.get());
+	}
+	// Reads geometry (format version 2) and projections (format version 1); throws std::runtime_error on failure
+	void load( std::string filename )
+	{
+		// Open hdf5 file. The guard closes it on every exit path, including the throws below.
+		//
+
+		hid_t file_id = H5Fopen (filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
+		if (file_id < 0) throw std::runtime_error("Error opening file " + filename);
+		struct file_guard { hid_t id; ~file_guard() { H5Fclose(id); } } guard = { file_id };
+		// Load geometry. This loader is for version 2 of the format
+		//
+
+		unsigned int geom_dataformat_version;
+		herr_t errCode;
+		errCode = H5LTread_dataset (file_id, "/geometry_dataformat_version", H5T_NATIVE_UINT, &geom_dataformat_version);
+
+		if (errCode < 0){
+			throw std::runtime_error("Error reading /geometry_dataformat_version from file.");
+		}
+		unsigned int needed = 2;
+		if (!(geom_dataformat_version == needed)) {
+			std::stringstream ss;
+			ss << "wrong geometry data version inside hdf5 file, found: "
+					<< geom_dataformat_version << ", needed: " << needed << std::endl;
+			throw std::runtime_error(ss.str());
+		}
+
+		// Allocate new geometry
+		//
+
+		geometry_ = boost::shared_ptr<CBCT_geometry>(new CBCT_geometry());
+
+		// Get angles array
+		//
+
+		hsize_t dim;
+		errCode = H5LTget_dataset_info(file_id,"/angles",&dim,NULL,NULL);
+		if (errCode < 0) 	throw std::runtime_error("Error getting /angles dataset info from file.");
+
+		std::vector<float> angles (dim,0.0f);
+		geometry_->set_angles(angles);
+		errCode=H5LTread_dataset (file_id, "/angles", H5T_NATIVE_FLOAT, &geometry_->get_angles()[0]);
+		if (errCode < 0) 	throw std::runtime_error("Error reading /angles from file.");
+
+		// Get offsets array
+		//
+
+		errCode=H5LTget_dataset_info(file_id,"/offsetx",&dim,NULL,NULL);
+		if (errCode < 0) 	throw std::runtime_error("Error getting /offsetx dataset info from file.");
+		std::vector<float> offsets_x = std::vector<float>(dim,0.0f);
+		errCode=H5LTread_dataset (file_id, "/offsetx", H5T_NATIVE_FLOAT, &offsets_x[0]);
+		if (errCode < 0) 	throw std::runtime_error("Error reading /offsetx from file.");
+
+		errCode=H5LTget_dataset_info(file_id,"/offsety",&dim,NULL,NULL);
+		if (errCode < 0) 	throw std::runtime_error("Error getting /offsety dataset info from file.");
+		std::vector<float> offsets_y = std::vector<float>(dim,0.0f);
+		errCode=H5LTread_dataset (file_id, "/offsety", H5T_NATIVE_FLOAT, &offsets_y[0]);
+		if (errCode < 0) 	throw std::runtime_error("Error reading /offsety from file.");
+
+		if( offsets_x.size() != offsets_y.size() ){
+			throw std::runtime_error("CBCT_geometry::load : x/y offset arrays has different lengths");
+		}
+
+		geometry_->get_offsets().clear();
+		for( unsigned int i=0; i<offsets_x.size(); i++ ){
+			geometry_->get_offsets().push_back(floatd2(offsets_x[i], offsets_y[i]));
+		}
+
+		// Test data format of the projections
+		//
+
+		unsigned int proj_dataformat_version;
+		errCode=H5LTread_dataset (file_id, "/projection_dataformat_version", H5T_NATIVE_UINT, &proj_dataformat_version);
+		if (errCode < 0) 	throw std::runtime_error("Error reading /projection_dataformat_version from file.");
+
+		needed = 1;
+		if (!(proj_dataformat_version == needed)) {
+			std::stringstream ss;
+			ss << "wrong projection data format version inside hdf5 file, found: "
+					<< proj_dataformat_version << ", needed: " << needed;
+			throw std::runtime_error(ss.str());
+		}
+
+		hsize_t vec_dim[3];
+		errCode=H5LTget_dataset_info(file_id,"/projections",vec_dim,NULL,NULL);
+		if (errCode < 0) 	throw std::runtime_error("Error getting /projections dataset info from file.");
+		std::vector<size_t> dims;
+		dims.push_back(vec_dim[2]);
+		dims.push_back(vec_dim[1]);
+		dims.push_back(vec_dim[0]);
+
+		projections_ = boost::shared_ptr<hoNDArray<float> >(new hoNDArray<float>(&dims));
+		errCode=H5LTread_dataset (file_id,"/projections", H5T_NATIVE_FLOAT, projections_->get_data_ptr());
+		if (errCode < 0) 	throw std::runtime_error("Error reading /projections from file.");
+
+		// Get SAD / SDD / FOV
+		//
+
+		float SAD, SDD;
+		floatd2 FOV;
+
+		errCode=H5LTread_dataset (file_id, "/SAD", H5T_NATIVE_FLOAT, &SAD);
+		if (errCode < 0) 	throw std::runtime_error("Error reading /SAD from file.");
+		errCode=H5LTread_dataset (file_id, "/SDD", H5T_NATIVE_FLOAT, &SDD);
+		if (errCode < 0) 	throw std::runtime_error("Error reading /SDD from file.");
+		errCode=H5LTread_dataset (file_id, "/FOV", H5T_NATIVE_FLOAT, &FOV);
+		if (errCode < 0){
+			floatd2 spacing;
+			errCode=H5LTread_dataset (file_id, "/spacing", H5T_NATIVE_FLOAT, &spacing);
+			if (errCode < 0) throw std::runtime_error("Error reading /FOV from file.");
+			FOV[0] = spacing[0]*dims[0];
+			FOV[1] = spacing[1]*dims[1];
+		}
+
+		geometry_->set_SAD(SAD);
+		geometry_->set_SDD(SDD);
+		geometry_->set_FOV(FOV);
+		// file_id is closed by the guard's destructor
+	}
+	// Writes projections (format version 1) and the geometry to filename, truncating an existing file
+	void save( std::string filename )
+	{
+		hid_t file_id = H5Fcreate (filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+		if (file_id < 0) throw std::runtime_error("Error creating file " + filename);
+		unsigned int dataformat_version=1;
+		hsize_t dims[1] = {1};
+		H5LTmake_dataset(file_id,"/projection_dataformat_version", 1, dims, H5T_NATIVE_UINT, &dataformat_version);
+		// Dimensions are written in reverse order, mirroring the order in which load() reads them
+		boost::shared_ptr<std::vector<size_t > > pdims = projections_->get_dimensions();
+		hsize_t *dims2 = new hsize_t[pdims->size()];
+		for (size_t i = 0; i < pdims->size(); i++)
+			dims2[i] = pdims->at(pdims->size()-i-1);
+		H5LTmake_dataset(file_id,"/projections", pdims->size(), dims2, H5T_NATIVE_FLOAT, projections_->get_data_ptr());
+		delete[] dims2;
+
+		geometry_->save(file_id);
+
+		H5Fclose (file_id);
+	}
+
+protected:
+
+	boost::shared_ptr<CBCT_geometry> geometry_;
+	boost::shared_ptr< hoNDArray<float> > projections_;
+};
+}
diff --git a/toolboxes/ct/xray/gpu/CBCT_binning.h b/toolboxes/ct/xray/gpu/CBCT_binning.h
new file mode 100644
index 0000000..5812f64
--- /dev/null
+++ b/toolboxes/ct/xray/gpu/CBCT_binning.h
@@ -0,0 +1,166 @@
+/**
+ * Temporal binning for CBCT
+ **/
+
+#pragma once
+
+#include "CBCT_acquisition.h"
+
+#include <hdf5.h>
+#include <hdf5_hl.h>
+#include <vector>
+#include <set>
+#include <stdexcept>
+#include <boost/iterator/counting_iterator.hpp>
+
+namespace Gadgetron {
+
+  class CBCT_binning
+  {
+
+  public:
+
+    CBCT_binning() {}
+    CBCT_binning( std::vector< std::vector<unsigned int> > binning ) : binning_(binning) {}
+
+    ~CBCT_binning() {}
+
+    inline unsigned int get_number_of_bins()
+    {
+      return binning_.size();
+    }
+    // Total number of projection indices, summed over all bins
+    inline unsigned int get_number_of_projections()
+    {
+      unsigned int acc = 0;
+      for( unsigned int i=0; i<binning_.size(); i++ )
+        acc += binning_[i].size();
+      return acc;
+    }
+
+    inline unsigned int get_number_of_projections( unsigned int bin )
+    {
+      if( bin >= binning_.size() )
+        throw std::runtime_error("CBCT_binning::get_number_of_projections(int) : bin is out of range");
+      else
+        return binning_[bin].size();
+    }
+    // Largest projection index found in any bin, or -1 when there are no entries
+    inline int get_maximum_projection_index()
+    {
+      int max_proj = -1;
+      for( unsigned int i=0; i<binning_.size(); i++ )
+        for( unsigned int j=0; j<binning_[i].size(); j++ )
+          if( int(binning_[i][j]) > max_proj ) 
+            max_proj = binning_[i][j];
+      return max_proj;
+    }
+
+    inline void set_bins( std::vector< std::vector<unsigned int> > &bins ) {
+      binning_ = bins;
+    }
+
+    inline std::vector< std::vector<unsigned int> > get_bins() {
+      return binning_;
+    }
+    // Replaces bin bin_number, or appends it when bin_number equals the current number of bins
+    inline void set_bin( std::vector<unsigned int> &bin, unsigned int bin_number )
+    {
+      if( bin_number > binning_.size() )
+        throw std::runtime_error("CBCT_binning::set_bin() : bin is out of range");
+      else if( bin_number == binning_.size() )
+        binning_.push_back(bin);
+      else
+        binning_[bin_number] = bin;
+    }
+
+    inline std::vector<unsigned int> get_bin( unsigned int bin )
+    {
+      if( bin >= binning_.size() )
+        throw std::runtime_error("CBCT_binning::get_bin() : bin is out of range");
+      else
+        return binning_[bin];
+    }
+    // Appends one bin holding the indices 0 .. num_projections-1
+    inline void set_as_default_3d_bin( unsigned int num_projections )
+    {
+      binning_.push_back( std::vector<unsigned int>( boost::counting_iterator<unsigned int>(0),
+                                                     boost::counting_iterator<unsigned int>(num_projections) ));
+    }
+    // Returns a binning with a single bin: the sorted union of the indices of all bins
+    CBCT_binning get_3d_binning(){
+    	std::vector<std::vector<unsigned int> > bin_3d;
+    	std::set<unsigned int> uniques;
+    	for (size_t i = 0; i < binning_.size(); i++){
+    		uniques.insert(binning_[i].begin(),binning_[i].end());
+    	}
+
+    	bin_3d.push_back(std::vector<unsigned int>(uniques.begin(),uniques.end()));
+    	return CBCT_binning(bin_3d);
+    }
+    // Reads the bins from an HDF5 binning file (format version 1); throws std::runtime_error on failure
+    void load( std::string filename )
+    {
+      // Open file and make sure it is the expected version.
+      // The guard closes the file on every exit path, including the throws below.
+
+      hid_t file_id = H5Fopen (filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
+      if (file_id < 0) throw std::runtime_error("Error opening file " + filename);
+      struct file_guard { hid_t id; ~file_guard() { H5Fclose(id); } } guard = { file_id };
+      unsigned int dataformat_version;
+      herr_t errCode = H5LTread_dataset (file_id, "/binning_dataformat_version", H5T_NATIVE_UINT, &dataformat_version);
+
+      if(errCode < 0)
+        throw std::runtime_error("Error reading /binning_dataformat_version");
+
+      unsigned int needed = 1;
+      if (!(dataformat_version == needed)) {
+        std::stringstream ss;
+        ss << "wrong format of binning file, found: " << dataformat_version << ", needed: " << needed;
+        throw std::runtime_error(ss.str());
+      }
+
+      // And get the bins
+      //
+
+      binning_.clear();
+
+      unsigned int numBins;
+      errCode=H5LTread_dataset (file_id, "/numBins", H5T_NATIVE_UINT, &numBins);
+      if(errCode < 0)
+        throw std::runtime_error("Error reading /numBins");
+      //std::cout << "Found " << numBins << " bins in file" << filename << std::endl;
+
+      // Ok, so this really isn't very elegant.
+      // A folder in the hdf5 file containing the data would be better...
+      //
+
+      for (unsigned int i=1; i<=numBins; i++) {
+        std::stringstream path;
+        path << "/bin_" << i;
+        hsize_t dim;
+        errCode=H5LTget_dataset_info(file_id,path.str().c_str(),&dim,NULL,NULL);
+        if(errCode < 0)
+          throw std::runtime_error("Error reading bin info");
+        binning_.push_back(std::vector<unsigned int>(dim,0u));
+        errCode=H5LTread_dataset (file_id, path.str().c_str(), H5T_NATIVE_UINT, &binning_.back()[0]);
+        if(errCode < 0)
+          throw std::runtime_error("Error reading bin data");
+      }
+    }
+
+    void print( std::ostream &os = std::cout )
+    {
+      os << "---------- BINNING DATA ----------" << std::endl;
+      os << "Number of bins: " << binning_.size() << std::endl;
+      for (unsigned int b=0; b<binning_.size(); b++)
+        os << "Number of projections in bin[" << b
+           << "]: " << binning_[b].size() << std::endl;
+      os << "----------------------------------" << std::endl;
+    }
+
+  protected:
+    std::vector< std::vector<unsigned int> > binning_;
+  };
+}
+
diff --git a/toolboxes/ct/xray/gpu/CMakeLists.txt b/toolboxes/ct/xray/gpu/CMakeLists.txt
new file mode 100644
index 0000000..575c4f4
--- /dev/null
+++ b/toolboxes/ct/xray/gpu/CMakeLists.txt
@@ -0,0 +1,49 @@
+find_package(HDF5 REQUIRED HL)
+# Define the build-time symbol on Windows (presumably consumed by gpuxray_export.h - confirm)
+if (WIN32)
+  ADD_DEFINITIONS(-D__BUILD_GADGETRON_GPUXRAY__)
+endif (WIN32)
+
+# Header search paths for the sources of this toolbox
+include_directories(
+  ${CMAKE_SOURCE_DIR}/toolboxes/core
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
+  ${CMAKE_SOURCE_DIR}/toolboxes/nfft/gpu
+  ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow/cpu
+  ${CMAKE_SOURCE_DIR}/toolboxes/solvers/cpu
+  ${CMAKE_SOURCE_DIR}/toolboxes/solvers
+  ${CMAKE_SOURCE_DIR}/toolboxes/solvers/gpu
+  ${CMAKE_SOURCE_DIR}/toolboxes/operators
+  ${CUDA_INCLUDE_DIRS}
+  ${Boost_INCLUDE_DIR}
+  ${HDF5_INCLUDE_DIR}
+  ${HDF5_INCLUDE_DIR}/cpp
+  ${ISMRMRD_INCLUDE_DIR}
+  ${ARMADILLO_INCLUDE_DIRS}
+)
+# Shared CUDA library built from the cone beam projection sources
+cuda_add_library(gadgetron_toolbox_gpuxray SHARED
+  conebeam_projection.cu 
+  hoCuConebeamProjectionOperator.cpp 
+  )
+# Stamp the library with the Gadgetron version and soname version
+set_target_properties(gadgetron_toolbox_gpuxray PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+# Libraries this toolbox links against
+target_link_libraries(
+  gadgetron_toolbox_gpuxray gadgetron_toolbox_gpucore gadgetron_toolbox_gpunfft
+  gadgetron_toolbox_cpucore_math
+  ${CUDA_LIBRARIES}
+)
+# Install the library into lib and the public headers into include
+install(TARGETS gadgetron_toolbox_gpuxray DESTINATION lib COMPONENT main)
+
+install(FILES 
+  CBCT_acquisition.h
+  CBCT_binning.h
+  conebeam_projection.h
+  hoCuConebeamProjectionOperator.h
+  gpuxray_export.h 
+  DESTINATION include COMPONENT main)
diff --git a/toolboxes/ct/xray/gpu/conebeam_projection.cu b/toolboxes/ct/xray/gpu/conebeam_projection.cu
new file mode 100644
index 0000000..0774dbd
--- /dev/null
+++ b/toolboxes/ct/xray/gpu/conebeam_projection.cu
@@ -0,0 +1,1151 @@
+//
+// This code performs 3D cone beam CT forwards and backwards projection
+//
+
+#include "conebeam_projection.h"
+#include "float3x3.h"
+#include "hoCuNDArray_math.h"
+#include "vector_td.h"
+#include "cuNDArray_elemwise.h"
+#include "cuNDArray_operators.h"
+#include "cuNDArray_utils.h"
+#include "cuNFFT.h"
+#include "check_CUDA.h"
+#include "GPUTimer.h"
+#include "cudaDeviceManager.h"
+#include "hoNDArray_fileio.h"
+#include "setup_grid.h"
+
+#include <cuda_runtime_api.h>
+#include <math_constants.h>
+#include <cufft.h>
+#include <iostream>
+#include <cmath>
+#include <algorithm>
+#include <vector>
+
+#define PS_ORIGIN_CENTERING // The forwards projector adds 0.5 to the projection pixel index when this is defined
+#define IS_ORIGIN_CENTERING // The backwards projector adds 0.5 to the image voxel index when this is defined
+//#define FLIP_Z_AXIS // When defined, both projectors negate the z coordinate
+
+// Read the projection/image data respectively as a texture (for input)
+// - taking advantage of the cache and hardware interpolation
+// - both texture references use linear filtering and border addressing
+
+#define NORMALIZED_TC 1 // First constructor argument of the two texture references below
+
+static texture<float, 3, cudaReadModeElementType> 
+image_tex( NORMALIZED_TC, cudaFilterModeLinear, cudaAddressModeBorder );
+
+static texture<float, cudaTextureType2DLayered, cudaReadModeElementType> 
+projections_tex( NORMALIZED_TC, cudaFilterModeLinear, cudaAddressModeBorder );
+
+namespace Gadgetron 
+{
+
+// Convert an angle from degrees to radians (usable on host and device)
+//
+static inline __host__ __device__
+float degrees2radians(float degree) {
+	const float radians_per_degree = CUDART_PI_F/180.0f;
+	return degree * radians_per_degree;
+}
+
+// Utilities for filtering in frequency space
+//
+
+// Batched 1D real-to-complex FFT along the first dimension of a 3D array.
+//
+// data : device array to transform (not modified by this function's own code)
+//
+// Returns a newly allocated complex array whose first dimension has
+// (n>>1)+1 entries, n being the first dimension of 'data'; the two remaining
+// dimensions are unchanged.
+//
+// Throws std::runtime_error for a null or non-3D input and when cuFFT
+// reports a failure.
+//
+static boost::shared_ptr< cuNDArray<float_complext> > cb_fft( cuNDArray<float> *data )
+{
+	if( data == 0x0 )
+		throw std::runtime_error("CB FFT : illegal input pointer provided");
+
+	std::vector<size_t> in_dims = *data->get_dimensions();
+
+	// Three dimensions are indexed below
+	if( in_dims.size() != 3 )
+		throw std::runtime_error("CB FFT : input array must be three-dimensional");
+
+	std::vector<size_t> out_dims;
+	out_dims.push_back((in_dims[0]>>1)+1);
+	out_dims.push_back(in_dims[1]);
+	out_dims.push_back(in_dims[2]);
+
+	boost::shared_ptr< cuNDArray<float_complext> > result( new cuNDArray<float_complext>(&out_dims) );
+
+	// cuFFT takes 'int' sizes. Convert by value; reinterpreting the size_t
+	// storage as int only yields the right number on little-endian machines.
+	int fft_length = static_cast<int>(in_dims[0]);
+	const int in_dist = static_cast<int>(in_dims[0]);
+	const int out_dist = static_cast<int>(out_dims[0]);
+	const int batch_count = static_cast<int>(in_dims[1]*in_dims[2]);
+
+	cufftHandle plan;
+
+	if( cufftPlanMany( &plan, 1, &fft_length, 0x0, 1, in_dist, 0x0, 1, out_dist, CUFFT_R2C, batch_count ) != CUFFT_SUCCESS) {
+		throw std::runtime_error("CB FFT plan failed");
+	}
+
+	if( cufftExecR2C( plan, data->get_data_ptr(), (cuFloatComplex*) result->get_data_ptr() ) != CUFFT_SUCCESS ) {
+		cufftDestroy(plan); // do not leak the plan on the error path
+		throw std::runtime_error("CB FFT execute failed");
+	}
+
+	if( cufftDestroy(plan) != CUFFT_SUCCESS) {
+		throw std::runtime_error("CB FFT failed to destroy plan");
+	}
+
+	return result;
+}
+
+// Batched 1D complex-to-real inverse FFT along the first dimension.
+//
+// in_data  : 3D complex device array (e.g. as produced by cb_fft)
+// out_data : real device array receiving the result; its first dimension is
+//            the transform length. The result is divided by that length.
+//
+// Throws std::runtime_error for null pointers, a non-3D input, an output
+// without dimensions, and when cuFFT reports a failure.
+//
+static void cb_ifft( cuNDArray<float_complext> *in_data, cuNDArray<float> *out_data )
+{
+	if( in_data == 0x0 || out_data == 0x0 )
+		throw std::runtime_error("CB iFFT : illegal input or output pointer provided");
+
+	std::vector<size_t> in_dims = *in_data->get_dimensions();
+	std::vector<size_t> out_dims = *out_data->get_dimensions();
+
+	// in_dims[0..2] and out_dims[0] are indexed below
+	if( in_dims.size() != 3 || out_dims.empty() )
+		throw std::runtime_error("CB iFFT : illegal input or output array dimensions");
+
+	// cuFFT takes 'int' sizes. Convert by value; reinterpreting the size_t
+	// storage as int only yields the right number on little-endian machines.
+	int fft_length = static_cast<int>(out_dims[0]);
+	const int in_dist = static_cast<int>(in_dims[0]);
+	const int out_dist = static_cast<int>(out_dims[0]);
+	const int batch_count = static_cast<int>(in_dims[1]*in_dims[2]);
+
+	cufftHandle plan;
+
+	if( cufftPlanMany( &plan, 1, &fft_length, 0x0, 1, in_dist, 0x0, 1, out_dist, CUFFT_C2R, batch_count ) != CUFFT_SUCCESS) {
+		throw std::runtime_error("CB iFFT plan failed");
+	}
+
+	if( cufftExecC2R( plan, (cuFloatComplex*) in_data->get_data_ptr(), out_data->get_data_ptr() ) != CUFFT_SUCCESS ) {
+		cufftDestroy(plan); // do not leak the plan on the error path
+		throw std::runtime_error("CB iFFT execute failed");
+	}
+
+	if( cufftDestroy(plan) != CUFFT_SUCCESS) {
+		throw std::runtime_error("CB iFFT failed to destroy plan");
+	}
+
+	// Divide by the transform length, as the original code does after the C2R pass
+	*out_data /= float(out_dims[0]);
+}
+
+//
+// Redundancy correction for short scan mode
+// - i.e. for less than a full rotation of data
+//
+// See "Optimal short scan convolution reconstruction for fanbeam CT", Dennis Parker, Med. Phys. 9(2) 1982
+// and (for the implementation) "Parker weights revisited", Wesarg et al, Med. Phys. 29(3) 2002.
+//
+
+static __device__ const float epsilon = 0.001f; // Small constant added in B() and subtracted inside the redundancy weights
+
+static __inline__ __device__ float S( float beta )
+{
+	// Smooth step: 0 up to -0.5, 1 from 0.5 on, a sine ramp in between
+	if( beta <= -0.5f ) return 0.0f;
+	const bool on_ramp = (beta > -0.5f) && (beta < 0.5f);
+	return on_ramp ? 0.5f*(1.0f+sinf(CUDART_PI_F*beta)) : 1.0f;
+}
+
+static __inline__ __device__ float B( float alpha, float delta ) // alpha: ray angle, delta: half-fan angle (as named by the calling kernel)
+{
+	return 2.0f*(delta-alpha)+epsilon; // epsilon keeps the value away from zero when alpha equals delta
+}
+
+static __inline__ __device__ float b( float alpha, float delta )
+{
+	// Fraction of B(alpha, delta) that is returned; with a fraction of 1
+	// the formulae reduce to the conventional Parker weights
+	const float fraction = 0.1f;
+	const float full_width = B(alpha, delta);
+	return fraction*full_width;
+}
+
+__global__ void
+redundancy_correct_kernel( float *projections, // Projections, scaled in place by the redundancy weight
+		const float * __restrict__ angles, // Projection angles in degrees, indexed by the projection coordinate
+		uintd3 dims, // Dimensions of the projections array
+		float delta  // The half-fan angle
+)
+{
+	const unsigned int idx = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x+threadIdx.x;
+	const unsigned int num_elements = prod(dims);
+
+	if( idx < num_elements ){
+		// One thread per projection element; surplus threads do nothing
+		const float in = projections[idx];
+		const uintd3 co = idx_to_co<3>( idx, dims );
+		const float tan_delta = tanf(delta);
+		const float alpha = -atanf((float(co[0])/float(dims[0])-0.5f)*2.0f*tan_delta); // Ray angle derived from the column co[0]
+		const float beta = degrees2radians(angles[co[2]]); // Projection angle in radians
+		// The weight is assembled from four evaluations of the smooth step S
+		float omega = 0.5f*(S(beta/b(alpha, delta)-0.5f)+
+				S((beta+2.0f*(alpha-delta)-epsilon)/b(alpha, delta)+0.5f)-
+				S((beta-CUDART_PI_F+2.0f*alpha)/b(-alpha, delta)-0.5f)-
+				S((beta-CUDART_PI_F-2.0f*delta-epsilon)/b(-alpha, delta)+0.5f));
+
+		projections[idx] = in*omega;
+	}
+}
+
+void
+redundancy_correct( cuNDArray<float> *projections,
+		float *angles_DevPtr,
+		float delta // The half-fan angle in radians
+)
+{
+	// Scales a 3D stack of projections in place with the short scan
+	// redundancy weights. 'angles_DevPtr' is passed on to the kernel as is.
+
+	// Reject a missing or non-3D projections array
+	//
+
+	if( projections == 0x0 )
+		throw std::runtime_error("Error: redundancy_correct: illegal array pointer provided");
+
+	if( projections->get_number_of_dimensions() != 3 )
+		throw std::runtime_error("Error: redundancy_correct: projections array must be three-dimensional");
+
+	// Extent of the stack: width, height, number of projections
+	//
+
+	const size_t width = projections->get_size(0);
+	const size_t height = projections->get_size(1);
+	const size_t count = projections->get_size(2);
+	uintd3 stack_dims(width, height, count);
+
+	// Configure the launch for all elements and run the kernel
+	//
+
+	dim3 block_dim, grid_dim;
+	setup_grid( prod(stack_dims), &block_dim, &grid_dim );
+
+	redundancy_correct_kernel<<< grid_dim, block_dim >>>( projections->get_data_ptr(), angles_DevPtr, stack_dims, delta );
+	CHECK_FOR_CUDA_ERROR();
+}
+
+
+/***
+ * Offset (redundancy) weighting following Wang, Med. Phys 2002, doi: 10.1118/1.1489043.
+ * Scales the projections in place; elements with |t| > |omega| are left untouched.
+ */
+__global__ static void
+offset_correct_kernel( float *projections,
+		const floatd2 * __restrict__ offsets,
+		uintd3 dims, // Dimensions of the projections array
+		floatd2 phys_dims, // Physical dimensions in mm
+		float SAD, // Source origin distance
+		float SDD // Source detector distance
+)
+{
+	const unsigned int element = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x+threadIdx.x;
+	const unsigned int element_count = prod(dims);
+
+	// Surplus threads have nothing to do
+	if( !(element < element_count) )
+		return;
+
+	const uintd3 co = idx_to_co<3>( element, dims );
+	const floatd2 offset = offsets[co[2]];
+
+	// Horizontal position of this element (including the offset) and the
+	// half width of the weighted region
+	const float t = phys_dims[0]*(float(co[0])/(float(dims[0]))-0.5f)+offset[0];
+	const float omega = phys_dims[0]/2.0f-fabs(offset[0]);
+
+	// Only elements with |t| <= |omega| are weighted
+	if( !(fabs(t) <= fabs(omega)) )
+		return;
+
+	// The weight is the square of a sine ramp
+	const float ramp = sinf(CUDART_PI_F*(t+omega)/(4.0f*omega));
+	const float weight = ramp*ramp;
+	projections[element] *= weight;
+}
+
+static void
+offset_correct( cuNDArray<float> *projections,
+		floatd2* offsets, // Ptr to cuda array
+		floatd2 phys_dims,
+		float SAD, // Source origin distance
+		float SDD // Source detector distance
+)
+{
+	// Applies offset_correct_kernel in place to a 3D stack of projections.
+
+	// Reject a missing or non-3D projections array
+	//
+
+	if( projections == 0x0 )
+		throw std::runtime_error("Error: offset_correct: illegal array pointer provided");
+
+	if( projections->get_number_of_dimensions() != 3 )
+		throw std::runtime_error("Error: offset_correct: projections array must be three-dimensional");
+
+	// Extent of the stack: width, height, number of projections
+	//
+
+	const size_t width = projections->get_size(0);
+	const size_t height = projections->get_size(1);
+	const size_t count = projections->get_size(2);
+	uintd3 stack_dims(width, height, count);
+
+	// Configure the launch for all elements and run the kernel
+	//
+
+	dim3 block_dim, grid_dim;
+	setup_grid( prod(stack_dims), &block_dim, &grid_dim );
+
+	offset_correct_kernel<<< grid_dim, block_dim >>>( projections->get_data_ptr(), offsets, stack_dims, phys_dims, SAD, SDD );
+	CHECK_FOR_CUDA_ERROR();
+}
+
+
+/***
+ * Offset (redundancy) weighting following Wang, Med. Phys 2002, doi: 10.1118/1.1489043.
+ * Variant that multiplies by the sine ramp itself rather than by its square.
+ */
+__global__ static void
+offset_correct_kernel_sqrt( float *projections,
+		const floatd2 * __restrict__ offsets,
+		uintd3 dims, // Dimensions of the projections array
+		floatd2 phys_dims, // Physical dimensions in mm
+		float SAD, // Source origin distance
+		float SDD // Source detector distance
+)
+{
+	const unsigned int element = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x+threadIdx.x;
+	const unsigned int element_count = prod(dims);
+
+	// Surplus threads have nothing to do
+	if( !(element < element_count) )
+		return;
+
+	const uintd3 co = idx_to_co<3>( element, dims );
+	const floatd2 offset = offsets[co[2]];
+
+	// Horizontal position of this element (including the offset) and the
+	// half width of the weighted region
+	const float t = phys_dims[0]*(float(co[0])/(float(dims[0]))-0.5f)+offset[0];
+	const float omega = phys_dims[0]/2.0f-fabs(offset[0]);
+
+	// Only elements with |t| <= |omega| are weighted
+	if( !(fabs(t) <= fabs(omega)) )
+		return;
+
+	const float ramp = sinf(CUDART_PI_F*(t+omega)/(4.0f*omega));
+	projections[element] *= ramp;
+}
+
+static void
+offset_correct_sqrt( cuNDArray<float> *projections,
+		floatd2* offsets, // Ptr to cuda array
+		floatd2 phys_dims,
+		float SAD, // Source origin distance
+		float SDD // Source detector distance
+)
+{
+	// Applies offset_correct_kernel_sqrt in place to a 3D stack of projections.
+
+	// Reject a missing or non-3D projections array
+	//
+
+	if( projections == 0x0 )
+		throw std::runtime_error("Error: offset_correct: illegal array pointer provided");
+
+	if( projections->get_number_of_dimensions() != 3 )
+		throw std::runtime_error("Error: offset_correct: projections array must be three-dimensional");
+
+	// Extent of the stack: width, height, number of projections
+	//
+
+	const size_t width = projections->get_size(0);
+	const size_t height = projections->get_size(1);
+	const size_t count = projections->get_size(2);
+	uintd3 stack_dims(width, height, count);
+
+	// Configure the launch for all elements and run the kernel
+	//
+
+	dim3 block_dim, grid_dim;
+	setup_grid( prod(stack_dims), &block_dim, &grid_dim );
+
+	offset_correct_kernel_sqrt<<< grid_dim, block_dim >>>( projections->get_data_ptr(), offsets, stack_dims, phys_dims, SAD, SDD );
+	CHECK_FOR_CUDA_ERROR();
+}
+
+
+// Applies the square root offset weights (offset_correct_sqrt) in place to a
+// host-resident 3D stack of projections, passing the data through the device
+// in batches of whole projections.
+//
+// projections   : host array (x, y, projection), modified in place
+// offsets       : one offset per projection; at least as many as projections
+// ps_dims_in_mm : physical size of a projection
+// SDD, SAD      : source detector / source origin distance
+//
+// Throws std::runtime_error on invalid input.
+//
+void apply_offset_correct(hoCuNDArray<float>* projections, std::vector<floatd2>& offsets, floatd2 ps_dims_in_mm, float SDD, float SAD){
+
+	if( projections == 0x0 ){
+		throw std::runtime_error("Error: apply_offset_correct: illegal array pointer provided");
+	}
+
+	std::vector<size_t> dims = *projections->get_dimensions();
+
+	if( dims.size() != 3 ){
+		throw std::runtime_error("Error: apply_offset_correct: projections array must be three-dimensional");
+	}
+
+	const size_t projection_size = dims[0]*dims[1];
+	const size_t num_projections = dims[2];
+
+	if( offsets.size() < num_projections ){
+		throw std::runtime_error("Error: apply_offset_correct: inconsistent sizes of input arrays/vectors");
+	}
+
+	// Nothing to do for an empty stack (also avoids dividing by zero below)
+	if( projection_size == 0 || num_projections == 0 )
+		return;
+
+	thrust::device_vector<floatd2> offsets_devVec(offsets);
+
+	// Number of whole projections that fit in the free device memory,
+	// but never less than one nor more than there are projections
+	size_t batch_size = cudaDeviceManager::Instance()->getFreeMemory()/(projection_size*sizeof(float));
+	batch_size = std::max(batch_size, size_t(1));
+	batch_size = std::min(batch_size, num_projections);
+
+	for( size_t first = 0; first < num_projections; first += batch_size ){
+
+		std::vector<size_t> projection_dims = dims;
+		projection_dims[2] = std::min(batch_size, num_projections-first);
+
+		// Make a view of the batch of projections. The host offset is counted
+		// in elements, i.e. in whole projections of projection_size floats.
+		hoCuNDArray<float> projections_view(projection_dims, projections->get_data_ptr()+first*projection_size);
+		cuNDArray<float> cu_projections(projections_view); //Copy to device
+		floatd2* cu_offsets = thrust::raw_pointer_cast(&offsets_devVec[first]);
+		offset_correct_sqrt(&cu_projections,cu_offsets,ps_dims_in_mm,SAD,SDD);
+
+		// Copy the weighted batch back into the host array
+		CUDA_CALL(cudaMemcpy(projections_view.get_data_ptr(),cu_projections.get_data_ptr(),cu_projections.get_number_of_bytes(),cudaMemcpyDeviceToHost));
+	}
+}
+
+//
+// Forwards projection
+//
+
+__global__ void
+conebeam_forwards_projection_kernel( float * __restrict__ projections, // Output: one line integral per projection pixel
+		float * __restrict__ angles, // Projection angles in degrees, one per projection of this launch
+		floatd2 *offsets, // Per-projection offsets added to the plate coordinates (mm)
+		floatd3 is_dims_in_pixels, // Image size in voxels (only read when IS_ORIGIN_CENTERING is undefined)
+		floatd3 is_dims_in_mm, // Image size in mm
+		intd2 ps_dims_in_pixels_int, // Projection size in pixels
+		floatd2 ps_dims_in_mm, // Projection size in mm
+		int num_projections, // Number of projections of this launch
+		float SDD, // Source detector distance
+		float SAD, // Source origin distance
+		int num_samples_per_ray ) // Number of texture samples taken along each ray
+{
+	const int idx = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x+threadIdx.x;
+	const int num_elements = prod(ps_dims_in_pixels_int)*num_projections;
+	// One thread per output element; surplus threads do nothing
+	if( idx < num_elements){
+
+		const intd3 co = idx_to_co<3>( idx, intd3(ps_dims_in_pixels_int[0], ps_dims_in_pixels_int[1], num_projections) );
+
+		// Projection space dimensions and spacing
+		// - the spacing is the size of one projection pixel in mm
+
+		const floatd2 ps_dims_in_pixels = floatd2(ps_dims_in_pixels_int[0], ps_dims_in_pixels_int[1]);
+		const floatd2 ps_spacing = ps_dims_in_mm / ps_dims_in_pixels;
+
+		// Determine projection angle and rotation matrix
+		// - the angle is stored in degrees and converted here
+
+		const float angle = angles[co[2]];
+		const float3x3 rotation = calcRotationMatrixAroundZ(degrees2radians(angle));
+
+		// Find start and end point for the line integral (image space)
+		// - the start point is (0,-SAD,0) rotated by the projection angle
+
+		floatd3 startPoint = floatd3(0.0f, -SAD, 0.0f);
+		startPoint = mul(rotation, startPoint);
+
+		// Projection plate indices
+		//
+
+#ifdef PS_ORIGIN_CENTERING
+		const floatd2 ps_pc = floatd2(co[0], co[1]) + floatd2(0.5);
+#else
+		const floatd2 ps_pc = floatd2(co[0], co[1]);
+#endif
+
+		// Convert the projection plate coordinates into image space,
+		// - local to the plate in metric units
+		// - including half-fan and sag correction
+		//
+
+		const floatd2 proj_coords = (ps_pc / ps_dims_in_pixels - 0.5f) * ps_dims_in_mm + offsets[co[2]];
+
+		// Define the end point for the line integrals
+		// - the plate sits at distance SDD-SAD from the origin
+
+		const float ADD = SDD - SAD; // in mm.
+		floatd3 endPoint = floatd3(proj_coords[0], ADD, proj_coords[1]);
+		endPoint = mul(rotation, endPoint);
+
+		// Find direction vector of the line integral
+		//
+
+		floatd3 dir = endPoint-startPoint;
+
+		// Perform integration only inside the bounding cylinder of the image volume
+		// - NOTE(review): the code below clips the ray against +/- is_dims_in_mm per axis and to [0,1]; presumably amin/amax are component-wise - confirm
+
+		const floatd3 vec_over_dir = (is_dims_in_mm-startPoint)/dir;
+		const floatd3 vecdiff_over_dir = (-is_dims_in_mm-startPoint)/dir;
+		const floatd3 start = amin(vecdiff_over_dir, vec_over_dir);
+		const floatd3 end   = amax(vecdiff_over_dir, vec_over_dir);
+
+		float a1 = fmax(max(start),0.0f);
+		float aend = fmin(min(end),1.0f);
+		startPoint += a1*dir;
+
+		const float sampling_distance = norm((aend-a1)*dir)/num_samples_per_ray;
+
+		// Now perform conversion of the line integral start/end into voxel coordinates
+		// - the coordinates are divided by the image size in mm and shifted by 0.5
+
+		startPoint /= is_dims_in_mm;
+#ifdef FLIP_Z_AXIS
+		startPoint[2] *= -1.0f;
+#endif
+		startPoint += 0.5f;
+		dir /= is_dims_in_mm;
+#ifdef FLIP_Z_AXIS
+		dir[2] *= -1.0f;
+#endif
+		dir /= float(num_samples_per_ray); // now in step size units
+
+		//
+		// Perform line integration
+		//
+
+		float result = 0.0f;
+
+		for ( int sampleIndex = 0; sampleIndex<num_samples_per_ray; sampleIndex++) {
+
+#ifndef IS_ORIGIN_CENTERING
+			floatd3 samplePoint = startPoint+dir*float(sampleIndex) + floatd3(0.5f)/is_dims_in_pixels;
+#else
+			floatd3 samplePoint = startPoint+dir*float(sampleIndex);
+#endif
+
+			// Accumulate result
+			// - image_tex is read with linear filtering
+
+			result += tex3D( image_tex, samplePoint[0], samplePoint[1], samplePoint[2] );
+		}
+
+		// Output (normalized to the length of the ray)
+		// - the sample sum is multiplied by the distance between samples
+
+		projections[idx] = result*sampling_distance;
+	}
+}
+
+//
+// Forwards projection of a 3D volume onto a set of (binned) projections
+//
+
+// Computes the projections named in 'indices' from 'image'.
+//
+// projections           : host array (x, y, projection) holding all bins; only
+//                         the projections named in 'indices' are written
+// image                 : host array holding the 3D image
+// angles, offsets       : one entry per projection of 'projections'
+// indices               : projections (of the current bin) to compute
+// projections_per_batch : upper limit on the projections per kernel launch (>= 1)
+// samples_per_pixel     : multiplied by the image width to give the samples per ray
+// is_dims_in_mm, ps_dims_in_mm, SDD, SAD : geometry passed on to the kernel
+//
+// Throws std::runtime_error on invalid input. All input is validated before
+// any device resource is acquired, so nothing is leaked on those paths.
+//
+void
+conebeam_forwards_projection( hoCuNDArray<float> *projections,
+		hoCuNDArray<float> *image,
+		std::vector<float> angles,
+		std::vector<floatd2> offsets,
+		std::vector<unsigned int> indices,
+		int projections_per_batch,
+		float samples_per_pixel,
+		floatd3 is_dims_in_mm,
+		floatd2 ps_dims_in_mm,
+		float SDD,
+		float SAD)
+{
+	//
+	// Validate the input
+	//
+
+	if( projections == 0x0 || image == 0x0 ){
+		throw std::runtime_error("Error: conebeam_forwards_projection: illegal array pointer provided");
+	}
+
+	if( projections->get_number_of_dimensions() != 3 ){
+		throw std::runtime_error("Error: conebeam_forwards_projection: projections array must be three-dimensional");
+	}
+
+	if( image->get_number_of_dimensions() != 3 ){
+		throw std::runtime_error("Error: conebeam_forwards_projection: image array must be three-dimensional");
+	}
+
+	if( projections->get_size(2) != angles.size() || projections->get_size(2) != offsets.size() ) {
+		throw std::runtime_error("Error: conebeam_forwards_projection: inconsistent sizes of input arrays/vectors");
+	}
+
+	if( projections_per_batch < 1 ){
+		throw std::runtime_error("Error: conebeam_forwards_projection: projections_per_batch must be at least one");
+	}
+
+	// Gather the angles and offsets of the bin. The indices are compared as
+	// unsigned values so that an index above INT_MAX cannot slip through.
+	//
+
+	std::vector<float> angles_vec;
+	std::vector<floatd2> offsets_vec;
+
+	for( size_t p=0; p<indices.size(); p++ ){
+
+		const size_t from_id = indices[p];
+
+		if( from_id >= projections->get_size(2) ) {
+			throw std::runtime_error("Error: conebeam_forwards_projection: illegal index in bin");
+		}
+
+		angles_vec.push_back(angles[from_id]);
+		offsets_vec.push_back(offsets[from_id]);
+	}
+
+	// An empty bin leaves nothing to compute (the batch count below would
+	// otherwise divide by zero)
+	//
+
+	if( indices.empty() )
+		return;
+
+	int projection_res_x = projections->get_size(0);
+	int projection_res_y = projections->get_size(1);
+
+	int num_projections_in_bin = indices.size();
+
+	int matrix_size_x = image->get_size(0);
+	int matrix_size_y = image->get_size(1);
+	int matrix_size_z = image->get_size(2);
+
+	if( projections_per_batch > num_projections_in_bin )
+		projections_per_batch = num_projections_in_bin;
+
+	int num_batches = (num_projections_in_bin+projections_per_batch-1) / projections_per_batch;
+
+	// Sizes in size_t to keep the byte counts and host offsets from overflowing 'int'
+	//
+
+	const size_t projection_size = size_t(projection_res_x)*size_t(projection_res_y);
+	const size_t batch_bytes = projection_size*size_t(projections_per_batch)*sizeof(float);
+
+	// Build texture from input image
+	//
+
+	cudaFuncSetCacheConfig(conebeam_forwards_projection_kernel, cudaFuncCachePreferL1);
+	cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
+	cudaExtent extent;
+	extent.width = matrix_size_x;
+	extent.height = matrix_size_y;
+	extent.depth = matrix_size_z;
+
+	cudaMemcpy3DParms cpy_params = {0};
+	cpy_params.kind = cudaMemcpyHostToDevice;
+	cpy_params.extent = extent;
+
+	cudaArray *image_array;
+	cudaMalloc3DArray(&image_array, &channelDesc, extent);
+	CHECK_FOR_CUDA_ERROR();
+
+	cpy_params.dstArray = image_array;
+	cpy_params.srcPtr = make_cudaPitchedPtr
+			((void*)image->get_data_ptr(), extent.width*sizeof(float), extent.width, extent.height);
+	cudaMemcpy3D(&cpy_params);
+	CHECK_FOR_CUDA_ERROR();
+
+	cudaBindTextureToArray(image_tex, image_array, channelDesc);
+	CHECK_FOR_CUDA_ERROR();
+
+	// Allocate the angles, offsets and projections in device memory
+	// - two projection buffers and two streams, swapped after every batch
+	//
+
+	float *projections_DevPtr, *projections_DevPtr2;
+	cudaMalloc( (void**) &projections_DevPtr, batch_bytes );
+	cudaMalloc( (void**) &projections_DevPtr2, batch_bytes );
+	CHECK_FOR_CUDA_ERROR();
+
+	cudaStream_t mainStream, indyStream;
+	cudaStreamCreate(&mainStream);
+	cudaStreamCreate(&indyStream);
+
+	thrust::device_vector<float> angles_devVec(angles_vec);
+	thrust::device_vector<floatd2> offsets_devVec(offsets_vec);
+
+	//
+	// Iterate over the batches
+	//
+
+	for (int batch=0; batch<num_batches; batch++ ){
+
+		int from_projection = batch * projections_per_batch;
+		int to_projection = (batch+1) * projections_per_batch;
+
+		if (to_projection > num_projections_in_bin)
+			to_projection = num_projections_in_bin;
+
+		int projections_in_batch = to_projection-from_projection;
+
+		// Block/grid configuration
+		//
+
+		dim3 dimBlock, dimGrid;
+		setup_grid( projection_res_x*projection_res_y*projections_in_batch, &dimBlock, &dimGrid );
+
+		// Launch kernel
+		//
+
+		floatd3 is_dims_in_pixels(matrix_size_x, matrix_size_y, matrix_size_z);
+		intd2 ps_dims_in_pixels(projection_res_x, projection_res_y);
+
+		float* raw_angles = thrust::raw_pointer_cast(&angles_devVec[from_projection]);
+		floatd2* raw_offsets = thrust::raw_pointer_cast(&offsets_devVec[from_projection]);
+
+		conebeam_forwards_projection_kernel<<< dimGrid, dimBlock, 0, mainStream >>>
+				( projections_DevPtr, raw_angles, raw_offsets,
+						is_dims_in_pixels, is_dims_in_mm, ps_dims_in_pixels, ps_dims_in_mm,
+						projections_in_batch, SDD, SAD, samples_per_pixel*float(matrix_size_x) );
+
+		// Queue the copy of this batch's result to the host on the same stream.
+		// Runs of consecutively numbered projections are copied in one operation.
+		//
+
+		int p = from_projection;
+		while( p<to_projection) {
+
+			int num_sequential_projections = 1;
+			while( p+num_sequential_projections < to_projection &&
+					indices[p+num_sequential_projections]==(indices[p+num_sequential_projections-1]+1) ){
+				num_sequential_projections++;
+			}
+
+			const size_t to_id = indices[p];
+
+			cudaMemcpyAsync( projections->get_data_ptr()+to_id*projection_size,
+					projections_DevPtr+size_t(p-from_projection)*projection_size,
+					projection_size*size_t(num_sequential_projections)*sizeof(float),
+					cudaMemcpyDeviceToHost, mainStream);
+
+			p += num_sequential_projections;
+		}
+
+		// The next batch uses the other buffer and the other stream
+		//
+
+		std::swap(projections_DevPtr, projections_DevPtr2);
+		std::swap(mainStream, indyStream);
+	}
+
+	// Wait for all queued kernels and copies before releasing the buffers
+	// and before the caller reads the host array
+	//
+
+	CUDA_CALL(cudaStreamSynchronize(mainStream));
+	CUDA_CALL(cudaStreamSynchronize(indyStream));
+
+	cudaFree(projections_DevPtr2);
+	cudaFree(projections_DevPtr);
+
+	// Unbind before freeing so the texture reference does not keep pointing at a freed array
+	cudaUnbindTexture(image_tex);
+	cudaFreeArray(image_array);
+
+	CUDA_CALL(cudaStreamDestroy(indyStream));
+	CUDA_CALL(cudaStreamDestroy(mainStream));
+	CHECK_FOR_CUDA_ERROR();
+
+}
+
+template <bool FBP> __global__ void
+conebeam_backwards_projection_kernel( float * __restrict__ image, // Output image (also read when 'accumulate' is set)
+		const float * __restrict__ angles, // Projection angles in degrees, one per projection of the batch
+		floatd2 *offsets, // Per-projection offsets subtracted from the plate coordinates (mm)
+		intd3 is_dims_in_pixels_int, // Image size in voxels
+		floatd3 is_dims_in_mm, // Image size in mm
+		floatd2 ps_dims_in_pixels, // Projection size in pixels (only read when PS_ORIGIN_CENTERING is undefined)
+		floatd2 ps_dims_in_mm, // Projection size in mm
+		int num_projections_in_batch, // Number of layers of projections_tex to backproject
+		float num_projections_in_bin, // The summed result is divided by this number
+		float SDD, // Source detector distance
+		float SAD, // Source origin distance
+		bool accumulate ) // Add to the existing image content instead of overwriting it
+{
+	// Image voxel to backproject into (pixel coordinate and index)
+	// - one thread per voxel; surplus threads do nothing
+
+	const int idx = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x+threadIdx.x;
+	const int num_elements = prod(is_dims_in_pixels_int);
+
+	if( idx < num_elements ){
+
+		const intd3 co = idx_to_co<3>(idx, is_dims_in_pixels_int);
+
+#ifdef IS_ORIGIN_CENTERING
+		const floatd3 is_pc = floatd3(co[0], co[1], co[2]) + floatd3(0.5);
+#else
+		const floatd3 is_pc = floatd3(co[0], co[1], co[2]);
+#endif
+
+		// Normalized image space coordinate [-0.5, 0.5[
+		//
+
+		const floatd3 is_dims_in_pixels(is_dims_in_pixels_int[0],is_dims_in_pixels_int[1],is_dims_in_pixels_int[2]);
+
+#ifdef FLIP_Z_AXIS
+		floatd3 is_nc = is_pc / is_dims_in_pixels - floatd3(0.5f);
+		is_nc[2] *= -1.0f;
+#else
+		const floatd3 is_nc = is_pc / is_dims_in_pixels - floatd3(0.5f);
+#endif
+
+		// Image space coordinate in metric units
+		//
+
+		const floatd3 pos = is_nc * is_dims_in_mm;
+
+		// Read the existing output value for accumulation at this point.
+		// The cost of this fetch is hidden by the loop
+
+		const float incoming = (accumulate) ? image[idx] : 0.0f;
+
+		// Backprojection loop
+		// - visits every projection (texture layer) of the batch
+
+		float result = 0.0f;
+
+		for( int projection = 0; projection < num_projections_in_batch; projection++ ) {
+
+			// Projection angle
+			// - stored in degrees, converted to radians here
+
+			const float angle = degrees2radians(angles[projection]);
+
+			// Projection rotation matrix
+			// - rotation around z by the negated angle
+
+			const float3x3 inverseRotation = calcRotationMatrixAroundZ(-angle);
+
+			// Rotated image coordinate (local to the projection's coordinate system)
+			//
+
+			const floatd3 pos_proj = mul(inverseRotation, pos);
+
+			// Project the image position onto the projection plate.
+			// Account for half-fan and sag offsets.
+			// - the ray from (0,-SAD,0) through the voxel is scaled to unit y and extended by SDD
+
+			const floatd3 startPoint = floatd3(0.0f, -SAD, 0.0f);
+			floatd3 dir = pos_proj - startPoint;
+			dir = dir / dir[1];
+			const floatd3 endPoint = startPoint + dir * SDD;
+			const floatd2 endPoint2d = floatd2(endPoint[0], endPoint[2]) - offsets[projection];
+
+			// Convert metric projection coordinates into pixel coordinates
+			// - the result is a normalized texture coordinate with the plate centre at 0.5
+
+#ifndef PS_ORIGIN_CENTERING
+			floatd2 ps_pc = ((endPoint2d / ps_dims_in_mm) + floatd2(0.5f)) + floatd2(0.5f)/ps_dims_in_pixels;
+			//floatd2 ps_pc = ((endPoint2d / ps_dims_in_mm) + floatd2(0.5f)) * ps_dims_in_pixels + floatd2(0.5f);
+#else
+			floatd2 ps_pc = ((endPoint2d / ps_dims_in_mm) + floatd2(0.5f));
+#endif
+
+			// Apply filter (filtered backprojection mode only)
+			// - the weight stays 1 when FBP is false
+
+			float weight = 1.0;
+
+			if( FBP ){
+
+				// Equation 3.59, page 96 and equation 10.2, page 386
+				// in Computed Tomography 2nd edition, Jiang Hsieh
+				// - the weight is 1/U^2 with U = (SAD + r*sin(beta-phi))/SAD
+
+				const float xx = pos[0];
+				const float yy = pos[1];
+				const float beta = angle;
+				const float r = hypotf(xx,yy);
+				const float phi = atan2f(yy,xx);
+				const float D = SAD;
+				const float ym = r*sinf(beta-phi);
+				const float U = (D+ym)/D;
+				weight = 1.0f/(U*U);
+			}
+
+			// Read the projection data (bilinear interpolation enabled) and accumulate
+			//
+
+			result +=  weight * tex2DLayered( projections_tex, ps_pc[0], ps_pc[1], projection );
+		}
+
+		// Output normalized image
+		// - the sum is divided by num_projections_in_bin and added to the incoming value
+
+		image[idx] = incoming + result / num_projections_in_bin;
+	}
+}
+
+//
+// Backprojection
+//
+
+template <bool FBP>
+void conebeam_backwards_projection( hoCuNDArray<float> *projections,
+		hoCuNDArray<float> *image,
+		std::vector<float> angles,
+		std::vector<floatd2> offsets,
+		std::vector<unsigned int> indices,
+		int projections_per_batch,
+		intd3 is_dims_in_pixels,
+		floatd3 is_dims_in_mm,
+		floatd2 ps_dims_in_mm,
+		float SDD,
+		float SAD,
+		bool short_scan,
+		bool use_offset_correction,
+		bool accumulate,
+		cuNDArray<float> *cosine_weights,
+		cuNDArray<float> *frequency_filter
+)
+{
+	//
+	// Validate the input
+	//
+
+	if( projections == 0x0 || image == 0x0 ){
+		throw std::runtime_error("Error: conebeam_backwards_projection: illegal array pointer provided");
+	}
+
+	if( projections->get_number_of_dimensions() != 3 ){
+		throw std::runtime_error("Error: conebeam_backwards_projection: projections array must be three-dimensional");
+	}
+
+	if( image->get_number_of_dimensions() != 3 ){
+		throw std::runtime_error("Error: conebeam_backwards_projection: image array must be three-dimensional");
+	}
+
+	if( projections->get_size(2) != angles.size() || projections->get_size(2) != offsets.size() ) {
+		throw std::runtime_error("Error: conebeam_backwards_projection: inconsistent sizes of input arrays/vectors");
+	}
+
+	if( FBP && !(cosine_weights && frequency_filter) ){
+		throw std::runtime_error("Error: conebeam_backwards_projection: for _filtered_ backprojection both cosine weights and a filter must be provided");
+	}
+
+	// Some utility variables
+	//
+
+	int matrix_size_x = image->get_size(0);
+	int matrix_size_y = image->get_size(1);
+	int matrix_size_z = image->get_size(2);
+
+	floatd3 is_dims(matrix_size_x, matrix_size_y, matrix_size_z);
+	int num_image_elements = matrix_size_x*matrix_size_y*matrix_size_z;
+
+	int projection_res_x = projections->get_size(0);
+	int projection_res_y = projections->get_size(1);
+
+	floatd2 ps_dims_in_pixels(projection_res_x, projection_res_y);
+
+	int num_projections_in_all_bins = projections->get_size(2);
+	int num_projections_in_bin = indices.size();
+
+	if( projections_per_batch > num_projections_in_bin )
+		projections_per_batch = num_projections_in_bin;
+
+	int num_batches = (num_projections_in_bin+projections_per_batch-1) / projections_per_batch;
+
+	// Allocate device memory for the backprojection result
+	//
+
+	boost::shared_ptr< cuNDArray<float> > image_device;
+
+	if( accumulate ){
+		image_device = boost::shared_ptr< cuNDArray<float> >(new cuNDArray<float>(image));
+	}
+	else{
+		image_device = boost::shared_ptr< cuNDArray<float> >(new cuNDArray<float>(image->get_dimensions().get()));
+	}
+
+	// Allocate the angles, offsets and projections in device memory
+	//
+
+	float *projections_DevPtr, *projections_DevPtr2;
+	cudaMalloc( (void**) &projections_DevPtr, projection_res_x*projection_res_y*projections_per_batch*sizeof(float));
+	cudaMalloc( (void**) &projections_DevPtr2, projection_res_x*projection_res_y*projections_per_batch*sizeof(float));
+
+	cudaStream_t mainStream, indyStream;
+	cudaStreamCreate(&mainStream);
+	cudaStreamCreate(&indyStream);
+
+	std::vector<float> angles_vec;
+	std::vector<floatd2> offsets_vec;
+
+	for( int p=0; p<indices.size(); p++ ){
+
+		int from_id = indices[p];
+
+		if( from_id >= num_projections_in_all_bins ) {
+			throw std::runtime_error("Error: conebeam_backwards_projection: illegal index in bin");
+		}
+
+		angles_vec.push_back(angles[from_id]);
+		offsets_vec.push_back(offsets[from_id]);
+	}
+
+	thrust::device_vector<float> angles_devVec(angles_vec);
+	thrust::device_vector<floatd2> offsets_devVec(offsets_vec);
+
+	// From/to for the first batch
+	// - to enable working streams...
+	//
+
+	int from_projection = 0;
+	int to_projection = projections_per_batch;
+
+	if (to_projection > num_projections_in_bin )
+		to_projection = num_projections_in_bin;
+
+	int projections_in_batch = to_projection-from_projection;
+
+	std::vector<size_t> dims;
+	dims.push_back(projection_res_x);
+	dims.push_back(projection_res_y);
+	dims.push_back(projections_in_batch);
+
+	std::vector<size_t> dims_next;
+
+	cuNDArray<float> *projections_batch = new cuNDArray<float>(&dims, projections_DevPtr);
+
+	// Upload first projections batch adhering to the binning.
+	// Be sure to copy sequentially numbered projections in one copy operation.
+	//
+
+	{
+		int p = from_projection;
+
+		while( p<to_projection ) {
+
+			int num_sequential_projections = 1;
+			while( p+num_sequential_projections < to_projection &&
+					indices[p+num_sequential_projections]==(indices[p+num_sequential_projections-1]+1) ){
+				num_sequential_projections++;
+			}
+
+			int from_id = indices[p];
+			int size = projection_res_x*projection_res_y;
+
+			cudaMemcpyAsync( projections_batch->get_data_ptr()+(p-from_projection)*size,
+					projections->get_data_ptr()+from_id*size,
+					size*num_sequential_projections*sizeof(float), cudaMemcpyHostToDevice, mainStream );
+
+			CHECK_FOR_CUDA_ERROR();
+
+			p += num_sequential_projections;
+		}
+	}
+
+	//
+	// Iterate over batches
+	//
+
+	for( int batch = 0; batch < num_batches; batch++ ) {
+
+		from_projection = batch * projections_per_batch;
+		to_projection = (batch+1) * projections_per_batch;
+
+		if (to_projection > num_projections_in_bin )
+			to_projection = num_projections_in_bin;
+
+		projections_in_batch = to_projection-from_projection;
+
+		float* raw_angles = thrust::raw_pointer_cast(&angles_devVec[from_projection]);
+		floatd2* raw_offsets = thrust::raw_pointer_cast(&offsets_devVec[from_projection]);
+
+
+		if( FBP ){
+
+			// Apply cosine weighting : "SDD / sqrt(SDD*SDD + u*u + v*v)"
+			// - with (u,v) positions given in metric units on a virtual detector at the origin
+			//
+
+			*projections_batch *= *cosine_weights;
+
+			// Redundancy correct
+			// - for short scan mode
+			//
+
+			if( short_scan ){
+				float delta = std::atan(ps_dims_in_mm[0]/(2.0f*SDD));
+				redundancy_correct( projections_batch, raw_angles, delta );
+			}
+
+			// Apply frequency filter
+			// - use zero padding to avoid the cyclic boundary conditions induced by the fft
+			//
+
+			std::vector<size_t> batch_dims = *projections_batch->get_dimensions();
+			uint64d3 pad_dims(batch_dims[0]<<1, batch_dims[1], batch_dims[2]);
+			boost::shared_ptr< cuNDArray<float> > padded_projections = pad<float,3>( pad_dims, projections_batch );
+			boost::shared_ptr< cuNDArray<complext<float> > > complex_projections = cb_fft( padded_projections.get() );
+			*complex_projections *= *frequency_filter;
+			cb_ifft( complex_projections.get(), padded_projections.get() );
+			uint64d3 crop_offsets(batch_dims[0]>>1, 0, 0);
+			crop<float,3>( crop_offsets, padded_projections.get(), projections_batch );
+
+			// Apply offset correction
+					// - for half fan mode, sag correction etc.
+					//
+			if (use_offset_correction)
+				offset_correct( projections_batch, raw_offsets, ps_dims_in_mm, SAD, SDD );
+
+
+		} else if (use_offset_correction)
+			offset_correct_sqrt( projections_batch, raw_offsets, ps_dims_in_mm, SAD, SDD );
+
+		// Build array for input texture
+		//
+
+		cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
+		cudaExtent extent;
+		extent.width = projection_res_x;
+		extent.height = projection_res_y;
+		extent.depth = projections_in_batch;
+
+		cudaArray *projections_array;
+		cudaMalloc3DArray( &projections_array, &channelDesc, extent, cudaArrayLayered );
+		CHECK_FOR_CUDA_ERROR();
+
+		cudaMemcpy3DParms cpy_params = {0};
+		cpy_params.extent = extent;
+		cpy_params.dstArray = projections_array;
+		cpy_params.kind = cudaMemcpyDeviceToDevice;
+		cpy_params.srcPtr =
+				make_cudaPitchedPtr( (void*)projections_batch->get_data_ptr(), projection_res_x*sizeof(float),
+						projection_res_x, projection_res_y );
+		cudaMemcpy3DAsync( &cpy_params, mainStream );
+		CHECK_FOR_CUDA_ERROR();
+
+		cudaBindTextureToArray( projections_tex, projections_array, channelDesc );
+		CHECK_FOR_CUDA_ERROR();
+
+		// Upload projections for the next batch
+		// - to enable streaming
+		//
+
+		if( batch < num_batches-1 ){ // for using multiple streams to hide the cost of the uploads
+
+			int from_projection_next = (batch+1) * projections_per_batch;
+			int to_projection_next = (batch+2) * projections_per_batch;
+
+			if (to_projection_next > num_projections_in_bin )
+				to_projection_next = num_projections_in_bin;
+
+			int projections_in_batch_next = to_projection_next-from_projection_next;
+
+			// printf("batch: %03i, handling projections: %03i - %03i, angles: %.2f - %.2f\n",
+			//	 batch+1, from_projection_next, to_projection_next-1, angles[from_projection_next], angles[to_projection_next-1]);
+
+			// Allocate device memory for projections and upload
+			//
+
+			dims_next.clear();
+			dims_next.push_back(projection_res_x);
+			dims_next.push_back(projection_res_y);
+			dims_next.push_back(projections_in_batch_next);
+
+			cuNDArray<float> projections_batch_next(&dims, projections_DevPtr2);
+
+			// Upload projections adhering to the binning.
+			// Be sure to copy sequentially numbered projections in one copy operation.
+			//
+
+			int p = from_projection_next;
+
+			while( p<to_projection_next ) {
+
+				int num_sequential_projections = 1;
+				while( p+num_sequential_projections < to_projection_next &&
+						indices[p+num_sequential_projections]==(indices[p+num_sequential_projections-1]+1) ){
+					num_sequential_projections++;
+				}
+
+				int from_id = indices[p];
+				int size = projection_res_x*projection_res_y;
+
+				cudaMemcpyAsync( projections_batch_next.get_data_ptr()+(p-from_projection_next)*size,
+						projections->get_data_ptr()+from_id*size,
+						size*num_sequential_projections*sizeof(float), cudaMemcpyHostToDevice, indyStream );
+
+				CHECK_FOR_CUDA_ERROR();
+
+				p += num_sequential_projections;
+			}
+		}
+
+		// Define dimensions of grid/blocks.
+		//
+
+		dim3 dimBlock, dimGrid;
+		setup_grid( matrix_size_x*matrix_size_y*matrix_size_z, &dimBlock, &dimGrid );
+
+		// Invoke kernel
+		//
+
+		cudaFuncSetCacheConfig(conebeam_backwards_projection_kernel<FBP>, cudaFuncCachePreferL1);
+
+		conebeam_backwards_projection_kernel<FBP><<< dimGrid, dimBlock, 0, mainStream >>>
+				( image_device->get_data_ptr(), raw_angles, raw_offsets,
+						is_dims_in_pixels, is_dims_in_mm, ps_dims_in_pixels, ps_dims_in_mm,
+						projections_in_batch, num_projections_in_bin, SDD, SAD, (batch==0) ? accumulate : true );
+
+		CHECK_FOR_CUDA_ERROR();
+
+		// Cleanup
+		//
+
+		cudaUnbindTexture(projections_tex);
+		cudaFreeArray(projections_array);
+		CHECK_FOR_CUDA_ERROR();
+
+		std::swap(projections_DevPtr, projections_DevPtr2);
+		std::swap(mainStream, indyStream);
+
+		delete projections_batch;
+		if( batch < num_batches-1 )
+			projections_batch = new cuNDArray<float>(&dims_next, projections_DevPtr);
+	}
+
+	// Copy result from device to host
+	//
+
+	cudaMemcpy( image->get_data_ptr(), image_device->get_data_ptr(),
+			num_image_elements*sizeof(float), cudaMemcpyDeviceToHost );
+
+	CHECK_FOR_CUDA_ERROR();
+
+	cudaFree(projections_DevPtr2);
+	cudaFree(projections_DevPtr);
+	CUDA_CALL(cudaStreamDestroy(indyStream));
+	CUDA_CALL(cudaStreamDestroy(mainStream));
+	CHECK_FOR_CUDA_ERROR();
+}
+
+// Explicit template instantiations for FBP=false and FBP=true, so that both variants of
+// conebeam_backwards_projection are compiled into this translation unit.
+
+template void conebeam_backwards_projection<false>
+( hoCuNDArray<float>*, hoCuNDArray<float>*, std::vector<float>, std::vector<floatd2>, std::vector<unsigned int>,
+		int, intd3, floatd3, floatd2, float, float, bool, bool, bool, cuNDArray<float>*, cuNDArray<float>* );
+
+template void conebeam_backwards_projection<true>
+( hoCuNDArray<float>*, hoCuNDArray<float>*, std::vector<float>, std::vector<floatd2>, std::vector<unsigned int>,
+		int, intd3, floatd3, floatd2, float, float, bool, bool, bool, cuNDArray<float>*, cuNDArray<float>* );
+}
diff --git a/toolboxes/ct/xray/gpu/conebeam_projection.h b/toolboxes/ct/xray/gpu/conebeam_projection.h
new file mode 100644
index 0000000..ffea9c4
--- /dev/null
+++ b/toolboxes/ct/xray/gpu/conebeam_projection.h
@@ -0,0 +1,76 @@
+#pragma once
+
+#include "hoCuNDArray.h"
+#include "cuNDArray.h"
+#include "vector_td.h"
+#include "gpuxray_export.h"
+
+namespace Gadgetron {
+
+
+
+/**
+ * Applies the sqrt of the FBP offset correction. Should be used on projection data with offset detector for iterative reconstruction.
+ * @param projections projection data to correct (presumably modified in place, as nothing is returned -- confirm)
+ * @param offsets detector offsets (the projection operator passes the geometry's offsets)
+ * @param ps_dims_in_mm size of the projection space in mm (the projection operator passes the geometry's FOV)
+ * @param SDD geometry distance; note that SDD comes before SAD in this signature
+ * @param SAD geometry distance
+ */
+void apply_offset_correct(hoCuNDArray<float>* projections,std::vector<floatd2>& offsets,		floatd2 ps_dims_in_mm, float SDD,	float SAD);
+/**
+ * Forwards projection of a 3D volume onto a set of projections.
+ * @param projections array receiving the computed projections
+ * @param image 3D volume to project
+ * @param angles projection angles
+ * @param offsets detector offsets
+ * @param indices binning indices; only the projections listed here are targeted
+ * @param projections_per_batch number of projections processed per batch
+ * @param samples_per_pixel sampling density of the projector -- TODO confirm the exact meaning
+ * @param is_dims_in_mm image space dimensions in mm
+ * @param ps_dims_in_mm projection space dimensions in mm
+ * @param SDD geometry distance (same quantity as in apply_offset_correct)
+ * @param SAD geometry distance (same quantity as in apply_offset_correct)
+ * @note This function has no accumulate parameter.
+ */
+  // Forwards projection of a 3D volume onto a set of projections.
+  // - depending on the provided binning indices, just a subset of the projections can be targeted.
+  //
+  
+  EXPORTGPUXRAY void conebeam_forwards_projection
+    ( hoCuNDArray<float> *projections,
+				hoCuNDArray<float> *image,
+				std::vector<float> angles, 
+				std::vector<floatd2> offsets, 
+				std::vector<unsigned int> indices,
+				int projections_per_batch, 
+				float samples_per_pixel,
+				floatd3 is_dims_in_mm, 
+				floatd2 ps_dims_in_mm,
+				float SDD, 
+				float SAD
+  );
+  
+  // Backprojection of a set of projections onto a 3D volume (FBP=true selects filtered backprojection).
+  // - depending on the provided binning indices, just a subset of the projections can be included
+  // - cosine_weights and frequency_filter are dereferenced when FBP=true, so they must be non-null then
+
+  template <bool FBP> EXPORTGPUXRAY void conebeam_backwards_projection( 
+        hoCuNDArray<float> *projections,
+        hoCuNDArray<float> *image,
+        std::vector<float> angles, 
+        std::vector<floatd2> offsets, 
+        std::vector<unsigned int> indices,
+        int projections_per_batch,
+        intd3 is_dims_in_pixels, 
+        floatd3 is_dims_in_mm, 
+        floatd2 ps_dims_in_mm,
+        float SDD, 
+        float SAD,
+        bool short_scan,
+        bool use_offset_correction,
+        bool accumulate, 
+        cuNDArray<float> *cosine_weights = 0x0,
+        cuNDArray<float> *frequency_filter = 0x0
+  );
+}
diff --git a/toolboxes/ct/xray/gpu/float3x3.h b/toolboxes/ct/xray/gpu/float3x3.h
new file mode 100644
index 0000000..db06acc
--- /dev/null
+++ b/toolboxes/ct/xray/gpu/float3x3.h
@@ -0,0 +1,66 @@
+#pragma once
+
+//#define SINCOSF __sincosf // fast math
+#define SINCOSF sincosf
+#include "vector_td_utilities.h"
+
+namespace Gadgetron{
+
+// Plain 3x3 single precision matrix, stored as three floatd3 rows.
+struct float3x3 {
+    floatd3 row0;
+    floatd3 row1;
+    floatd3 row2;
+};
+
+// Assembles a float3x3 from nine scalars in row-major order (v0..v2 form the first row).
+__inline__ __host__ __device__
+float3x3 make_float3x3(float v0, float v1, float v2,
+                       float v3, float v4, float v5,
+                       float v6, float v7, float v8) {
+    float3x3 result;
+    result.row0 = floatd3(v0, v1, v2);
+    result.row1 = floatd3(v3, v4, v5);
+    result.row2 = floatd3(v6, v7, v8);
+    return result;
+}
+
+// Matrix-vector product m*v: every component is the dot product of one row with v.
+__inline__ __device__
+floatd3 mul(float3x3 m, floatd3 v) {
+    const float x = dot(m.row0, v);
+    const float y = dot(m.row1, v);
+    const float z = dot(m.row2, v);
+    return floatd3(x, y, z);
+}
+
+// Rotation matrix about the X axis; angle is passed to SINCOSF unchanged (radians for sincosf).
+__inline__ __device__ float3x3 calcRotationMatrixAroundX(float angle) {
+    float s, c;
+    SINCOSF(angle, &s, &c);
+    return make_float3x3(1.0f, 0.0f, 0.0f,
+                         0.0f,    c,   -s,
+                         0.0f,    s,    c);
+}
+
+// Rotation matrix about the Y axis; angle is passed to SINCOSF unchanged.
+__inline__ __device__ float3x3 calcRotationMatrixAroundY(float angle) {
+    float s, c;
+    SINCOSF(angle, &s, &c);
+    return make_float3x3(   c, 0.0f,    s,
+                         0.0f, 1.0f, 0.0f,
+                           -s, 0.0f,    c);
+}
+
+// Rotation matrix about the Z axis. This one is also callable from host code and calls sincosf
+// directly instead of SINCOSF (presumably because the macro may be switched to a device intrinsic).
+__inline__ __host__  __device__ float3x3 calcRotationMatrixAroundZ(float angle) {
+    float s, c;
+    sincosf(angle, &s, &c);
+    return make_float3x3(   c,   -s, 0.0f,
+                            s,    c, 0.0f,
+                         0.0f, 0.0f, 1.0f);
+}
+
+
+}
diff --git a/toolboxes/ct/xray/gpu/gpuxray_export.h b/toolboxes/ct/xray/gpu/gpuxray_export.h
new file mode 100644
index 0000000..3e420fd
--- /dev/null
+++ b/toolboxes/ct/xray/gpu/gpuxray_export.h
@@ -0,0 +1,19 @@
+/** \file gpuxray_export.h
+    \brief Required definitions for Windows, importing/exporting dll symbols 
+*/
+
+#ifndef GPUXRAY_EXPORT_H_
+#define GPUXRAY_EXPORT_H_
+
+#if defined (WIN32)
+#if defined (__BUILD_GADGETRON_GPUXRAY__) || defined (gpuxray_EXPORTS)
+#define EXPORTGPUXRAY __declspec(dllexport)
+#else
+#define EXPORTGPUXRAY __declspec(dllimport)
+#endif
+#else
+#define EXPORTGPUXRAY
+#endif
+
+
+#endif /* GPUXRAY_EXPORT_H_ */
diff --git a/toolboxes/ct/xray/gpu/hoCuConebeamProjectionOperator.cpp b/toolboxes/ct/xray/gpu/hoCuConebeamProjectionOperator.cpp
new file mode 100644
index 0000000..7b065c7
--- /dev/null
+++ b/toolboxes/ct/xray/gpu/hoCuConebeamProjectionOperator.cpp
@@ -0,0 +1,261 @@
+#include "hoCuConebeamProjectionOperator.h"
+#include "conebeam_projection.h"
+#include "vector_td_operators.h"
+#include "cuNDArray_operators.h"
+#include "cuNDArray_blas.h"
+#include "GPUTimer.h"
+
+#include <vector>
+#include <stdio.h>
+
+namespace Gadgetron
+{
+
+// Runs apply_offset_correct() on 'projections' with the current geometry; throws if setup() was not run.
+void hoCuConebeamProjectionOperator::offset_correct(hoCuNDArray<float>* projections)
+{
+	if( !preprocessed_ )
+		throw std::runtime_error( "Error: hoCuConebeamProjectionOperator::offset_correct: setup not performed");
+
+	const float sdd = acquisition_->get_geometry()->get_SDD();
+	const float sad = acquisition_->get_geometry()->get_SAD();
+	const floatd2 fov_in_mm = acquisition_->get_geometry()->get_FOV();
+	apply_offset_correct( projections, acquisition_->get_geometry()->get_offsets(), fov_in_mm, sdd, sad );
+}
+
+// Computes the default frequency filter for filtered backprojection and stores it in frequency_filter_.
+// _Important_ aspects:
+// - the filter consists of single precision, non-complex weights
+// - only the positive frequencies are covered (i.e. the filter is "one sided")
+// - its length still equals the full size of the 1D dimension to filter +1, due to zero padding
+//   and cufft expecting an additional element
+// Throws std::runtime_error if setup() has not been performed.
+void hoCuConebeamProjectionOperator::compute_default_frequency_filter()
+{
+	if( !preprocessed_ )
+		throw std::runtime_error("Error: hoCuConebeamProjectionOperator::compute_default_frequency_filter() : setup not performed");
+
+	const size_t num_weights = acquisition_->get_projections()->get_size(0)+1;
+	std::vector<size_t> dims(1, num_weights);
+
+	hoCuNDArray<float> host_weights(&dims);
+	float* const weights = host_weights.get_data_ptr();
+
+	const float A2 = num_weights*num_weights;
+	const int last = static_cast<int>(num_weights);
+#ifdef USE_OMP
+#pragma omp parallel for
+#endif
+	for( int i=0; i<last; ++i ) {
+		// From Guo et al, Journal of X-Ray Science and Technology 2011, doi: 10.3233/XST-2011-0294
+		const float k = float(i);
+		const float denom = A2-k*k;
+		weights[i] = k*A2/denom*std::exp(-A2/denom);
+	}
+
+	frequency_filter_.reset(new cuNDArray<float>(&host_weights));
+	const float sum = asum(frequency_filter_.get());
+	*frequency_filter_ *= (num_weights/sum);
+}
+
+// Computes the per-pixel cosine weights used by filtered backprojection and stores them in cosine_weights_.
+// Throws std::runtime_error if setup() has not been performed.
+void hoCuConebeamProjectionOperator::compute_cosine_weights()
+{
+	if( !preprocessed_ )
+		throw std::runtime_error("Error: hoCuConebeamProjectionOperator::compute_cosine_weights() : setup not performed");
+
+	uintd2 ps_dims_in_pixels( acquisition_->get_projections()->get_size(0), acquisition_->get_projections()->get_size(1) );
+	const unsigned int width = ps_dims_in_pixels[0];
+	const unsigned int height = ps_dims_in_pixels[1];
+	floatd2 ps_dims_in_mm = acquisition_->get_geometry()->get_FOV();
+
+	const double SAD = double(acquisition_->get_geometry()->get_SAD());
+	const double SDD = double(acquisition_->get_geometry()->get_SDD());
+
+	std::vector<size_t> dims;
+	dims.push_back(width);
+	dims.push_back(height);
+
+	hoCuNDArray<float> weights(&dims);
+	float* data = weights.get_data_ptr();
+
+#ifdef USE_OMP
+#pragma omp parallel for
+#endif
+	for( int y=0; y<height; y++ ) {
+		// The row coordinate (and its SAD/SDD scaled version) is the same for all pixels of a row
+		const double yy = (( double(y) / double(height)) - 0.5) * ps_dims_in_mm[1];
+		const double v = SAD * yy/SDD;
+		for( int x=0; x<width; x++ ) {
+			const double xx = (( double(x) / double(width)) - 0.5) * ps_dims_in_mm[0];
+			const double s = SAD * xx/SDD;
+
+			// Equation 10.1, page 386 in Computed Tomography 2nd edition, Jiang Hsieh
+			data[x+y*width] = float( SAD / std::sqrt( SAD*SAD + s*s + v*v ) );
+		}
+	}
+	cosine_weights_.reset(new cuNDArray<float>(&weights));
+}
+
+// Forward projection: projects the 3D volume of every bin in 'image' onto 'projections'.
+// - 'image' is either 3D, or 4D holding one 3D volume per bin along its last dimension
+// - with accumulate=true the result is added to 'projections' instead of being written to it
+// Throws std::runtime_error on invalid input, or when setup or binning is missing.
+void hoCuConebeamProjectionOperator
+::mult_M( hoCuNDArray<float> *image, hoCuNDArray<float> *projections, bool accumulate )
+{
+	// Validate the input
+	if( image == 0x0 || projections == 0x0 ){
+		throw std::runtime_error("Error: hoCuConebeamProjectionOperator::mult_M: illegal array pointer provided");
+	}
+
+	if( (image->get_number_of_dimensions() != 4) &&  (image->get_number_of_dimensions() != 3) ){
+		throw std::runtime_error("Error: hoCuConebeamProjectionOperator::mult_M: image array must be four or three -dimensional");
+	}
+
+	if( projections->get_number_of_dimensions() != 3 ){
+		throw std::runtime_error("Error: hoCuConebeamProjectionOperator::mult_M: projections array must be three-dimensional");
+	}
+
+	if( !preprocessed_ ){
+		throw std::runtime_error( "Error: hoCuConebeamProjectionOperator::mult_M: setup not performed");
+	}
+
+	if( !binning_.get() ){
+		throw std::runtime_error( "Error: hoCuConebeamProjectionOperator::mult_M: binning not provided");
+	}
+
+	if( projections->get_size(2) != acquisition_->get_geometry()->get_angles().size() ||
+			projections->get_size(2) != acquisition_->get_geometry()->get_offsets().size() ){
+		throw std::runtime_error("Error: hoCuConebeamProjectionOperator::mult_M: inconsistent sizes of input arrays/vectors");
+	}
+
+	// Dimensions of a single 3D volume, and the number of such volumes stored in 'image'
+	std::vector<size_t> dims_3d = *image->get_dimensions();
+	const size_t num_volumes = (dims_3d.size()==4) ? dims_3d[3] : 1;
+	if (dims_3d.size()==4)
+		dims_3d.pop_back();
+
+	// Every bin reads its own volume; refuse to read beyond the end of 'image'
+	if( num_volumes < size_t(binning_->get_number_of_bins()) ){
+		throw std::runtime_error("Error: hoCuConebeamProjectionOperator::mult_M: image holds fewer volumes than there are bins");
+	}
+
+	// size_t rather than int, so that b*num_3d_elements cannot overflow for large 4D images
+	const size_t num_3d_elements = dims_3d[0]*dims_3d[1]*dims_3d[2];
+	floatd2 ps_dims_in_mm = acquisition_->get_geometry()->get_FOV();
+	float SDD = acquisition_->get_geometry()->get_SDD();
+	float SAD = acquisition_->get_geometry()->get_SAD();
+
+	// When accumulating, project into a temporary owned by a smart pointer (freed even on exceptions).
+	// NOTE(review): the temporary is not explicitly cleared; confirm every projection is covered by a bin.
+	boost::shared_ptr< hoCuNDArray<float> > scratch;
+	if (accumulate)
+		scratch.reset(new hoCuNDArray<float>(projections->get_dimensions()));
+	hoCuNDArray<float> *projections2 = accumulate ? scratch.get() : projections;
+
+	// Iterate over the temporal dimension, i.e. project one 3D volume at a time.
+	for( int b=0; b<binning_->get_number_of_bins(); b++ ) {
+		// Make a 3d view into the (possibly 4d) image
+		hoCuNDArray<float> image_3d(&dims_3d, image->get_data_ptr()+size_t(b)*num_3d_elements);
+
+		conebeam_forwards_projection( projections2, &image_3d,
+				acquisition_->get_geometry()->get_angles(),
+				acquisition_->get_geometry()->get_offsets(),
+				binning_->get_bin(b),
+				projections_per_batch_, samples_per_pixel_,
+				is_dims_in_mm_, ps_dims_in_mm, SDD, SAD);
+	}
+
+	if (use_offset_correction_ && !use_fbp_)
+		this->offset_correct(projections2);
+	if (accumulate)
+		*projections += *projections2;
+}
+
+// Backprojection: backprojects 'projections' into the 3D volume of every bin in 'image'.
+// - 'image' is either 3D, or 4D holding one 3D volume per bin along its last dimension
+// - filtered backprojection is used when use_fbp_ is set; weights and filter are computed on demand
+// Throws std::runtime_error on invalid input, or when setup or binning is missing.
+void hoCuConebeamProjectionOperator
+::mult_MH( hoCuNDArray<float> *projections, hoCuNDArray<float> *image, bool accumulate )
+{
+	// Validate the input
+	if( image == 0x0 || projections == 0x0 ){
+		throw std::runtime_error("Error: hoCuConebeamProjectionOperator::mult_MH:: illegal array pointer provided");
+	}
+
+	if( (image->get_number_of_dimensions() != 4) &&  (image->get_number_of_dimensions() != 3) ){
+		throw std::runtime_error("Error: hoCuConebeamProjectionOperator::mult_MH: image array must be four or three -dimensional");
+	}
+
+	if( projections->get_number_of_dimensions() != 3 ){
+		throw std::runtime_error("Error: hoCuConebeamProjectionOperator::mult_MH: projections array must be three-dimensional");
+	}
+
+	if( !preprocessed_ ){
+		throw std::runtime_error( "Error: hoCuConebeamProjectionOperator::mult_MH: setup not performed");
+	}
+
+	if( !binning_.get() ){
+		throw std::runtime_error( "Error: hoCuConebeamProjectionOperator::mult_MH: binning not provided");
+	}
+
+	if( projections->get_size(2) != acquisition_->get_geometry()->get_angles().size() ||
+			projections->get_size(2) != acquisition_->get_geometry()->get_offsets().size() ){
+		throw std::runtime_error("Error: hoCuConebeamProjectionOperator::mult_MH: inconsistent sizes of input arrays/vectors");
+	}
+
+	// Dimensions of a single 3D volume, and the number of such volumes stored in 'image'
+	std::vector<size_t> dims_3d = *image->get_dimensions();
+	const size_t num_volumes = (dims_3d.size()==4) ? dims_3d[3] : 1;
+	if (dims_3d.size()==4)
+		dims_3d.pop_back();
+
+	// Every bin writes its own volume; refuse to write beyond the end of 'image'
+	if( num_volumes < size_t(binning_->get_number_of_bins()) ){
+		throw std::runtime_error("Error: hoCuConebeamProjectionOperator::mult_MH: image holds fewer volumes than there are bins");
+	}
+
+	// size_t rather than int, so that b*num_3d_elements cannot overflow for large 4D images
+	const size_t num_3d_elements = dims_3d[0]*dims_3d[1]*dims_3d[2];
+	intd3 is_dims_in_pixels( image->get_size(0), image->get_size(1), image->get_size(2) );
+	floatd2 ps_dims_in_mm = acquisition_->get_geometry()->get_FOV();
+	float SDD = acquisition_->get_geometry()->get_SDD();
+	float SAD = acquisition_->get_geometry()->get_SAD();
+
+	// The FBP path dereferences both arrays, so make sure they exist before the first bin
+	if( use_fbp_ && !cosine_weights_.get() )
+		compute_cosine_weights();
+	if( use_fbp_ && !frequency_filter_.get() )
+		compute_default_frequency_filter();
+
+	// Iterate over the temporal dimension, i.e. reconstruct one 3D volume at a time.
+	for( int b=0; b<binning_->get_number_of_bins(); b++ ) {
+		hoCuNDArray<float> image_3d(&dims_3d, image->get_data_ptr()+size_t(b)*num_3d_elements);
+
+		if( use_fbp_ ){
+			conebeam_backwards_projection<true>
+			( projections, &image_3d,
+					acquisition_->get_geometry()->get_angles(),
+					acquisition_->get_geometry()->get_offsets(),
+					binning_->get_bin(b),
+					projections_per_batch_,
+					is_dims_in_pixels, is_dims_in_mm_, ps_dims_in_mm,
+					SDD, SAD, short_scan_, use_offset_correction_, accumulate,
+					cosine_weights_.get(), frequency_filter_.get() );
+		}
+		else {
+			conebeam_backwards_projection<false>
+			( projections, &image_3d,
+					acquisition_->get_geometry()->get_angles(),
+					acquisition_->get_geometry()->get_offsets(),
+					binning_->get_bin(b),
+					projections_per_batch_,
+					is_dims_in_pixels, is_dims_in_mm_, ps_dims_in_mm,
+					SDD, SAD, short_scan_, use_offset_correction_, accumulate );
+		}
+	}
+}
+}
diff --git a/toolboxes/ct/xray/gpu/hoCuConebeamProjectionOperator.h b/toolboxes/ct/xray/gpu/hoCuConebeamProjectionOperator.h
new file mode 100644
index 0000000..fa2b39c
--- /dev/null
+++ b/toolboxes/ct/xray/gpu/hoCuConebeamProjectionOperator.h
@@ -0,0 +1,150 @@
+#pragma once
+
+#include "cuNDArray.h"
+#include "linearOperator.h"
+#include "CBCT_acquisition.h"
+#include "CBCT_binning.h"
+#include "hoCuNDArray_math.h"
+#include "gpuxray_export.h"
+
+#include <numeric>
+#include <math_constants.h>
+#include <vector>
+
+namespace Gadgetron{
+  
+  // Cone beam projection operator on host arrays: mult_M forward projects an image, mult_MH
+  // backprojects projections (optionally filtered). setup() has to be called before either is used.
+  class EXPORTGPUXRAY hoCuConebeamProjectionOperator : public linearOperator< hoCuNDArray<float> >
+  {
+  public:
+    hoCuConebeamProjectionOperator()
+      : linearOperator< hoCuNDArray<float> >()
+      , samples_per_pixel_(1.5f)
+      , use_fbp_(false)
+      , projections_per_batch_(20)
+      , preprocessed_(false)
+      , short_scan_(false)
+      , use_offset_correction_(false)
+      , allow_offset_correction_override_(true)
+    {}
+
+    virtual ~hoCuConebeamProjectionOperator() {}
+
+    virtual void mult_M( hoCuNDArray<float> *in, hoCuNDArray<float> *out, bool accumulate = false );
+    virtual void mult_MH( hoCuNDArray<float> *in, hoCuNDArray<float> *out, bool accumulate = false );
+
+    // Applies the offset correction to proj, using the geometry of the current acquisition.
+    virtual void offset_correct(hoCuNDArray<float>* proj);
+
+    // Stores the acquisition and the image size, and derives the scan mode from the geometry.
+    // Side effects: the geometry's angles are shifted in place to start at zero, short_scan_ is set,
+    // and offset correction may be enabled (unless set_use_offset_correction() was called).
+    virtual void setup( boost::shared_ptr<CBCT_acquisition> acquisition,
+                        floatd3 is_dims_in_mm )
+    {
+      acquisition_ = acquisition;
+      is_dims_in_mm_ = is_dims_in_mm;
+
+      // Shift the angles so that they start at zero; the largest is then the angular span.
+      // An empty angle list is left untouched (dereferencing min/max_element would be undefined).
+      //
+      std::vector<float> &angles = acquisition->get_geometry()->get_angles();
+      float angle_span = 0.0f;
+      if( !angles.empty() ){
+        const float min_value = *std::min_element(angles.begin(), angles.end());
+        for( std::vector<float>::iterator it = angles.begin(); it != angles.end(); ++it )
+          *it -= min_value;
+        angle_span = *std::max_element(angles.begin(), angles.end());
+      }
+
+      // Are we in a short scan setup?
+      // - we say yes if we have covered less than PI+3*delta radians (angle_span is in degrees)
+      //
+      floatd2 ps_dims_in_mm = acquisition_->get_geometry()->get_FOV();
+      float SDD = acquisition_->get_geometry()->get_SDD();
+      float delta = std::atan(ps_dims_in_mm[0]/(2.0f*SDD)); // Fan angle
+      short_scan_ = !( angle_span*CUDART_PI_F/180.0f > CUDART_PI_F+3.0f*delta );
+
+      // Enable offset correction if the first component of the mean offset exceeds 10% of the first
+      // FOV dimension, unless the user decided explicitly. The offsets are read through a reference.
+      //
+      const std::vector<floatd2> &offsets = acquisition_->get_geometry()->get_offsets();
+      if( allow_offset_correction_override_ && !offsets.empty() ){
+        floatd2 mean_offset = std::accumulate(offsets.begin(),offsets.end(),floatd2(0,0))/float(offsets.size());
+        if( mean_offset[0] > ps_dims_in_mm[0]*0.1f )
+          use_offset_correction_ = true;
+      }
+
+      preprocessed_ = true;
+    }
+
+    virtual void setup( boost::shared_ptr<CBCT_acquisition> acquisition,
+                        boost::shared_ptr<CBCT_binning> binning,
+                        floatd3 is_dims_in_mm )
+    {
+      binning_ = binning;
+      setup( acquisition, is_dims_in_mm );
+    }
+
+    // Selects filtered backprojection (true) or unfiltered backprojection (false) for mult_MH.
+    inline void set_use_filtered_backprojection( bool use_fbp ){
+      use_fbp_ = use_fbp;
+    }
+
+    // Forces offset correction on or off; setup() will not change the choice afterwards.
+    inline void set_use_offset_correction( bool use_correction ){
+      use_offset_correction_ = use_correction;
+      allow_offset_correction_override_ = false;
+    }
+
+    inline bool get_use_offset_correction(){ return use_offset_correction_; }
+
+    inline void set_num_projections_per_batch( unsigned int projections_per_batch ){
+      projections_per_batch_ = projections_per_batch;
+    }
+
+    inline void set_num_samples_per_pixel( float samples_per_pixel ){
+      samples_per_pixel_ = samples_per_pixel;
+    }
+
+    // Replaces the frequency filter that filtered backprojection would otherwise compute itself.
+    inline void set_frequency_filter( boost::shared_ptr< cuNDArray<float> > weights ){
+      frequency_filter_ = weights;
+    }
+
+    // Note: unlike setup(), this only stores the pointer and repeats none of the preprocessing.
+    void set_acquisition( boost::shared_ptr<CBCT_acquisition> acquisition ){
+      acquisition_ = acquisition;
+    }
+
+    boost::shared_ptr<CBCT_acquisition> get_acquisition(){ return acquisition_; }
+
+    void set_binning( boost::shared_ptr<CBCT_binning> binning ){
+      binning_ = binning;
+    }
+
+    boost::shared_ptr<CBCT_binning> get_binning(){ return binning_; }
+
+    virtual boost::shared_ptr< linearOperator< hoCuNDArray<float> > > clone() {
+      return linearOperator< hoCuNDArray<float> >::clone(this);
+    }
+
+  protected:
+    virtual void compute_default_frequency_filter();
+    virtual void compute_cosine_weights();
+
+    boost::shared_ptr<CBCT_acquisition> acquisition_;
+    boost::shared_ptr<CBCT_binning> binning_;
+    floatd3 is_dims_in_mm_;
+    float samples_per_pixel_;
+    bool use_fbp_;
+    unsigned int projections_per_batch_;
+    bool preprocessed_;
+    bool short_scan_;
+    bool use_offset_correction_;
+    bool allow_offset_correction_override_;
+    boost::shared_ptr< cuNDArray<float> > cosine_weights_;
+    boost::shared_ptr< cuNDArray<float> > frequency_filter_;
+  };
+}
diff --git a/toolboxes/fft/CMakeLists.txt b/toolboxes/fft/CMakeLists.txt
new file mode 100644
index 0000000..2ce2ea8
--- /dev/null
+++ b/toolboxes/fft/CMakeLists.txt
@@ -0,0 +1,11 @@
+if(FFTW3_FOUND)  # the cpu fft toolbox is only configured when FFTW3 was found
+  add_subdirectory(cpu)
+else(FFTW3_FOUND)
+  message("FFTW not found, not compiling cpu fft toolbox")
+endif(FFTW3_FOUND)
+
+if(CUDA_FOUND)  # the gpu fft toolbox is only configured when CUDA was found
+  add_subdirectory(gpu)
+else(CUDA_FOUND)
+  message("CUDA not found, not compiling gpu fft toolbox")
+endif(CUDA_FOUND)
\ No newline at end of file
diff --git a/toolboxes/fft/cpu/CMakeLists.txt b/toolboxes/fft/cpu/CMakeLists.txt
new file mode 100644
index 0000000..ea9513d
--- /dev/null
+++ b/toolboxes/fft/cpu/CMakeLists.txt
@@ -0,0 +1,45 @@
+if (WIN32)  # makes cpufft_export.h pick __declspec(dllexport) while this library is built
+  ADD_DEFINITIONS(-D__BUILD_GADGETRON_CPUFFT__)
+endif (WIN32)
+
+include_directories(
+  ${CMAKE_SOURCE_DIR}/toolboxes/core
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
+  ${FFTW3_INCLUDE_DIR}
+  ${Boost_INCLUDE_DIR}
+  ${ISMRMRD_INCLUDE_DIR}
+  ${ARMADILLO_INCLUDE_DIRS}
+  )
+# The cpu fft toolbox library; its type (LIBTYPE) is defined outside this file.
+add_library(gadgetron_toolbox_cpufft ${LIBTYPE} 
+  cpufft_export.h 
+  hoNDFFT.h
+  hoNDFFT.cpp
+  )
+
+set_target_properties(gadgetron_toolbox_cpufft PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+# Link against MKL when it was found, otherwise against FFTW3.
+if (MKL_FOUND)
+    target_link_libraries(gadgetron_toolbox_cpufft
+      gadgetron_toolbox_cpucore
+      gadgetron_toolbox_cpucore_math
+      ${MKL_LIBRARIES} 
+      ${Boost_LIBRARIES} 
+      )
+else (MKL_FOUND)
+target_link_libraries(gadgetron_toolbox_cpufft
+  gadgetron_toolbox_cpucore
+  gadgetron_toolbox_cpucore_math
+  ${FFTW3_LIBRARIES} 
+  ${Boost_LIBRARIES} 
+  )
+endif (MKL_FOUND)
+# Install the library into lib and its public headers into include.
+install(TARGETS gadgetron_toolbox_cpufft DESTINATION lib COMPONENT main)
+
+install(FILES
+  cpufft_export.h 
+  hoNDFFT.h
+  DESTINATION include COMPONENT main)
diff --git a/toolboxes/fft/cpu/cpufft_export.h b/toolboxes/fft/cpu/cpufft_export.h
new file mode 100644
index 0000000..46c8064
--- /dev/null
+++ b/toolboxes/fft/cpu/cpufft_export.h
@@ -0,0 +1,22 @@
+/** \file cpufft_export.h
+    \brief Required definitions for Windows, importing/exporting dll symbols 
+*/
+
+#ifndef CPUFFT_EXPORT_H_
+#define CPUFFT_EXPORT_H_
+
+#if defined (WIN32)
+    #ifdef BUILD_TOOLBOX_STATIC
+        #define EXPORTCPUFFT
+    #else
+        #if defined (__BUILD_GADGETRON_CPUFFT__) || defined (cpufft_EXPORTS)
+            #define EXPORTCPUFFT __declspec(dllexport)
+        #else
+            #define EXPORTCPUFFT __declspec(dllimport)
+        #endif
+    #endif
+#else
+#define EXPORTCPUFFT
+#endif
+
+#endif /* CPUFFT_EXPORT_H_ */
diff --git a/toolboxes/fft/cpu/hoNDFFT.cpp b/toolboxes/fft/cpu/hoNDFFT.cpp
new file mode 100644
index 0000000..a71009f
--- /dev/null
+++ b/toolboxes/fft/cpu/hoNDFFT.cpp
@@ -0,0 +1,1693 @@
+#include "hoNDFFT.h"
+#include "hoMatrix.h"
+#include "hoNDArray_elemwise.h"
+#include "hoNDArray_math.h"
+
+namespace Gadgetron{
+
+    /// Singleton accessor: allocates the hoNDFFT<T> object on the first call and
+    /// returns the same pointer on every later call.
+    /// NOTE(review): the lazy creation is not guarded by any lock in this function.
+    template<typename T> hoNDFFT<T>* hoNDFFT<T>::instance()
+    {
+        if (instance_ == NULL)
+        {
+            instance_ = new hoNDFFT<T>();
+        }
+
+        return instance_;
+    }
+
+    /// Per-type singleton pointer; stays NULL until instance() is first called.
+    template<class T> hoNDFFT<T>* hoNDFFT<T>::instance_ = NULL;
+
+    /**
+     * In-place 1D FFT of a complex array along a single dimension.
+     *
+     * Every line along dim_to_transform is copied into a scratch buffer with its two
+     * halves swapped, run through the FFTW plan, and copied back with the halves
+     * swapped again (and multiplied by the scale factor).
+     *
+     * @param input             array that is transformed in place
+     * @param dim_to_transform  index of the dimension to transform; the call is a no-op
+     *                          when it is not smaller than the number of dimensions
+     * @param sign              handed to the FFTW planner; must be -1 or +1, anything else
+     *                          is a no-op. For +1 the result is scaled by 1/length,
+     *                          for -1 it is left unscaled.
+     *
+     * Buffer allocation, plan creation and plan destruction run under mutex_. The
+     * mutex is released on every exit path, including the two failure returns, so a
+     * failed allocation or planning step cannot leave it locked for later callers.
+     */
+    template<class T> void hoNDFFT<T>::fft_int(hoNDArray< ComplexType >* input, size_t dim_to_transform, int sign)
+    {
+        if (sign != -1 && sign != 1) return;
+        if (dim_to_transform >= input->get_number_of_dimensions()) return;
+
+        int stride     = 1;           //Distance between points in transform
+        int dist       = 1;           //Distance between vectors
+        int trafos     = 1;           //Transformations per chunk
+        int chunks     = 1;           //Number of chunks
+        int chunk_size = 1;           //Points per chunk
+        int length     = 1;           //Length of each transform
+        int total_dist = 1;
+
+        T scale = 0.0;
+
+        void* fft_plan        = 0;
+        T*    fft_storage     = 0;
+
+        T* fft_buffer = 0;
+        T* data_ptr = 0;
+
+        //Set sizes
+        length = (int)input->get_size(dim_to_transform);
+
+        if (sign == 1)
+        {
+            scale = (T)(1.0/length);
+        }
+        else
+        {
+            scale = (T)1.0;
+        }
+
+        if (dim_to_transform != 0)
+        {
+            for (size_t i = 0; i < dim_to_transform; i++)
+            {
+                chunk_size *= (int)input->get_size(i);
+            }
+            stride = chunk_size;
+            trafos = chunk_size;
+            chunk_size *= length;
+
+            for (size_t i = dim_to_transform+1; i < input->get_number_of_dimensions(); i++)
+            {
+                chunks *= (int)input->get_size(i);
+            }
+        }
+        else
+        {
+            for (size_t i = 1; i < input->get_number_of_dimensions(); i++)
+            {
+                trafos *= (int)input->get_size(i);
+            }
+            chunk_size = trafos*length;
+
+            dist = length;
+        }
+
+        //*2 real and imag
+        chunk_size *= 2;
+        dist *= 2;
+        total_dist = trafos*dist;
+
+
+        //Allocate storage and make plan
+        {
+            mutex_.lock();
+            fft_storage = (T*)fftw_malloc_ptr_(sizeof(T)*length*2);
+            if (fft_storage == 0)
+            {
+                // Release the lock before bailing out; otherwise the next caller blocks forever.
+                mutex_.unlock();
+                std::cout << "Failed to allocate buffer for FFT" << std::endl;
+                return;
+            }
+            fft_buffer = (T*)fft_storage;
+
+            unsigned planner_flags = FFTW_MEASURE | FFTW_DESTROY_INPUT;
+
+            fft_plan = fftw_plan_dft_1d_ptr_(length, fft_storage, fft_storage, sign, planner_flags);
+
+            if (fft_plan == 0)
+            {
+                fftw_free_ptr_(fft_storage);
+                // Same as above: never return with mutex_ still held.
+                mutex_.unlock();
+                std::cout << "Failed to create plan for FFT" << std::endl;
+                return;
+            }
+            mutex_.unlock();
+        }
+
+        //Grab address of data
+        data_ptr = reinterpret_cast<T*>(input->get_data_ptr());
+
+        // All indices below count scalars of type T (two per complex sample).
+        int idx1_max = chunks*chunk_size;
+        int idx1, idx2;       //Index variables
+        int idx2_limit;
+        int middle_point = ((length+1)>>1)<<1;   // 2*ceil(length/2): start of the second half in the buffer
+        int length2 = length<<1;
+        int stride2 = stride<<1;
+
+        for (idx1 = 0; idx1 < idx1_max; idx1+=chunk_size) //Loop over all chunks
+        {
+            idx2_limit = idx1+total_dist;
+            for (idx2 = idx1; idx2 < idx2_limit; idx2+=dist) //Loop over all transformations
+            {
+                ///Copy data to buffer: the first samples of the line fill the buffer
+                ///from middle_point onwards, the remaining samples fill its start.
+                {
+                    int j, idx3 = idx2;
+                    for (j = middle_point; j < length2; idx3+=stride2)
+                    {
+                        fft_buffer[j++] = data_ptr[idx3  ];
+                        fft_buffer[j++] = data_ptr[idx3+1];
+                    }
+                    for (j = 0; j < middle_point; idx3+=stride2)
+                    {
+                        fft_buffer[j++] = data_ptr[idx3  ];
+                        fft_buffer[j++] = data_ptr[idx3+1];
+                    }
+                }
+
+                fftw_execute_ptr_(fft_plan);
+
+                ///Copy the result back with the same half swap, applying the scale factor.
+                {
+                    int j, idx3 = idx2;
+
+                    for (j = middle_point; j < length2; idx3+=stride2)
+                    {
+                        data_ptr[idx3  ] = fft_buffer[j++]*scale;
+                        data_ptr[idx3+1] = fft_buffer[j++]*scale;
+                    }
+                    for (j = 0; j < middle_point; idx3+=stride2)
+                    {
+                        data_ptr[idx3  ] = fft_buffer[j++]*scale;
+                        data_ptr[idx3+1] = fft_buffer[j++]*scale;
+                    }
+                }
+
+            } //Loop over transformations
+        } //Loop over chunks
+
+        //clean up
+        {
+            mutex_.lock();
+            if (fft_plan != 0)
+            {
+                fftw_destroy_plan_ptr_(fft_plan);
+            }
+
+            if (fft_storage != 0)
+            {
+                fftw_free_ptr_(fft_storage);
+            }
+            mutex_.unlock();
+        }
+    }
+
+    /// Binds the function-pointer members to the single-precision (fftwf_*) FFTW entry points.
+    template<> void hoNDFFT<float>::set_function_pointers()
+    {
+        // Memory management and library clean-up.
+        fftw_malloc_ptr_  = &fftwf_malloc;
+        fftw_free_ptr_    = &fftwf_free;
+        fftw_cleanup_ptr_ = &fftwf_cleanup;
+
+        // Wisdom import/export.
+        fftw_import_wisdom_from_file_ptr_ = &fftwf_import_wisdom_from_file;
+        fftw_export_wisdom_to_file_ptr_   = &fftwf_export_wisdom_to_file;
+
+        // Plan life cycle; plans and buffers are handled as void* through these pointers.
+        fftw_plan_dft_1d_ptr_  = reinterpret_cast<void* (*)(int, void*, void*, int, unsigned)>(&fftwf_plan_dft_1d);
+        fftw_execute_ptr_      = reinterpret_cast<void (*)(void*)>(&fftwf_execute);
+        fftw_destroy_plan_ptr_ = reinterpret_cast<void (*)(void*)>(&fftwf_destroy_plan);
+    }
+
+    /// Binds the function-pointer members to the double-precision (fftw_*) FFTW entry points.
+    template<> void hoNDFFT<double>::set_function_pointers()
+    {
+        // Memory management and library clean-up.
+        fftw_malloc_ptr_  = &fftw_malloc;
+        fftw_free_ptr_    = &fftw_free;
+        fftw_cleanup_ptr_ = &fftw_cleanup;
+
+        // Wisdom import/export.
+        fftw_import_wisdom_from_file_ptr_ = &fftw_import_wisdom_from_file;
+        fftw_export_wisdom_to_file_ptr_   = &fftw_export_wisdom_to_file;
+
+        // Plan life cycle; plans and buffers are handled as void* through these pointers.
+        fftw_plan_dft_1d_ptr_  = reinterpret_cast<void* (*)(int, void*, void*, int, unsigned)>(&fftw_plan_dft_1d);
+        fftw_execute_ptr_      = reinterpret_cast<void (*)(void*)>(&fftw_execute);
+        fftw_destroy_plan_ptr_ = reinterpret_cast<void (*)(void*)>(&fftw_destroy_plan);
+    }
+
+    /// Pivot used by the fftshift routines for a line of x samples: ceil(x/2).
+    /// @param x  number of samples in the line
+    /// @return   index of the sample that fftshift moves to position 0
+    template<typename T>
+    inline size_t hoNDFFT<T>::fftshiftPivot(size_t x)
+    {
+        // x/2 + x%2 is exactly ceil(x/2) for every size_t; no round trip through
+        // double (inexact above 2^53) and no dependency on <cmath>.
+        return x/2 + x%2;
+    }
+
+    /// Pivot used by the ifftshift routines for a line of x samples: floor(x/2).
+    /// @param x  number of samples in the line
+    /// @return   index of the sample that ifftshift moves to position 0
+    template<typename T>
+    inline size_t hoNDFFT<T>::ifftshiftPivot(size_t x)
+    {
+        // Unsigned integer division already floors; no round trip through double
+        // (inexact above 2^53) and no dependency on <cmath>.
+        return x/2;
+    }
+
+    /// Rotates one line of x samples from a into r so that a[pivot] lands at r[0].
+    /// a and r are used with memcpy, so they must not overlap.
+    template<typename T>
+    inline bool hoNDFFT<T>::fftshift1D(const ComplexType* a, ComplexType* r, size_t x, size_t pivot)
+    {
+        try
+        {
+            // Samples from the pivot to the end of the line go first ...
+            const size_t tail = x - pivot;
+            memcpy(r, a + pivot, sizeof(ComplexType)*tail);
+
+            // ... followed by the samples that were in front of the pivot.
+            memcpy(r + tail, a, sizeof(ComplexType)*pivot);
+
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshift1D(const ComplexType* a, ComplexType* r, size_t x, size_t pivot) ...");
+            return false;
+        }
+    }
+
+    /// Same rotation as the pointer fftshift1D; only the pivot supplied by the caller differs.
+    template<typename T>
+    inline bool hoNDFFT<T>::ifftshift1D(const ComplexType* a, ComplexType* r, size_t x, size_t pivot)
+    {
+        const bool shifted = fftshift1D(a, r, x, pivot);
+        return shifted;
+    }
+
+    /**
+     * In-place rotation of n contiguous lines of x samples each, so that the sample at
+     * index pivot of every line moves to index 0.
+     * A scratch line is declared inside the parallel region; OpenMP threading is only
+     * enabled when n > 256.
+     */
+    template<typename T>
+    bool hoNDFFT<T>::fftshiftPivot1D(ComplexType* a, size_t x, size_t n, size_t pivot)
+    {
+        try
+        {
+            long long line;
+
+            #pragma omp parallel private(line) shared(n, x, pivot, a) if ( n > 256 )
+            {
+                hoNDArray< ComplexType > scratch(x);
+
+                #pragma omp for
+                for ( line=0; line<(long long)n; line++ )
+                {
+                    ComplexType* cur = a + line*x;
+
+                    // Rotate into the scratch line, then overwrite the original line.
+                    fftshift1D(cur, scratch.begin(), x, pivot);
+                    memcpy(cur, scratch.begin(), sizeof(ComplexType)*x);
+                }
+            }
+
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshiftPivot1D(ComplexType* a, size_t x, size_t n, size_t pivot) ...");
+            return false;
+        }
+    }
+
+    /**
+     * Out-of-place rotation of n contiguous lines of x samples each: line k of a is
+     * rotated into line k of r so that its sample at index pivot lands at index 0.
+     * OpenMP threading is only enabled when n > 256.
+     */
+    template<typename T>
+    bool hoNDFFT<T>::fftshiftPivot1D(const ComplexType* a, ComplexType* r, size_t x, size_t n, size_t pivot)
+    {
+        try
+        {
+            long long line;
+
+            #pragma omp parallel for private(line) shared(n, x, pivot, a, r) if ( n > 256 )
+            for ( line=0; line<(long long)n; line++ )
+            {
+                const ComplexType* src = a + line*x;
+                ComplexType* dst = r + line*x;
+
+                fftshift1D(src, dst, x, pivot);
+            }
+
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshiftPivot1D(const ComplexType* a, ComplexType* r, size_t x, size_t n, size_t pivot) ...");
+            return false;
+        }
+    }
+
+    /// In-place fftshift along the first dimension: every line of a.get_size(0)
+    /// samples is rotated by the fftshiftPivot() pivot.
+    template<typename T>
+    bool hoNDFFT<T>::fftshift1D(hoNDArray< ComplexType >& a)
+    {
+        try
+        {
+            const size_t x = a.get_size(0);
+            const size_t pivot = fftshiftPivot(x);
+
+            // Everything beyond the first dimension is treated as a batch of lines.
+            const size_t numOfShifts = a.get_number_of_elements()/x;
+
+            GADGET_CHECK_RETURN_FALSE(fftshiftPivot1D(a.begin(), x, numOfShifts, pivot));
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshift1D(hoNDArray< ComplexType >& a) ...");
+            return false;
+        }
+    }
+
+    /// Out-of-place fftshift along the first dimension: r receives the lines of a,
+    /// each rotated by the fftshiftPivot() pivot. r is assigned from a first when
+    /// its dimensions differ.
+    template<typename T>
+    bool hoNDFFT<T>::fftshift1D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
+    {
+        try
+        {
+            const bool sameShape = r.dimensions_equal(&a);
+            if ( !sameShape )
+            {
+                r = a;
+            }
+
+            const size_t x = a.get_size(0);
+            const size_t pivot = fftshiftPivot(x);
+
+            // Everything beyond the first dimension is treated as a batch of lines.
+            const size_t numOfShifts = a.get_number_of_elements()/x;
+
+            GADGET_CHECK_RETURN_FALSE(fftshiftPivot1D(a.begin(), r.begin(), x, numOfShifts, pivot));
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshift1D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r) ...");
+            return false;
+        }
+    }
+
+    /// In-place ifftshift along the first dimension: every line of a.get_size(0)
+    /// samples is rotated by the ifftshiftPivot() pivot.
+    template<typename T>
+    bool hoNDFFT<T>::ifftshift1D(hoNDArray< ComplexType >& a)
+    {
+        try
+        {
+            const size_t x = a.get_size(0);
+            const size_t pivot = ifftshiftPivot(x);
+
+            // Everything beyond the first dimension is treated as a batch of lines.
+            const size_t numOfShifts = a.get_number_of_elements()/x;
+
+            GADGET_CHECK_RETURN_FALSE(fftshiftPivot1D(a.begin(), x, numOfShifts, pivot));
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in hoNDFFT<T>::ifftshift1D(hoNDArray< ComplexType >& a) ...");
+            return false;
+        }
+    }
+
+    /// Out-of-place ifftshift along the first dimension: r receives the lines of a,
+    /// each rotated by the ifftshiftPivot() pivot. r is assigned from a first when
+    /// its dimensions differ.
+    template<typename T>
+    bool hoNDFFT<T>::ifftshift1D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
+    {
+        try
+        {
+            const bool sameShape = r.dimensions_equal(&a);
+            if ( !sameShape )
+            {
+                r = a;
+            }
+
+            const size_t x = a.get_size(0);
+            const size_t pivot = ifftshiftPivot(x);
+
+            // Everything beyond the first dimension is treated as a batch of lines.
+            const size_t numOfShifts = a.get_number_of_elements()/x;
+
+            GADGET_CHECK_RETURN_FALSE(fftshiftPivot1D(a.begin(), r.begin(), x, numOfShifts, pivot));
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in hoNDFFT<T>::ifftshift1D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r) ...");
+            return false;
+        }
+    }
+
+    /**
+     * Out-of-place circular shift of n contiguous slices of x*y samples.
+     * Within each slice the sample at (pivotx, pivoty) of a lands at (0, 0) of r.
+     * Assumes pivotx <= x and pivoty <= y. OpenMP threading is only enabled when n > 16.
+     */
+    template<typename T>
+    bool hoNDFFT<T>::fftshiftPivot2D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t n, size_t pivotx, size_t pivoty)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(a!=NULL);
+            GADGET_CHECK_RETURN_FALSE(r!=NULL);
+
+            long long slice;
+
+            #pragma omp parallel for private(slice) shared(a, r, x, y, n, pivotx, pivoty) if (n>16)
+            for ( slice=0; slice<(long long)n; slice++ )
+            {
+                const ComplexType* src = a + slice*x*y;
+                ComplexType* dst = r + slice*x*y;
+
+                // Fill destination rows in ascending order; the matching source row
+                // lies pivoty rows further on, wrapping around at y.
+                for ( size_t ry=0; ry<y; ry++ )
+                {
+                    size_t ay = ry + pivoty;
+                    if ( ay >= y ) ay -= y;
+
+                    // Each row is itself rotated by pivotx.
+                    memcpy(dst+ry*x, src+ay*x+pivotx, sizeof(ComplexType)*(x-pivotx));
+                    memcpy(dst+ry*x+x-pivotx, src+ay*x, sizeof(ComplexType)*pivotx);
+                }
+            }
+
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshiftPivot2D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t n, size_t pivotx, size_t pivoty) ...");
+            return false;
+        }
+    }
+
+    /**
+     * In-place circular shift of n contiguous slices of x*y samples.
+     * Each slice is shifted into a scratch slice (declared inside the parallel
+     * region) so that its sample at (pivotx, pivoty) lands at (0, 0), then copied back.
+     * Assumes pivotx <= x and pivoty <= y. OpenMP threading is only enabled when n > 16.
+     */
+    template<typename T>
+    bool hoNDFFT<T>::fftshiftPivot2D(ComplexType* a, size_t x, size_t y, size_t n, size_t pivotx, size_t pivoty)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(a!=NULL);
+
+            long long slice;
+
+            #pragma omp parallel private(slice) shared(a, x, y, n, pivotx, pivoty) if (n>16)
+            {
+                hoNDArray< ComplexType > scratch(x*y);
+                ComplexType* dst = scratch.begin();
+
+                #pragma omp for
+                for ( slice=0; slice<(long long)n; slice++ )
+                {
+                    ComplexType* src = a + slice*x*y;
+
+                    // Fill scratch rows in ascending order; the matching source row
+                    // lies pivoty rows further on, wrapping around at y.
+                    for ( size_t ry=0; ry<y; ry++ )
+                    {
+                        size_t ay = ry + pivoty;
+                        if ( ay >= y ) ay -= y;
+
+                        memcpy(dst+ry*x, src+ay*x+pivotx, sizeof(ComplexType)*(x-pivotx));
+                        memcpy(dst+ry*x+x-pivotx, src+ay*x, sizeof(ComplexType)*pivotx);
+                    }
+
+                    // Overwrite the slice with its shifted copy.
+                    memcpy(src, dst, sizeof(ComplexType)*x*y);
+                }
+            }
+
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshiftPivot2D(ComplexType* a, size_t x, size_t y, size_t n, size_t pivotx, size_t pivoty) ...");
+            return false;
+        }
+    }
+
+    /// Out-of-place 2D fftshift of n slices of x*y samples, using the
+    /// fftshiftPivot() pivots of x and y.
+    template<typename T>
+    inline bool hoNDFFT<T>::fftshift2D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t n)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(a!=NULL);
+            GADGET_CHECK_RETURN_FALSE(r!=NULL);
+
+            const size_t pivotx = fftshiftPivot(x);
+            const size_t pivoty = fftshiftPivot(y);
+
+            GADGET_CHECK_RETURN_FALSE(fftshiftPivot2D(a, r, x, y, n, pivotx, pivoty));
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshift2D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t n) ...");
+            return false;
+        }
+    }
+
+    /// Out-of-place 2D ifftshift of n slices of x*y samples, using the
+    /// ifftshiftPivot() pivots of x and y.
+    template<typename T>
+    inline bool hoNDFFT<T>::ifftshift2D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t n)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(a!=NULL);
+            GADGET_CHECK_RETURN_FALSE(r!=NULL);
+
+            const size_t pivotx = ifftshiftPivot(x);
+            const size_t pivoty = ifftshiftPivot(y);
+
+            GADGET_CHECK_RETURN_FALSE(fftshiftPivot2D(a, r, x, y, n, pivotx, pivoty));
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in hoNDFFT<T>::ifftshift2D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t n) ...");
+            return false;
+        }
+    }
+
+    /// In-place 2D fftshift of n slices of x*y samples, using the
+    /// fftshiftPivot() pivots of x and y.
+    template<typename T>
+    inline bool hoNDFFT<T>::fftshift2D(ComplexType* a, size_t x, size_t y, size_t n)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(a!=NULL);
+
+            const size_t pivotx = fftshiftPivot(x);
+            const size_t pivoty = fftshiftPivot(y);
+
+            GADGET_CHECK_RETURN_FALSE(fftshiftPivot2D(a, x, y, n, pivotx, pivoty));
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshift2D(ComplexType* a, size_t x, size_t y, size_t n) ...");
+            return false;
+        }
+    }
+
+    /// In-place 2D ifftshift of n slices of x*y samples, using the
+    /// ifftshiftPivot() pivots of x and y.
+    template<typename T>
+    inline bool hoNDFFT<T>::ifftshift2D(ComplexType* a, size_t x, size_t y, size_t n)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(a!=NULL);
+
+            const size_t pivotx = ifftshiftPivot(x);
+            const size_t pivoty = ifftshiftPivot(y);
+
+            GADGET_CHECK_RETURN_FALSE(fftshiftPivot2D(a, x, y, n, pivotx, pivoty));
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in hoNDFFT<T>::ifftshift2D(ComplexType* a, size_t x, size_t y, size_t n) ...");
+            return false;
+        }
+    }
+
+    /// In-place 2D fftshift over the first two dimensions of a; the remaining
+    /// elements are treated as a batch of slices.
+    template<typename T>
+    inline bool hoNDFFT<T>::fftshift2D(hoNDArray< ComplexType >& a)
+    {
+        const size_t sx = a.get_size(0);
+        const size_t sy = a.get_size(1);
+        const size_t numSlices = a.get_number_of_elements()/(sx*sy);
+
+        return fftshift2D(a.begin(), sx, sy, numSlices);
+    }
+
+    /// Out-of-place 2D fftshift over the first two dimensions of a into r; r is
+    /// assigned from a first when its dimensions differ.
+    template<typename T>
+    inline bool hoNDFFT<T>::fftshift2D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
+    {
+        const bool sameShape = r.dimensions_equal(&a);
+        if ( !sameShape )
+        {
+            r = a;
+        }
+
+        const size_t sx = a.get_size(0);
+        const size_t sy = a.get_size(1);
+        const size_t numSlices = a.get_number_of_elements()/(sx*sy);
+
+        return fftshift2D(a.begin(), r.begin(), sx, sy, numSlices);
+    }
+
+    /// In-place 2D ifftshift over the first two dimensions of a; the remaining
+    /// elements are treated as a batch of slices.
+    template<typename T>
+    inline bool hoNDFFT<T>::ifftshift2D(hoNDArray< ComplexType >& a)
+    {
+        const size_t sx = a.get_size(0);
+        const size_t sy = a.get_size(1);
+        const size_t numSlices = a.get_number_of_elements()/(sx*sy);
+
+        return ifftshift2D(a.begin(), sx, sy, numSlices);
+    }
+
+    /// Out-of-place 2D ifftshift over the first two dimensions of a into r; r is
+    /// assigned from a first when its dimensions differ.
+    template<typename T>
+    inline bool hoNDFFT<T>::ifftshift2D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
+    {
+        const bool sameShape = r.dimensions_equal(&a);
+        if ( !sameShape )
+        {
+            r = a;
+        }
+
+        const size_t sx = a.get_size(0);
+        const size_t sy = a.get_size(1);
+        const size_t numSlices = a.get_number_of_elements()/(sx*sy);
+
+        return ifftshift2D(a.begin(), r.begin(), sx, sy, numSlices);
+    }
+
+    /**
+     * Out-of-place circular shift of n contiguous volumes of x*y*z samples.
+     * Within each volume the sample at (pivotx, pivoty, pivotz) of a lands at
+     * (0, 0, 0) of r. Assumes each pivot does not exceed its dimension.
+     * OpenMP threading is only enabled when n > 16.
+     */
+    template<typename T>
+    bool hoNDFFT<T>::fftshiftPivot3D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t z, size_t n, size_t pivotx, size_t pivoty,  size_t pivotz)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(a!=NULL);
+            GADGET_CHECK_RETURN_FALSE(r!=NULL);
+
+            long long vol;
+
+#pragma omp parallel for private(vol) shared(a, r, x, y, z, n, pivotx, pivoty, pivotz) if (n>16)
+            for ( vol=0; vol<(long long)n; vol++ )
+            {
+                // Destination planes in ascending order; the source plane lies pivotz
+                // planes further on, wrapping around at z.
+                for ( size_t rz=0; rz<z; rz++ )
+                {
+                    size_t az = rz + pivotz;
+                    if ( az >= z ) az -= z;
+
+                    const ComplexType* src = a + vol*x*y*z + az*x*y;
+                    ComplexType* dst = r + vol*x*y*z + rz*x*y;
+
+                    // Same wrap-around for the rows of the plane.
+                    for ( size_t ry=0; ry<y; ry++ )
+                    {
+                        size_t ay = ry + pivoty;
+                        if ( ay >= y ) ay -= y;
+
+                        memcpy(dst+ry*x, src+ay*x+pivotx, sizeof(ComplexType)*(x-pivotx));
+                        memcpy(dst+ry*x+x-pivotx, src+ay*x, sizeof(ComplexType)*pivotx);
+                    }
+                }
+            }
+
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshiftPivot3D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t z, size_t n, unsigned pivotx, unsigned pivoty,  unsigned pivotz) ...");
+            return false;
+        }
+    }
+
+    /**
+     * In-place circular shift of n contiguous volumes of x*y*z samples.
+     * Each volume is shifted into a scratch volume (declared inside the parallel
+     * region) so that its sample at (pivotx, pivoty, pivotz) lands at (0, 0, 0),
+     * then copied back. Assumes each pivot does not exceed its dimension.
+     * OpenMP threading is only enabled when n > 16.
+     */
+    template<typename T>
+    bool hoNDFFT<T>::fftshiftPivot3D(ComplexType* a, size_t x, size_t y, size_t z, size_t n, size_t pivotx, size_t pivoty,  size_t pivotz)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(a!=NULL);
+
+            long long vol;
+
+#pragma omp parallel private(vol) shared(a, x, y, z, n, pivotx, pivoty, pivotz) if (n>16)
+            {
+                hoNDArray< ComplexType > scratch(x*y*z);
+
+#pragma omp for
+                for ( vol=0; vol<(long long)n; vol++ )
+                {
+                    // Scratch planes in ascending order; the source plane lies pivotz
+                    // planes further on, wrapping around at z.
+                    for ( size_t rz=0; rz<z; rz++ )
+                    {
+                        size_t az = rz + pivotz;
+                        if ( az >= z ) az -= z;
+
+                        const ComplexType* src = a + vol*x*y*z + az*x*y;
+                        ComplexType* dst = scratch.begin() + rz*x*y;
+
+                        // Same wrap-around for the rows of the plane.
+                        for ( size_t ry=0; ry<y; ry++ )
+                        {
+                            size_t ay = ry + pivoty;
+                            if ( ay >= y ) ay -= y;
+
+                            memcpy(dst+ry*x, src+ay*x+pivotx, sizeof(ComplexType)*(x-pivotx));
+                            memcpy(dst+ry*x+x-pivotx, src+ay*x, sizeof(ComplexType)*pivotx);
+                        }
+                    }
+
+                    // Overwrite the volume with its shifted copy.
+                    memcpy(a+vol*x*y*z, scratch.begin(), sizeof(ComplexType)*x*y*z);
+                }
+            }
+
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshiftPivot3D(ComplexType* a, size_t x, size_t y, size_t z, size_t n, unsigned pivotx, unsigned pivoty,  unsigned pivotz) ...");
+            return false;
+        }
+    }
+
+    /// Out-of-place 3D fftshift of n volumes of x*y*z samples, using the
+    /// fftshiftPivot() pivots of x, y and z.
+    template<typename T>
+    inline bool hoNDFFT<T>::fftshift3D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t z, size_t n)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(a!=NULL);
+            GADGET_CHECK_RETURN_FALSE(r!=NULL);
+
+            const size_t pivotx = fftshiftPivot(x);
+            const size_t pivoty = fftshiftPivot(y);
+            const size_t pivotz = fftshiftPivot(z);
+
+            GADGET_CHECK_RETURN_FALSE(fftshiftPivot3D(a, r, x, y, z, n, pivotx, pivoty, pivotz));
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshift3D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t z, size_t n) ...");
+            return false;
+        }
+    }
+
+    /// Out-of-place 3D ifftshift of n volumes of x*y*z samples, using the
+    /// ifftshiftPivot() pivots of x, y and z.
+    template<typename T>
+    inline bool hoNDFFT<T>::ifftshift3D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t z, size_t n)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(a!=NULL);
+            GADGET_CHECK_RETURN_FALSE(r!=NULL);
+
+            const size_t pivotx = ifftshiftPivot(x);
+            const size_t pivoty = ifftshiftPivot(y);
+            const size_t pivotz = ifftshiftPivot(z);
+
+            GADGET_CHECK_RETURN_FALSE(fftshiftPivot3D(a, r, x, y, z, n, pivotx, pivoty, pivotz));
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in hoNDFFT<T>::ifftshift3D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t z, size_t n) ...");
+            return false;
+        }
+    }
+
+    /// In-place 3D fftshift of n volumes of x*y*z samples, using the
+    /// fftshiftPivot() pivots of x, y and z.
+    template<typename T>
+    inline bool hoNDFFT<T>::fftshift3D(ComplexType* a, size_t x, size_t y, size_t z, size_t n)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(a!=NULL);
+
+            const size_t pivotx = fftshiftPivot(x);
+            const size_t pivoty = fftshiftPivot(y);
+            const size_t pivotz = fftshiftPivot(z);
+
+            GADGET_CHECK_RETURN_FALSE(fftshiftPivot3D(a, x, y, z, n, pivotx, pivoty, pivotz));
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in hoNDFFT<T>::fftshift3D(ComplexType* a, size_t x, size_t y, size_t z, size_t n) ...");
+            return false;
+        }
+    }
+
+    /// In-place 3D ifftshift of n volumes of x*y*z samples, using the
+    /// ifftshiftPivot() pivots of x, y and z.
+    template<typename T>
+    inline bool hoNDFFT<T>::ifftshift3D(ComplexType* a, size_t x, size_t y, size_t z, size_t n)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(a!=NULL);
+
+            const size_t pivotx = ifftshiftPivot(x);
+            const size_t pivoty = ifftshiftPivot(y);
+            const size_t pivotz = ifftshiftPivot(z);
+
+            GADGET_CHECK_RETURN_FALSE(fftshiftPivot3D(a, x, y, z, n, pivotx, pivoty, pivotz));
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in hoNDFFT<T>::ifftshift3D(ComplexType* a, size_t x, size_t y, size_t z, size_t n) ...");
+            return false;
+        }
+    }
+
+    /// In-place 3D fftshift over the first three dimensions of a; the remaining
+    /// elements are treated as a batch of volumes.
+    template<typename T>
+    inline bool hoNDFFT<T>::fftshift3D(hoNDArray< ComplexType >& a)
+    {
+        const size_t sx = a.get_size(0);
+        const size_t sy = a.get_size(1);
+        const size_t sz = a.get_size(2);
+        const size_t numVolumes = a.get_number_of_elements()/(sx*sy*sz);
+
+        return fftshift3D(a.begin(), sx, sy, sz, numVolumes);
+    }
+
+    /// Out-of-place 3D fftshift over the first three dimensions of a into r; r is
+    /// assigned from a first when its dimensions differ.
+    template<typename T>
+    inline bool hoNDFFT<T>::fftshift3D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
+    {
+        const bool sameShape = r.dimensions_equal(&a);
+        if ( !sameShape )
+        {
+            r = a;
+        }
+
+        const size_t sx = a.get_size(0);
+        const size_t sy = a.get_size(1);
+        const size_t sz = a.get_size(2);
+        const size_t numVolumes = a.get_number_of_elements()/(sx*sy*sz);
+
+        return fftshift3D(a.begin(), r.begin(), sx, sy, sz, numVolumes);
+    }
+
+    /// In-place 3D ifftshift over the first three dimensions of a; the remaining
+    /// elements are treated as a batch of volumes.
+    template<typename T>
+    inline bool hoNDFFT<T>::ifftshift3D(hoNDArray< ComplexType >& a)
+    {
+        const size_t sx = a.get_size(0);
+        const size_t sy = a.get_size(1);
+        const size_t sz = a.get_size(2);
+        const size_t numVolumes = a.get_number_of_elements()/(sx*sy*sz);
+
+        return ifftshift3D(a.begin(), sx, sy, sz, numVolumes);
+    }
+
+    /// Out-of-place 3D ifftshift over the first three dimensions of a into r; r is
+    /// assigned from a first when its dimensions differ.
+    template<typename T>
+    inline bool hoNDFFT<T>::ifftshift3D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
+    {
+        const bool sameShape = r.dimensions_equal(&a);
+        if ( !sameShape )
+        {
+            r = a;
+        }
+
+        const size_t sx = a.get_size(0);
+        const size_t sy = a.get_size(1);
+        const size_t sz = a.get_size(2);
+        const size_t numVolumes = a.get_number_of_elements()/(sx*sy*sz);
+
+        return ifftshift3D(a.begin(), r.begin(), sx, sy, sz, numVolumes);
+    }
+
+    // -----------------------------------------------------------------------------------------
+
+    /// In-place 1D transform: forwards to fft1(a, true).
+    /// NOTE(review): the flag presumably selects the forward direction - confirm against the two-argument overload.
+    template<typename T>
+    inline bool hoNDFFT<T>::fft1(hoNDArray< ComplexType >& a)
+    {
+        const bool flag = true;
+        return fft1(a, flag);
+    }
+
+    /// In-place 1D transform: forwards to fft1(a, false).
+    /// NOTE(review): the flag presumably selects the inverse direction - confirm against the two-argument overload.
+    template<typename T>
+    inline bool hoNDFFT<T>::ifft1(hoNDArray< ComplexType >& a)
+    {
+        const bool flag = false;
+        return fft1(a, flag);
+    }
+
+    /// Out-of-place 1D transform of a into r: r is (re)created with the dimensions
+    /// of a when they differ, then the call forwards to fft1(a, r, true).
+    template<typename T>
+    inline bool hoNDFFT<T>::fft1(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
+    {
+        const bool sameShape = r.dimensions_equal(&a);
+        if ( !sameShape )
+        {
+            r.create(a.get_dimensions());
+        }
+
+        // The three-argument overload takes a non-const input reference.
+        hoNDArray< ComplexType >& src = const_cast<hoNDArray< ComplexType >&>(a);
+        return fft1(src, r, true);
+    }
+
+    /// Out-of-place 1D transform of a into r: r is (re)created with the dimensions
+    /// of a when they differ, then the call forwards to fft1(a, r, false).
+    template<typename T>
+    inline bool hoNDFFT<T>::ifft1(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
+    {
+        const bool sameShape = r.dimensions_equal(&a);
+        if ( !sameShape )
+        {
+            r.create(a.get_dimensions());
+        }
+
+        // The three-argument overload takes a non-const input reference.
+        hoNDArray< ComplexType >& src = const_cast<hoNDArray< ComplexType >&>(a);
+        return fft1(src, r, false);
+    }
+
+    template<typename T> 
+    inline bool hoNDFFT<T>::fft1c(hoNDArray< ComplexType >& a) // in place: ifftshift1D, fft1, fftshift1D, each wrapped in GADGET_CHECK_RETURN_FALSE
+    {
+        GADGET_CHECK_RETURN_FALSE(ifftshift1D(a)); // rotate every first-dimension line by the ifftshiftPivot() pivot
+        GADGET_CHECK_RETURN_FALSE(fft1(a)); // in-place fft1
+        GADGET_CHECK_RETURN_FALSE(fftshift1D(a)); // rotate every line by the fftshiftPivot() pivot
+        return true;
+    }
+
+    template<typename T> 
+    inline bool hoNDFFT<T>::ifft1c(hoNDArray< ComplexType >& a) // in place: ifftshift1D, ifft1, fftshift1D, each wrapped in GADGET_CHECK_RETURN_FALSE
+    {
+        GADGET_CHECK_RETURN_FALSE(ifftshift1D(a)); // rotate every first-dimension line by the ifftshiftPivot() pivot
+        GADGET_CHECK_RETURN_FALSE(ifft1(a)); // in-place ifft1
+        GADGET_CHECK_RETURN_FALSE(fftshift1D(a)); // rotate every line by the fftshiftPivot() pivot
+        return true;
+    }
+
+    template<typename T> 
+    inline bool hoNDFFT<T>::fft1c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r) // a is only read; the result is built in r
+    {
+        GADGET_CHECK_RETURN_FALSE(ifftshift1D(a, r)); // r = a with every first-dimension line rotated by the ifftshiftPivot() pivot
+        GADGET_CHECK_RETURN_FALSE(fft1(r)); // in-place fft1 on r
+        GADGET_CHECK_RETURN_FALSE(fftshift1D(r)); // in-place rotation of r by the fftshiftPivot() pivot
+        return true;
+    }
+
+    template<typename T> 
+    inline bool hoNDFFT<T>::ifft1c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r) // a is only read; the result is built in r
+    {
+        GADGET_CHECK_RETURN_FALSE(ifftshift1D(a, r)); // r = a with every first-dimension line rotated by the ifftshiftPivot() pivot
+        GADGET_CHECK_RETURN_FALSE(ifft1(r)); // in-place ifft1 on r
+        GADGET_CHECK_RETURN_FALSE(fftshift1D(r)); // in-place rotation of r by the fftshiftPivot() pivot
+        return true;
+    }
+
+    template<typename T> 
+    inline bool hoNDFFT<T>::fft1c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf) // result ends up in r; buf holds the intermediate transform
+    {
+        GADGET_CHECK_RETURN_FALSE(ifftshift1D(a, r)); // r = a with every first-dimension line rotated by the ifftshiftPivot() pivot
+        GADGET_CHECK_RETURN_FALSE(fft1(r, buf)); // out-of-place fft1: r -> buf
+        GADGET_CHECK_RETURN_FALSE(fftshift1D(buf, r)); // out-of-place rotation by the fftshiftPivot() pivot: buf -> r
+        return true;
+    }
+
+    template<typename T> 
+    inline bool hoNDFFT<T>::ifft1c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf) // result ends up in r; buf holds the intermediate transform
+    {
+        GADGET_CHECK_RETURN_FALSE(ifftshift1D(a, r)); // r = a with every first-dimension line rotated by the ifftshiftPivot() pivot
+        GADGET_CHECK_RETURN_FALSE(ifft1(r, buf)); // out-of-place ifft1: r -> buf
+        GADGET_CHECK_RETURN_FALSE(fftshift1D(buf, r)); // out-of-place rotation by the fftshiftPivot() pivot: buf -> r
+        return true;
+    }
+
+    // -----------------------------------------------------------------------------------------
+
+    /// In-place 2D transform: forwards to fft2(a, true).
+    /// NOTE(review): the flag presumably selects the forward direction - confirm against the two-argument overload.
+    template<typename T>
+    inline bool hoNDFFT<T>::fft2(hoNDArray< ComplexType >& a)
+    {
+        const bool flag = true;
+        return fft2(a, flag);
+    }
+
+    // In-place inverse 2D FFT: delegates to fft2(a, forward) with forward = false.
+    template<typename T> 
+    inline bool hoNDFFT<T>::ifft2(hoNDArray< ComplexType >& a)
+    {
+        return fft2(a, false);
+    }
+
+    // Out-of-place forward 2D FFT of a into r.
+    // r is re-created with a's dimensions whenever the two shapes differ.
+    template<typename T> 
+    inline bool hoNDFFT<T>::fft2(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
+    {
+        const bool sameShape = r.dimensions_equal(&a);
+        if ( !sameShape ) r.create(a.get_dimensions());
+
+        // the three-argument overload takes its input by non-const reference
+        hoNDArray< ComplexType >& src = const_cast<hoNDArray< ComplexType >&>(a);
+        return fft2(src, r, true);
+    }
+
+    // Out-of-place inverse 2D FFT of a into r.
+    // r is re-created with a's dimensions whenever the two shapes differ.
+    template<typename T> 
+    inline bool hoNDFFT<T>::ifft2(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
+    {
+        const bool sameShape = r.dimensions_equal(&a);
+        if ( !sameShape ) r.create(a.get_dimensions());
+
+        // the three-argument overload takes its input by non-const reference
+        hoNDArray< ComplexType >& src = const_cast<hoNDArray< ComplexType >&>(a);
+        return fft2(src, r, false);
+    }
+
+    // Centered forward 2D FFT, in place on a.
+    // Runs ifftshift2D, fft2 and fftshift2D in that order. Each call is wrapped in
+    // GADGET_CHECK_RETURN_FALSE (defined elsewhere; by its name it returns false when the call fails).
+    template<typename T> 
+    inline bool hoNDFFT<T>::fft2c(hoNDArray< ComplexType >& a)
+    {
+        GADGET_CHECK_RETURN_FALSE(ifftshift2D(a));
+        GADGET_CHECK_RETURN_FALSE(fft2(a));
+        GADGET_CHECK_RETURN_FALSE(fftshift2D(a));
+        return true;
+    }
+
+    // Centered inverse 2D FFT, in place on a.
+    // Runs ifftshift2D, ifft2 and fftshift2D in that order. Each call is wrapped in
+    // GADGET_CHECK_RETURN_FALSE (defined elsewhere; by its name it returns false when the call fails).
+    template<typename T> 
+    inline bool hoNDFFT<T>::ifft2c(hoNDArray< ComplexType >& a)
+    {
+        GADGET_CHECK_RETURN_FALSE(ifftshift2D(a));
+        GADGET_CHECK_RETURN_FALSE(ifft2(a));
+        GADGET_CHECK_RETURN_FALSE(fftshift2D(a));
+        return true;
+    }
+
+    // Centered forward 2D FFT of a, result in r.
+    // ifftshift2D(a, r) is the only step that reads a; fft2 and fftshift2D then operate on r alone.
+    // Each call is wrapped in GADGET_CHECK_RETURN_FALSE (defined elsewhere; by its name it returns
+    // false when the call fails).
+    template<typename T> 
+    inline bool hoNDFFT<T>::fft2c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
+    {
+        GADGET_CHECK_RETURN_FALSE(ifftshift2D(a, r));
+        GADGET_CHECK_RETURN_FALSE(fft2(r));
+        GADGET_CHECK_RETURN_FALSE(fftshift2D(r));
+        return true;
+    }
+
+    // Centered inverse 2D FFT of a, result in r.
+    // ifftshift2D(a, r) is the only step that reads a; ifft2 and fftshift2D then operate on r alone.
+    // Each call is wrapped in GADGET_CHECK_RETURN_FALSE (defined elsewhere; by its name it returns
+    // false when the call fails).
+    template<typename T> 
+    inline bool hoNDFFT<T>::ifft2c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
+    {
+        GADGET_CHECK_RETURN_FALSE(ifftshift2D(a, r));
+        GADGET_CHECK_RETURN_FALSE(ifft2(r));
+        GADGET_CHECK_RETURN_FALSE(fftshift2D(r));
+        return true;
+    }
+
+    // Centered forward 2D FFT of a, result in r, with buf as caller-supplied intermediate storage.
+    // Sequence: ifftshift2D(a, r), then fft2(r, buf), then fftshift2D(buf, r).
+    // Each call is wrapped in GADGET_CHECK_RETURN_FALSE (defined elsewhere; by its name it returns
+    // false when the call fails).
+    template<typename T> 
+    inline bool hoNDFFT<T>::fft2c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf)
+    {
+        GADGET_CHECK_RETURN_FALSE(ifftshift2D(a, r));
+        GADGET_CHECK_RETURN_FALSE(fft2(r, buf));
+        GADGET_CHECK_RETURN_FALSE(fftshift2D(buf, r));
+        return true;
+    }
+
+    // Centered inverse 2D FFT of a, result in r, with buf as caller-supplied intermediate storage.
+    // Sequence: ifftshift2D(a, r), then ifft2(r, buf), then fftshift2D(buf, r).
+    // Each call is wrapped in GADGET_CHECK_RETURN_FALSE (defined elsewhere; by its name it returns
+    // false when the call fails).
+    template<typename T> 
+    inline bool hoNDFFT<T>::ifft2c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf)
+    {
+        GADGET_CHECK_RETURN_FALSE(ifftshift2D(a, r));
+        GADGET_CHECK_RETURN_FALSE(ifft2(r, buf));
+        GADGET_CHECK_RETURN_FALSE(fftshift2D(buf, r));
+        return true;
+    }
+
+    // -----------------------------------------------------------------------------------------
+
+    // In-place forward 3D FFT: delegates to fft3(a, forward) with forward = true.
+    template<typename T> 
+    inline bool hoNDFFT<T>::fft3(hoNDArray< ComplexType >& a)
+    {
+        return fft3(a, true);
+    }
+
+    // In-place inverse 3D FFT: delegates to fft3(a, forward) with forward = false.
+    template<typename T> 
+    inline bool hoNDFFT<T>::ifft3(hoNDArray< ComplexType >& a)
+    {
+        return fft3(a, false);
+    }
+
+    // Out-of-place forward 3D FFT of a into r.
+    // r is re-created with a's dimensions whenever the two shapes differ.
+    template<typename T> 
+    inline bool hoNDFFT<T>::fft3(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
+    {
+        const bool sameShape = r.dimensions_equal(&a);
+        if ( !sameShape ) r.create(a.get_dimensions());
+
+        // the three-argument overload takes its input by non-const reference
+        hoNDArray< ComplexType >& src = const_cast<hoNDArray< ComplexType >&>(a);
+        return fft3(src, r, true);
+    }
+
+    // Out-of-place inverse 3D FFT of a into r.
+    // r is re-created with a's dimensions whenever the two shapes differ.
+    template<typename T> 
+    inline bool hoNDFFT<T>::ifft3(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
+    {
+        const bool sameShape = r.dimensions_equal(&a);
+        if ( !sameShape ) r.create(a.get_dimensions());
+
+        // the three-argument overload takes its input by non-const reference
+        hoNDArray< ComplexType >& src = const_cast<hoNDArray< ComplexType >&>(a);
+        return fft3(src, r, false);
+    }
+
+    // Centered forward 3D FFT, in place on a.
+    // Runs ifftshift3D, fft3 and fftshift3D in that order. Each call is wrapped in
+    // GADGET_CHECK_RETURN_FALSE (defined elsewhere; by its name it returns false when the call fails).
+    template<typename T> 
+    inline bool hoNDFFT<T>::fft3c(hoNDArray< ComplexType >& a)
+    {
+        GADGET_CHECK_RETURN_FALSE(ifftshift3D(a));
+        GADGET_CHECK_RETURN_FALSE(fft3(a));
+        GADGET_CHECK_RETURN_FALSE(fftshift3D(a));
+        return true;
+    }
+
+    // Centered inverse 3D FFT, in place on a.
+    // Runs ifftshift3D, ifft3 and fftshift3D in that order. Each call is wrapped in
+    // GADGET_CHECK_RETURN_FALSE (defined elsewhere; by its name it returns false when the call fails).
+    template<typename T> 
+    inline bool hoNDFFT<T>::ifft3c(hoNDArray< ComplexType >& a)
+    {
+        GADGET_CHECK_RETURN_FALSE(ifftshift3D(a));
+        GADGET_CHECK_RETURN_FALSE(ifft3(a));
+        GADGET_CHECK_RETURN_FALSE(fftshift3D(a));
+        return true;
+    }
+
+    // Centered forward 3D FFT of a, result in r.
+    // ifftshift3D(a, r) is the only step that reads a; fft3 and fftshift3D then operate on r alone.
+    // Each call is wrapped in GADGET_CHECK_RETURN_FALSE (defined elsewhere; by its name it returns
+    // false when the call fails).
+    template<typename T> 
+    inline bool hoNDFFT<T>::fft3c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
+    {
+        GADGET_CHECK_RETURN_FALSE(ifftshift3D(a, r));
+        GADGET_CHECK_RETURN_FALSE(fft3(r));
+        GADGET_CHECK_RETURN_FALSE(fftshift3D(r));
+        return true;
+    }
+
+    // Centered inverse 3D FFT of a, result in r.
+    // ifftshift3D(a, r) is the only step that reads a; ifft3 and fftshift3D then operate on r alone.
+    // Each call is wrapped in GADGET_CHECK_RETURN_FALSE (defined elsewhere; by its name it returns
+    // false when the call fails).
+    template<typename T> 
+    inline bool hoNDFFT<T>::ifft3c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r)
+    {
+        GADGET_CHECK_RETURN_FALSE(ifftshift3D(a, r));
+        GADGET_CHECK_RETURN_FALSE(ifft3(r));
+        GADGET_CHECK_RETURN_FALSE(fftshift3D(r));
+        return true;
+    }
+
+    // Centered forward 3D FFT of a, result in r, with buf as caller-supplied intermediate storage.
+    // Sequence: ifftshift3D(a, r), then fft3(r, buf), then fftshift3D(buf, r).
+    // Each call is wrapped in GADGET_CHECK_RETURN_FALSE (defined elsewhere; by its name it returns
+    // false when the call fails).
+    template<typename T> 
+    inline bool hoNDFFT<T>::fft3c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf)
+    {
+        GADGET_CHECK_RETURN_FALSE(ifftshift3D(a, r));
+        GADGET_CHECK_RETURN_FALSE(fft3(r, buf));
+        GADGET_CHECK_RETURN_FALSE(fftshift3D(buf, r));
+        return true;
+    }
+
+    // Centered inverse 3D FFT of a, result in r, with buf as caller-supplied intermediate storage.
+    // Sequence: ifftshift3D(a, r), then ifft3(r, buf), then fftshift3D(buf, r).
+    // Each call is wrapped in GADGET_CHECK_RETURN_FALSE (defined elsewhere; by its name it returns
+    // false when the call fails).
+    template<typename T> 
+    inline bool hoNDFFT<T>::ifft3c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf)
+    {
+        GADGET_CHECK_RETURN_FALSE(ifftshift3D(a, r));
+        GADGET_CHECK_RETURN_FALSE(ifft3(r, buf));
+        GADGET_CHECK_RETURN_FALSE(fftshift3D(buf, r));
+        return true;
+    }
+
+    // In-place 1D transform: a temporary is constructed from a and then transformed back into a.
+    // forward selects the forward (true) or inverse (false) transform.
+    template<typename T> 
+    bool hoNDFFT<T>::fft1(hoNDArray< ComplexType >& a, bool forward)
+    {
+        hoNDArray< ComplexType > snapshot(a);
+        return fft1(snapshot, a, forward);
+    }
+
+    // In-place 2D transform: a temporary is constructed from a and then transformed back into a.
+    // forward selects the forward (true) or inverse (false) transform.
+    template<typename T> 
+    bool hoNDFFT<T>::fft2(hoNDArray< ComplexType >& a, bool forward)
+    {
+        hoNDArray< ComplexType > snapshot(a);
+        return fft2(snapshot, a, forward);
+    }
+
+    // In-place 3D transform: a temporary is constructed from a and then transformed back into a.
+    // forward selects the forward (true) or inverse (false) transform.
+    template<typename T> 
+    bool hoNDFFT<T>::fft3(hoNDArray< ComplexType >& a, bool forward)
+    {
+        hoNDArray< ComplexType > snapshot(a);
+        return fft3(snapshot, a, forward);
+    }
+
+    // 1D FFT over the first dimension of a, written to r and scaled by 1/sqrt(n0).
+    //
+    // All n0-long lines of the array are transformed by a single batched FFTW plan
+    // (fftw_plan_many_dft / fftwf_plan_many_dft). An earlier variant that ran one plan through
+    // fftwf_execute_dft inside an OpenMP parallel loop was observed to fail occasionally, which is
+    // why the batched interface is used.
+    //
+    // a        input array (not modified by the transform; FFTW_ESTIMATE planning does not touch it)
+    // r        output array; first assigned from a, then overwritten with the transform
+    // forward  true for the forward transform, false for the inverse
+    //
+    // Returns true on success (including an empty input) and false if FFTW cannot create a plan.
+    template<typename T> 
+    bool hoNDFFT<T>::fft1(hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, bool forward)
+    {
+        r = a;
+
+        // An empty array has nothing to transform; returning here also avoids dividing by n0 == 0 below.
+        if ( a.get_number_of_elements() == 0 )
+        {
+            return true;
+        }
+
+        int n0 = (int)a.get_size(0);
+        T fftRatio = T(1.0/std::sqrt( T(n0) ));
+
+        int num = (int)(a.get_number_of_elements()/n0);
+
+        const int direction = forward ? FFTW_FORWARD : FFTW_BACKWARD;
+
+        // Plan creation and destruction are serialized through mutex_; execution runs outside the lock.
+        if ( typeid(T) == typeid(float) )
+        {
+            fftwf_plan p;
+
+            {
+                boost::mutex::scoped_lock lock(mutex_);
+                p = fftwf_plan_many_dft(1, &n0, num,
+                              reinterpret_cast<fftwf_complex*>(a.begin()), NULL,
+                              1, n0,
+                              reinterpret_cast<fftwf_complex*>(r.begin()), NULL,
+                              1, n0,
+                              direction, FFTW_ESTIMATE);
+            }
+
+            // the planner returns NULL on failure; executing a NULL plan would crash
+            if ( p == NULL )
+            {
+                return false;
+            }
+
+            fftwf_execute(p);
+
+            {
+                boost::mutex::scoped_lock lock(mutex_);
+                fftwf_destroy_plan(p);
+            }
+        }
+        else if ( typeid(T) == typeid(double) )
+        {
+            fftw_plan p;
+
+            {
+                boost::mutex::scoped_lock lock(mutex_);
+                p = fftw_plan_many_dft(1, &n0, num,
+                              reinterpret_cast<fftw_complex*>(a.begin()), NULL,
+                              1, n0,
+                              reinterpret_cast<fftw_complex*>(r.begin()), NULL,
+                              1, n0,
+                              direction, FFTW_ESTIMATE);
+            }
+
+            // the planner returns NULL on failure; executing a NULL plan would crash
+            if ( p == NULL )
+            {
+                return false;
+            }
+
+            fftw_execute(p);
+
+            {
+                boost::mutex::scoped_lock lock(mutex_);
+                fftw_destroy_plan(p);
+            }
+        }
+
+        Gadgetron::scal(fftRatio, r);
+
+        return true;
+    }
+
+    // 2D FFT over the first two dimensions of a, written to r and scaled by 1/sqrt(n0*n1).
+    //
+    // All 2D slices of the array are transformed by a single batched FFTW plan
+    // (fftw_plan_many_dft / fftwf_plan_many_dft). FFTW expects the slowest-varying extent first,
+    // so the plan sizes are {get_size(1), get_size(0)}.
+    //
+    // a        input array (not modified by the transform; FFTW_ESTIMATE planning does not touch it)
+    // r        output array; first assigned from a, then overwritten with the transform
+    // forward  true for the forward transform, false for the inverse
+    //
+    // Returns true on success (including an empty input) and false if FFTW cannot create a plan.
+    template<typename T> 
+    bool hoNDFFT<T>::fft2(hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, bool forward)
+    {
+        r = a;
+
+        // An empty array has nothing to transform; returning here also avoids dividing by n0*n1 == 0 below.
+        if ( a.get_number_of_elements() == 0 )
+        {
+            return true;
+        }
+
+        int n0 = (int)a.get_size(1);
+        int n1 = (int)a.get_size(0);
+
+        T fftRatio = T(1.0/std::sqrt( T(n0*n1) ));
+
+        int num = (int)(a.get_number_of_elements()/(n0*n1));
+
+        int dims[] = {n0, n1};
+        int dist = n0*n1; // element distance between consecutive slices, same for input and output
+
+        const int direction = forward ? FFTW_FORWARD : FFTW_BACKWARD;
+
+        // Plan creation and destruction are serialized through mutex_; execution runs outside the lock.
+        if ( typeid(T) == typeid(float) )
+        {
+            fftwf_plan p;
+
+            {
+                boost::mutex::scoped_lock lock(mutex_);
+                p = fftwf_plan_many_dft(2, dims, num,
+                              reinterpret_cast<fftwf_complex*>(a.begin()), NULL,
+                              1, dist,
+                              reinterpret_cast<fftwf_complex*>(r.begin()), NULL,
+                              1, dist,
+                              direction, FFTW_ESTIMATE);
+            }
+
+            // the planner returns NULL on failure; executing a NULL plan would crash
+            if ( p == NULL )
+            {
+                return false;
+            }
+
+            fftwf_execute(p);
+
+            {
+                boost::mutex::scoped_lock lock(mutex_);
+                fftwf_destroy_plan(p);
+            }
+        }
+        else if ( typeid(T) == typeid(double) )
+        {
+            fftw_plan p;
+
+            {
+                boost::mutex::scoped_lock lock(mutex_);
+                p = fftw_plan_many_dft(2, dims, num,
+                              reinterpret_cast<fftw_complex*>(a.begin()), NULL,
+                              1, dist,
+                              reinterpret_cast<fftw_complex*>(r.begin()), NULL,
+                              1, dist,
+                              direction, FFTW_ESTIMATE);
+            }
+
+            // the planner returns NULL on failure; executing a NULL plan would crash
+            if ( p == NULL )
+            {
+                return false;
+            }
+
+            fftw_execute(p);
+
+            {
+                boost::mutex::scoped_lock lock(mutex_);
+                fftw_destroy_plan(p);
+            }
+        }
+
+        Gadgetron::scal(fftRatio, r);
+
+        return true;
+    }
+
+    // 3D FFT over the first three dimensions of a, written to r and scaled by 1/sqrt(n0*n1*n2).
+    //
+    // All 3D volumes of the array are transformed by a single batched FFTW plan
+    // (fftw_plan_many_dft / fftwf_plan_many_dft), matching fft1 and fft2. The previous code planned
+    // one volume and re-ran the plan on offset pointers with fftw_execute_dft; FFTW only permits that
+    // when the new arrays have the same alignment as the planned ones, which is not guaranteed when a
+    // volume's byte size is not a multiple of the SIMD alignment.
+    //
+    // a        input array (not modified by the transform; FFTW_ESTIMATE planning does not touch it)
+    // r        output array; first assigned from a, then overwritten with the transform
+    // forward  true for the forward transform, false for the inverse
+    //
+    // Returns true on success (including an empty input) and false if FFTW cannot create a plan.
+    template<typename T> 
+    bool hoNDFFT<T>::fft3(hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, bool forward)
+    {
+        r = a;
+
+        // An empty array has nothing to transform; returning here also avoids dividing by zero below.
+        if ( a.get_number_of_elements() == 0 )
+        {
+            return true;
+        }
+
+        // FFTW expects the slowest-varying extent first
+        int n2 = (int)a.get_size(0);
+        int n1 = (int)a.get_size(1);
+        int n0 = (int)a.get_size(2);
+
+        T fftRatio = T(1.0/std::sqrt( T(n0*n1*n2) ));
+
+        int num = (int)(a.get_number_of_elements()/(n0*n1*n2));
+
+        int dims[] = {n0, n1, n2};
+        int dist = n0*n1*n2; // element distance between consecutive volumes, same for input and output
+
+        const int direction = forward ? FFTW_FORWARD : FFTW_BACKWARD;
+
+        // Plan creation and destruction are serialized through mutex_; execution runs outside the lock.
+        if ( typeid(T) == typeid(float) )
+        {
+            fftwf_plan p;
+
+            {
+                boost::mutex::scoped_lock lock(mutex_);
+                p = fftwf_plan_many_dft(3, dims, num,
+                              reinterpret_cast<fftwf_complex*>(a.begin()), NULL,
+                              1, dist,
+                              reinterpret_cast<fftwf_complex*>(r.begin()), NULL,
+                              1, dist,
+                              direction, FFTW_ESTIMATE);
+            }
+
+            // the planner returns NULL on failure; executing a NULL plan would crash
+            if ( p == NULL )
+            {
+                return false;
+            }
+
+            fftwf_execute(p);
+
+            {
+                boost::mutex::scoped_lock lock(mutex_);
+                fftwf_destroy_plan(p);
+            }
+        }
+        else if ( typeid(T) == typeid(double) )
+        {
+            fftw_plan p;
+
+            {
+                boost::mutex::scoped_lock lock(mutex_);
+                p = fftw_plan_many_dft(3, dims, num,
+                              reinterpret_cast<fftw_complex*>(a.begin()), NULL,
+                              1, dist,
+                              reinterpret_cast<fftw_complex*>(r.begin()), NULL,
+                              1, dist,
+                              direction, FFTW_ESTIMATE);
+            }
+
+            // the planner returns NULL on failure; executing a NULL plan would crash
+            if ( p == NULL )
+            {
+                return false;
+            }
+
+            fftw_execute(p);
+
+            {
+                boost::mutex::scoped_lock lock(mutex_);
+                fftw_destroy_plan(p);
+            }
+        }
+
+        Gadgetron::scal(fftRatio, r);
+
+        return true;
+    }
+
+    // TODO: implement more optimized threading strategy
+    // Heuristic thread count for a batched 1D FFT, picked from the total element count n0*num.
+    // Never exceeds num_of_max_threads_ except for the fixed value 2 in the smallest multi-thread tier.
+    template<typename T> 
+    inline int hoNDFFT<T>::get_num_threads_fft1(size_t n0, size_t num)
+    {
+        const int maxThr = num_of_max_threads_;
+        if ( maxThr == 1 ) return 1;
+
+        const size_t total = n0*num;
+
+        if ( total > 1024*128 ) return maxThr;
+        if ( total > 512*128 )  return (maxThr > 8) ? 8 : maxThr;
+        if ( total > 256*128 )  return (maxThr > 4) ? 4 : maxThr;
+        if ( total > 128*128 )  return 2;
+
+        return 1;
+    }
+
+    // Heuristic thread count for a batched 2D FFT, picked from the total element count n0*n1*num.
+    // Never exceeds num_of_max_threads_ except for the fixed value 2 in the smallest multi-thread tier.
+    template<typename T> 
+    inline int hoNDFFT<T>::get_num_threads_fft2(size_t n0, size_t n1, size_t num)
+    {
+        const int maxThr = num_of_max_threads_;
+        if ( maxThr == 1 ) return 1;
+
+        const size_t total = n0*n1*num;
+
+        if ( total > 128*128*64 ) return maxThr;
+        if ( total > 128*128*32 ) return (maxThr > 8) ? 8 : maxThr;
+        if ( total > 128*128*16 ) return (maxThr > 4) ? 4 : maxThr;
+        if ( total > 128*128*8 )  return 2;
+
+        return 1;
+    }
+
+    // Thread count for a batched 3D FFT: all available threads when there are at least as many
+    // volumes (num) as threads, otherwise one. The volume extents n0, n1, n2 are not consulted.
+    template<typename T> 
+    inline int hoNDFFT<T>::get_num_threads_fft3(size_t n0, size_t n1, size_t n2, size_t num)
+    {
+        if ( num_of_max_threads_ == 1 ) return 1;
+
+        return ( num >= num_of_max_threads_ ) ? num_of_max_threads_ : 1;
+    }
+
+    // -----------------------------------------------------------------------------------------
+
+    // 
+    // Explicit instantiation of the two supported precisions. The member functions are
+    // defined in this source file, so these are the instantiations emitted here.
+    //
+
+    template class EXPORTCPUFFT hoNDFFT<float>;
+    template class EXPORTCPUFFT hoNDFFT<double>;
+}
diff --git a/toolboxes/fft/cpu/hoNDFFT.h b/toolboxes/fft/cpu/hoNDFFT.h
new file mode 100644
index 0000000..93023b2
--- /dev/null
+++ b/toolboxes/fft/cpu/hoNDFFT.h
@@ -0,0 +1,245 @@
+/** \file hoNDFFT.h
+    \brief Wrappers for FFTW for ndarrays of type std::complex.
+*/
+
+#ifndef hoNDFFT_H
+#define hoNDFFT_H
+
+#include "hoNDArray.h"
+#include "cpufft_export.h"
+
+#include <boost/thread/mutex.hpp>
+#include <iostream>
+#include <fftw3.h>
+#include <complex>
+
+#ifdef USE_OMP
+    #include "omp.h"
+#endif // USE_OMP
+
+namespace Gadgetron{
+
+    /** 
+    Generic class for Fast Fourier Transforms using FFTW on the hoNDArray class.
+    This class is a singleton because the planning and memory allocation routines of FFTW are NOT threadsafe.
+    The class' template type is a REAL, i.e. float or double.
+
+    Access using e.g.
+    hoNDFFT<float>::instance()
+    */
+    template <typename T> class EXPORTCPUFFT hoNDFFT
+    {
+    public:
+
+        typedef std::complex<T> ComplexType;
+
+        // Accessor for the singleton instance (defined in the source file).
+        static hoNDFFT<T>* instance(); 
+
+        // In-place transform of a single dimension of *input.
+        void fft(hoNDArray< ComplexType >* input, unsigned int dim_to_transform)
+        {
+            //-1 refers to the sign of the transform, -1 for FFTW_FORWARD
+            fft_int(input,dim_to_transform,-1);
+        }
+
+        void ifft(hoNDArray< ComplexType >* input, unsigned int dim_to_transform)
+        {
+            //1 refers to the sign of the transform, +1 for FFTW_BACKWARD
+            fft_int(input,dim_to_transform,1);
+        }
+
+        // In-place transform of every dimension of *input, one dimension at a time.
+        void fft(hoNDArray< ComplexType >* input)
+        {
+            for (size_t i = 0; i < input->get_number_of_dimensions(); i++) {
+                //-1 refers to the sign of the transform, -1 for FFTW_FORWARD
+                fft_int(input,i,-1);
+            }
+        }
+
+        void ifft(hoNDArray< ComplexType >* input)
+        {
+            for (size_t i = 0; i < input->get_number_of_dimensions(); i++) {
+                //1 refers to the sign of the transform, +1 for FFTW_BACKWARD
+                fft_int(input,i,1);
+            }
+        }
+
+
+        // complext<T> overloads: forward to the std::complex<T> versions through a pointer cast.
+        // NOTE(review): this assumes complext<T> and std::complex<T> share the same memory layout - confirm.
+        void fft(hoNDArray< complext<T> >* input, unsigned int dim_to_transform)
+        {
+            fft((hoNDArray<ComplexType>*) input, dim_to_transform);
+        }
+
+        void ifft(hoNDArray< complext<T> >* input, unsigned int dim_to_transform)
+        {
+            ifft((hoNDArray<ComplexType>*) input, dim_to_transform);
+        }
+
+        void fft(hoNDArray< complext<T> >* input)
+        {
+            fft((hoNDArray<ComplexType>*) input);
+        }
+
+        void ifft(hoNDArray< complext<T> >* input)
+        {
+        	ifft((hoNDArray<ComplexType>*) input);
+        }
+
+
+        // fftshift / ifftshift: the one-argument forms work in place on a,
+        // the two-argument forms read a and write r.
+
+        // 1D
+        bool fftshift1D(hoNDArray< ComplexType >& a);
+        bool fftshift1D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
+
+        bool ifftshift1D(hoNDArray< ComplexType >& a);
+        bool ifftshift1D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
+
+        // 2D
+        bool fftshift2D(hoNDArray< ComplexType >& a);
+        bool fftshift2D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
+
+        bool ifftshift2D(hoNDArray< ComplexType >& a);
+        bool ifftshift2D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
+
+        // 3D
+        bool fftshift3D(hoNDArray< ComplexType >& a);
+        bool fftshift3D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
+
+        bool ifftshift3D(hoNDArray< ComplexType >& a);
+        bool ifftshift3D(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
+
+        // 1D fft, in-place and out-of-place
+        // the first dimension will be transformed
+        bool fft1(hoNDArray< ComplexType >& a);
+        bool ifft1(hoNDArray< ComplexType >& a);
+
+        bool fft1(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
+        bool ifft1(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
+
+        // centered 1D fft
+        // (the three-argument forms take buf as caller-supplied intermediate storage)
+        bool fft1c(hoNDArray< ComplexType >& a);
+        bool ifft1c(hoNDArray< ComplexType >& a);
+
+        bool fft1c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
+        bool ifft1c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
+
+        bool fft1c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf);
+        bool ifft1c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf);
+
+        // 2D fft, in-place and out-of-place
+        // the first and second dimensions will be transformed
+        bool fft2(hoNDArray< ComplexType >& a);
+        bool ifft2(hoNDArray< ComplexType >& a);
+
+        bool fft2(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
+        bool ifft2(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
+
+        // centered 2D fft
+        bool fft2c(hoNDArray< ComplexType >& a);
+        bool ifft2c(hoNDArray< ComplexType >& a);
+
+        bool fft2c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
+        bool ifft2c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
+
+        bool fft2c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf);
+        bool ifft2c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf);
+
+        // 3D fft, in-place and out-of-place
+        // the first, second and third dimensions will be transformed
+        bool fft3(hoNDArray< ComplexType >& a);
+        bool ifft3(hoNDArray< ComplexType >& a);
+
+        bool fft3(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
+        bool ifft3(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
+
+        // centered 3D fft
+        bool fft3c(hoNDArray< ComplexType >& a);
+        bool ifft3c(hoNDArray< ComplexType >& a);
+
+        bool fft3c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
+        bool ifft3c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r);
+
+        bool fft3c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf);
+        bool ifft3c(const hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, hoNDArray< ComplexType >& buf);
+
+    protected:
+
+        //We are making these protected since this class is a singleton
+
+        // Binds the FFTW function pointers and records the thread budget:
+        // omp_get_num_procs() when built with USE_OMP, otherwise 1.
+        hoNDFFT() {
+            set_function_pointers();
+
+#ifdef USE_OMP
+            num_of_max_threads_ = omp_get_num_procs();
+#else
+            num_of_max_threads_ = 1;
+#endif // USE_OMP
+        }
+
+        // Calls the FFTW cleanup routine through its function pointer.
+        virtual ~hoNDFFT() { fftw_cleanup_ptr_(); }
+
+        // Transforms one dimension of *input; sign is -1 (forward) or +1 (backward), see fft()/ifft() above.
+        void fft_int(hoNDArray< ComplexType >* input, size_t dim_to_transform, int sign);
+
+        // Assigns the function pointers below (defined in the source file;
+        // presumably selects the float or double FFTW API according to T - confirm).
+        void set_function_pointers();
+
+        int   (*fftw_import_wisdom_from_file_ptr_)(FILE*);
+        void  (*fftw_export_wisdom_to_file_ptr_)(FILE*);
+        void  (*fftw_cleanup_ptr_)(void);
+        void* (*fftw_malloc_ptr_)(size_t);
+        void  (*fftw_free_ptr_)(void* p);
+        void  (*fftw_execute_ptr_)(void*);
+        void* (*fftw_plan_dft_1d_ptr_)(int, void*, void*, int, unsigned);
+        void  (*fftw_destroy_plan_ptr_)(void*);
+
+        static hoNDFFT<T>* instance_;
+        // serializes FFTW plan creation and destruction in fft1/fft2/fft3
+        boost::mutex mutex_;
+
+        // upper bound used by the get_num_threads_fft* heuristics
+        int num_of_max_threads_;
+
+        // the fft and ifft shift pivot for a certain length
+        // [0 .. pivot-1] will be shifted to the right end
+        size_t fftshiftPivot(size_t len);
+        size_t ifftshiftPivot(size_t len);
+
+        // Raw-pointer shift helpers behind the public fftshift/ifftshift functions.
+        // NOTE(review): x, y, z look like the extents of the shifted dimensions and n like the
+        // number of repetitions - confirm against the definitions.
+
+        // 1D
+        bool fftshift1D(const ComplexType* a, ComplexType* r, size_t x, size_t pivot);
+        bool ifftshift1D(const ComplexType* a, ComplexType* r, size_t x, size_t pivot);
+
+        bool fftshiftPivot1D(ComplexType* a, size_t x, size_t n, size_t pivot);
+        bool fftshiftPivot1D(const ComplexType* a, ComplexType* r, size_t x, size_t n, size_t pivot);
+
+        // 2D
+        bool fftshiftPivot2D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t n, size_t pivotx, size_t pivoty);
+        bool fftshiftPivot2D(ComplexType* a, size_t x, size_t y, size_t n, size_t pivotx, size_t pivoty);
+
+        bool fftshift2D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t n);
+        bool ifftshift2D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t n);
+
+        bool fftshift2D(ComplexType* a, size_t x, size_t y, size_t n);
+        bool ifftshift2D(ComplexType* a, size_t x, size_t y, size_t n);
+
+        // 3D
+        bool fftshiftPivot3D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t z, size_t n, size_t pivotx, size_t pivoty, size_t pivotz);
+        bool fftshiftPivot3D(ComplexType* a, size_t x, size_t y, size_t z, size_t n, size_t pivotx, size_t pivoty, size_t pivotz);
+
+        bool fftshift3D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t z, size_t n);
+        bool ifftshift3D(const ComplexType* a, ComplexType* r, size_t x, size_t y, size_t z, size_t n);
+
+        bool fftshift3D(ComplexType* a, size_t x, size_t y, size_t z, size_t n);
+        bool ifftshift3D(ComplexType* a, size_t x, size_t y, size_t z, size_t n);
+
+        // forward: true, fft; false, inverse fft
+        bool fft1(hoNDArray< ComplexType >& a, bool forward);
+        bool fft2(hoNDArray< ComplexType >& a, bool forward);
+        bool fft3(hoNDArray< ComplexType >& a, bool forward);
+
+        bool fft1(hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, bool forward);
+        bool fft2(hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, bool forward);
+        bool fft3(hoNDArray< ComplexType >& a, hoNDArray< ComplexType >& r, bool forward);
+
+        // get the number of threads used for fft
+        int get_num_threads_fft1(size_t n0, size_t num);
+        int get_num_threads_fft2(size_t n0, size_t n1, size_t num);
+        int get_num_threads_fft3(size_t n0, size_t n1, size_t n2, size_t num);
+    };
+}
+
+#endif //hoNDFFT_H
diff --git a/toolboxes/fft/gpu/CMakeLists.txt b/toolboxes/fft/gpu/CMakeLists.txt
new file mode 100644
index 0000000..5715a1d
--- /dev/null
+++ b/toolboxes/fft/gpu/CMakeLists.txt
@@ -0,0 +1,33 @@
+if (WIN32)  # gpufft_export.h selects __declspec(dllexport) when this define is present
+  ADD_DEFINITIONS(-D__BUILD_GADGETRON_GPUFFT__)
+endif (WIN32)
+# Header search paths: the core toolboxes (common, cpu, gpu) plus CUDA and Boost.
+include_directories( 
+  ${CMAKE_SOURCE_DIR}/toolboxes/core
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
+  ${CUDA_INCLUDE_DIRS}
+  ${Boost_INCLUDE_DIR}
+)
+# Shared library target holding the cuNDFFT wrapper.
+cuda_add_library(gadgetron_toolbox_gpufft SHARED 
+    cuNDFFT.h
+    cuNDFFT.cpp
+  )
+# Version the shared object with the project-wide version variables.
+set_target_properties(gadgetron_toolbox_gpufft PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+# cuFFT is linked explicitly, in addition to the general CUDA libraries.
+target_link_libraries(gadgetron_toolbox_gpufft 
+  gadgetron_toolbox_cpucore
+  gadgetron_toolbox_gpucore 
+  ${Boost_LIBRARIES}
+  ${CUDA_LIBRARIES} 
+  ${CUDA_CUFFT_LIBRARIES} 
+  )
+# Install the library into lib ...
+install(TARGETS gadgetron_toolbox_gpufft DESTINATION lib COMPONENT main)
+# ... and its public headers into include.
+install(FILES
+  gpufft_export.h
+  cuNDFFT.h
+  DESTINATION include COMPONENT main)
diff --git a/toolboxes/fft/gpu/cuNDFFT.cpp b/toolboxes/fft/gpu/cuNDFFT.cpp
new file mode 100644
index 0000000..d9224c2
--- /dev/null
+++ b/toolboxes/fft/gpu/cuNDFFT.cpp
@@ -0,0 +1,157 @@
+#include "cuNDFFT.h"
+#include "vector_td.h"
+#include "cuNDArray.h"
+#include "cuNDArray_utils.h"
+#include "cuNDArray_operators.h"
+
+#include <cufft.h>
+#include <cuComplex.h>
+#include <sstream>
+
+namespace Gadgetron{
+
+  // Returns the shared cuNDFFT<T> object, creating it on the first call. No delete of the object
+  // is visible in this file. NOTE(review): the lazy creation is not synchronized - confirm callers.
+  template<class T> cuNDFFT<T>* cuNDFFT<T>::instance()
+  {
+    if (__instance == NULL) { __instance = new cuNDFFT<T>; }
+    return __instance;
+  }
+  template<class T> cuNDFFT<T>* cuNDFFT<T>::__instance = NULL;
+
+  // cuFFT dispatch on the real type T: the transform type used for the plan and the matching execute call.
+  template<class T> cufftType_t get_transform_type();
+  template<> cufftType_t get_transform_type<float>() { return CUFFT_C2C; }
+  template<> cufftType_t get_transform_type<double>() { return CUFFT_Z2Z; }
+  template<class T> cufftResult_t cuNDA_FFT_execute( cufftHandle plan, cuNDArray< complext<T> > *in_out, int direction );
+  template<> cufftResult_t cuNDA_FFT_execute<float>( cufftHandle plan, cuNDArray<float_complext> *in_out, int direction ){
+    cuFloatComplex *data = reinterpret_cast<cuFloatComplex*>(in_out->get_data_ptr()); // one pointer is both input and output
+    return cufftExecC2C(plan, data, data, direction); }
+  template<> cufftResult_t cuNDA_FFT_execute<double>( cufftHandle plan, cuNDArray<double_complext> *in_out, int direction ){
+    cuDoubleComplex *data = reinterpret_cast<cuDoubleComplex*>(in_out->get_data_ptr()); // one pointer is both input and output
+    return cufftExecZ2Z(plan, data, data, direction); }
+  
+  /** Shared implementation behind every fft/ifft overload: in-place cuFFT along the selected dimensions.
+      \param input array that is permuted, transformed in place and permuted back
+      \param dims_to_transform non-empty list of distinct dimension indices, each below the array's dimension count
+      \param direction forwarded to the cufftExec call (the overloads pass CUFFT_FORWARD or CUFFT_INVERSE)
+      \param do_scale when true, the result is divided by the number of elements in one transform
+      \throws std::runtime_error on an invalid dimension list or when a cuFFT call reports failure */
+  template<class T> void
+  cuNDFFT<T>::fft_int( cuNDArray< complext<T> > *input, std::vector<size_t> *dims_to_transform, int direction, bool do_scale )
+  {
+    if (dims_to_transform->empty()) // &int_dims[0] below must never be taken on an empty vector
+      throw std::runtime_error("cuNDFFT::fft No dimensions specified for transform");
+    const size_t array_ndim = input->get_number_of_dimensions();
+    boost::shared_ptr< std::vector<size_t> > array_dims = input->get_dimensions();
+
+    // Validate the requested dimensions and collect their extents in reverse order.
+    std::vector<size_t> dim_count(array_ndim,0);
+    std::vector<size_t> dims(dims_to_transform->size(),0);
+    for (size_t i = 0; i < dims_to_transform->size(); i++) {
+      const size_t d = (*dims_to_transform)[i];
+      if (d >= array_ndim) {
+        std::stringstream ss;
+        ss << "cuNDFFT::fft Invalid dimensions specified for transform " << d << " max " << array_ndim;
+        throw std::runtime_error(ss.str());
+      }
+      if (dim_count[d] > 0) {
+        throw std::runtime_error("cuNDFFT::fft Invalid dimensions (duplicates) specified for transform");
+      }
+      dim_count[d]++;
+      dims[dims_to_transform->size()-1-i] = (*array_dims)[d];
+    }
+
+    // Permutation that moves the transformed dimensions to the front, and its inverse.
+    std::vector<size_t> new_dim_order(*dims_to_transform);
+    for (size_t i = 0; i < array_ndim; i++) {
+      if (!dim_count[i]) new_dim_order.push_back(i);
+    }
+    std::vector<size_t> reverse_dim_order(array_ndim,0);
+    for (size_t i = 0; i < array_ndim; i++) {
+      reverse_dim_order[new_dim_order[i]] = i;
+    }
+
+    // One transform covers elements_in_ft elements; the array holds `batches` of them.
+    size_t elements_in_ft = 1;
+    std::vector<int> int_dims;
+    for (size_t i = 0; i < dims.size(); i++) {
+      elements_in_ft *= dims[i];
+      int_dims.push_back((int)dims[i]);
+    }
+    const size_t batches = input->get_number_of_elements() / elements_in_ft;
+
+    cufftHandle plan;
+    cufftResult ftres = cufftPlanMany(&plan,static_cast<int>(dims.size()),&int_dims[0], &int_dims[0], 1, elements_in_ft, &int_dims[0], 1, elements_in_ft, get_transform_type<T>(), batches);
+    if (ftres != CUFFT_SUCCESS) {
+      std::stringstream ss;
+      ss << "cuNDFFT FFT plan failed: " << ftres;
+      throw std::runtime_error(ss.str());
+    }
+
+    //IFFTSHIFT
+    try { *input = *permute(input,&new_dim_order,-1); }
+    catch (...) { cufftDestroy( plan ); throw; } // release the plan if permute throws
+
+    // Destroy the plan before reporting an execute failure, so no exit path leaks it.
+    const cufftResult execres = cuNDA_FFT_execute<T>( plan, input, direction );
+    ftres = cufftDestroy( plan );
+    if (execres != CUFFT_SUCCESS) throw std::runtime_error("cuNDFFT FFT execute failed");
+    if (ftres != CUFFT_SUCCESS) {
+      std::stringstream ss;
+      ss << "cuNDFFT FFT plan destroy failed: " << ftres;
+      throw std::runtime_error(ss.str());
+    }
+
+    if (do_scale) *input /= T(elements_in_ft);
+
+    //FFTSHIFT
+    *input = *permute(input,&reverse_dim_order,1);
+  }
+  
+  /** Forward transform along the listed dimensions; fft_int is called with scaling switched off. */
+  template<class T> void cuNDFFT<T>::fft( cuNDArray< complext<T> > *input, std::vector<size_t> *dims_to_transform )
+  {
+    fft_int(input, dims_to_transform, CUFFT_FORWARD, false);
+  }
+  
+  /** Inverse transform along the listed dimensions; do_scale is handed on to fft_int. */
+  template<class T> void cuNDFFT<T>::ifft( cuNDArray< complext<T> > *input, std::vector<size_t> *dims_to_transform, bool do_scale )
+  {
+    fft_int(input, dims_to_transform, CUFFT_INVERSE, do_scale);
+  }
+  
+  /** Forward transform along one dimension, expressed as a one-element dimension list. */
+  template<class T> void cuNDFFT<T>::fft( cuNDArray< complext<T> > *input, unsigned int dim_to_transform )
+  {
+    std::vector<size_t> single_dim(1,dim_to_transform);
+    fft(input, &single_dim);
+  }
+  
+  /** Inverse transform along one dimension, expressed as a one-element dimension list. */
+  template<class T> void cuNDFFT<T>::ifft( cuNDArray< complext<T> > *input, unsigned int dim_to_transform, bool do_scale )
+  {
+    std::vector<size_t> single_dim(1,dim_to_transform);
+    ifft(input, &single_dim, do_scale);
+  }
+  
+  /** Forward transform along every dimension of the array (dimension list 0, 1, ..., n-1). */
+  template<class T> void cuNDFFT<T>::fft( cuNDArray< complext<T> > *input )
+  {
+    std::vector<size_t> all_dims(input->get_number_of_dimensions(),0);
+    for (size_t d = 0; d != all_dims.size(); ++d) all_dims[d] = d;
+    fft(input, &all_dims);
+  }
+  
+  /** Inverse transform along every dimension of the array (dimension list 0, 1, ..., n-1). */
+  template<class T> void cuNDFFT<T>::ifft( cuNDArray<complext<T> > *input, bool do_scale )
+  {
+    std::vector<size_t> all_dims(input->get_number_of_dimensions(),0);
+    for (size_t d = 0; d != all_dims.size(); ++d) all_dims[d] = d;
+    ifft(input, &all_dims, do_scale);
+  }
+  
+  // Explicit instantiations: the member definitions live in this source file, so float and double are instantiated here.
+  template class EXPORTGPUFFT cuNDFFT<float>;
+  template class EXPORTGPUFFT cuNDFFT<double>;
+}
diff --git a/toolboxes/fft/gpu/cuNDFFT.h b/toolboxes/fft/gpu/cuNDFFT.h
new file mode 100644
index 0000000..a126ac6
--- /dev/null
+++ b/toolboxes/fft/gpu/cuNDFFT.h
@@ -0,0 +1,44 @@
+/** \file cuNDFFT.h
+    \brief Wrapper of the CUFFT library for ndarrays of type Gadgetron::complext.
+*/
+
+#ifndef CUNDFFT_H
+#define CUNDFFT_H
+#pragma once
+
+#include "cuNDArray.h"
+#include "gpufft_export.h"
+
+namespace Gadgetron{
+
+  /** \class cuNDFFT
+      \brief Wrapper of the CUFFT library for ndarrays of type complext.
+
+      Wrapper of the CUFFT library for ndarrays of type complext<REAL>.
+      The class' template type is a REAL, ie. float or double.
+      The FFTs are performed in-place.
+  */
+  template<class T> class EXPORTGPUFFT cuNDFFT
+  {
+  public:
+    /** Accessor for the shared object held in the static __instance pointer. */
+    static cuNDFFT<T>* instance();
+    /** Transform along the dimensions listed in dims_to_transform; do_scale is forwarded to fft_int. */
+    void fft ( cuNDArray<complext<T> > *image, std::vector<size_t> *dims_to_transform );
+    void ifft( cuNDArray<complext<T> > *image, std::vector<size_t> *dims_to_transform, bool do_scale = true );
+    /** Transform along the single dimension dim_to_transform. */
+    void fft ( cuNDArray<complext<T> > *image, unsigned int dim_to_transform);
+    void ifft( cuNDArray<complext<T> > *image, unsigned int dim_to_transform, bool do_scale = true );
+    /** Transform along every dimension of the array. */
+    void fft ( cuNDArray<complext<T> > *image );
+    void ifft( cuNDArray<complext<T> > *image, bool do_scale = true );
+    /** Constructor and destructor are protected, so outside code cannot create or delete a cuNDFFT directly. */
+  protected:   
+    cuNDFFT() {}
+    virtual ~cuNDFFT() {}
+    void fft_int( cuNDArray<complext<T> > *image, std::vector<size_t> *dims_to_transform, int direction, bool do_scale = true );
+    static cuNDFFT<T>* __instance;
+  };
+}
+
+#endif
diff --git a/toolboxes/fft/gpu/gpufft_export.h b/toolboxes/fft/gpu/gpufft_export.h
new file mode 100644
index 0000000..ab0252a
--- /dev/null
+++ b/toolboxes/fft/gpu/gpufft_export.h
@@ -0,0 +1,18 @@
+/** \file gpufft_export.h
+    \brief Required definitions for Windows, importing/exporting dll symbols 
+*/
+
+#ifndef GPUFFT_EXPORT_H_
+#define GPUFFT_EXPORT_H_
+// Export while building the library itself (explicit define, or a <target>_EXPORTS symbol from CMake); import otherwise.
+#if defined (WIN32)
+    #if defined (__BUILD_GADGETRON_GPUFFT__) || defined (gpufft_EXPORTS) || defined (gadgetron_toolbox_gpufft_EXPORTS)
+        #define EXPORTGPUFFT __declspec(dllexport)
+    #else
+        #define EXPORTGPUFFT __declspec(dllimport)
+    #endif
+#else
+    #define EXPORTGPUFFT
+#endif
+
+#endif
diff --git a/toolboxes/gadgettools/CMakeLists.txt b/toolboxes/gadgettools/CMakeLists.txt
index b232e82..23af338 100644
--- a/toolboxes/gadgettools/CMakeLists.txt
+++ b/toolboxes/gadgettools/CMakeLists.txt
@@ -1,58 +1,55 @@
+if (WIN32)
+    ADD_DEFINITIONS(-D__BUILD_GADGETRON_GADGETTOOLS__)
+endif (WIN32)
 
-IF (WIN32)
-    ADD_DEFINITIONS(-DTIXML_USE_STL)
-ENDIF (WIN32)
-
-find_package(XSD REQUIRED)
-find_package(XercesC REQUIRED)
-
-include_directories(${ACE_INCLUDE_DIR} 
-                    ${Boost_INCLUDE_DIR} 
-                    ${XSD_INCLUDE_DIR} 
+include_directories(
+                    ${CMAKE_BINARY_DIR}/apps/gadgetron
                     ${CMAKE_SOURCE_DIR}/toolboxes/gadgettools
                     ${CMAKE_SOURCE_DIR}/toolboxes/core
+                    ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
                     ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
                     ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/hostutils
                     ${CMAKE_SOURCE_DIR}/apps/gadgetron
                     ${CMAKE_SOURCE_DIR}/gadgets/mri_core 
-                    ${ISMRMRD_XSD_INCLUDE_DIR} 
-                    ${ISMRMRD_INCLUDE_DIR} )
-
-#Process the XSD files
-SET(XSDS schema/gadgetron.xsd)
-SET(XSD_ARGS cxx-tree --generate-serialization)
-WRAP_XSD(XSDS_SOURCES XSD_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}/schema ${XSDS} OPTIONS ${XSD_ARGS})
-INCLUDE_DIRECTORIES(${XSD_INCLUDES} ${XERCESC_INCLUDE_DIR})
-
-message("XSDS_SOURCES is " ${XSDS_SOURCES})
-
-if (MKL_FOUND)
-    INCLUDE_DIRECTORIES( ${MKL_INCLUDE_DIR} )
-endif (MKL_FOUND)
-
-add_library(gadgettools SHARED ${XSDS_SOURCES} 
-                               GadgetImageMessageReader.h 
-                               GadgetImageMessageWriter.h 
-                               gadgettools_export.h 
-                               GadgetronSlotContainer.h 
-                               GadgetronConnector.h 
-                               GadgetronConnector.cpp 
-                               GadgetServerAcceptor.h 
-                               GadgetServerAcceptor.cpp 
-                               GadgetStreamController.h 
-                               GadgetStreamController.cpp
-                               GadgetCloudController.h 
-                               GadgetronCloudConnector.h )
-
-target_link_libraries(gadgettools optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY} ${XERCESC_LIBRARIES})
-
-install(TARGETS gadgettools DESTINATION lib)
-
-install (FILES  GadgetImageMessageReader.h 
-            GadgetImageMessageWriter.h
+                    ${ACE_INCLUDE_DIR} 
+                    ${Boost_INCLUDE_DIR} 
+ )
+
+
+INCLUDE_DIRECTORIES( ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image )
+
+if(WIN32)
+    link_directories(${Boost_LIBRARY_DIRS})
+endif(WIN32)
+
+if(WIN32)
+    link_directories(${Boost_LIBRARY_DIRS})
+endif(WIN32)
+
+add_library(gadgetron_toolbox_gadgettools SHARED
+  gadgettools_export.h 
+  GadgetronSlotContainer.h 
+  GadgetronConnector.h 
+  GadgetronConnector.cpp 
+  GadgetCloudController.h 
+  GadgetronCloudConnector.h )
+
+set_target_properties(gadgetron_toolbox_gadgettools PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+
+target_link_libraries(gadgetron_toolbox_gadgettools
+                      optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY}
+                      ${Boost_LIBRARIES})
+
+install(TARGETS gadgetron_toolbox_gadgettools DESTINATION lib COMPONENT main)
+
+install (FILES 
+            GadgetCloudController.h 
+            GadgetronCloudConnector.h 
             GadgetronConnector.h
             gadgettools_export.h
             GadgetronSlotContainer.h
-            GadgetServerAcceptor.h
-            GadgetStreamController.h
-            DESTINATION include)
+            DESTINATION include COMPONENT main)
+
+if(ISMRMRD_FOUND)
+  add_subdirectory(ismrmrd)
+endif(ISMRMRD_FOUND)
diff --git a/toolboxes/gadgettools/GadgetCloudController.h b/toolboxes/gadgettools/GadgetCloudController.h
index 4b9b523..37955b0 100644
--- a/toolboxes/gadgettools/GadgetCloudController.h
+++ b/toolboxes/gadgettools/GadgetCloudController.h
@@ -86,6 +86,28 @@ public:
     // if jobID===-1, all jobs for this node is set to be completed
     int setJobsTobeCompleted(unsigned int nodeID, int jobID=-1);
 
+    // get/set the node status, 0/-1 : available/unavailable
+    // Both accessors take cloud_controller_mutex_ through ACE_GUARD_RETURN, whose failure value here is -1;
+    // otherwise they return 0, even for a nodeID outside node_status_.
+    int get_node_status(int nodeID, int& status)
+    {
+        ACE_GUARD_RETURN(ACE_Thread_Mutex, guard, cloud_controller_mutex_, -1);
+        // a nodeID outside the table is reported as unavailable
+        const bool known_node = (nodeID>=0) && (static_cast<size_t>(nodeID)<node_status_.size());
+        status = known_node ? node_status_[nodeID] : -1;
+        return 0;
+    }
+
+    int set_node_status(int nodeID, int status)
+    {
+        ACE_GUARD_RETURN(ACE_Thread_Mutex, guard, cloud_controller_mutex_, -1);
+        // a nodeID outside the table is ignored
+        if ( nodeID<0 ) return 0;
+        if ( static_cast<size_t>(nodeID)>=node_status_.size() ) return 0;
+        node_status_[nodeID] = status;
+        return 0;
+    }
+
     // append the job list
     int appendJobList(std::vector<JobType*>& job_list, 
         std::vector<JobType*>& completed_job_list, 
@@ -120,12 +142,16 @@ private:
     // node status, 0/-1 : available/unavailable
     std::vector<int> node_status_;
 
+    // number of job actually sent to nodes
+    // if 0, then controller does not need to wait
+    unsigned int number_of_jobs_sent_out_;
+
     // to protect the access to job_status_ and node_id_used_
     ACE_Thread_Mutex cloud_controller_mutex_;
 };
 
 template <typename JobType> 
-GadgetCloudController<JobType>::GadgetCloudController() : cloud_msg_id_reader_(GADGET_MESSAGE_CLOUD_JOB), cloud_msg_id_writer_(GADGET_MESSAGE_CLOUD_JOB), job_handler_(NULL)
+GadgetCloudController<JobType>::GadgetCloudController() : cloud_msg_id_reader_(GADGET_MESSAGE_CLOUD_JOB), cloud_msg_id_writer_(GADGET_MESSAGE_CLOUD_JOB), job_handler_(NULL), number_of_jobs_sent_out_(0)
 {
 
 }
@@ -153,8 +179,8 @@ int GadgetCloudController<JobType>::open(void* p)
 {
     GADGET_DEBUG1("GadgetCloudController::open\n");
 
-    // set the high water mark of message queue to be 2GB
-    this->msg_queue()->high_water_mark(24.0*1024*1024*1024);
+    // set the high water mark of message queue to be 24GB
+    this->msg_queue()->high_water_mark( (size_t)(24.0*1024*1024*1024) );
 
     return 0;
 }
@@ -186,7 +212,7 @@ int GadgetCloudController<JobType>::createConnector(const CloudType& cloud,
     size_t msgID_reader, std::vector<GadgetMessageReader*>& readers, 
     size_t msgID_writer, std::vector<GadgetMessageWriter*>& writers)
 {
-    number_of_nodes_ = cloud.size();
+    number_of_nodes_ = (unsigned int)cloud.size();
 
     if ( readers.size() != number_of_nodes_ ) return -1;
     if ( writers.size() != number_of_nodes_ ) return -1;
@@ -219,7 +245,7 @@ template <typename JobType>
 int GadgetCloudController<JobType>::
 connectToCloud(const CloudType& cloud)
 {
-    number_of_nodes_ = cloud.size();
+    number_of_nodes_ = (unsigned int)cloud.size();
     if ( cloud_connectors_.size() != number_of_nodes_ ) return -1;
 
     node_status_.resize(number_of_nodes_, -1);
@@ -236,20 +262,20 @@ connectToCloud(const CloudType& cloud)
         {
             cloud_connectors_[ii]->set_status(false);
 
-            ACE_Time_Value tv(GADGETRON_TIMEOUT_PERIOD);
+            ACE_Time_Value tv( (time_t)GADGETRON_TIMEOUT_PERIOD );
             ACE_OS::sleep(tv);
 
             GADGET_DEBUG2("Open connection to (%s):%s failed ... \n", host.c_str(), port.c_str());
         }
         else
         {
-            //ACE_Time_Value tv(GADGETRON_TIMEOUT_PERIOD);
-            //ACE_OS::sleep(tv);
+            ACE_Time_Value tv( (time_t)0.5);
+            ACE_OS::sleep(tv);
 
             // send the xml file
             if (cloud_connectors_[ii]->send_gadgetron_configuration_file(cloud[ii].get<2>()) != 0)
             {
-                ACE_Time_Value tv(GADGETRON_TIMEOUT_PERIOD);
+                ACE_Time_Value tv( (time_t)GADGETRON_TIMEOUT_PERIOD );
                 ACE_OS::sleep(tv);
 
                 GADGET_DEBUG2("Unable to send XML configuration to the Gadgetron cloud host (%s):%s ... \n", host.c_str(), port.c_str());
@@ -323,10 +349,10 @@ runJobsOnCloud(std::vector<JobType*>& job_list, std::vector<JobType*>& completed
 
     std::vector<int> node_ids_used(node_ids);
 
-    unsigned int numOfJobs = job_list.size();
+    size_t numOfJobs = job_list.size();
     std::vector<int> job_status(numOfJobs, -1);
 
-    unsigned int ii;
+    size_t ii;
     for( ii=0; ii<numOfJobs; ii++ )
     {
         int nodeID = node_ids_used[ii];
@@ -336,9 +362,9 @@ runJobsOnCloud(std::vector<JobType*>& job_list, std::vector<JobType*>& completed
             continue;
         }
 
-        if ( nodeID > number_of_nodes_ )
+        if ( nodeID >= (int)number_of_nodes_ )
         {
-            nodeID %= number_of_nodes_;
+            nodeID %= (int)number_of_nodes_;
         }
 
         /*while ( node_status_[nodeID] < 0 )
@@ -349,17 +375,38 @@ runJobsOnCloud(std::vector<JobType*>& job_list, std::vector<JobType*>& completed
 
         if ( nodeID != node_ids_used[ii] ) node_ids_used[ii] = nodeID;*/
 
-        if ( node_status_[nodeID] < 0 )
+        int status = -1;
+        this->get_node_status(nodeID, status);
+        if ( status < 0 )
         {
-            node_ids_used[ii] = -1; // local node to perform this job
-            job_status[ii] = 0;
+            // try again
+            if ( number_of_nodes_ > 1 )
+            {
+                nodeID += number_of_nodes_/2;
+                if ( nodeID >= (int)number_of_nodes_ )
+                {
+                    nodeID %= (int)number_of_nodes_;
+                }
+
+                this->get_node_status(nodeID, status);
+            }
+
+            if ( status < 0 )
+            {
+                node_ids_used[ii] = -1; // local node to perform this job
+                job_status[ii] = 0;
+            }
+            else
+            {
+                node_ids_used[ii] = nodeID;
+            }
         }
 
         GADGET_DEBUG2("--> node for job %d is %d ... \n", ii, node_ids_used[ii]);
     }
 
     // append incoming jobs into the list
-    unsigned int startJobID = job_list_.size();
+    size_t startJobID = job_list_.size();
 
     if ( this->appendJobList(job_list, completed_job_list, node_ids_used, job_status) == -1 )
     {
@@ -380,12 +427,12 @@ runJobsOnCloud(std::vector<JobType*>& job_list, std::vector<JobType*>& completed
         GadgetContainerMessage<GadgetMessageIdentifier>* m1 =
                 new GadgetContainerMessage<GadgetMessageIdentifier>();
 
-        m1->getObjectPtr()->id = cloud_msg_id_writer_;
+        m1->getObjectPtr()->id = (ACE_UINT16)cloud_msg_id_writer_;
 
         GadgetContainerMessage<int>* m2 =
                 new GadgetContainerMessage<int>();
 
-        *(m2->getObjectPtr()) = ii+startJobID;
+        *(m2->getObjectPtr()) = (int)(ii+startJobID);
 
         GadgetContainerMessage<JobType>* m3 =
                 new GadgetContainerMessage<JobType>();
@@ -399,13 +446,19 @@ runJobsOnCloud(std::vector<JobType*>& job_list, std::vector<JobType*>& completed
             if (cloud_connectors_[nodeID]->putq(m1) == -1)
             {
                 ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to send job package %d on queue for node %d \n"), ii+startJobID, nodeID));
+                m1->release();
                 return -1;
             }
             else
             {
                 GADGET_DEBUG2("Send job %d to node %d ... \n", ii+startJobID, nodeID);
+                number_of_jobs_sent_out_++;
             }
         }
+        else
+        {
+            m1->release();
+        }
     }
 
     GADGET_DEBUG1("GadgetCloudController - all jobs sent ... \n");
@@ -468,6 +521,7 @@ closeCloudNode()
             if (cloud_connectors_[nodeID]->putq(m) == -1)
             {
                 ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to send CLOSE package on queue for node %d \n"), nodeID));
+                m->release();
                 return -1;
             }
         }
@@ -488,7 +542,7 @@ int GadgetCloudController<JobType>::waitForJobToComplete()
     ACE_Time_Value nowait (ACE_OS::gettimeofday ());
 
     //collect a incoming package a package if we have one
-    while (this->getq (mb) != -1)
+    while ( number_of_jobs_sent_out_>0 && (this->getq (mb) != -1) )
     {
         GadgetContainerMessage<int>* m_jobID =
             AsContainerMessage<int>(mb);
@@ -579,20 +633,28 @@ int GadgetCloudController<JobType>::setJobsTobeCompleted(unsigned int nodeID, in
     ACE_GUARD_RETURN(ACE_Thread_Mutex, guard, cloud_controller_mutex_, -1);
     try
     {
-        unsigned int N = this->node_id_used_.size();
-        unsigned int ii;
+        if ( (nodeID>=0) && (nodeID<this->node_status_.size()) )
+        {
+            node_status_[nodeID] = -1;
+        }
+
+        size_t N = this->node_id_used_.size();
+        size_t ii;
         for ( ii=0; ii<N; ii++ )
         {
             if ( this->node_id_used_[ii] == nodeID )
             {
-                if ( jobID>=0 && jobID<this->job_status_.size() )
-                {
-                    this->job_status_[jobID] = 0;
-                }
-                else
-                {
-                    if ( this->job_status_[ii]!= 0 ) this->job_status_[ii] = 0;
-                }
+                //if ( jobID>=0 && jobID<this->job_status_.size() )
+                //{
+                //    this->job_status_[jobID] = 0;
+                //}
+                //else
+                //{
+                //    if ( this->job_status_[ii]!= 0 ) this->job_status_[ii] = 0;
+                //}
+
+                // make sure all jobs on this node is marked as completed
+                if ( this->job_status_[ii]!= 0 ) this->job_status_[ii] = 0;
             }
         }
     }
@@ -613,7 +675,7 @@ int GadgetCloudController<JobType>::appendJobList(std::vector<JobType*>& job_lis
     ACE_GUARD_RETURN(ACE_Thread_Mutex, guard, cloud_controller_mutex_, -1);
     try
     {
-        unsigned int N = job_list.size();
+        size_t N = job_list.size();
 
         if ( completed_job_list.size() != N )
         {
@@ -633,7 +695,7 @@ int GadgetCloudController<JobType>::appendJobList(std::vector<JobType*>& job_lis
             return -1;
         }
 
-        unsigned int ii;
+        size_t ii;
         for ( ii=0; ii<N; ii++ )
         {
             job_list_.push_back(job_list[ii]);
diff --git a/toolboxes/gadgettools/GadgetImageMessageReader.h b/toolboxes/gadgettools/GadgetImageMessageReader.h
deleted file mode 100644
index 700eaaf..0000000
--- a/toolboxes/gadgettools/GadgetImageMessageReader.h
+++ /dev/null
@@ -1,71 +0,0 @@
-#ifndef GADGETSOCKETRECEIVER_H
-#define GADGETSOCKETRECEIVER_H
-
-#include "ace/SOCK_Stream.h"
-#include "ace/Task.h"
-
-#include <complex>
-#include <iostream>
-
-#include "GadgetMRIHeaders.h"
-#include "ismrmrd.h"
-#include "hoNDArray.h"
-#include "GadgetMessageInterface.h"
-
-namespace Gadgetron
-{
-
-/**
-Default implementation of GadgetMessageReader for Image messages
-*/
-
-template <typename T> class GadgetImageMessageReader : public GadgetMessageReader
-{
-
-public:
-    virtual ACE_Message_Block* read(ACE_SOCK_Stream* stream) 
-    {
-        GadgetContainerMessage<ISMRMRD::ImageHeader>* imgh = 
-            new GadgetContainerMessage<ISMRMRD::ImageHeader>();
-
-        ssize_t recv_count = 0;
-        if ((recv_count = stream->recv_n(imgh->getObjectPtr(), sizeof(ISMRMRD::ImageHeader))) <= 0) {
-            ACE_DEBUG( (LM_ERROR, ACE_TEXT("%P, %l, GadgetImageMessageReader, failed to read IMAGE Header\n")) );
-            imgh->release();
-            return 0;
-        }
-
-        std::vector<size_t> dims(3);
-        dims[0] = imgh->getObjectPtr()->matrix_size[0];
-        dims[1] = imgh->getObjectPtr()->matrix_size[1];
-        dims[2] = imgh->getObjectPtr()->matrix_size[2];
-
-        if (imgh->getObjectPtr()->channels > 1) {
-            dims.push_back(imgh->getObjectPtr()->channels);
-        } 
-
-        GadgetContainerMessage< hoNDArray< T > >* data =
-            new GadgetContainerMessage< hoNDArray< T > >();
-
-        try{ data->getObjectPtr()->create(&dims);}
-        catch (std::runtime_error &err){
-            GADGET_DEBUG_EXCEPTION(err,"GadgetImageMessageReader, failed to allocate memory\n");
-            imgh->release();
-            return 0;
-        }
-
-        imgh->cont(data);
-
-        if ((recv_count = stream->recv_n(data->getObjectPtr()->get_data_ptr(), sizeof(T)*data->getObjectPtr()->get_number_of_elements())) <= 0) {
-            ACE_DEBUG( (LM_ERROR, ACE_TEXT("%P, %l, GadgetImageMessageReader, failed to read data from socket\n")) );
-            imgh->release();
-            return 0;
-        }
-
-        return imgh;
-    }
-};
-
-}
-
-#endif //GADGETSOCKETRECEIVER_H
diff --git a/toolboxes/gadgettools/GadgetServerAcceptor.cpp b/toolboxes/gadgettools/GadgetServerAcceptor.cpp
deleted file mode 100644
index 48270b9..0000000
--- a/toolboxes/gadgettools/GadgetServerAcceptor.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-#include "GadgetServerAcceptor.h"
-#include "GadgetStreamController.h"
-
-using namespace Gadgetron;
-
-GadgetServerAcceptor::~GadgetServerAcceptor ()
-{
-  this->handle_close (ACE_INVALID_HANDLE, 0);
-}
-
-int GadgetServerAcceptor::open (const ACE_INET_Addr &listen_addr)
-{
-  if (this->acceptor_.open (listen_addr, 1) == -1)
-    ACE_ERROR_RETURN ((LM_ERROR,
-                       ACE_TEXT ("%p\n"),
-                       ACE_TEXT ("acceptor.open")),
-                      -1);
-  return this->reactor ()->register_handler
-    (this, ACE_Event_Handler::ACCEPT_MASK);
-}
-
-
-
-
-int GadgetServerAcceptor::handle_input (ACE_HANDLE)
-{
-  GadgetStreamController *controller;
-  ACE_NEW_RETURN (controller, GadgetStreamController, -1);
-  auto_ptr<GadgetStreamController> p (controller);
-
-  if (this->acceptor_.accept (controller->peer ()) == -1)
-    ACE_ERROR_RETURN ((LM_ERROR,
-                       ACE_TEXT ("(%P|%t) %p\n"),
-                       ACE_TEXT ("Failed to accept ")
-                       ACE_TEXT ("controller connection")),
-                      -1);
-  p.release ();
-  controller->reactor (this->reactor ());
-  if (controller->open () == -1)
-    controller->handle_close (ACE_INVALID_HANDLE, 0);
-  return 0;
-}
-
-int GadgetServerAcceptor::handle_close (ACE_HANDLE, ACE_Reactor_Mask)
-{
-  ACE_DEBUG( (LM_DEBUG, 
-	      ACE_TEXT("GadgetServerAcceptor::handle_close")) );
-  
-  GADGET_DEBUG1("Close Data Acceptor\n");
-
-  if (this->acceptor_.get_handle () != ACE_INVALID_HANDLE) {
-    ACE_Reactor_Mask m = 
-      ACE_Event_Handler::ACCEPT_MASK | ACE_Event_Handler::DONT_CALL;
-    this->reactor ()->remove_handler (this, m);
-    this->acceptor_.close ();
-  }
-  return 0;
-}
diff --git a/toolboxes/gadgettools/GadgetServerAcceptor.h b/toolboxes/gadgettools/GadgetServerAcceptor.h
deleted file mode 100644
index d79e2b3..0000000
--- a/toolboxes/gadgettools/GadgetServerAcceptor.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#ifndef _GADGETSERVERACCEPTOR_H
-#define _GADGETSERVERACCEPTOR_H
-
-#include "ace/SOCK_Acceptor.h"
-#include "ace/Reactor.h"
-#include "gadgettools_export.h"
-
-namespace Gadgetron{
-class EXPORTGADGETTOOLS GadgetServerAcceptor : public ACE_Event_Handler
-{
-public:
-  virtual ~GadgetServerAcceptor ();
-
-  int open (const ACE_INET_Addr &listen_addr);
-
-  virtual ACE_HANDLE get_handle (void) const
-    { return this->acceptor_.get_handle (); }
-
-  virtual int handle_input (ACE_HANDLE fd = ACE_INVALID_HANDLE);
-
-  virtual int handle_close (ACE_HANDLE handle,
-                            ACE_Reactor_Mask close_mask);
-protected:
-  ACE_SOCK_Acceptor acceptor_;
-};
-}
-#endif //_GADGETSERVERACCEPTOR_H
diff --git a/toolboxes/gadgettools/GadgetStreamController.cpp b/toolboxes/gadgettools/GadgetStreamController.cpp
deleted file mode 100644
index d9586a9..0000000
--- a/toolboxes/gadgettools/GadgetStreamController.cpp
+++ /dev/null
@@ -1,459 +0,0 @@
-#include "ace/OS_NS_stdlib.h"
-#include "ace/OS_NS_string.h"
-#include "ace/OS_NS_stdio.h"
-#include "ace/DLL.h"
-#include "ace/DLL_Manager.h"
-#include "ace/OS_NS_netdb.h"
-
-#include "GadgetStreamController.h"
-#include "GadgetContainerMessage.h"
-#include "Gadget.h"
-#include "EndGadget.h"
-
-#include "gadgetron.hxx" //Auto generated class representation of gadgetron XML configuration
-#include "url_encode.h"
-
-#include <complex>
-#include <fstream>
-
-using namespace Gadgetron;
-int GadgetStreamController::open (void)
-{
-	//We will set up the controllers message queue such that when a packet is enqueued write will be triggered.
-	this->notifier_.reactor (this->reactor ());
-	this->msg_queue ()->notification_strategy (&this->notifier_);
-    this->msg_queue()->high_water_mark((size_t)(48.0*1024*1024*1024));
-
-	ACE_TCHAR peer_name[MAXHOSTNAMELEN];
-	ACE_INET_Addr peer_addr;
-	if (peer().get_remote_addr (peer_addr) == 0 &&
-			peer_addr.addr_to_string (peer_name, MAXHOSTNAMELEN) == 0)
-		ACE_DEBUG ((LM_DEBUG,
-				ACE_TEXT ("(%P|%t) Connection from %s\n"),
-				peer_name));
-
-	//We have to have these basic types to be able to receive configuration file for stream
-	readers_.insert(GADGET_MESSAGE_CONFIG_FILE,
-			new GadgetMessageConfigFileReader());
-
-	readers_.insert(GADGET_MESSAGE_CONFIG_SCRIPT,
-			new GadgetMessageScriptReader());
-
-	readers_.insert(GADGET_MESSAGE_PARAMETER_SCRIPT,
-			new GadgetMessageScriptReader());
-
-	GadgetModule *head = 0;
-	GadgetModule *tail = 0;
-
-	if (tail == 0) {
-		Gadget* eg = new EndGadget();
-		if (eg) {
-			eg->set_controller(this);
-		}
-
-		ACE_NEW_RETURN(tail,
-				ACE_Module<ACE_MT_SYNCH>( ACE_TEXT("EndGadget"),
-						eg ),
-						-1);
-
-		stream_.open(0,head,tail);
-	}
-
-	this->writer_task_.open();
-
-	return this->reactor ()->register_handler(this,
-			ACE_Event_Handler::READ_MASK);// | ACE_Event_Handler::WRITE_MASK);
-}
-
-
-int GadgetStreamController::handle_input (ACE_HANDLE)
-{
-	//Reading sequence:
-	GadgetMessageIdentifier id;
-	ssize_t recv_cnt = 0;
-	if ((recv_cnt = peer().recv_n (&id, sizeof(GadgetMessageIdentifier))) <= 0) {
-		ACE_DEBUG ((LM_DEBUG,
-				ACE_TEXT ("(%P|%t) GadgetStreamController, unable to read message identifier\n")));
-		return -1;
-	}
-
-	if (id.id == GADGET_MESSAGE_CLOSE) {
-		GADGET_DEBUG1("Received close signal from client. Closing stream...\n");
-		stream_.close(1); //Shutdown gadgets and wait for them
-		GADGET_DEBUG1("Stream closed\n");
-		GADGET_DEBUG1("Closing writer task\n");
-		this->writer_task_.close(1);
-		GADGET_DEBUG1("Writer task closed\n");
-		return 0;
-	}
-
-	GadgetMessageReader* r = readers_.find(id.id);
-
-	if (!r) {
-		GADGET_DEBUG2("Unrecognized Message ID received: %d\n", id.id);
-		return GADGET_FAIL;
-	}
-
-	ACE_Message_Block* mb = r->read(&peer());
-
-	if (!mb) {
-		GADGET_DEBUG1("GadgetMessageReader returned null pointer\n");
-		return GADGET_FAIL;
-	}
-
-	//We need to handle some special cases to make sure that we can get a stream set up.
-	if (id.id == GADGET_MESSAGE_CONFIG_FILE) {
-		GadgetContainerMessage<GadgetMessageConfigurationFile>* cfgm =
-				AsContainerMessage<GadgetMessageConfigurationFile>(mb);
-
-		if (!cfgm) {
-			GADGET_DEBUG1("Failed to cast message block to configuration file\n");
-			mb->release();
-			return GADGET_FAIL;
-		} else {
-			if (this->configure_from_file(std::string(cfgm->getObjectPtr()->configuration_file)) != GADGET_OK) {
-				GADGET_DEBUG1("GadgetStream configuration failed\n");
-				mb->release();
-				return GADGET_FAIL;
-			} else {
-				mb->release();
-				return GADGET_OK;
-			}
-		}
-	} else if (id.id == GADGET_MESSAGE_CONFIG_SCRIPT) {
-		std::string xml_config(mb->rd_ptr(), mb->length());
-		if (this->configure(xml_config) != GADGET_OK) {
-			GADGET_DEBUG1("GadgetStream configuration failed\n");
-			mb->release();
-			return GADGET_FAIL;
-		} else {
-			mb->release();
-			return GADGET_OK;
-		}
-	}
-
-	ACE_Time_Value wait = ACE_OS::gettimeofday() + ACE_Time_Value(0,10000); //10ms from now
-	if (stream_.put(mb) == -1) {
-		GADGET_DEBUG2("Failed to put stuff on stream, too long wait, %d\n",  ACE_OS::last_error () ==  EWOULDBLOCK);
-		mb->release();
-		return GADGET_FAIL;
-	}
-
-	return GADGET_OK;
-}
-
-
-int GadgetStreamController::output_ready(ACE_Message_Block* mb) 
-{ 
-	int res = this->writer_task_.putq(mb);
-	return res;
-}
-
-
-
-int GadgetStreamController::handle_close (ACE_HANDLE, ACE_Reactor_Mask mask)
-{
-	GADGET_DEBUG1("handle_close called\n");
-
-	if (mask == ACE_Event_Handler::WRITE_MASK)
-		return 0;
-
-	GADGET_DEBUG1("Shutting down stream and closing up shop...\n");
-
-	this->stream_.close();
-
-	mask = ACE_Event_Handler::ALL_EVENTS_MASK |
-			ACE_Event_Handler::DONT_CALL;
-
-	this->reactor ()->remove_handler (this, mask);
-
-	//Empty output queue in case there is something on it.
-	int messages_dropped = this->msg_queue ()->flush();
-
-	if (messages_dropped) {
-		GADGET_DEBUG2("Flushed %d messages from output queue\n", messages_dropped);
-		this->reactor ()->handle_events(); //Flush any remaining events before we delete this Stream Controller
-	}
-
-	// Remove all readers and writers
-	//writers_.clear();
-	readers_.clear();
-
-	//Clear DLL handles (to make DLLs unload if needed)
-	for (size_t i = 0; i < dll_handles_.size(); i++) {
-#if defined WIN32
-		dll_handles_[i]->close(0); //On windows we will not unload the DLLs even when there are no more refs
-#else 
-		dll_handles_[i]->close(0); //On Unix/Mac it seems to be OK to do this
-#endif
-	}
-	dll_handles_.clear();
-
-	GADGET_DEBUG1("Stream is closed\n");
-
-	delete this;
-	return 0;
-}
-
-Gadget* GadgetStreamController::find_gadget(std::string gadget_name)
-{
-	GadgetModule* gm = stream_.find(gadget_name.c_str());
-
-	if (gm) {
-		Gadget* g = dynamic_cast<Gadget*>(gm->writer());
-		return g;
-	} else {
-		GADGET_DEBUG2("Gadget with name %s not found! Returning null pointer\n", gadget_name.c_str());
-	}
-
-	return 0;
-}
-
-int GadgetStreamController::configure_from_file(std::string config_xml_filename)
-{
-
-	char * gadgetron_home = ACE_OS::getenv("GADGETRON_HOME");
-	ACE_TCHAR config_file_name[4096];
-	ACE_OS::sprintf(config_file_name, "%s/config/%s", gadgetron_home, config_xml_filename.c_str());
-
-	GADGET_DEBUG2("Running configuration: %s\n", config_file_name);
-
-	std::ifstream file (config_file_name, std::ios::in|std::ios::binary|std::ios::ate);
-	if (file.is_open())
-	{
-		size_t size = file.tellg();
-		char* buffer = new char [size];
-		if (!buffer) {
-			GADGET_DEBUG1("Unable to create temporary buffer for configuration file\n");
-			return GADGET_FAIL;
-		}
-		file.seekg (0, std::ios::beg);
-		file.read (buffer, size);
-		file.close();
-		std::string xml_file_contents(buffer,size);
-
-		return configure(xml_file_contents);
-		delete[] buffer;
-
-	} else {
-		GADGET_DEBUG2("Unable to open configuation file: %s\n", config_file_name);
-		return GADGET_FAIL;
-	}
-
-	return GADGET_OK;
-}
-
-int GadgetStreamController::configure(std::string config_xml_string)
-{
-
-	char * gadgetron_home = ACE_OS::getenv("GADGETRON_HOME");
-	ACE_TCHAR schema_file_name[4096];
-	ACE_OS::sprintf(schema_file_name, "%s/schema/gadgetron.xsd", gadgetron_home);
-
-	std::string tmp(schema_file_name);
-	tmp = url_encode(tmp);
-	ACE_OS_String::strncpy(schema_file_name,tmp.c_str(), 4096);
-
-
-	xml_schema::properties props;
-	props.schema_location (
-	  "http://gadgetron.sf.net/gadgetron",
-	  std::string (schema_file_name));
-
-	std::istringstream str_stream(config_xml_string, std::stringstream::in);
-	std::auto_ptr<gadgetron::gadgetronStreamConfiguration> cfg;
-
-	try {
-		cfg = std::auto_ptr<gadgetron::gadgetronStreamConfiguration>(gadgetron::gadgetronStreamConfiguration_(str_stream,0,props));
-		//cfg = std::auto_ptr<gadgetron::gadgetronStreamConfiguration>(gadgetron::gadgetronStreamConfiguration_(std::string(config_file_name)));
-	}  catch (const xml_schema::exception& e) {
-		GADGET_DEBUG2("Failed to parse Gadget Stream Configuration: %s\n", e.what());
-		return GADGET_FAIL;
-	}
-
-	GADGET_DEBUG2("Found %d readers\n", cfg->reader().size());
-	GADGET_DEBUG2("Found %d writers\n", cfg->writer().size());
-	GADGET_DEBUG2("Found %d gadgets\n", cfg->gadget().size());
-
-	for (gadgetron::gadgetronStreamConfiguration::reader_sequence::iterator i (cfg->reader().begin ()); i != cfg->reader().end(); ++i) {
-		long slot = 0;
-		std::string dllname("");
-		std::string classname("");
-
-		slot = i->slot();
-		dllname = i->dll();
-		classname = i->classname();
-
-		GADGET_DEBUG1("--Found reader declaration\n");
-		GADGET_DEBUG2("  Reader dll: %s\n", dllname.c_str());
-		GADGET_DEBUG2("  Reader class: %s\n", classname.c_str());
-		GADGET_DEBUG2("  Reader slot: %d\n", slot);
-
-		GadgetMessageReader* r =
-				load_dll_component<GadgetMessageReader>(dllname.c_str(),
-						classname.c_str());
-
-		if (!r) {
-			GADGET_DEBUG1("Failed to load GadgetMessageReader from DLL\n");
-			return GADGET_FAIL;
-		}
-
-		readers_.insert((unsigned short)slot, r);
-
-	}
-	//Configuration of readers end
-
-
-	//Configuration of writers
-	for (gadgetron::gadgetronStreamConfiguration::writer_sequence::iterator i (cfg->writer().begin ()); i != cfg->writer().end(); ++i) {
-		long slot = 0;
-		std::string dllname("");
-		std::string classname("");
-
-		slot = i->slot();
-		dllname = i->dll();
-		classname = i->classname();
-
-		GADGET_DEBUG1("--Found writer declaration\n");
-		GADGET_DEBUG2("  Reader dll: %s\n", dllname.c_str());
-		GADGET_DEBUG2("  Reader class: %s\n", classname.c_str());
-		GADGET_DEBUG2("  Reader slot: %d\n", slot);
-
-		GadgetMessageWriter* w =
-				load_dll_component<GadgetMessageWriter>(dllname.c_str(),
-						classname.c_str());
-
-		if (!w) {
-			GADGET_DEBUG1("Failed to load GadgetMessageWriter from DLL\n");
-			return GADGET_FAIL;
-		}
-
-		writer_task_.register_writer(slot, w);
-	}
-	//Configuration of writers end
-
-	//Let's configure the stream
-	GADGET_DEBUG2("Processing %d gadgets in reverse order\n",cfg->gadget().size());
-	for (gadgetron::gadgetronStreamConfiguration::gadget_sequence::reverse_iterator i (cfg->gadget().rbegin ()); i != cfg->gadget().rend(); ++i) {
-		std::string gadgetname("");
-		std::string dllname("");
-		std::string classname("");
-
-		gadgetname = i->name();
-		dllname = i->dll();
-		classname = i->classname();
-
-		GADGET_DEBUG1("--Found gadget declaration\n");
-		GADGET_DEBUG2("  Gadget Name: %s\n", gadgetname.c_str());
-		GADGET_DEBUG2("  Gadget dll: %s\n", dllname.c_str());
-		GADGET_DEBUG2("  Gadget class: %s\n", classname.c_str());
-
-		GadgetModule* m = create_gadget_module(dllname.c_str(),
-				classname.c_str(),
-				gadgetname.c_str());
-
-		if (!m) {
-			GADGET_DEBUG2("Failed to create GadgetModule from %s:%s\n",
-					classname.c_str(),
-					dllname.c_str());
-			return GADGET_FAIL;
-		}
-
-		Gadget* g = dynamic_cast<Gadget*>(m->writer());//Get the gadget out of the module
-
-		GADGET_DEBUG2("  Gadget parameters: %d\n", i->property().size());
-		for (gadgetron::gadget::property_sequence::iterator p (i->property().begin()); p != i->property().end(); ++p) {
-			std::string pname(p->name());
-			std::string pval(p->value());
-			GADGET_DEBUG2("Setting parameter %s = %s\n", pname.c_str(),pval.c_str());
-			g->set_parameter(pname.c_str(),pval.c_str(),false);
-		}
-
-		if (stream_.push(m) < 0) {
-			GADGET_DEBUG2("Failed to push Gadget %s onto stream\n", gadgetname.c_str());
-			delete m;
-			return GADGET_FAIL;
-		}
-
-	}
-
-	GADGET_DEBUG1("Gadget Stream configured\n");
-	stream_configured_ = true;
-
-	return GADGET_OK;
-}
-
-GadgetModule * GadgetStreamController::create_gadget_module(const char* DLL, 
-		const char* gadget,
-		const char* gadget_module_name)
-{
-
-	Gadget* g = load_dll_component<Gadget>(DLL,gadget);
-
-	if (!g) {
-		GADGET_DEBUG1("Failed to load gadget using factory\n");
-		return 0;
-	}
-
-	g->set_controller(this);
-
-	GadgetModule *module = 0;
-	ACE_NEW_RETURN (module,
-			GadgetModule (gadget_module_name, g),
-			0);
-
-	return module;
-}
-
-
-template <class T>  
-T* GadgetStreamController::load_dll_component(const char* DLL, const char* component_name)
-{
-	ACE_DLL_Manager* dllmgr = ACE_DLL_Manager::instance();
-
-	ACE_DLL_Handle* dll = 0;
-	ACE_SHLIB_HANDLE dll_handle = 0;
-
-	ACE_TCHAR dllname[1024];
-#if defined(WIN32) && defined(_DEBUG)
-	ACE_OS::sprintf(dllname, "%s%sd",ACE_DLL_PREFIX, DLL);
-#else
-	ACE_OS::sprintf(dllname, "%s%s",ACE_DLL_PREFIX, DLL);
-#endif
-
-	ACE_TCHAR factoryname[1024];
-	ACE_OS::sprintf(factoryname, "make_%s", component_name);
-
-	dll = dllmgr->open_dll (dllname, ACE_DEFAULT_SHLIB_MODE, dll_handle );
-
-	if (!dll) {
-		GADGET_DEBUG1("Failed to load DLL, Possible reasons: \n");
-		GADGET_DEBUG1("   * Name of DLL is wrong in XML file \n");
-		GADGET_DEBUG1("   * Path of DLL is not in your DLL search path (LD_LIBRARY_PATH on Unix)\n");
-		GADGET_DEBUG1("   * Path of other DLLs that this DLL depends on is not in the search path\n");
-		return 0;
-	} else {
-		dll_handles_.push_back(dll);
-	}
-
-	//Function pointer
-	typedef T* (*ComponentCreator) (void);
-
-	void *void_ptr = dll->symbol (factoryname);
-	ptrdiff_t tmp = reinterpret_cast<ptrdiff_t> (void_ptr);
-	ComponentCreator cc = reinterpret_cast<ComponentCreator> (tmp);
-
-	if (cc == 0) {
-		GADGET_DEBUG2("Failed to load factory (%s) from DLL (%s)\n", dllname, factoryname);
-		return 0;
-	}
-
-	T* c = cc();
-
-	if (!c) {
-		GADGET_DEBUG1("Failed to create component using factory\n");
-		return 0;
-	}
-
-	return c;
-}
diff --git a/toolboxes/gadgettools/GadgetStreamController.h b/toolboxes/gadgettools/GadgetStreamController.h
deleted file mode 100644
index 912f54d..0000000
--- a/toolboxes/gadgettools/GadgetStreamController.h
+++ /dev/null
@@ -1,559 +0,0 @@
-#ifndef GADGETSTREAMCONTROLLER_H
-#define GADGETSTREAMCONTROLLER_H
-
-#include "ace/Log_Msg.h"
-#include "ace/Reactor.h"
-#include "ace/SOCK_Stream.h"
-#include "ace/Stream.h"
-#include "ace/Message_Queue.h"
-#include "ace/Svc_Handler.h"
-#include "ace/Reactor_Notification_Strategy.h"
-
-#include <complex>
-#include <vector>
-#include "boost/tuple/tuple.hpp"
-#include "boost/tuple/tuple_comparison.hpp"
-#include "boost/tuple/tuple_io.hpp"
-
-#include "gadgettools_export.h"
-#include "Gadgetron.h"
-#include "Gadget.h"
-#include "GadgetMessageInterface.h"
-#include "GadgetronConnector.h"
-#include "GadgetImageMessageReader.h"
-#include "GadgetImageMessageWriter.h"
-
-typedef ACE_Module<ACE_MT_SYNCH> GadgetModule;
-
-namespace Gadgetron{
-
-class EXPORTGADGETTOOLS GadgetStreamController 
-    : public ACE_Svc_Handler<ACE_SOCK_STREAM, ACE_MT_SYNCH>
-{
-public:
-    GadgetStreamController()
-        : stream_configured_(false)
-        , notifier_ (0, this, ACE_Event_Handler::WRITE_MASK)
-        , writer_task_(&this->peer())
-    { }
-
-    virtual ~GadgetStreamController()
-    { 
-        //ACE_DEBUG( (LM_INFO, ACE_TEXT("~GadgetStreamController() called\n")) );
-    }
-
-    //ACE_SOCK_Stream &peer (void) { return this->sock_; }
-
-    int open (void);
-
-    /*
-    virtual ACE_HANDLE get_handle (void) const { 
-    return this->sock_.get_handle (); 
-    }
-    */
-
-    virtual int handle_input (ACE_HANDLE fd = ACE_INVALID_HANDLE);
-    //virtual int handle_output (ACE_HANDLE fd = ACE_INVALID_HANDLE);
-    virtual int handle_close (ACE_HANDLE handle,
-        ACE_Reactor_Mask close_mask);
-
-    virtual int output_ready(ACE_Message_Block* mb);
-
-    virtual Gadget* find_gadget(std::string gadget_name);
-
-private:
-    ACE_Stream<ACE_MT_SYNCH> stream_;
-    bool stream_configured_;
-    WriterTask writer_task_;
-
-    ACE_Reactor_Notification_Strategy notifier_;
-
-    GadgetMessageReaderContainer readers_;
-
-    std::vector<ACE_DLL_Handle*> dll_handles_;
-
-    virtual int configure(std::string config_xml_string);
-    virtual int configure_from_file(std::string config_xml_filename);
-
-    virtual GadgetModule * create_gadget_module(const char* DLL, const char* gadget, const char* gadget_module_name);
-
-    template <class T>  T* load_dll_component(const char* DLL, const char* component_name);
-
-};
-
-//template<typename JobType> 
-//class GadgetCloudController : public ACE_Task<ACE_MT_SYNCH>
-//{
-//public:
-//
-//    typedef boost::tuple<std::string, std::string, std::string> CloudNodeType;
-//    typedef std::vector<CloudNodeType> CloudType;
-//
-//    GadgetCloudController();
-//    virtual ~GadgetCloudController();
-//
-//    // this GadgetCloudController runs in the passive mode
-//    virtual int open(void* = 0);
-//
-//    virtual int close(unsigned long flags);
-//
-//    // create connector and register the reader and writer for every connector
-//    int createConnector(const CloudType& cloud, 
-//        size_t msgID_reader, std::vector<GadgetMessageReader*>& readers, 
-//        size_t msgID_writer, std::vector<GadgetMessageWriter*>& writers);
-//
-//    // connect to the cloud host, need to call createConnector first
-//    // hostnames: the host name or IP addresses for every node
-//    // port_nos: port number for every node
-//    // xmlfiles: the xml configuration file name sent to every node
-//    int connectToCloud(const CloudType& cloud);
-//
-//    // send jobs to the node and wait for jobs to be returned
-//    // for every job, the node id identify which nodes to send this job
-//    // after sending all jobs, this call will block until all jobs are returned
-//    int runJobsOnCloud(const std::vector<int>& node_ids);
-//
-//    // should be called after calling runJobsOnCloud
-//    int waitForJobToComplete();
-//
-//    // wait for all jobs to come back
-//    // all returned jobs will be put into the completed_job_list_
-//    // this function will not return until all jobs are returned
-//    virtual int svc(void);
-//
-//    // list to store jobs sent to nodes
-//    std::vector<JobType*> job_list_;
-//    // list to store completed jobs from the nodes
-//    std::vector<JobType*> completed_job_list_;
-//
-//private:
-//
-//    // connector to every node
-//    // one connector for a node
-//    // node id starts from 0, and increase by 1
-//    std::vector<GadgetronCloudConnector<JobType>* > cloud_connectors_;
-//
-//    size_t cloud_msg_id_reader_;
-//    size_t cloud_msg_id_writer_;
-//
-//    // number of available nodes in the cloud
-//    unsigned int number_of_nodes_;
-//
-//    // node status, 0/-1 : available/unavailable
-//    std::vector<int> node_status_;
-//
-//    // job status, 0/-1 : completed/not completed
-//    std::vector<int> job_status_;
-//
-//    // a condition variable to wake up the caller thread
-//    ACE_Thread_Mutex mutex;
-//    ACE_Condition_Thread_Mutex* cond_;
-//
-//    ACE_Reactor gt_cloud_rector_;
-//};
-//
-//template <typename JobType> 
-//GadgetCloudController<JobType>::GadgetCloudController() : cloud_msg_id_reader_(GADGET_MESSAGE_CLOUD_JOB), cloud_msg_id_writer_(GADGET_MESSAGE_CLOUD_JOB)
-//{
-//    cond_ = new ACE_Condition_Thread_Mutex(mutex, "GadgetCloudController");
-//}
-//
-//template <typename JobType> 
-//GadgetCloudController<JobType>::~GadgetCloudController()
-//{
-//}
-//
-//template <typename JobType> 
-//int GadgetCloudController<JobType>::open(void* p)
-//{
-//    ACE_TRACE(( ACE_TEXT("GadgetCloudController::open") ));
-//
-//    this->reactor(&gt_cloud_rector_);
-//
-//    //if (!this->reactor())
-//    //{
-//    //    ACE_DEBUG((LM_INFO, ACE_TEXT("Setting reactor")));
-//    //    this->reactor(ACE_Reactor::instance());
-//    //}
-//
-//    return this->activate( THR_NEW_LWP | THR_JOINABLE, 1 );
-//}
-//
-//template <typename JobType> 
-//int GadgetCloudController<JobType>::close(unsigned long flags)
-//{
-//    int rval = 0;
-//    if (flags == 1)
-//    {
-//        ACE_Message_Block *hangup = new ACE_Message_Block();
-//        hangup->msg_type( ACE_Message_Block::MB_HANGUP );
-//        if (this->putq(hangup) == -1) {
-//            hangup->release();
-//            ACE_ERROR_RETURN( (LM_ERROR,
-//                    ACE_TEXT("%p\n"),
-//                    ACE_TEXT("GadgetCloudController::close, putq")),
-//                    -1);
-//        }
-//        rval = this->wait();
-//    }
-//    return rval;
-//}
-//
-//template <typename JobType> 
-//int GadgetCloudController<JobType>::createConnector(const CloudType& cloud, 
-//    size_t msgID_reader, std::vector<GadgetMessageReader*>& readers, 
-//    size_t msgID_writer, std::vector<GadgetMessageWriter*>& writers)
-//{
-//    number_of_nodes_ = cloud.size();
-//
-//    if ( readers.size() != number_of_nodes_ ) return -1;
-//    if ( writers.size() != number_of_nodes_ ) return -1;
-//
-//    cloud_connectors_.resize(number_of_nodes_, NULL);
-//    node_status_.resize(number_of_nodes_, -1);
-//
-//    cloud_msg_id_reader_ = msgID_reader;
-//    cloud_msg_id_writer_ = msgID_writer;
-//
-//    unsigned int ii;
-//    for( ii=0; ii<number_of_nodes_; ii++ )
-//    {
-//        GadgetronCloudConnector<JobType>* con;
-//        ACE_NEW_RETURN (con, GadgetronCloudConnector<JobType>, -1);
-//        cloud_connectors_[ii] = con;
-//
-//        cloud_connectors_[ii]->register_reader(cloud_msg_id_reader_, readers[ii] );
-//        cloud_connectors_[ii]->register_writer(cloud_msg_id_writer_, writers[ii] );
-//
-//        cloud_connectors_[ii]->set_cloud_controller(this);
-//    }
-//
-//    return 0;
-//}
-//
-//template <typename JobType> 
-//int GadgetCloudController<JobType>::
-//connectToCloud(const CloudType& cloud)
-//{
-//    number_of_nodes_ = cloud.size();
-//    if ( cloud_connectors_.size() != number_of_nodes_ ) return -1;
-//
-//    unsigned int ii;
-//    for( ii=0; ii<number_of_nodes_; ii++ )
-//    {
-//        if ( cloud_connectors_[ii] == NULL ) return -1;
-//
-//        // if ( cloud_connectors_[ii].open(hostnames[ii], port_nos[ii])!=0 )
-//        if ( cloud_connectors_[ii]->open(cloud[ii].get<0>(), cloud[ii].get<1>())!=0 )
-//        {
-//            ACE_DEBUG(( LM_ERROR, ACE_TEXT("(%p) Open connection to %s:%s failed ... \n"), cloud[ii].get<0>().c_str(), cloud[ii].get<1>().c_str()));
-//        }
-//        else
-//        {
-//            node_status_[ii] = 0;
-//
-//            // send the xml file
-//            if (cloud_connectors_[ii]->send_gadgetron_configuration_file(cloud[ii].get<2>()) != 0)
-//            {
-//                ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to send XML configuration to the Gadgetron cloud host %s:%s \n"), cloud[ii].get<0>().c_str(), cloud[ii].get<1>().c_str()));
-//                return -1;
-//            }
-//        }
-//    }
-//
-//    bool hasGoodNode = false;
-//    for( ii=0; ii<number_of_nodes_; ii++ )
-//    {
-//        if ( node_status_[ii] == 0 )
-//        {
-//            hasGoodNode = true;
-//            break;
-//        }
-//    }
-//
-//    if ( !hasGoodNode )
-//    {
-//        ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to find even one good node ... \n")));
-//        return -1;
-//    }
-//
-//    return 0;
-//}
-//
-//template <typename JobType> 
-//int GadgetCloudController<JobType>::
-//runJobsOnCloud(const std::vector<int>& node_ids)
-//{
-//    ACE_DEBUG((LM_INFO, ACE_TEXT("(%t) GadgetCloudController : into runJobsOnCloud(...) ... \n")));
-//
-//    if ( job_list_.empty() )
-//    {
-//        ACE_DEBUG((LM_ERROR, ACE_TEXT("GadgetCloudController : job list is empty ... \n")));
-//        return -1;
-//    }
-//
-//    if ( completed_job_list_.empty() )
-//    {
-//        ACE_DEBUG((LM_ERROR, ACE_TEXT("GadgetCloudController : completed job list is empty ... \n")));
-//        return -1;
-//    }
-//
-//    if ( job_list_.size() != completed_job_list_.size() )
-//    {
-//        ACE_DEBUG((LM_ERROR, ACE_TEXT("GadgetCloudController : job list size does not match ... \n")));
-//        return -1;
-//    }
-//
-//    if ( job_list_.size() != node_ids.size() )
-//    {
-//        ACE_DEBUG((LM_ERROR, ACE_TEXT("GadgetCloudController : job list size does not match the node id size ... \n")));
-//        return -1;
-//    }
-//
-//    std::vector<int> node_ids_used(node_ids);
-//
-//    unsigned int numOfJobs = job_list_.size();
-//    job_status_.resize(numOfJobs, -1);
-//
-//    unsigned int ii;
-//    for( ii=0; ii<numOfJobs; ii++ )
-//    {
-//        int nodeID = node_ids_used[ii];
-//        if ( nodeID == -1 )
-//        {
-//            job_status_[ii] = 0;
-//            continue;
-//        }
-//
-//        if ( nodeID > number_of_nodes_ )
-//        {
-//            nodeID %= number_of_nodes_;
-//        }
-//
-//        while ( node_status_[nodeID] < 0 )
-//        {
-//            nodeID--;
-//            if ( nodeID == 0 ) nodeID = number_of_nodes_;
-//        }
-//
-//        if ( nodeID != node_ids_used[ii] ) node_ids_used[ii] = nodeID;
-//
-//        // send job to a node
-//        GadgetContainerMessage<GadgetMessageIdentifier>* m1 =
-//                new GadgetContainerMessage<GadgetMessageIdentifier>();
-//
-//        m1->getObjectPtr()->id = cloud_msg_id_writer_;
-//
-//        GadgetContainerMessage<int>* m2 =
-//                new GadgetContainerMessage<int>();
-//
-//        *(m2->getObjectPtr()) = ii;
-//
-//        GadgetContainerMessage<JobType>* m3 =
-//                new GadgetContainerMessage<JobType>();
-//
-//        *(m3->getObjectPtr()) = *(job_list_[ii]);
-//        m1->cont(m2);
-//        m2->cont(m3);
-//
-//        if ( node_status_[nodeID] == 0 )
-//        {
-//            if (cloud_connectors_[nodeID]->putq(m1) == -1)
-//            {
-//                ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to send job package %d on queue for node %d \n"), ii, nodeID));
-//                return -1;
-//            }
-//            else
-//            {
-//                ACE_DEBUG((LM_INFO, ACE_TEXT("Send job %d to node %d ... \n"), ii, nodeID));
-//            }
-//        }
-//    }
-//
-//    std::vector<bool> closeMsgSent(number_of_nodes_, false);
-//    for( ii=0; ii<numOfJobs; ii++ )
-//    {
-//        unsigned int nodeID = node_ids_used[ii];
-//
-//        if ( !closeMsgSent[nodeID] )
-//        {
-//            closeMsgSent[nodeID] = true;
-//
-//            // send the close message for this node
-//            GadgetContainerMessage<GadgetMessageIdentifier>* m = new GadgetContainerMessage<GadgetMessageIdentifier>();
-//            m->getObjectPtr()->id = GADGET_MESSAGE_CLOSE;
-//
-//            if (cloud_connectors_[nodeID]->putq(m) == -1)
-//            {
-//                ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to send CLOSE package on queue for node %d \n"), nodeID));
-//                return -1;
-//            }
-//        }
-//    }
-//
-//    ACE_DEBUG((LM_INFO, ACE_TEXT("GadgetCloudController thread - all jobs sent ... \n")));
-//
-//    // block the caller thread
-//    // cond_->wait();
-//
-//    // ACE_DEBUG((LM_INFO, ACE_TEXT("GadgetCloudController thread wakes up ... \n")));
-//
-//    return 0;
-//}
-//
-//template <typename JobType> 
-//int GadgetCloudController<JobType>::waitForJobToComplete()
-//{
-//    // block the caller thread
-//    ACE_DEBUG((LM_INFO, ACE_TEXT("(%t) GadgetCloudController thread sleeps ... \n")));
-//    // int ret = cond_->wait();
-//
-//    ACE_Message_Block *mb = 0;
-//    ACE_Time_Value nowait (ACE_OS::gettimeofday ());
-//
-//    //collect a incoming package a package if we have one
-//    while (this->getq (mb) != -1)
-//    {
-//        GadgetContainerMessage<GadgetMessageIdentifier>* mid =
-//            AsContainerMessage<GadgetMessageIdentifier>(mb);
-//
-//        if (!mid)
-//        {
-//            ACE_DEBUG ((LM_ERROR, ACE_TEXT ("Invalid message on GadgetCloudController queue\n")));
-//            mb->release();
-//            cond_->signal();
-//            return -1;
-//        }
-//
-//        //Is this a shutdown message?
-//        if (mid->getObjectPtr()->id == GADGET_MESSAGE_CLOSE)
-//        {
-//            cond_->signal();
-//            return 0;
-//        }
-//
-//        if (mid->getObjectPtr()->id == cloud_msg_id_reader_)
-//        {
-//            GadgetContainerMessage<int>* m_jobID =
-//                AsContainerMessage<int>(mid->cont());
-//
-//            int jobID = *(m_jobID->getObjectPtr());
-//
-//            GadgetContainerMessage<JobType>* job =
-//                AsContainerMessage<JobType>(mid->cont()->cont());
-//
-//            *(completed_job_list_[jobID]) = *(job->getObjectPtr());
-//            job_status_[jobID] = 0;
-//        }
-//
-//        mb->release();
-//
-//        // if all jobs are received, notice the caller thread
-//        bool allJobProcessed = true;
-//        for ( unsigned int ii=0; ii<job_status_.size(); ii++ )
-//        {
-//            if ( job_status_[ii] != 0 )
-//            {
-//                allJobProcessed = false;
-//                break;
-//            }
-//        }
-//
-//        if ( allJobProcessed )
-//        {
-//            ACE_DEBUG ((LM_INFO, ACE_TEXT ("All jobs are completed and returned on GadgetCloudController queue\n")));
-//            break;
-//        }
-//    }
-//
-//    ACE_DEBUG((LM_INFO, ACE_TEXT("(%t) GadgetCloudController thread wakes up ... \n")));
-//    return 0;
-//}
-//
-//template <typename JobType> 
-//int GadgetCloudController<JobType>::svc(void)
-//{
-//    ACE_DEBUG((LM_INFO, ACE_TEXT("(%t) Into GadgetCloudController svc() ... \n")));
-//
-//    this->reactor()->owner(ACE_Thread::self ());//, &old_owner);
-//
-//    this->reactor()->reset_event_loop();
-//
-//    ACE_Time_Value initialDelay (3);
-//    ACE_Time_Value interval (0,100);
-//
-//    //Handle the events
-//    this->reactor()->run_reactor_event_loop();
-//
-//    //this->reactor()->owner(&old_owner);
-//
-//    ACE_DEBUG ((LM_INFO, ACE_TEXT ("(%P|%t) GadgetronConnector svc done...\n")));
-//
-//    //ACE_Message_Block *mb = 0;
-//    //ACE_Time_Value nowait (ACE_OS::gettimeofday ());
-//
-//    ////collect a incoming package a package if we have one
-//    //while (this->getq (mb) != -1)
-//    //{
-//    //    GadgetContainerMessage<GadgetMessageIdentifier>* mid =
-//    //            AsContainerMessage<GadgetMessageIdentifier>(mb);
-//
-//    //    if (!mid)
-//    //    {
-//    //        ACE_DEBUG ((LM_ERROR, ACE_TEXT ("Invalid message on GadgetCloudController queue\n")));
-//    //        mb->release();
-//    //        cond_->signal();
-//    //        return -1;
-//    //    }
-//
-//    //    //Is this a shutdown message?
-//    //    if (mid->getObjectPtr()->id == GADGET_MESSAGE_CLOSE)
-//    //    {
-//    //        cond_->signal();
-//    //        return 0;
-//    //    }
-//
-//    //    if (mid->getObjectPtr()->id == cloud_msg_id_reader_)
-//    //    {
-//    //        GadgetContainerMessage<int>* m_jobID =
-//    //            AsContainerMessage<int>(mid->cont());
-//
-//    //        int jobID = *(m_jobID->getObjectPtr());
-//
-//    //        GadgetContainerMessage<JobType>* job =
-//    //            AsContainerMessage<JobType>(mid->cont()->cont());
-//
-//    //        *(completed_job_list_[jobID]) = *(job->getObjectPtr());
-//    //        job_status_[jobID] = 0;
-//    //    }
-//
-//    //    mb->release();
-//
-//    //    // if all jobs are received, notice the caller thread
-//    //    bool allJobProcessed = true;
-//    //    for ( unsigned int ii=0; ii<job_status_.size(); ii++ )
-//    //    {
-//    //        if ( job_status_[ii] != 0 )
-//    //        {
-//    //            allJobProcessed = false;
-//    //            break;
-//    //        }
-//    //    }
-//
-//    //    if ( allJobProcessed )
-//    //    {
-//    //        ACE_DEBUG ((LM_INFO, ACE_TEXT ("All jobs are completed and returned on GadgetCloudController queue\n")));
-//    //        break;
-//    //    }
-//    //}
-//
-//    //// notice the caller thread
-//    //ACE_DEBUG((LM_INFO, ACE_TEXT("Wake up GadgetCloudController thread ... \n")));
-//
-//    //cond_->signal();
-//
-//    return 0;
-//}
-
-}
-#endif //GADGETSTREAMCONTROLLER_H
diff --git a/toolboxes/gadgettools/GadgetronCloudConnector.h b/toolboxes/gadgettools/GadgetronCloudConnector.h
index bd50e68..50a720f 100644
--- a/toolboxes/gadgettools/GadgetronCloudConnector.h
+++ b/toolboxes/gadgettools/GadgetronCloudConnector.h
@@ -13,7 +13,7 @@
 #include "gadgettools_export.h"
 #include "GadgetMRIHeaders.h"
 
-#define GADGETRON_TIMEOUT_PERIOD 1.0
+#define GADGETRON_TIMEOUT_PERIOD 1.5
 
 namespace Gadgetron
 {
@@ -58,7 +58,7 @@ public:
             if ( retval == -1 )
             {
                 GADGET_DEBUG1("CloudWriterTask svcImpl failed ... \n");
-                ACE_OS::sleep(ACE_Time_Value(GADGETRON_TIMEOUT_PERIOD));
+                ACE_OS::sleep(ACE_Time_Value( (time_t)GADGETRON_TIMEOUT_PERIOD ));
                 return -1;
             }
         }
@@ -166,7 +166,7 @@ public:
 
     int register_reader(size_t slot, GadgetMessageReader* reader)
     {
-        return readers_.insert(slot,reader);
+        return readers_.insert( (unsigned short)slot,reader);
     }
 
     virtual int close(unsigned long flags)
@@ -200,7 +200,7 @@ public:
             if ((recv_count = cloud_connector_->peer().recv_n(&mid, sizeof(GadgetMessageIdentifier))) <= 0)
             {
                 ACE_DEBUG( (LM_ERROR, ACE_TEXT("%P, %l, CloudReaderTask, failed to read message identifier\n")) );
-                ACE_OS::sleep(ACE_Time_Value(GADGETRON_TIMEOUT_PERIOD));
+                ACE_OS::sleep(ACE_Time_Value( (time_t)GADGETRON_TIMEOUT_PERIOD ));
                 cloud_connector_->set_status(false);
                 cloud_connector_->setJobTobeCompletedAndNoticeController();
                 return -1;
@@ -227,13 +227,15 @@ public:
             if (!mb)
             {
                 ACE_DEBUG( (LM_ERROR, ACE_TEXT("%P, %l, CloudReaderTask, Failed to read message\n")) );
-                ACE_OS::sleep(ACE_Time_Value(GADGETRON_TIMEOUT_PERIOD));
+                ACE_OS::sleep(ACE_Time_Value( (time_t)GADGETRON_TIMEOUT_PERIOD ));
                 cloud_connector_->set_status(false);
                 cloud_connector_->setJobTobeCompletedAndNoticeController();
                 return -1;
             }
             else
             {
+                ACE_OS::sleep(ACE_Time_Value( (time_t)(0.5) ));
+
                 if (cloud_connector_->process(mid.id, mb) < 0)
                 {
                     ACE_DEBUG( (LM_ERROR, ACE_TEXT("%P, %l, ReaderTask, Failed to process message\n")) );
@@ -417,6 +419,7 @@ int GadgetronCloudConnector<JobType>::open(std::string hostname, std::string por
     else
     {
         status_ = false;
+        return -1;
     }
 
     return 0;
@@ -429,10 +432,16 @@ int GadgetronCloudConnector<JobType>::process(size_t messageid, ACE_Message_Bloc
     if ( cloud_controller_ == NULL )
     {
         ACE_DEBUG ((LM_ERROR, ACE_TEXT ("(%P|%t) GadgetronCloudConnector, pointer of could controller is null ...\n")));
+        mb->release();
         return -1;
     }
 
-    cloud_controller_->putq(mb);
+    if ( cloud_controller_->putq(mb) == -1)
+    {
+        ACE_DEBUG((LM_ERROR, ACE_TEXT("Unable to put received message into the queue of cloud controller %d \n"), messageid));
+        mb->release();
+        return -1;
+    }
 
     return 0;
 }
@@ -475,6 +484,7 @@ int GadgetronCloudConnector<JobType>::setJobTobeCompletedAndNoticeController(int
 
     ACE_DEBUG( (LM_INFO, ACE_TEXT("%P, %l, GadgetronCloudConnector, into setJobTobeCompletedAndNoticeController(...) ... \n")) );
 
+    // set the job to be completed and invalidate the node
     if ( cloud_controller_->setJobsTobeCompleted(nodeID_, jobID) < 0 )
     {
         ACE_DEBUG( (LM_ERROR, ACE_TEXT("%P, %l, GadgetronCloudConnector, cloud_controller_->setJobsTobeCompleted(%d, %d) failed ... \n"), nodeID_, jobID) );
diff --git a/toolboxes/gadgettools/GadgetronConnector.cpp b/toolboxes/gadgettools/GadgetronConnector.cpp
index 1c7ecd8..c09cbe4 100644
--- a/toolboxes/gadgettools/GadgetronConnector.cpp
+++ b/toolboxes/gadgettools/GadgetronConnector.cpp
@@ -1,21 +1,14 @@
-/*
- * GadgetronConnector.cpp
- *
- *  Created on: Nov 1, 2011
- *      Author: hansenms
- */
-
-#include <ace/SOCK_Connector.h>
 #include "GadgetronConnector.h"
 
+#include <ace/SOCK_Connector.h>
 #include <iostream>
 
 using namespace Gadgetron;
+
 GadgetronConnector::GadgetronConnector()
     //: notifier_ (0, this, ACE_Event_Handler::WRITE_MASK)
     : writer_task_(&this->peer())
 {
-
 }
 
 GadgetronConnector::~GadgetronConnector() {
@@ -220,7 +213,7 @@ int GadgetronConnector::svc(void)
 
 int GadgetronConnector::register_reader(size_t slot, GadgetMessageReader *reader)
 {
-    return readers_.insert(slot,reader);
+    return readers_.insert( (unsigned short)slot,reader);
 }
 
 /*
@@ -259,7 +252,7 @@ int GadgetronConnector::send_gadgetron_configuration_script(std::string config_x
     id.id = GADGET_MESSAGE_CONFIG_SCRIPT;
 
     GadgetMessageScript ini;
-    ini.script_length = config_xml.size()+1;
+    ini.script_length = (ACE_UINT32)config_xml.size()+1;
 
     if (this->peer().send_n(&id, sizeof(GadgetMessageIdentifier)) != sizeof(GadgetMessageIdentifier)) {
         ACE_DEBUG ((LM_ERROR, ACE_TEXT ("(%P|%t) Unable to send GadgetMessageIdentifier\n")));
@@ -285,7 +278,7 @@ int GadgetronConnector::send_gadgetron_parameters(std::string xml_string)
     id.id = GADGET_MESSAGE_PARAMETER_SCRIPT;
 
     GadgetMessageScript conf;
-    conf.script_length = xml_string.size()+1;
+    conf.script_length = (ACE_UINT32)xml_string.size()+1;
     if (this->peer().send_n(&id, sizeof(GadgetMessageIdentifier)) != sizeof(GadgetMessageIdentifier)) {
         ACE_DEBUG ((LM_ERROR, ACE_TEXT ("(%P|%t) Unable to send GadgetMessageIdentifier\n")));
         return -1;
diff --git a/toolboxes/gadgettools/GadgetronConnector.h b/toolboxes/gadgettools/GadgetronConnector.h
index 9b0b291..734a3cd 100644
--- a/toolboxes/gadgettools/GadgetronConnector.h
+++ b/toolboxes/gadgettools/GadgetronConnector.h
@@ -1,25 +1,20 @@
-/*
- * GadgetronConnector.h
- *
- *  Created on: Nov 1, 2011
- *      Author: Michael S. Hansen
- */
-
 #ifndef GADGETRONCONNECTOR_H_
 #define GADGETRONCONNECTOR_H_
 
+#include "GadgetronSlotContainer.h"
+#include "GadgetMessageInterface.h"
+#include "gadgettools_export.h"
+
 #include <ace/Svc_Handler.h>
 #include <ace/Reactor.h>
 #include <ace/SOCK_Stream.h>
 #include <ace/Reactor_Notification_Strategy.h>
 #include <string>
-#include "GadgetronSlotContainer.h"
-#include "GadgetMessageInterface.h"
-#include "gadgettools_export.h"
 
 #define MAXHOSTNAMELENGTH 1024
 
 namespace Gadgetron{
+
 class WriterTask : public ACE_Task<ACE_MT_SYNCH>
 {
 
@@ -46,17 +41,13 @@ public:
 	virtual int open(void* = 0)
 	{
 	  ACE_TRACE(( ACE_TEXT("WriterTask::open") ));
-      this->msg_queue()->high_water_mark(24.0*1024*1024*1024);
-
 	  return this->activate( THR_NEW_LWP | THR_JOINABLE, 1 );
 	}
 
-
 	int register_writer(size_t slot, GadgetMessageWriter* writer) {
-		return writers_.insert(slot,writer);
+		return writers_.insert( (unsigned int)slot,writer);
 	}
 
-
 	virtual int close(unsigned long flags)
 	{
 		int rval = 0;
@@ -120,7 +111,6 @@ public:
 
 	}
 
-
 protected:
 	ACE_SOCK_Stream* socket_;
 	GadgetronSlotContainer<GadgetMessageWriter> writers_;
@@ -165,7 +155,6 @@ protected:
 	GadgetronSlotContainer<GadgetMessageReader> readers_;
 	WriterTask writer_task_;
 	//GadgetronSlotContainer<GadgetMessageWriter> writers_;
-
 };
 
 }
diff --git a/toolboxes/gadgettools/GadgetronOSUtil.cpp b/toolboxes/gadgettools/GadgetronOSUtil.cpp
new file mode 100644
index 0000000..333eafc
--- /dev/null
+++ b/toolboxes/gadgettools/GadgetronOSUtil.cpp
@@ -0,0 +1,50 @@
+
+#include "GadgetronOSUtil.h"
+
+#ifdef _WIN32
+    #include <windows.h>
+#else
+    #include <sys/types.h>
+    #include <sys/stat.h>
+#endif // _WIN32
+
+#include <boost/filesystem.hpp>
+using namespace boost::filesystem;
+
+namespace Gadgetron{
+
+    // Creates workingdirectory (one level) if missing and opens its permissions to all; returns false if creation fails or throws.
+    bool create_folder_with_all_permissions(const std::string& workingdirectory)
+    {
+        boost::filesystem::path workingPath(workingdirectory);
+        try
+        {
+            // exists()/create_directory() throw filesystem_error on OS errors (e.g. missing parent)
+            if ( boost::filesystem::exists(workingPath) ) return true;
+            if ( !boost::filesystem::create_directory(workingPath) )
+            {
+                GADGET_ERROR_MSG("Error creating the working directory " << workingdirectory);
+                return false;
+            }
+        }
+        catch (const boost::filesystem::filesystem_error& err)
+        {
+            GADGET_ERROR_MSG("Error creating the working directory " << workingdirectory << " : " << err.what());
+            return false;
+        }
+
+        // set the permission for the folder; a failure here is logged but not treated as fatal
+        #ifdef _WIN32
+            try { boost::filesystem::permissions(workingPath, boost::filesystem::all_all); }
+            catch(...) { GADGET_ERROR_MSG("Error changing the permission of the working directory " << workingdirectory); }
+        #else
+            // plain chmod, in case an older boost without permissions() is used on non-win systems
+            if ( chmod(workingPath.c_str(), S_IRWXU|S_IRWXG|S_IRWXO) != 0 )
+            {
+                GADGET_ERROR_MSG("Error changing the permission of the working directory " << workingdirectory);
+            }
+        #endif // _WIN32
+
+        return true;
+    }
+}
diff --git a/toolboxes/gadgettools/GadgetronOSUtil.h b/toolboxes/gadgettools/GadgetronOSUtil.h
new file mode 100644
index 0000000..a4b890c
--- /dev/null
+++ b/toolboxes/gadgettools/GadgetronOSUtil.h
@@ -0,0 +1,21 @@
+#ifndef GADGETRONOSUTIL_H_
+#define GADGETRONOSUTIL_H_
+
+#include "gadgettools_export.h"
+#include "GadgetronCommon.h"
+#include <string>
+#include <iostream>
+
+#ifdef _WIN32
+    #include <windows.h>
+#else
+    
+#endif // _WIN32
+
+namespace Gadgetron{
+    // Creates workingdirectory when it does not exist yet and tries to grant all permissions on it; returns false if it could not be created.
+    EXPORTGADGETTOOLS bool create_folder_with_all_permissions(const std::string& workingdirectory);
+
+}
+
+#endif /* GADGETRONOSUTIL_H_ */
diff --git a/toolboxes/gadgettools/GadgetronSlotContainer.h b/toolboxes/gadgettools/GadgetronSlotContainer.h
index a68c596..83a3d8c 100644
--- a/toolboxes/gadgettools/GadgetronSlotContainer.h
+++ b/toolboxes/gadgettools/GadgetronSlotContainer.h
@@ -1,10 +1,3 @@
-/*
- * GadgetronSlotContainer.h
- *
- *  Created on: Nov 1, 2011
- *      Author: hansenms
- */
-
 #ifndef GADGETRONSLOTCONTAINER_H_
 #define GADGETRONSLOTCONTAINER_H_
 
diff --git a/toolboxes/gadgettools/gadgettools_export.h b/toolboxes/gadgettools/gadgettools_export.h
index 7142bd7..c7257fc 100644
--- a/toolboxes/gadgettools/gadgettools_export.h
+++ b/toolboxes/gadgettools/gadgettools_export.h
@@ -7,7 +7,7 @@
 
 
 #if defined (WIN32)
-#if defined (__BUILD_GADGETRON_GADGETTOOLS__) || defined (gadgettools_EXPORTS)
+#if defined (__BUILD_GADGETRON_GADGETTOOLS__) || defined (gadgetron_toolbox_gadgettools_EXPORTS)
 #define EXPORTGADGETTOOLS __declspec(dllexport)
 #else
 #define EXPORTGADGETTOOLS __declspec(dllimport)
diff --git a/toolboxes/gadgettools/ismrmrd/CMakeLists.txt b/toolboxes/gadgettools/ismrmrd/CMakeLists.txt
new file mode 100644
index 0000000..b8939be
--- /dev/null
+++ b/toolboxes/gadgettools/ismrmrd/CMakeLists.txt
@@ -0,0 +1,4 @@
+install (FILES  # image message reader/writer headers, installed into the "include" destination
+            GadgetImageMessageReader.h 
+            GadgetImageMessageWriter.h
+            DESTINATION include COMPONENT main)
diff --git a/toolboxes/gadgettools/ismrmrd/GadgetImageMessageReader.h b/toolboxes/gadgettools/ismrmrd/GadgetImageMessageReader.h
new file mode 100644
index 0000000..fa3252c
--- /dev/null
+++ b/toolboxes/gadgettools/ismrmrd/GadgetImageMessageReader.h
@@ -0,0 +1,196 @@
+#ifndef GADGETSOCKETRECEIVER_H
+#define GADGETSOCKETRECEIVER_H
+
+#include "ace/SOCK_Stream.h"
+#include "ace/Task.h"
+
+#include <complex>
+#include <iostream>
+
+#include "GadgetMRIHeaders.h"
+#include "ismrmrd/ismrmrd.h"
+#include "hoNDArray.h"
+#include "GadgetMessageInterface.h"
+#include "ismrmrd/meta.h"
+
+namespace Gadgetron
+{
+
+/**
+Default implementation of GadgetMessageReader for Image messages
+*/
+
+template <typename T> class GadgetImageMessageReader : public GadgetMessageReader
+{
+public:
+    // Reads an ISMRMRD::ImageHeader and then the pixel data from the stream. Returns the header
+    // message with the data array chained via cont(), or 0 (after logging) on any failure.
+    virtual ACE_Message_Block* read(ACE_SOCK_Stream* stream)
+    {
+        GadgetContainerMessage<ISMRMRD::ImageHeader>* imgh =
+            new GadgetContainerMessage<ISMRMRD::ImageHeader>();
+        ssize_t recv_count = 0;
+        if ((recv_count = stream->recv_n(imgh->getObjectPtr(), sizeof(ISMRMRD::ImageHeader))) <= 0) {
+            ACE_DEBUG( (LM_ERROR, ACE_TEXT("%P, %l, GadgetImageMessageReader, failed to read IMAGE Header\n")) );
+            imgh->release();
+            return 0;
+        }
+
+        // dimensions come from the received header; a 4th (channel) dimension only for multi-channel images
+        std::vector<size_t> dims(3);
+        dims[0] = imgh->getObjectPtr()->matrix_size[0];
+        dims[1] = imgh->getObjectPtr()->matrix_size[1];
+        dims[2] = imgh->getObjectPtr()->matrix_size[2];
+        if (imgh->getObjectPtr()->channels > 1) {
+            dims.push_back(imgh->getObjectPtr()->channels);
+        }
+
+        GadgetContainerMessage< hoNDArray< T > >* data =
+            new GadgetContainerMessage< hoNDArray< T > >();
+
+        try{ data->getObjectPtr()->create(&dims);}
+        catch (std::runtime_error &err){
+            GADGET_DEBUG_EXCEPTION(err,"GadgetImageMessageReader, failed to allocate memory\n");
+            imgh->release();
+            data->release(); // not chained to imgh yet, so it has to be released separately
+            return 0;
+        }
+
+        imgh->cont(data); // from here on releasing imgh also releases the chained data message
+        if ((recv_count = stream->recv_n(data->getObjectPtr()->get_data_ptr(), sizeof(T)*data->getObjectPtr()->get_number_of_elements())) <= 0) {
+            ACE_DEBUG( (LM_ERROR, ACE_TEXT("%P, %l, GadgetImageMessageReader, failed to read data from socket\n")) );
+            imgh->release();
+            return 0;
+        }
+
+        return imgh;
+    }
+};
+
+// for images with attributes
+template <typename T> class GadgetImageAttribMessageReader : public GadgetMessageReader
+{
+public:
+
+    // type of the length prefix that precedes the serialized meta attributes on the wire
+    typedef unsigned long long size_t_type;
+
+    // Reads, in this order: the ISMRMRD::ImageHeader, the length of the serialized meta
+    // attributes, the serialized attributes themselves and finally the pixel data.
+    // Returns the chain header -> data -> attributes, or 0 (after logging) on any failure.
+    virtual ACE_Message_Block* read(ACE_SOCK_Stream* stream)
+    {
+        GadgetContainerMessage<ISMRMRD::ImageHeader>* imgh =
+            new GadgetContainerMessage<ISMRMRD::ImageHeader>();
+
+        GadgetContainerMessage<ISMRMRD::MetaContainer>* imgAttrib =
+            new GadgetContainerMessage<ISMRMRD::MetaContainer>();
+
+        // read in ISMRMRD image header
+        ssize_t recv_count = 0;
+        if ((recv_count = stream->recv_n( imgh->getObjectPtr(), sizeof(ISMRMRD::ImageHeader))) <= 0)
+        {
+            ACE_DEBUG( (LM_ERROR, ACE_TEXT("%P, %l, GadgetImageAttribMessageReader, failed to read IMAGE Header\n")) );
+            imgh->release();
+            imgAttrib->release();
+            return 0;
+        }
+
+        // read in gadgetron image meta attributes
+        size_t_type len(0);
+        if ( ( recv_count = stream->recv_n( &len, sizeof(size_t_type)) ) <= 0 )
+        {
+            ACE_DEBUG( (LM_ERROR, ACE_TEXT("%P, %l, GadgetImageAttribMessageReader, failed to read IMAGE Meta Attributes length\n")) );
+            imgh->release();
+            imgAttrib->release();
+            return 0;
+        }
+
+        // The buffer is owned by a vector, so every return path below frees it. One extra zero
+        // byte is appended: deserialize() gets a bare char pointer (presumably NUL-terminated text).
+        std::vector<char> buf;
+        try
+        {
+            // len comes from the peer; an absurd value makes the allocation throw and is rejected
+            buf.resize(len);
+            buf.push_back('\0');
+        }
+        catch (...)
+        {
+            ACE_DEBUG( (LM_ERROR, ACE_TEXT("%P, %l, GadgetImageAttribMessageReader, failed to allocate IMAGE Meta Attributes buffer\n")) );
+            imgh->release();
+            imgAttrib->release();
+            return 0;
+        }
+
+        // the payload fills the first len bytes; the appended terminator stays untouched
+        if ( ( recv_count = stream->recv_n( &buf[0], len) ) <= 0 )
+        {
+            ACE_DEBUG( (LM_ERROR, ACE_TEXT("%P, %l, GadgetImageAttribMessageReader, failed to read IMAGE Meta Attributes\n")) );
+            imgh->release();
+            imgAttrib->release();
+            return 0;
+        }
+
+        // parse the serialized attributes into the MetaContainer
+        try
+        {
+            ISMRMRD::deserialize(&buf[0], *imgAttrib->getObjectPtr());
+        }
+        catch(...)
+        {
+            ACE_DEBUG( (LM_ERROR, ACE_TEXT("%P, %l, GadgetImageAttribMessageReader, failed to deserialize IMAGE Meta Attributes\n")) );
+            imgh->release();
+            imgAttrib->release();
+            return 0;
+        }
+
+        // read in image content
+        std::vector<size_t> dims(3);
+        dims[0] = imgh->getObjectPtr()->matrix_size[0];
+        dims[1] = imgh->getObjectPtr()->matrix_size[1];
+        dims[2] = imgh->getObjectPtr()->matrix_size[2];
+
+        if (imgh->getObjectPtr()->channels > 1)
+        {
+            dims.push_back(imgh->getObjectPtr()->channels);
+        }
+
+        // allocate the pixel array with the dimensions taken from the header
+        GadgetContainerMessage< hoNDArray< T > >* data = new GadgetContainerMessage< hoNDArray< T > >();
+
+        try
+        {
+            data->getObjectPtr()->create(&dims);
+        }
+        catch (std::runtime_error &err)
+        {
+            GADGET_DEBUG_EXCEPTION(err,"GadgetImageAttribMessageReader, failed to allocate memory\n");
+            // nothing is chained yet, so each message is released on its own
+            imgh->release();
+            imgAttrib->release();
+            data->release();
+            return 0;
+        }
+
+        // build the chain: header -> pixel data -> meta attributes
+        imgh->cont(data);
+        data->cont(imgAttrib);
+
+        // read the pixel data straight into the array
+        if ((recv_count = stream->recv_n(data->getObjectPtr()->get_data_ptr(), sizeof(T)*data->getObjectPtr()->get_number_of_elements())) <= 0)
+        {
+            ACE_DEBUG( (LM_ERROR, ACE_TEXT("%P, %l, GadgetImageAttribMessageReader, failed to read data from socket\n")) );
+            // imgh now heads the chain imgh -> data -> imgAttrib and release() walks the whole
+            // chain; releasing data/imgAttrib again here would be a double release
+            imgh->release();
+            return 0;
+        }
+
+        return imgh;
+    }
+};
+
+}
+
+#endif //GADGETSOCKETRECEIVER_H
diff --git a/toolboxes/gadgettools/GadgetImageMessageWriter.h b/toolboxes/gadgettools/ismrmrd/GadgetImageMessageWriter.h
similarity index 100%
rename from toolboxes/gadgettools/GadgetImageMessageWriter.h
rename to toolboxes/gadgettools/ismrmrd/GadgetImageMessageWriter.h
diff --git a/toolboxes/gadgettools/schema/gadgetron.xsd b/toolboxes/gadgettools/schema/gadgetron.xsd
index cfe50ca..ce9537a 100644
--- a/toolboxes/gadgettools/schema/gadgetron.xsd
+++ b/toolboxes/gadgettools/schema/gadgetron.xsd
@@ -1,53 +1,54 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <xs:schema xmlns="http://gadgetron.sf.net/gadgetron" xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified" targetNamespace="http://gadgetron.sf.net/gadgetron">
 
-  <xs:element name="gadgetronConfiguration">
-    <xs:complexType>
-      <xs:sequence>
+    <xs:element name="gadgetronConfiguration">
+        <xs:complexType>
+            <xs:sequence>
                 <xs:element name="port" type="xs:string"/>
-      </xs:sequence>
-    </xs:complexType>
-  </xs:element>
+                <xs:element maxOccurs="1" minOccurs="0" name="workingdirectory" type="xs:string"/>
+            </xs:sequence>
+        </xs:complexType>
+    </xs:element>
 
-  <xs:element name="gadgetronStreamConfiguration">
-    <xs:complexType>
-      <xs:sequence>
+    <xs:element name="gadgetronStreamConfiguration">
+        <xs:complexType>
+            <xs:sequence>
                 <xs:element maxOccurs="unbounded" minOccurs="0" name="reader">
-                	<xs:complexType>
-					      <xs:sequence>
-					      	<xs:element name="slot" type="xs:unsignedShort"/>
-					      	<xs:element name="dll" type="xs:string"/>
-					      	<xs:element name="classname" type="xs:string"/>
-					      </xs:sequence>
-          			</xs:complexType>
-        		</xs:element>
+                    <xs:complexType>
+                        <xs:sequence>
+                            <xs:element name="slot" type="xs:unsignedShort"/>
+                            <xs:element name="dll" type="xs:string"/>
+                            <xs:element name="classname" type="xs:string"/>
+                        </xs:sequence>
+                    </xs:complexType>
+                </xs:element>
                 <xs:element maxOccurs="unbounded" minOccurs="0" name="writer">
-                	<xs:complexType>
-					      <xs:sequence>
-					      	<xs:element maxOccurs="1" minOccurs="1" name="slot" type="xs:unsignedShort"/>
-					      	<xs:element maxOccurs="1" minOccurs="1"  name="dll" type="xs:string"/>
-					      	<xs:element maxOccurs="1" minOccurs="1"  name="classname" type="xs:string"/>
-					      </xs:sequence>
-          			</xs:complexType>
-        		</xs:element>
+                    <xs:complexType>
+                        <xs:sequence>
+                            <xs:element maxOccurs="1" minOccurs="1" name="slot" type="xs:unsignedShort"/>
+                            <xs:element maxOccurs="1" minOccurs="1"  name="dll" type="xs:string"/>
+                            <xs:element maxOccurs="1" minOccurs="1"  name="classname" type="xs:string"/>
+                        </xs:sequence>
+                    </xs:complexType>
+                </xs:element>
                 <xs:element maxOccurs="unbounded" minOccurs="0" name="gadget">
-                	<xs:complexType>
-					      <xs:sequence>
-					      	<xs:element maxOccurs="1" minOccurs="1"  name="name" type="xs:string"/>
-					      	<xs:element maxOccurs="1" minOccurs="1"  name="dll" type="xs:string"/>
-					      	<xs:element maxOccurs="1" minOccurs="1"  name="classname" type="xs:string"/>
-					      	<xs:element maxOccurs="unbounded" minOccurs="0" name="property">
-					      		<xs:complexType>
-					      			<xs:sequence>
-								      	<xs:element maxOccurs="1" minOccurs="1" name="name" type="xs:string"/>
-								      	<xs:element maxOccurs="1" minOccurs="1" name="value" type="xs:string"/>
-					      			</xs:sequence>		
-					      		</xs:complexType>
-              				</xs:element>
-           				  </xs:sequence>
-          </xs:complexType>
-        </xs:element>
-      </xs:sequence>
-    </xs:complexType>
-  </xs:element>
+                    <xs:complexType>
+                        <xs:sequence>
+                            <xs:element maxOccurs="1" minOccurs="1"  name="name" type="xs:string"/>
+                            <xs:element maxOccurs="1" minOccurs="1"  name="dll" type="xs:string"/>
+                            <xs:element maxOccurs="1" minOccurs="1"  name="classname" type="xs:string"/>
+                            <xs:element maxOccurs="unbounded" minOccurs="0" name="property">
+                                <xs:complexType>
+                                    <xs:sequence>
+                                        <xs:element maxOccurs="1" minOccurs="1" name="name" type="xs:string"/>
+                                        <xs:element maxOccurs="1" minOccurs="1" name="value" type="xs:string"/>
+                                    </xs:sequence>
+                                </xs:complexType>
+                            </xs:element>
+                        </xs:sequence>
+                    </xs:complexType>
+                </xs:element>
+            </xs:sequence>
+        </xs:complexType>
+    </xs:element>
 </xs:schema>
diff --git a/toolboxes/gtplus/CMakeLists.txt b/toolboxes/gtplus/CMakeLists.txt
index b2dcea3..e8a4c90 100644
--- a/toolboxes/gtplus/CMakeLists.txt
+++ b/toolboxes/gtplus/CMakeLists.txt
@@ -1,3 +1,5 @@
+# gadgetron_toolbox_gtplus is a toolbox for general reconstruction to support all ISMRMRD dimensions
+
 if ( HAS_64_BIT )
 
     if (WIN32)
@@ -9,7 +11,7 @@ if ( HAS_64_BIT )
     endif(WIN32)
 
     if (MKL_FOUND)
-        MESSAGE("MKL Found for gtPlus ... ")
+        MESSAGE("MKL Found for gadgetron_toolbox_gtplus ... ")
         list(APPEND EXTRA_MKL_LIBRARIES mkl_core)
         if ( USE_OPENMP )
             list(APPEND EXTRA_MKL_LIBRARIES mkl_intel_thread)
@@ -23,8 +25,6 @@ if ( HAS_64_BIT )
         ${ACE_INCLUDE_DIR} 
         ${Boost_INCLUDE_DIR}
         ${ISMRMRD_INCLUDE_DIR}
-        ${ISMRMRD_XSD_INCLUDE_DIR}
-        ${XSD_INCLUDE_DIR}
         ${FFTW3_INCLUDE_DIR}
         ${ARMADILLO_INCLUDE_DIRS}
         ${MKL_INCLUDE_DIR}
@@ -33,8 +33,12 @@ if ( HAS_64_BIT )
         ${CMAKE_SOURCE_DIR}/toolboxes/core
         ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
         ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
+        ${CMAKE_SOURCE_DIR}/toolboxes/fft/cpu
         ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/hostutils
-        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/arma_math
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/algorithm
         ${CMAKE_SOURCE_DIR}/toolboxes/operators
         ${CMAKE_SOURCE_DIR}/toolboxes/operators/cpu
         ${CMAKE_SOURCE_DIR}/toolboxes/solvers
@@ -42,13 +46,15 @@ if ( HAS_64_BIT )
         ${CMAKE_SOURCE_DIR}/gadgets/core
         ${HDF5_INCLUDE_DIR}
         ${HDF5_INCLUDE_DIR}/cpp
-        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/arma_math
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
         ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow
+        ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow/cpu
         ${CMAKE_SOURCE_DIR}/toolboxes/gtplus
         ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/util
         ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/workflow
-        ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/algorithm
+        ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/algorithm 
         ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/solver
+        ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/application
         ${CMAKE_SOURCE_DIR}/toolboxes/gadgettools
         ${CMAKE_SOURCE_DIR}/apps/gadgetron
         ${CMAKE_SOURCE_DIR}/apps/matlab
@@ -56,169 +62,177 @@ if ( HAS_64_BIT )
         ${CMAKE_SOURCE_DIR}/gadgets/gtPlus 
     )
 
-    set( util_files util/gtPlusIOBase.h
+    set( gtplus_io_header_files 
+        GtPlusIOExport.h 
+        util/gtPlusIOBase.h
+        util/gtPlusIOAnalyze.h)
+
+    set( gtplus_io_src_files 
         util/gtPlusIOBase.cpp
-        util/gtPlusIOAnalyze.h
-        util/gtPlusIOAnalyze.cpp
+        util/gtPlusIOAnalyze.cpp)
+
+    set( util_header_files 
         util/gtPlusMemoryManager.h
-        util/gtPlusMemoryManager.cpp 
         util/hoNDArrayMemoryManaged.h 
-        util/hoNDArrayMemoryManaged.hxx )
-
-    set( workflow_files workflow/gtPlusISMRMRDReconWorkFlow.h
-        workflow/gtPlusISMRMRDReconWorkFlowCartesian.h
-        workflow/gtPlusISMRMRDReconWorkFlowCartesian2DT.h
-        workflow/gtPlusISMRMRDReconWorkFlowCartesian3DT.h
-        workflow/gtPlusISMRMRDReconUtil.h
-        workflow/gtPlusISMRMRDReconUtil.cpp
-        workflow/gtPlusISMRMRDReconUtil.hxx
-        workflow/gtPlusISMRMRDReconWorkOrder.h
-        workflow/gtPlusISMRMRDReconWorkOrder2DT.h
-        workflow/gtPlusISMRMRDReconWorkOrder3DT.h
-        workflow/gtPlusISMRMRDReconWorker.h
-        workflow/gtPlusISMRMRDReconWorker2DT.h
-        workflow/gtPlusISMRMRDReconWorker3DT.h
-        workflow/gtPlusISMRMRDReconWorker2DTGRAPPA.h
-        workflow/gtPlusISMRMRDReconWorker2DTSPIRIT.h
-        workflow/gtPlusISMRMRDReconWorker2DTL1SPIRITNCG.h
-        workflow/gtPlusISMRMRDReconWorker2DTNoAcceleration.h
-        workflow/gtPlusISMRMRDReconWorker3DTGRAPPA.h
-        workflow/gtPlusISMRMRDReconWorker3DTSPIRIT.h
-        workflow/gtPlusISMRMRDReconWorker3DTL1SPIRITNCG.h
-        workflow/gtPlusISMRMRDReconWorker3DTNoAcceleration.h
-        workflow/gtPlusCloudScheduler.h
-        workflow/gtPlusCloudScheduler.cpp )
-
-    set( algorithm_files algorithm/gtPlusAlgorithmBase.h 
-                        algorithm/gtPlusGRAPPA.h 
-                        algorithm/gtPlusSPIRIT.h
-                        algorithm/gtPlusOperator.h 
-                        algorithm/gtPlusSPIRITOperator.h 
-                        algorithm/gtPlusSPIRIT2DOperator.h 
-                        algorithm/gtPlusSPIRIT3DOperator.h 
-                        algorithm/gtPlusSPIRIT2DTOperator.h 
-                        algorithm/gtPlusSPIRITNoNullSpaceOperator.h 
-                        algorithm/gtPlusSPIRITNoNullSpace2DOperator.h 
-                        algorithm/gtPlusSPIRITNoNullSpace3DOperator.h 
-                        algorithm/gtPlusSPIRITNoNullSpace2DTOperator.h 
-                        algorithm/gtPlusWaveletOperator.h 
-                        algorithm/gtPlusWavelet2DOperator.h 
-                        algorithm/gtPlusWavelet3DOperator.h 
-                        algorithm/gtPlusWaveletNoNullSpace2DOperator.h 
-                        algorithm/gtPlusWaveletNoNullSpace3DOperator.h 
-                        algorithm/gtPlusDataFidelityOperator.h )
-
-    set( solver_files solver/gtPlusSolver.h 
-                        solver/gtPlusLinearSolver.h 
-                        solver/gtPlusNonLinearSolver.h
-                        solver/gtPlusLSQRSolver.h 
-                        solver/gtPlusNCGSolver.h )
-
-    set( gtCloud_files config/gtCloud/myCloud_2DT.txt 
-                    config/gtCloud/myCloud_3DT.txt 
-                    config/gtCloud/myCloud_2DT_DualLayer.txt
-                    config/gtCloud/myCloud_2DT_DualLayer_FirstLayer.txt )
-
-    set( config_files config/GadgetronProgram_gtPlus_2DT_Cartesian.xml
-                    config/GadgetronProgram_gtPlus_2DT_Cartesian_L1SPIRIT.xml
-                    config/GadgetronProgram_gtPlus_2DT_Cartesian_SPIRIT.xml
-                    config/GadgetronProgram_gtPlus_2DT_Cartesian_CloudNode.xml
-                    config/GadgetronProgram_gtPlus_2DT_Cartesian_DualLayer_Gateway_L1SPIRIT.xml
-                    config/GadgetronProgram_gtPlus_2DT_Cartesian_DualLayer_Gateway_SPIRIT.xml
-                    config/GadgetronProgram_gtPlus_2DT_Cartesian_FirstLayer_CloudNode.xml
-                    config/GadgetronProgram_gtPlus_2DT_FatWater.xml
-                    config/GadgetronProgram_gtPlus_2DT_LGE.xml
-                    config/GadgetronProgram_gtPlus_2DT_MOLLI.xml
-                    config/GadgetronProgram_gtPlus_2DT_Perfusion.xml
-                    config/GadgetronProgram_gtPlus_2DT_RealTimeCine.xml
-                    config/GadgetronProgram_gtPlus_2DT_RealTimeFlow.xml
-                    config/GadgetronProgram_gtPlus_2DT_T2W.xml
-                    config/GadgetronProgram_gtPlus_3DT_Cartesian.xml
-                    config/GadgetronProgram_gtPlus_3DT_Cartesian_CloudNode.xml
-                    config/GadgetronProgram_gtPlus_3DT_Cartesian_L1SPIRIT.xml
-                    config/GadgetronProgram_gtPlus_3DT_Cartesian_SPIRIT.xml )
-
-    set( schema_files )
+        util/hoNDArrayMemoryManaged.hxx 
+        util/gtPlusUtil.h 
+        util/gtPlusUtil.hxx )
+
+    set( util_src_files 
+        util/gtPlusMemoryManager.cpp )
+
+    if (MKL_FOUND OR ARMADILLO_FOUND)
+        set( workflow_header_files 
+            workflow/gtPlusISMRMRDReconWorkFlow.h
+            workflow/gtPlusISMRMRDReconWorkFlowCartesian.h
+            workflow/gtPlusISMRMRDReconWorkFlowCartesian2DT.h
+            workflow/gtPlusISMRMRDReconWorkFlowCartesian3DT.h
+            workflow/gtPlusISMRMRDReconUtil.h
+            workflow/gtPlusISMRMRDReconUtil.hxx
+            workflow/gtPlusISMRMRDReconCoilMapEstimation.h
+            workflow/gtPlusISMRMRDReconWorkOrder.h
+            workflow/gtPlusISMRMRDReconWorkOrder2DT.h
+            workflow/gtPlusISMRMRDReconWorkOrder3DT.h
+            workflow/gtPlusISMRMRDReconWorker.h
+            workflow/gtPlusISMRMRDReconWorker2DT.h
+            workflow/gtPlusISMRMRDReconWorker3DT.h
+            workflow/gtPlusISMRMRDReconWorker2DTGRAPPA.h
+            workflow/gtPlusISMRMRDReconWorker2DTSPIRIT.h
+            workflow/gtPlusISMRMRDReconWorker2DTL1SPIRITNCG.h
+            workflow/gtPlusISMRMRDReconWorker2DTNoAcceleration.h
+            workflow/gtPlusISMRMRDReconWorker3DTGRAPPA.h
+            workflow/gtPlusISMRMRDReconWorker3DTSPIRIT.h
+            workflow/gtPlusISMRMRDReconWorker3DTL1SPIRITNCG.h
+            workflow/gtPlusISMRMRDReconWorker3DTNoAcceleration.h
+            workflow/gtPlusCloudScheduler.h )
+
+        set( workflow_src_files 
+            workflow/gtPlusISMRMRDReconUtil.cpp
+            workflow/gtPlusCloudScheduler.cpp )
+
+        set( algorithm_header_files 
+            algorithm/gtPlusAlgorithmBase.h 
+            algorithm/gtPlusGRAPPA.h 
+            algorithm/gtPlusSPIRIT.h
+            algorithm/gtPlusOperator.h 
+            algorithm/gtPlusSPIRITOperator.h 
+            algorithm/gtPlusSPIRIT2DOperator.h 
+            algorithm/gtPlusSPIRIT3DOperator.h 
+            algorithm/gtPlusSPIRIT2DTOperator.h 
+            algorithm/gtPlusSPIRITNoNullSpaceOperator.h 
+            algorithm/gtPlusSPIRITNoNullSpace2DOperator.h 
+            algorithm/gtPlusSPIRITNoNullSpace3DOperator.h 
+            algorithm/gtPlusSPIRITNoNullSpace2DTOperator.h 
+            algorithm/gtPlusWaveletOperator.h 
+            algorithm/gtPlusWavelet2DOperator.h 
+            algorithm/gtPlusWavelet3DOperator.h 
+            algorithm/gtPlusWaveletNoNullSpace2DOperator.h 
+            algorithm/gtPlusWaveletNoNullSpace3DOperator.h 
+            algorithm/gtPlusDataFidelityOperator.h 
+            algorithm/FreeFormDeformation/gtplusFFDBase.h
+            algorithm/FreeFormDeformation/gtplusBSplineFFD.h
+            algorithm/FreeFormDeformation/gtplusMLFFD.h
+            algorithm/FreeFormDeformation/gtplusBSplineFFD2D.h
+            algorithm/FreeFormDeformation/gtplusBSplineFFD3D.h 
+            algorithm/FreeFormDeformation/gtplusBSplineFFD4D.h )
+
+        set( algorithm_src_files )
+
+        set( solver_header_files 
+            solver/gtPlusSolver.h 
+            solver/gtPlusLinearSolver.h 
+            solver/gtPlusNonLinearSolver.h
+            solver/gtPlusLSQRSolver.h 
+            solver/gtPlusNCGSolver.h )
+
+        set( solver_src_files )
+
+        set( application_header_files )
+
+        set( application_src_files )
+
+    endif (MKL_FOUND OR ARMADILLO_FOUND)
 
     # matlab
     if (MATLAB_FOUND)
         message("MATLAB FOUND: ${MATLAB_INCLUDE_DIR}, Matlab gt interface is being compiled.")
         SET(CMAKE_DEBUG_POSTFIX)
         include_directories( ${MATLAB_INCLUDE_DIR} )
-        set( matlab_files matlab/gtMatlabConverter.h
-                          matlab/gtMatlabConverterComplex.h )
+        set( matlab_files matlab/gtMatlab.h 
+                          matlab/gtMatlabConverter.h
+                          matlab/gtMatlabConverterComplex.h 
+                          matlab/gtMatlabImage.h )
 
     else(MATLAB_FOUND)
         message("MATLAB NOT FOUND ...")
         set( matlab_files )
     endif(MATLAB_FOUND)
 
-    add_library(gtplus ${LIBTYPE} GtPlusExport.h ${util_files} ${workflow_files} ${algorithm_files} ${solver_files} ${config_files} ${schema_files} ${matlab_files})
-
-    source_group(util FILES ${util_files})
-    source_group(workflow FILES ${workflow_files})
-
-    set( config_gtCloud_files ${config_files} ${gtCloud_files} )
-    source_group(config FILES ${config_gtCloud_files})
-
-    source_group(schema FILES ${schema_files})
-    source_group(algorithm FILES ${algorithm_files})
-    source_group(solver FILES ${solver_files})
-
-    if (MATLAB_FOUND)
-        source_group(matlab FILES ${matlab_files})
-    endif(MATLAB_FOUND)
-
-    target_link_libraries(gtplus cpucore cpucore_math ${MKL_LIBRARIES} ${EXTRA_MKL_LIBRARIES})
-
-    if (CUDA_FOUND)
-        target_link_libraries(gtplus gpuparallelmri)
-    endif (CUDA_FOUND)
-
-    install(TARGETS gtplus DESTINATION lib)
+    set(gtplus_files GtPlusExport.h 
+                GtPlusDefinition.h 
+                ${util_header_files} 
+                ${util_src_files} )
+
+    source_group(util FILES ${util_header_files} ${util_src_files})
+
+    if (MKL_FOUND OR ARMADILLO_FOUND)
+        set(gtplus_files ${gtplus_files} 
+                ${workflow_header_files} 
+                ${workflow_src_files} 
+                ${algorithm_header_files} 
+                ${algorithm_src_files} 
+                ${solver_header_files} 
+                ${solver_src_files} 
+                ${application_header_files} 
+                ${application_src_files} 
+                ${matlab_files} )
+
+        source_group(workflow FILES     ${workflow_header_files}    ${workflow_src_files})
+        source_group(algorithm FILES    ${algorithm_header_files}   ${algorithm_src_files})
+        source_group(solver FILES       ${solver_header_files}      ${solver_src_files})
+        source_group(application FILES  ${application_header_files} ${application_src_files})
+
+        if (MATLAB_FOUND)
+            source_group(matlab FILES ${matlab_files})
+        endif(MATLAB_FOUND)
+    endif (MKL_FOUND OR ARMADILLO_FOUND)
+
+    add_library(gadgetron_toolbox_gtplus_io ${LIBTYPE} ${gtplus_io_header_files} ${gtplus_io_src_files} )
+    set_target_properties(gadgetron_toolbox_gtplus_io PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+
+    add_library(gadgetron_toolbox_gtplus ${LIBTYPE} 
+                ${gtplus_files} )
+
+    set_target_properties(gadgetron_toolbox_gtplus PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+
+    target_link_libraries(gadgetron_toolbox_gtplus 
+                          gadgetron_toolbox_cpucore 
+                          gadgetron_toolbox_cpucore_math 
+                          gadgetron_toolbox_cpufft
+                          gadgetron_toolbox_gtplus_io 
+                          gadgetron_toolbox_cpucore_math )
+
+    install(TARGETS gadgetron_toolbox_gtplus DESTINATION lib COMPONENT main)
+    install(TARGETS gadgetron_toolbox_gtplus_io DESTINATION lib COMPONENT main)
 
     # install gtplus files
-    install (FILES  GtPlusExport.h
-                    util/gtPlusIOBase.h 
-                    util/gtPlusIOAnalyze.h 
-                    util/hoNDArrayMemoryManaged.h 
-                    util/hoNDArrayMemoryManaged.hxx 
-                    util/gtPlusMemoryManager.h 
-                    workflow/gtPlusISMRMRDReconWorkFlow.h
-                    workflow/gtPlusISMRMRDReconWorkFlowCartesian.h
-                    workflow/gtPlusISMRMRDReconWorkFlowCartesian2DT.h
-                    workflow/gtPlusISMRMRDReconWorkFlowCartesian3DT.h
-                    workflow/gtPlusISMRMRDReconUtil.h
-                    workflow/gtPlusISMRMRDReconUtil.hxx
-                    workflow/gtPlusISMRMRDReconWorkOrder.h
-                    workflow/gtPlusISMRMRDReconWorker.h
-                    workflow/gtPlusISMRMRDReconWorker2DT.h
-                    workflow/gtPlusISMRMRDReconWorker3DT.h
-                    workflow/gtPlusISMRMRDReconWorker2DTGRAPPA.h
-                    workflow/gtPlusISMRMRDReconWorker2DTNoAcceleration.h
-                    workflow/gtPlusISMRMRDReconWorker3DTGRAPPA.h
-                    workflow/gtPlusISMRMRDReconWorker3DTNoAcceleration.h
-                    algorithm/gtPlusAlgorithmBase.h
-                    algorithm/gtPlusGRAPPA.h
-                    algorithm/gtPlusSPIRIT.h
-                    DESTINATION include)
-
-    # install gadgetron program files
-    install (FILES  ${config_files} 
-                    DESTINATION config)
-
-    install (FILES  ${gtCloud_files} 
-                    DESTINATION config/gtCloud)
-
-    install (FILES  ${schema_files} 
-                    DESTINATION schema)
-
-    if (MATLAB_FOUND)
-        add_subdirectory(matlab)
-    endif(MATLAB_FOUND)
-
-    if (GTEST_FOUND)
+    install (FILES  
+            GtPlusExport.h 
+            GtPlusDefinition.h 
+            ${util_header_files} 
+            ${workflow_header_files} 
+            ${algorithm_header_files} 
+            ${solver_header_files} 
+            ${application_header_files} 
+            ${gtplus_io_header_files} 
+            ${matlab_files}
+            DESTINATION include COMPONENT main)
+
+    # This one depends on the gadgets being compiled
+    if (ACE_FOUND AND ISMRMRD_FOUND AND FFTW3_FOUND)
+      if (GTEST_FOUND)
         add_subdirectory(ut)
-    endif (GTEST_FOUND)
+      endif (GTEST_FOUND)
+    endif (ACE_FOUND AND ISMRMRD_FOUND AND FFTW3_FOUND)
 
 endif ( HAS_64_BIT )
diff --git a/toolboxes/gtplus/GtPlusDefinition.h b/toolboxes/gtplus/GtPlusDefinition.h
new file mode 100644
index 0000000..301e9db
--- /dev/null
+++ b/toolboxes/gtplus/GtPlusDefinition.h
@@ -0,0 +1,222 @@
+/** \file   GtPlusDefinition.h
+    \brief  Define the symbols for GtPlus toolbox
+            The ISMRMRD format is fully supported in this toolbox.
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "GtPlusExport.h"
+
+#include "ismrmrd/ismrmrd.h"
+
+namespace Gadgetron
+{
+    namespace gtPlus
+    {
+        // define the dimensions of ISMRMRD; enumerators are numbered consecutively from 32 (DIM_ReadOut) up to DIM_NONE
+        enum ISMRMRDDIM
+        {
+            DIM_ReadOut = 32,
+            DIM_Encoding1,
+            DIM_Channel,
+            DIM_Slice,
+            DIM_Encoding2,
+            DIM_Contrast,
+            DIM_Phase,
+            DIM_Repetition,
+            DIM_Set,
+            DIM_Segment,
+            DIM_Average,
+            DIM_other1,
+            DIM_other2,
+            DIM_other3,
+            DIM_NONE
+        };
+
+        // define the reconstruction algorithms; enumerators are numbered consecutively from 64, ending with ISMRMRD_NONE
+        enum ISMRMRDALGO
+        {
+            ISMRMRD_GRAPPA = 64,
+            ISMRMRD_SENSE,
+            ISMRMRD_SPIRIT,
+            ISMRMRD_L1SPIRIT,
+            ISMRMRD_SOFTSENSE,
+            ISMRMRD_L1SOFTSENSE,
+            ISMRMRD_2DTBINNING,
+            ISMRMRD_2DTBINNING_FLOW,
+            ISMRMRD_L1SPIRIT_SLEP,
+            ISMRMRD_L1SPIRIT_SLEP_MOTION_COMP,
+            ISMRMRD_NONE
+        };
+
+        // define the coil sensitivity map estimation algorithms (ISMRMRD_SOUHEIL = 96, ISMRMRD_SOUHEIL_ITER = 97)
+        enum ISMRMRDCOILMAPALGO
+        {
+            ISMRMRD_SOUHEIL = 96,
+            ISMRMRD_SOUHEIL_ITER
+        };
+
+        // define the partial fourier/asymmetric echo handling algorithms; enumerators are numbered consecutively from 128, ending with ISMRMRD_PF_NONE
+        enum ISMRMRDPFALGO
+        {
+            ISMRMRD_PF_HOMODYNE = 128,          // iterative homodyne
+            ISMRMRD_PF_POCS,                    // POCS
+            ISMRMRD_PF_FENGHUANG,               // convolution based method
+            ISMRMRD_PF_ZEROFILLING_FILTER,      // zero-filling with partial fourier filter
+            ISMRMRD_PF_ZEROFILLING,             // zero-filling without partial fourier filter
+            ISMRMRD_PF_NONE
+        };
+
+        // define the kspace filter types; enumerators are numbered consecutively from 160, ending with ISMRMRD_FILTER_NONE
+        enum ISMRMRDKSPACEFILTER
+        {
+            ISMRMRD_FILTER_GAUSSIAN = 160,
+            ISMRMRD_FILTER_HANNING,
+            ISMRMRD_FILTER_TUKEY,
+            ISMRMRD_FILTER_TAPERED_HANNING,
+            ISMRMRD_FILTER_NONE
+        };
+
+        // define the calibration modes of ISMRMRD; enumerators are numbered consecutively from 256 (ISMRMRD_embedded) to 261 (ISMRMRD_noacceleration)
+        enum ISMRMRDCALIBMODE
+        {
+            ISMRMRD_embedded = 256,
+            ISMRMRD_interleaved,
+            ISMRMRD_separate,
+            ISMRMRD_external,
+            ISMRMRD_other,
+            ISMRMRD_noacceleration
+        };
+
+        // define the interpolation methods; enumerators are numbered consecutively from 512
+        enum ISMRMRDINTERP
+        {
+            ISMRMRD_INTERP_LINEAR = 512,
+            ISMRMRD_INTERP_SPLINE,
+            ISMRMRD_INTERP_BSPLINE
+        };
+
+        // define the interpolation methods for retro-gating; enumerators are numbered consecutively from 600
+        enum ISMRMRDINTERPRETROGATING
+        {
+            ISMRMRD_INTERP_RETRO_GATING_LINEAR = 600,
+            ISMRMRD_INTERP_RETRO_GATING_CUBIC, 
+            ISMRMRD_INTERP_RETRO_GATING_BSPLINE
+        };
+
+        /// definition of image meta attribute names (preprocessor macros, so they are not scoped by the enclosing namespace)
+        /// user can set these attributes to record some properties of generated imaging results
+        /// how to interpret these attributes depends on the client side
+        #define GTPLUS_IMAGENUMBER                          "GT_ImageNumber"
+        #define GTPLUS_IMAGECOMMENT                         "GT_ImageComment"
+        #define GTPLUS_IMAGEPROCESSINGHISTORY               "GT_ImageProcessingHistory"
+        #define GTPLUS_IMAGE_CATEGORY                       "GT_ImageCategory"
+        #define GTPLUS_SEQUENCEDESCRIPTION                  "GT_SeqDescription"
+        #define GTPLUS_IMAGE_WINDOWCENTER                   "GT_WindowCenter"
+        #define GTPLUS_IMAGE_WINDOWWIDTH                    "GT_WindowWidth"
+        #define GTPLUS_IMAGE_SCALE_RATIO                    "GT_ScaleRatio"
+        #define GTPLUS_IMAGE_SCALE_OFFSET                   "GT_ScaleOffset"
+        #define GTPLUS_IMAGE_COLORMAP                       "GT_ColorMap"
+        #define GTPLUS_IMAGE_ECHOTIME                       "GT_TE"
+        #define GTPLUS_IMAGE_INVERSIONTIME                  "GT_TI"
+
+        /// role of image data: GTPLUS_DATA_ROLE is the attribute name, the macros below are the role strings
+        #define GTPLUS_DATA_ROLE                            "GT_DataRole"
+        #define GTPLUS_IMAGE_REGULAR                        "GT_Image"
+        #define GTPLUS_IMAGE_RETRO                          "GT_ImageRetro"
+        #define GTPLUS_IMAGE_MOCORECON                      "GT_ImageMoCo"
+        #define GTPLUS_IMAGE_GFACTOR                        "GT_Gfactor"
+        #define GTPLUS_IMAGE_SNR_MAP                        "GT_SNR_MAP"
+        #define GTPLUS_IMAGE_STD_MAP                        "GT_STD_MAP"
+        #define GTPLUS_IMAGE_WRAPAROUNDMAP                  "GT_WrapAround_MAP"
+        #define GTPLUS_IMAGE_PHASE                          "GT_Phase"
+        #define GTPLUS_IMAGE_INTENSITY_UNCHANGED            "GT_Image_Intensity_Unchanged"
+        #define GTPLUS_IMAGE_AIF                            "GT_AIF"
+        // images other than the regular reconstruction results
+        #define GTPLUS_IMAGE_OTHER                          "GT_Image_Other"
+        // other data roles (these strings carry no "GT_" prefix)
+        #define GTPLUS_IMAGE_T2W                            "T2W"
+        #define GTPLUS_IMAGE_PD                             "PD"
+        #define GTPLUS_IMAGE_MAGIR                          "MAGIR"
+        #define GTPLUS_IMAGE_PSIR                           "PSIR"
+
+        #define GTPLUS_IMAGE_T1MAP                          "T1"
+        #define GTPLUS_IMAGE_T1SDMAP                        "T1SD"
+        #define GTPLUS_IMAGE_T2MAP                          "T2"
+        #define GTPLUS_IMAGE_T2SDMAP                        "T2SD"
+        #define GTPLUS_IMAGE_T2STARMAP                      "T2STAR"
+        #define GTPLUS_IMAGE_T2STARMASKMAP                  "T2SMASKMAP" // NOTE(review): value is "T2SMASKMAP", not "T2STARMASKMAP" - confirm this is intended
+        #define GTPLUS_IMAGE_T2STARSDMAP                    "T2STARSD"
+        #define GTPLUS_IMAGE_T2STARAMAP                     "T2STARAMAP"
+        #define GTPLUS_IMAGE_T2STARTRUNCMAP                 "T2STARTRUNCMAP"
+
+        #define GTPLUS_IMAGE_FAT                            "FAT"
+        #define GTPLUS_IMAGE_WATER                          "WATER"
+        #define GTPLUS_IMAGE_FREQMAP                        "FREQMAP"
+        #define GTPLUS_IMAGE_B1MAP                          "B1MAP"
+        #define GTPLUS_IMAGE_FLIPANGLEMAP                   "FLIPANGLEMAP"
+
+	//MSH: Interventional MRI (Interactive Real Time, IRT)
+        #define GTPLUS_IMAGE_IRT_IMAGE                      "IRT_IMAGE"
+        #define GTPLUS_IMAGE_IRT_DEVICE                     "IRT_DEVICE"
+        #define GTPLUS_IMAGE_NUM_DEVICE_CHA                 "IRT_NUM_DEVICE_CHA"
+        #define GTPLUS_IMAGE_CUR_DEVICE_CHA                 "IRT_CUR_DEVICE_CHA"
+
+        /// data flow tag
+        /// if this flag is set to 1 for an image, the image is immediately passed to the next gadget
+        /// if this flag is 0, the image is one stored by the accumulator
+        /// whether to pass a stored image to the next gadget is determined by the processing gadget itself
+        #define GTPLUS_PASS_IMMEDIATE                       "GT_PASSIMAGE_IMMEDIATE"
+
+        /// data processing tags, used with ImageProcessingHistory; note that "AVE" is also the value of GTPLUS_AVERAGE
+        #define GTPLUS_IMAGE_SURFACECOILCORRECTION           "NORM"
+        #define GTPLUS_IMAGE_FILTER                          "FIL"
+        #define GTPLUS_IMAGE_MOCO                            "MOCO"
+        #define GTPLUS_IMAGE_AVE                             "AVE"
+
+        /// dimension strings: short text labels, listed in the same order as the ISMRMRDDIM enumerators
+        #define GTPLUS_RO                                    "RO"
+        #define GTPLUS_E1                                    "E1"
+        #define GTPLUS_CHA                                   "CHA"
+        #define GTPLUS_SLC                                   "SLC"
+        #define GTPLUS_E2                                    "E2"
+        #define GTPLUS_CONTRAST                              "CON"
+        #define GTPLUS_PHASE                                 "PHS"
+        #define GTPLUS_REP                                   "REP"
+        #define GTPLUS_SET                                   "SET"
+        #define GTPLUS_SEGMENT                               "SEG"
+        #define GTPLUS_AVERAGE                               "AVE"
+        #define GTPLUS_OTHER1                                "OTH1"
+        #define GTPLUS_OTHER2                                "OTH2"
+        #define GTPLUS_OTHER3                                "OTH3"
+        #define GTPLUS_NONE                                  "NONE"
+
+        /// ISMRMRD Image field names; each macro expands to its own name as a string literal
+        #define ISMRMRD_IMAGE_version                       "ISMRMRD_IMAGE_version"
+        #define ISMRMRD_IMAGE_flags                         "ISMRMRD_IMAGE_flags"
+        #define ISMRMRD_IMAGE_measurement_uid               "ISMRMRD_IMAGE_measurement_uid"
+        #define ISMRMRD_IMAGE_matrix_size                   "ISMRMRD_IMAGE_matrix_size"
+        #define ISMRMRD_IMAGE_field_of_view                 "ISMRMRD_IMAGE_field_of_view"
+        #define ISMRMRD_IMAGE_channels                      "ISMRMRD_IMAGE_channels"
+        #define ISMRMRD_IMAGE_position                      "ISMRMRD_IMAGE_position"
+        #define ISMRMRD_IMAGE_read_dir                      "ISMRMRD_IMAGE_read_dir"
+        #define ISMRMRD_IMAGE_phase_dir                     "ISMRMRD_IMAGE_phase_dir"
+        #define ISMRMRD_IMAGE_slice_dir                     "ISMRMRD_IMAGE_slice_dir"
+        #define ISMRMRD_IMAGE_patient_table_position        "ISMRMRD_IMAGE_patient_table_position"
+        #define ISMRMRD_IMAGE_average                       "ISMRMRD_IMAGE_average"
+        #define ISMRMRD_IMAGE_slice                         "ISMRMRD_IMAGE_slice"
+        #define ISMRMRD_IMAGE_contrast                      "ISMRMRD_IMAGE_contrast"
+        #define ISMRMRD_IMAGE_phase                         "ISMRMRD_IMAGE_phase"
+        #define ISMRMRD_IMAGE_repetition                    "ISMRMRD_IMAGE_repetition"
+        #define ISMRMRD_IMAGE_set                           "ISMRMRD_IMAGE_set"
+        #define ISMRMRD_IMAGE_acquisition_time_stamp        "ISMRMRD_IMAGE_acquisition_time_stamp"
+        #define ISMRMRD_IMAGE_physiology_time_stamp         "ISMRMRD_IMAGE_physiology_time_stamp"
+        #define ISMRMRD_IMAGE_image_data_type               "ISMRMRD_IMAGE_image_data_type"
+        #define ISMRMRD_IMAGE_image_type                    "ISMRMRD_IMAGE_image_type"
+        #define ISMRMRD_IMAGE_image_index                   "ISMRMRD_IMAGE_image_index"
+        #define ISMRMRD_IMAGE_image_series_index            "ISMRMRD_IMAGE_image_series_index"
+        #define ISMRMRD_IMAGE_user_int                      "ISMRMRD_IMAGE_user_int"
+        #define ISMRMRD_IMAGE_user_float                    "ISMRMRD_IMAGE_user_float"
+    }
+}
diff --git a/toolboxes/gtplus/GtPlusIOExport.h b/toolboxes/gtplus/GtPlusIOExport.h
new file mode 100644
index 0000000..a771a25
--- /dev/null
+++ b/toolboxes/gtplus/GtPlusIOExport.h
@@ -0,0 +1,20 @@
+/** \file       GtPlusIOExport.h
+    \brief      Define EXPORTGTPLUSIO: __declspec(dllexport)/__declspec(dllimport) on WIN32 non-static builds, empty otherwise
+    \author     Hui Xue
+*/
+
+#pragma once
+
+#if defined (WIN32)
+    #ifdef BUILD_TOOLBOX_STATIC
+        #define EXPORTGTPLUSIO 
+    #else
+        #if defined (__BUILD_GADGETRON_PLUS__) || defined (gtplus_io_EXPORTS) // NOTE(review): the CMake target is gadgetron_toolbox_gtplus_io, whose default export symbol would be gadgetron_toolbox_gtplus_io_EXPORTS rather than gtplus_io_EXPORTS - confirm
+            #define EXPORTGTPLUSIO __declspec(dllexport)
+        #else
+            #define EXPORTGTPLUSIO __declspec(dllimport)
+        #endif
+    #endif
+#else
+    #define EXPORTGTPLUSIO
+#endif
diff --git a/toolboxes/gtplus/algorithm/FreeFormDeformation/gtplusBSplineFFD.h b/toolboxes/gtplus/algorithm/FreeFormDeformation/gtplusBSplineFFD.h
new file mode 100644
index 0000000..aec7fc1
--- /dev/null
+++ b/toolboxes/gtplus/algorithm/FreeFormDeformation/gtplusBSplineFFD.h
@@ -0,0 +1,820 @@
+/** \file       gtplusBSplineFFD.h
+    \brief      Class for gtPlus BSpline FreeFormDeformation
+    \author     Hui Xue
+*/
+
+#pragma once
+
+#include "gtplusFFDBase.h"
+
+namespace Gadgetron { namespace gtPlus {
+
+/// BSpline free form deformation on control point grids that carry BSPLINEPADDINGSIZE extra points on both sides of
+/// every input dimension; indexes in the public interface refer to the unpadded grid. Evaluation and approximation stay pure virtual.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut>
+class gtplusBSplineFFD : public gtplusFFDBase<T, CoordType, DIn, DOut>
+{
+public:
+    typedef gtplusFFDBase<T, CoordType, DIn, DOut> BaseClass;
+    typedef gtplusBSplineFFD<T, CoordType, DIn, DOut> Self;
+
+    typedef typename BaseClass::real_value_type real_value_type;
+    typedef real_value_type bspline_float_type;
+    typedef typename BaseClass::coord_type coord_type;
+
+    using BaseClass::D;
+    enum { BSPLINELUTSIZE = 1000 }; ///< number of rows in every look up table
+    enum { BSPLINEPADDINGSIZE = 4 }; ///< padding on each side of a grid dimension; also the column count of a look up table
+
+    typedef real_value_type LUTType[BSPLINELUTSIZE][BSPLINEPADDINGSIZE];
+
+    typedef typename BaseClass::CoordArrayType      CoordArrayType;
+    typedef typename BaseClass::ValueArrayType      ValueArrayType;
+    typedef typename BaseClass::ArrayType           ArrayType;
+    typedef typename BaseClass::FFDCtrlPtGridType   FFDCtrlPtGridType;
+    typedef typename BaseClass::PointType           PointType;
+    typedef typename BaseClass::ImageType           ImageType;
+    typedef typename BaseClass::MaskArrayType       MaskArrayType;
+
+    gtplusBSplineFFD();
+    virtual ~gtplusBSplineFFD();
+
+    /// evaluate the FFD at a grid location
+    virtual bool evaluateFFD(const CoordType pt[D], T r[DOut]) const = 0;
+
+    /// evaluate the 1st order derivative of FFD at a grid location
+    virtual bool evaluateFFDDerivative(const CoordType pt[D], T deriv[D][DOut]) const = 0;
+
+    /// evaluate the 2nd order derivative of FFD at a grid location
+    /// dderiv : D*D vector, stores dxx dxy dxz ...; dyx dyy dyz ...; dzx dzy dzz ...
+    virtual bool evaluateFFDSecondOrderDerivative(const CoordType pt[D], T dderiv[D*D][DOut]) const = 0;
+
+    /// compute the FFD approximation once
+    virtual bool ffdApprox(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N) = 0;
+
+    /// although BSpline grid has the padding, every index is defined on the unpadded grid
+
+    /// get the size of control point arrays, with the padding on both sides subtracted
+    virtual size_t get_size(size_t dimension) const { return ctrl_pt_[0].get_size(dimension)-2*BSPLINEPADDINGSIZE; }
+    virtual std::vector<size_t> get_dimensions() const
+    {
+        std::vector<size_t> dim;
+        ctrl_pt_[0].get_dimensions(dim);
+
+        unsigned int d;
+        for ( d=0; d<DIn; d++ )
+        {
+            dim[d] -= 2*BSPLINEPADDINGSIZE;
+        }
+
+        return dim;
+    }
+
+    /// get the spacing of control point arrays
+    virtual coord_type get_spacing(size_t dimension) const { return ctrl_pt_[0].get_pixel_size(dimension); }
+    virtual void get_spacing(std::vector<coord_type>& spacing) const { ctrl_pt_[0].get_pixel_size(spacing); }
+
+    /// get/set a control point value
+    virtual T get(size_t x, size_t y, size_t d) const { return ctrl_pt_[d](x+BSPLINEPADDINGSIZE, y+BSPLINEPADDINGSIZE); }
+    virtual void set(size_t x, size_t y, size_t d, T v) { ctrl_pt_[d](x+BSPLINEPADDINGSIZE, y+BSPLINEPADDINGSIZE) = v; }
+
+    virtual T get(size_t x, size_t y, size_t z, size_t d) const { return ctrl_pt_[d](x+BSPLINEPADDINGSIZE, y+BSPLINEPADDINGSIZE, z+BSPLINEPADDINGSIZE); }
+    virtual void set(size_t x, size_t y, size_t z, size_t d, T v) { ctrl_pt_[d](x+BSPLINEPADDINGSIZE, y+BSPLINEPADDINGSIZE, z+BSPLINEPADDINGSIZE) = v; }
+
+    virtual T get(size_t x, size_t y, size_t z, size_t s, size_t d) const { return ctrl_pt_[d](x+BSPLINEPADDINGSIZE, y+BSPLINEPADDINGSIZE, z+BSPLINEPADDINGSIZE, s+BSPLINEPADDINGSIZE); }
+    virtual void set(size_t x, size_t y, size_t z, size_t s, size_t d, T v) { ctrl_pt_[d](x+BSPLINEPADDINGSIZE, y+BSPLINEPADDINGSIZE, z+BSPLINEPADDINGSIZE, s+BSPLINEPADDINGSIZE) = v; }
+
+    /// offset to/from indexes for control points
+    virtual size_t calculate_offset(size_t x, size_t y) const { return ctrl_pt_[0].calculate_offset(x+BSPLINEPADDINGSIZE, y+BSPLINEPADDINGSIZE); }
+    /// note : the returned indexes are unsigned, so an offset that lies inside the padding wraps around after the subtraction
+    virtual void calculate_index( size_t offset, size_t& x, size_t& y ) const
+    {
+        ctrl_pt_[0].calculate_index(offset, x, y);
+        x -= BSPLINEPADDINGSIZE;
+        y -= BSPLINEPADDINGSIZE;
+    }
+
+    virtual size_t calculate_offset(size_t x, size_t y, size_t z) const { return ctrl_pt_[0].calculate_offset(x+BSPLINEPADDINGSIZE, y+BSPLINEPADDINGSIZE, z+BSPLINEPADDINGSIZE); }
+    virtual void calculate_index( size_t offset, size_t& x, size_t& y, size_t& z ) const
+    {
+        ctrl_pt_[0].calculate_index(offset, x, y, z);
+        x -= BSPLINEPADDINGSIZE;
+        y -= BSPLINEPADDINGSIZE;
+        z -= BSPLINEPADDINGSIZE;
+    }
+
+    virtual size_t calculate_offset(size_t x, size_t y, size_t z, size_t s) const { return ctrl_pt_[0].calculate_offset(x+BSPLINEPADDINGSIZE, y+BSPLINEPADDINGSIZE, z+BSPLINEPADDINGSIZE, s+BSPLINEPADDINGSIZE); }
+    virtual void calculate_index( size_t offset, size_t& x, size_t& y, size_t& z, size_t& s ) const
+    {
+        ctrl_pt_[0].calculate_index(offset, x, y, z, s);
+        x -= BSPLINEPADDINGSIZE;
+        y -= BSPLINEPADDINGSIZE;
+        z -= BSPLINEPADDINGSIZE;
+        s -= BSPLINEPADDINGSIZE;
+    }
+
+    /// compute the control point location in world coordinates
+    virtual void get_location(size_t x, size_t y, CoordType& sx, CoordType& sy) const { ctrl_pt_[0].image_to_world(x+BSPLINEPADDINGSIZE, y+BSPLINEPADDINGSIZE, sx, sy); }
+    virtual void get_location(size_t x, size_t y, size_t z, CoordType& sx, CoordType& sy, CoordType& sz) const { ctrl_pt_[0].image_to_world(x+BSPLINEPADDINGSIZE, y+BSPLINEPADDINGSIZE, z+BSPLINEPADDINGSIZE, sx, sy, sz); }
+    virtual void get_location(size_t x, size_t y, size_t z, size_t s, CoordType& sx, CoordType& sy, CoordType& sz, CoordType& ss) const { ctrl_pt_[0].image_to_world(x+BSPLINEPADDINGSIZE, y+BSPLINEPADDINGSIZE, z+BSPLINEPADDINGSIZE, s+BSPLINEPADDINGSIZE, sx, sy, sz, ss); }
+
+    /// convert a world coordinate point to FFD grid location
+    virtual bool world_to_grid(const CoordType pt_w[D], CoordType pt_g[D]) const;
+    virtual bool world_to_grid(CoordType px_w, CoordType py_w, CoordType& px_g, CoordType& py_g) const;
+    virtual bool world_to_grid(CoordType px_w, CoordType py_w, CoordType pz_w, CoordType& px_g, CoordType& py_g, CoordType& pz_g) const;
+    virtual bool world_to_grid(CoordType px_w, CoordType py_w, CoordType pz_w, CoordType ps_w, CoordType& px_g, CoordType& py_g, CoordType& pz_g, CoordType& ps_g) const;
+
+    virtual bool grid_to_world(const CoordType pt_g[D], CoordType pt_w[D]) const;
+    virtual bool grid_to_world(CoordType px_g, CoordType py_g, CoordType& px_w, CoordType& py_w) const;
+    virtual bool grid_to_world(CoordType px_g, CoordType py_g, CoordType pz_g, CoordType& px_w, CoordType& py_w, CoordType& pz_w) const;
+    virtual bool grid_to_world(CoordType px_g, CoordType py_g, CoordType pz_g, CoordType ps_g, CoordType& px_w, CoordType& py_w, CoordType& pz_w, CoordType& ps_w) const;
+
+    /// print info
+    virtual void print(std::ostream& os) const;
+
+    /// compute the four cubic BSpline basis functions (these are the values sampled into LUT_)
+    static bspline_float_type BSpline0(bspline_float_type t)
+    {
+        return (1-t)*(1-t)*(1-t)/(bspline_float_type)6.0;
+    }
+
+    static bspline_float_type BSpline1(bspline_float_type t)
+    {
+        return (3*t*t*t - 6*t*t + 4)/(bspline_float_type)6.0;
+    }
+
+    static bspline_float_type BSpline2(bspline_float_type t)
+    {
+        return (-3*t*t*t + 3*t*t + 3*t + 1)/(bspline_float_type)6.0;
+    }
+
+    static bspline_float_type BSpline3(bspline_float_type t)
+    {
+        return (t*t*t)/(bspline_float_type)6.0;
+    }
+
+    static bspline_float_type BSpline(size_t ind, bspline_float_type t)
+    {
+        switch (ind)
+        {
+        case 0:
+            return BSpline0(t);
+        case 1:
+            return BSpline1(t);
+        case 2:
+            return BSpline2(t);
+        case 3:
+            return BSpline3(t);
+        }
+
+        return 0; /// ind outside 0..3 : there is no such basis function
+    }
+
+    /// compute 1st order derivatives of four BSpline basis functions
+    static bspline_float_type BSpline0FirstOrderDeriv(bspline_float_type t)
+    {
+        return -(1-t)*(1-t)/(bspline_float_type)2.0;
+    }
+
+    static bspline_float_type BSpline1FirstOrderDeriv(bspline_float_type t)
+    {
+        return (9*t*t - 12*t)/(bspline_float_type)6.0;
+    }
+
+    static bspline_float_type BSpline2FirstOrderDeriv(bspline_float_type t)
+    {
+        return (-9*t*t + 6*t + 3)/(bspline_float_type)6.0;
+    }
+
+    static bspline_float_type BSpline3FirstOrderDeriv(bspline_float_type t)
+    {
+        return (t*t)/(bspline_float_type)2.0;
+    }
+
+    static bspline_float_type BSplineFirstOrderDeriv(size_t ind, bspline_float_type t)
+    {
+        switch (ind)
+        {
+        case 0:
+            return BSpline0FirstOrderDeriv(t);
+        case 1:
+            return BSpline1FirstOrderDeriv(t);
+        case 2:
+            return BSpline2FirstOrderDeriv(t);
+        case 3:
+            return BSpline3FirstOrderDeriv(t);
+        }
+
+        return 0; /// ind outside 0..3 : there is no such basis function
+    }
+
+    /// compute 2nd order derivatives of four BSpline basis functions
+    static bspline_float_type BSpline0SecondOrderDeriv(bspline_float_type t)
+    {
+        return 1 - t;
+    }
+
+    static bspline_float_type BSpline1SecondOrderDeriv(bspline_float_type t)
+    {
+        return 3*t - 2;
+    }
+
+    static bspline_float_type BSpline2SecondOrderDeriv(bspline_float_type t)
+    {
+        return -3*t + 1;
+    }
+
+    static bspline_float_type BSpline3SecondOrderDeriv(bspline_float_type t)
+    {
+        return t;
+    }
+
+    static bspline_float_type BSplineSecondOrderDeriv(size_t ind, bspline_float_type t)
+    {
+        switch (ind)
+        {
+        case 0:
+            return BSpline0SecondOrderDeriv(t);
+        case 1:
+            return BSpline1SecondOrderDeriv(t);
+        case 2:
+            return BSpline2SecondOrderDeriv(t);
+        case 3:
+            return BSpline3SecondOrderDeriv(t);
+        }
+
+        return 0; /// ind outside 0..3 : there is no such basis function
+    }
+
+    using BaseClass::performTiming_;
+    using BaseClass::debugFolder_;
+
+protected:
+
+    using BaseClass::ctrl_pt_;
+    using BaseClass::gt_timer1_;
+    using BaseClass::gt_timer2_;
+    using BaseClass::gt_timer3_;
+    using BaseClass::gt_exporter_;
+    using BaseClass::gtPlus_util_;
+    using BaseClass::gtPlus_util_complex_;
+
+    /// load the look up table for BSpline functions
+    virtual bool loadLookUpTable();
+
+    /// initialize the FFD
+    /// define the FFD over a region
+    bool initializeBFFD(const PointType& start, const PointType& end, CoordType gridCtrlPtSpacing[DIn]);
+    bool initializeBFFD(const PointType& start, const PointType& end, size_t gridCtrlPtNum[DIn]);
+    /// define the FFD over the region covered by an image
+    bool initializeBFFD(const ImageType& im, const PointType& start, const PointType& end, CoordType gridCtrlPtSpacing[DIn]);
+    bool initializeBFFD(const ImageType& im, const PointType& start, const PointType& end, size_t gridCtrlPtNum[DIn]);
+
+    /// 2D
+    bool initializeBFFD(const PointType& start, const PointType& end, CoordType dx, CoordType dy);
+    bool initializeBFFD(const PointType& start, const PointType& end, size_t sx, size_t sy);
+    bool initializeBFFD(const ImageType& im, const PointType& start, const PointType& end, CoordType dx, CoordType dy);
+    bool initializeBFFD(const ImageType& im, const PointType& start, const PointType& end, size_t sx, size_t sy);
+
+    /// 3D
+    bool initializeBFFD(const PointType& start, const PointType& end, CoordType dx, CoordType dy, CoordType dz);
+    bool initializeBFFD(const PointType& start, const PointType& end, size_t sx, size_t sy, size_t sz);
+    bool initializeBFFD(const ImageType& im, const PointType& start, const PointType& end, CoordType dx, CoordType dy, CoordType dz);
+    bool initializeBFFD(const ImageType& im, const PointType& start, const PointType& end, size_t sx, size_t sy, size_t sz);
+
+    /// 4D
+    bool initializeBFFD(const PointType& start, const PointType& end, CoordType dx, CoordType dy, CoordType dz, CoordType ds);
+    bool initializeBFFD(const PointType& start, const PointType& end, size_t sx, size_t sy, size_t sz, size_t ss);
+    bool initializeBFFD(const ImageType& im, const PointType& start, const PointType& end, CoordType dx, CoordType dy, CoordType dz, CoordType ds);
+    bool initializeBFFD(const ImageType& im, const PointType& start, const PointType& end, size_t sx, size_t sy, size_t sz, size_t ss);
+
+    /// look up tables for the BSpline basis (LUT_) and its first (LUT1_) and second (LUT2_) order derivatives
+    LUTType LUT_;
+    LUTType LUT1_;
+    LUTType LUT2_;
+};
+
+/// default constructor : fills the look up tables; the bool returned by the load is discarded
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> gtplusBSplineFFD<T, CoordType, DIn, DOut>::gtplusBSplineFFD()
+{
+    static_cast<void>(this->loadLookUpTable());
+}
+
+/// destructor : empty body, this class releases nothing explicitly
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> gtplusBSplineFFD<T, CoordType, DIn, DOut>::~gtplusBSplineFFD()
+{
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut>
+bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::loadLookUpTable()
+{
+    /// Sample the four BSpline basis functions together with their first and
+    /// second order derivatives at BSPLINELUTSIZE evenly spaced parameters in [0, 1].
+    /// Row ii of LUT_, LUT1_ and LUT2_ stores the values at t = ii/(BSPLINELUTSIZE-1).
+    /// Returns false after logging when anything throws, true otherwise.
+    try
+    {
+        long long ii;
+        /// the parameter of row ii is ii divided by this value
+        double gapInLUT = (double)(BSPLINELUTSIZE-1);
+
+        /// the table rows are filled by an OpenMP parallel loop over ii
+        #pragma omp parallel for default(none) private(ii) shared(gapInLUT)
+        for (ii=0; ii<(long long)BSPLINELUTSIZE; ii++)
+        {
+            const bspline_float_type t = (bspline_float_type)(ii/gapInLUT);
+
+            /// column k holds basis function k; the dispatchers forward
+            /// k = 0..3 to BSpline0..3 and the matching derivative functions
+            for ( size_t k=0; k<4; k++ )
+            {
+                LUT_[ii][k]  = BSpline(k, t);
+                LUT1_[ii][k] = BSplineFirstOrderDeriv(k, t);
+                LUT2_[ii][k] = BSplineSecondOrderDeriv(k, t);
+            }
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors happened in gtplusBSplineFFD<T, CoordType, DIn, DOut>::loadLookUpTable() ...");
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut>
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::world_to_grid(const CoordType pt_w[D], CoordType pt_g[D]) const
+{
+    /// world point -> location on the unpadded control point grid; false if the conversion throws
+    try
+    {
+        /// location on the padded grid of the first control point array
+        this->ctrl_pt_[0].world_to_image(pt_w, pt_g);
+
+        /// take the padding off along every axis
+        for ( unsigned int axis=0; axis<D; axis++ ) pt_g[axis] -= BSPLINEPADDINGSIZE;
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors happened in world_to_grid(const CoordType pt_w[D], CoordType pt_g[D]) const ... ");
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut>
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::world_to_grid(CoordType px_w, CoordType py_w, CoordType& px_g, CoordType& py_g) const
+{
+    /// scalar 2D form : world point -> unpadded grid location, refused unless DIn is 2
+    GADGET_CHECK_RETURN_FALSE(DIn==2);
+    try
+    {
+        this->ctrl_pt_[0].world_to_image(px_w, py_w, px_g, py_g);
+        px_g = px_g - BSPLINEPADDINGSIZE;
+        py_g = py_g - BSPLINEPADDINGSIZE;
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors happened in world_to_grid(CoordType px_w, CoordType py_w, CoordType& px_g, CoordType& py_g) const ... ");
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut>
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::world_to_grid(CoordType px_w, CoordType py_w, CoordType pz_w, CoordType& px_g, CoordType& py_g, CoordType& pz_g) const
+{
+    /// scalar 3D form : world point -> unpadded grid location, refused unless DIn is 3
+    GADGET_CHECK_RETURN_FALSE(DIn==3);
+    try
+    {
+        this->ctrl_pt_[0].world_to_image(px_w, py_w, pz_w, px_g, py_g, pz_g);
+        px_g = px_g - BSPLINEPADDINGSIZE;
+        py_g = py_g - BSPLINEPADDINGSIZE;
+        pz_g = pz_g - BSPLINEPADDINGSIZE;
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors happened in world_to_grid(CoordType px_w, CoordType py_w, CoordType pz_w, CoordType& px_g, CoordType& py_g, CoordType& pz_g) const ... ");
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut>
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::world_to_grid(CoordType px_w, CoordType py_w, CoordType pz_w, CoordType ps_w, CoordType& px_g, CoordType& py_g, CoordType& pz_g, CoordType& ps_g) const
+{
+    /// scalar 4D form : world point -> unpadded grid location, refused unless DIn is 4
+    GADGET_CHECK_RETURN_FALSE(DIn==4);
+    try
+    {
+        this->ctrl_pt_[0].world_to_image(px_w, py_w, pz_w, ps_w, px_g, py_g, pz_g, ps_g);
+        px_g = px_g - BSPLINEPADDINGSIZE;
+        py_g = py_g - BSPLINEPADDINGSIZE;
+        pz_g = pz_g - BSPLINEPADDINGSIZE;
+        ps_g = ps_g - BSPLINEPADDINGSIZE;
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors happened in world_to_grid(CoordType px_w, CoordType py_w, CoordType pz_w, CoordType ps_w, CoordType& px_g, CoordType& py_g, CoordType& pz_g, CoordType& ps_g) const ... ");
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut>
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::grid_to_world(const CoordType pt_g[D], CoordType pt_w[D]) const
+{
+    /// unpadded grid location -> world point; false if the conversion throws
+    try
+    {
+        /// pt_g is const, so the padded location is assembled in a local array
+        CoordType paddedLocation[D];
+        for ( unsigned int axis=0; axis<D; axis++ )
+        {
+            paddedLocation[axis] = pt_g[axis] + BSPLINEPADDINGSIZE;
+        }
+        this->ctrl_pt_[0].image_to_world(paddedLocation, pt_w);
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors happened in grid_to_world(const CoordType pt_g[D], CoordType pt_w[D]) const ... ");
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut>
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::grid_to_world(CoordType px_g, CoordType py_g, CoordType& px_w, CoordType& py_w) const
+{
+    /// scalar 2D form : unpadded grid location -> world point, refused unless DIn is 2
+    GADGET_CHECK_RETURN_FALSE(DIn==2);
+    try
+    {
+        px_g = px_g + BSPLINEPADDINGSIZE;
+        py_g = py_g + BSPLINEPADDINGSIZE;
+        this->ctrl_pt_[0].image_to_world(px_g, py_g, px_w, py_w);
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors happened in grid_to_world(CoordType px_g, CoordType py_g, CoordType& px_w, CoordType& py_w) const ... ");
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut>
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::grid_to_world(CoordType px_g, CoordType py_g, CoordType pz_g, CoordType& px_w, CoordType& py_w, CoordType& pz_w) const
+{
+    /// scalar 3D form : unpadded grid location -> world point, refused unless DIn is 3
+    GADGET_CHECK_RETURN_FALSE(DIn==3);
+    try
+    {
+        px_g = px_g + BSPLINEPADDINGSIZE;
+        py_g = py_g + BSPLINEPADDINGSIZE;
+        pz_g = pz_g + BSPLINEPADDINGSIZE;
+        this->ctrl_pt_[0].image_to_world(px_g, py_g, pz_g, px_w, py_w, pz_w);
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors happened in grid_to_world(CoordType px_g, CoordType py_g, CoordType pz_g, CoordType& px_w, CoordType& py_w, CoordType& pz_w) const ... ");
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut>
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::grid_to_world(CoordType px_g, CoordType py_g, CoordType pz_g, CoordType ps_g, CoordType& px_w, CoordType& py_w, CoordType& pz_w, CoordType& ps_w) const
+{
+    /// scalar 4D form : unpadded grid location -> world point, refused unless DIn is 4
+    GADGET_CHECK_RETURN_FALSE(DIn==4);
+    try
+    {
+        px_g = px_g + BSPLINEPADDINGSIZE;
+        py_g = py_g + BSPLINEPADDINGSIZE;
+        pz_g = pz_g + BSPLINEPADDINGSIZE;
+        ps_g = ps_g + BSPLINEPADDINGSIZE;
+        this->ctrl_pt_[0].image_to_world(px_g, py_g, pz_g, ps_g, px_w, py_w, pz_w, ps_w);
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors happened in grid_to_world(CoordType px_g, CoordType py_g, CoordType pz_g, CoordType ps_g, CoordType& px_w, CoordType& py_w, CoordType& pz_w, CoordType& ps_w) const ... ");
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::initializeBFFD(const PointType& start, const PointType& end, CoordType gridCtrlPtSpacing[DIn])
+{
+    /// Create the DOut padded control point grids for the region start..end; false on bad input or when anything throws.
+    try
+    {
+        for ( unsigned int d=0; d<DIn; d++ )
+        {
+            /// reject empty regions and non-positive spacings before dividing by them
+            GADGET_CHECK_RETURN_FALSE(end(d) > start(d) && gridCtrlPtSpacing[d] > 0);
+        }
+        std::vector<size_t> dim(DIn, 2);
+        std::vector<coord_type> pixelSize(DIn, 1);
+        std::vector<coord_type> origin(DIn, 0);
+
+        for ( unsigned int d=0; d<DIn; d++ )
+        {
+            dim[d] = GT_MKINT( (end(d)-start(d))/gridCtrlPtSpacing[d] ) + 1;
+            /// a spacing much larger than the region gives a single point : keep two, the division below needs dim-1 > 0
+            if ( dim[d] < 2 ) dim[d] = 2;
+            pixelSize[d] = (end(d)-start(d))/(dim[d]-1);
+            /// add the padding
+            dim[d] += 2*BSPLINEPADDINGSIZE;
+            origin[d] = -pixelSize[d]*BSPLINEPADDINGSIZE; /// NOTE(review): not offset by start(d) - confirm this is intended
+        }
+
+        for ( unsigned int d=0; d<DOut; d++ )
+        {
+            this->ctrl_pt_[d].create(dim, pixelSize, origin);
+            Gadgetron::clear(this->ctrl_pt_[d]);
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in initializeBFFD(const PointType& start, const PointType& end, CoordType gridCtrlPtSpacing[DIn]) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::initializeBFFD(const PointType& start, const PointType& end, size_t gridCtrlPtNum[DIn])
+{
+    /// Create the DOut padded control point grids for the region start..end; false on bad input or when anything throws.
+    try
+    {
+        unsigned int d;
+        for ( d=0; d<DIn; d++ )
+        {
+            GADGET_CHECK_RETURN_FALSE(end(d) > start(d));
+        }
+
+        std::vector<size_t> dim(DIn, 2);
+        std::vector<coord_type> pixelSize(DIn, 1);
+        std::vector<coord_type> origin(DIn, 0);
+
+        for ( d=0; d<DIn; d++ )
+        {
+            /// requested counts below three are raised to three
+            dim[d] = gridCtrlPtNum[d];
+            if ( dim[d] < 3 ) dim[d] = 3;
+            pixelSize[d] = (end(d)-start(d))/(dim[d]-1);
+
+            /// add the padding
+            dim[d] += 2*BSPLINEPADDINGSIZE;
+            origin[d] = -pixelSize[d]*BSPLINEPADDINGSIZE;
+        }
+
+        for ( d=0; d<DOut; d++ )
+        {
+            this->ctrl_pt_[d].create(dim, pixelSize, origin);
+            Gadgetron::clear(this->ctrl_pt_[d]);
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in initializeBFFD(const PointType& start, const PointType& end, size_t gridCtrlPtNum[DIn]) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::initializeBFFD(const ImageType& im, const PointType& start, const PointType& end, CoordType gridCtrlPtSpacing[DIn])
+{
+    /// Create the DOut padded control point grids, with the origin taken from im; false on bad input or when anything throws.
+    try
+    {
+        for ( unsigned int d=0; d<DIn; d++ )
+        {
+            /// reject empty regions and non-positive spacings before dividing by them
+            GADGET_CHECK_RETURN_FALSE(end(d) > start(d) && gridCtrlPtSpacing[d] > 0);
+        }
+        std::vector<size_t> dim(DIn, 2);
+        std::vector<coord_type> pixelSize(DIn, 1);
+        std::vector<coord_type> origin(DIn, 0);
+        std::vector<coord_type> firstCtrlPt(DIn);
+
+        for ( unsigned int d=0; d<DIn; d++ )
+        {
+            dim[d] = GT_MKINT( (end(d)-start(d))/gridCtrlPtSpacing[d] ) + 1;
+            /// a spacing much larger than the region gives a single point : keep two, the division below needs dim-1 > 0
+            if ( dim[d] < 2 ) dim[d] = 2;
+            pixelSize[d] = (end(d)-start(d))/(dim[d]-1);
+            /// add the padding
+            dim[d] += 2*BSPLINEPADDINGSIZE;
+            /// first padded control point, expressed in pixels of im
+            firstCtrlPt[d] = -pixelSize[d]*BSPLINEPADDINGSIZE/im.get_pixel_size(d);
+        }
+        im.image_to_world( firstCtrlPt, origin);
+
+        for ( unsigned int d=0; d<DOut; d++ )
+        {
+            this->ctrl_pt_[d].create(dim, pixelSize, origin);
+            Gadgetron::clear(this->ctrl_pt_[d]);
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in initializeBFFD(const ImageType& im, const PointType& start, const PointType& end, CoordType gridCtrlPtSpacing[DIn]) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::initializeBFFD(const ImageType& im, const PointType& start, const PointType& end, size_t gridCtrlPtNum[DIn])
+{
+    /// Create the DOut padded control point grids, with the origin taken from im; false on bad input or when anything throws.
+    try
+    {
+        unsigned int d;
+        for ( d=0; d<DIn; d++ )
+        {
+            GADGET_CHECK_RETURN_FALSE(end(d) > start(d));
+        }
+
+        std::vector<size_t> dim(DIn, 2);
+        std::vector<coord_type> pixelSize(DIn, 1);
+        std::vector<coord_type> origin(DIn, 0);
+        std::vector<coord_type> firstCtrlPt(DIn);
+
+        for ( d=0; d<DIn; d++ )
+        {
+            /// requested counts below three are raised to three
+            dim[d] = gridCtrlPtNum[d];
+            if ( dim[d] < 3 ) dim[d] = 3;
+            pixelSize[d] = (end(d)-start(d))/(dim[d]-1);
+
+            /// add the padding
+            dim[d] += 2*BSPLINEPADDINGSIZE;
+            /// first padded control point, expressed in pixels of im
+            firstCtrlPt[d] = -pixelSize[d]*BSPLINEPADDINGSIZE/im.get_pixel_size(d);
+        }
+        im.image_to_world( firstCtrlPt, origin);
+
+        for ( d=0; d<DOut; d++ )
+        {
+            this->ctrl_pt_[d].create(dim, pixelSize, origin);
+            Gadgetron::clear(this->ctrl_pt_[d]);
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in initializeBFFD(const ImageType& im, const PointType& start, const PointType& end, size_t gridCtrlPtNum[DIn]) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::initializeBFFD(const PointType& start, const PointType& end, CoordType dx, CoordType dy)
+{
+    /// 2D spacing form : the array overload reads DIn entries, so a two entry array is only valid when DIn is 2
+    GADGET_CHECK_RETURN_FALSE(DIn==2);
+    CoordType gridCtrlPtSpacing[2] = { dx, dy };
+    return this->initializeBFFD(start, end, gridCtrlPtSpacing);
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::initializeBFFD(const PointType& start, const PointType& end, size_t sx, size_t sy)
+{
+    /// 2D count form : the array overload reads DIn entries, so a two entry array is only valid when DIn is 2
+    GADGET_CHECK_RETURN_FALSE(DIn==2);
+    size_t gridCtrlPtNum[2] = { sx, sy };
+    return this->initializeBFFD(start, end, gridCtrlPtNum);
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::initializeBFFD(const ImageType& im, const PointType& start, const PointType& end, CoordType dx, CoordType dy)
+{
+    /// 2D spacing form over an image : the array overload reads DIn entries, so this is only valid when DIn is 2
+    GADGET_CHECK_RETURN_FALSE(DIn==2);
+    CoordType gridCtrlPtSpacing[2] = { dx, dy };
+    return this->initializeBFFD(im, start, end, gridCtrlPtSpacing);
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::initializeBFFD(const ImageType& im, const PointType& start, const PointType& end, size_t sx, size_t sy)
+{
+    /// 2D count form over an image : the array overload reads DIn entries, so this is only valid when DIn is 2
+    GADGET_CHECK_RETURN_FALSE(DIn==2);
+    size_t gridCtrlPtNum[2] = { sx, sy };
+    return this->initializeBFFD(im, start, end, gridCtrlPtNum);
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::initializeBFFD(const PointType& start, const PointType& end, CoordType dx, CoordType dy, CoordType dz)
+{
+    /// 3D spacing form : the array overload reads DIn entries, so a three entry array is only valid when DIn is 3
+    GADGET_CHECK_RETURN_FALSE(DIn==3);
+
+    CoordType gridCtrlPtSpacing[3] = { dx, dy, dz };
+    return this->initializeBFFD(start, end, gridCtrlPtSpacing);
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::initializeBFFD(const PointType& start, const PointType& end, size_t sx, size_t sy, size_t sz)
+{
+    /// 3D count form : the array overload reads DIn entries, so a three entry array is only valid when DIn is 3
+    GADGET_CHECK_RETURN_FALSE(DIn==3);
+
+    size_t gridCtrlPtNum[3] = { sx, sy, sz };
+    return this->initializeBFFD(start, end, gridCtrlPtNum);
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::initializeBFFD(const ImageType& im, const PointType& start, const PointType& end, CoordType dx, CoordType dy, CoordType dz)
+{
+    /// 3D spacing form over an image : the array overload reads DIn entries, so this is only valid when DIn is 3
+    GADGET_CHECK_RETURN_FALSE(DIn==3);
+
+    CoordType gridCtrlPtSpacing[3] = { dx, dy, dz };
+    return this->initializeBFFD(im, start, end, gridCtrlPtSpacing);
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::initializeBFFD(const ImageType& im, const PointType& start, const PointType& end, size_t sx, size_t sy, size_t sz)
+{
+    /// 3D count form over an image : the array overload reads DIn entries, so this is only valid when DIn is 3
+    GADGET_CHECK_RETURN_FALSE(DIn==3);
+
+    size_t gridCtrlPtNum[3] = { sx, sy, sz };
+    return this->initializeBFFD(im, start, end, gridCtrlPtNum);
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::initializeBFFD(const PointType& start, const PointType& end, CoordType dx, CoordType dy, CoordType dz, CoordType ds)
+{
+    /// 4D spacing form : packs the four spacings and forwards to the array overload.
+    /// That overload reads DIn entries, so a four entry array is only valid when DIn is 4.
+    GADGET_CHECK_RETURN_FALSE(DIn==4);
+
+    CoordType gridCtrlPtSpacing[4] = { dx, dy, dz, ds };
+    return this->initializeBFFD(start, end, gridCtrlPtSpacing);
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::initializeBFFD(const PointType& start, const PointType& end, size_t sx, size_t sy, size_t sz, size_t ss)
+{
+    /// 4D count form : packs the four control point counts and forwards to the array overload.
+    /// That overload reads DIn entries, so a four entry array is only valid when DIn is 4.
+    GADGET_CHECK_RETURN_FALSE(DIn==4);
+
+    size_t gridCtrlPtNum[4] = { sx, sy, sz, ss };
+    return this->initializeBFFD(start, end, gridCtrlPtNum);
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::initializeBFFD(const ImageType& im, const PointType& start, const PointType& end, CoordType dx, CoordType dy, CoordType dz, CoordType ds)
+{
+    /// 4D spacing form over an image : packs the four spacings and forwards to the array overload.
+    /// That overload reads DIn entries, so a four entry array is only valid when DIn is 4.
+    GADGET_CHECK_RETURN_FALSE(DIn==4);
+
+    CoordType gridCtrlPtSpacing[4] = { dx, dy, dz, ds };
+    return this->initializeBFFD(im, start, end, gridCtrlPtSpacing);
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusBSplineFFD<T, CoordType, DIn, DOut>::initializeBFFD(const ImageType& im, const PointType& start, const PointType& end, size_t sx, size_t sy, size_t sz, size_t ss)
+{
+    /// 4D count form over an image : packs the four control point counts and forwards to the array overload.
+    /// That overload reads DIn entries, so a four entry array is only valid when DIn is 4.
+    GADGET_CHECK_RETURN_FALSE(DIn==4);
+
+    size_t gridCtrlPtNum[4] = { sx, sy, sz, ss };
+    return this->initializeBFFD(im, start, end, gridCtrlPtNum);
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut>
+void gtplusBSplineFFD<T, CoordType, DIn, DOut>::print(std::ostream& os) const
+{
+    /// writes a fixed three line banner to os; every line is terminated with
+    /// std::endl, so the stream is flushed after each of them
+    os << "---------------------- GTPlus BSpline Free Form Deformation ------------------" << std::endl
+       << "Define the interface for BSpline Free Form Deformation (BFFD) " << std::endl
+       << "------------------------------------------------------------------------------" << std::endl;
+}
+
+}}
diff --git a/toolboxes/gtplus/algorithm/FreeFormDeformation/gtplusBSplineFFD2D.h b/toolboxes/gtplus/algorithm/FreeFormDeformation/gtplusBSplineFFD2D.h
new file mode 100644
index 0000000..d6f27bd
--- /dev/null
+++ b/toolboxes/gtplus/algorithm/FreeFormDeformation/gtplusBSplineFFD2D.h
@@ -0,0 +1,597 @@
+/** \file       gtplusBSplineFFD2D.h
+    \brief      Implement gtPlus 2D BSpline FreeFormDeformation
+    \author     Hui Xue
+*/
+
+#pragma once
+
+#include "gtplusBSplineFFD.h"
+
+namespace Gadgetron { namespace gtPlus {
+
+template <typename T, typename CoordType, unsigned int DOut>
+class gtplusBSplineFFD2D : public gtplusBSplineFFD<T, CoordType, 2, DOut>
+{
+public:
+    /// 2D BSpline FFD : the input dimension of the base class is fixed to 2; T is the value type, DOut the number of output components
+    typedef gtplusBSplineFFD<T, CoordType, 2, DOut> BaseClass;
+    typedef gtplusBSplineFFD2D<T, CoordType, DOut> Self;
+
+    typedef typename BaseClass::real_value_type real_value_type;
+    typedef real_value_type bspline_float_type;
+
+    typedef typename BaseClass::coord_type coord_type;
+
+    using BaseClass::D;
+    using BaseClass::BSPLINELUTSIZE;
+    using BaseClass::BSPLINEPADDINGSIZE;
+
+    typedef typename BaseClass::LUTType             LUTType;
+    typedef typename BaseClass::CoordArrayType      CoordArrayType;
+    typedef typename BaseClass::ValueArrayType      ValueArrayType;
+    typedef typename BaseClass::ArrayType           ArrayType;
+    typedef typename BaseClass::FFDCtrlPtGridType   FFDCtrlPtGridType;
+    typedef typename BaseClass::PointType           PointType;
+    typedef typename BaseClass::ImageType           ImageType;
+
+    /// constructors
+    gtplusBSplineFFD2D();
+    /// define the FFD over a region with specific control point spacing
+    gtplusBSplineFFD2D(const PointType& start, const PointType& end, CoordType dx, CoordType dy);
+    /// define the FFD over the image region with specific control point spacing
+    gtplusBSplineFFD2D(const ImageType& im, CoordType dx, CoordType dy);
+    /// define the FFD over the image region with specific number of control points
+    gtplusBSplineFFD2D(const ImageType& im, size_t sx, size_t sy);
+    /// define the FFD over an array region with specific number of control points
+    gtplusBSplineFFD2D(const ArrayType& a, size_t sx, size_t sy);
+    /// copy constructor; copies the control point grid of every output component
+    gtplusBSplineFFD2D(const Self& bffd);
+
+    virtual ~gtplusBSplineFFD2D();
+
+    /// evaluate the FFD at a grid location; r receives one value per output component
+    virtual bool evaluateFFD(const CoordType pt[D], T r[DOut]) const;
+    virtual bool evaluateFFD(CoordType px, CoordType py, T r[DOut]) const;
+    /// evaluate the 1st order derivative along x / along y only, in FFD grid coordinates
+    virtual bool evaluateFFDDX(const CoordType pt[D], T dx[DOut]) const;
+    virtual bool evaluateFFDDY(const CoordType pt[D], T dy[DOut]) const;
+    /// same as evaluateFFDDX / evaluateFFDDY, with the result divided by the grid spacing along that axis
+    virtual bool evaluateWorldDX(const CoordType pt[D], T dx[DOut]) const;
+    virtual bool evaluateWorldDY(const CoordType pt[D], T dy[DOut]) const;
+
+    /// evaluate the 1st order derivative of FFD at a grid location; deriv[0] is along x, deriv[1] is along y
+    virtual bool evaluateFFDDerivative(const CoordType pt[D], T deriv[D][DOut]) const;
+    virtual bool evaluateFFDDerivative(CoordType px, CoordType py, T deriv[D][DOut]) const;
+
+    /// evaluate the 2nd order derivative of FFD at a grid location
+    /// dderiv : D*D rows, stored in the order dxx, dxy, dyx, dyy (the dyx row is a copy of the dxy row)
+    virtual bool evaluateFFDSecondOrderDerivative(const CoordType pt[D], T dderiv[D*D][DOut]) const;
+    virtual bool evaluateFFDSecondOrderDerivative(CoordType px, CoordType py, T dderiv[D*D][DOut]) const;
+
+    /// compute the FFD approximation once
+    /// pos : the position of input points, 2 by N
+    /// value : the value on input points, DOut by N
+    /// residual : the approximation residual after computing FFD, DOut by N
+    virtual bool ffdApprox(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N);
+
+    /// As suggested in ref [2], the BSpline FFD can be refined to achieve better approximation
+    virtual bool refine();
+
+    /// general print function
+    virtual void print(std::ostream& os) const;
+    /// base class members performTiming_ and debugFolder_ are re-exposed in the public section
+    using BaseClass::performTiming_;
+    using BaseClass::debugFolder_;
+
+protected:
+    /// base class members brought into scope for the implementation below
+    using BaseClass::ctrl_pt_;
+    using BaseClass::gt_timer1_;
+    using BaseClass::gt_timer2_;
+    using BaseClass::gt_timer3_;
+    using BaseClass::gt_exporter_;
+    using BaseClass::gtPlus_util_;
+    using BaseClass::gtPlus_util_complex_;
+    using BaseClass::LUT_;
+    using BaseClass::LUT1_;
+    using BaseClass::LUT2_;
+
+    /// evaluate the FFD or one of its derivatives; shared worker behind all public evaluate* functions
+    /// px and py are at FFD grid
+    /// ordx, ordy indicate the order of derivative along x and y; 0/1/2 for 0/1st/2nd derivative
+    virtual bool evaluateFFD2D(CoordType px, CoordType py, size_t ordx, size_t ordy, T r[DOut]) const;
+};
+
+template <typename T, typename CoordType, unsigned int DOut>
+gtplusBSplineFFD2D<T, CoordType, DOut>::gtplusBSplineFFD2D() : BaseClass()
+{   /// default constructor : empty body, everything is left to BaseClass()
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+gtplusBSplineFFD2D<T, CoordType, DOut>::gtplusBSplineFFD2D(const PointType& start, const PointType& end, CoordType dx, CoordType dy) : BaseClass()
+{   /// FFD over the region [start, end] with control point spacing (dx, dy); GADGET_CHECK_THROW presumably throws when initializeBFFD returns false
+    GADGET_CHECK_THROW(this->initializeBFFD(start, end, dx, dy));
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+gtplusBSplineFFD2D<T, CoordType, DOut>::gtplusBSplineFFD2D(const ImageType& im, CoordType dx, CoordType dy) : BaseClass()
+{
+    PointType start, end;
+    typename ImageType::coord_type wx, wy;
+    /// world coordinates of the first pixel (0, 0) give the start of the FFD region
+    im.image_to_world( (size_t)0, (size_t)0, wx, wy);
+    start(0) = wx;
+    start(1) = wy;
+
+    /// world coordinates of the last pixel give the end of the FFD region
+    im.image_to_world(im.get_size(0)-1, im.get_size(1)-1, wx, wy);
+    end(0) = wx;
+    end(1) = wy;
+
+    GADGET_CHECK_THROW(this->initializeBFFD(im, start, end, dx, dy));
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+gtplusBSplineFFD2D<T, CoordType, DOut>::gtplusBSplineFFD2D(const ImageType& im, size_t sx, size_t sy) : BaseClass()
+{
+    typename ImageType::coord_type wx, wy;
+    PointType start, end;
+    /// world coordinates of the first pixel (0, 0) give the start of the FFD region
+    im.image_to_world( (size_t)0, (size_t)0, wx, wy);
+    start(0) = wx;
+    start(1) = wy;
+
+    /// world coordinates of the last pixel give the end of the FFD region
+    im.image_to_world(im.get_size(0)-1, im.get_size(1)-1, wx, wy);
+    end(0) = wx;
+    end(1) = wy;
+
+    GADGET_CHECK_THROW(this->initializeBFFD(im, start, end, sx, sy));
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+gtplusBSplineFFD2D<T, CoordType, DOut>::gtplusBSplineFFD2D(const ArrayType& a, size_t sx, size_t sy) : BaseClass()
+{
+    /// the FFD region spans the array indices (0, 0) .. (size(0)-1, size(1)-1), with sx by sy control points
+    PointType start, end;
+    start(0) = 0;
+    start(1) = 0;
+
+    end(0) = static_cast<CoordType>(a.get_size(0)-1);
+    end(1) = static_cast<CoordType>(a.get_size(1)-1);
+
+    GADGET_CHECK_THROW(this->initializeBFFD(start, end, sx, sy));
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+gtplusBSplineFFD2D<T, CoordType, DOut>::gtplusBSplineFFD2D(const Self& bffd) : BaseClass()
+{
+    /// copy the control point grid of each output component from the source object via copyFrom
+    for ( unsigned int comp=0; comp<DOut; ++comp )
+    {
+        this->ctrl_pt_[comp].copyFrom( bffd.get_ctrl_pt(comp) );
+    }
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+gtplusBSplineFFD2D<T, CoordType, DOut>::~gtplusBSplineFFD2D()
+{   /// empty destructor body : this class releases nothing explicitly
+}
+
+/// Evaluate the FFD (ordx = ordy = 0) or one of its partial derivatives at the grid location (px, py).
+/// ordx / ordy select the value (0), 1st (1) or 2nd (2) derivative BSpline LUT along x / y; ordx+ordy must not exceed 2.
+/// r receives one value per output component; returns false if an exception is caught (or a GADGET_DEBUG_CHECK_RETURN_FALSE condition trips).
+template <typename T, typename CoordType, unsigned int DOut>
+bool gtplusBSplineFFD2D<T, CoordType, DOut>::evaluateFFD2D(CoordType px, CoordType py, size_t ordx, size_t ordy, T r[DOut]) const
+{
+    try
+    {
+        GADGET_DEBUG_CHECK_RETURN_FALSE( (px>=-2) && (px<=this->get_size(0)+1) );
+        GADGET_DEBUG_CHECK_RETURN_FALSE( (py>=-2) && (py<=this->get_size(1)+1) );
+        GADGET_DEBUG_CHECK_RETURN_FALSE(ordx<=2);
+        GADGET_DEBUG_CHECK_RETURN_FALSE(ordy<=2);
+        GADGET_DEBUG_CHECK_RETURN_FALSE(ordx+ordy<=2);
+
+        long long ix = (long long)std::floor(px);
+        CoordType deltaX = px-(CoordType)ix;
+        long long lx = GT_MKINT(BSPLINELUTSIZE*deltaX);
+
+        long long iy = (long long)std::floor(py);
+        CoordType deltaY = py-(CoordType)iy;
+        long long ly = GT_MKINT(BSPLINELUTSIZE*deltaY);
+
+        unsigned int d, jj;
+        /// offsets of the first (x = ix-1) control point on each of the four rows iy-1 .. iy+2 of the support
+        size_t offset[4];
+        offset[0] = this->calculate_offset(ix-1, iy-1);
+        offset[1] = this->calculate_offset(ix-1, iy);
+        offset[2] = this->calculate_offset(ix-1, iy+1);
+        offset[3] = this->calculate_offset(ix-1, iy+2);
+
+        const LUTType* p_xLUT= &this->LUT_;
+        const LUTType* p_yLUT= &this->LUT_;
+        if ( ordx == 1 )
+        {
+            p_xLUT= &this->LUT1_;
+        }
+        else if ( ordx == 2 )
+        {
+            p_xLUT= &this->LUT2_;
+        }
+
+        if ( ordy == 1 )
+        {
+            p_yLUT= &this->LUT1_;
+        }
+        else if ( ordy == 2 )
+        {
+            p_yLUT= &this->LUT2_;
+        }
+
+        const LUTType& xLUT= *p_xLUT;
+        const LUTType& yLUT= *p_yLUT;
+
+        for ( d=0; d<DOut; d++ )
+        {
+            r[d] = 0;
+            T v(0);
+            for (jj=0; jj<4; jj++)
+            {
+                v =   ( this->ctrl_pt_[d](offset[jj]  ) * xLUT[lx][0] )
+                    + ( this->ctrl_pt_[d](offset[jj]+1) * xLUT[lx][1] )
+                    + ( this->ctrl_pt_[d](offset[jj]+2) * xLUT[lx][2] )
+                    + ( this->ctrl_pt_[d](offset[jj]+3) * xLUT[lx][3] );
+                r[d] += v * yLUT[ly][jj];
+            }
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in evaluateFFD2D(CoordType px, CoordType py, size_t ordx, size_t ordy, T r[DOut]) const ... ");
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD2D<T, CoordType, DOut>::evaluateFFD(const CoordType pt[D], T r[DOut]) const
+{   /// FFD value at grid location (pt[0], pt[1]) : forwards to evaluateFFD2D with ordx = ordy = 0
+    return this->evaluateFFD2D(pt[0], pt[1], 0, 0, r);
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD2D<T, CoordType, DOut>::evaluateFFD(CoordType px, CoordType py, T r[DOut]) const
+{   /// FFD value at grid location (px, py) : forwards to evaluateFFD2D with ordx = ordy = 0
+    return this->evaluateFFD2D(px, py, 0, 0, r);
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD2D<T, CoordType, DOut>::evaluateFFDDX(const CoordType pt[D], T dx[DOut]) const
+{   /// 1st order derivative along x at grid location pt : evaluateFFD2D with ordx = 1, ordy = 0
+    return this->evaluateFFD2D(pt[0], pt[1], 1, 0, dx);
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD2D<T, CoordType, DOut>::evaluateFFDDY(const CoordType pt[D], T dy[DOut]) const
+{   /// 1st order derivative along y at grid location pt : evaluateFFD2D with ordx = 0, ordy = 1
+    return this->evaluateFFD2D(pt[0], pt[1], 0, 1, dy);
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD2D<T, CoordType, DOut>::evaluateWorldDX(const CoordType pt[D], T dx[DOut]) const
+{
+    /// 1st order derivative along x in grid coordinates; stop here if the evaluation fails
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD2D(pt[0], pt[1], 1, 0, dx));
+
+    /// rescale every output component by the inverse grid spacing along x
+    const coord_type invSpacingX = coord_type(1.0)/this->get_spacing(0);
+    for ( unsigned int comp=0; comp<DOut; ++comp ) dx[comp] *= invSpacingX;
+
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD2D<T, CoordType, DOut>::evaluateWorldDY(const CoordType pt[D], T dy[DOut]) const
+{
+    /// 1st order derivative along y in grid coordinates; stop here if the evaluation fails
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD2D(pt[0], pt[1], 0, 1, dy));
+
+    /// rescale every output component by the inverse grid spacing along y
+    const coord_type invSpacingY = coord_type(1.0)/this->get_spacing(1);
+    for ( unsigned int comp=0; comp<DOut; ++comp ) dy[comp] *= invSpacingY;
+
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD2D<T, CoordType, DOut>::evaluateFFDDerivative(const CoordType pt[D], T deriv[D][DOut]) const
+{   /// deriv[0] receives the 1st derivative along x (ordx = 1), deriv[1] the 1st derivative along y (ordy = 1), at grid location pt
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD2D(pt[0], pt[1], 1, 0, deriv[0]));
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD2D(pt[0], pt[1], 0, 1, deriv[1]));
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD2D<T, CoordType, DOut>::evaluateFFDDerivative(CoordType px, CoordType py, T deriv[D][DOut]) const
+{   /// deriv[0] receives the 1st derivative along x (ordx = 1), deriv[1] the 1st derivative along y (ordy = 1), at grid location (px, py)
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD2D(px, py, 1, 0, deriv[0]));
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD2D(px, py, 0, 1, deriv[1]));
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD2D<T, CoordType, DOut>::evaluateFFDSecondOrderDerivative(const CoordType pt[D], T dderiv[D*D][DOut]) const
+{   /// rows of dderiv at grid location pt : [0] = dxx, [1] = dxy, [2] = dyx, [3] = dyy
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD2D(pt[0], pt[1], 2, 0, dderiv[0]));
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD2D(pt[0], pt[1], 1, 1, dderiv[1]));
+    memcpy(dderiv[2], dderiv[1], DOut*sizeof(T)); /// dyx is not evaluated again, it is a byte copy of the dxy row
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD2D(pt[0], pt[1], 0, 2, dderiv[3]));
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD2D<T, CoordType, DOut>::evaluateFFDSecondOrderDerivative(CoordType px, CoordType py, T dderiv[D*D][DOut]) const
+{   /// rows of dderiv at grid location (px, py) : [0] = dxx, [1] = dxy, [2] = dyx, [3] = dyy
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD2D(px, py, 2, 0, dderiv[0]));
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD2D(px, py, 1, 1, dderiv[1]));
+    memcpy(dderiv[2], dderiv[1], DOut*sizeof(T)); /// dyx is not evaluated again, it is a byte copy of the dxy row
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD2D(px, py, 0, 2, dderiv[3]));
+    return true;
+}
+
+/// One pass of scattered data approximation with the 2D BSpline FFD, following ref[2].
+/// pos           : 2 by N array of input point positions, in FFD grid coordinates
+/// value         : DOut by N array of target values at the input points
+/// residual      : output, DOut by N; value minus the FFD evaluated at pos, computed after the control point update
+/// totalResidual : output, norm2 of the final residual divided by N
+/// N             : number of input points
+/// returns false if a dimension check or an array operation fails, or if an exception is caught
+template <typename T, typename CoordType, unsigned int DOut>
+bool gtplusBSplineFFD2D<T, CoordType, DOut>::ffdApprox(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N)
+{
+    try
+    {
+        GADGET_CHECK_RETURN_FALSE(pos.get_size(0)==2);
+        GADGET_CHECK_RETURN_FALSE(pos.get_size(1)==N);
+
+        GADGET_CHECK_RETURN_FALSE(value.get_size(0)==DOut);
+        GADGET_CHECK_RETURN_FALSE(value.get_size(1)==N);
+
+        if ( !residual.dimensions_equal(&value) )
+        {
+            residual.create(value.get_dimensions());
+            Gadgetron::clear(residual);
+        }
+
+        size_t sx = this->get_size(0);
+        size_t sy = this->get_size(1);
+
+        /// following the definition of ref[2]
+        /// dx accumulates the weighted control point updates, ds the matching squared weights
+        ho3DArray<T> dx(sx, sy, DOut), ds(sx, sy, DOut);
+        Gadgetron::clear(dx);
+        Gadgetron::clear(ds);
+
+        /// compute the current approximation values
+        ValueArrayType approxValue;
+        approxValue = value;
+
+        /// compute current residual
+        GADGET_CHECK_RETURN_FALSE(this->evaluateFFDArray(pos, approxValue));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::subtract(value, approxValue, residual));
+
+        /// compute the update of control points
+        unsigned int d;
+
+        long long n;
+        for (n=0; n<(long long)N; n++)
+        {
+            coord_type px = pos(0, n);
+            coord_type py = pos(1, n);
+
+            /// points lying too far outside the control point grid do not contribute
+            if ( px<-2 || px>sx+2 || py<-2 || py>sy+2 )
+            {
+                continue;
+            }
+
+            long long ix = (long long)std::floor(px);
+            CoordType deltaX = px-(CoordType)ix;
+
+            long long iy = (long long)std::floor(py);
+            CoordType deltaY = py-(CoordType)iy;
+
+            long long i, j, I, J;
+
+            /// dist : sum of the squared tensor-product BSpline weights over the 4x4 support of this point
+            T dist=0, v, vv, vvv;
+            for (j=0; j<4; j++)
+            {
+                for (i=0; i<4; i++)
+                {
+                    v = this->BSpline(i, deltaX) * this->BSpline(j, deltaY);
+                    dist += v*v;
+                }
+            }
+
+            /// add the contribution of this point to every control point of its support that lies inside the grid
+            for (j=0; j<4; j++)
+            {
+                J = j + iy - 1;
+                if ( (J>=0) && (J<(long long)sy) )
+                {
+                    for (i=0; i<4; i++)
+                    {
+                        I = i + ix - 1;
+                        if ( (I>=0) && (I<(long long)sx) )
+                        {
+                            v = this->BSpline(i, deltaX) * this->BSpline(j, deltaY);
+                            vv = v*v;
+                            vvv = vv*v;
+
+                            for ( d=0; d<DOut; d++ )
+                            {
+                                dx(I, J, d) += vvv*residual(d, n)/dist;
+                                ds(I, J, d) += vv;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        /// update the control point values
+        /// (addEpsilon is applied to ds before the element-wise division dx/ds)
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::addEpsilon(ds));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::divide(dx, ds, dx));
+
+        std::vector<size_t> startND(2, BSPLINEPADDINGSIZE), size(2);
+        size[0] = sx;
+        size[1] = sy;
+
+        hoNDArray<T> ctrlPtWithoutPadding(sx, sy);
+
+        for ( d=0; d<DOut; d++ )
+        {
+            /// array over the d-th sx*sy slice of dx; the offset added to the element pointer is counted in elements, not bytes
+            hoNDArray<T> dx2D(sx, sy, dx.begin()+d*sx*sy);
+
+            /// crop the padding off the control points, add the update, and write the result back (tmpCtrlPt is built on ctrl_pt_[d]'s buffer)
+            std::vector<size_t> dim;
+            this->ctrl_pt_[d].get_dimensions(dim);
+            hoNDArray<T> tmpCtrlPt(dim, this->ctrl_pt_[d].begin(), false);
+            Gadgetron::cropUpTo11DArray(tmpCtrlPt, ctrlPtWithoutPadding, startND, size);
+            Gadgetron::add(ctrlPtWithoutPadding, dx2D, ctrlPtWithoutPadding);
+            Gadgetron::setSubArrayUpTo11DArray(ctrlPtWithoutPadding, tmpCtrlPt, startND, size);
+        }
+
+        /// calculate residual error
+        totalResidual = 0;
+        GADGET_CHECK_RETURN_FALSE(this->evaluateFFDArray(pos, approxValue));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::subtract(value, approxValue, residual));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::norm2(residual, totalResidual));
+        totalResidual = totalResidual / (real_value_type)N;
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in ffdApprox(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+bool gtplusBSplineFFD2D<T, CoordType, DOut>::refine()
+{
+    try
+    {
+        size_t sx = this->get_size(0);
+        size_t sy = this->get_size(1);
+
+        /// the refined control point grid definition
+        /// each axis goes from s to 2*s-1 control points (plus the padding on both sides) and the spacing is halved
+        std::vector<size_t> dim(2);
+        dim[0] = 2*sx-1 + 2*BSPLINEPADDINGSIZE;
+        dim[1] = 2*sy-1 + 2*BSPLINEPADDINGSIZE;
+
+        std::vector<coord_type> spacing;
+        this->get_spacing(spacing);
+        spacing[0] /= 2;
+        spacing[1] /= 2;
+
+        std::vector<coord_type> oldOrigin;
+        this->ctrl_pt_[0].get_origin(oldOrigin);
+        /// gridOrigin : world position of index (BSPLINEPADDINGSIZE, BSPLINEPADDINGSIZE), i.e. the first control point after the padding
+        std::vector<coord_type> gridOrigin(2);
+        this->ctrl_pt_[0].image_to_world( (CoordType)(BSPLINEPADDINGSIZE), (CoordType)(BSPLINEPADDINGSIZE), gridOrigin[0], gridOrigin[1]);
+        /// the new origin is the midpoint between the old origin and gridOrigin
+        std::vector<coord_type> origin(2);
+        origin[0] = (oldOrigin[0] + gridOrigin[0])/2;
+        origin[1] = (oldOrigin[1] + gridOrigin[1])/2;
+
+        typename ImageType::axis_type axis;
+        this->ctrl_pt_[0].get_axis(axis);
+
+        /// allocate new control points
+        FFDCtrlPtGridType new_ctrl_pt[DOut];
+
+        unsigned int d;
+        for( d=0; d<DOut; d++ )
+        {
+            new_ctrl_pt[d].create(dim, spacing, origin, axis);
+            Gadgetron::clear(new_ctrl_pt[d]);
+        }
+
+        /// refinement weights, see ref[2]
+        T w[2][3];
+        /// w[0] : new point at index 2*x, from old points x-1, x, x+1; w[1] : new point at index 2*x+1, from old points x and x+1
+        w[0][0] = T(0.125); w[0][1] = T(0.75);  w[0][2] = T(0.125);
+        w[1][0] = 0;        w[1][1] = T(0.5);   w[1][2] = T(0.5);
+
+        /// compute refined control point values
+        int x, y, i_new, j_new, i_old, j_old;
+        for (y=0; y<sy; y++)
+        {
+            for (x=0; x<sx; x++)
+            {
+                for (j_new=0; j_new<2; j_new++)
+                {
+                    for (i_new=0; i_new<2; i_new++)
+                    {
+                        size_t offsetNew = new_ctrl_pt[0].calculate_offset(2*x+i_new+BSPLINEPADDINGSIZE, 2*y+j_new+BSPLINEPADDINGSIZE);
+                        /// tensor product of the 1D weights over the 3x3 old neighbourhood centred at (x, y)
+                        for (j_old=0; j_old<3; j_old++)
+                        {
+                            for (i_old=0; i_old<3; i_old++)
+                            {
+                                size_t offsetOld = this->calculate_offset(x+i_old-1, y+j_old-1);
+                                for ( d=0; d<DOut; d++ )
+                                {
+                                    new_ctrl_pt[d](offsetNew) += w[i_new][i_old]*w[j_new][j_old] * this->ctrl_pt_[d](offsetOld);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        /// re-create ctrl_pt_[d] on the refined buffer, then stop the temporary from deleting it (final 'true' presumably transfers ownership - confirm)
+        for ( d=0; d<DOut; d++ )
+        {
+            this->ctrl_pt_[d].create(dim, spacing, origin, axis, new_ctrl_pt[d].begin(), true);
+            new_ctrl_pt[d].delete_data_on_destruct(false);
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in refine() ... ");
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+void gtplusBSplineFFD2D<T, CoordType, DOut>::print(std::ostream& os) const
+{
+    /// banner
+    os << "---------------------- GTPlus BSpline 2D Free Form Deformation ------------------" << std::endl;
+    os << "Implement 2D BSpline Free Form Deformation (BFFD) " << std::endl;
+
+    /// names of the value and coordinate types, as reported by typeid
+    const std::string valueTypeName(typeid(T).name());
+    os << "FFD value type is : " << valueTypeName << std::endl;
+
+    const std::string coordTypeName(typeid(CoordType).name());
+    os << "FFD coord type is : " << coordTypeName << std::endl;
+    os << "Output dimension is : " << DOut << std::endl;
+    os << "---------------------------------------------------" << std::endl;
+    /// the grid description is printed from the first output component only
+    os << "BFFD grid information : " << std::endl;
+    this->ctrl_pt_[0].printContent(os);
+    os << "------------------------------------------------------------------------------" << std::endl;
+}
+
+}}
diff --git a/toolboxes/gtplus/algorithm/FreeFormDeformation/gtplusBSplineFFD3D.h b/toolboxes/gtplus/algorithm/FreeFormDeformation/gtplusBSplineFFD3D.h
new file mode 100644
index 0000000..97b9fac
--- /dev/null
+++ b/toolboxes/gtplus/algorithm/FreeFormDeformation/gtplusBSplineFFD3D.h
@@ -0,0 +1,740 @@
+/** \file       gtplusBSplineFFD3D.h
+    \brief      Implement gtPlus 3D BSpline FreeFormDeformation
+    \author     Hui Xue
+*/
+
+#pragma once
+
+#include "gtplusBSplineFFD.h"
+
+namespace Gadgetron { namespace gtPlus {
+
+template <typename T, typename CoordType, unsigned int DOut>
+class gtplusBSplineFFD3D : public gtplusBSplineFFD<T, CoordType, 3, DOut>
+{
+public:
+    /// 3D BSpline FFD : the input dimension of the base class is fixed to 3; T is the value type, DOut the number of output components
+    typedef gtplusBSplineFFD<T, CoordType, 3, DOut> BaseClass;
+    typedef gtplusBSplineFFD3D<T, CoordType, DOut> Self;
+
+    typedef typename BaseClass::real_value_type real_value_type;
+    typedef real_value_type bspline_float_type;
+
+    typedef typename BaseClass::coord_type coord_type;
+
+    enum { D = 3 };
+    using BaseClass::BSPLINELUTSIZE;
+    using BaseClass::BSPLINEPADDINGSIZE;
+
+    typedef typename BaseClass::LUTType             LUTType;
+    typedef typename BaseClass::CoordArrayType      CoordArrayType;
+    typedef typename BaseClass::ValueArrayType      ValueArrayType;
+    typedef typename BaseClass::ArrayType           ArrayType;
+    typedef typename BaseClass::FFDCtrlPtGridType   FFDCtrlPtGridType;
+    typedef typename BaseClass::PointType           PointType;
+    typedef typename BaseClass::ImageType           ImageType;
+
+    /// constructors
+    gtplusBSplineFFD3D();
+    /// define the FFD over a region with specific control point spacing
+    gtplusBSplineFFD3D(const PointType& start, const PointType& end, CoordType dx, CoordType dy, CoordType dz);
+    /// define the FFD over the image region with specific control point spacing
+    gtplusBSplineFFD3D(const ImageType& im, CoordType dx, CoordType dy, CoordType dz);
+    /// define the FFD over the image region with specific number of control points
+    gtplusBSplineFFD3D(const ImageType& im, size_t sx, size_t sy, size_t sz);
+    /// define the FFD over an array region with specific number of control points
+    gtplusBSplineFFD3D(const ArrayType& a, size_t sx, size_t sy, size_t sz);
+    /// copy constructor; copies the control point grid of every output component
+    gtplusBSplineFFD3D(const Self& bffd);
+
+    virtual ~gtplusBSplineFFD3D();
+
+    /// evaluate the FFD at a grid location; r receives one value per output component
+    virtual bool evaluateFFD(const CoordType pt[D], T r[DOut]) const;
+    virtual bool evaluateFFD(CoordType px, CoordType py, CoordType pz, T r[DOut]) const;
+    /// evaluate the 1st order derivative along a single axis, in FFD grid coordinates
+    virtual bool evaluateFFDDX(const CoordType pt[D], T dx[DOut]) const;
+    virtual bool evaluateFFDDY(const CoordType pt[D], T dy[DOut]) const;
+    virtual bool evaluateFFDDZ(const CoordType pt[D], T dz[DOut]) const;
+
+    virtual bool evaluateWorldDX(const CoordType pt[D], T dx[DOut]) const;
+    virtual bool evaluateWorldDY(const CoordType pt[D], T dy[DOut]) const;
+    virtual bool evaluateWorldDZ(const CoordType pt[D], T dz[DOut]) const;
+
+    /// evaluate the 1st order derivative of FFD at a grid location
+    virtual bool evaluateFFDDerivative(const CoordType pt[D], T deriv[D][DOut]) const;
+    virtual bool evaluateFFDDerivative(CoordType px, CoordType py, CoordType pz, T deriv[D][DOut]) const;
+
+    /// evaluate the 2nd order derivative of FFD at a grid location
+    /// dderiv : D*D vector, stores dxx dxy dxz ...; dyx dyy dyz ...; dzx dzy dzz ...
+    virtual bool evaluateFFDSecondOrderDerivative(const CoordType pt[D], T dderiv[D*D][DOut]) const;
+    virtual bool evaluateFFDSecondOrderDerivative(CoordType px, CoordType py, CoordType pz, T dderiv[D*D][DOut]) const;
+
+    /// compute the FFD approximation once
+    /// pos : the position of input points, D by N
+    /// value : the value on input points, DOut by N
+    /// residual : the approximation residual after computing FFD, DOut by N
+    virtual bool ffdApprox(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N);
+
+    /// As suggested in ref [2], the BSpline FFD can be refined to achieve better approximation
+    virtual bool refine();
+
+    /// general print function
+    virtual void print(std::ostream& os) const;
+    /// base class members performTiming_ and debugFolder_ are re-exposed in the public section
+    using BaseClass::performTiming_;
+    using BaseClass::debugFolder_;
+
+protected:
+    /// base class members brought into scope for the implementation below
+    using BaseClass::ctrl_pt_;
+    using BaseClass::gt_timer1_;
+    using BaseClass::gt_timer2_;
+    using BaseClass::gt_timer3_;
+    using BaseClass::gt_exporter_;
+    using BaseClass::gtPlus_util_;
+    using BaseClass::gtPlus_util_complex_;
+    using BaseClass::LUT_;
+    using BaseClass::LUT1_;
+    using BaseClass::LUT2_;
+
+    /// evaluate the FFD or one of its derivatives
+    /// px, py and pz are at FFD grid
+    /// ordx, ordy, ordz indicate the order of derivative along x, y and z; 0/1/2 for 0/1st/2nd derivative
+    virtual bool evaluateFFD3D(CoordType px, CoordType py, CoordType pz, size_t ordx, size_t ordy, size_t ordz, T r[DOut]) const;
+};
+
+template <typename T, typename CoordType, unsigned int DOut>
+gtplusBSplineFFD3D<T, CoordType, DOut>::gtplusBSplineFFD3D() : BaseClass()
+{   /// default constructor : empty body, everything is left to BaseClass()
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+gtplusBSplineFFD3D<T, CoordType, DOut>::gtplusBSplineFFD3D(const PointType& start, const PointType& end, CoordType dx, CoordType dy, CoordType dz) : BaseClass()
+{   /// FFD over the region [start, end] with control point spacing (dx, dy, dz); GADGET_CHECK_THROW presumably throws when initializeBFFD returns false
+    GADGET_CHECK_THROW(this->initializeBFFD(start, end, dx, dy, dz));
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+gtplusBSplineFFD3D<T, CoordType, DOut>::gtplusBSplineFFD3D(const ImageType& im, CoordType dx, CoordType dy, CoordType dz) : BaseClass()
+{
+    typename ImageType::coord_type wx, wy, wz;
+    PointType start, end;
+    /// world coordinates of the first voxel (0, 0, 0) give the start of the FFD region
+    im.image_to_world( (size_t)0, (size_t)0, (size_t)0, wx, wy, wz);
+    start(0) = wx;
+    start(1) = wy;
+    start(2) = wz;
+
+    /// world coordinates of the last voxel give the end of the FFD region
+    im.image_to_world(im.get_size(0)-1, im.get_size(1)-1, im.get_size(2)-1, wx, wy, wz);
+    end(0) = wx;
+    end(1) = wy;
+    end(2) = wz;
+
+    GADGET_CHECK_THROW(this->initializeBFFD(im, start, end, dx, dy, dz));
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+gtplusBSplineFFD3D<T, CoordType, DOut>::gtplusBSplineFFD3D(const ImageType& im, size_t sx, size_t sy, size_t sz) : BaseClass()
+{
+    typename ImageType::coord_type wx, wy, wz;
+    PointType start, end;
+    /// world coordinates of the first voxel (0, 0, 0) give the start of the FFD region
+    im.image_to_world( (size_t)0, (size_t)0, (size_t)0, wx, wy, wz);
+    start(0) = wx;
+    start(1) = wy;
+    start(2) = wz;
+
+    /// world coordinates of the last voxel give the end of the FFD region
+    im.image_to_world(im.get_size(0)-1, im.get_size(1)-1, im.get_size(2)-1, wx, wy, wz);
+    end(0) = wx;
+    end(1) = wy;
+    end(2) = wz;
+
+    GADGET_CHECK_THROW(this->initializeBFFD(im, start, end, sx, sy, sz));
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+gtplusBSplineFFD3D<T, CoordType, DOut>::gtplusBSplineFFD3D(const ArrayType& a, size_t sx, size_t sy, size_t sz) : BaseClass()
+{
+    /// the FFD region spans the array indices (0, 0, 0) .. (size-1 along each axis), with sx by sy by sz control points
+    PointType start, end;
+    start(0) = 0;
+    start(1) = 0;
+    start(2) = 0;
+    /// explicit size_t -> CoordType conversion, same as the 2D array constructor
+    end(0) = (CoordType)(a.get_size(0)-1);
+    end(1) = (CoordType)(a.get_size(1)-1);
+    end(2) = (CoordType)(a.get_size(2)-1);
+
+    GADGET_CHECK_THROW(this->initializeBFFD(start, end, sx, sy, sz));
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+gtplusBSplineFFD3D<T, CoordType, DOut>::gtplusBSplineFFD3D(const Self& bffd) : BaseClass()
+{
+    /// copy the control point grid of each output component from the source object via copyFrom
+    for ( unsigned int comp=0; comp<DOut; ++comp )
+    {
+        this->ctrl_pt_[comp].copyFrom( bffd.get_ctrl_pt(comp) );
+    }
+}
+
+template <typename T, typename CoordType, unsigned int DOut>
+gtplusBSplineFFD3D<T, CoordType, DOut>::~gtplusBSplineFFD3D()
+{   /// empty destructor body : this class releases nothing explicitly
+}
+
+/// Evaluate the FFD (all orders 0) or one of its partial derivatives at the grid location (px, py, pz).
+/// ordx / ordy / ordz select the value (0), 1st (1) or 2nd (2) derivative BSpline LUT along each axis; their sum must not exceed 2.
+/// r receives one value per output component; returns false if an exception is caught (or a GADGET_DEBUG_CHECK_RETURN_FALSE condition trips).
+template <typename T, typename CoordType, unsigned int DOut>
+bool gtplusBSplineFFD3D<T, CoordType, DOut>::evaluateFFD3D(CoordType px, CoordType py, CoordType pz, size_t ordx, size_t ordy, size_t ordz, T r[DOut]) const
+{
+    try
+    {
+        GADGET_DEBUG_CHECK_RETURN_FALSE( (px>=-2) && (px<=this->get_size(0)+1) );
+        GADGET_DEBUG_CHECK_RETURN_FALSE( (py>=-2) && (py<=this->get_size(1)+1) );
+        GADGET_DEBUG_CHECK_RETURN_FALSE( (pz>=-2) && (pz<=this->get_size(2)+1) );
+        GADGET_DEBUG_CHECK_RETURN_FALSE(ordx<=2);
+        GADGET_DEBUG_CHECK_RETURN_FALSE(ordy<=2);
+        GADGET_DEBUG_CHECK_RETURN_FALSE(ordz<=2);
+        GADGET_DEBUG_CHECK_RETURN_FALSE(ordx+ordy+ordz<=2);
+
+        long long ix = (long long)std::floor(px);
+        CoordType deltaX = px-(CoordType)ix;
+        long long lx = GT_MKINT(BSPLINELUTSIZE*deltaX);
+
+        long long iy = (long long)std::floor(py);
+        CoordType deltaY = py-(CoordType)iy;
+        long long ly = GT_MKINT(BSPLINELUTSIZE*deltaY);
+
+        long long iz = (long long)std::floor(pz);
+        CoordType deltaZ = pz-(CoordType)iz;
+        long long lz = GT_MKINT(BSPLINELUTSIZE*deltaZ);
+
+        unsigned int d, jj, kk;
+        size_t offset[4][4]; // z, y
+        offset[0][0] = this->calculate_offset(ix-1, iy-1, iz-1);
+        offset[0][1] = this->calculate_offset(ix-1, iy  , iz-1);
+        offset[0][2] = this->calculate_offset(ix-1, iy+1, iz-1);
+        offset[0][3] = this->calculate_offset(ix-1, iy+2, iz-1);
+
+        offset[1][0] = this->calculate_offset(ix-1, iy-1, iz);
+        offset[1][1] = this->calculate_offset(ix-1, iy  , iz);
+        offset[1][2] = this->calculate_offset(ix-1, iy+1, iz);
+        offset[1][3] = this->calculate_offset(ix-1, iy+2, iz);
+
+        offset[2][0] = this->calculate_offset(ix-1, iy-1, iz+1);
+        offset[2][1] = this->calculate_offset(ix-1, iy  , iz+1);
+        offset[2][2] = this->calculate_offset(ix-1, iy+1, iz+1);
+        offset[2][3] = this->calculate_offset(ix-1, iy+2, iz+1);
+
+        offset[3][0] = this->calculate_offset(ix-1, iy-1, iz+2);
+        offset[3][1] = this->calculate_offset(ix-1, iy  , iz+2);
+        offset[3][2] = this->calculate_offset(ix-1, iy+1, iz+2);
+        offset[3][3] = this->calculate_offset(ix-1, iy+2, iz+2);
+
+        const LUTType* p_xLUT= &this->LUT_;
+        const LUTType* p_yLUT= &this->LUT_;
+        const LUTType* p_zLUT= &this->LUT_;
+
+        if ( ordx == 1 )
+        {
+            p_xLUT= &this->LUT1_;
+        }
+        else if ( ordx == 2 )
+        {
+            p_xLUT= &this->LUT2_;
+        }
+
+        if ( ordy == 1 )
+        {
+            p_yLUT= &this->LUT1_;
+        }
+        else if ( ordy == 2 )
+        {
+            p_yLUT= &this->LUT2_;
+        }
+
+        if ( ordz == 1 )
+        {
+            p_zLUT= &this->LUT1_;
+        }
+        else if ( ordz == 2 )
+        {
+            p_zLUT= &this->LUT2_;
+        }
+
+        const LUTType& xLUT= *p_xLUT;
+        const LUTType& yLUT= *p_yLUT;
+        const LUTType& zLUT= *p_zLUT;
+
+        for ( d=0; d<DOut; d++ )
+        {
+            r[d] = 0;
+            for (kk=0; kk<4; kk++)
+            {
+                T rv = 0;
+                for (jj=0; jj<4; jj++)
+                {
+                    T v  =  ( this->ctrl_pt_[d](offset[kk][jj])   * xLUT[lx][0] )
+                        + ( this->ctrl_pt_[d](offset[kk][jj]+1) * xLUT[lx][1] )
+                        + ( this->ctrl_pt_[d](offset[kk][jj]+2) * xLUT[lx][2] )
+                        + ( this->ctrl_pt_[d](offset[kk][jj]+3) * xLUT[lx][3] );
+                    rv += v * yLUT[ly][jj];
+                }
+                r[d] += rv * zLUT[lz][kk];
+            }
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in evaluateFFD3D(CoordType px, CoordType py, CoordType pz, size_t ordx, size_t ordy, size_t ordz, T r[DOut]) const ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Evaluate the FFD value (derivative order 0 in every direction) at the grid location pt = (x, y, z).
+/// r receives the DOut evaluated values; the status reported by evaluateFFD3D is returned unchanged.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD3D<T, CoordType, DOut>::evaluateFFD(const CoordType pt[D], T r[DOut]) const
+{
+    // order 0 along x, y and z
+    const size_t noDeriv = 0;
+    return this->evaluateFFD3D(pt[0], pt[1], pt[2], noDeriv, noDeriv, noDeriv, r);
+}
+
+/// Evaluate the FFD value (derivative order 0 in every direction) at the grid location (px, py, pz).
+/// r receives the DOut evaluated values; the status reported by evaluateFFD3D is returned unchanged.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD3D<T, CoordType, DOut>::evaluateFFD(CoordType px, CoordType py, CoordType pz, T r[DOut]) const
+{
+    // order 0 along x, y and z
+    const size_t noDeriv = 0;
+    return this->evaluateFFD3D(px, py, pz, noDeriv, noDeriv, noDeriv, r);
+}
+
+/// Evaluate the first order derivative of the FFD along x at the grid location pt.
+/// dx receives the DOut values; the status reported by evaluateFFD3D is returned unchanged.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD3D<T, CoordType, DOut>::evaluateFFDDX(const CoordType pt[D], T dx[DOut]) const
+{
+    const size_t firstOrder = 1;
+    const size_t noDeriv = 0;
+    return this->evaluateFFD3D(pt[0], pt[1], pt[2], firstOrder, noDeriv, noDeriv, dx);
+}
+
+/// Evaluate the first order derivative of the FFD along y at the grid location pt.
+/// dy receives the DOut values; the status reported by evaluateFFD3D is returned unchanged.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD3D<T, CoordType, DOut>::evaluateFFDDY(const CoordType pt[D], T dy[DOut]) const
+{
+    const size_t firstOrder = 1;
+    const size_t noDeriv = 0;
+    return this->evaluateFFD3D(pt[0], pt[1], pt[2], noDeriv, firstOrder, noDeriv, dy);
+}
+
+/// Evaluate the first order derivative of the FFD along z at the grid location pt.
+/// dz receives the DOut values; the status reported by evaluateFFD3D is returned unchanged.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD3D<T, CoordType, DOut>::evaluateFFDDZ(const CoordType pt[D], T dz[DOut]) const
+{
+    const size_t firstOrder = 1;
+    const size_t noDeriv = 0;
+    return this->evaluateFFD3D(pt[0], pt[1], pt[2], noDeriv, noDeriv, firstOrder, dz);
+}
+
+/// Evaluate the first order x derivative of the FFD at the grid location pt and
+/// multiply each of the DOut values by 1/get_spacing(0).
+/// NOTE(review): presumably this rescales the derivative from grid units to world units -- confirm.
+/// dx receives the scaled values. Returns true on success; a failed evaluateFFD3D
+/// call leaves through GADGET_CHECK_RETURN_FALSE.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD3D<T, CoordType, DOut>::evaluateWorldDX(const CoordType pt[D], T dx[DOut]) const
+{
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD3D(pt[0], pt[1], pt[2], 1, 0, 0, dx));
+    /// reciprocal of the control point spacing along dimension 0
+    coord_type sx = coord_type(1.0)/this->get_spacing(0);
+    unsigned int d;
+    for ( d=0; d<DOut; d++ )
+    {
+        dx[d] *= sx;
+    }
+    return true;
+}
+
+/// Evaluate the first order y derivative of the FFD at the grid location pt and
+/// multiply each of the DOut values by 1/get_spacing(1).
+/// NOTE(review): presumably this rescales the derivative from grid units to world units -- confirm.
+/// dy receives the scaled values. Returns true on success; a failed evaluateFFD3D
+/// call leaves through GADGET_CHECK_RETURN_FALSE.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD3D<T, CoordType, DOut>::evaluateWorldDY(const CoordType pt[D], T dy[DOut]) const
+{
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD3D(pt[0], pt[1], pt[2], 0, 1, 0, dy));
+    /// reciprocal of the control point spacing along dimension 1
+    coord_type sy = coord_type(1.0)/this->get_spacing(1);
+    unsigned int d;
+    for ( d=0; d<DOut; d++ )
+    {
+        dy[d] *= sy;
+    }
+    return true;
+}
+
+/// Evaluate the first order z derivative of the FFD at the grid location pt and
+/// multiply each of the DOut values by 1/get_spacing(2).
+/// NOTE(review): presumably this rescales the derivative from grid units to world units -- confirm.
+/// dz receives the scaled values. Returns true on success; a failed evaluateFFD3D
+/// call leaves through GADGET_CHECK_RETURN_FALSE.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD3D<T, CoordType, DOut>::evaluateWorldDZ(const CoordType pt[D], T dz[DOut]) const
+{
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD3D(pt[0], pt[1], pt[2], 0, 0, 1, dz));
+    /// reciprocal of the control point spacing along dimension 2
+    coord_type sz = coord_type(1.0)/this->get_spacing(2);
+    unsigned int d;
+    for ( d=0; d<DOut; d++ )
+    {
+        dz[d] *= sz;
+    }
+    return true;
+}
+
+/// Evaluate all three first order derivatives of the FFD at the grid location pt.
+/// deriv[0], deriv[1] and deriv[2] receive the derivative along x, y and z respectively,
+/// each holding DOut values. The three evaluations are independent calls to evaluateFFD3D;
+/// the first one that fails ends the function through GADGET_CHECK_RETURN_FALSE.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD3D<T, CoordType, DOut>::evaluateFFDDerivative(const CoordType pt[D], T deriv[D][DOut]) const
+{
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD3D(pt[0], pt[1], pt[2], 1, 0, 0, deriv[0]));
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD3D(pt[0], pt[1], pt[2], 0, 1, 0, deriv[1]));
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD3D(pt[0], pt[1], pt[2], 0, 0, 1, deriv[2]));
+    return true;
+}
+
+/// Evaluate all three first order derivatives of the FFD at the grid location (px, py, pz).
+/// deriv[0], deriv[1] and deriv[2] receive the derivative along x, y and z respectively,
+/// each holding DOut values. The three evaluations are independent calls to evaluateFFD3D;
+/// the first one that fails ends the function through GADGET_CHECK_RETURN_FALSE.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD3D<T, CoordType, DOut>::evaluateFFDDerivative(CoordType px, CoordType py, CoordType pz, T deriv[D][DOut]) const
+{
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD3D(px, py, pz, 1, 0, 0, deriv[0]));
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD3D(px, py, pz, 0, 1, 0, deriv[1]));
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD3D(px, py, pz, 0, 0, 1, deriv[2]));
+    return true;
+}
+
+/// Evaluate the nine second order derivatives of the FFD at the grid location pt.
+/// dderiv is filled row by row, each row holding DOut values:
+///   [0] dxx  [1] dxy  [2] dxz
+///   [3] dyx  [4] dyy  [5] dyz
+///   [6] dzx  [7] dzy  [8] dzz
+/// Only six rows are evaluated; dyx, dzx and dzy are byte copies of dxy, dxz and dyz.
+/// The first evaluateFFD3D call that fails ends the function through GADGET_CHECK_RETURN_FALSE,
+/// leaving the remaining rows untouched.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD3D<T, CoordType, DOut>::evaluateFFDSecondOrderDerivative(const CoordType pt[D], T dderiv[D*D][DOut]) const
+{
+    // dxx
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD3D(pt[0], pt[1], pt[2], 2, 0, 0, dderiv[0]));
+    // dxy
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD3D(pt[0], pt[1], pt[2], 1, 1, 0, dderiv[1]));
+    // dxz
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD3D(pt[0], pt[1], pt[2], 1, 0, 1, dderiv[2]));
+    // dyx, copied from dxy
+    memcpy(dderiv[3], dderiv[1], DOut*sizeof(T));
+    // dyy
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD3D(pt[0], pt[1], pt[2], 0, 2, 0, dderiv[4]));
+    // dyz
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD3D(pt[0], pt[1], pt[2], 0, 1, 1, dderiv[5]));
+    // dzx, copied from dxz
+    memcpy(dderiv[6], dderiv[2], DOut*sizeof(T));
+    // dzy, copied from dyz
+    memcpy(dderiv[7], dderiv[5], DOut*sizeof(T));
+    // dzz
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD3D(pt[0], pt[1], pt[2], 0, 0, 2, dderiv[8]));
+
+    return true;
+}
+
+/// Evaluate the nine second order derivatives of the FFD at the grid location (px, py, pz).
+/// dderiv is filled row by row, each row holding DOut values:
+///   [0] dxx  [1] dxy  [2] dxz
+///   [3] dyx  [4] dyy  [5] dyz
+///   [6] dzx  [7] dzy  [8] dzz
+/// Only six rows are evaluated; dyx, dzx and dzy are byte copies of dxy, dxz and dyz.
+/// The first evaluateFFD3D call that fails ends the function through GADGET_CHECK_RETURN_FALSE,
+/// leaving the remaining rows untouched.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD3D<T, CoordType, DOut>::evaluateFFDSecondOrderDerivative(CoordType px, CoordType py, CoordType pz, T dderiv[D*D][DOut]) const
+{
+    // dxx
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD3D(px, py, pz, 2, 0, 0, dderiv[0]));
+    // dxy
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD3D(px, py, pz, 1, 1, 0, dderiv[1]));
+    // dxz
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD3D(px, py, pz, 1, 0, 1, dderiv[2]));
+    // dyx, copied from dxy
+    memcpy(dderiv[3], dderiv[1], DOut*sizeof(T));
+    // dyy
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD3D(px, py, pz, 0, 2, 0, dderiv[4]));
+    // dyz
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD3D(px, py, pz, 0, 1, 1, dderiv[5]));
+    // dzx, copied from dxz
+    memcpy(dderiv[6], dderiv[2], DOut*sizeof(T));
+    // dzy, copied from dyz
+    memcpy(dderiv[7], dderiv[5], DOut*sizeof(T));
+    // dzz
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD3D(px, py, pz, 0, 0, 2, dderiv[8]));
+
+    return true;
+}
+
+/// Compute one pass of the FFD approximation and add the resulting update to the control points.
+/// pos           : positions of the input points, D by N (checked)
+/// value         : values at the input points, DOut by N (checked)
+/// residual      : receives value minus the FFD evaluated at pos, computed after the update;
+///                 it is (re)allocated when its dimensions differ from those of value
+/// totalResidual : receives norm2(residual) divided by N
+/// N             : number of input points
+/// Returns false when a size check or a checked helper call fails, or when an exception is caught.
+template <typename T, typename CoordType, unsigned int DOut>
+bool gtplusBSplineFFD3D<T, CoordType, DOut>::ffdApprox(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N)
+{
+    try
+    {
+        GADGET_CHECK_RETURN_FALSE(pos.get_size(0)==D);
+        GADGET_CHECK_RETURN_FALSE(pos.get_size(1)==N);
+
+        GADGET_CHECK_RETURN_FALSE(value.get_size(0)==DOut);
+        GADGET_CHECK_RETURN_FALSE(value.get_size(1)==N);
+
+        if ( !residual.dimensions_equal(&value) )
+        {
+            residual.create(value.get_dimensions());
+            Gadgetron::clear(residual);
+        }
+
+        size_t sx = this->get_size(0);
+        size_t sy = this->get_size(1);
+        size_t sz = this->get_size(2);
+
+        /// following the definition of ref[2]
+        /// dx accumulates the weighted control point updates, ds the sum of squared weights
+        ho4DArray<T> dx(sx, sy, sz, DOut), ds(sx, sy, sz, DOut);
+        Gadgetron::clear(dx);
+        Gadgetron::clear(ds);
+
+        /// compute the current approximation values
+        /// (the assignment is only used to give approxValue the layout of value)
+        ValueArrayType approxValue;
+        approxValue = value;
+
+        /// compute current residual
+        GADGET_CHECK_RETURN_FALSE(this->evaluateFFDArray(pos, approxValue));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::subtract(value, approxValue, residual));
+
+        /// compute the update of control points
+        unsigned int d;
+
+        long long n;
+        for (n=0; n<N; n++)
+        {
+            coord_type px = pos(0, n);
+            coord_type py = pos(1, n);
+            coord_type pz = pos(2, n);
+
+            /// points further than two grid cells outside the control point grid are ignored
+            if ( px<-2 || px>sx+2
+                || py<-2 || py>sy+2
+                || pz<-2 || pz>sz+2 )
+            {
+                continue;
+            }
+
+            long long ix = (long long)std::floor(px);
+            CoordType deltaX = px-(CoordType)ix;
+
+            long long iy = (long long)std::floor(py);
+            CoordType deltaY = py-(CoordType)iy;
+
+            long long iz = (long long)std::floor(pz);
+            CoordType deltaZ = pz-(CoordType)iz;
+
+            long long i, j, k, I, J, K;
+
+            /// dist is the sum of the squared tensor product weights over the 4x4x4 neighbourhood
+            T dist=0, v, vv, vvv;
+            for (k=0; k<4; k++)
+            {
+                for (j=0; j<4; j++)
+                {
+                    for (i=0; i<4; i++)
+                    {
+                        v = this->BSpline(i, deltaX) * this->BSpline(j, deltaY) * this->BSpline(k, deltaZ);
+                        dist += v*v;
+                    }
+                }
+            }
+
+            /// accumulate the contribution of this point; neighbours outside the grid are skipped
+            for (k=0; k<4; k++)
+            {
+                K = k + iz - 1;
+                if ( (K>=0) && (K<(long long)sz) )
+                {
+                    for (j=0; j<4; j++)
+                    {
+                        J = j + iy - 1;
+                        if ( (J>=0) && (J<(long long)sy) )
+                        {
+                            for (i=0; i<4; i++)
+                            {
+                                I = i + ix - 1;
+                                if ( (I>=0) && (I<(long long)sx) )
+                                {
+                                    v = this->BSpline(i, deltaX) * this->BSpline(j, deltaY) * this->BSpline(k, deltaZ);
+                                    vv = v*v;
+                                    vvv = vv*v;
+
+                                    for ( d=0; d<DOut; d++ )
+                                    {
+                                        dx(I, J, K, d) += vvv*residual(d, n)/dist;
+                                        ds(I, J, K, d) += vv;
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        /// update the control point values
+        /// (addEpsilon is applied to ds before it is used as the divisor)
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::addEpsilon(ds));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::divide(dx, ds, dx));
+
+        std::vector<size_t> startND(3, BSPLINEPADDINGSIZE), size(3);
+        size[0] = sx;
+        size[1] = sy;
+        size[2] = sz;
+
+        hoNDArray<T> ctrlPtWithoutPadding(sx, sy, sz);
+
+        for ( d=0; d<DOut; d++ )
+        {
+            /// view on the d-th sx*sy*sz slab of dx; the offset is counted in elements,
+            /// pointer arithmetic already accounts for sizeof(T)
+            hoNDArray<T> dx3D(sx, sy, sz, dx.begin()+d*sx*sy*sz);
+
+            std::vector<size_t> dim;
+            this->ctrl_pt_[d].get_dimensions(dim);
+            hoNDArray<T> tmpCtrlPt(dim, this->ctrl_pt_[d].begin(), false);
+            /// crop the unpadded region, add the update and write it back in place
+            Gadgetron::cropUpTo11DArray(tmpCtrlPt, ctrlPtWithoutPadding, startND, size);
+            Gadgetron::add(ctrlPtWithoutPadding, dx3D, ctrlPtWithoutPadding);
+            Gadgetron::setSubArrayUpTo11DArray(ctrlPtWithoutPadding, tmpCtrlPt, startND, size);
+        }
+
+        /// calculate residual error
+        totalResidual = 0;
+        GADGET_CHECK_RETURN_FALSE(this->evaluateFFDArray(pos, approxValue));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::subtract(value, approxValue, residual));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::norm2(residual, totalResidual));
+        totalResidual = totalResidual / (real_value_type)N;
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in ffdApprox(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Replace the control point grid by a refined one.
+/// The new grid has 2*size-1 control points (plus padding) per dimension and half the spacing
+/// along x and y; the z spacing is halved only when the grid has more than one point along z.
+/// Every new control point is a weighted sum of old control points (weights w below).
+/// Returns false if an exception is caught, true otherwise.
+template <typename T, typename CoordType, unsigned int DOut>
+bool gtplusBSplineFFD3D<T, CoordType, DOut>::refine()
+{
+    try
+    {
+        size_t sx = this->get_size(0);
+        size_t sy = this->get_size(1);
+        size_t sz = this->get_size(2);
+
+        /// the refined control point grid definition
+
+        std::vector<size_t> dim(3);
+        dim[0] = 2*sx-1 + 2*BSPLINEPADDINGSIZE;
+        dim[1] = 2*sy-1 + 2*BSPLINEPADDINGSIZE;
+        dim[2] = 2*sz-1 + 2*BSPLINEPADDINGSIZE;
+
+        std::vector<coord_type> spacing;
+        this->get_spacing(spacing);
+        spacing[0] /= 2;
+        spacing[1] /= 2;
+        if ( sz > 1 ) spacing[2] /= 2;
+
+        std::vector<coord_type> oldOrigin;
+        this->ctrl_pt_[0].get_origin(oldOrigin);
+
+        /// world position of the control point at index (padding, padding, padding)
+        std::vector<coord_type> gridOrigin(3);
+        this->ctrl_pt_[0].image_to_world( (CoordType)(BSPLINEPADDINGSIZE), (CoordType)(BSPLINEPADDINGSIZE), (CoordType)(BSPLINEPADDINGSIZE), gridOrigin[0], gridOrigin[1], gridOrigin[2]);
+
+        /// the new origin is the midpoint between the old origin and that position
+        std::vector<coord_type> origin(3);
+        origin[0] = (oldOrigin[0] + gridOrigin[0])/2;
+        origin[1] = (oldOrigin[1] + gridOrigin[1])/2;
+        origin[2] = (oldOrigin[2] + gridOrigin[2])/2;
+
+        typename ImageType::axis_type axis;
+        this->ctrl_pt_[0].get_axis(axis);
+
+        /// allocate new control points
+        FFDCtrlPtGridType new_ctrl_pt[DOut];
+
+        unsigned int d;
+        for( d=0; d<DOut; d++ )
+        {
+            new_ctrl_pt[d].create(dim, spacing, origin, axis);
+            Gadgetron::clear(new_ctrl_pt[d]);
+        }
+
+        /// refinement weights, see ref[2]
+        /// w[0] is used for the new point at index 2*x, w[1] for the new point at index 2*x+1;
+        /// the second index runs over the old neighbours x-1, x, x+1
+        T w[2][3];
+
+        w[0][0] = T(0.125); w[0][1] = T(0.75);  w[0][2] = T(0.125);
+        w[1][0] = 0;        w[1][1] = T(0.5);   w[1][2] = T(0.5);
+
+        /// compute refined control point values
+        int x, y, z, i_new, j_new, k_new, i_old, j_old, k_old;
+        if ( sz > 1 )
+        {
+            /// full 3D refinement: 2x2x2 new points per old point, each from a 3x3x3 old neighbourhood
+            for (z=0; z<sz; z++)
+            {
+                for (y=0; y<sy; y++)
+                {
+                    for (x=0; x<sx; x++)
+                    {
+                        for (k_new=0; k_new<2; k_new++)
+                        {
+                            for (j_new=0; j_new<2; j_new++)
+                            {
+                                for (i_new=0; i_new<2; i_new++)
+                                {
+                                    size_t offsetNew = new_ctrl_pt[0].calculate_offset(2*x+i_new+BSPLINEPADDINGSIZE, 2*y+j_new+BSPLINEPADDINGSIZE, 2*z+k_new+BSPLINEPADDINGSIZE);
+
+                                    for (k_old=0; k_old<3; k_old++)
+                                    {
+                                        for (j_old=0; j_old<3; j_old++)
+                                        {
+                                            for (i_old=0; i_old<3; i_old++)
+                                            {
+                                                size_t offsetOld = this->calculate_offset(x+i_old-1, y+j_old-1, z+k_old-1);
+                                                for ( d=0; d<DOut; d++ )
+                                                {
+                                                    new_ctrl_pt[d](offsetNew) += w[i_new][i_old]*w[j_new][j_old]* w[k_new][k_old] * this->ctrl_pt_[d](offsetOld);
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        else
+        {
+            /// single point along z: refine in x and y only, z stays at the first unpadded index
+            for (y=0; y<sy; y++)
+            {
+                for (x=0; x<sx; x++)
+                {
+                    for (j_new=0; j_new<2; j_new++)
+                    {
+                        for (i_new=0; i_new<2; i_new++)
+                        {
+                            size_t offsetNew = new_ctrl_pt[0].calculate_offset(2*x+i_new+BSPLINEPADDINGSIZE, 2*y+j_new+BSPLINEPADDINGSIZE, BSPLINEPADDINGSIZE);
+
+                            for (j_old=0; j_old<3; j_old++)
+                            {
+                                for (i_old=0; i_old<3; i_old++)
+                                {
+                                    size_t offsetOld = this->calculate_offset(x+i_old-1, y+j_old-1, 0);
+                                    for ( d=0; d<DOut; d++ )
+                                    {
+                                        new_ctrl_pt[d](offsetNew) += w[i_new][i_old]*w[j_new][j_old] * this->ctrl_pt_[d](offsetOld);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        /// NOTE(review): presumably the last argument (true) makes ctrl_pt_[d] adopt the buffer of
+        /// new_ctrl_pt[d], and delete_data_on_destruct(false) stops the temporary from freeing it -- confirm
+        for ( d=0; d<DOut; d++ )
+        {
+            this->ctrl_pt_[d].create(dim, spacing, origin, axis, new_ctrl_pt[d].begin(), true);
+            new_ctrl_pt[d].delete_data_on_destruct(false);
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in refine() ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Write a summary of this FFD to os: the typeid names of T and CoordType, the output
+/// dimension DOut and the content of the first control point grid (via printContent).
+template <typename T, typename CoordType, unsigned int DOut>
+void gtplusBSplineFFD3D<T, CoordType, DOut>::print(std::ostream& os) const
+{
+    // typeid(...).name() is an implementation-defined string
+    const std::string valueTypeName( typeid(T).name() );
+    const std::string coordTypeName( typeid(CoordType).name() );
+
+    os << "---------------------- GTPlus BSpline 3D Free Form Deformation ------------------" << std::endl;
+    os << "Implement 3D BSpline Free Form Deformation (BFFD) " << std::endl;
+
+    os << "FFD value type is : " << valueTypeName << std::endl;
+    os << "FFD coord type is : " << coordTypeName << std::endl;
+    os << "Output dimension is : " << DOut << std::endl;
+
+    os << "---------------------------------------------------" << std::endl;
+    os << "BFFD grid information : " << std::endl;
+    this->ctrl_pt_[0].printContent(os);
+    os << "---------------------------------------------------------------------------------" << std::endl;
+}
+
+}}
diff --git a/toolboxes/gtplus/algorithm/FreeFormDeformation/gtplusBSplineFFD4D.h b/toolboxes/gtplus/algorithm/FreeFormDeformation/gtplusBSplineFFD4D.h
new file mode 100644
index 0000000..017d792
--- /dev/null
+++ b/toolboxes/gtplus/algorithm/FreeFormDeformation/gtplusBSplineFFD4D.h
@@ -0,0 +1,905 @@
+/** \file       gtplusBSplineFFD4D.h
+    \brief      Implement gtPlus 4D BSpline FreeFormDeformation
+    \author     Hui Xue
+*/
+
+#pragma once
+
+#include "gtplusFFDBase.h"
+
+namespace Gadgetron { namespace gtPlus {
+
+/// 4D BSpline free form deformation: gtplusBSplineFFD with the grid dimension fixed to 4.
+/// The four grid directions are called x, y, z and s throughout this class.
+/// T is the value type, CoordType the coordinate type and DOut the number of output values per point.
+template <typename T, typename CoordType, unsigned int DOut>
+class gtplusBSplineFFD4D : public gtplusBSplineFFD<T, CoordType, 4, DOut>
+{
+public:
+
+    typedef gtplusBSplineFFD<T, CoordType, 4, DOut> BaseClass;
+    typedef gtplusBSplineFFD4D<T, CoordType, DOut> Self;
+
+    typedef typename BaseClass::real_value_type real_value_type;
+    typedef real_value_type bspline_float_type;
+
+    typedef typename BaseClass::coord_type coord_type;
+
+    /// dimension of the FFD grid
+    enum { D = 4 };
+    using BaseClass::BSPLINELUTSIZE;
+    using BaseClass::BSPLINEPADDINGSIZE;
+
+    typedef typename BaseClass::LUTType             LUTType;
+    typedef typename BaseClass::CoordArrayType      CoordArrayType;
+    typedef typename BaseClass::ValueArrayType      ValueArrayType;
+    typedef typename BaseClass::ArrayType           ArrayType;
+    typedef typename BaseClass::FFDCtrlPtGridType   FFDCtrlPtGridType;
+    typedef typename BaseClass::PointType           PointType;
+    typedef typename BaseClass::ImageType           ImageType;
+
+    /// constructors
+    gtplusBSplineFFD4D();
+    /// define the FFD over a region with specific control point spacing
+    gtplusBSplineFFD4D(const PointType& start, const PointType& end, CoordType dx, CoordType dy, CoordType dz, CoordType ds);
+    /// define the FFD over the image region with specific control point spacing
+    gtplusBSplineFFD4D(const ImageType& im, CoordType dx, CoordType dy, CoordType dz, CoordType ds);
+    /// define the FFD over the image region with specific number of control points
+    gtplusBSplineFFD4D(const ImageType& im, size_t sx, size_t sy, size_t sz, size_t ss);
+    /// define the FFD over an array region with specific number of control points
+    gtplusBSplineFFD4D(const ArrayType& a, size_t sx, size_t sy, size_t sz, size_t ss);
+    /// copy constructor
+    gtplusBSplineFFD4D(const Self& bffd);
+
+    virtual ~gtplusBSplineFFD4D();
+
+    /// evaluate the FFD at a grid location
+    virtual bool evaluateFFD(const CoordType pt[D], T r[DOut]) const;
+    virtual bool evaluateFFD(CoordType px, CoordType py, CoordType pz, CoordType ps, T r[DOut]) const;
+
+    /// evaluate the 1st order derivative of FFD along a single direction at a grid location
+    virtual bool evaluateFFDDX(const CoordType pt[D], T dx[DOut]) const;
+    virtual bool evaluateFFDDY(const CoordType pt[D], T dy[DOut]) const;
+    virtual bool evaluateFFDDZ(const CoordType pt[D], T dz[DOut]) const;
+    virtual bool evaluateFFDDS(const CoordType pt[D], T ds[DOut]) const;
+
+    /// same as above, with the result divided by the control point spacing of that direction
+    virtual bool evaluateWorldDX(const CoordType pt[D], T dx[DOut]) const;
+    virtual bool evaluateWorldDY(const CoordType pt[D], T dy[DOut]) const;
+    virtual bool evaluateWorldDZ(const CoordType pt[D], T dz[DOut]) const;
+    virtual bool evaluateWorldDS(const CoordType pt[D], T ds[DOut]) const;
+
+    /// evaluate the 1st order derivative of FFD at a grid location
+    virtual bool evaluateFFDDerivative(const CoordType pt[D], T deriv[D][DOut]) const;
+    virtual bool evaluateFFDDerivative(CoordType px, CoordType py, CoordType pz, CoordType ps, T deriv[D][DOut]) const;
+
+    /// evaluate the 2nd order derivative of FFD at a grid location
+    /// dderiv : D*D vector, stores dxx dxy dxz ...; dyx dyy dyz ...; dzx dzy dzz ...
+    virtual bool evaluateFFDSecondOrderDerivative(const CoordType pt[D], T dderiv[D*D][DOut]) const;
+    virtual bool evaluateFFDSecondOrderDerivative(CoordType px, CoordType py, CoordType pz, CoordType ps, T dderiv[D*D][DOut]) const;
+
+    /// compute the FFD approximation once
+    /// pos : the position of input points, D by N
+    /// value : the value on input points, DOut by N
+    /// residual : the approximation residual after computing FFD, DOut by N
+    virtual bool ffdApprox(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N);
+
+    /// As suggested in ref [2], the BSpline FFD can be refined to achieve better approximation
+    virtual bool refine();
+
+    /// general print function
+    virtual void print(std::ostream& os) const;
+
+    using BaseClass::performTiming_;
+    using BaseClass::debugFolder_;
+
+protected:
+
+    using BaseClass::ctrl_pt_;
+    using BaseClass::gt_timer1_;
+    using BaseClass::gt_timer2_;
+    using BaseClass::gt_timer3_;
+    using BaseClass::gt_exporter_;
+    using BaseClass::gtPlus_util_;
+    using BaseClass::gtPlus_util_complex_;
+    using BaseClass::LUT_;
+    using BaseClass::LUT1_;
+    using BaseClass::LUT2_;
+
+    /// evaluate the FFD
+    /// px, py, pz and ps are at FFD grid
+    /// ordx, ordy, ordz, ords indicate the order of derivative along each direction; 0/1/2 for 0/1st/2nd derivative
+    virtual bool evaluateFFD4D(CoordType px, CoordType py, CoordType pz, CoordType ps, size_t ordx, size_t ordy, size_t ordz, size_t ords, T r[DOut]) const;
+};
+
+/// Default constructor: default-constructs the base class and does nothing else.
+template <typename T, typename CoordType, unsigned int DOut>
+gtplusBSplineFFD4D<T, CoordType, DOut>::gtplusBSplineFFD4D()
+    : BaseClass()
+{
+    // no additional set-up is performed here
+}
+
+/// Define the FFD over the region [start, end] with control point spacing dx, dy, dz, ds.
+/// The work is delegated to initializeBFFD; its result is checked with GADGET_CHECK_THROW
+/// (presumably throwing when initialization fails -- confirm against the macro definition).
+template <typename T, typename CoordType, unsigned int DOut>
+gtplusBSplineFFD4D<T, CoordType, DOut>::gtplusBSplineFFD4D(const PointType& start, const PointType& end, CoordType dx, CoordType dy, CoordType dz, CoordType ds) : BaseClass()
+{
+    GADGET_CHECK_THROW(this->initializeBFFD(start, end, dx, dy, dz, ds));
+}
+
+/// Define the FFD over the region of image im with control point spacing dx, dy, dz, ds.
+/// The region runs from the world position of the first pixel (index 0 in all four dimensions)
+/// to the world position of the last pixel (index size-1 in all four dimensions).
+/// The work is delegated to initializeBFFD; its result is checked with GADGET_CHECK_THROW
+/// (presumably throwing when initialization fails -- confirm against the macro definition).
+template <typename T, typename CoordType, unsigned int DOut>
+gtplusBSplineFFD4D<T, CoordType, DOut>::gtplusBSplineFFD4D(const ImageType& im, CoordType dx, CoordType dy, CoordType dz, CoordType ds) : BaseClass()
+{
+    PointType start, end;
+
+    typename ImageType::coord_type x, y, z, s;
+
+    /// the indices are passed as size_t, like the get_size()-1 indices below and like the
+    /// constructor taking control point counts; bare int literals could match an integer
+    /// index overload and a coordinate overload equally well
+    im.image_to_world( (size_t)0, (size_t)0, (size_t)0, (size_t)0, x, y, z, s);
+    start(0) = x;
+    start(1) = y;
+    start(2) = z;
+    start(3) = s;
+
+    im.image_to_world(im.get_size(0)-1, im.get_size(1)-1, im.get_size(2)-1, im.get_size(3)-1, x, y, z, s);
+    end(0) = x;
+    end(1) = y;
+    end(2) = z;
+    end(3) = s;
+
+    GADGET_CHECK_THROW(this->initializeBFFD(im, start, end, dx, dy, dz, ds));
+}
+
+/// Define the FFD over the region of image im with sx, sy, sz, ss control points.
+/// The region runs from the world position of the first pixel (index 0 in all four dimensions)
+/// to the world position of the last pixel (index size-1 in all four dimensions).
+/// The work is delegated to initializeBFFD; its result is checked with GADGET_CHECK_THROW
+/// (presumably throwing when initialization fails -- confirm against the macro definition).
+template <typename T, typename CoordType, unsigned int DOut>
+gtplusBSplineFFD4D<T, CoordType, DOut>::gtplusBSplineFFD4D(const ImageType& im, size_t sx, size_t sy, size_t sz, size_t ss) : BaseClass()
+{
+    PointType start, end;
+
+    typename ImageType::coord_type x, y, z, s;
+
+    /// world position of the first pixel
+    im.image_to_world( (size_t)0, (size_t)0, (size_t)0, (size_t)0, x, y, z, s);
+    start(0) = x;
+    start(1) = y;
+    start(2) = z;
+    start(3) = s;
+
+    /// world position of the last pixel
+    im.image_to_world(im.get_size(0)-1, im.get_size(1)-1, im.get_size(2)-1, im.get_size(3)-1, x, y, z, s);
+    end(0) = x;
+    end(1) = y;
+    end(2) = z;
+    end(3) = s;
+
+    GADGET_CHECK_THROW(this->initializeBFFD(im, start, end, sx, sy, sz, ss));
+}
+
+/// Define the FFD over the index range of array a with sx, sy, sz, ss control points.
+/// The region runs from index 0 to index size-1 in each of the four dimensions of a.
+/// The work is delegated to initializeBFFD; its result is checked with GADGET_CHECK_THROW
+/// (presumably throwing when initialization fails -- confirm against the macro definition).
+template <typename T, typename CoordType, unsigned int DOut>
+gtplusBSplineFFD4D<T, CoordType, DOut>::gtplusBSplineFFD4D(const ArrayType& a, size_t sx, size_t sy, size_t sz, size_t ss) : BaseClass()
+{
+    PointType start, end;
+
+    start(0) = 0;
+    start(1) = 0;
+    start(2) = 0;
+    start(3) = 0;
+
+    /// last valid index along each dimension
+    end(0) = a.get_size(0)-1;
+    end(1) = a.get_size(1)-1;
+    end(2) = a.get_size(2)-1;
+    end(3) = a.get_size(3)-1;
+
+    GADGET_CHECK_THROW(this->initializeBFFD(start, end, sx, sy, sz, ss));
+}
+
+/// Copy constructor: the base class is default-constructed, then the control point grid
+/// of each of the DOut output channels is copied from bffd.
+template <typename T, typename CoordType, unsigned int DOut>
+gtplusBSplineFFD4D<T, CoordType, DOut>::gtplusBSplineFFD4D(const Self& bffd)
+    : BaseClass()
+{
+    for ( unsigned int chan=0; chan<DOut; ++chan )
+    {
+        this->ctrl_pt_[chan].copyFrom( bffd.get_ctrl_pt(chan) );
+    }
+}
+
+/// Destructor: performs no explicit clean-up of its own.
+template <typename T, typename CoordType, unsigned int DOut>
+gtplusBSplineFFD4D<T, CoordType, DOut>::~gtplusBSplineFFD4D()
+{
+    // intentionally empty
+}
+
+/// Evaluate the 4D FFD, or one of its derivatives, at the grid location (px, py, pz, ps).
+/// ordx, ordy, ordz, ords : order of derivative along each direction; 1 selects LUT1_,
+///                          2 selects LUT2_, any other value keeps LUT_
+/// r : receives the DOut evaluated values
+/// The value is the tensor product sum over the 4x4x4x4 control points around the location,
+/// weighted by the looked-up BSpline coefficients of each direction.
+/// Returns false if a debug check fails or an exception is caught, true otherwise.
+template <typename T, typename CoordType, unsigned int DOut>
+bool gtplusBSplineFFD4D<T, CoordType, DOut>::evaluateFFD4D(CoordType px, CoordType py, CoordType pz, CoordType ps, size_t ordx, size_t ordy, size_t ordz, size_t ords, T r[DOut]) const
+{
+    try
+    {
+        GADGET_DEBUG_CHECK_RETURN_FALSE( (px>=-2) && (px<=this->get_size(0)+1) );
+        GADGET_DEBUG_CHECK_RETURN_FALSE( (py>=-2) && (py<=this->get_size(1)+1) );
+        GADGET_DEBUG_CHECK_RETURN_FALSE( (pz>=-2) && (pz<=this->get_size(2)+1) );
+        GADGET_DEBUG_CHECK_RETURN_FALSE( (ps>=-2) && (ps<=this->get_size(3)+1) );
+
+        /// the orders are unsigned, so only the upper bounds need checking;
+        /// at most a second order derivative in total is supported
+        GADGET_DEBUG_CHECK_RETURN_FALSE(ordx<=2);
+        GADGET_DEBUG_CHECK_RETURN_FALSE(ordy<=2);
+        GADGET_DEBUG_CHECK_RETURN_FALSE(ordz<=2);
+        GADGET_DEBUG_CHECK_RETURN_FALSE(ords<=2);
+        GADGET_DEBUG_CHECK_RETURN_FALSE(ordx+ordy+ordz+ords<=2);
+
+        /// split every coordinate into its integer cell index and the LUT row of its fractional part
+        long long ix = (long long)std::floor(px);
+        CoordType deltaX = px-(CoordType)ix;
+        long long lx = GT_MKINT(BSPLINELUTSIZE*deltaX);
+
+        long long iy = (long long)std::floor(py);
+        CoordType deltaY = py-(CoordType)iy;
+        long long ly = GT_MKINT(BSPLINELUTSIZE*deltaY);
+
+        long long iz = (long long)std::floor(pz);
+        CoordType deltaZ = pz-(CoordType)iz;
+        long long lz = GT_MKINT(BSPLINELUTSIZE*deltaZ);
+
+        long long is = (long long)std::floor(ps);
+        CoordType deltaS = ps-(CoordType)is;
+        long long ls = GT_MKINT(BSPLINELUTSIZE*deltaS);
+
+        unsigned int d, jj, kk, ss;
+        size_t offset[4][4][4]; // s, z, y
+
+        /// offset[ss][kk][jj] addresses the control point (ix-1, iy+jj-1, iz+kk-1, is+ss-1);
+        /// the three further x neighbours are read at offset+1, +2 and +3 below
+        for ( ss=0; ss<4; ss++ )
+        {
+            for ( kk=0; kk<4; kk++ )
+            {
+                for ( jj=0; jj<4; jj++ )
+                {
+                    offset[ss][kk][jj] = this->calculate_offset(ix-1, iy+jj-1, iz+kk-1, is+ss-1);
+                }
+            }
+        }
+
+        /// pick the coefficient table of every direction according to its derivative order
+        const LUTType* p_xLUT= &this->LUT_;
+        const LUTType* p_yLUT= &this->LUT_;
+        const LUTType* p_zLUT= &this->LUT_;
+        const LUTType* p_sLUT= &this->LUT_;
+
+        if ( ordx == 1 )
+        {
+            p_xLUT= &this->LUT1_;
+        }
+        else if ( ordx == 2 )
+        {
+            p_xLUT= &this->LUT2_;
+        }
+
+        if ( ordy == 1 )
+        {
+            p_yLUT= &this->LUT1_;
+        }
+        else if ( ordy == 2 )
+        {
+            p_yLUT= &this->LUT2_;
+        }
+
+        if ( ordz == 1 )
+        {
+            p_zLUT= &this->LUT1_;
+        }
+        else if ( ordz == 2 )
+        {
+            p_zLUT= &this->LUT2_;
+        }
+
+        if ( ords == 1 )
+        {
+            p_sLUT= &this->LUT1_;
+        }
+        else if ( ords == 2 )
+        {
+            p_sLUT= &this->LUT2_;
+        }
+
+        const LUTType& xLUT= *p_xLUT;
+        const LUTType& yLUT= *p_yLUT;
+        const LUTType& zLUT= *p_zLUT;
+        const LUTType& sLUT= *p_sLUT;
+
+        /// accumulate x first, then y, z and s
+        for ( d=0; d<DOut; d++ )
+        {
+            r[d] = 0;
+            for (ss=0; ss<4; ss++)
+            {
+                T rs=0;
+                for (kk=0; kk<4; kk++)
+                {
+                    T rv = 0;
+                    for (jj=0; jj<4; jj++)
+                    {
+                        T v  =  ( this->ctrl_pt_[d](offset[ss][kk][jj])   * xLUT[lx][0] )
+                            + ( this->ctrl_pt_[d](offset[ss][kk][jj]+1) * xLUT[lx][1] )
+                            + ( this->ctrl_pt_[d](offset[ss][kk][jj]+2) * xLUT[lx][2] )
+                            + ( this->ctrl_pt_[d](offset[ss][kk][jj]+3) * xLUT[lx][3] );
+
+                        rv += v * yLUT[ly][jj];
+                    }
+
+                    rs += rv * zLUT[lz][kk];
+                }
+
+                r[d] += rs * sLUT[ls][ss];
+            }
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in evaluateFFD4D(CoordType px, CoordType py, CoordType pz, CoordType ps, size_t ordx, size_t ordy, size_t ordz, size_t ords, T r[DOut]) const ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Evaluate the FFD value (derivative order 0 in every direction) at the grid location pt = (x, y, z, s).
+/// r receives the DOut evaluated values; the status reported by evaluateFFD4D is returned unchanged.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD4D<T, CoordType, DOut>::evaluateFFD(const CoordType pt[D], T r[DOut]) const
+{
+    // order 0 along x, y, z and s
+    const size_t noDeriv = 0;
+    return this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], noDeriv, noDeriv, noDeriv, noDeriv, r);
+}
+
+/// Evaluate the FFD value (derivative order 0 in every direction) at the grid location (px, py, pz, ps).
+/// r receives the DOut evaluated values; the status reported by evaluateFFD4D is returned unchanged.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD4D<T, CoordType, DOut>::evaluateFFD(CoordType px, CoordType py, CoordType pz, CoordType ps, T r[DOut]) const
+{
+    // order 0 along x, y, z and s
+    const size_t noDeriv = 0;
+    return this->evaluateFFD4D(px, py, pz, ps, noDeriv, noDeriv, noDeriv, noDeriv, r);
+}
+
+/// Evaluate the FFD with order arguments (1, 0, 0, 0), i.e. first order along x only.
+/// The result is written to dx; returns the success flag of evaluateFFD4D.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD4D<T, CoordType, DOut>::evaluateFFDDX(const CoordType pt[D], T dx[DOut]) const
+{
+    const bool evaluated = this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], 1, 0, 0, 0, dx);
+    return evaluated;
+}
+
+/// Evaluate the FFD with order arguments (0, 1, 0, 0), i.e. first order along y only.
+/// The result is written to dy; returns the success flag of evaluateFFD4D.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD4D<T, CoordType, DOut>::evaluateFFDDY(const CoordType pt[D], T dy[DOut]) const
+{
+    const bool evaluated = this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], 0, 1, 0, 0, dy);
+    return evaluated;
+}
+
+/// Evaluate the FFD with order arguments (0, 0, 1, 0), i.e. first order along z only.
+/// The result is written to dz; returns the success flag of evaluateFFD4D.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD4D<T, CoordType, DOut>::evaluateFFDDZ(const CoordType pt[D], T dz[DOut]) const
+{
+    const bool evaluated = this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], 0, 0, 1, 0, dz);
+    return evaluated;
+}
+
+/// Evaluate the FFD with order arguments (0, 0, 0, 1), i.e. first order along s only.
+/// The result is written to ds; returns the success flag of evaluateFFD4D.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD4D<T, CoordType, DOut>::evaluateFFDDS(const CoordType pt[D], T ds[DOut]) const
+{
+    const bool evaluated = this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], 0, 0, 0, 1, ds);
+    return evaluated;
+}
+
+/// Evaluate the first-order x term of the FFD and rescale it by the grid spacing.
+/// Calls evaluateFFD4D with orders (1, 0, 0, 0), then multiplies each of the
+/// DOut output components by 1/get_spacing(0). Returns false if the evaluation fails.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD4D<T, CoordType, DOut>::evaluateWorldDX(const CoordType pt[D], T dx[DOut]) const
+{
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], 1, 0, 0, 0, dx));
+
+    // reciprocal of the spacing along dimension 0
+    const coord_type invSpacing = coord_type(1.0)/this->get_spacing(0);
+
+    T* const last = dx + DOut;
+    for ( T* comp=dx; comp!=last; ++comp )
+    {
+        *comp *= invSpacing;
+    }
+
+    return true;
+}
+
+/// Evaluate the first-order y term of the FFD and rescale it by the grid spacing.
+/// Calls evaluateFFD4D with orders (0, 1, 0, 0), then multiplies each of the
+/// DOut output components by 1/get_spacing(1). Returns false if the evaluation fails.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD4D<T, CoordType, DOut>::evaluateWorldDY(const CoordType pt[D], T dy[DOut]) const
+{
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], 0, 1, 0, 0, dy));
+
+    // reciprocal of the spacing along dimension 1
+    const coord_type invSpacing = coord_type(1.0)/this->get_spacing(1);
+
+    T* const last = dy + DOut;
+    for ( T* comp=dy; comp!=last; ++comp )
+    {
+        *comp *= invSpacing;
+    }
+
+    return true;
+}
+
+/// Evaluate the first-order z term of the FFD and rescale it by the grid spacing.
+/// Calls evaluateFFD4D with orders (0, 0, 1, 0), then multiplies each of the
+/// DOut output components by 1/get_spacing(2). Returns false if the evaluation fails.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD4D<T, CoordType, DOut>::evaluateWorldDZ(const CoordType pt[D], T dz[DOut]) const
+{
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], 0, 0, 1, 0, dz));
+
+    // reciprocal of the spacing along dimension 2
+    const coord_type invSpacing = coord_type(1.0)/this->get_spacing(2);
+
+    T* const last = dz + DOut;
+    for ( T* comp=dz; comp!=last; ++comp )
+    {
+        *comp *= invSpacing;
+    }
+
+    return true;
+}
+
+/// Evaluate the first-order s term of the FFD and rescale it by the grid spacing.
+/// Calls evaluateFFD4D with orders (0, 0, 0, 1), then multiplies each of the
+/// DOut output components by 1/get_spacing(3). Returns false if the evaluation fails.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD4D<T, CoordType, DOut>::evaluateWorldDS(const CoordType pt[D], T ds[DOut]) const
+{
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], 0, 0, 0, 1, ds));
+
+    // reciprocal of the spacing along dimension 3
+    const coord_type invSpacing = coord_type(1.0)/this->get_spacing(3);
+
+    T* const last = ds + DOut;
+    for ( T* comp=ds; comp!=last; ++comp )
+    {
+        *comp *= invSpacing;
+    }
+
+    return true;
+}
+
+/// Evaluate all four first-order terms of the FFD at the point pt[0..3].
+/// deriv[0], deriv[1], deriv[2] and deriv[3] receive the results of evaluateFFD4D
+/// called with order 1 along x, y, z and s respectively (order 0 elsewhere).
+/// Returns false as soon as one evaluation fails; later rows are then left untouched.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD4D<T, CoordType, DOut>::evaluateFFDDerivative(const CoordType pt[D], T deriv[D][DOut]) const
+{
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], 1, 0, 0, 0, deriv[0]));
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], 0, 1, 0, 0, deriv[1]));
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], 0, 0, 1, 0, deriv[2]));
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], 0, 0, 0, 1, deriv[3]));
+    return true;
+}
+
+/// Evaluate all four first-order terms of the FFD at the point (px, py, pz, ps).
+/// deriv[0], deriv[1], deriv[2] and deriv[3] receive the results of evaluateFFD4D
+/// called with order 1 along x, y, z and s respectively (order 0 elsewhere).
+/// Returns false as soon as one evaluation fails; later rows are then left untouched.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD4D<T, CoordType, DOut>::evaluateFFDDerivative(CoordType px, CoordType py, CoordType pz, CoordType ps, T deriv[D][DOut]) const
+{
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(px, py, pz, ps, 1, 0, 0, 0, deriv[0]));
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(px, py, pz, ps, 0, 1, 0, 0, deriv[1]));
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(px, py, pz, ps, 0, 0, 1, 0, deriv[2]));
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(px, py, pz, ps, 0, 0, 0, 1, deriv[3]));
+    return true;
+}
+
+/// Fill the 4x4 table of second-order terms of the FFD at the point pt[0..3].
+/// Row index of dderiv is 4*a + b for the pair (a, b) with x=0, y=1, z=2, s=3.
+/// Only the ten entries with a <= b are evaluated through evaluateFFD4D; the
+/// remaining six (b < a) are byte-copied from their mirrored entry, so the
+/// table is symmetric by construction.
+/// Returns false as soon as one evaluation fails; rows not yet written are left untouched.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD4D<T, CoordType, DOut>::evaluateFFDSecondOrderDerivative(const CoordType pt[D], T dderiv[D*D][DOut]) const
+{
+    // dxx
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], 2, 0, 0, 0, dderiv[0]));
+    // dxy
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], 1, 1, 0, 0, dderiv[1]));
+    // dxz
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], 1, 0, 1, 0, dderiv[2]));
+    // dxs
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], 1, 0, 0, 1, dderiv[3]));
+
+    // dyx (copy of dxy)
+    memcpy(dderiv[4], dderiv[1], DOut*sizeof(T));
+    // dyy
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], 0, 2, 0, 0, dderiv[5]));
+    // dyz
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], 0, 1, 1, 0, dderiv[6]));
+    // dys
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], 0, 1, 0, 1, dderiv[7]));
+
+    // dzx (copy of dxz)
+    memcpy(dderiv[8], dderiv[2], DOut*sizeof(T));
+    // dzy (copy of dyz)
+    memcpy(dderiv[9], dderiv[6], DOut*sizeof(T));
+    // dzz
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], 0, 0, 2, 0, dderiv[10]));
+    // dzs
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], 0, 0, 1, 1, dderiv[11]));
+
+    // dsx (copy of dxs)
+    memcpy(dderiv[12], dderiv[3], DOut*sizeof(T));
+    // dsy (copy of dys)
+    memcpy(dderiv[13], dderiv[7], DOut*sizeof(T));
+    // dsz (copy of dzs)
+    memcpy(dderiv[14], dderiv[11], DOut*sizeof(T));
+    // dss
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(pt[0], pt[1], pt[2], pt[3], 0, 0, 0, 2, dderiv[15]));
+
+    return true;
+}
+
+/// Fill the 4x4 table of second-order terms of the FFD at the point (px, py, pz, ps).
+/// Row index of dderiv is 4*a + b for the pair (a, b) with x=0, y=1, z=2, s=3.
+/// Only the ten entries with a <= b are evaluated through evaluateFFD4D; the
+/// remaining six (b < a) are byte-copied from their mirrored entry, so the
+/// table is symmetric by construction.
+/// Returns false as soon as one evaluation fails; rows not yet written are left untouched.
+template <typename T, typename CoordType, unsigned int DOut>
+inline bool gtplusBSplineFFD4D<T, CoordType, DOut>::evaluateFFDSecondOrderDerivative(CoordType px, CoordType py, CoordType pz, CoordType ps, T dderiv[D*D][DOut]) const
+{
+    // dxx
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(px, py, pz, ps, 2, 0, 0, 0, dderiv[0]));
+    // dxy
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(px, py, pz, ps, 1, 1, 0, 0, dderiv[1]));
+    // dxz
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(px, py, pz, ps, 1, 0, 1, 0, dderiv[2]));
+    // dxs
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(px, py, pz, ps, 1, 0, 0, 1, dderiv[3]));
+
+    // dyx (copy of dxy)
+    memcpy(dderiv[4], dderiv[1], DOut*sizeof(T));
+    // dyy
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(px, py, pz, ps, 0, 2, 0, 0, dderiv[5]));
+    // dyz
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(px, py, pz, ps, 0, 1, 1, 0, dderiv[6]));
+    // dys
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(px, py, pz, ps, 0, 1, 0, 1, dderiv[7]));
+
+    // dzx (copy of dxz)
+    memcpy(dderiv[8], dderiv[2], DOut*sizeof(T));
+    // dzy (copy of dyz)
+    memcpy(dderiv[9], dderiv[6], DOut*sizeof(T));
+    // dzz
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(px, py, pz, ps, 0, 0, 2, 0, dderiv[10]));
+    // dzs
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(px, py, pz, ps, 0, 0, 1, 1, dderiv[11]));
+
+    // dsx (copy of dxs)
+    memcpy(dderiv[12], dderiv[3], DOut*sizeof(T));
+    // dsy (copy of dys)
+    memcpy(dderiv[13], dderiv[7], DOut*sizeof(T));
+    // dsz (copy of dzs)
+    memcpy(dderiv[14], dderiv[11], DOut*sizeof(T));
+    // dss
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD4D(px, py, pz, ps, 0, 0, 0, 2, dderiv[15]));
+
+    return true;
+}
+
+/// Perform one B-spline approximation update of the control points from scattered samples.
+///
+/// Steps: evaluate the current FFD at every sample, accumulate per-control-point
+/// numerators (dx) and denominators (ds) from the residuals, add dx/ds to the
+/// unpadded part of every control point grid, then re-evaluate the residual.
+///
+/// \param pos            D by N array of sample coordinates (sizes are checked); the coordinates are
+///                       compared against the control grid size, so presumably they are grid coordinates -- TODO confirm
+/// \param value          DOut by N array of target values (sizes are checked)
+/// \param residual       output; (re)created with the dimensions of value when they differ, holds
+///                       value minus the FFD evaluated at pos after the update
+/// \param totalResidual  output; norm2 of the final residual divided by N
+/// \param N              number of samples
+/// \return false if a size check, an evaluation or an array operation fails or an exception is caught; true otherwise
+template <typename T, typename CoordType, unsigned int DOut>
+bool gtplusBSplineFFD4D<T, CoordType, DOut>::ffdApprox(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N)
+{
+    try
+    {
+        GADGET_CHECK_RETURN_FALSE(pos.get_size(0)==D);
+        GADGET_CHECK_RETURN_FALSE(pos.get_size(1)==N);
+
+        GADGET_CHECK_RETURN_FALSE(value.get_size(0)==DOut);
+        GADGET_CHECK_RETURN_FALSE(value.get_size(1)==N);
+
+        std::vector<size_t> dim;
+        value.get_dimensions(dim);
+        if ( !residual.dimensions_equal(&dim) )
+        {
+            residual.create(value.get_dimensions());
+            Gadgetron::clear(residual);
+        }
+
+        size_t sx = this->get_size(0);
+        size_t sy = this->get_size(1);
+        size_t sz = this->get_size(2);
+        size_t ss = this->get_size(3);
+
+        /// number of control points in one output component of dx/ds
+        const size_t sliceSize = sx*sy*sz*ss;
+
+        /// following the definition of ref[2]
+        /// dx accumulates the weighted updates, ds the normalization weights
+        ho5DArray<T> dx(sx, sy, sz, ss, DOut), ds(sx, sy, sz, ss, DOut);
+        Gadgetron::clear(dx);
+        Gadgetron::clear(ds);
+
+        /// compute the current approximation values
+        ValueArrayType approxValue;
+        approxValue = value;
+
+        /// compute current residual
+        GADGET_CHECK_RETURN_FALSE(this->evaluateFFDArray(pos, approxValue));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::subtract(value, approxValue, residual));
+
+        /// compute the update of control points
+        unsigned int d;
+
+        long long n;
+        for (n=0; n<(long long)N; n++)
+        {
+            coord_type px = pos(0, n);
+            coord_type py = pos(1, n);
+            coord_type pz = pos(2, n);
+            coord_type ps = pos(3, n);
+
+            /// samples more than two grid cells outside the control grid are ignored
+            if ( px<-2 || px>sx+2
+                || py<-2 || py>sy+2
+                || pz<-2 || pz>sz+2
+                || ps<-2 || ps>ss+2 )
+            {
+                continue;
+            }
+
+            /// integer cell index and fractional position inside the cell
+            long long ix = (long long)std::floor(px);
+            CoordType deltaX = px-(CoordType)ix;
+
+            long long iy = (long long)std::floor(py);
+            CoordType deltaY = py-(CoordType)iy;
+
+            long long iz = (long long)std::floor(pz);
+            CoordType deltaZ = pz-(CoordType)iz;
+
+            long long is = (long long)std::floor(ps);
+            CoordType deltaS = ps-(CoordType)is;
+
+            long long i, j, k, s, I, J, K, S;
+
+            /// dist is the sum of squared tensor-product weights over the 4x4x4x4 support
+            T dist=0, v, vv, vvv;
+            for (s=0; s<4; s++)
+            {
+                for (k=0; k<4; k++)
+                {
+                    for (j=0; j<4; j++)
+                    {
+                        for (i=0; i<4; i++)
+                        {
+                            v = (this->BSpline(i, deltaX) * this->BSpline(j, deltaY)) * (this->BSpline(k, deltaZ) * this->BSpline(s, deltaS));
+                            dist += v*v;
+                        }
+                    }
+                }
+            }
+
+            /// accumulate the contribution of this sample to every supporting control point inside the grid
+            for (s=0; s<4; s++)
+            {
+                S = s + is - 1;
+                if ( (S>=0) && (S<(long long)ss) )
+                {
+                    for (k=0; k<4; k++)
+                    {
+                        K = k + iz - 1;
+                        if ( (K>=0) && (K<(long long)sz) )
+                        {
+                            for (j=0; j<4; j++)
+                            {
+                                J = j + iy - 1;
+                                if ( (J>=0) && (J<(long long)sy) )
+                                {
+                                    for (i=0; i<4; i++)
+                                    {
+                                        I = i + ix - 1;
+                                        if ( (I>=0) && (I<(long long)sx) )
+                                        {
+                                            v = this->BSpline(i, deltaX) * this->BSpline(j, deltaY) * this->BSpline(k, deltaZ) * this->BSpline(s, deltaS);
+                                            vv = v*v;
+                                            vvv = vv*v;
+
+                                            for ( d=0; d<DOut; d++ )
+                                            {
+                                                dx(I, J, K, S, d) += vvv*residual(d, n)/dist;
+                                                ds(I, J, K, S, d) += vv;
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        /// update the control point values
+        /// addEpsilon is applied to ds before it is used as the divisor
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::addEpsilon(ds));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::divide(dx, ds, dx));
+
+        std::vector<size_t> startND(4, BSPLINEPADDINGSIZE), size(4);
+        size[0] = sx;
+        size[1] = sy;
+        size[2] = sz;
+        size[3] = ss;
+
+        hoNDArray<T> ctrlPtWithoutPadding(sx, sy, sz, ss);
+
+        for ( d=0; d<DOut; d++ )
+        {
+            /// view on the d-th output component of dx; the offset is counted in elements of T,
+            /// pointer arithmetic already scales by sizeof(T)
+            hoNDArray<T> dx4D(sx, sy, sz, ss, dx.begin()+d*sliceSize);
+
+            std::vector<size_t> ctrlDim;
+            this->ctrl_pt_[d].get_dimensions(ctrlDim);
+            hoNDArray<T> tmpCtrlPt(ctrlDim, this->ctrl_pt_[d].begin(), false);
+            Gadgetron::cropUpTo11DArray(tmpCtrlPt, ctrlPtWithoutPadding, startND, size);
+            Gadgetron::add(ctrlPtWithoutPadding, dx4D, ctrlPtWithoutPadding);
+            Gadgetron::setSubArrayUpTo11DArray(ctrlPtWithoutPadding, tmpCtrlPt, startND, size);
+        }
+
+        /// calculate residual error
+        totalResidual = 0;
+        GADGET_CHECK_RETURN_FALSE(this->evaluateFFDArray(pos, approxValue));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::subtract(value, approxValue, residual));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::norm2(residual, totalResidual));
+        totalResidual = totalResidual / (real_value_type)N;
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in ffdApprox(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Refine the control point grid: each dimension with more than one control point
+/// goes from n to 2*n-1 points (plus padding).
+///
+/// x and y are always subdivided. z and s are subdivided only when they hold more than
+/// one control point; a singleton dimension is carried over unchanged with unit weight.
+/// All four combinations of (sz>1, ss>1) are handled by one loop nest, including
+/// ss>1 with sz==1.
+///
+/// The new grids replace this->ctrl_pt_[d]; the data buffers allocated here are handed
+/// over to them.
+///
+/// \return false if an exception is caught, true otherwise
+template <typename T, typename CoordType, unsigned int DOut>
+bool gtplusBSplineFFD4D<T, CoordType, DOut>::refine()
+{
+    try
+    {
+        size_t sx = this->get_size(0);
+        size_t sy = this->get_size(1);
+        size_t sz = this->get_size(2);
+        size_t ss = this->get_size(3);
+
+        /// the refined control point grid definition
+
+        std::vector<size_t> dim(4);
+        dim[0] = 2*sx-1 + 2*BSPLINEPADDINGSIZE;
+        dim[1] = 2*sy-1 + 2*BSPLINEPADDINGSIZE;
+        dim[2] = 2*sz-1 + 2*BSPLINEPADDINGSIZE;
+        dim[3] = 2*ss-1 + 2*BSPLINEPADDINGSIZE;
+
+        std::vector<coord_type> spacing;
+        this->get_spacing(spacing);
+        spacing[0] /= 2;
+        spacing[1] /= 2;
+        if ( sz > 1 ) spacing[2] /= 2;
+        if ( ss > 1 ) spacing[3] /= 2;
+
+        std::vector<coord_type> oldOrigin;
+        this->ctrl_pt_[0].get_origin(oldOrigin);
+
+        std::vector<coord_type> gridOrigin(4);
+        this->ctrl_pt_[0].image_to_world( (CoordType)(BSPLINEPADDINGSIZE),
+                                          (CoordType)(BSPLINEPADDINGSIZE),
+                                          (CoordType)(BSPLINEPADDINGSIZE),
+                                          (CoordType)(BSPLINEPADDINGSIZE),
+                                          gridOrigin[0], gridOrigin[1],
+                                          gridOrigin[2], gridOrigin[3]);
+
+        /// new origin is the midpoint between the old array origin and the first unpadded grid point
+        std::vector<coord_type> origin(4);
+        origin[0] = (oldOrigin[0] + gridOrigin[0])/2;
+        origin[1] = (oldOrigin[1] + gridOrigin[1])/2;
+        origin[2] = (oldOrigin[2] + gridOrigin[2])/2;
+        origin[3] = (oldOrigin[3] + gridOrigin[3])/2;
+
+        typename ImageType::axis_type axis;
+        this->ctrl_pt_[0].get_axis(axis);
+
+        /// allocate new control points
+        FFDCtrlPtGridType new_ctrl_pt[DOut];
+
+        unsigned int d;
+        for( d=0; d<DOut; d++ )
+        {
+            new_ctrl_pt[d].create(dim, spacing, origin, axis);
+            Gadgetron::clear(new_ctrl_pt[d]);
+        }
+
+        /// refinement weights, see ref[2]
+        /// row 0: new point coinciding with an old one; row 1: new point halfway between two old ones
+        T w[2][3];
+
+        w[0][0] = T(0.125); w[0][1] = T(0.75);  w[0][2] = T(0.125);
+        w[1][0] = 0;        w[1][1] = T(0.5);   w[1][2] = T(0.5);
+
+        /// a dimension holding a single control point is not subdivided:
+        /// one new point (index 0), taken from the single old point (neighbour index 1, i.e. offset 0)
+        /// with weight 1
+        const bool refineZ = (sz > 1);
+        const bool refineS = (ss > 1);
+
+        const int numNewZ = refineZ ? 2 : 1;
+        const int numNewS = refineS ? 2 : 1;
+
+        const int oldZBegin = refineZ ? 0 : 1;
+        const int oldZEnd   = refineZ ? 3 : 2;
+        const int oldSBegin = refineS ? 0 : 1;
+        const int oldSEnd   = refineS ? 3 : 2;
+
+        const int nx = (int)sx;
+        const int ny = (int)sy;
+        const int nz = (int)sz;
+        const int ns = (int)ss;
+
+        /// compute refined control point values
+        int x, y, z, s, i_new, j_new, k_new, s_new, i_old, j_old, k_old, s_old;
+
+        for (s=0; s<ns; s++)
+        {
+            for (z=0; z<nz; z++)
+            {
+                for (y=0; y<ny; y++)
+                {
+                    for (x=0; x<nx; x++)
+                    {
+                        for (s_new=0; s_new<numNewS; s_new++)
+                        {
+                            for (k_new=0; k_new<numNewZ; k_new++)
+                            {
+                                for (j_new=0; j_new<2; j_new++)
+                                {
+                                    for (i_new=0; i_new<2; i_new++)
+                                    {
+                                        size_t offsetNew = new_ctrl_pt[0].calculate_offset(2*x+i_new+BSPLINEPADDINGSIZE, 2*y+j_new+BSPLINEPADDINGSIZE, 2*z+k_new+BSPLINEPADDINGSIZE, 2*s+s_new+BSPLINEPADDINGSIZE);
+
+                                        for (s_old=oldSBegin; s_old<oldSEnd; s_old++)
+                                        {
+                                            const T ws = refineS ? w[s_new][s_old] : T(1);
+
+                                            for (k_old=oldZBegin; k_old<oldZEnd; k_old++)
+                                            {
+                                                const T wk = refineZ ? w[k_new][k_old] : T(1);
+
+                                                for (j_old=0; j_old<3; j_old++)
+                                                {
+                                                    for (i_old=0; i_old<3; i_old++)
+                                                    {
+                                                        size_t offsetOld = this->calculate_offset(x+i_old-1, y+j_old-1, z+k_old-1, s+s_old-1);
+                                                        for ( d=0; d<DOut; d++ )
+                                                        {
+                                                            new_ctrl_pt[d](offsetNew) += w[i_new][i_old]*w[j_new][j_old]*wk*ws * this->ctrl_pt_[d](offsetOld);
+                                                        }
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        /// hand the freshly computed buffers over to the member grids
+        for ( d=0; d<DOut; d++ )
+        {
+            this->ctrl_pt_[d].create(dim, spacing, origin, axis, new_ctrl_pt[d].begin(), true);
+            new_ctrl_pt[d].delete_data_on_destruct(false);
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in refine() ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Write a human-readable summary of this FFD to os: a banner, the typeid names of the
+/// value and coordinate types, the output dimension, and the content of the first
+/// control point grid as printed by its printContent.
+template <typename T, typename CoordType, unsigned int DOut>
+void gtplusBSplineFFD4D<T, CoordType, DOut>::print(std::ostream& os) const
+{
+    os << "---------------------- GTPlus BSpline 4D Free Form Deformation ------------------" << std::endl;
+    os << "Implement 4D BSpline Free Form Deformation (BFFD) " << std::endl;
+
+    // implementation-defined type names, exactly as reported by typeid
+    const std::string valueTypeName( typeid(T).name() );
+    os << "FFD value type is : " << valueTypeName << std::endl;
+
+    const std::string coordTypeName( typeid(CoordType).name() );
+    os << "FFD coord type is : " << coordTypeName << std::endl;
+
+    os << "Output dimension is : " << DOut << std::endl;
+    os << "---------------------------------------------------" << std::endl;
+    os << "BFFD grid information : " << std::endl;
+    this->ctrl_pt_[0].printContent(os);
+    os << "---------------------------------------------------------------------------------" << std::endl;
+}
+
+}}
diff --git a/toolboxes/gtplus/algorithm/FreeFormDeformation/gtplusFFDBase.h b/toolboxes/gtplus/algorithm/FreeFormDeformation/gtplusFFDBase.h
new file mode 100644
index 0000000..c57eec1
--- /dev/null
+++ b/toolboxes/gtplus/algorithm/FreeFormDeformation/gtplusFFDBase.h
@@ -0,0 +1,1976 @@
+/** \file       gtplusFFDBase.h
+    \brief      Base class for gtPlus FreeFormDeformation package
+
+                FreeFormDeformation (FFD) is a general purpose scatter interpolation algorithm. It is widely used in numerical applications, 
+                such as image registration, data interpolation and geometric modelling etc.
+
+                [1] http://en.wikipedia.org/wiki/Free-form_deformation
+
+                [2] Seungyong Lee ; Dept. of Comput. Sci., Pohang Inst. of Sci. & Technol., South Korea ; Wolberg, G. ; Sung Yong Shin. Scattered data interpolation with multilevel B-splines. IEEE 
+                    Transactions on Visualization and Computer Graphics, Volume 3, Issue 3, 1997.
+
+                [3] D Rueckert, LI Sonoda, C Hayes, DLG Hill, MO Leach, DJ Hawkes. Nonrigid registration using free-form deformations: application to breast MR images. IEEE 
+                    Transactions on Medical Imaging, Volume 18, Issue 8, 1999.
+
+    \author     Hui Xue
+*/
+
+#pragma once
+
+#include <typeinfo>
+#include "GadgetronTimer.h"
+#include "gtPlusISMRMRDReconUtil.h"
+#include "gtPlusIOAnalyze.h"
+#include "gtPlusMemoryManager.h"
+
+#ifdef USE_OMP
+    #include "omp.h"
+#endif // USE_OMP
+
+namespace Gadgetron { namespace gtPlus {
+
+/// Abstract base class of the FreeFormDeformation (FFD) package.
+/// T         : value type stored on the control points
+/// CoordType : type of the point coordinates
+/// DIn       : number of input (spatial) dimensions
+/// DOut      : number of output values per point; one control point grid is kept per output value
+/// Derived classes supply the single-point evaluation, the derivatives, the one-pass
+/// approximation and the grid refinement (the pure virtual functions below).
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut>
+class gtplusFFDBase
+{
+public:
+
+    typedef gtplusFFDBase<T, CoordType, DIn, DOut> Self;
+
+    typedef typename realType<T>::Type real_value_type;
+
+    typedef CoordType coord_type;
+
+    /// number of input dimensions, usable as an array bound
+    enum { D = DIn };
+
+    /// array to store the coordinates of spatial points
+    /// has the dimension of DIn by N for N points
+    typedef hoNDArray<CoordType> CoordArrayType;
+
+    /// array to store the point value
+    /// for N points, the dimension of array is DOut by N
+    /// DOut is equal or larger than 1; if larger than 1, the
+    /// vectorized FFD is computed
+    typedef hoNDArray<T> ValueArrayType;
+    typedef ValueArrayType ArrayType;
+
+    typedef hoNDArray<float> MaskArrayType;
+
+    /// control point grid type
+    typedef hoNDImage<T, DIn> FFDCtrlPtGridType;
+
+    /// point type
+    typedef hoNDPoint<CoordType, DIn> PointType;
+
+    /// image type
+    typedef hoNDImage<T, DIn> ImageType;
+
+    gtplusFFDBase();
+    virtual ~gtplusFFDBase();
+
+    /// evaluate the FFD at a grid location
+    /// the input points are in the FFD grid
+    virtual bool evaluateFFD(const CoordType pt[D], T r[DOut]) const = 0;
+    virtual bool evaluateFFD(const CoordType* pt[D], T* r[DOut], size_t N) const;
+    virtual bool evaluateFFD(const PointType& pt, T r[DOut]) const;
+    virtual bool evaluateFFDArray(const CoordArrayType& pts, ValueArrayType& r) const;
+
+    /// evaluate the 1st order derivative of FFD at a grid location
+    /// deriv: derivative for all D dimensions and all DOut values
+    virtual bool evaluateFFDDerivative(const CoordType pt[D], T deriv[D][DOut]) const = 0;
+    virtual bool evaluateFFDDerivative(const PointType& pt, T deriv[D][DOut]) const;
+
+    /// 1st order derivative along a single grid dimension (X, Y, Z, S are dimensions 0 to 3)
+    virtual bool evaluateFFDDX(const CoordType pt[D], T dx[DOut]) const;
+    virtual bool evaluateFFDDY(const CoordType pt[D], T dy[DOut]) const;
+    virtual bool evaluateFFDDZ(const CoordType pt[D], T dz[DOut]) const;
+    virtual bool evaluateFFDDS(const CoordType pt[D], T ds[DOut]) const;
+
+    /// calculate the 1st order derivative of FFD at a world coordinate location with the world coordinate unit
+    virtual bool evaluateWorldDerivative(const CoordType pt[D], T deriv[D][DOut]) const;
+
+    virtual bool evaluateWorldDX(const CoordType pt[D], T dx[DOut]) const;
+    virtual bool evaluateWorldDY(const CoordType pt[D], T dy[DOut]) const;
+    virtual bool evaluateWorldDZ(const CoordType pt[D], T dz[DOut]) const;
+    virtual bool evaluateWorldDS(const CoordType pt[D], T ds[DOut]) const;
+
+    /// evaluate the 2nd order derivative of FFD at a grid location
+    /// dderiv : D*D vector, stores dxx dxy dxz ...; dyx dyy dyz ...; dzx dzy dzz ...
+    virtual bool evaluateFFDSecondOrderDerivative(const CoordType pt[D], T dderiv[D*D][DOut]) const = 0;
+    virtual bool evaluateFFDSecondOrderDerivative(const PointType& pt, T dderiv[D*D][DOut]) const;
+
+    /// evaluate the FFD at a world location
+    virtual bool evaluateFFDW(const CoordType pt[D], T r[DOut]) const;
+    virtual bool evaluateFFDW(CoordType px, CoordType py, T r[DOut]) const;
+    virtual bool evaluateFFDW(CoordType px, CoordType py, CoordType pz, T r[DOut]) const;
+    virtual bool evaluateFFDW(CoordType px, CoordType py, CoordType pz, CoordType ps, T r[DOut]) const;
+
+    /// evaluate the 1st order derivative at a world location (the result is not rescaled by the grid spacing)
+    virtual bool evaluateFFDDerivativeW(const CoordType pt[D], T deriv[D][DOut]) const;
+    virtual bool evaluateFFDDerivativeW(CoordType px, CoordType py, T deriv[D][DOut]) const;
+    virtual bool evaluateFFDDerivativeW(CoordType px, CoordType py, CoordType pz, T deriv[D][DOut]) const;
+    virtual bool evaluateFFDDerivativeW(CoordType px, CoordType py, CoordType pz, CoordType ps, T deriv[D][DOut]) const;
+
+    /// evaluate the 2nd order derivative at a world location
+    virtual bool evaluateFFDSecondOrderDerivativeW(const CoordType pt[D], T dderiv[D*D][DOut]) const;
+    virtual bool evaluateFFDSecondOrderDerivativeW(CoordType px, CoordType py, T dderiv[D*D][DOut]) const;
+    virtual bool evaluateFFDSecondOrderDerivativeW(CoordType px, CoordType py, CoordType pz, T dderiv[D*D][DOut]) const;
+    virtual bool evaluateFFDSecondOrderDerivativeW(CoordType px, CoordType py, CoordType pz, CoordType ps, T dderiv[D*D][DOut]) const;
+
+    /// compute the FFD approximation once
+    /// pos : the position of input points, DIn by N
+    /// value : the value on input points, DOut by N
+    /// residual : the approximation residual after computing FFD, DOut by N
+    /// N : the number of points
+    virtual bool ffdApprox(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, T& totalResidual, size_t N) = 0;
+
+    /// compute the FFD approximation with refinement, see ref [2]
+    /// numOfRefinement : number of grid refinement
+    virtual bool ffdApprox(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N, size_t numOfRefinement);
+
+    /// keep refining the FFD until either the maximal refinement level is reached or total residual is less than a threshold
+    virtual bool ffdApprox(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement);
+
+    /// FFD approximation with input in the world coordinates
+    virtual bool ffdApproxW(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N, size_t numOfRefinement);
+    virtual bool ffdApproxW(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement);
+
+    /// easy-to-use function calls for image and array
+
+    /// convert every pixel in the image to FFD point inputs with world coordinates
+    virtual bool imageToFFDInputsW(ImageType target[DOut], CoordArrayType& pos, ValueArrayType& value);
+    /// mask == 0 means this point is excluded from approximation
+    virtual bool imageToFFDInputsW(ImageType target[DOut], const MaskArrayType& mask, CoordArrayType& pos, ValueArrayType& value);
+
+    /// convert every pixel in the array to FFD point inputs with world coordinates
+    virtual bool arrayToFFDInputsW(ArrayType target[DOut], CoordArrayType& pos, ValueArrayType& value);
+    /// mask == 0 means this point is excluded from approximation
+    virtual bool arrayToFFDInputsW(ArrayType target[DOut], const MaskArrayType& mask, CoordArrayType& pos, ValueArrayType& value);
+
+    /// for Image type
+    virtual bool ffdApproxImage(ImageType target[DOut], real_value_type& totalResidual, size_t numOfRefinement);
+    virtual bool ffdApproxImage(ImageType target[DOut], real_value_type& totalResidual, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement);
+    virtual bool ffdApproxImage(ImageType& target, real_value_type& totalResidual, size_t numOfRefinement);
+    virtual bool ffdApproxImage(ImageType& target, real_value_type& totalResidual, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement);
+
+    virtual bool ffdApproxImage(ImageType target[DOut], const MaskArrayType& mask, real_value_type& totalResidual, size_t numOfRefinement);
+    virtual bool ffdApproxImage(ImageType target[DOut], const MaskArrayType& mask, real_value_type& totalResidual, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement);
+
+    /// for Array type
+    virtual bool ffdApproxArray(ArrayType target[DOut], real_value_type& totalResidual, size_t numOfRefinement);
+    virtual bool ffdApproxArray(ArrayType target[DOut], real_value_type& totalResidual, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement);
+    virtual bool ffdApproxArray(ArrayType& target, real_value_type& totalResidual, size_t numOfRefinement);
+    virtual bool ffdApproxArray(ArrayType& target, real_value_type& totalResidual, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement);
+
+    virtual bool ffdApproxArray(ArrayType target[DOut], const MaskArrayType& mask, real_value_type& totalResidual, size_t numOfRefinement);
+    virtual bool ffdApproxArray(ArrayType target[DOut], const MaskArrayType& mask, real_value_type& totalResidual, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement);
+
+    /// As suggested in ref [2], the BSpline FFD can be refined to achieve better approximation
+    virtual bool refine() = 0;
+
+    /// utility functions for easy-to-use
+
+    /// get control points
+    /// d : index of the output value, not range checked here
+    FFDCtrlPtGridType& get_ctrl_pt(unsigned int d) { return this->ctrl_pt_[d]; }
+    const FFDCtrlPtGridType& get_ctrl_pt(unsigned int d) const { return this->ctrl_pt_[d]; }
+
+    /// get the size of control point arrays
+    /// the first grid (index 0) is queried
+    virtual size_t get_size(size_t dimension) const { return ctrl_pt_[0].get_size(dimension); }
+    virtual std::vector<size_t> get_dimensions() const { std::vector<size_t> dim; ctrl_pt_[0].get_dimensions(dim); return dim; }
+
+    /// get the spacing of the control point arrays
+    virtual coord_type get_spacing(size_t dimension) const { return ctrl_pt_[0].get_pixel_size(dimension); }
+    virtual void get_spacing(std::vector<coord_type>& spacing) const { ctrl_pt_[0].get_pixel_size(spacing); }
+
+    /// get/set a control point value
+    /// the last index d selects the output value; the leading indexes locate the control point
+    virtual T get(size_t x, size_t y, size_t d) const { return ctrl_pt_[d](x, y); }
+    virtual void set(size_t x, size_t y, size_t d, T v) { ctrl_pt_[d](x, y) = v; }
+
+    virtual T get(size_t x, size_t y, size_t z, size_t d) const { return ctrl_pt_[d](x, y, z); }
+    virtual void set(size_t x, size_t y, size_t z, size_t d, T v) { ctrl_pt_[d](x, y, z) = v; }
+
+    virtual T get(size_t x, size_t y, size_t z, size_t s, size_t d) const { return ctrl_pt_[d](x, y, z, s); }
+    virtual void set(size_t x, size_t y, size_t z, size_t s, size_t d, T v) { ctrl_pt_[d](x, y, z, s) = v; }
+
+    /// offset to/from indexes for control points
+    virtual size_t calculate_offset(size_t x, size_t y) const { return ctrl_pt_[0].calculate_offset(x, y); }
+    virtual void calculate_index( size_t offset, size_t& x, size_t& y ) const { ctrl_pt_[0].calculate_index(offset, x, y); }
+
+    virtual size_t calculate_offset(size_t x, size_t y, size_t z) const { return ctrl_pt_[0].calculate_offset(x, y, z); }
+    virtual void calculate_index( size_t offset, size_t& x, size_t& y, size_t& z ) const { ctrl_pt_[0].calculate_index(offset, x, y, z); }
+
+    virtual size_t calculate_offset(size_t x, size_t y, size_t z, size_t s) const { return ctrl_pt_[0].calculate_offset(x, y, z, s); }
+    virtual void calculate_index( size_t offset, size_t& x, size_t& y, size_t& z, size_t& s ) const { ctrl_pt_[0].calculate_index(offset, x, y, z, s); }
+
+    /// compute the control point location in world coordinates
+    virtual void get_location(size_t x, size_t y, CoordType& sx, CoordType& sy) const { ctrl_pt_[0].image_to_world(x, y, sx, sy); }
+    virtual void get_location(size_t x, size_t y, size_t z, CoordType& sx, CoordType& sy, CoordType& sz) const { ctrl_pt_[0].image_to_world(x, y, z, sx, sy, sz); }
+    virtual void get_location(size_t x, size_t y, size_t z, size_t s, CoordType& sx, CoordType& sy, CoordType& sz, CoordType& ss) const { ctrl_pt_[0].image_to_world(x, y, z, s, sx, sy, sz, ss); }
+
+    /// convert a world coordinate point to FFD grid location
+    virtual bool world_to_grid(const CoordArrayType& pt_w, CoordArrayType& pt_g) const;
+    virtual bool world_to_grid(const CoordType pt_w[D], CoordType pt_g[D]) const;
+    virtual bool world_to_grid(CoordType px_w, CoordType py_w, CoordType& px_g, CoordType& py_g) const;
+    virtual bool world_to_grid(CoordType px_w, CoordType py_w, CoordType pz_w, CoordType& px_g, CoordType& py_g, CoordType& pz_g) const;
+    virtual bool world_to_grid(CoordType px_w, CoordType py_w, CoordType pz_w, CoordType ps_w, CoordType& px_g, CoordType& py_g, CoordType& pz_g, CoordType& ps_g) const;
+
+    /// convert an FFD grid location to a world coordinate point
+    /// (no four-coordinate overload is declared, unlike world_to_grid)
+    virtual bool grid_to_world(const CoordArrayType& pt_g, CoordArrayType& pt_w) const;
+    virtual bool grid_to_world(const CoordType pt_g[D], CoordType pt_w[D]) const;
+    virtual bool grid_to_world(CoordType px_g, CoordType py_g, CoordType& px_w, CoordType& py_w) const;
+    virtual bool grid_to_world(CoordType px_g, CoordType py_g, CoordType pz_g, CoordType& px_w, CoordType& py_w, CoordType& pz_w) const;
+
+    /// evaluate FFD for every pixel in the target image
+    /// the image pixel will first be converted to world-coordinate
+    /// and then converted to FFD grid location
+    virtual bool evaluateFFDOnImage(ImageType& target) const;
+    virtual bool evaluateFFDOnImage(ImageType target[DOut]) const;
+
+    /// evaluate FFD for every elements in an array
+    /// the point indexes will be taken as the FFD grid location
+    virtual bool evaluateFFDOnArray(hoNDArray<T>& target) const;
+    virtual bool evaluateFFDOnArray(hoNDArray<T> target[DOut]) const;
+
+    /// clear the control points
+    virtual bool clear(T v=0);
+
+    /// print info
+    virtual void print(std::ostream& os) const;
+
+    /// whether to perform timing and print out messages
+    bool performTiming_;
+
+    /// debug folder
+    std::string debugFolder_;
+
+protected:
+
+    /// control points
+    /// one grid per output value, indexed 0 to DOut-1
+    FFDCtrlPtGridType ctrl_pt_[DOut];
+
+    /// clock for timing
+    Gadgetron::GadgetronTimer gt_timer1_;
+    Gadgetron::GadgetronTimer gt_timer2_;
+    Gadgetron::GadgetronTimer gt_timer3_;
+
+    /// exporter
+    Gadgetron::gtPlus::gtPlusIOAnalyze gt_exporter_;
+
+    /// util
+    gtPlusISMRMRDReconUtil<T> gtPlus_util_;
+    gtPlusISMRMRDReconUtilComplex<T> gtPlus_util_complex_;
+};
+
+/// Default constructor.
+/// Timing messages are off by default and the three timers are told not to
+/// report when they are destroyed.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+gtplusFFDBase<T, CoordType, DIn, DOut>::gtplusFFDBase() : performTiming_(false)
+{
+    // performTiming_ is read by the approximation code, so it must not be left indeterminate
+    gt_timer1_.set_timing_in_destruction(false);
+    gt_timer2_.set_timing_in_destruction(false);
+    gt_timer3_.set_timing_in_destruction(false);
+}
+
+/// Destructor; nothing to release explicitly, the members clean up after themselves.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+gtplusFFDBase<T, CoordType, DIn, DOut>::~gtplusFFDBase() {}
+
+/// Evaluate the FFD at N grid locations.
+/// pt : pt[n] points to the coordinates of the n-th point
+/// r  : r[n] points to the storage receiving the values of the n-th point
+/// N  : number of points
+/// Every point is forwarded to the single-point evaluateFFD; the result of
+/// the individual evaluations is not checked.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFD(const CoordType* pt[D], T* r[DOut], size_t N) const
+{
+    try
+    {
+        // signed loop counter, N is cast to the same type in the loop condition
+        long long n;
+        #pragma omp parallel for private(n) shared(N, pt, r)
+        for ( n=0; n<(long long)N; n++ )
+        {
+            this->evaluateFFD(pt[n], r[n]);
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in evaluateFFD(const CoordType* pt[D], T* r[DOut], size_t N) const ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Evaluate the FFD at a grid location given as a point object.
+/// The coordinates obtained from pt.begin() are forwarded to the pointer based overload.
+/// Returns false if that evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFD(const PointType& pt, T r[DOut]) const
+{
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFD(pt.begin(), r));
+    return true;
+}
+
+/// Evaluate the FFD for an array of grid locations.
+/// pts : input points; the first dimension must be DIn, the second is the number of points N
+/// r   : output values; (re)created as DOut by N if it does not already have that shape
+/// Returns false if the first dimension of pts is not DIn or an exception is caught.
+/// The result of the individual point evaluations is not checked.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDArray(const CoordArrayType& pts, ValueArrayType& r) const
+{
+    try
+    {
+        size_t N = pts.get_size(1);
+        GADGET_CHECK_RETURN_FALSE(pts.get_size(0)==DIn);
+
+        if ( r.get_size(1)!=N || r.get_size(0)!=DOut )
+        {
+            r.create(DOut, N);
+        }
+
+        const CoordType* pPts = pts.begin();
+        T* pR = r.begin();
+
+        long long n;
+        #pragma omp parallel for private(n) shared(N, pPts, pR, r)
+        for ( n=0; n<(long long)N; n++ )
+        {
+            // point n starts at element n*DIn of the input and n*DOut of the output
+            this->evaluateFFD(pPts+n*DIn, pR+n*DOut);
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in evaluateFFDArray(const CoordArrayType& pts, ValueArrayType& r) const ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Evaluate the 1st order derivative at a grid location given as a point object.
+/// The coordinates obtained from pt.begin() are forwarded to the pointer based overload.
+/// Returns false if that evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDDerivative(const PointType& pt, T deriv[D][DOut]) const
+{
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFDDerivative(pt.begin(), deriv));
+    return true;
+}
+
+/// Evaluate the derivative of the FFD along the first (X) grid dimension.
+/// pt : grid location, D coordinates
+/// dx : output, DOut values taken from row 0 of the full 1st order derivative
+/// Returns false if the derivative evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDDX(const CoordType pt[D], T dx[DOut]) const
+{
+    T deriv[D][DOut];
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFDDerivative(pt, deriv));
+
+    // row 0 holds the X derivative of every output value
+    memcpy(dx, deriv[0], DOut*sizeof(T));
+
+    return true;
+}
+
+/// Evaluate the derivative of the FFD along the second (Y) grid dimension.
+/// pt : grid location, D coordinates
+/// dy : output, DOut values taken from row 1 of the full 1st order derivative
+/// Returns false if the FFD has fewer than two input dimensions or the
+/// derivative evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDDY(const CoordType pt[D], T dy[DOut]) const
+{
+    GADGET_CHECK_RETURN_FALSE(DIn>=2);
+
+    T deriv[D][DOut];
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFDDerivative(pt, deriv));
+
+    // deriv is T[D][DOut]: arithmetic on it advances by whole rows, so the row is
+    // indexed directly instead of adding a byte count to the array
+    memcpy(dy, deriv[1], sizeof(T)*DOut);
+    return true;
+}
+
+/// Evaluate the derivative of the FFD along the third (Z) grid dimension.
+/// pt : grid location, D coordinates
+/// dz : output, DOut values taken from row 2 of the full 1st order derivative
+/// Returns false if the FFD has fewer than three input dimensions or the
+/// derivative evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDDZ(const CoordType pt[D], T dz[DOut]) const
+{
+    GADGET_CHECK_RETURN_FALSE(DIn>=3);
+
+    T deriv[D][DOut];
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFDDerivative(pt, deriv));
+
+    // deriv is T[D][DOut]: arithmetic on it advances by whole rows, so the row is
+    // indexed directly instead of adding a byte count to the array
+    memcpy(dz, deriv[2], sizeof(T)*DOut);
+    return true;
+}
+
+/// Evaluate the derivative of the FFD along the fourth (S) grid dimension.
+/// pt : grid location, D coordinates
+/// ds : output, DOut values taken from row 3 of the full 1st order derivative
+/// Returns false if the FFD has fewer than four input dimensions or the
+/// derivative evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDDS(const CoordType pt[D], T ds[DOut]) const
+{
+    GADGET_CHECK_RETURN_FALSE(DIn>=4);
+
+    T deriv[D][DOut];
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFDDerivative(pt, deriv));
+
+    // deriv is T[D][DOut]: arithmetic on it advances by whole rows, so the row is
+    // indexed directly instead of adding a byte count to the array
+    memcpy(ds, deriv[3], sizeof(T)*DOut);
+    return true;
+}
+
+/// Evaluate the 1st order derivative at a world location, expressed per world unit.
+/// pt    : world location, D coordinates
+/// deriv : output, the grid derivative of every dimension divided by the grid spacing of that dimension
+/// Returns false if the world to grid conversion or the derivative evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateWorldDerivative(const CoordType pt[D], T deriv[D][DOut]) const
+{
+    CoordType pt_g[D];
+    // a failed conversion leaves pt_g undefined, so stop before it is used
+    GADGET_CHECK_RETURN_FALSE(this->world_to_grid(pt, pt_g));
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFDDerivative(pt_g, deriv));
+
+    std::vector<coord_type> spacing;
+    this->get_spacing(spacing);
+
+    unsigned int d, d2;
+    for ( d=0; d<DIn; d++ )
+    {
+        for ( d2=0; d2<DOut; d2++ )
+        {
+            deriv[d][d2] /= spacing[d];
+        }
+    }
+
+    return true;
+}
+
+/// Evaluate the X derivative at a world location, expressed per world unit.
+/// pt : world location, D coordinates
+/// dx : output, the grid X derivative scaled by the reciprocal of the spacing of dimension 0
+/// Returns false if the world to grid conversion or the derivative evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateWorldDX(const CoordType pt[D], T dx[DOut]) const
+{
+    CoordType pt_g[D];
+    // a failed conversion leaves pt_g undefined, so stop before it is used
+    GADGET_CHECK_RETURN_FALSE(this->world_to_grid(pt, pt_g));
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFDDX(pt_g, dx));
+
+    coord_type sx = coord_type(1.0)/this->get_spacing(0);
+
+    unsigned int d;
+    for ( d=0; d<DOut; d++ )
+    {
+        dx[d] *= sx;
+    }
+
+    return true;
+}
+
+/// Evaluate the Y derivative at a world location, expressed per world unit.
+/// pt : world location, D coordinates
+/// dy : output, the grid Y derivative scaled by the reciprocal of the spacing of dimension 1
+/// Returns false if the world to grid conversion or the derivative evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateWorldDY(const CoordType pt[D], T dy[DOut]) const
+{
+    CoordType pt_g[D];
+    // a failed conversion leaves pt_g undefined, so stop before it is used
+    GADGET_CHECK_RETURN_FALSE(this->world_to_grid(pt, pt_g));
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFDDY(pt_g, dy));
+
+    coord_type sy = coord_type(1.0)/this->get_spacing(1);
+
+    unsigned int d;
+    for ( d=0; d<DOut; d++ )
+    {
+        dy[d] *= sy;
+    }
+
+    return true;
+}
+
+/// Evaluate the Z derivative at a world location, expressed per world unit.
+/// pt : world location, D coordinates
+/// dz : output, the grid Z derivative scaled by the reciprocal of the spacing of dimension 2
+/// Returns false if the world to grid conversion or the derivative evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateWorldDZ(const CoordType pt[D], T dz[DOut]) const
+{
+    CoordType pt_g[D];
+    // a failed conversion leaves pt_g undefined, so stop before it is used
+    GADGET_CHECK_RETURN_FALSE(this->world_to_grid(pt, pt_g));
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFDDZ(pt_g, dz));
+
+    coord_type sz = coord_type(1.0)/this->get_spacing(2);
+
+    unsigned int d;
+    for ( d=0; d<DOut; d++ )
+    {
+        dz[d] *= sz;
+    }
+
+    return true;
+}
+
+/// Evaluate the S derivative at a world location, expressed per world unit.
+/// pt : world location, D coordinates
+/// ds : output, the grid S derivative scaled by the reciprocal of the spacing of dimension 3
+/// Returns false if the world to grid conversion or the derivative evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateWorldDS(const CoordType pt[D], T ds[DOut]) const
+{
+    CoordType pt_g[D];
+    // a failed conversion leaves pt_g undefined, so stop before it is used
+    GADGET_CHECK_RETURN_FALSE(this->world_to_grid(pt, pt_g));
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFDDS(pt_g, ds));
+
+    coord_type ss = coord_type(1.0)/this->get_spacing(3);
+
+    unsigned int d;
+    for ( d=0; d<DOut; d++ )
+    {
+        ds[d] *= ss;
+    }
+
+    return true;
+}
+
+/// Evaluate the 2nd order derivative at a grid location given as a point object.
+/// The coordinates obtained from pt.begin() are forwarded to the pointer based
+/// second order overload (not to the first order derivative).
+/// Returns false if that evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDSecondOrderDerivative(const PointType& pt, T dderiv[D*D][DOut]) const
+{
+    GADGET_CHECK_RETURN_FALSE(this->evaluateFFDSecondOrderDerivative(pt.begin(), dderiv));
+    return true;
+}
+
+/// Evaluate the FFD at a world location.
+/// pt : world location, D coordinates
+/// r  : output, DOut values
+/// Returns false if the world to grid conversion or the evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDW(const CoordType pt[D], T r[DOut]) const
+{
+    CoordType pg[D];
+    // a failed conversion leaves pg undefined, so stop before it is used
+    GADGET_CHECK_RETURN_FALSE(this->world_to_grid(pt, pg));
+    return this->evaluateFFD(pg, r);
+}
+
+/// Evaluate the FFD at a world location given by two coordinates.
+/// r : output, DOut values
+/// Returns false if the world to grid conversion fails (the base class
+/// conversion rejects any DIn other than 2) or the evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDW(CoordType px, CoordType py, T r[DOut]) const
+{
+    CoordType pg[2];
+    // a failed conversion leaves pg undefined, so stop before it is used
+    GADGET_CHECK_RETURN_FALSE(this->world_to_grid(px, py, pg[0], pg[1]));
+    return this->evaluateFFD(pg, r);
+}
+
+/// Evaluate the FFD at a world location given by three coordinates.
+/// r : output, DOut values
+/// Returns false if the world to grid conversion fails (the base class
+/// conversion rejects any DIn other than 3) or the evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDW(CoordType px, CoordType py, CoordType pz, T r[DOut]) const
+{
+    CoordType pg[3];
+    // a failed conversion leaves pg undefined, so stop before it is used
+    GADGET_CHECK_RETURN_FALSE(this->world_to_grid(px, py, pz, pg[0], pg[1], pg[2]));
+    return this->evaluateFFD(pg, r);
+}
+
+/// Evaluate the FFD at a world location given by four coordinates.
+/// r : output, DOut values
+/// Returns false if the world to grid conversion fails (the base class
+/// conversion rejects any DIn other than 4) or the evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDW(CoordType px, CoordType py, CoordType pz, CoordType ps, T r[DOut]) const
+{
+    CoordType pg[4];
+    // a failed conversion leaves pg undefined, so stop before it is used
+    GADGET_CHECK_RETURN_FALSE(this->world_to_grid(px, py, pz, ps, pg[0], pg[1], pg[2], pg[3]));
+    return this->evaluateFFD(pg, r);
+}
+
+/// Evaluate the 1st order derivative at a world location.
+/// pt    : world location, D coordinates
+/// deriv : output, the grid derivative as returned by evaluateFFDDerivative (no spacing rescaling here)
+/// Returns false if the world to grid conversion or the evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDDerivativeW(const CoordType pt[D], T deriv[D][DOut]) const
+{
+    CoordType pg[D];
+    // a failed conversion leaves pg undefined, so stop before it is used
+    GADGET_CHECK_RETURN_FALSE(this->world_to_grid(pt, pg));
+    return this->evaluateFFDDerivative(pg, deriv);
+}
+
+/// Evaluate the 1st order derivative at a world location given by two coordinates.
+/// deriv : output, the grid derivative as returned by evaluateFFDDerivative
+/// Returns false if the world to grid conversion fails (the base class
+/// conversion rejects any DIn other than 2) or the evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDDerivativeW(CoordType px, CoordType py, T deriv[D][DOut]) const
+{
+    CoordType pg[2];
+    // a failed conversion leaves pg undefined, so stop before it is used
+    GADGET_CHECK_RETURN_FALSE(this->world_to_grid(px, py, pg[0], pg[1]));
+    return this->evaluateFFDDerivative(pg, deriv);
+}
+
+/// Evaluate the 1st order derivative at a world location given by three coordinates.
+/// deriv : output, the grid derivative as returned by evaluateFFDDerivative
+/// Returns false if the world to grid conversion fails (the base class
+/// conversion rejects any DIn other than 3) or the evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDDerivativeW(CoordType px, CoordType py, CoordType pz, T deriv[D][DOut]) const
+{
+    CoordType pg[3];
+    // a failed conversion leaves pg undefined, so stop before it is used
+    GADGET_CHECK_RETURN_FALSE(this->world_to_grid(px, py, pz, pg[0], pg[1], pg[2]));
+    return this->evaluateFFDDerivative(pg, deriv);
+}
+
+/// Evaluate the 1st order derivative at a world location given by four coordinates.
+/// deriv : output, the grid derivative as returned by evaluateFFDDerivative
+/// Returns false if the world to grid conversion fails (the base class
+/// conversion rejects any DIn other than 4) or the evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDDerivativeW(CoordType px, CoordType py, CoordType pz, CoordType ps, T deriv[D][DOut]) const
+{
+    CoordType pg[4];
+    // a failed conversion leaves pg undefined, so stop before it is used
+    GADGET_CHECK_RETURN_FALSE(this->world_to_grid(px, py, pz, ps, pg[0], pg[1], pg[2], pg[3]));
+    return this->evaluateFFDDerivative(pg, deriv);
+}
+
+/// Evaluate the 2nd order derivative at a world location.
+/// pt     : world location, D coordinates
+/// dderiv : output, D*D rows of DOut values
+/// The grid location is forwarded to the second order evaluation (not to the
+/// first order derivative, which only fills D rows).
+/// Returns false if the world to grid conversion or the evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDSecondOrderDerivativeW(const CoordType pt[D], T dderiv[D*D][DOut]) const
+{
+    CoordType pg[D];
+    // a failed conversion leaves pg undefined, so stop before it is used
+    GADGET_CHECK_RETURN_FALSE(this->world_to_grid(pt, pg));
+    return this->evaluateFFDSecondOrderDerivative(pg, dderiv);
+}
+
+/// Evaluate the 2nd order derivative at a world location given by two coordinates.
+/// dderiv : output, D*D rows of DOut values
+/// Returns false if the world to grid conversion fails (the base class
+/// conversion rejects any DIn other than 2) or the evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDSecondOrderDerivativeW(CoordType px, CoordType py, T dderiv[D*D][DOut]) const
+{
+    CoordType pg[2];
+    // a failed conversion leaves pg undefined, so stop before it is used
+    GADGET_CHECK_RETURN_FALSE(this->world_to_grid(px, py, pg[0], pg[1]));
+    return this->evaluateFFDSecondOrderDerivative(pg, dderiv);
+}
+
+/// Evaluate the 2nd order derivative at a world location given by three coordinates.
+/// dderiv : output, D*D rows of DOut values
+/// Returns false if the world to grid conversion fails (the base class
+/// conversion rejects any DIn other than 3) or the evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDSecondOrderDerivativeW(CoordType px, CoordType py, CoordType pz, T dderiv[D*D][DOut]) const
+{
+    CoordType pg[3];
+    // a failed conversion leaves pg undefined, so stop before it is used
+    GADGET_CHECK_RETURN_FALSE(this->world_to_grid(px, py, pz, pg[0], pg[1], pg[2]));
+    return this->evaluateFFDSecondOrderDerivative(pg, dderiv);
+}
+
+/// Evaluate the 2nd order derivative at a world location given by four coordinates.
+/// dderiv : output, D*D rows of DOut values
+/// Returns false if the world to grid conversion fails (the base class
+/// conversion rejects any DIn other than 4) or the evaluation fails.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDSecondOrderDerivativeW(CoordType px, CoordType py, CoordType pz, CoordType ps, T dderiv[D*D][DOut]) const
+{
+    CoordType pg[4];
+    // a failed conversion leaves pg undefined, so stop before it is used
+    GADGET_CHECK_RETURN_FALSE(this->world_to_grid(px, py, pz, ps, pg[0], pg[1], pg[2], pg[3]));
+    return this->evaluateFFDSecondOrderDerivative(pg, dderiv);
+}
+
+/// Convert an array of points from world coordinates to FFD grid coordinates.
+/// pt_w : input world coordinates; the first dimension must be DIn, the second is the number of points
+/// pt_g : output grid coordinates; given the shape of pt_w when its dimensions differ
+/// Returns false if the first dimension of pt_w is not DIn or an exception is caught.
+/// The result of the individual point conversions is not checked.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::world_to_grid(const CoordArrayType& pt_w, CoordArrayType& pt_g) const
+{
+    try
+    {
+        GADGET_CHECK_RETURN_FALSE(pt_w.get_size(0)==DIn);
+
+        // the output needs room for every converted point: when its shape does not match
+        // the input, size it by copying the input (every value is overwritten below)
+        if ( !pt_g.dimensions_equal(&pt_w) )
+        {
+            pt_g = pt_w;
+        }
+
+        const CoordType* pW = pt_w.begin();
+        CoordType* pG = pt_g.begin();
+
+        size_t N = pt_w.get_size(1);
+
+        long long n;
+
+        #pragma omp parallel for default(none) private(n) shared(N, pW, pG)
+        for ( n=0; n<(long long)N; n++ )
+        {
+            // point n starts at element n*DIn of both arrays
+            this->world_to_grid(pW+n*DIn, pG+n*DIn);
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors happened in world_to_grid(const CoordArrayType& pt_w, CoordArrayType& pt_g) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Convert one point from world coordinates to FFD grid coordinates.
+/// pt_w : world coordinates, D values
+/// pt_g : output grid coordinates, D values
+/// The mapping is delegated to the first control point grid; false is
+/// returned when that call throws.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::world_to_grid(const CoordType pt_w[D], CoordType pt_g[D]) const
+{
+    try
+    {
+        this->ctrl_pt_[0].world_to_image(pt_w, pt_g);
+        return true;
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors happened in world_to_grid(const CoordType pt_w[D], CoordType pt_g[D]) ... ");
+    }
+
+    // only reached after an exception was reported
+    return false;
+}
+
+/// Convert a two-coordinate world location to FFD grid coordinates.
+/// px_g, py_g : output grid coordinates
+/// Returns false when DIn is not 2 or when the first control point grid throws.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::world_to_grid(CoordType px_w, CoordType py_w, CoordType& px_g, CoordType& py_g) const
+{
+    GADGET_CHECK_RETURN_FALSE(DIn==2);
+
+    try
+    {
+        this->ctrl_pt_[0].world_to_image(px_w, py_w, px_g, py_g);
+        return true;
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors happened in world_to_grid(CoordType px_w, CoordType py_w, CoordType& px_g, CoordType& py_g) ... ");
+    }
+
+    // only reached after an exception was reported
+    return false;
+}
+
+/// Convert a three-coordinate world location to FFD grid coordinates.
+/// px_g, py_g, pz_g : output grid coordinates
+/// Returns false when DIn is not 3 or when the first control point grid throws.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::world_to_grid(CoordType px_w, CoordType py_w, CoordType pz_w, CoordType& px_g, CoordType& py_g, CoordType& pz_g) const
+{
+    GADGET_CHECK_RETURN_FALSE(DIn==3);
+
+    try
+    {
+        this->ctrl_pt_[0].world_to_image(px_w, py_w, pz_w, px_g, py_g, pz_g);
+        return true;
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors happened in world_to_grid(CoordType px_w, CoordType py_w, CoordType pz_w, CoordType& px_g, CoordType& py_g, CoordType& pz_g) ... ");
+    }
+
+    // only reached after an exception was reported
+    return false;
+}
+
+/// Convert a four-coordinate world location to FFD grid coordinates.
+/// px_g, py_g, pz_g, ps_g : output grid coordinates
+/// Returns false when DIn is not 4 or when the first control point grid throws.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::world_to_grid(CoordType px_w, CoordType py_w, CoordType pz_w, CoordType ps_w, CoordType& px_g, CoordType& py_g, CoordType& pz_g, CoordType& ps_g) const
+{
+    GADGET_CHECK_RETURN_FALSE(DIn==4);
+
+    try
+    {
+        this->ctrl_pt_[0].world_to_image(px_w, py_w, pz_w, ps_w, px_g, py_g, pz_g, ps_g);
+        return true;
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors happened in world_to_grid(CoordType px_w, CoordType py_w, CoordType pz_w, CoordType ps_w, CoordType& px_g, CoordType& py_g, CoordType& pz_g, CoordType& ps_g) ... ");
+    }
+
+    // only reached after an exception was reported
+    return false;
+}
+
+/// Convert an array of points from FFD grid coordinates to world coordinates.
+/// pt_g : input grid coordinates; the first dimension must be DIn, the second is the number of points
+/// pt_w : output world coordinates; given the shape of pt_g when its dimensions differ
+/// Returns false if the first dimension of pt_g is not DIn or an exception is caught.
+/// The result of the individual point conversions is not checked.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::grid_to_world(const CoordArrayType& pt_g, CoordArrayType& pt_w) const
+{
+    try
+    {
+        GADGET_CHECK_RETURN_FALSE(pt_g.get_size(0)==DIn);
+
+        // the output needs room for every converted point: when its shape does not match
+        // the input, size it by copying the input (every value is overwritten below)
+        if ( !pt_w.dimensions_equal(&pt_g) )
+        {
+            pt_w = pt_g;
+        }
+
+        const CoordType* pG = pt_g.begin();
+        CoordType* pW = pt_w.begin();
+
+        size_t N = pt_g.get_size(1);
+
+        long long n;
+
+        #pragma omp parallel for default(none) private(n) shared(N, pG, pW)
+        for ( n=0; n<(long long)N; n++ )
+        {
+            // point n starts at element n*DIn of both arrays
+            this->grid_to_world(pG+n*DIn, pW+n*DIn);
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors happened in grid_to_world(const CoordArrayType& pt_g, CoordArrayType& pt_w) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Convert one point from FFD grid coordinates to world coordinates.
+/// pt_g : grid coordinates, D values
+/// pt_w : output world coordinates, D values
+/// The mapping is delegated to the first control point grid; false is
+/// returned when that call throws.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::grid_to_world(const CoordType pt_g[D], CoordType pt_w[D]) const
+{
+    try
+    {
+        this->ctrl_pt_[0].image_to_world(pt_g, pt_w);
+        return true;
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors happened in grid_to_world(const CoordType pt_g[D], CoordType pt_w[D]) ... ");
+    }
+
+    // only reached after an exception was reported
+    return false;
+}
+
+/// Convert a two-coordinate FFD grid location to world coordinates.
+/// px_w, py_w : output world coordinates
+/// Returns false when DIn is not 2 or when the first control point grid throws.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::grid_to_world(CoordType px_g, CoordType py_g, CoordType& px_w, CoordType& py_w) const
+{
+    GADGET_CHECK_RETURN_FALSE(DIn==2);
+
+    try
+    {
+        this->ctrl_pt_[0].image_to_world(px_g, py_g, px_w, py_w);
+        return true;
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors happened in grid_to_world(CoordType px_g, CoordType py_g, CoordType& px_w, CoordType& py_w) ... ");
+    }
+
+    // only reached after an exception was reported
+    return false;
+}
+
+/// Convert a three-coordinate FFD grid location to world coordinates.
+/// px_w, py_w, pz_w : output world coordinates
+/// Returns false when DIn is not 3 or when the first control point grid throws.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::grid_to_world(CoordType px_g, CoordType py_g, CoordType pz_g, CoordType& px_w, CoordType& py_w, CoordType& pz_w) const
+{
+    GADGET_CHECK_RETURN_FALSE(DIn==3);
+
+    try
+    {
+        this->ctrl_pt_[0].image_to_world(px_g, py_g, pz_g, px_w, py_w, pz_w);
+        return true;
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors happened in grid_to_world(CoordType px_g, CoordType py_g, CoordType pz_g, CoordType& px_w, CoordType& py_w, CoordType& pz_w) ... ");
+    }
+
+    // only reached after an exception was reported
+    return false;
+}
+
+/// Convenience overload of the iterative FFD approximation.
+/// Forwards to the full overload with a residual threshold of FLT_EPSILON and
+/// numOfRefinement as the maximal number of passes; the pass counter reported
+/// by the full overload is discarded.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut>
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::ffdApprox(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N, size_t numOfRefinement)
+{
+    // only needed to satisfy the output parameter of the full overload
+    size_t performedPasses = 0;
+
+    const bool succeeded = this->ffdApprox(pos, value, residual, totalResidual, N, performedPasses, FLT_EPSILON, numOfRefinement);
+    return succeeded;
+}
+
+/// Iterative FFD approximation on points given in grid coordinates.
+///
+/// Each pass calls the single-level ffdApprox(); between passes the control
+/// point grid is refined with refine() and the grid coordinates are recomputed
+/// from the world coordinates obtained once before the first pass.
+///
+/// @param pos                 point coordinates; checked here to have size N along dimension 0 and DIn along dimension 1
+/// @param value               values at the points; checked here to have size N along dimension 0 and DOut along dimension 1
+/// @param residual            residual array; re-created and cleared if its dimensions differ from value
+/// @param totalResidual       reset to 0 and then filled by the single-level ffdApprox()
+/// @param N                   number of points
+/// @param numOfRefinement     receives the value of the pass counter when the loop ends
+/// @param thresResidual       the loop stops as soon as totalResidual drops below this threshold
+/// @param maxNumOfRefinement  maximal number of approximation passes
+/// @return true on success; false if a check or a sub-step failed or an exception was caught
+///
+/// NOTE(review): ffdApproxW() and the *ToFFDInputsW() helpers use a DIn x N / DOut x N
+/// layout, whereas the checks here require N x DIn / N x DOut -- confirm which layout is intended.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut>
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::ffdApprox(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement)
+{
+    try
+    {
+        GADGET_CHECK_RETURN_FALSE(pos.get_size(0)==N);
+        GADGET_CHECK_RETURN_FALSE(pos.get_size(1)==DIn);
+
+        GADGET_CHECK_RETURN_FALSE(value.get_size(0)==N);
+        GADGET_CHECK_RETURN_FALSE(value.get_size(1)==DOut);
+
+        totalResidual = 0;
+
+        const bool residualMatchesValue = residual.dimensions_equal(&value);
+        if ( !residualMatchesValue )
+        {
+            residual.create(value.get_dimensions());
+            Gadgetron::clear(residual);
+        }
+
+        // keep both representations: the world coordinates stay fixed,
+        // the grid coordinates are recomputed after every refine()
+        CoordArrayType posg(pos);
+        CoordArrayType posw(pos);
+        GADGET_CHECK_RETURN_FALSE(this->grid_to_world(posg, posw));
+
+        size_t num = 0;
+        while ( num < maxNumOfRefinement )
+        {
+            GADGET_CHECK_RETURN_FALSE(this->ffdApprox(posg, value, residual, totalResidual, N));
+
+            GADGET_CONDITION_MSG(performTiming_, "BSpline FFD refinement " << num << " has residual of " << totalResidual);
+
+            if ( totalResidual < thresResidual )
+            {
+                GADGET_MSG("BSpline FFD residual is too small : " << totalResidual);
+                GADGET_MSG("No further refinement will be computed ... ");
+                break;
+            }
+
+            // no refinement after the last pass
+            const bool anotherPassFollows = ( num+1 < maxNumOfRefinement );
+            if ( anotherPassFollows )
+            {
+                GADGET_CHECK_RETURN_FALSE(this->refine());
+                GADGET_CHECK_RETURN_FALSE(this->world_to_grid(posw, posg));
+            }
+
+            num++;
+        }
+
+        numOfRefinement = num;
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in ffdApprox(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Convenience overload of the world-coordinate FFD approximation.
+/// Forwards to the full ffdApproxW() overload with a residual threshold of
+/// FLT_EPSILON and numOfRefinement as the maximal number of passes; the pass
+/// counter reported by the full overload is discarded.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut>
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::ffdApproxW(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N, size_t numOfRefinement)
+{
+    // only needed to satisfy the output parameter of the full overload
+    size_t performedPasses = 0;
+
+    const bool succeeded = this->ffdApproxW(pos, value, residual, totalResidual, N, performedPasses, FLT_EPSILON, numOfRefinement);
+    return succeeded;
+}
+
+/// Iterative FFD approximation on points given in world coordinates.
+///
+/// The world coordinates are converted to grid coordinates with world_to_grid()
+/// before the first pass and again after every refine() call. Each pass calls
+/// the single-level ffdApprox() on the grid coordinates.
+///
+/// @param pos                 world coordinates, DIn x N (checked)
+/// @param value               values at the points, DOut x N (checked)
+/// @param residual            residual array; re-created and cleared if its dimensions differ from value
+/// @param totalResidual       reset to 0 and then filled by the single-level ffdApprox()
+/// @param N                   number of points
+/// @param numOfRefinement     receives the value of the pass counter when the loop ends
+/// @param thresResidual       the loop stops as soon as totalResidual drops below this threshold
+/// @param maxNumOfRefinement  maximal number of approximation passes
+/// @return true on success; false if a check or a sub-step failed or an exception was caught
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::ffdApproxW(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement)
+{
+    try
+    {
+        GADGET_CHECK_RETURN_FALSE(pos.get_size(0)==DIn);
+        GADGET_CHECK_RETURN_FALSE(pos.get_size(1)==N);
+
+        GADGET_CHECK_RETURN_FALSE(value.get_size(0)==DOut);
+        GADGET_CHECK_RETURN_FALSE(value.get_size(1)==N);
+
+        totalResidual = 0;
+
+        if ( !residual.dimensions_equal(&value) )
+        {
+            residual.create(value.get_dimensions());
+            Gadgetron::clear(residual);
+        }
+
+        // posg only borrows the shape of pos; its content is overwritten by world_to_grid
+        CoordArrayType posg(pos);
+        GADGET_CHECK_RETURN_FALSE(this->world_to_grid(pos, posg));
+
+        size_t num;
+        for ( num=0; num<maxNumOfRefinement; num++ )
+        {
+            GADGET_CHECK_RETURN_FALSE(this->ffdApprox(posg, value, residual, totalResidual, N));
+
+            GADGET_CONDITION_MSG(performTiming_, "BSpline FFD refinement " << num << " has residual of " << totalResidual);
+
+            if ( totalResidual < thresResidual )
+            {
+                GADGET_MSG("BSpline FFD residual is too small : " << totalResidual);
+                GADGET_MSG("No further refinement will be computed ... ");
+                break;
+            }
+
+            // no refinement after the last pass
+            if ( num<maxNumOfRefinement-1 )
+            {
+                GADGET_CHECK_RETURN_FALSE(this->refine());
+                GADGET_CHECK_RETURN_FALSE(this->world_to_grid(pos, posg));
+            }
+        }
+
+        numOfRefinement = num;
+    }
+    catch(...)
+    {
+        // the message now names this function (it previously reported ffdApprox)
+        GADGET_ERROR_MSG("Error happened in ffdApproxW(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Single-image overload: evaluate the FFD on every pixel of target.
+/// Only valid when DOut==1; forwards to the array-of-images overload.
+/// @return false if DOut is not 1, otherwise the result of the forwarded call
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut>
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDOnImage(ImageType& target) const
+{
+    GADGET_CHECK_RETURN_FALSE(DOut==1);
+
+    // a single image is treated as an array of one image
+    ImageType* pTarget = &target;
+    return this->evaluateFFDOnImage(pTarget);
+}
+
+/// Evaluate the FFD at every pixel of the target image(s) and store the result in place.
+///
+/// For each pixel, the pixel index is converted to world coordinates with
+/// target[0].image_to_world(), then to FFD grid coordinates with world_to_grid(),
+/// and the DOut values returned by evaluateFFD() are written to target[0..DOut-1]
+/// at that pixel. Dedicated loops exist for DIn==2 and DIn==3; other dimensions
+/// go through a generic linear-index loop.
+///
+/// @param target array of DOut images; the geometry of target[0] is used for all outputs
+/// @return true on success; false if an exception was caught
+///
+/// NOTE(review): the return values of world_to_grid() and evaluateFFD() are not checked here.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDOnImage(ImageType target[DOut]) const
+{
+    try
+    {
+        if ( DIn==2 )
+        {
+            size_t sx = target[0].get_size(0);
+            size_t sy = target[0].get_size(1);
+
+            long long y;
+
+            #pragma omp parallel private(y) shared(sx, sy, target)
+            {
+                coord_type px, py, pg[2];
+                T v[DOut];
+
+                #pragma omp for 
+                for ( y=0; y<(long long)sy; y++ )
+                {
+                    for ( size_t x=0; x<sx; x++ )
+                    {
+                        size_t offset = x + y*sx;
+
+                        // target to world
+                        target[0].image_to_world(x, size_t(y), px, py);
+
+                        // world to grid
+                        this->world_to_grid(px, py, pg[0], pg[1]);
+
+                        // evaluate the FFD
+                        this->evaluateFFD(pg, v);
+
+                        for ( unsigned int d=0; d<DOut; d++ )
+                        {
+                            target[d](offset) = v[d];
+                        }
+                    }
+                }
+            }
+        }
+        else if ( DIn==3 )
+        {
+            size_t sx = target[0].get_size(0);
+            size_t sy = target[0].get_size(1);
+            size_t sz = target[0].get_size(2);
+
+            long long z;
+
+            #pragma omp parallel private(z) shared(sx, sy, sz, target)
+            {
+                coord_type px, py, pz, pg[3];
+                T v[DOut];
+
+                #pragma omp for 
+                for ( z=0; z<(long long)sz; z++ )
+                {
+                    for ( size_t y=0; y<sy; y++ )
+                    {
+                        // linear index of the first pixel of row (y, z)
+                        size_t offset = y*sx + z*sx*sy;
+
+                        for ( size_t x=0; x<sx; x++ )
+                        {
+                            // target to world
+                            target[0].image_to_world(x, y, size_t(z), px, py, pz);
+
+                            // world to grid
+                            this->world_to_grid(px, py, pz, pg[0], pg[1], pg[2]);
+
+                            // evaluate the FFD
+                            this->evaluateFFD(pg, v);
+
+                            for ( unsigned int d=0; d<DOut; d++ )
+                            {
+                                target[d](offset+x) = v[d];
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        else
+        {
+            size_t numOfPixels = target[0].get_number_of_elements();
+
+            long long n;
+
+            #pragma omp parallel private(n) shared(numOfPixels, target)
+            {
+                size_t ind_target[DIn];
+                coord_type pt_target[DIn];
+                coord_type pt_grid[DIn];
+                T v[DOut];
+
+                #pragma omp for 
+                for ( n=0; n<(long long)numOfPixels; n++ )
+                {
+                    // target to world
+                    target[0].calculate_index( size_t(n), ind_target );
+
+                    target[0].image_to_world(ind_target, pt_target);
+
+                    // world to grid
+                    this->world_to_grid(pt_target, pt_grid);
+
+                    // evaluate the FFD
+                    this->evaluateFFD(pt_grid, v);
+
+                    for ( unsigned int d=0; d<DOut; d++ )
+                    {
+                        target[d](n) = v[d];
+                    }
+                }
+            }
+        }
+    }
+    catch(...)
+    {
+        // the message now names this function (it previously reported evaluateFFD)
+        GADGET_ERROR_MSG("Error happened in evaluateFFDOnImage(ImageType target[DOut]) const ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Single-array overload: evaluate the FFD on every element of target.
+/// Only valid when DOut==1; forwards to the array-of-arrays overload.
+/// @return false if DOut is not 1, otherwise the result of the forwarded call
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut>
+bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDOnArray(ArrayType& target) const
+{
+    GADGET_CHECK_RETURN_FALSE(DOut==1);
+
+    // a single array is treated as an array of one element
+    ArrayType* pTarget = &target;
+    return this->evaluateFFDOnArray(pTarget);
+}
+
+/// Evaluate the FFD at every element of the target array(s) and store the result in place.
+///
+/// The element index itself (cast to CoordType) is passed to world_to_grid(), i.e.
+/// array indices are used as the world coordinates. The DOut values returned by
+/// evaluateFFD() are written to target[0..DOut-1] at that element. Dedicated loops
+/// exist for DIn==2 and DIn==3; other dimensions go through a generic linear-index loop.
+///
+/// @param target array of DOut arrays; the sizes of target[0] are used for all outputs
+/// @return true on success; false if an exception was caught
+///
+/// NOTE(review): the return values of world_to_grid() and evaluateFFD() are not checked here.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+bool gtplusFFDBase<T, CoordType, DIn, DOut>::evaluateFFDOnArray(ArrayType target[DOut]) const
+{
+    try
+    {
+        if ( DIn==2 )
+        {
+            size_t sx = target[0].get_size(0);
+            size_t sy = target[0].get_size(1);
+
+            long long y;
+
+            #pragma omp parallel private(y) shared(sx, sy, target)
+            {
+                coord_type pg[2];
+                T v[DOut];
+
+                #pragma omp for 
+                for ( y=0; y<(long long)sy; y++ )
+                {
+                    for ( size_t x=0; x<sx; x++ )
+                    {
+                        size_t offset = x + y*sx;
+
+                        this->world_to_grid((CoordType)x, (CoordType)y, pg[0], pg[1]);
+
+                        // evaluate the FFD
+                        this->evaluateFFD(pg, v);
+
+                        for ( unsigned int d=0; d<DOut; d++ )
+                        {
+                            target[d](offset) = v[d];
+                        }
+                    }
+                }
+            }
+        }
+        else if ( DIn==3 )
+        {
+            size_t sx = target[0].get_size(0);
+            size_t sy = target[0].get_size(1);
+            size_t sz = target[0].get_size(2);
+
+            long long z;
+
+            #pragma omp parallel private(z) shared(sx, sy, sz, target)
+            {
+                coord_type pg[3];
+                T v[DOut];
+
+                #pragma omp for 
+                for ( z=0; z<(long long)sz; z++ )
+                {
+                    for ( size_t y=0; y<sy; y++ )
+                    {
+                        // linear index of the first element of row (y, z)
+                        size_t offset = y*sx + z*sx*sy;
+
+                        for ( size_t x=0; x<sx; x++ )
+                        {
+                            this->world_to_grid((CoordType)x, (CoordType)y, (CoordType)z, pg[0], pg[1], pg[2]);
+
+                            // evaluate the FFD
+                            this->evaluateFFD(pg, v);
+
+                            for ( unsigned int d=0; d<DOut; d++ )
+                            {
+                                target[d](offset+x) = v[d];
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        else
+        {
+            size_t numOfPixels = target[0].get_number_of_elements();
+
+            long long n;
+
+            #pragma omp parallel private(n) shared(numOfPixels, target)
+            {
+                std::vector<size_t> ind_target(DIn);
+                coord_type pt_target[DIn];
+                coord_type pt_grid[DIn];
+                T v[DOut];
+
+                #pragma omp for 
+                for ( n=0; n<(long long)numOfPixels; n++ )
+                {
+                    ind_target = target[0].calculate_index( size_t(n) );
+
+                    // the N-D index is used directly as the world coordinate
+                    for ( unsigned int d=0; d<DIn; d++ )
+                    {
+                        pt_target[d] = (CoordType)ind_target[d];
+                    }
+
+                    this->world_to_grid(pt_target, pt_grid);
+
+                    // evaluate the FFD
+                    this->evaluateFFD(pt_grid, v);
+
+                    for ( unsigned int d=0; d<DOut; d++ )
+                    {
+                        target[d](n) = v[d];
+                    }
+                }
+            }
+        }
+    }
+    catch(...)
+    {
+        // the message now names this function (it previously reported evaluateFFD)
+        GADGET_ERROR_MSG("Error happened in evaluateFFDOnArray(ArrayType target[DOut]) const ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Convert image(s) into the point/value inputs of the world-coordinate FFD approximation.
+///
+/// For every pixel n of target[0], column n of pos receives the world coordinates
+/// of the pixel (from target[0].image_to_world()) and column n of value receives
+/// the pixel values of target[0..DOut-1].
+///
+/// @param target array of DOut images; the geometry of target[0] is used for all of them
+/// @param pos    output, re-created as DIn x N, with N the number of elements of target[0]
+/// @param value  output, re-created as DOut x N
+/// @return true on success; false if an exception was caught
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+bool gtplusFFDBase<T, CoordType, DIn, DOut>::imageToFFDInputsW(ImageType target[DOut], CoordArrayType& pos, ValueArrayType& value)
+{
+    try
+    {
+        size_t N = target[0].get_number_of_elements();
+        pos.create(DIn, N);
+        value.create(DOut, N);
+
+        if ( DIn==2 )
+        {
+            size_t sx = target[0].get_size(0);
+            size_t sy = target[0].get_size(1);
+
+            long long y;
+ 
+            #pragma omp parallel private(y) shared(sx, sy, target, pos, value)
+            {
+                coord_type px, py;
+
+                #pragma omp for 
+                for ( y=0; y<(long long)sy; y++ )
+                {
+                    for ( size_t x=0; x<sx; x++ )
+                    {
+                        size_t offset = x + y*sx;
+
+                        // target to world
+                        target[0].image_to_world(x, size_t(y), px, py);
+
+                        pos(0, offset) = px;
+                        pos(1, offset) = py;
+
+                        for ( unsigned int d=0; d<DOut; d++ )
+                        {
+                            value(d, offset) = target[d](offset);
+                        }
+                    }
+                }
+            }
+        }
+        else if ( DIn==3 )
+        {
+            size_t sx = target[0].get_size(0);
+            size_t sy = target[0].get_size(1);
+            size_t sz = target[0].get_size(2);
+
+            long long z;
+
+            #pragma omp parallel private(z) shared(sx, sy, sz, target, pos, value)
+            {
+                coord_type px, py, pz;
+
+                #pragma omp for 
+                for ( z=0; z<(long long)sz; z++ )
+                {
+                    for ( size_t y=0; y<sy; y++ )
+                    {
+                        // linear index of the first pixel of row (y, z)
+                        size_t rowOffset = y*sx + z*sx*sy;
+
+                        for ( size_t x=0; x<sx; x++ )
+                        {
+                            // x must be part of the index; using the row offset alone
+                            // would make every pixel of the row write to the same column
+                            size_t offset = rowOffset + x;
+
+                            // target to world
+                            target[0].image_to_world(x, y, size_t(z), px, py, pz);
+
+                            pos(0, offset) = px;
+                            pos(1, offset) = py;
+                            pos(2, offset) = pz;
+
+                            for ( unsigned int d=0; d<DOut; d++ )
+                            {
+                                value(d, offset) = target[d](offset);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        else
+        {
+            long long n;
+
+            #pragma omp parallel private(n) shared(N, target)
+            {
+                size_t ind_target[DIn];
+                coord_type pt_target[DIn];
+
+                #pragma omp for 
+                for ( n=0; n<(long long)N; n++ )
+                {
+                    // target to world
+                    target[0].calculate_index( size_t(n), ind_target );
+
+                    target[0].image_to_world(ind_target, pt_target);
+
+                    for ( unsigned int d=0; d<DIn; d++ )
+                    {
+                        pos(d, n) = pt_target[d];
+                    }
+
+                    for ( unsigned int d=0; d<DOut; d++ )
+                    {
+                        value(d, n) = target[d](n);
+                    }
+                }
+            }
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in imageToFFDInputsW(ImageType target[DOut], CoordArrayType& pos, ValueArrayType& value) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Masked variant of imageToFFDInputsW(): only pixels with a non-zero mask entry are exported.
+///
+/// The unmasked inputs are computed first; then the columns of the selected pixels
+/// are packed, in increasing pixel order, into pos and value.
+///
+/// @param target array of DOut images
+/// @param mask   selection mask; a pixel n is kept when mask(n)!=0
+/// @param pos    output, re-created as DIn x M, with M the number of non-zero mask entries
+/// @param value  output, re-created as DOut x M
+/// @return false if the unmasked conversion failed or an exception was caught; true otherwise
+///
+/// NOTE(review): when the mask has a different number of elements than target[0], the
+/// function returns true without touching pos or value -- confirm that this is intended.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+bool gtplusFFDBase<T, CoordType, DIn, DOut>::imageToFFDInputsW(ImageType target[DOut], const MaskArrayType& mask, CoordArrayType& pos, ValueArrayType& value)
+{
+    try
+    {
+        size_t N = target[0].get_number_of_elements();
+        if ( mask.get_number_of_elements() != N ) return true;
+
+        size_t n, d;
+
+        // first pass: count the selected pixels to size the outputs
+        size_t numOfPixels = 0;
+        for ( n=0; n<N; n++ )
+        {
+            if ( mask(n)!=0 ) numOfPixels++;
+        }
+
+        CoordArrayType posTmp;
+        ValueArrayType valueTmp;
+
+        GADGET_CHECK_RETURN_FALSE(this->imageToFFDInputsW(target, posTmp, valueTmp));
+
+        pos.create(DIn, numOfPixels);
+        value.create(DOut, numOfPixels);
+
+        // second pass: pack the selected columns
+        numOfPixels = 0;
+        for ( n=0; n<N; n++ )
+        {
+            if ( mask(n)!=0 )
+            {
+                // element-wise copy; a raw memcpy sized with sizeof(T) copies the wrong
+                // number of bytes whenever the value and coordinate types differ in size
+                for ( d=0; d<DIn; d++ )
+                {
+                    pos(d, numOfPixels) = posTmp(d, n);
+                }
+
+                for ( d=0; d<DOut; d++ )
+                {
+                    value(d, numOfPixels) = valueTmp(d, n);
+                }
+
+                numOfPixels++;
+            }
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in imageToFFDInputsW(ImageType target[DOut], const MaskArrayType& mask, CoordArrayType& pos, ValueArrayType& value) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Convert array(s) into the point/value inputs of the world-coordinate FFD approximation.
+///
+/// For every element n of target[0], column n of pos receives the N-D index of the
+/// element (cast to CoordType) and column n of value receives the element values of
+/// target[0..DOut-1].
+///
+/// @param target array of DOut arrays; the sizes of target[0] are used for all of them
+/// @param pos    output, re-created as DIn x N, with N the number of elements of target[0]
+/// @param value  output, re-created as DOut x N
+/// @return true on success; false if an exception was caught
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+bool gtplusFFDBase<T, CoordType, DIn, DOut>::arrayToFFDInputsW(ArrayType target[DOut], CoordArrayType& pos, ValueArrayType& value)
+{
+    try
+    {
+        size_t N = target[0].get_number_of_elements();
+        pos.create(DIn, N);
+        value.create(DOut, N);
+
+        if ( DIn==2 )
+        {
+            size_t sx = target[0].get_size(0);
+            size_t sy = target[0].get_size(1);
+
+            long long y;
+ 
+            #pragma omp parallel private(y) shared(sx, sy, target, pos, value)
+            {
+                #pragma omp for 
+                for ( y=0; y<(long long)sy; y++ )
+                {
+                    for ( size_t x=0; x<sx; x++ )
+                    {
+                        size_t offset = x + y*sx;
+
+                        pos(0, offset) = (CoordType)x;
+                        pos(1, offset) = (CoordType)y;
+
+                        for ( unsigned int d=0; d<DOut; d++ )
+                        {
+                            value(d, offset) = target[d](offset);
+                        }
+                    }
+                }
+            }
+        }
+        else if ( DIn==3 )
+        {
+            size_t sx = target[0].get_size(0);
+            size_t sy = target[0].get_size(1);
+            size_t sz = target[0].get_size(2);
+
+            long long z;
+
+            #pragma omp parallel private(z) shared(sx, sy, sz, target, pos, value)
+            {
+                #pragma omp for 
+                for ( z=0; z<(long long)sz; z++ )
+                {
+                    for ( size_t y=0; y<sy; y++ )
+                    {
+                        // linear index of the first element of row (y, z)
+                        size_t rowOffset = y*sx + z*sx*sy;
+
+                        for ( size_t x=0; x<sx; x++ )
+                        {
+                            // x must be part of the index; using the row offset alone
+                            // would make every element of the row write to the same column
+                            size_t offset = rowOffset + x;
+
+                            pos(0, offset) = (CoordType)x;
+                            pos(1, offset) = (CoordType)y;
+                            pos(2, offset) = (CoordType)z;
+
+                            for ( unsigned int d=0; d<DOut; d++ )
+                            {
+                                value(d, offset) = target[d](offset);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        else
+        {
+            long long n;
+
+            #pragma omp parallel private(n) shared(N, target)
+            {
+                std::vector<size_t> ind_target(DIn);
+
+                #pragma omp for 
+                for ( n=0; n<(long long)N; n++ )
+                {
+                    ind_target = target[0].calculate_index( size_t(n) );
+
+                    for ( unsigned int d=0; d<DIn; d++ )
+                    {
+                        pos(d, n) = (CoordType)ind_target[d];
+                    }
+
+                    for ( unsigned int d=0; d<DOut; d++ )
+                    {
+                        value(d, n) = target[d](n);
+                    }
+                }
+            }
+        }
+    }
+    catch(...)
+    {
+        // the message now names this function (it previously reported imageToFFDInputsW)
+        GADGET_ERROR_MSG("Error happened in arrayToFFDInputsW(ArrayType target[DOut], CoordArrayType& pos, ValueArrayType& value) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Masked variant of arrayToFFDInputsW(): only elements with a non-zero mask entry are exported.
+///
+/// The unmasked inputs are computed first; then the columns of the selected elements
+/// are packed, in increasing element order, into pos and value.
+///
+/// @param target array of DOut arrays
+/// @param mask   selection mask; an element n is kept when mask(n)!=0
+/// @param pos    output, re-created as DIn x M, with M the number of non-zero mask entries
+/// @param value  output, re-created as DOut x M
+/// @return false if the unmasked conversion failed or an exception was caught; true otherwise
+///
+/// NOTE(review): when the mask has a different number of elements than target[0], the
+/// function returns true without touching pos or value -- confirm that this is intended.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+bool gtplusFFDBase<T, CoordType, DIn, DOut>::arrayToFFDInputsW(ArrayType target[DOut], const MaskArrayType& mask, CoordArrayType& pos, ValueArrayType& value)
+{
+    try
+    {
+        size_t N = target[0].get_number_of_elements();
+        if ( mask.get_number_of_elements() != N ) return true;
+
+        size_t n, d;
+
+        // first pass: count the selected elements to size the outputs
+        size_t numOfPixels = 0;
+        for ( n=0; n<N; n++ )
+        {
+            if ( mask(n)!=0 ) numOfPixels++;
+        }
+
+        CoordArrayType posTmp;
+        ValueArrayType valueTmp;
+
+        GADGET_CHECK_RETURN_FALSE(this->arrayToFFDInputsW(target, posTmp, valueTmp));
+
+        pos.create(DIn, numOfPixels);
+        value.create(DOut, numOfPixels);
+
+        // second pass: pack the selected columns
+        numOfPixels = 0;
+        for ( n=0; n<N; n++ )
+        {
+            if ( mask(n)!=0 )
+            {
+                // element-wise copy; a raw memcpy sized with sizeof(T) copies the wrong
+                // number of bytes whenever the value and coordinate types differ in size
+                for ( d=0; d<DIn; d++ )
+                {
+                    pos(d, numOfPixels) = posTmp(d, n);
+                }
+
+                for ( d=0; d<DOut; d++ )
+                {
+                    value(d, numOfPixels) = valueTmp(d, n);
+                }
+
+                numOfPixels++;
+            }
+        }
+    }
+    catch(...)
+    {
+        // the message now names this function (it previously reported imageToFFDInputsW)
+        GADGET_ERROR_MSG("Error happened in arrayToFFDInputsW(ArrayType target[DOut], const MaskArrayType& mask, CoordArrayType& pos, ValueArrayType& value) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Approximate DOut images : imageToFFDInputsW creates one input point per element of target[0], then ffdApproxW runs with numOfRefinement.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::ffdApproxImage(ImageType target[DOut], real_value_type& totalResidual, size_t numOfRefinement)
+{
+    try
+    {
+        // buffers are sized for every element of the first image
+        size_t numOfPts = target[0].get_number_of_elements();
+        CoordArrayType ptPos(DIn, numOfPts);
+        ValueArrayType ptValue(DOut, numOfPts), ptResidual(DOut, numOfPts);
+        // fill positions and values from the images, then run the approximation
+        GADGET_CHECK_RETURN_FALSE(this->imageToFFDInputsW(target, ptPos, ptValue));
+        GADGET_CHECK_RETURN_FALSE(this->ffdApproxW(ptPos, ptValue, ptResidual, totalResidual, numOfPts, numOfRefinement));
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in ffdApproxImage(ImageType target[DOut], real_value_type& totalResidual, size_t numOfRefinement) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Approximate DOut images; thresResidual, maxNumOfRefinement and numOfRefinement are forwarded unchanged to ffdApproxW.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::ffdApproxImage(ImageType target[DOut], real_value_type& totalResidual, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement)
+{
+    try
+    {
+        // buffers are sized for every element of the first image
+        size_t numOfPts = target[0].get_number_of_elements();
+        CoordArrayType ptPos(DIn, numOfPts);
+        ValueArrayType ptValue(DOut, numOfPts), ptResidual(DOut, numOfPts);
+        // fill positions and values from the images, then run the approximation
+        GADGET_CHECK_RETURN_FALSE(this->imageToFFDInputsW(target, ptPos, ptValue));
+        GADGET_CHECK_RETURN_FALSE(this->ffdApproxW(ptPos, ptValue, ptResidual, totalResidual, numOfPts, numOfRefinement, thresResidual, maxNumOfRefinement));
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in ffdApproxImage(ImageType target[DOut], real_value_type& totalResidual, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::ffdApproxImage(ImageType& target, real_value_type& totalResidual, size_t numOfRefinement)
+{
+    GADGET_CHECK_RETURN_FALSE(DOut==1); // single-image convenience overload : rejected unless the FFD has exactly one output
+    return this->ffdApproxImage(&target, totalResidual, numOfRefinement); // passing a pointer selects the ImageType target[DOut] overload
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::ffdApproxImage(ImageType& target, real_value_type& totalResidual, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement)
+{
+    GADGET_CHECK_RETURN_FALSE(DOut==1); // single-image convenience overload : rejected unless the FFD has exactly one output
+    return this->ffdApproxImage(&target, totalResidual, numOfRefinement, thresResidual, maxNumOfRefinement); // passing a pointer selects the ImageType target[DOut] overload
+}
+
+/// Approximate DOut images using the points produced by the masked imageToFFDInputsW, with numOfRefinement passed to ffdApproxW.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::ffdApproxImage(ImageType target[DOut], const MaskArrayType& mask, real_value_type& totalResidual, size_t numOfRefinement)
+{
+    try
+    {
+        CoordArrayType ptPos;
+        ValueArrayType ptValue, ptResidual;
+        GADGET_CHECK_RETURN_FALSE(this->imageToFFDInputsW(target, mask, ptPos, ptValue));
+
+        // ptPos was sized by imageToFFDInputsW; its second dimension is used as the number of points
+        size_t numOfPts = ptPos.get_size(1);
+        GADGET_CHECK_RETURN_FALSE(this->ffdApproxW(ptPos, ptValue, ptResidual, totalResidual, numOfPts, numOfRefinement));
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in ffdApproxImage(ImageType target[DOut], const MaskArrayType& mask, real_value_type& totalResidual, size_t numOfRefinement) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Approximate DOut images using the points produced by the masked imageToFFDInputsW; the refinement arguments go to ffdApproxW unchanged.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::ffdApproxImage(ImageType target[DOut], const MaskArrayType& mask, real_value_type& totalResidual, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement)
+{
+    try
+    {
+        CoordArrayType ptPos;
+        ValueArrayType ptValue, ptResidual;
+        GADGET_CHECK_RETURN_FALSE(this->imageToFFDInputsW(target, mask, ptPos, ptValue));
+
+        // ptPos was sized by imageToFFDInputsW; its second dimension is used as the number of points
+        size_t numOfPts = ptPos.get_size(1);
+        GADGET_CHECK_RETURN_FALSE(this->ffdApproxW(ptPos, ptValue, ptResidual, totalResidual, numOfPts, numOfRefinement, thresResidual, maxNumOfRefinement));
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in ffdApproxImage(ImageType target[DOut], const MaskArrayType& mask, real_value_type& totalResidual, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Approximate DOut arrays : arrayToFFDInputsW creates one input point per element of target[0], then ffdApproxW runs with numOfRefinement.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::ffdApproxArray(ArrayType target[DOut], real_value_type& totalResidual, size_t numOfRefinement)
+{
+    try
+    {
+        // buffers are sized for every element of the first array
+        size_t numOfPts = target[0].get_number_of_elements();
+        CoordArrayType ptPos(DIn, numOfPts);
+        ValueArrayType ptValue(DOut, numOfPts), ptResidual(DOut, numOfPts);
+        // fill positions and values from the arrays, then run the approximation
+        GADGET_CHECK_RETURN_FALSE(this->arrayToFFDInputsW(target, ptPos, ptValue));
+        GADGET_CHECK_RETURN_FALSE(this->ffdApproxW(ptPos, ptValue, ptResidual, totalResidual, numOfPts, numOfRefinement));
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in ffdApproxArray(ArrayType target[DOut], real_value_type& totalResidual, size_t numOfRefinement) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Approximate DOut arrays; thresResidual, maxNumOfRefinement and numOfRefinement are forwarded unchanged to ffdApproxW.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::ffdApproxArray(ArrayType target[DOut], real_value_type& totalResidual, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement)
+{
+    try
+    {
+        size_t N = target[0].get_number_of_elements();
+        CoordArrayType pos(DIn, N);
+        ValueArrayType value(DOut, N);
+        ValueArrayType residual(DOut, N);
+        // one input point per array element, then the approximation itself
+        GADGET_CHECK_RETURN_FALSE(this->arrayToFFDInputsW(target, pos, value));
+        GADGET_CHECK_RETURN_FALSE(this->ffdApproxW(pos, value, residual, totalResidual, N, numOfRefinement, thresResidual, maxNumOfRefinement));
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in ffdApproxArray(ArrayType target[DOut], real_value_type& totalResidual, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::ffdApproxArray(ArrayType& target, real_value_type& totalResidual, size_t numOfRefinement)
+{
+    GADGET_CHECK_RETURN_FALSE(DOut==1); // single-array convenience overload : rejected unless the FFD has exactly one output
+    return this->ffdApproxArray(&target, totalResidual, numOfRefinement); // passing a pointer selects the ArrayType target[DOut] overload
+}
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::ffdApproxArray(ArrayType& target, real_value_type& totalResidual, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement)
+{
+    GADGET_CHECK_RETURN_FALSE(DOut==1); // single-array convenience overload : rejected unless the FFD has exactly one output
+    return this->ffdApproxArray(&target, totalResidual, numOfRefinement, thresResidual, maxNumOfRefinement); // passing a pointer selects the ArrayType target[DOut] overload
+}
+
+/// Approximate DOut arrays using only the points kept by the masked arrayToFFDInputsW, with numOfRefinement passed to ffdApproxW.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::ffdApproxArray(ArrayType target[DOut], const MaskArrayType& mask, real_value_type& totalResidual, size_t numOfRefinement)
+{
+    try
+    {
+        CoordArrayType ptPos;
+        ValueArrayType ptValue, ptResidual;
+        GADGET_CHECK_RETURN_FALSE(this->arrayToFFDInputsW(target, mask, ptPos, ptValue));
+
+        // ptPos was sized by arrayToFFDInputsW; its second dimension is used as the number of points
+        size_t numOfPts = ptPos.get_size(1);
+        GADGET_CHECK_RETURN_FALSE(this->ffdApproxW(ptPos, ptValue, ptResidual, totalResidual, numOfPts, numOfRefinement));
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in ffdApproxArray(ArrayType target[DOut], const MaskArrayType& mask, real_value_type& totalResidual, size_t numOfRefinement) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Approximate DOut arrays using only the points kept by the masked arrayToFFDInputsW; the refinement arguments go to ffdApproxW unchanged.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusFFDBase<T, CoordType, DIn, DOut>::ffdApproxArray(ArrayType target[DOut], const MaskArrayType& mask, real_value_type& totalResidual, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement)
+{
+    try
+    {
+        CoordArrayType pos;
+        ValueArrayType value, residual;
+        GADGET_CHECK_RETURN_FALSE(this->arrayToFFDInputsW(target, mask, pos, value));
+        // pos was sized by arrayToFFDInputsW; its second dimension is the number of selected points
+        size_t N = pos.get_size(1);
+        GADGET_CHECK_RETURN_FALSE(this->ffdApproxW(pos, value, residual, totalResidual, N, numOfRefinement, thresResidual, maxNumOfRefinement));
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in ffdApproxArray(ArrayType target[DOut], const MaskArrayType& mask, real_value_type& totalResidual, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Reset the DOut control point arrays ctrl_pt_[d].
+/// When std::abs(v) is zero each array goes through Gadgetron::clear, otherwise through Gadgetron::fill with v.
+/// Returns false if an exception is caught.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+bool gtplusFFDBase<T, CoordType, DIn, DOut>::clear(T v)
+{
+    try
+    {
+        // decide once which routine applies, then visit every output dimension
+        const bool resetToZero = ( std::abs(v) == 0 );
+        for ( unsigned int dOut=0; dOut<DOut; dOut++ )
+        {
+            if ( resetToZero )
+            {
+                Gadgetron::clear(ctrl_pt_[dOut]);
+            }
+            else
+            {
+                Gadgetron::fill(ctrl_pt_[dOut], v);
+            }
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in gtplusFFDBase<T, CoordType, DIn, DOut>::clear(T v) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Write a three line banner describing this FFD interface class to os.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+void gtplusFFDBase<T, CoordType, DIn, DOut>::print(std::ostream& os) const
+{
+    // every line ends with std::endl, so the stream is flushed three times
+    os << "---------------------- GTPlus Free Form Deformation ------------------" << std::endl
+       << "Define the interface for Free Form Deformation (FFD) " << std::endl
+       << "----------------------------------------------------------------------" << std::endl;
+}
+
+}}
diff --git a/toolboxes/gtplus/algorithm/FreeFormDeformation/gtplusMLFFD.h b/toolboxes/gtplus/algorithm/FreeFormDeformation/gtplusMLFFD.h
new file mode 100644
index 0000000..7976f66
--- /dev/null
+++ b/toolboxes/gtplus/algorithm/FreeFormDeformation/gtplusMLFFD.h
@@ -0,0 +1,436 @@
+/** \file       gtplusMLFFD.h
+
+    \brief      Implement gtPlus multi-level FreeFormDeformation
+                For every level, the fitting residual from previous level will be approximated
+                The final fitted value is the sum of all levels
+
+    \author     Hui Xue
+*/
+
+#pragma once
+
+#include "gtplusFFDBase.h"
+
+namespace Gadgetron { namespace gtPlus {
+
+/// Multi-level FFD : stores a list of FFD levels (pointers to gtplusFFDBase) and evaluates the
+/// FFD value and its derivatives as the sum over all non-NULL levels.
+/// The levels are deleted by the destructor only when delete_data_on_destruct is true.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut>
+class gtplusMLFFD : public gtplusFFDBase<T, CoordType, DIn, DOut>
+{
+public:
+    typedef gtplusFFDBase<T, CoordType, DIn, DOut> BaseClass;
+    /// Self names this class, so that gtplusMLFFD(const Self&) is the copy constructor
+    typedef gtplusMLFFD<T, CoordType, DIn, DOut> Self;
+
+    typedef typename BaseClass::bspline_float_type real_value_type;
+    typedef real_value_type bspline_float_type;
+
+    typedef typename BaseClass::coord_type coord_type;
+    enum { D = DIn };
+
+    typedef typename BaseClass::CoordArrayType      CoordArrayType;
+    typedef typename BaseClass::ValueArrayType      ValueArrayType;
+    typedef typename BaseClass::ArrayType           ArrayType;
+    typedef typename BaseClass::FFDCtrlPtGridType   FFDCtrlPtGridType;
+    typedef typename BaseClass::PointType           PointType;
+    typedef typename BaseClass::ImageType           ImageType;
+
+    typedef std::vector<BaseClass*> FFDArrayType;
+    /// constructors; the copy constructor shares the level pointers of a and never takes ownership
+    gtplusMLFFD(bool delete_data_on_destruct=false);
+    gtplusMLFFD(const FFDArrayType& a, bool delete_data_on_destruct=false);
+    gtplusMLFFD(const Self& a);
+
+    virtual ~gtplusMLFFD();
+
+    size_t get_size() const { return ml_ffd_.size(); }
+
+    /// get the FFD array
+    FFDArrayType& getFFDArray() { return ml_ffd_; }
+    const FFDArrayType& getFFDArray() const { return ml_ffd_; }
+
+    /// set the delete flag
+    bool delete_data_on_destruct() const { return delete_data_on_destruct_; }
+    void delete_data_on_destruct(bool flag) { delete_data_on_destruct_ = flag; }
+
+    /// evaluate the FFD at a grid location
+    /// the input points are in the FFD grid
+    virtual bool evaluateFFD(const CoordType pt[D], T r[DOut]) const;
+
+    /// evaluate the 1st order derivative of FFD at a grid location
+    /// deriv: derivative for all D dimensions and all DOut values
+    virtual bool evaluateFFDDerivative(const CoordType pt[D], T deriv[D][DOut]) const;
+
+    virtual bool evaluateFFDDX(const CoordType pt[D], T dx[DOut]) const;
+    virtual bool evaluateFFDDY(const CoordType pt[D], T dy[DOut]) const;
+    virtual bool evaluateFFDDZ(const CoordType pt[D], T dz[DOut]) const;
+    virtual bool evaluateFFDDS(const CoordType pt[D], T ds[DOut]) const;
+
+    /// evaluate the 2nd order derivative of FFD at a grid location
+    /// dderiv : D*D vector, stores dxx dxy dxz ...; dyx dyy dyz ...; dzx dzy dzz ...
+    virtual bool evaluateFFDSecondOrderDerivative(const CoordType pt[D], T dderiv[D*D][DOut]) const;
+
+    /// compute the FFD approximation once
+    /// pos : the position of input points, DIn by N
+    /// value : the value on input points, DOut by N
+    /// residual : the approximation residual after computing FFD, DOut by N
+    /// N : the number of points
+    virtual bool ffdApprox(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, T& totalResidual, size_t N);
+    virtual bool ffdApprox(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement);
+
+    /// refine the FFD
+    virtual bool refine();
+
+    /// general print function
+    virtual void print(std::ostream& os) const;
+
+protected:
+    /// the FFD levels, in evaluation and fitting order; entries may be NULL
+    FFDArrayType ml_ffd_;
+
+    /// if true, all stored ffd will be deleted
+    bool delete_data_on_destruct_;
+};
+
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+gtplusMLFFD<T, CoordType, DIn, DOut>::gtplusMLFFD(bool delete_data_on_destruct) : delete_data_on_destruct_(delete_data_on_destruct)
+{   // no level is added here : only the ownership flag is recorded
+}
+
+/// Build a multi-level FFD from existing levels. The pointers of a are stored as they are
+/// (NULL entries stay NULL); the levels themselves are not copied.
+/// delete_data_on_destruct : if true, the destructor deletes every stored non-NULL level
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+gtplusMLFFD<T, CoordType, DIn, DOut>::gtplusMLFFD(const FFDArrayType& a, bool delete_data_on_destruct) : delete_data_on_destruct_(delete_data_on_destruct)
+{
+    typename FFDArrayType::const_iterator src;
+    for ( src=a.begin(); src!=a.end(); ++src )
+    {
+        ml_ffd_.push_back(*src);
+    }
+}
+
+/// Copy the level pointers of a (shallow copy). The new object never owns the levels :
+/// its delete flag is always false, whatever the flag of a is.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+gtplusMLFFD<T, CoordType, DIn, DOut>::gtplusMLFFD(const Self& a)
+    : ml_ffd_(a.getFFDArray()), delete_data_on_destruct_(false)
+{
+}
+
+/// Destructor : deletes the stored levels only when delete_data_on_destruct_ is set.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+gtplusMLFFD<T, CoordType, DIn, DOut>::~gtplusMLFFD()
+{
+    if ( !delete_data_on_destruct_ ) return;
+
+    typename FFDArrayType::iterator level;
+    for ( level=ml_ffd_.begin(); level!=ml_ffd_.end(); ++level )
+    {
+        if ( *level == NULL ) continue;
+
+        delete *level;
+        *level = NULL;
+    }
+}
+
+/// Evaluate the multi-level FFD at pt : r receives, for each of the DOut outputs, the sum of
+/// the values returned by evaluateFFD of every non-NULL level.
+/// Returns false as soon as one level reports a failure.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusMLFFD<T, CoordType, DIn, DOut>::evaluateFFD(const CoordType pt[D], T r[DOut]) const
+{
+    unsigned int dOut;
+    for ( dOut=0; dOut<DOut; dOut++ ) r[dOut] = 0;
+
+    // receives the contribution of one level at a time
+    T levelValue[DOut];
+
+    typename FFDArrayType::const_iterator level;
+    for ( level=ml_ffd_.begin(); level!=ml_ffd_.end(); ++level )
+    {
+        if ( *level == NULL ) continue;
+        GADGET_CHECK_RETURN_FALSE((*level)->evaluateFFD(pt, levelValue));
+        for ( dOut=0; dOut<DOut; dOut++ )
+        {
+            r[dOut] += levelValue[dOut];
+        }
+    }
+
+    return true;
+}
+
+/// First order derivative of the multi-level FFD at pt : deriv[i][j] receives the sum, over all
+/// non-NULL levels, of the level's evaluateFFDDerivative output, for i in [0, D) and j in [0, DOut).
+/// Returns false as soon as one level reports a failure.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusMLFFD<T, CoordType, DIn, DOut>::evaluateFFDDerivative(const CoordType pt[D], T deriv[D][DOut]) const
+{
+    unsigned int dIn, dOut;
+
+    // start from zero so that the levels can be accumulated
+    for ( dIn=0; dIn<D; dIn++ )
+        for ( dOut=0; dOut<DOut; dOut++ )
+            deriv[dIn][dOut] = 0;
+
+    // receives the contribution of one level at a time
+    T levelDeriv[D][DOut];
+
+    typename FFDArrayType::const_iterator level;
+    for ( level=ml_ffd_.begin(); level!=ml_ffd_.end(); ++level )
+    {
+        if ( *level == NULL ) continue;
+
+        GADGET_CHECK_RETURN_FALSE((*level)->evaluateFFDDerivative(pt, levelDeriv));
+
+        for ( dIn=0; dIn<D; dIn++ )
+        {
+            for ( dOut=0; dOut<DOut; dOut++ )
+                deriv[dIn][dOut] += levelDeriv[dIn][dOut];
+        }
+    }
+
+    return true;
+}
+
+/// Sum the evaluateFFDDX result of every non-NULL level at pt into dx.
+/// dx : output with DOut entries, zeroed before the accumulation
+/// Returns false as soon as one level reports a failure.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusMLFFD<T, CoordType, DIn, DOut>::evaluateFFDDX(const CoordType pt[D], T dx[DOut]) const
+{
+    unsigned int d;
+    for ( d=0; d<DOut; d++ ) dx[d] = 0;
+
+    // one entry per output dimension : the buffer is read up to index DOut-1 below,
+    // so it must hold DOut (not D) elements
+    T dxLevel[DOut];
+
+    size_t ii;
+    for (ii=0; ii<ml_ffd_.size(); ii++)
+    {
+        if ( ml_ffd_[ii] == NULL ) continue;
+        GADGET_CHECK_RETURN_FALSE(ml_ffd_[ii]->evaluateFFDDX(pt, dxLevel));
+        for ( d=0; d<DOut; d++ )
+        {
+            dx[d] += dxLevel[d];
+        }
+    }
+
+    return true;
+}
+
+/// Sum the evaluateFFDDY result of every non-NULL level at pt into dy.
+/// dy : output with DOut entries, zeroed before the accumulation
+/// Returns false as soon as one level reports a failure.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusMLFFD<T, CoordType, DIn, DOut>::evaluateFFDDY(const CoordType pt[D], T dy[DOut]) const
+{
+    unsigned int d;
+    for ( d=0; d<DOut; d++ ) dy[d] = 0;
+
+    // one entry per output dimension : the buffer is read up to index DOut-1 below,
+    // so it must hold DOut (not D) elements
+    T dyLevel[DOut];
+
+    size_t ii;
+    for (ii=0; ii<ml_ffd_.size(); ii++)
+    {
+        if ( ml_ffd_[ii] == NULL ) continue;
+        GADGET_CHECK_RETURN_FALSE(ml_ffd_[ii]->evaluateFFDDY(pt, dyLevel));
+        for ( d=0; d<DOut; d++ )
+        {
+            dy[d] += dyLevel[d];
+        }
+    }
+
+    return true;
+}
+
+
+/// Sum the evaluateFFDDZ result of every non-NULL level at pt into dz.
+/// dz : output with DOut entries, zeroed before the accumulation
+/// Returns false as soon as one level reports a failure.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusMLFFD<T, CoordType, DIn, DOut>::evaluateFFDDZ(const CoordType pt[D], T dz[DOut]) const
+{
+    unsigned int d;
+    for ( d=0; d<DOut; d++ ) dz[d] = 0;
+
+    // one entry per output dimension : the buffer is read up to index DOut-1 below,
+    // so it must hold DOut (not D) elements
+    T dzLevel[DOut];
+
+    size_t ii;
+    for (ii=0; ii<ml_ffd_.size(); ii++)
+    {
+        if ( ml_ffd_[ii] == NULL ) continue;
+        GADGET_CHECK_RETURN_FALSE(ml_ffd_[ii]->evaluateFFDDZ(pt, dzLevel));
+        for ( d=0; d<DOut; d++ )
+        {
+            dz[d] += dzLevel[d];
+        }
+    }
+
+    return true;
+}
+
+
+/// Sum the evaluateFFDDS result of every non-NULL level at pt into ds.
+/// ds : output with DOut entries, zeroed before the accumulation
+/// Returns false as soon as one level reports a failure.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusMLFFD<T, CoordType, DIn, DOut>::evaluateFFDDS(const CoordType pt[D], T ds[DOut]) const
+{
+    unsigned int d;
+    for ( d=0; d<DOut; d++ ) ds[d] = 0;
+
+    // one entry per output dimension : the buffer is read up to index DOut-1 below,
+    // so it must hold DOut (not D) elements
+    T dsLevel[DOut];
+
+    size_t ii;
+    for (ii=0; ii<ml_ffd_.size(); ii++)
+    {
+        if ( ml_ffd_[ii] == NULL ) continue;
+        GADGET_CHECK_RETURN_FALSE(ml_ffd_[ii]->evaluateFFDDS(pt, dsLevel));
+        for ( d=0; d<DOut; d++ )
+        {
+            ds[d] += dsLevel[d];
+        }
+    }
+
+    return true;
+}
+
+/// Second order derivative of the multi-level FFD at pt : dderiv[i][j] receives the sum, over all
+/// non-NULL levels, of the level's evaluateFFDSecondOrderDerivative output, for i in [0, D*D) and j in [0, DOut).
+/// Returns false as soon as one level reports a failure.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusMLFFD<T, CoordType, DIn, DOut>::evaluateFFDSecondOrderDerivative(const CoordType pt[D], T dderiv[D*D][DOut]) const
+{
+    unsigned int dPair, dOut;
+
+    // start from zero so that the levels can be accumulated
+    for ( dPair=0; dPair<D*D; dPair++ )
+        for ( dOut=0; dOut<DOut; dOut++ )
+            dderiv[dPair][dOut] = 0;
+
+    // receives the contribution of one level at a time
+    T levelDDeriv[D*D][DOut];
+
+    typename FFDArrayType::const_iterator level;
+    for ( level=ml_ffd_.begin(); level!=ml_ffd_.end(); ++level )
+    {
+        if ( *level == NULL ) continue;
+
+        GADGET_CHECK_RETURN_FALSE((*level)->evaluateFFDSecondOrderDerivative(pt, levelDDeriv));
+
+        for ( dPair=0; dPair<D*D; dPair++ )
+        {
+            for ( dOut=0; dOut<DOut; dOut++ )
+                dderiv[dPair][dOut] += levelDDeriv[dPair][dOut];
+        }
+    }
+
+    return true;
+}
+
+/// Fit the levels in order : the first non-NULL level approximates value, and every later
+/// level approximates the residual written by the level before it.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusMLFFD<T, CoordType, DIn, DOut>::ffdApprox(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, T& totalResidual, size_t N)
+{
+    ValueArrayType levelInput(value);
+    typename FFDArrayType::iterator level;
+    for ( level=ml_ffd_.begin(); level!=ml_ffd_.end(); ++level )
+    {
+        if ( *level == NULL ) continue;
+        GADGET_CHECK_RETURN_FALSE((*level)->ffdApprox(pos, levelInput, residual, totalResidual, N));
+        levelInput = residual;   // the next level fits what this one left over
+    }
+
+    return true;
+}
+
+/// Multi-level approximation with refinement : each non-NULL level fits the residual left by the
+/// previous one. pos must be DIn by N and value DOut by N; numOfRefinement returns the sum over all levels.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusMLFFD<T, CoordType, DIn, DOut>::ffdApprox(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement)
+{
+    try
+    {
+        // dimension 0 is the coordinate/value index, dimension 1 is the point index
+        GADGET_CHECK_RETURN_FALSE(pos.get_size(0)==DIn);
+        GADGET_CHECK_RETURN_FALSE(pos.get_size(1)==N);
+        GADGET_CHECK_RETURN_FALSE(value.get_size(0)==DOut);
+        GADGET_CHECK_RETURN_FALSE(value.get_size(1)==N);
+
+        // both outputs are reset, so that the caller's previous content cannot leak into the result
+        totalResidual = 0;
+        numOfRefinement = 0;
+        if ( !residual.dimensions_equal(value) )
+        {
+            residual.create(value.get_dimensions());
+            Gadgetron::clear(residual);
+        }
+        ValueArrayType valueLevel(value);
+        size_t numOfRefinementLevel(0);
+
+        for (size_t ii=0; ii<ml_ffd_.size(); ii++)
+        {
+            if ( ml_ffd_[ii] == NULL ) continue;
+            GADGET_CHECK_RETURN_FALSE(ml_ffd_[ii]->ffdApprox(pos, valueLevel, residual, totalResidual, N, numOfRefinementLevel, thresResidual, maxNumOfRefinement));
+            numOfRefinement += numOfRefinementLevel;
+            valueLevel = residual;
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in ffdApprox(const CoordArrayType& pos, ValueArrayType& value, ValueArrayType& residual, real_value_type& totalResidual, size_t N, size_t& numOfRefinement, real_value_type thresResidual, size_t maxNumOfRefinement) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Call refine() on every non-NULL level; stops and returns false at the first failure.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+inline bool gtplusMLFFD<T, CoordType, DIn, DOut>::refine()
+{
+    typename FFDArrayType::iterator level = ml_ffd_.begin();
+    for ( ; level!=ml_ffd_.end(); ++level )
+    {
+        if ( *level == NULL ) continue;
+        GADGET_CHECK_RETURN_FALSE((*level)->refine());
+    }
+
+    return true;
+}
+
+/// Print the number of levels, then each level in turn : its own print() output, or a notice when the pointer is NULL.
+template <typename T, typename CoordType, unsigned int DIn, unsigned int DOut> 
+void gtplusMLFFD<T, CoordType, DIn, DOut>::print(std::ostream& os) const
+{
+    using namespace std;
+
+    os << "---------------------- GTPlus Multi-level Free Form Deformation ------------------" << endl;
+    os << "Number of level is : " << ml_ffd_.size() << endl;
+    size_t ii;
+    for (ii=0; ii<ml_ffd_.size(); ii++)
+    {
+        os << "Level " << ii << " : " << endl;
+        if ( ml_ffd_[ii]!=NULL )   // index with the loop variable ii
+        {
+            ml_ffd_[ii]->print(os);
+        }
+        else
+        {
+            os << "--> Pointer is NULL ... " << endl;
+        }
+    }
+    os << "------------------------------------------------------------------------------" << endl;
+}
+
+}}
diff --git a/toolboxes/gtplus/algorithm/gtPlusAlgorithmBase.h b/toolboxes/gtplus/algorithm/gtPlusAlgorithmBase.h
index b782a91..9b4dcac 100644
--- a/toolboxes/gtplus/algorithm/gtPlusAlgorithmBase.h
+++ b/toolboxes/gtplus/algorithm/gtPlusAlgorithmBase.h
@@ -5,7 +5,7 @@
 
 #pragma once
 
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
 #include "GadgetronTimer.h"
 #include "gtPlusISMRMRDReconUtil.h"
 #include "gtPlusIOAnalyze.h"
@@ -15,10 +15,6 @@
     #include "omp.h"
 #endif // USE_OMP
 
-#ifdef USE_CUDA
-    #include "htgrappa.h"
-#endif // USE_CUDA
-
 namespace Gadgetron { namespace gtPlus {
 
 template <typename T> 
diff --git a/toolboxes/gtplus/algorithm/gtPlusDataFidelityOperator.h b/toolboxes/gtplus/algorithm/gtPlusDataFidelityOperator.h
index a8d9b83..f471e4e 100644
--- a/toolboxes/gtplus/algorithm/gtPlusDataFidelityOperator.h
+++ b/toolboxes/gtplus/algorithm/gtPlusDataFidelityOperator.h
@@ -36,6 +36,8 @@ public:
     // L2 norm of ||Dx-y||2
     virtual bool obj(const hoNDArray<T>& x, T& obj);
 
+    virtual bool unitary() const { return true; }
+
     using BaseClass::gt_timer1_;
     using BaseClass::gt_timer2_;
     using BaseClass::gt_timer3_;
@@ -82,7 +84,7 @@ forwardOperator(const hoNDArray<T>& x, hoNDArray<T>& y)
 {
     try
     {
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::multiply(acquired_points_indicator_, x, y));
+        Gadgetron::multiply(acquired_points_indicator_, x, y);
     }
     catch (...)
     {
@@ -98,7 +100,7 @@ adjointOperator(const hoNDArray<T>& x, hoNDArray<T>& y)
 {
     try
     {
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::multiply(acquired_points_indicator_, x, y));
+        Gadgetron::multiply(acquired_points_indicator_, x, y);
     }
     catch (...)
     {
@@ -115,9 +117,9 @@ grad(const hoNDArray<T>& x, hoNDArray<T>& g)
     try
     {
         // 2D'*(Dx-y)
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::multiply(acquired_points_indicator_, x, g));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::subtract(g, *acquired_points_, g));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal(T(2.0), g));
+        Gadgetron::multiply(acquired_points_indicator_, x, g);
+        Gadgetron::subtract(g, *acquired_points_, g);
+        Gadgetron::scal(T(2.0), g);
     }
     catch (...)
     {
@@ -134,9 +136,9 @@ obj(const hoNDArray<T>& x, T& obj)
 {
     try
     {
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::multiply(acquired_points_indicator_, x, kspace_));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::subtract(kspace_, *acquired_points_, kspace_));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::dotc(kspace_, kspace_, obj));
+        Gadgetron::multiply(acquired_points_indicator_, x, kspace_);
+        Gadgetron::subtract(kspace_, *acquired_points_, kspace_);
+        Gadgetron::dotc(kspace_, kspace_, obj);
     }
     catch (...)
     {
diff --git a/toolboxes/gtplus/algorithm/gtPlusGRAPPA.h b/toolboxes/gtplus/algorithm/gtPlusGRAPPA.h
index 1bd4cdd..9bd9cc9 100644
--- a/toolboxes/gtplus/algorithm/gtPlusGRAPPA.h
+++ b/toolboxes/gtplus/algorithm/gtPlusGRAPPA.h
@@ -19,7 +19,7 @@
 
     Saybasili H., Kellman P., Griswold MA., Derbyshire JA. Guttman, MA. 
     HTGRAPPA: Real-time B1-weighted image domain TGRAPPA reconstruction. 
-    Magnetic Resonance in Medicine 2009;61(6): 1425–1433. 
+    Magnetic Resonance in Medicine 2009;61(6): 1425-1433. 
 */
 
 #pragma once
@@ -41,42 +41,42 @@ public:
     virtual void printInfo(std::ostream& os);
 
     // get the kernel pattern, given the acceleration factor and kernel size
-    bool kerPattern(std::vector<int>& kE1, std::vector<int>& oE1, int accelFactor, size_t kNE1, bool fitItself);
+    bool kerPattern(std::vector<int>& kE1, std::vector<int>& oE1, size_t accelFactor, size_t kNE1, bool fitItself);
 
     // grappa calibration for 2D case
     // acsSrc : [RO E1 srcCHA]
     // acsDst : [RO E1 dstCHA]
     // ker : [kRO kE1 srcCHA dstCHA oE1]
     bool calib(const ho3DArray<T>& acsSrc, const ho3DArray<T>& acsDst, double thres, 
-            int kRO, const std::vector<int>& kE1, const std::vector<int>& oE1, ho5DArray<T>& ker);
+            size_t kRO, const std::vector<int>& kE1, const std::vector<int>& oE1, ho5DArray<T>& ker);
 
     // image domain kernel for 2D kernel
     // kIm: image domain kernel [RO E1 srcCHA dstCHA]
-    bool imageDomainKernel(const ho5DArray<T>& ker, int kRO, const std::vector<int>& kE1, const std::vector<int>& oE1, int ro, int e1, hoNDArray<T>& kIm);
+    bool imageDomainKernel(const ho5DArray<T>& ker, size_t kRO, const std::vector<int>& kE1, const std::vector<int>& oE1, size_t ro, size_t e1, hoNDArray<T>& kIm);
 
     // grappa calibration for 3D case
     // acsSrc : [RO E1 E2 srcCHA]
     // acsDst : [RO E1 E2 dstCHA]
     // ker : [kRO kE1 kE2 srcCHA dstCHA oE1 oE2]
     bool calib3D(const ho4DArray<T>& acsSrc, const ho4DArray<T>& acsDst, double thres, double overDetermineRatio, 
-            int kRO, const std::vector<int>& kE1, const std::vector<int>& kE2, const std::vector<int>& oE1, const std::vector<int>& oE2, ho7DArray<T>& ker);
+            size_t kRO, const std::vector<int>& kE1, const std::vector<int>& kE2, const std::vector<int>& oE1, const std::vector<int>& oE2, ho7DArray<T>& ker);
 
     // image domain kernel for 3D kernel
     // kIm: image domain kernel [RO E1 E2 srcCHA dstCHA]
-    bool imageDomainKernel3D(const ho7DArray<T>& ker, int kRO, const std::vector<int>& kE1, const std::vector<int>& kE2, const std::vector<int>& oE1, const std::vector<int>& oE2, int ro, int e1, int e2, hoNDArray<T>& kIm);
+    bool imageDomainKernel3D(const ho7DArray<T>& ker, size_t kRO, const std::vector<int>& kE1, const std::vector<int>& kE2, const std::vector<int>& oE1, const std::vector<int>& oE2, size_t ro, size_t e1, size_t e2, hoNDArray<T>& kIm);
 
     // convert the calibrated kernel to the convlution kernel in kspace
     // if ROis3rdDim == true, the kernel dimension is [E1 E2 RO], otherwise [RO E1 E2]
-    bool kspaceDomainConvKernel3D(const ho7DArray<T>& ker, int kRO, const std::vector<int>& kE1, const std::vector<int>& kE2, const std::vector<int>& oE1, const std::vector<int>& oE2, ho5DArray<T>& convKerFlip, bool ROis3rdDim=true);
+    bool kspaceDomainConvKernel3D(const ho7DArray<T>& ker, size_t kRO, const std::vector<int>& kE1, const std::vector<int>& kE2, const std::vector<int>& oE1, const std::vector<int>& oE2, ho5DArray<T>& convKerFlip, bool ROis3rdDim=true);
 
     // image domain kernel for 3D kernel, only RO direction is converted to image domain
     // E1 and E2 stays in the kspace domain
     // kImRO: kspace-image hybrid kernel [convE1 convE2 RO srcCHA dstCHA]
-    bool imageDomainKernelRO3D(const ho7DArray<T>& ker, int kRO, const std::vector<int>& kE1, const std::vector<int>& kE2, const std::vector<int>& oE1, const std::vector<int>& oE2, int ro, hoNDArray<T>& kImRO);
+    bool imageDomainKernelRO3D(const ho7DArray<T>& ker, size_t kRO, const std::vector<int>& kE1, const std::vector<int>& kE2, const std::vector<int>& oE1, const std::vector<int>& oE2, size_t ro, hoNDArray<T>& kImRO);
 
     // image domain kernel for 3D kernel, E1 and E2 directions are converted to image domain
     // kImRO : kspace-image hybrid kernel where first two dimensions are E1 and E2 and in kspace
-    bool imageDomainKernelE1E2RO(const hoNDArray<T>& kImRO, int e1, int e2, hoNDArray<T>& kImE1E2RO);
+    bool imageDomainKernelE1E2RO(const hoNDArray<T>& kImRO, size_t e1, size_t e2, hoNDArray<T>& kImE1E2RO);
 
     // use gpu in the kernel calibration
     bool calib_use_gpu_;
@@ -108,7 +108,7 @@ void gtPlusGRAPPA<T>::printInfo(std::ostream& os)
 
 template <typename T> 
 bool gtPlusGRAPPA<T>::
-kerPattern(std::vector<int>& kE1, std::vector<int>& oE1, int accelFactor, size_t kNE1, bool fitItself)
+kerPattern(std::vector<int>& kE1, std::vector<int>& oE1, size_t accelFactor, size_t kNE1, bool fitItself)
 {
     if ( accelFactor == 1 )
     {
@@ -120,35 +120,35 @@ kerPattern(std::vector<int>& kE1, std::vector<int>& oE1, int accelFactor, size_t
     kE1.resize(kNE1, 0);
     if ( kNE1%2 == 0 )
     {
-        int k;
-        for ( k=-((int)kNE1/2-1); k<=(int)kNE1/2; k++ )
+        long long k;
+        for ( k=-((long long)kNE1/2-1); k<=(long long)kNE1/2; k++ )
         {
-            kE1[k+kNE1/2-1] = k*accelFactor;
+            kE1[k+kNE1/2-1] = (int)(k*accelFactor);
         }
     }
     else
     {
-        int k;
-        for ( k=-(int)kNE1/2; k<=(int)kNE1/2; k++ )
+        long long k;
+        for ( k=-(long long)kNE1/2; k<=(long long)kNE1/2; k++ )
         {
-            kE1[k+kNE1/2] = k*accelFactor;
+            kE1[k+kNE1/2] = (int)(k*accelFactor);
         }
     }
 
     if ( fitItself )
     {
         oE1.resize(accelFactor);
-        for ( int a=0; a<accelFactor; a++ )
+        for ( long long a=0; a<(long long)accelFactor; a++ )
         {
-            oE1[a] = a;
+            oE1[a] = (int)a;
         }
     }
     else
     {
         oE1.resize(accelFactor-1);
-        for ( int a=1; a<accelFactor; a++ )
+        for ( long long a=1; a<(long long)accelFactor; a++ )
         {
-            oE1[a-1] = a;
+            oE1[a-1] = (int)a;
         }
     }
 
@@ -158,7 +158,7 @@ kerPattern(std::vector<int>& kE1, std::vector<int>& oE1, int accelFactor, size_t
 template <typename T> 
 bool gtPlusGRAPPA<T>::
 calib(const ho3DArray<T>& acsSrc, const ho3DArray<T>& acsDst, double thres, 
-    int kRO, const std::vector<int>& kE1, const std::vector<int>& oE1, ho5DArray<T>& ker)
+    size_t kRO, const std::vector<int>& kE1, const std::vector<int>& oE1, ho5DArray<T>& ker)
 {
     try
     {
@@ -174,7 +174,7 @@ calib(const ho3DArray<T>& acsSrc, const ho3DArray<T>& acsDst, double thres,
         const T* pSrc = acsSrc.begin();
         const T* pDst = acsDst.begin();
 
-        int kROhalf = kRO/2;
+        long long kROhalf = kRO/2;
         if ( 2*kROhalf == kRO )
         {
             GADGET_WARN_MSG("gtPlusGRAPPA<T>::calib(...) - 2*kROhalf == kRO " << kRO);
@@ -207,30 +207,36 @@ calib(const ho3DArray<T>& acsSrc, const ho3DArray<T>& acsDst, double thres,
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("grappa 2D calibration - allocate matrix storage ... "));
         hoNDArrayMemoryManaged<T> A_mem(rowA, colA, gtPlus_mem_manager_);
         A.createMatrix( rowA, colA, A_mem.begin() );
+        T* pA = A.begin();
 
         hoNDArrayMemoryManaged<T> B_mem(rowA, colB, gtPlus_mem_manager_);
         B.createMatrix( A.rows(), colB, B_mem.begin() );
+        T* pB = B.begin();
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
-        int e1;
-        for ( e1=(int)sE1; e1<=(int)eE1; e1++ )
+        long long e1;
+        for ( e1=(long long)sE1; e1<=(long long)eE1; e1++ )
         {
-            for ( int ro=kROhalf; ro<=(int)eRO; ro++ )
+            for ( long long ro=kROhalf; ro<=(long long)eRO; ro++ )
             {
-                int rInd = (e1-sE1)*lenRO+ro-kROhalf;
+                long long rInd = (e1-sE1)*lenRO+ro-kROhalf;
 
                 size_t src, dst, ke1, oe1;
-                int kro;
+                long long kro;
 
                 // fill matrix A
                 size_t col = 0;
+                size_t offset = 0;
                 for ( src=0; src<srcCHA; src++ )
                 {
                     for ( ke1=0; ke1<kNE1; ke1++ )
                     {
+                        offset = src*RO*E1 + (e1+kE1[ke1])*RO;
                         for ( kro=-kROhalf; kro<=kROhalf; kro++ )
                         {
-                            A(rInd, col++) = acsSrc(ro+kro, e1+kE1[ke1], src);
+                            // A(rInd, col++) = acsSrc(ro+kro, e1+kE1[ke1], src);
+                            pA[rInd + col*rowA] = pSrc[ro+kro+offset];
+                            col++;
                         }
                     }
                 }
@@ -256,57 +262,61 @@ calib(const ho3DArray<T>& acsSrc, const ho3DArray<T>& acsDst, double thres,
         //GADGET_MSG("B = " << v);
 
         //GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.start("SolveLinearSystem_Tikhonov"));
-        #ifdef USE_CUDA
-            // go to device
-            try
-            {
-                if ( typeid(typename realType<T>::Type)==typeid(float) && calib_use_gpu_ )
-                {
-                    GADGET_MSG("grappa 2D - calling GPU kernel estimation ... ");
-                    hoNDArray<float_complext> A_tmp(A.get_dimensions(), reinterpret_cast<float_complext*>(A.begin()));
-                    hoNDArray<float_complext> B_tmp(B.get_dimensions(), reinterpret_cast<float_complext*>(B.begin()));
-
-                    int ret(0);
-                    boost::shared_ptr< hoNDArray<complext<float> > > host_x;
-
-                    #pragma omp critical(inverse)
-                    {
-                        cuNDArray<float_complext> device_A(A_tmp);
-                        cuNDArray<float_complext> device_B(B_tmp);
-                        cuNDArray<float_complext> device_x;
-
-                        ret = Gadgetron::inverse_clib_matrix(&device_A, &device_B, &device_x, thres);
-                        if ( ret == 0 )
-                        {
-                            host_x = device_x.to_host();
-                        }
-                    }
-
-                    if ( ret != 0 )
-                    {
-                        GADGET_ERROR_MSG("failed in Gadgetron::inverse_clib_matrix(&device_A, &device_B, &device_x, thres) ... ");
-                        SolveLinearSystem_Tikhonov(A, B, x, thres);
-                    }
-                    else
-                    {
-                        memcpy(x.begin(), host_x->begin(), x.get_number_of_bytes());
-                    }
-                }
-                else
-                {
-                    GADGET_WARN_MSG("GPU inverse_clib_matrix for grappa is only available for single-precision, calling the CPU version ... ");
-                    GADGET_CHECK_RETURN_FALSE(SolveLinearSystem_Tikhonov(A, B, x, thres));
-                }
-            }
-            catch(...)
-            {
-                GADGET_ERROR_MSG("failed in GPU inverse_clib_matrix for grappa, calling the CPU version ... ");
-                GADGET_CHECK_RETURN_FALSE(SolveLinearSystem_Tikhonov(A, B, x, thres));
-            }
-
-        #else
-            GADGET_CHECK_RETURN_FALSE(SolveLinearSystem_Tikhonov(A, B, x, thres));
-        #endif // USE_CUDA
+        //#ifdef USE_CUDA
+        //    // go to device
+        //    try
+        //    {
+        //        if ( typeid(typename realType<T>::Type)==typeid(float) && calib_use_gpu_ )
+        //        {
+        //            GADGET_MSG("grappa 2D - calling GPU kernel estimation ... ");
+        //            hoNDArray<float_complext> A_tmp(A.get_dimensions(), reinterpret_cast<float_complext*>(A.begin()));
+        //            hoNDArray<float_complext> B_tmp(B.get_dimensions(), reinterpret_cast<float_complext*>(B.begin()));
+
+        //            int ret(0);
+        //            boost::shared_ptr< hoNDArray<complext<float> > > host_x;
+
+        //            #pragma omp critical(inverse)
+        //            {
+        //                cuNDArray<float_complext> device_A(A_tmp);
+        //                cuNDArray<float_complext> device_B(B_tmp);
+        //                cuNDArray<float_complext> device_x;
+
+        //                ret = Gadgetron::inverse_clib_matrix(&device_A, &device_B, &device_x, thres);
+        //                if ( ret == 0 )
+        //                {
+        //                    host_x = device_x.to_host();
+        //                }
+        //            }
+
+        //            if ( ret != 0 )
+        //            {
+        //                GADGET_ERROR_MSG("failed in Gadgetron::inverse_clib_matrix(&device_A, &device_B, &device_x, thres) ... ");
+        //                SolveLinearSystem_Tikhonov(A, B, x, thres);
+        //            }
+        //            else
+        //            {
+        //                memcpy(x.begin(), host_x->begin(), x.get_number_of_bytes());
+        //            }
+        //        }
+        //        else
+        //        {
+        //            if ( calib_use_gpu_ )
+        //            {
+        //                GADGET_WARN_MSG("GPU inverse_clib_matrix for grappa is only available for single-precision, calling the CPU version ... ");
+        //            }
+
+        //            GADGET_CHECK_RETURN_FALSE(SolveLinearSystem_Tikhonov(A, B, x, thres));
+        //        }
+        //    }
+        //    catch(...)
+        //    {
+        //        GADGET_ERROR_MSG("failed in GPU inverse_clib_matrix for grappa, calling the CPU version ... ");
+        //        GADGET_CHECK_RETURN_FALSE(SolveLinearSystem_Tikhonov(A, B, x, thres));
+        //    }
+
+        //#else
+            GADGET_CHECK_EXCEPTION_RETURN_FALSE(SolveLinearSystem_Tikhonov(A, B, x, thres));
+        //#endif // USE_CUDA
 
         // GADGET_CHECK_RETURN_FALSE(SolveLinearSystem_Tikhonov(A, B, x, thres));
         //GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.stop());
@@ -333,16 +343,16 @@ calib(const ho3DArray<T>& acsSrc, const ho3DArray<T>& acsDst, double thres,
 
 template <typename T> 
 bool gtPlusGRAPPA<T>::
-imageDomainKernel(const ho5DArray<T>& ker, int kRO, const std::vector<int>& kE1, const std::vector<int>& oE1, int ro, int e1, hoNDArray<T>& kIm)
+imageDomainKernel(const ho5DArray<T>& ker, size_t kRO, const std::vector<int>& kE1, const std::vector<int>& oE1, size_t ro, size_t e1, hoNDArray<T>& kIm)
 {
     try
     {
-        int srcCHA = (int)(ker.get_size(2));
-        int dstCHA = (int)(ker.get_size(3));
-        int kNE1 = (int)(kE1.size());
-        int oNE1 = (int)(oE1.size());
+        long long srcCHA = (long long)(ker.get_size(2));
+        long long dstCHA = (long long)(ker.get_size(3));
+        long long kNE1 = (long long)(kE1.size());
+        long long oNE1 = (long long)(oE1.size());
 
-        int kROhalf = kRO/2;
+        long long kROhalf = kRO/2;
         if ( 2*kROhalf == kRO )
         {
             GADGET_WARN_MSG("gtPlusGRAPPA<T>::imageDomainKernel(...) - 2*kROhalf == kRO " << kRO);
@@ -353,21 +363,21 @@ imageDomainKernel(const ho5DArray<T>& ker, int kRO, const std::vector<int>& kE1,
         kIm.create(ro, e1, srcCHA, dstCHA);
 
         /// fill the convolution kernels
-        int convKRO = 2*kRO+3;
+        long long convKRO = 2*kRO+3;
 
-        int maxKE1 = std::abs(kE1[0]);
+        long long maxKE1 = std::abs(kE1[0]);
         if ( std::abs(kE1[kNE1-1]) > maxKE1 )
         {
             maxKE1 = std::abs(kE1[kNE1-1]);
         }
-        int convKE1 = 2*maxKE1+1;
+        long long convKE1 = 2*maxKE1+1;
 
         /// allocate the convolution kernel
         ho4DArray<T> convKer(convKRO, convKE1, srcCHA, dstCHA);
         Gadgetron::clear(&convKer);
 
         /// index
-        int oe1, kro, ke1, src, dst;
+        long long oe1, kro, ke1, src, dst;
 
         /// fill the convolution kernel and sum up multiple kernels
         for ( oe1=0; oe1<oNE1; oe1++ )
@@ -396,7 +406,7 @@ imageDomainKernel(const ho5DArray<T>& ker, int kRO, const std::vector<int>& kE1,
             }
         }
 
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal( (typename realType<T>::Type)( std::sqrt((double)(ro*e1)) ), convKer ));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::scal( (typename realType<T>::Type)( std::sqrt((double)(ro*e1)) ), convKer ));
         GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().zeropad2D(convKer, ro, e1, kIm));
         GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft2c(kIm));
     }
@@ -413,7 +423,7 @@ template <typename T>
 bool gtPlusGRAPPA<T>::
 calib3D(const ho4DArray<T>& acsSrc, const ho4DArray<T>& acsDst, 
         double thres, double overDetermineRatio, 
-        int kRO, const std::vector<int>& kE1, const std::vector<int>& kE2, 
+        size_t kRO, const std::vector<int>& kE1, const std::vector<int>& kE2, 
         const std::vector<int>& oE1, const std::vector<int>& oE2, 
         ho7DArray<T>& ker)
 {
@@ -433,7 +443,7 @@ calib3D(const ho4DArray<T>& acsSrc, const ho4DArray<T>& acsDst,
         const T* pSrc = acsSrc.begin();
         const T* pDst = acsDst.begin();
 
-        int kROhalf = kRO/2;
+        long long kROhalf = (long long)kRO/2;
         if ( 2*kROhalf == kRO )
         {
             GADGET_WARN_MSG("gtPlusGRAPPA<T>::calib3D(...) - 2*kROhalf == kRO " << kRO);
@@ -471,7 +481,7 @@ calib3D(const ho4DArray<T>& acsSrc, const ho4DArray<T>& acsDst,
 
         if ( overDetermineRatio > 1.0 )
         {
-            size_t maxRowA = std::ceil(overDetermineRatio*colA);
+            size_t maxRowA = (size_t)std::ceil(overDetermineRatio*colA);
             size_t maxROUsed = maxRowA/(lenE1*lenE2);
             if ( maxROUsed < lenRO )
             {
@@ -484,15 +494,33 @@ calib3D(const ho4DArray<T>& acsSrc, const ho4DArray<T>& acsDst,
                     if ( Gadgetron::sumOver2ndDimension(acsSrc1stChaSumE2, acsSrc1stChaSumE2E1) )
                     {
                         T maxSignal;
-                        size_t roInd;
-                        if ( Gadgetron::maxAbsolute(acsSrc1stChaSumE2E1, maxSignal, roInd) )
+                        size_t roInd(0);
+                        try
                         {
-                            sRO = roInd - maxROUsed/2;
-                            eRO = sRO + maxROUsed - 1;
+                            Gadgetron::maxAbsolute(acsSrc1stChaSumE2E1, maxSignal, roInd);
+
+                            if ( roInd > maxROUsed/2+kROhalf )
+                            {
+                                sRO = roInd - maxROUsed/2;
+                            }
+                            else
+                            {
+                                sRO = kROhalf;
+                            }
+
+                            if( sRO+maxROUsed-1 <= RO-kROhalf-1 )
+                            {
+                                eRO = sRO + maxROUsed - 1;
+                            }
+                            else
+                            {
+                                eRO = RO - kROhalf -1;
+                            }
+
                             lenRO = eRO-sRO+1;
                             GADGET_MSG("gtPlusGRAPPA<T>::calib3D(...) - overDetermineRatio = " << overDetermineRatio << " ; RO data range used : [" << sRO << " " << eRO << "] ...");
                         }
-                        else
+                        catch(...)
                         {
                             GADGET_WARN_MSG("gtPlusGRAPPA<T>::calib3D(...) - overDetermineRatio is ignored ... ");
                         }
@@ -519,7 +547,7 @@ calib3D(const ho4DArray<T>& acsSrc, const ho4DArray<T>& acsDst,
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
         T* pB = B.begin();
 
-        int e2;
+        long long e2;
 
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("grappa 3D calibration - fill calib matrixes ... "));
         #ifdef GCC_OLD_FLAG
@@ -527,17 +555,17 @@ calib3D(const ho4DArray<T>& acsSrc, const ho4DArray<T>& acsDst,
         #else
             #pragma omp parallel for default(none) private(e2) shared(sE2, eE2, sE1, eE1, kROhalf, sRO, eRO, lenRO, lenE1, srcCHA, kNE2, kNE1, A, rowA, pA, acsSrc, kE1, kE2, oNE2, oNE1, dstCHA, B, pB, acsDst, oE1, oE2)
         #endif
-        for ( e2=(int)sE2; e2<=(int)eE2; e2++ )
+        for ( e2=(long long)sE2; e2<=(long long)eE2; e2++ )
         {
-            int e1;
-            for ( e1=(int)sE1; e1<=(int)eE1; e1++ )
+            long long e1;
+            for ( e1=(long long)sE1; e1<=(long long)eE1; e1++ )
             {
-                for ( int ro=(int)sRO; ro<=(int)eRO; ro++ )
+                for ( long long ro=(long long)sRO; ro<=(long long)eRO; ro++ )
                 {
-                    int rInd = (e2-sE2)*lenRO*lenE1 + (e1-sE1)*lenRO + ro-sRO;
+                    size_t rInd = (e2-sE2)*lenRO*lenE1 + (e1-sE1)*lenRO + ro-sRO;
 
                     size_t src, dst, ke1, ke2, oe1, oe2;
-                    int kro;
+                    long long kro;
 
                     // fill matrix A
                     size_t col = 0;
@@ -585,74 +613,74 @@ calib3D(const ho4DArray<T>& acsSrc, const ho4DArray<T>& acsDst,
         //GADGET_MSG("B = " << v);
 
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("grappa 3D calibration - solve linear system ... "));
-        #ifdef USE_CUDA
-            // go to device
-            try
-            {
-                if ( typeid(typename realType<T>::Type)==typeid(float) && calib_use_gpu_ )
-                {
-                    GADGET_MSG("grappa 3D - calling GPU kernel estimation ... ");
-                    //hoNDArray<float_complext> A_tmp(A.get_dimensions(), reinterpret_cast<float_complext*>(A.begin()));
-                    //hoNDArray<float_complext> B_tmp(B.get_dimensions(), reinterpret_cast<float_complext*>(B.begin()));
-
-                    //cuNDArray<float_complext> device_A(A_tmp);
-                    //cuNDArray<float_complext> device_B(B_tmp);
-                    //cuNDArray<float_complext> device_x;
-                    //if ( Gadgetron::inverse_clib_matrix(&device_A, &device_B, &device_x, thres) != 0 )
-                    //{
-                    //    GADGET_ERROR_MSG("failed in Gadgetron::inverse_clib_matrix(&device_A, &device_B, &device_x, thres) ... ");
-                    //    SolveLinearSystem_Tikhonov(A, B, x, thres);
-                    //}
-                    //else
-                    //{
-                    //    // go back to host
-                    //    boost::shared_ptr< hoNDArray<complext<float> > > host_x = device_x.to_host();
-                    //    memcpy(x.begin(), host_x->begin(), x.get_number_of_bytes());
-                    //}
-
-                    hoNDArray<float_complext> A_tmp(A.get_dimensions(), reinterpret_cast<float_complext*>(A.begin()));
-                    hoNDArray<float_complext> B_tmp(B.get_dimensions(), reinterpret_cast<float_complext*>(B.begin()));
-
-                    int ret(0);
-                    boost::shared_ptr< hoNDArray<complext<float> > > host_x;
-
-                    #pragma omp critical(inverse3D)
-                    {
-                        cuNDArray<float_complext> device_A(A_tmp);
-                        cuNDArray<float_complext> device_B(B_tmp);
-                        cuNDArray<float_complext> device_x;
-
-                        ret = Gadgetron::inverse_clib_matrix(&device_A, &device_B, &device_x, thres);
-                        if ( ret == 0 )
-                        {
-                            host_x = device_x.to_host();
-                        }
-                    }
-
-                    if ( ret != 0 )
-                    {
-                        GADGET_ERROR_MSG("failed in Gadgetron::inverse_clib_matrix(&device_A, &device_B, &device_x, thres) ... ");
-                        SolveLinearSystem_Tikhonov(A, B, x, thres);
-                    }
-                    else
-                    {
-                        memcpy(x.begin(), host_x->begin(), x.get_number_of_bytes());
-                    }
-                }
-                else
-                {
-                    GADGET_WARN_MSG("GPU inverse_clib_matrix for grappa is only available for single-precision, calling the CPU version ... ");
-                    GADGET_CHECK_RETURN_FALSE(SolveLinearSystem_Tikhonov(A, B, x, thres));
-                }
-            }
-            catch(...)
-            {
-                GADGET_ERROR_MSG("failed in GPU inverse_clib_matrix for grappa, calling the CPU version ... ");
-                GADGET_CHECK_RETURN_FALSE(SolveLinearSystem_Tikhonov(A, B, x, thres));
-            }
-        #else
-            GADGET_CHECK_RETURN_FALSE(SolveLinearSystem_Tikhonov(A, B, x, thres));
-        #endif // USE_CUDA
+        //#ifdef USE_CUDA
+        //    // go to device
+        //    try
+        //    {
+        //        if ( typeid(typename realType<T>::Type)==typeid(float) && calib_use_gpu_ )
+        //        {
+        //            GADGET_MSG("grappa 3D - calling GPU kernel estimation ... ");
+        //            //hoNDArray<float_complext> A_tmp(A.get_dimensions(), reinterpret_cast<float_complext*>(A.begin()));
+        //            //hoNDArray<float_complext> B_tmp(B.get_dimensions(), reinterpret_cast<float_complext*>(B.begin()));
+
+        //            //cuNDArray<float_complext> device_A(A_tmp);
+        //            //cuNDArray<float_complext> device_B(B_tmp);
+        //            //cuNDArray<float_complext> device_x;
+        //            //if ( Gadgetron::inverse_clib_matrix(&device_A, &device_B, &device_x, thres) != 0 )
+        //            //{
+        //            //    GADGET_ERROR_MSG("failed in Gadgetron::inverse_clib_matrix(&device_A, &device_B, &device_x, thres) ... ");
+        //            //    SolveLinearSystem_Tikhonov(A, B, x, thres);
+        //            //}
+        //            //else
+        //            //{
+        //            //    // go back to host
+        //            //    boost::shared_ptr< hoNDArray<complext<float> > > host_x = device_x.to_host();
+        //            //    memcpy(x.begin(), host_x->begin(), x.get_number_of_bytes());
+        //            //}
+
+        //            hoNDArray<float_complext> A_tmp(A.get_dimensions(), reinterpret_cast<float_complext*>(A.begin()));
+        //            hoNDArray<float_complext> B_tmp(B.get_dimensions(), reinterpret_cast<float_complext*>(B.begin()));
+
+        //            int ret(0);
+        //            boost::shared_ptr< hoNDArray<complext<float> > > host_x;
+
+        //            #pragma omp critical(inverse3D)
+        //            {
+        //                cuNDArray<float_complext> device_A(A_tmp);
+        //                cuNDArray<float_complext> device_B(B_tmp);
+        //                cuNDArray<float_complext> device_x;
+
+        //                ret = Gadgetron::inverse_clib_matrix(&device_A, &device_B, &device_x, thres);
+        //                if ( ret == 0 )
+        //                {
+        //                    host_x = device_x.to_host();
+        //                }
+        //            }
+
+        //            if ( ret != 0 )
+        //            {
+        //                GADGET_ERROR_MSG("failed in Gadgetron::inverse_clib_matrix(&device_A, &device_B, &device_x, thres) ... ");
+        //                SolveLinearSystem_Tikhonov(A, B, x, thres);
+        //            }
+        //            else
+        //            {
+        //                memcpy(x.begin(), host_x->begin(), x.get_number_of_bytes());
+        //            }
+        //        }
+        //        else
+        //        {
+        //            GADGET_WARN_MSG("GPU inverse_clib_matrix for grappa is only available for single-precision, calling the CPU version ... ");
+        //            GADGET_CHECK_RETURN_FALSE(SolveLinearSystem_Tikhonov(A, B, x, thres));
+        //        }
+        //    }
+        //    catch(...)
+        //    {
+        //        GADGET_ERROR_MSG("failed in GPU inverse_clib_matrix for grappa, calling the CPU version ... ");
+        //        GADGET_CHECK_RETURN_FALSE(SolveLinearSystem_Tikhonov(A, B, x, thres));
+        //    }
+        //#else
+            GADGET_CHECK_EXCEPTION_RETURN_FALSE(SolveLinearSystem_Tikhonov(A, B, x, thres));
+        //#endif // USE_CUDA
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
         //Gadgetron::norm2(x, v);
@@ -677,20 +705,20 @@ calib3D(const ho4DArray<T>& acsSrc, const ho4DArray<T>& acsDst,
 
 template <typename T> 
 bool gtPlusGRAPPA<T>::
-kspaceDomainConvKernel3D(const ho7DArray<T>& ker, int kRO, const std::vector<int>& kE1, const std::vector<int>& kE2, const std::vector<int>& oE1, const std::vector<int>& oE2, ho5DArray<T>& convKer, bool ROis3rdDim)
+kspaceDomainConvKernel3D(const ho7DArray<T>& ker, size_t kRO, const std::vector<int>& kE1, const std::vector<int>& kE2, const std::vector<int>& oE1, const std::vector<int>& oE2, ho5DArray<T>& convKer, bool ROis3rdDim)
 {
 try
     {
-        int srcCHA = (int)(ker.get_size(3));
-        int dstCHA = (int)(ker.get_size(4));
+        long long srcCHA = (long long)(ker.get_size(3));
+        long long dstCHA = (long long)(ker.get_size(4));
 
-        int kNE1 = (int)(kE1.size());
-        int oNE1 = (int)(oE1.size());
+        long long kNE1 = (long long)(kE1.size());
+        long long oNE1 = (long long)(oE1.size());
 
-        int kNE2 = (int)(kE2.size());
-        int oNE2 = (int)(oE2.size());
+        long long kNE2 = (long long)(kE2.size());
+        long long oNE2 = (long long)(oE2.size());
 
-        int kROhalf = kRO/2;
+        long long kROhalf = kRO/2;
         if ( 2*kROhalf == kRO )
         {
             GADGET_WARN_MSG("gtPlusGRAPPA<T>::imageDomainKernel(...) - 2*kROhalf == kRO " << kRO);
@@ -699,21 +727,21 @@ try
 
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("grappa 3D calibration - convert to conv kernel ... "));
         /// fill the convolution kernels
-        int convKRO = 2*kRO+3;
+        long long convKRO = 2*kRO+3;
 
-        int maxKE1 = std::abs(kE1[0]);
+        long long maxKE1 = std::abs(kE1[0]);
         if ( std::abs(kE1[kNE1-1]) > maxKE1 )
         {
             maxKE1 = std::abs(kE1[kNE1-1]);
         }
-        int convKE1 = 2*maxKE1+1;
+        long long convKE1 = 2*maxKE1+1;
 
-        int maxKE2 = std::abs(kE2[0]);
+        long long maxKE2 = std::abs(kE2[0]);
         if ( std::abs(kE2[kNE2-1]) > maxKE2 )
         {
             maxKE2 = std::abs(kE2[kNE2-1]);
         }
-        int convKE2 = 2*maxKE2+1;
+        long long convKE2 = 2*maxKE2+1;
 
         /// allocate the convolution kernel
         if ( ROis3rdDim )
@@ -727,7 +755,7 @@ try
         Gadgetron::clear(&convKer);
 
         /// index
-        int oe1, oe2, kro, ke1, ke2, src, dst;
+        long long oe1, oe2, kro, ke1, ke2, src, dst;
 
         /// fill the convolution kernel and sum up multiple kernels
         for ( oe2=0; oe2<oNE2; oe2++ )
@@ -811,20 +839,20 @@ try
 
 template <typename T> 
 bool gtPlusGRAPPA<T>::
-imageDomainKernel3D(const ho7DArray<T>& ker, int kRO, const std::vector<int>& kE1, const std::vector<int>& kE2, const std::vector<int>& oE1, const std::vector<int>& oE2, int ro, int e1, int e2, hoNDArray<T>& kIm)
+imageDomainKernel3D(const ho7DArray<T>& ker, size_t kRO, const std::vector<int>& kE1, const std::vector<int>& kE2, const std::vector<int>& oE1, const std::vector<int>& oE2, size_t ro, size_t e1, size_t e2, hoNDArray<T>& kIm)
 {
     try
     {
-        int srcCHA = (int)(ker.get_size(3));
-        int dstCHA = (int)(ker.get_size(4));
+        long long srcCHA = (long long)(ker.get_size(3));
+        long long dstCHA = (long long)(ker.get_size(4));
 
-        int kNE1 = (int)(kE1.size());
-        int oNE1 = (int)(oE1.size());
+        long long kNE1 = (long long)(kE1.size());
+        long long oNE1 = (long long)(oE1.size());
 
-        int kNE2 = (int)(kE2.size());
-        int oNE2 = (int)(oE2.size());
+        long long kNE2 = (long long)(kE2.size());
+        long long oNE2 = (long long)(oE2.size());
 
-        int kROhalf = kRO/2;
+        long long kROhalf = kRO/2;
         if ( 2*kROhalf == kRO )
         {
             GADGET_WARN_MSG("gtPlusGRAPPA<T>::imageDomainKernel(...) - 2*kROhalf == kRO " << kRO);
@@ -843,77 +871,8 @@ imageDomainKernel3D(const ho7DArray<T>& ker, int kRO, const std::vector<int>& kE
         bool ROis3rdDim = false;
         GADGET_CHECK_RETURN_FALSE(this->kspaceDomainConvKernel3D(ker, kRO, kE1, kE2, oE1, oE2, convKer, ROis3rdDim));
 
-        /*GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("grappa 3D calibration - convert to conv kernel ... "));
-        /// fill the convolution kernels
-        int convKRO = 2*kRO+3;
-
-        int maxKE1 = std::abs(kE1[0]);
-        if ( std::abs(kE1[kNE1-1]) > maxKE1 )
-        {
-            maxKE1 = std::abs(kE1[kNE1-1]);
-        }
-        int convKE1 = 2*maxKE1+1;
-
-        int maxKE2 = std::abs(kE2[0]);
-        if ( std::abs(kE2[kNE2-1]) > maxKE2 )
-        {
-            maxKE2 = std::abs(kE2[kNE2-1]);
-        }
-        int convKE2 = 2*maxKE2+1;
-
-        /// allocate the convolution kernel
-        ho5DArray<T> convKer(convKRO, convKE1, convKE2, srcCHA, dstCHA);
-        Gadgetron::clear(&convKer);
-
-        /// index
-        int oe1, oe2, kro, ke1, ke2, src, dst;
-
-        /// fill the convolution kernel and sum up multiple kernels
-        for ( oe2=0; oe2<oNE2; oe2++ )
-        {
-            for ( oe1=0; oe1<oNE1; oe1++ )
-            {
-                for ( ke2=0; ke2<kNE2; ke2++ )
-                {
-                    for ( ke1=0; ke1<kNE1; ke1++ )
-                    {
-                        for ( kro=-kROhalf; kro<=kROhalf; kro++ )
-                        {
-                            for ( dst=0; dst<dstCHA; dst++ )
-                            {
-                                for ( src=0; src<srcCHA; src++ )
-                                {
-                                    convKer(-kro+kRO+1, oE1[oe1]-kE1[ke1]+maxKE1, oE2[oe2]-kE2[ke2]+maxKE2, src, dst) = ker(kro+kROhalf, ke1, ke2, src, dst, oe1, oe2);
-                                }
-                            }
-
-                        }
-                    }
-                }
-            }
-        }
-
-        if ( (oE1[0]!=0) && (oE2[0]!=0) && (srcCHA==dstCHA) )
-        {
-            for ( dst=0; dst<dstCHA; dst++ )
-            {
-                for ( src=0; src<srcCHA; src++ )
-                {
-                    if ( src == dst )
-                    {
-                        convKer(kRO+1, maxKE1, maxKE2, src, dst) = 1.0;
-                    }
-                    else
-                    {
-                        convKer(kRO+1, maxKE1, maxKE2, src, dst) = 0.0;
-                    }
-                }
-            }
-        }
-        GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());*/
-
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("grappa 3D calibration - SNR unit scaling ... "));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal( (typename realType<T>::Type)( std::sqrt((double)(ro*e1*e2)) ), convKer ));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::scal( (typename realType<T>::Type)( std::sqrt((double)(ro*e1*e2)) ), convKer ));
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("grappa 3D calibration - zero padding ... "));
@@ -935,12 +894,12 @@ imageDomainKernel3D(const ho7DArray<T>& ker, int kRO, const std::vector<int>& kE
 
 template <typename T> 
 bool gtPlusGRAPPA<T>::
-imageDomainKernelRO3D(const ho7DArray<T>& ker, int kRO, const std::vector<int>& kE1, const std::vector<int>& kE2, const std::vector<int>& oE1, const std::vector<int>& oE2, int ro, hoNDArray<T>& kImRO)
+imageDomainKernelRO3D(const ho7DArray<T>& ker, size_t kRO, const std::vector<int>& kE1, const std::vector<int>& kE2, const std::vector<int>& oE1, const std::vector<int>& oE2, size_t ro, hoNDArray<T>& kImRO)
 {
     try
     {
-        int srcCHA = (int)(ker.get_size(3));
-        int dstCHA = (int)(ker.get_size(4));
+        long long srcCHA = (long long)(ker.get_size(3));
+        long long dstCHA = (long long)(ker.get_size(4));
 
         GADGET_CHECK_RETURN_FALSE(kRO==ker.get_size(0));
         GADGET_CHECK_RETURN_FALSE(kE1.size()==ker.get_size(1));
@@ -962,7 +921,7 @@ imageDomainKernelRO3D(const ho7DArray<T>& ker, int kRO, const std::vector<int>&
         Gadgetron::clear(kImROTemp);
 
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("grappa 3D calibration - SNR unit scaling ... "));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal( (typename realType<T>::Type)( std::sqrt((double)(ro)) ), convKer ));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::scal( (typename realType<T>::Type)( std::sqrt((double)(ro)) ), convKer ));
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, convKer, "convKer_scal_RO");
@@ -992,7 +951,7 @@ imageDomainKernelRO3D(const ho7DArray<T>& ker, int kRO, const std::vector<int>&
 
 template <typename T> 
 bool gtPlusGRAPPA<T>::
-imageDomainKernelE1E2RO(const hoNDArray<T>& kImRO, int e1, int e2, hoNDArray<T>& kImE1E2RO)
+imageDomainKernelE1E2RO(const hoNDArray<T>& kImRO, size_t e1, size_t e2, hoNDArray<T>& kImE1E2RO)
 {
     try
     {
@@ -1008,7 +967,7 @@ imageDomainKernelE1E2RO(const hoNDArray<T>& kImRO, int e1, int e2, hoNDArray<T>&
         hoNDArray<T> kImROScaled(kImRO);
 
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("grappa 3D calibration - SNR unit scaling for E1 and E2 ... "));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal( (typename realType<T>::Type)( std::sqrt((double)(e1*e2)) ), kImROScaled ));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::scal( (typename realType<T>::Type)( std::sqrt((double)(e1*e2)) ), kImROScaled ));
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, kImROScaled, "kImROScaledE1E2");
diff --git a/toolboxes/gtplus/algorithm/gtPlusOperator.h b/toolboxes/gtplus/algorithm/gtPlusOperator.h
index b78d2ea..6720950 100644
--- a/toolboxes/gtplus/algorithm/gtPlusOperator.h
+++ b/toolboxes/gtplus/algorithm/gtPlusOperator.h
@@ -5,7 +5,7 @@
 
 #pragma once
 
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
 #include "GadgetronTimer.h"
 #include "gtPlusISMRMRDReconUtil.h"
 #include "gtPlusIOAnalyze.h"
@@ -22,6 +22,8 @@ class gtPlusOperator
 {
 public:
 
+    typedef typename realType<T>::Type value_type;
+
     gtPlusOperator();
     virtual ~gtPlusOperator();
 
@@ -42,8 +44,16 @@ public:
     // compute cost value
     virtual bool obj(const hoNDArray<T>& x, T& obj) = 0;
 
+    // perform the proximity operation
+    virtual bool proximity(hoNDArray<T>& x, value_type thres);
+
+    // indicate the operator is unitary or not
+    // unitary operator, AA' = I
+    virtual bool unitary() const = 0;
+
     // restore acquired kspace points to x
     virtual bool restoreAcquiredKSpace(const hoNDArray<T>& acquired, hoNDArray<T>& y);
+    virtual bool restoreAcquiredKSpace(hoNDArray<T>& y);
 
     // set the memory manager
     void setMemoryManager(boost::shared_ptr<gtPlusMemoryManager>& memManager);
@@ -71,7 +81,7 @@ public:
     gtPlusISMRMRDReconUtil<T> gtPlus_util_;
     gtPlusISMRMRDReconUtilComplex<T> gtPlus_util_complex_;
 
-protected:
+public:
 
     // acquired kspace (unacquired points are zeros)
     boost::shared_ptr< hoNDArray<T> > acquired_points_;
@@ -143,6 +153,12 @@ bool gtPlusOperator<T>::adjointforwardOperator(const hoNDArray<T>& x, hoNDArray<
 }
 
 template <typename T> 
+bool gtPlusOperator<T>::restoreAcquiredKSpace(hoNDArray<T>& y)
+{
+    return this->restoreAcquiredKSpace(*acquired_points_, y);
+}
+
+template <typename T> 
 bool gtPlusOperator<T>::restoreAcquiredKSpace(const hoNDArray<T>& acquired, hoNDArray<T>& y)
 {
     try
@@ -158,7 +174,8 @@ bool gtPlusOperator<T>::restoreAcquiredKSpace(const hoNDArray<T>& acquired, hoND
         #pragma omp parallel for default(none) private(n) shared(N, pA, pY)
         for ( n=0; n<(int)N; n++ )
         {
-            if ( std::abs(pA[n]) > 0 )
+            // if ( std::abs(pA[n]) > 0 )
+            if ( GT_ABS(pA[n].real()) > 0 )
             {
                 pY[n] = pA[n];
             }
@@ -198,7 +215,8 @@ setAcquiredPoints(boost::shared_ptr< hoNDArray<T> >& kspace)
         #endif
         for ( ii=0; ii<(long long)N; ii++ )
         {
-            if ( std::abs( (*kspace)(ii) ) < DBL_EPSILON )
+            // if ( std::abs( (*kspace)(ii) ) < DBL_EPSILON )
+            if ( GT_ABS((*kspace)(ii).real()) < DBL_EPSILON )
             {
                 unacquired_points_indicator_(ii) = T(1.0);
             }
@@ -234,5 +252,11 @@ setCoilSenMap(boost::shared_ptr< hoNDArray<T> >& senMap)
     return true;
 }
 
+template <typename T> 
+bool gtPlusOperator<T>::
+proximity(hoNDArray<T>& /*x*/, value_type /*thres*/)
+{
+    return true;
+}
 
 }}
diff --git a/toolboxes/gtplus/algorithm/gtPlusSPIRIT.h b/toolboxes/gtplus/algorithm/gtPlusSPIRIT.h
index def68fc..656fad4 100644
--- a/toolboxes/gtplus/algorithm/gtPlusSPIRIT.h
+++ b/toolboxes/gtplus/algorithm/gtPlusSPIRIT.h
@@ -38,42 +38,43 @@ public:
     // SPIRIT calibration for 2D case
     // acsSrc : [RO E1 srcCHA]
     // acsDst : [RO E1 dstCHA]
-    // ker : [kRO kE1 srcCHA dstCHA 1 1]
+    // ker : [kRO kE1 srcCHA dstCHA oRO oE1]
     bool calib(const ho3DArray<T>& acsSrc, const ho3DArray<T>& acsDst, double thres, 
-            int kRO, int kE1, int a, int b, ho6DArray<T>& ker);
+            size_t kRO, size_t kE1, size_t oRO, size_t oE1, ho6DArray<T>& ker);
 
     // image domain kernel for 2D kernel
     // kIm: image domain kernel [RO E1 srcCHA dstCHA]
     // if minusI==true, compute image domain G-I kernel
-    bool imageDomainKernel(const ho6DArray<T>& ker, int kRO, int kE1, int a, int b, int ro, int e1, hoNDArray<T>& kIm, bool minusI=false);
+    bool imageDomainKernel(const ho6DArray<T>& ker, size_t kRO, size_t kE1, 
+        size_t oRO, size_t oE1, size_t ro, size_t e1, hoNDArray<T>& kIm, bool minusI=false);
 
     // SPIRIT calibration for 3D case
     // acsSrc : [RO E1 E2 srcCHA]
     // acsDst : [RO E1 E2 dstCHA]
-    // ker : [kRO kE1 kE2 srcCHA dstCHA 1 1 1]
+    // ker : [kRO kE1 kE2 srcCHA dstCHA oRO oE1 oE2]
     // overDetermineRatio : over determine ratio of calib matrix, if < 1, all data are used
     bool calib3D(const ho4DArray<T>& acsSrc, const ho4DArray<T>& acsDst, double thres, double overDetermineRatio, 
-            int kRO, int kE1, int kE2, int a, int b, int c, hoNDArray<T>& ker);
+            size_t kRO, size_t kE1, size_t kE2, size_t oRO, size_t oE1, size_t oE2, hoNDArray<T>& ker);
 
     // convert the calibrated kernel to the convlution kernel in kspace
     // if ROis3rdDim == true, the kernel dimension is [E1 E2 RO], otherwise [RO E1 E2]
-    bool kspaceDomainConvKernel3D(const hoNDArray<T>& ker, int kRO, int kE1, int kE2, int a, int b, int c, ho5DArray<T>& convKerFlip, bool minusI=true, bool ROis3rdDim=true);
+    bool kspaceDomainConvKernel3D(const hoNDArray<T>& ker, size_t kRO, size_t kE1, size_t kE2, size_t oRO, size_t oE1, size_t oE2, ho5DArray<T>& convKerFlip, bool minusI=true, bool ROis3rdDim=true);
 
     // image domain kernel for 3D kernel
     // kIm: image domain kernel [E1 E2 RO srcCHA dstCHA]
     // if minusI==true, compute image domain G-I kernel
-    bool imageDomainKernel3D(const hoNDArray<T>& ker, int kRO, int kE1, int kE2, 
-        int a, int b, int c, int ro, int e1, int e2, hoNDArray<T>& kIm, bool minusI=false);
+    bool imageDomainKernel3D(const hoNDArray<T>& ker, size_t kRO, size_t kE1, size_t kE2, 
+        size_t oRO, size_t oE1, size_t oE2, size_t ro, size_t e1, size_t e2, hoNDArray<T>& kIm, bool minusI=false);
 
     // image domain kernel for 3D kernel, only RO direction is converted to image domain
     // E1 and E2 stays in the kspace domain
     // kImRO: kspace-image hybrid kernel [convE1 convE2 RO srcCHA dstCHA]
-    bool imageDomainKernelRO3D(const hoNDArray<T>& ker, int kRO, int kE1, int kE2, 
-        int a, int b, int c, int ro, hoNDArray<T>& kImRO, bool minusI=false);
+    bool imageDomainKernelRO3D(const hoNDArray<T>& ker, size_t kRO, size_t kE1, size_t kE2, 
+        size_t oRO, size_t oE1, size_t oE2, size_t ro, hoNDArray<T>& kImRO, bool minusI=false);
 
     // image domain kernel for 3D kernel, E1 and E2 directions are converted to image domain
     // kImRO : kspace-image hybrid kernel where first two dimensions are E1 and E2 and in kspace
-    bool imageDomainKernelE1E2RO(const hoNDArray<T>& kImRO, int e1, int e2, hoNDArray<T>& kImE1E2RO);
+    bool imageDomainKernelE1E2RO(const hoNDArray<T>& kImRO, size_t e1, size_t e2, hoNDArray<T>& kImE1E2RO);
 
     // compute the image domain adjoint kernel
     bool imageDomainAdjointKernel(const hoNDArray<T>& kIm, hoNDArray<T>& adjkIm);
@@ -81,6 +82,7 @@ public:
     // compute the (G-I)'*(G-I)
     bool AdjointForwardKernel(const hoNDArray<T>& kImS2D, const hoNDArray<T>& kImD2S, hoNDArray<T>& kIm);
 
+    // use gpu in the kernel calibration
     bool calib_use_gpu_;
 
     using BaseClass::gt_timer1_;
@@ -103,13 +105,14 @@ void gtPlusSPIRIT<T>::printInfo(std::ostream& os)
     os << "Implementation of SPIRIT algorithms for ISMRMRD package" << endl;
     os << "Both 2D and 3D version are implemented" << endl;
     os << "Algorithms are published at:" << endl;
-    os << "Lustig, M. and Pauly, J. M. (2010), SPIRiT: Iterative self-consistent parallel imaging reconstruction from arbitrary k-space. Magn Reson Med, 64: 457-471. doi: 10.1002/mrm.22428" << endl;
+    os << "Lustig, M. and Pauly, J. M. (2010), SPIRiT: Iterative self-consistent parallel imaging reconstruction from arbitrary k-space. Magn Reson Med, 64: 457–471. doi: 10.1002/mrm.22428" << endl;
     os << "----------------------------------------------------------------------" << endl;
 }
 
 template <typename T> 
 bool gtPlusSPIRIT<T>::
-calib(const ho3DArray<T>& acsSrc, const ho3DArray<T>& acsDst, double thres, int kRO, int kE1, int a, int b, ho6DArray<T>& ker)
+calib(const ho3DArray<T>& acsSrc, const ho3DArray<T>& acsDst, double thres, 
+            size_t kRO, size_t kE1, size_t oRO, size_t oE1, ho6DArray<T>& ker)
 {
     try
     {
@@ -122,22 +125,39 @@ calib(const ho3DArray<T>& acsSrc, const ho3DArray<T>& acsDst, double thres, int
         size_t srcCHA = acsSrc.get_size(2);
         size_t dstCHA = acsDst.get_size(2);
 
-        int kROhalf = kRO/2;
+        long long kROhalf = kRO/2;
         if ( 2*kROhalf == kRO )
         {
             GADGET_WARN_MSG("gtPlusSPIRIT<T>::calib(...) - 2*kROhalf == kRO " << kRO);
         }
         kRO = 2*kROhalf + 1;
 
-        int kE1half = kE1/2;
+        long long kE1half = kE1/2;
         if ( 2*kE1half == kE1 )
         {
             GADGET_WARN_MSG("gtPlusSPIRIT<T>::calib(...) - 2*kE1half == kE1 " << kE1);
         }
         kE1 = 2*kE1half + 1;
 
+        if ( oRO > kRO ) oRO = kRO;
+        if ( oE1 > kE1 ) oE1 = kE1;
+
+        long long oROhalf = oRO/2;
+        if ( 2*oROhalf == oRO )
+        {
+            GADGET_WARN_MSG("gtPlusSPIRIT<T>::calib(...) - 2*oROhalf == oRO " << oRO);
+        }
+        oRO = 2*oROhalf + 1;
+
+        long long oE1half = oE1/2;
+        if ( 2*oE1half == oE1 )
+        {
+            GADGET_WARN_MSG("gtPlusSPIRIT<T>::calib(...) - 2*oE1half == oE1 " << oE1);
+        }
+        oE1 = 2*oE1half + 1;
+
         // allocate kernel
-        GADGET_CHECK_RETURN_FALSE(ker.createArray(kRO, kE1, srcCHA, dstCHA, 1, 1));
+        GADGET_CHECK_RETURN_FALSE(ker.createArray(kRO, kE1, srcCHA, dstCHA, oRO, oE1));
 
         // loop over the calibration region and assemble the equation
         // Ax = b
@@ -156,128 +176,155 @@ calib(const ho3DArray<T>& acsSrc, const ho3DArray<T>& acsDst, double thres, int
         size_t colB = dstCHA;
 
         bool useGPU = (typeid(typename realType<T>::Type)==typeid(float) && calib_use_gpu_);
-        if ( useGPU )
-        {
-            GADGET_MSG("spirit 2D - calling GPU kernel estimation ... "); 
-        }
-
-        hoMatrix<T> A(rowA, colA);
-        T* pA = A.begin();
+        //if ( useGPU )
+        //{
+        //    GADGET_MSG("spirit 2D - calling GPU kernel estimation ... "); 
+        //}
 
-        hoMatrix<T> B(rowA, colB);
-        T* pB = B.begin();
+        const T* pAcsSrc = acsSrc.begin();
 
-        hoMatrix<T> x( A.cols(), B.cols() );
+        #ifdef GCC_OLD_FLAG
+            #pragma omp parallel default(none) shared(RO, E1, sRO, eRO, sE1, eE1, oRO, oE1, lenRO, lenE1, rowA, colA, colB, kRO, kE1, kROhalf, kE1half, oROhalf, oE1half, pAcsSrc, srcCHA, dstCHA, thres, useGPU, std::cout) num_threads( (int)(oRO*oE1) ) if (oRO*oE1>=3)
+        #else
+            #pragma omp parallel default(none) shared(RO, E1, sRO, eRO, sE1, eE1, oRO, oE1, lenRO, lenE1, rowA, colA, colB, kRO, kE1, kROhalf, kE1half, oROhalf, oE1half, pAcsSrc, acsSrc, acsDst, srcCHA, dstCHA, thres, ker, useGPU, std::cout) num_threads( (int)(oRO*oE1) ) if (oRO*oE1>=3)
+        #endif
+        {
+            hoMatrix<T> A(rowA, colA);
+            T* pA = A.begin();
 
-        int dRO, dE1;
+            hoMatrix<T> B(rowA, colB);
+            T* pB = B.begin();
 
-        for ( int e1=(int)sE1; e1<=(int)eE1; e1++ )
-        {
-            dE1 = e1;
+            hoMatrix<T> x( A.cols(), B.cols() );
 
-            for ( int ro=sRO; ro<=(int)eRO; ro++ )
+            long long kInd = 0;
+            #pragma omp for
+            for ( kInd=0; kInd<(long long)(oRO*oE1); kInd++ )
             {
-                dRO = ro;
+                long long oe1 = kInd/oRO;
+                long long oro = kInd - oe1*oRO;
 
-                int rInd = (e1-sE1)*lenRO+ro-sRO;
+                oe1 -=oE1half;
+                oro -=oROhalf;
 
-                // fill matrix A
-                size_t col = 0;
-                for ( size_t src=0; src<srcCHA; src++ )
+                long long dRO, dE1;
+
+                for ( long long e1=(long long)sE1; e1<=(long long)eE1; e1++ )
                 {
-                    for ( int ke1=-kE1half; ke1<=kE1half; ke1++ )
+                    dE1 = e1 + oe1;
+
+                    for ( long long ro=sRO; ro<=(long long)eRO; ro++ )
                     {
-                        for ( int kro=-kROhalf; kro<=kROhalf; kro++ )
+                        dRO = ro + oro;
+
+                        long long rInd = (e1-sE1)*lenRO+ro-sRO;
+
+                        // fill matrix A
+                        size_t col = 0;
+                        for ( size_t src=0; src<srcCHA; src++ )
                         {
-                            if ( kro!=0 || ke1!=0 )
+                            for ( long long ke1=-kE1half; ke1<=kE1half; ke1++ )
                             {
-                                //A(rInd, col++) = acsSrc(ro+kro, e1+ke1, src);
-                                pA[rInd + col*rowA] = acsSrc(ro+kro, e1+ke1, src);
-                                col++;
+                                for ( long long kro=-kROhalf; kro<=kROhalf; kro++ )
+                                {
+                                    if ( kro!=oro || ke1!=oe1 )
+                                    {
+                                        //A(rInd, col++) = acsSrc(ro+kro, e1+ke1, src);
+                                        // pA[rInd + col*rowA] = acsSrc(ro+kro, e1+ke1, src);
+                                        pA[rInd + col*rowA] = pAcsSrc[ro+kro + (e1+ke1)*RO + src*RO*E1];
+                                        col++;
+                                    }
+                                }
                             }
                         }
-                    }
-                }
-
-                // fill matrix B
-                for ( size_t dst=0; dst<dstCHA; dst++ )
-                {
-                    //B(rInd, dst) = acsDst(dRO, dE1, dst);
-                    pB[rInd+dst*rowA] = acsDst(dRO, dE1, dst);
-                }
-            }
-        }
-
-        #ifdef USE_CUDA
-            // go to device
-            try
-            {
-                if ( useGPU )
-                {
-                    hoNDArray<float_complext> A_tmp(A.get_dimensions(), reinterpret_cast<float_complext*>(A.begin()));
-                    hoNDArray<float_complext> B_tmp(B.get_dimensions(), reinterpret_cast<float_complext*>(B.begin()));
-
-                    int ret(0);
-                    boost::shared_ptr< hoNDArray<complext<float> > > host_x;
-
-                    #pragma omp critical(inverse_spirit)
-                    {
-                        cuNDArray<float_complext> device_A(A_tmp);
-                        cuNDArray<float_complext> device_B(B_tmp);
-                        cuNDArray<float_complext> device_x;
 
-                        ret = Gadgetron::inverse_clib_matrix(&device_A, &device_B, &device_x, thres);
-                        if ( ret == 0 )
+                        // fill matrix B
+                        for ( size_t dst=0; dst<dstCHA; dst++ )
                         {
-                            host_x = device_x.to_host();
+                            //B(rInd, dst) = acsDst(dRO, dE1, dst);
+                            pB[rInd+dst*rowA] = acsDst(dRO, dE1, dst);
                         }
                     }
-
-                    if ( ret != 0 )
-                    {
-                        GADGET_ERROR_MSG("failed in Gadgetron::inverse_clib_matrix(&device_A, &device_B, &device_x, thres) ... ");
-                        SolveLinearSystem_Tikhonov(A, B, x, thres);
-                    }
-                    else
-                    {
-                        memcpy(x.begin(), host_x->begin(), host_x->get_number_of_bytes());
-                    }
                 }
-                else
-                {
-                    GADGET_WARN_MSG("GPU inverse_clib_matrix is only available for single-precision, calling the CPU version ... ");
+
+                // GADGET_CHECK_RETURN_FALSE(SolveLinearSystem_Tikhonov(A, B, x, thres));
+
+                //GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("grappa 3D calibration - solve linear system ... "));
+                //#ifdef USE_CUDA
+                //    // go to device
+                //    try
+                //    {
+                //        if ( useGPU )
+                //        {
+                //            hoNDArray<float_complext> A_tmp(A.get_dimensions(), reinterpret_cast<float_complext*>(A.begin()));
+                //            hoNDArray<float_complext> B_tmp(B.get_dimensions(), reinterpret_cast<float_complext*>(B.begin()));
+
+                //            int ret(0);
+                //            boost::shared_ptr< hoNDArray<complext<float> > > host_x;
+
+                //            #pragma omp critical(inverse_spirit)
+                //            {
+                //                cuNDArray<float_complext> device_A(A_tmp);
+                //                cuNDArray<float_complext> device_B(B_tmp);
+                //                cuNDArray<float_complext> device_x;
+
+                //                ret = Gadgetron::inverse_clib_matrix(&device_A, &device_B, &device_x, thres);
+                //                if ( ret == 0 )
+                //                {
+                //                    host_x = device_x.to_host();
+                //                }
+                //            }
+
+                //            if ( ret != 0 )
+                //            {
+                //                GADGET_ERROR_MSG("failed in Gadgetron::inverse_clib_matrix(&device_A, &device_B, &device_x, thres) ... ");
+                //                SolveLinearSystem_Tikhonov(A, B, x, thres);
+                //            }
+                //            else
+                //            {
+                //                memcpy(x.begin(), host_x->begin(), host_x->get_number_of_bytes());
+                //            }
+                //        }
+                //        else
+                //        {
+                //            GADGET_WARN_MSG("GPU inverse_clib_matrix is only available for single-precision, calling the CPU version ... ");
+                //            SolveLinearSystem_Tikhonov(A, B, x, thres);
+                //        }
+                //    }
+                //    catch(...)
+                //    {
+                //        GADGET_ERROR_MSG("failed in GPU inverse_clib_matrix for grappa, calling the CPU version ... ");
+                //        SolveLinearSystem_Tikhonov(A, B, x, thres);
+                //    }
+                //#else
                     SolveLinearSystem_Tikhonov(A, B, x, thres);
-                }
-            }
-            catch(...)
-            {
-                GADGET_ERROR_MSG("failed in GPU inverse_clib_matrix for grappa, calling the CPU version ... ");
-                SolveLinearSystem_Tikhonov(A, B, x, thres);
-            }
-        #else
-            SolveLinearSystem_Tikhonov(A, B, x, thres);
-        #endif // USE_CUDA
+                //#endif // USE_CUDA
+                //GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
-        int ind(0);
-        for ( size_t src=0; src<srcCHA; src++ )
-        {
-            for ( int ke1=-kE1half; ke1<=kE1half; ke1++ ) 
-            {
-                for ( int kro=-kROhalf; kro<=kROhalf; kro++ ) 
+                //SolveLinearSystem_Tikhonov(A, B, x, thres);
+
+                long long ind(0);
+                for ( size_t src=0; src<srcCHA; src++ )
                 {
-                    if ( kro!=0 || ke1!=0 )
-                    {
-                        for ( size_t dst=0; dst<dstCHA; dst++ )
-                        {
-                            ker(kro+kROhalf, ke1+kE1half, src, dst, 0, 0) = x(ind, dst);
-                        }
-                        ind++;
-                    }
-                    else
+                    for ( long long ke1=-kE1half; ke1<=kE1half; ke1++ ) 
                     {
-                        for ( size_t dst=0; dst<dstCHA; dst++ )
+                        for ( long long kro=-kROhalf; kro<=kROhalf; kro++ ) 
                         {
-                            ker(kro+kROhalf, ke1+kE1half, src, dst, 0, 0) = 0;
+                            if ( kro!=oro || ke1!=oe1 )
+                            {
+                                for ( size_t dst=0; dst<dstCHA; dst++ )
+                                {
+                                    ker(kro+kROhalf, ke1+kE1half, src, dst, oro+oROhalf, oe1+oE1half) = x(ind, dst);
+                                }
+                                ind++;
+                            }
+                            else
+                            {
+                                for ( size_t dst=0; dst<dstCHA; dst++ )
+                                {
+                                    ker(kro+kROhalf, ke1+kE1half, src, dst, oro+oROhalf, oe1+oE1half) = 0;
+                                }
+                            }
                         }
                     }
                 }
@@ -295,43 +342,53 @@ calib(const ho3DArray<T>& acsSrc, const ho3DArray<T>& acsDst, double thres, int
 
 template <typename T> 
 bool gtPlusSPIRIT<T>::
-imageDomainKernel(const ho6DArray<T>& ker, int kRO, int kE1, int a, int b, int ro, int e1, hoNDArray<T>& kIm, bool minusI)
+imageDomainKernel(const ho6DArray<T>& ker, size_t kRO, size_t kE1, size_t oRO, size_t oE1, size_t ro, size_t e1, hoNDArray<T>& kIm, bool minusI)
 {
     try
     {
-        int srcCHA = (int)(ker.get_size(2));
-        int dstCHA = (int)(ker.get_size(3));
+        long long srcCHA = (long long)(ker.get_size(2));
+        long long dstCHA = (long long)(ker.get_size(3));
 
         GADGET_CHECK_RETURN_FALSE(kRO==ker.get_size(0));
         GADGET_CHECK_RETURN_FALSE(kE1==ker.get_size(1));
+        GADGET_CHECK_RETURN_FALSE(oRO==ker.get_size(4));
+        GADGET_CHECK_RETURN_FALSE(oE1==ker.get_size(5));
 
-        int kROhalf = kRO/2;
-        int kE1half = kE1/2;
+        long long kROhalf = kRO/2;
+        long long kE1half = kE1/2;
+        long long oROhalf = oRO/2;
+        long long oE1half = oE1/2;
 
         // allocate image domain kernel
         kIm.create(ro, e1, srcCHA, dstCHA);
 
         /// fill the convolution kernels
-        int convKRO = 2*kRO-1;
-        int convKE1 = 2*kE1-1;
+        long long convKRO = 2*kRO-1;
+        long long convKE1 = 2*kE1-1;
 
         /// fill in convolution kernel
-        ho6DArray<T> convKer(convKRO, convKE1, srcCHA, dstCHA, 1, 1);
+        ho6DArray<T> convKer(convKRO, convKE1, srcCHA, dstCHA, oRO, oE1);
         Gadgetron::clear(&convKer);
 
-        int kro, ke1, src, dst;
-        for ( ke1=-kE1half; ke1<=kE1half; ke1++ )
+        long long oro, oe1, kro, ke1, src, dst;
+        for ( oe1=-oE1half; oe1<=oE1half; oe1++ )
         {
-            for ( kro=-kROhalf; kro<=kROhalf; kro++ )
+            for ( oro=-oROhalf; oro<=oROhalf; oro++ )
             {
-                int iro = kro + kRO -1;
-                int ie1 = ke1 + kE1 -1;
-
-                for ( dst=0; dst<dstCHA; dst++ )
+                for ( ke1=-kE1half; ke1<=kE1half; ke1++ )
                 {
-                    for ( src=0; src<srcCHA; src++ )
+                    for ( kro=-kROhalf; kro<=kROhalf; kro++ )
                     {
-                        convKer(iro, ie1, src, dst, 0, 0) = ker(kro+kROhalf, ke1+kE1half, src, dst, 0, 0);
+                        long long iro = kro - oro + kRO -1;
+                        long long ie1 = ke1 - oe1 + kE1 -1;
+
+                        for ( dst=0; dst<dstCHA; dst++ )
+                        {
+                            for ( src=0; src<srcCHA; src++ )
+                            {
+                                convKer(iro, ie1, src, dst, oro+oROhalf, oe1+oE1half) = ker(kro+kROhalf, ke1+kE1half, src, dst, oro+oROhalf, oe1+oE1half);
+                            }
+                        }
                     }
                 }
             }
@@ -341,6 +398,7 @@ imageDomainKernel(const ho6DArray<T>& ker, int kRO, int kE1, int a, int b, int r
         ho4DArray<T> conKerMean(convKRO, convKE1, srcCHA, dstCHA);
         GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOverLastDimension(convKer, convKer2));
         GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOverLastDimension(convKer2, conKerMean));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::scal( (typename realType<T>::Type)(1.0/(oRO*oE1)), conKerMean) );
 
         // flip the kernel
         ho4DArray<T> convKerFlip(convKRO, convKE1, srcCHA, dstCHA);
@@ -369,7 +427,7 @@ imageDomainKernel(const ho6DArray<T>& ker, int kRO, int kE1, int a, int b, int r
             }
         }
 
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal( (typename realType<T>::Type)( std::sqrt((double)(ro*e1)) ), convKerFlip ));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::scal( (typename realType<T>::Type)( std::sqrt((double)(ro*e1)) ), convKerFlip ));
         GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().zeropad2D(convKerFlip, ro, e1, kIm));
         GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft2c(kIm));
     }
@@ -385,7 +443,7 @@ imageDomainKernel(const ho6DArray<T>& ker, int kRO, int kE1, int a, int b, int r
 template <typename T> 
 bool gtPlusSPIRIT<T>::
 calib3D(const ho4DArray<T>& acsSrc, const ho4DArray<T>& acsDst, double thres, double overDetermineRatio, 
-            int kRO, int kE1, int kE2, int a, int b, int c, hoNDArray<T>& ker)
+            size_t kRO, size_t kE1, size_t kE2, size_t oRO, size_t oE1, size_t oE2, hoNDArray<T>& ker)
 {
     try
     {
@@ -400,29 +458,54 @@ calib3D(const ho4DArray<T>& acsSrc, const ho4DArray<T>& acsDst, double thres, do
         size_t srcCHA = acsSrc.get_size(3);
         size_t dstCHA = acsDst.get_size(3);
 
-        int kROhalf = kRO/2;
+        long long kROhalf = kRO/2;
         if ( 2*kROhalf == kRO )
         {
             GADGET_WARN_MSG("gtPlusSPIRIT<T>::calib3D(...) - 2*kROhalf == kRO " << kRO);
         }
         kRO = 2*kROhalf + 1;
 
-        int kE1half = kE1/2;
+        long long kE1half = kE1/2;
         if ( 2*kE1half == kE1 )
         {
             GADGET_WARN_MSG("gtPlusSPIRIT<T>::calib3D(...) - 2*kE1half == kE1 " << kE1);
         }
         kE1 = 2*kE1half + 1;
 
-        int kE2half = kE2/2;
+        long long kE2half = kE2/2;
         if ( 2*kE2half == kE2 )
         {
             GADGET_WARN_MSG("gtPlusSPIRIT<T>::calib3D(...) - 2*kE2half == kE2 " << kE2);
         }
         kE2 = 2*kE2half + 1;
 
+        if ( oRO > kRO ) oRO = kRO;
+        if ( oE1 > kE1 ) oE1 = kE1;
+        if ( oE2 > kE2 ) oE2 = kE2;
+
+        long long oROhalf = oRO/2;
+        if ( 2*oROhalf == oRO )
+        {
+            GADGET_WARN_MSG("gtPlusSPIRIT<T>::calib3D(...) - 2*oROhalf == oRO " << oRO);
+        }
+        oRO = 2*oROhalf + 1;
+
+        long long oE1half = oE1/2;
+        if ( 2*oE1half == oE1 )
+        {
+            GADGET_WARN_MSG("gtPlusSPIRIT<T>::calib3D(...) - 2*oE1half == oE1 " << oE1);
+        }
+        oE1 = 2*oE1half + 1;
+
+        long long oE2half = oE2/2;
+        if ( 2*oE2half == oE2 )
+        {
+            GADGET_WARN_MSG("gtPlusSPIRIT<T>::calib3D(...) - 2*oE2half == oE2 " << oE2);
+        }
+        oE2 = 2*oE2half + 1;
+
         // allocate kernel
-        ker.create(kRO, kE1, kE2, srcCHA, dstCHA, 1, 1, 1);
+        ker.create(kRO, kE1, kE2, srcCHA, dstCHA, oRO, oE1, oE2);
 
         // loop over the calibration region and assemble the equation
         // Ax = b
@@ -442,7 +525,7 @@ calib3D(const ho4DArray<T>& acsSrc, const ho4DArray<T>& acsDst, double thres, do
         size_t colA = (kRO*kE1*kE2-1)*srcCHA;
         if ( overDetermineRatio > 1.0 )
         {
-            size_t maxRowA = std::ceil(overDetermineRatio*colA);
+            size_t maxRowA = (size_t)std::ceil(overDetermineRatio*colA);
             size_t maxROUsed = maxRowA/(lenE1*lenE2);
             if ( maxROUsed < lenRO )
             {
@@ -455,15 +538,33 @@ calib3D(const ho4DArray<T>& acsSrc, const ho4DArray<T>& acsDst, double thres, do
                     if ( Gadgetron::sumOver2ndDimension(acsSrc1stChaSumE2, acsSrc1stChaSumE2E1) )
                     {
                         T maxSignal;
-                        size_t roInd;
-                        if ( Gadgetron::maxAbsolute(acsSrc1stChaSumE2E1, maxSignal, roInd) )
+                        size_t roInd(0);
+                        try
                         {
-                            sRO = roInd - maxROUsed/2;
-                            eRO = sRO + maxROUsed - 1;
+                            Gadgetron::maxAbsolute(acsSrc1stChaSumE2E1, maxSignal, roInd);
+
+                            if ( roInd > maxROUsed/2+kROhalf )
+                            {
+                                sRO = roInd - maxROUsed/2;
+                            }
+                            else
+                            {
+                                sRO = kROhalf;
+                            }
+
+                            if( sRO+maxROUsed-1 <= RO-kROhalf-1 )
+                            {
+                                eRO = sRO + maxROUsed - 1;
+                            }
+                            else
+                            {
+                                eRO = RO - kROhalf -1;
+                            }
+
                             lenRO = eRO-sRO+1;
                             GADGET_MSG("gtPlusSPIRIT<T>::calib3D(...) - overDetermineRatio = " << overDetermineRatio << " ; RO data range used : [" << sRO << " " << eRO << "] ...");
                         }
-                        else
+                        catch(...)
                         {
                             GADGET_WARN_MSG("gtPlusSPIRIT<T>::calib3D(...) - overDetermineRatio is ignored ... ");
                         }
@@ -485,147 +586,181 @@ calib3D(const ho4DArray<T>& acsSrc, const ho4DArray<T>& acsDst, double thres, do
             GADGET_MSG("spirit 3D - calling GPU kernel estimation ... ");
         }
 
-        hoMatrix<T> A(rowA, colA);
-        T* pA = A.begin();
-
-        hoMatrix<T> B(rowA, colB);
-        T* pB = B.begin();
+        #ifdef GCC_OLD_FLAG
+            #pragma omp parallel default(none) shared(sRO, eRO, sE1, eE1, sE2, eE2, oRO, oE1, oE2, lenRO, lenE1, lenE2, rowA, colA, colB, kROhalf, kE1half, kE2half, oROhalf, oE1half, oE2half, srcCHA, dstCHA, thres, useGPU, std::cout) num_threads( (int)(oRO*oE1*oE2) ) if (oRO*oE1*oE2>=3 && oRO*oE1*oE2<9)
+        #else
+            #pragma omp parallel default(none) shared(sRO, eRO, sE1, eE1, sE2, eE2, oRO, oE1, oE2, lenRO, lenE1, lenE2, rowA, colA, colB, kROhalf, kE1half, kE2half, oROhalf, oE1half, oE2half, acsSrc, acsDst, srcCHA, dstCHA, thres, ker, useGPU, std::cout) num_threads( (int)(oRO*oE1*oE2) ) if (oRO*oE1*oE2>=3 && oRO*oE1*oE2<9)
+        #endif
+        {
+            hoMatrix<T> A(rowA, colA);
+            hoMatrix<T> B(rowA, colB);
+            hoMatrix<T> x( A.cols(), B.cols() );
 
-        hoMatrix<T> x( A.cols(), B.cols() );
+            // hoNDArrayMemoryManaged<T> A_mem(colA, rowA, gtPlus_mem_manager_);
+            //hoNDArray<T> A_mem(colA, rowA);
+            //A.createMatrix( rowA, colA, A_mem.begin() );
 
-        int dRO, dE1, dE2;
+            // hoNDArrayMemoryManaged<T> B_mem(colB, rowA, gtPlus_mem_manager_);
+            // hoNDArray<T> B_mem(colB, rowA);
+            // B.createMatrix( A.rows(), colB, B_mem.begin() );
 
-        for ( int e2=(int)sE2; e2<=(int)eE2; e2++ )
-        {
-            dE2 = e2;
+            T* pA = A.begin();
+            T* pB = B.begin();
 
-            for ( int e1=(int)sE1; e1<=(int)eE1; e1++ )
+            long long kInd = 0;
+            #pragma omp for
+            for ( kInd=0; kInd<(long long)(oRO*oE1*oE2); kInd++ )
             {
-                dE1 = e1;
+                long long oe2 = kInd/(oRO*oE1);
+                long long oe1 = kInd - oe2*oRO*oE1;
+                oe1 /= oRO;
+                long long oro = kInd - oe2*oRO*oE1 - oe1*oRO;
 
-                for ( int ro=sRO; ro<=(int)eRO; ro++ )
-                {
-                    dRO = ro;
+                oe2 -=oE2half;
+                oe1 -=oE1half;
+                oro -=oROhalf;
 
-                    int rInd = (e2-sE2)*lenRO*lenE1 + (e1-sE1)*lenRO + ro-sRO;
+                long long dRO, dE1, dE2;
 
-                    // fill matrix A
-                    size_t col = 0;
-                    for ( size_t src=0; src<srcCHA; src++ )
+                for ( long long e2=(long long)sE2; e2<=(long long)eE2; e2++ )
+                {
+                    dE2 = e2 + oe2;
+
+                    for ( long long e1=(long long)sE1; e1<=(long long)eE1; e1++ )
                     {
-                        for ( int ke2=-kE2half; ke2<=kE2half; ke2++ )
+                        dE1 = e1 + oe1;
+
+                        for ( long long ro=sRO; ro<=(long long)eRO; ro++ )
                         {
-                            for ( int ke1=-kE1half; ke1<=kE1half; ke1++ )
+                            dRO = ro + oro;
+
+                            long long rInd = (e2-sE2)*lenRO*lenE1 + (e1-sE1)*lenRO + ro-sRO;
+
+                            // fill matrix A
+                            size_t col = 0;
+                            for ( size_t src=0; src<srcCHA; src++ )
                             {
-                                for ( int kro=-kROhalf; kro<=kROhalf; kro++ )
+                                for ( long long ke2=-kE2half; ke2<=kE2half; ke2++ )
                                 {
-                                    if ( kro!=0 || ke1!=0 || ke2!=0 )
+                                    for ( long long ke1=-kE1half; ke1<=kE1half; ke1++ )
                                     {
-                                        //A(rInd, col++) = acsSrc(ro+kro, e1+ke1, e2+ke2, src);
-                                        pA[rInd+col*rowA] = acsSrc(ro+kro, e1+ke1, e2+ke2, src);
-                                        col++;
+                                        for ( long long kro=-kROhalf; kro<=kROhalf; kro++ )
+                                        {
+                                            if ( kro!=oro || ke1!=oe1 || ke2!=oe2 )
+                                            {
+                                                //A(rInd, col++) = acsSrc(ro+kro, e1+ke1, e2+ke2, src);
+                                                pA[rInd+col*rowA] = acsSrc(ro+kro, e1+ke1, e2+ke2, src);
+                                                col++;
+                                            }
+                                        }
                                     }
                                 }
                             }
-                        }
-                    }
-
-                    // fill matrix B
-                    for ( size_t dst=0; dst<dstCHA; dst++ )
-                    {
-                        //B(rInd, dst) = acsDst(dRO, dE1, dE2, dst);
-                        pB[rInd+dst*rowA] = acsDst(dRO, dE1, dE2, dst);
-                    }
-                }
-            }
-        }
-
-        #ifdef USE_CUDA
-            // go to device
-            try
-            {
-                if ( useGPU )
-                {
-                    hoNDArray<float_complext> A_tmp(A.get_dimensions(), reinterpret_cast<float_complext*>(A.begin()));
-                    hoNDArray<float_complext> B_tmp(B.get_dimensions(), reinterpret_cast<float_complext*>(B.begin()));
-
-                    int ret(0);
-                    boost::shared_ptr< hoNDArray<complext<float> > > host_x;
-                    #pragma omp critical(inverse_spirit3D)
-                    {
-                        cuNDArray<float_complext> device_A(A_tmp);
-                        cuNDArray<float_complext> device_B(B_tmp);
-                        cuNDArray<float_complext> device_x;
 
-                        ret = Gadgetron::inverse_clib_matrix(&device_A, &device_B, &device_x, thres);
-                        if ( ret == 0 )
-                        {
-                            host_x = device_x.to_host();
+                            // fill matrix B
+                            for ( size_t dst=0; dst<dstCHA; dst++ )
+                            {
+                                //B(rInd, dst) = acsDst(dRO, dE1, dE2, dst);
+                                pB[rInd+dst*rowA] = acsDst(dRO, dE1, dE2, dst);
+                            }
                         }
                     }
-
-                    if ( ret != 0 )
-                    {
-                        GADGET_ERROR_MSG("failed in Gadgetron::inverse_clib_matrix(&device_A, &device_B, &device_x, thres) ... ");
-                        SolveLinearSystem_Tikhonov(A, B, x, thres);
-                    }
-                    else
-                    {
-                        memcpy(x.begin(), host_x->begin(), x.get_number_of_bytes());
-                    }
                 }
-                else
-                {
-                    GADGET_WARN_MSG("GPU inverse_clib_matrix is only available for single-precision, calling the CPU version ... ");
+
+                //GADGET_CHECK_RETURN_FALSE(SolveLinearSystem_Tikhonov(A, B, x, thres));
+
+                //GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("grappa 3D calibration - solve linear system ... "));
+                //#ifdef USE_CUDA
+                //    // go to device
+                //    try
+                //    {
+                //        if ( useGPU )
+                //        {
+                //            hoNDArray<float_complext> A_tmp(A.get_dimensions(), reinterpret_cast<float_complext*>(A.begin()));
+                //            hoNDArray<float_complext> B_tmp(B.get_dimensions(), reinterpret_cast<float_complext*>(B.begin()));
+
+                //            int ret(0);
+                //            boost::shared_ptr< hoNDArray<complext<float> > > host_x;
+                //            #pragma omp critical(inverse_spirit3D)
+                //            {
+                //                cuNDArray<float_complext> device_A(A_tmp);
+                //                cuNDArray<float_complext> device_B(B_tmp);
+                //                cuNDArray<float_complext> device_x;
+
+                //                ret = Gadgetron::inverse_clib_matrix(&device_A, &device_B, &device_x, thres);
+                //                if ( ret == 0 )
+                //                {
+                //                    host_x = device_x.to_host();
+                //                }
+                //            }
+
+                //            if ( ret != 0 )
+                //            {
+                //                GADGET_ERROR_MSG("failed in Gadgetron::inverse_clib_matrix(&device_A, &device_B, &device_x, thres) ... ");
+                //                SolveLinearSystem_Tikhonov(A, B, x, thres);
+                //            }
+                //            else
+                //            {
+                //                memcpy(x.begin(), host_x->begin(), x.get_number_of_bytes());
+                //            }
+                //        }
+                //        else
+                //        {
+                //            GADGET_WARN_MSG("GPU inverse_clib_matrix is only available for single-precision, calling the CPU version ... ");
+                //            SolveLinearSystem_Tikhonov(A, B, x, thres);
+                //        }
+                //    }
+                //    catch(...)
+                //    {
+                //        GADGET_ERROR_MSG("failed in GPU inverse_clib_matrix for grappa, calling the CPU version ... ");
+                //        SolveLinearSystem_Tikhonov(A, B, x, thres);
+                //    }
+                //#else
                     SolveLinearSystem_Tikhonov(A, B, x, thres);
-                }
-            }
-            catch(...)
-            {
-                GADGET_ERROR_MSG("failed in GPU inverse_clib_matrix for grappa, calling the CPU version ... ");
-                SolveLinearSystem_Tikhonov(A, B, x, thres);
-            }
-        #else
-            SolveLinearSystem_Tikhonov(A, B, x, thres);
-        #endif // USE_CUDA
+                //#endif // USE_CUDA
+                //GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
-        int ind(0);
+                // SolveLinearSystem_Tikhonov(A, B, x, thres);
 
-        std::vector<size_t> kerInd(8);
-        kerInd[7] = 0;
-        kerInd[6] = 0;
-        kerInd[5] = 0;
+                long long ind(0);
 
-        for ( size_t src=0; src<srcCHA; src++ )
-        {
-            kerInd[3] = src;
-            for ( int ke2=-kE2half; ke2<=kE2half; ke2++ ) 
-            {
-                kerInd[2] = ke2+kE2half;
-                for ( int ke1=-kE1half; ke1<=kE1half; ke1++ ) 
+                std::vector<size_t> kerInd(8);
+                kerInd[7] = oe2+oE2half;
+                kerInd[6] = oe1+oE1half;
+                kerInd[5] = oro+oROhalf;
+
+                for ( size_t src=0; src<srcCHA; src++ )
                 {
-                    kerInd[1] = ke1+kE1half;
-                    for ( int kro=-kROhalf; kro<=kROhalf; kro++ ) 
+                    kerInd[3] = src;
+                    for ( long long ke2=-kE2half; ke2<=kE2half; ke2++ ) 
                     {
-                        kerInd[0] = kro+kROhalf;
-
-                        if ( kro!=0 || ke1!=0 || ke2!=0 )
-                        {
-                            for ( size_t dst=0; dst<dstCHA; dst++ )
-                            {
-                                kerInd[4] = dst;
-                                size_t offset = ker.calculate_offset(kerInd);
-                                ker(offset) = x(ind, dst);
-                            }
-                            ind++;
-                        }
-                        else
+                        kerInd[2] = ke2+kE2half;
+                        for ( long long ke1=-kE1half; ke1<=kE1half; ke1++ ) 
                         {
-                            for ( size_t dst=0; dst<dstCHA; dst++ )
+                            kerInd[1] = ke1+kE1half;
+                            for ( long long kro=-kROhalf; kro<=kROhalf; kro++ ) 
                             {
-                                kerInd[4] = dst;
-                                size_t offset = ker.calculate_offset(kerInd);
-                                ker(offset) = 0;
+                                kerInd[0] = kro+kROhalf;
+
+                                if ( kro!=0 || ke1!=0 || ke2!=0 )
+                                {
+                                    for ( size_t dst=0; dst<dstCHA; dst++ )
+                                    {
+                                        kerInd[4] = dst;
+                                        size_t offset = ker.calculate_offset(kerInd);
+                                        ker(offset) = x(ind, dst);
+                                    }
+                                    ind++;
+                                }
+                                else
+                                {
+                                    for ( size_t dst=0; dst<dstCHA; dst++ )
+                                    {
+                                        kerInd[4] = dst;
+                                        size_t offset = ker.calculate_offset(kerInd);
+                                        ker(offset) = 0;
+                                    }
+                                }
                             }
                         }
                     }
@@ -644,84 +779,97 @@ calib3D(const ho4DArray<T>& acsSrc, const ho4DArray<T>& acsDst, double thres, do
 
 template <typename T> 
 bool gtPlusSPIRIT<T>::
-kspaceDomainConvKernel3D(const hoNDArray<T>& ker, int kRO, int kE1, int kE2, int a, int b, int c, ho5DArray<T>& convKerFlip, bool minusI, bool ROis3rdDim)
+kspaceDomainConvKernel3D(const hoNDArray<T>& ker, size_t kRO, size_t kE1, size_t kE2, size_t oRO, size_t oE1, size_t oE2, ho5DArray<T>& convKerFlip, bool minusI, bool ROis3rdDim)
 {
     try
     {
-        int srcCHA = (int)(ker.get_size(3));
-        int dstCHA = (int)(ker.get_size(4));
+        long long srcCHA = (long long)(ker.get_size(3));
+        long long dstCHA = (long long)(ker.get_size(4));
 
         GADGET_CHECK_RETURN_FALSE(kRO==ker.get_size(0));
         GADGET_CHECK_RETURN_FALSE(kE1==ker.get_size(1));
         GADGET_CHECK_RETURN_FALSE(kE2==ker.get_size(2));
+        GADGET_CHECK_RETURN_FALSE(oRO==ker.get_size(5));
+        GADGET_CHECK_RETURN_FALSE(oE1==ker.get_size(6));
+        GADGET_CHECK_RETURN_FALSE(oE2==ker.get_size(7));
 
-        int kROhalf = kRO/2;
-        int kE1half = kE1/2;
-        int kE2half = kE2/2;
+        long long kROhalf = kRO/2;
+        long long kE1half = kE1/2;
+        long long kE2half = kE2/2;
+        long long oROhalf = oRO/2;
+        long long oE1half = oE1/2;
+        long long oE2half = oE2/2;
 
         /// fill the convolution kernels
-        int convKRO = 2*kRO-1;
-        int convKE1 = 2*kE1-1;
-        int convKE2 = 2*kE2-1;
+        long long convKRO = 2*kRO-1;
+        long long convKE1 = 2*kE1-1;
+        long long convKE2 = 2*kE2-1;
 
         /// fill in convolution kernel
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("spirit 3D calibration - convert to conv kernel ... "));
 
-        hoNDArray<T> convKer(convKRO, convKE1, convKE2, srcCHA, dstCHA, 1, 1, 1);
+        hoNDArray<T> convKer(convKRO, convKE1, convKE2, srcCHA, dstCHA, oRO, oE1, oE2);
         Gadgetron::clear(&convKer);
 
-        int kro, ke1, ke2, src, dst;
+        long long oro, oe1, oe2, kro, ke1, ke2, src, dst;
         std::vector<size_t> kerInd(8), convKerInd(8);
-
-        kerInd[7] = 0;
-        convKerInd[7] = 0;
-
-        kerInd[6] = 0;
-        convKerInd[6] = 0;
-
-        kerInd[5] = 0;
-        convKerInd[5] = 0;
-
-        for ( ke2=-kE2half; ke2<=kE2half; ke2++ )
+        for ( oe2=-oE2half; oe2<=oE2half; oe2++ )
         {
-            kerInd[2] = ke2+kE2half;
+            kerInd[7] = oe2+oE2half;
+            convKerInd[7] = oe2+oE2half;
 
-            for ( ke1=-kE1half; ke1<=kE1half; ke1++ )
+            for ( oe1=-oE1half; oe1<=oE1half; oe1++ )
             {
-                kerInd[1] = ke1+kE1half;
+                kerInd[6] = oe1+oE1half;
+                convKerInd[6] = oe1+oE1half;
 
-                for ( kro=-kROhalf; kro<=kROhalf; kro++ )
+                for ( oro=-oROhalf; oro<=oROhalf; oro++ )
                 {
-                    int iro = kro + kRO -1;
-                    int ie1 = ke1 + kE1 -1;
-                    int ie2 = ke2 + kE2 -1;
+                    kerInd[5] = oro+oROhalf;
+                    convKerInd[5] = oro+oROhalf;
 
-                    kerInd[0] = kro+kROhalf;
-
-                    convKerInd[0] = iro;
-                    convKerInd[1] = ie1;
-                    convKerInd[2] = ie2;
-
-                    for ( dst=0; dst<dstCHA; dst++ )
+                    for ( ke2=-kE2half; ke2<=kE2half; ke2++ )
                     {
-                        kerInd[4] = dst;
-                        convKerInd[4] = dst;
+                        kerInd[2] = ke2+kE2half;
 
-                        for ( src=0; src<srcCHA; src++ )
+                        for ( ke1=-kE1half; ke1<=kE1half; ke1++ )
                         {
-                            kerInd[3] = src;
-                            convKerInd[3] = src;
+                            kerInd[1] = ke1+kE1half;
+
+                            for ( kro=-kROhalf; kro<=kROhalf; kro++ )
+                            {
+                                long long iro = kro - oro + kRO -1;
+                                long long ie1 = ke1 - oe1 + kE1 -1;
+                                long long ie2 = ke2 - oe2 + kE2 -1;
+
+                                kerInd[0] = kro+kROhalf;
+
+                                convKerInd[0] = iro;
+                                convKerInd[1] = ie1;
+                                convKerInd[2] = ie2;
 
-                            size_t offsetKer = ker.calculate_offset(kerInd);
-                            size_t offsetConvKer = convKer.calculate_offset(convKerInd);
+                                for ( dst=0; dst<dstCHA; dst++ )
+                                {
+                                    kerInd[4] = dst;
+                                    convKerInd[4] = dst;
+
+                                    for ( src=0; src<srcCHA; src++ )
+                                    {
+                                        kerInd[3] = src;
+                                        convKerInd[3] = src;
 
-                            convKer(offsetConvKer) = ker(offsetKer);
+                                        size_t offsetKer = ker.calculate_offset(kerInd);
+                                        size_t offsetConvKer = convKer.calculate_offset(convKerInd);
+
+                                        convKer(offsetConvKer) = ker(offsetKer);
+                                    }
+                                }
+                            }
                         }
                     }
                 }
             }
         }
-
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("spirit 3D calibration - sum over output dimensions ... "));
@@ -730,6 +878,7 @@ kspaceDomainConvKernel3D(const hoNDArray<T>& ker, int kRO, int kE1, int kE2, int
         GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOverLastDimension(convKer, convKer2));
         GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOverLastDimension(convKer2, convKer3));
         GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOverLastDimension(convKer3, convKernMean));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::scal( (typename realType<T>::Type)(1.0/(oRO*oE1*oE2)), convKernMean) );
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("spirit 3D calibration - flip along dimensions ... "));
@@ -826,31 +975,35 @@ kspaceDomainConvKernel3D(const hoNDArray<T>& ker, int kRO, int kE1, int kE2, int
 
 template <typename T> 
 bool gtPlusSPIRIT<T>::
-imageDomainKernel3D(const hoNDArray<T>& ker, int kRO, int kE1, int kE2, int a, int b, int c, int ro, int e1, int e2, hoNDArray<T>& kIm, bool minusI)
+imageDomainKernel3D(const hoNDArray<T>& ker, size_t kRO, size_t kE1, size_t kE2, size_t oRO, size_t oE1, size_t oE2, size_t ro, size_t e1, size_t e2, hoNDArray<T>& kIm, bool minusI)
 {
     try
     {
-        int srcCHA = (int)(ker.get_size(3));
-        int dstCHA = (int)(ker.get_size(4));
+        long long srcCHA = (long long)(ker.get_size(3));
+        long long dstCHA = (long long)(ker.get_size(4));
 
         GADGET_CHECK_RETURN_FALSE(kRO==ker.get_size(0));
         GADGET_CHECK_RETURN_FALSE(kE1==ker.get_size(1));
         GADGET_CHECK_RETURN_FALSE(kE2==ker.get_size(2));
+        GADGET_CHECK_RETURN_FALSE(oRO==ker.get_size(5));
+        GADGET_CHECK_RETURN_FALSE(oE1==ker.get_size(6));
+        GADGET_CHECK_RETURN_FALSE(oE2==ker.get_size(7));
 
         // allocate image domain kernel
         kIm.create(e1, e2, ro, srcCHA, dstCHA);
 
         bool ROat3rdDim = true;
         ho5DArray<T> convKerFlip;
-        GADGET_CHECK_RETURN_FALSE(this->kspaceDomainConvKernel3D(ker, kRO, kE1,  kE2, a, b, c, convKerFlip, minusI, ROat3rdDim));
+        GADGET_CHECK_RETURN_FALSE(this->kspaceDomainConvKernel3D(ker, kRO, kE1,  kE2, oRO, oE1, oE2, convKerFlip, minusI, ROat3rdDim));
 
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("spirit 3D calibration - SNR unit scaling ... "));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal( (typename realType<T>::Type)( std::sqrt((double)(ro*e1*e2)) ), convKerFlip ));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::scal( (typename realType<T>::Type)( std::sqrt((double)(ro*e1*e2)) ), convKerFlip ));
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, convKerFlip, "convKerFlip_scal");
 
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("spirit 3D calibration - zero padding ... "));
+        // GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().zeropad3D(convKerFlip, e1, e2, ro, kIm));
         GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().zeropad3DNoPresetZeros(convKerFlip, e1, e2, ro, kIm));
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
@@ -871,20 +1024,23 @@ imageDomainKernel3D(const hoNDArray<T>& ker, int kRO, int kE1, int kE2, int a, i
 
 template <typename T> 
 bool gtPlusSPIRIT<T>::
-imageDomainKernelRO3D(const hoNDArray<T>& ker, int kRO, int kE1, int kE2, int a, int b, int c, int ro, hoNDArray<T>& kImRO, bool minusI)
+imageDomainKernelRO3D(const hoNDArray<T>& ker, size_t kRO, size_t kE1, size_t kE2, size_t oRO, size_t oE1, size_t oE2, size_t ro, hoNDArray<T>& kImRO, bool minusI)
 {
     try
     {
-        int srcCHA = (int)(ker.get_size(3));
-        int dstCHA = (int)(ker.get_size(4));
+        long long srcCHA = (long long)(ker.get_size(3));
+        long long dstCHA = (long long)(ker.get_size(4));
 
         GADGET_CHECK_RETURN_FALSE(kRO==ker.get_size(0));
         GADGET_CHECK_RETURN_FALSE(kE1==ker.get_size(1));
         GADGET_CHECK_RETURN_FALSE(kE2==ker.get_size(2));
+        GADGET_CHECK_RETURN_FALSE(oRO==ker.get_size(5));
+        GADGET_CHECK_RETURN_FALSE(oE1==ker.get_size(6));
+        GADGET_CHECK_RETURN_FALSE(oE2==ker.get_size(7));
 
         bool ROat3rdDim = false;
         ho5DArray<T> convKerFlip;
-        GADGET_CHECK_RETURN_FALSE(this->kspaceDomainConvKernel3D(ker, kRO, kE1,  kE2, a, b, c, convKerFlip, minusI, ROat3rdDim));
+        GADGET_CHECK_RETURN_FALSE(this->kspaceDomainConvKernel3D(ker, kRO, kE1,  kE2, oRO, oE1, oE2, convKerFlip, minusI, ROat3rdDim));
 
         // allocate image domain kernel
         size_t kConvE1 = convKerFlip.get_size(1);
@@ -896,7 +1052,7 @@ imageDomainKernelRO3D(const hoNDArray<T>& ker, int kRO, int kE1, int kE2, int a,
         Gadgetron::clear(kImROTemp);
 
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("spirit 3D calibration - SNR unit scaling ... "));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal( (typename realType<T>::Type)( std::sqrt((double)(ro)) ), convKerFlip ));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::scal( (typename realType<T>::Type)( std::sqrt((double)(ro)) ), convKerFlip ));
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, convKerFlip, "convKerFlip_scal_RO");
@@ -917,7 +1073,7 @@ imageDomainKernelRO3D(const hoNDArray<T>& ker, int kRO, int kE1, int kE2, int a,
     }
     catch(...)
     {
-        GADGET_ERROR_MSG("Errors in gtPlusSPIRIT<T>::imageDomainKernel3D(...) ... ");
+        GADGET_ERROR_MSG("Errors in gtPlusSPIRIT<T>::imageDomainKernelRO3D(...) ... ");
         return false;
     }
 
@@ -926,7 +1082,7 @@ imageDomainKernelRO3D(const hoNDArray<T>& ker, int kRO, int kE1, int kE2, int a,
 
 template <typename T> 
 bool gtPlusSPIRIT<T>::
-imageDomainKernelE1E2RO(const hoNDArray<T>& kImRO, int e1, int e2, hoNDArray<T>& kImE1E2RO)
+imageDomainKernelE1E2RO(const hoNDArray<T>& kImRO, size_t e1, size_t e2, hoNDArray<T>& kImE1E2RO)
 {
     try
     {
@@ -942,7 +1098,7 @@ imageDomainKernelE1E2RO(const hoNDArray<T>& kImRO, int e1, int e2, hoNDArray<T>&
         hoNDArray<T> kImROScaled(kImRO);
 
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("spirit 3D calibration - SNR unit scaling for E1 and E2 ... "));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal( (typename realType<T>::Type)( std::sqrt((double)(e1*e2)) ), kImROScaled ));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::scal( (typename realType<T>::Type)( std::sqrt((double)(e1*e2)) ), kImROScaled ));
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, kImROScaled, "kImROScaledE1E2");
@@ -974,7 +1130,18 @@ imageDomainAdjointKernel(const hoNDArray<T>& kIm, hoNDArray<T>& adjkIm)
     {
         GADGET_CHECK_RETURN_FALSE(Gadgetron::permuteLastTwoDimensions(kIm, adjkIm));
 
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::conjugate(adjkIm, adjkIm));
+        //size_t N = adjkIm.get_number_of_elements();
+
+        //T* pAdjKim = adjkIm.begin();
+
+        //long long n;
+        //#pragma omp parallel for default(none) private(n) shared(N, pAdjKim)
+        //for ( n=0; n<(long long)N; n++ )
+        //{
+        //    pAdjKim[n] = std::conj(pAdjKim[n]);
+        //}
+
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::conjugate(adjkIm, adjkIm));
     }
     catch(...)
     {
@@ -1010,24 +1177,29 @@ bool gtPlusSPIRIT<T>::AdjointForwardKernel(const hoNDArray<T>& kImS2D, const hoN
 
         long long d;
         #ifdef GCC_OLD_FLAG
-            #pragma omp parallel default(none) private(d) shared(N, dstCHA, srcCHA) num_threads(dstCHA) if (dstCHA > 4)
+            #pragma omp parallel default(none) private(d) shared(N, dstCHA, srcCHA) num_threads( (int)dstCHA ) if (dstCHA > 4)
         #else
-            #pragma omp parallel default(none) private(d) shared(N, dstCHA, srcCHA, kIm, kImS2D, kImD2S) num_threads(dstCHA) if (dstCHA > 4)
+            #pragma omp parallel default(none) private(d) shared(N, dstCHA, srcCHA, kIm, kImS2D, kImD2S) num_threads( (int)dstCHA ) if (dstCHA > 4)
         #endif
         {
             hoNDArray<T> ker(N);
 
+            std::vector<size_t> dim(1);
+            dim[0] = N;
+
+            hoNDArray<T> dKer, kerS2D, kerD2S;
+
             #pragma omp for
             for ( d=0; d<dstCHA; d++ )
             {
-                for ( size_t dprime=0; dprime<dstCHA; dprime++ )
+                for ( long long dprime=0; dprime<dstCHA; dprime++ )
                 {
-                    hoNDArray<T> dKer(N, kIm.begin()+d*N+dprime*N*dstCHA);
+                    dKer.create(&dim, kIm.begin()+d*N+dprime*N*dstCHA);
 
-                    for ( size_t s=0; s<srcCHA; s++ )
+                    for ( long long s=0; s<srcCHA; s++ )
                     {
-                        hoNDArray<T> kerS2D(N, const_cast<T*>(kImS2D.begin())+s*N+dprime*N*srcCHA);
-                        hoNDArray<T> kerD2S(N, const_cast<T*>(kImD2S.begin())+d*N+s*N*dstCHA);
+                        kerS2D.create(&dim, const_cast<T*>(kImS2D.begin())+s*N+dprime*N*srcCHA);
+                        kerD2S.create(&dim, const_cast<T*>(kImD2S.begin())+d*N+s*N*dstCHA);
 
                         Gadgetron::multiply(kerS2D, kerD2S, ker);
                         Gadgetron::add(dKer, ker, dKer);
diff --git a/toolboxes/gtplus/algorithm/gtPlusSPIRIT2DOperator.h b/toolboxes/gtplus/algorithm/gtPlusSPIRIT2DOperator.h
index 5ecac83..328cb14 100644
--- a/toolboxes/gtplus/algorithm/gtPlusSPIRIT2DOperator.h
+++ b/toolboxes/gtplus/algorithm/gtPlusSPIRIT2DOperator.h
@@ -35,7 +35,7 @@ public:
     using BaseClass::use_non_centered_fft_;
     using BaseClass::calib_use_gpu_;
 
-protected:
+public:
 
     // [RO E1 srcCHA dstCHA]
     using BaseClass::forward_kernel_;
@@ -177,6 +177,9 @@ inline bool gtPlusSPIRIT2DOperator<T>::adjointOperator(const hoNDArray<T>& x, ho
             this->convertToImage(x, complexIm_);
 
             // apply kernel and sum
+            //Gadgetron::multipleMultiply(complexIm_, *adjoint_kernel_, res_after_apply_kernel_);
+            //Gadgetron::sumOverSecondLastDimension(res_after_apply_kernel_, res_after_apply_kernel_sum_over_);
+
             size_t ro = x.get_size(0);
             size_t e1 = x.get_size(1);
             size_t CHA = x.get_size(2);
@@ -188,6 +191,39 @@ inline bool gtPlusSPIRIT2DOperator<T>::adjointOperator(const hoNDArray<T>& x, ho
 
             Gadgetron::imageDomainUnwrapping2D(complexIm_, *adjoint_kernel_, res_after_apply_kernel_sum_over_, y);
 
+            //long long dCha;
+
+            ////#pragma omp parallel default(shared)
+            //{
+            //    //#ifdef WIN32
+            //    //    int tid = omp_get_thread_num();
+            //    //    DWORD_PTR mask = (1 << tid);
+            //    //    // GADGET_MSG("thread id : " << tid << " - mask : " << mask);
+            //    //    SetThreadAffinityMask( GetCurrentThread(), mask );
+            //    //#endif // WIN32
+
+            //    //#pragma omp for
+
+            //    if ( typeid(T)==typeid( std::complex<float> ) )
+            //    {
+            //        for ( dCha=0; dCha<CHA; dCha++ )
+            //        {
+            //            vcMul(ro*e1*CHA, reinterpret_cast<MKL_Complex8*>(pIm), 
+            //                reinterpret_cast<MKL_Complex8*>(ker+dCha*ro*e1*CHA), 
+            //                reinterpret_cast<MKL_Complex8*>(ptt));
+
+            //            memcpy(pY+dCha*ro*e1, ptt, sizeof(T)*ro*e1);
+            //            for ( size_t sCha=1; sCha<CHA; sCha++ )
+            //            {
+            //                vcAdd(ro*e1, reinterpret_cast<MKL_Complex8*>(pY+dCha*ro*e1), 
+            //                    reinterpret_cast<MKL_Complex8*>(ptt+sCha*ro*e1), 
+            //                    reinterpret_cast<MKL_Complex8*>(pY+dCha*ro*e1));
+            //            }
+            //        }
+            //    }
+
+            //}
+
             // go back to kspace 
             this->convertToKSpace(y, res_after_apply_kernel_sum_over_);
 
diff --git a/toolboxes/gtplus/algorithm/gtPlusSPIRIT2DTOperator.h b/toolboxes/gtplus/algorithm/gtPlusSPIRIT2DTOperator.h
index e99a89d..9fa8a0c 100644
--- a/toolboxes/gtplus/algorithm/gtPlusSPIRIT2DTOperator.h
+++ b/toolboxes/gtplus/algorithm/gtPlusSPIRIT2DTOperator.h
@@ -170,7 +170,8 @@ setAcquiredPoints(boost::shared_ptr< hoNDArray<T> >& kspace)
         #endif
         for ( ii=0; ii<(long long)N; ii++ )
         {
-            if ( std::abs( (*kspace)(ii) ) < DBL_EPSILON )
+            // if ( std::abs( (*kspace)(ii) ) < DBL_EPSILON )
+            if ( GT_ABS((*kspace)(ii).real()) < DBL_EPSILON )
             {
                 this->unacquired_points_indicator_(ii) = 1.0;
             }
@@ -209,11 +210,20 @@ bool gtPlusSPIRIT2DTOperator<T>::grad(const hoNDArray<T>& x, hoNDArray<T>& g)
         // 2*Dc*(G-I)'(G-I)(D'y+Dc'x)
 
         // D'y+Dc'x
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::multiply(this->unacquired_points_indicator_, x, this->kspace_));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::add(*this->acquired_points_, this->kspace_, this->kspace_));
+        //gt_timer1_.start("1");
+        Gadgetron::multiply(this->unacquired_points_indicator_, x, this->kspace_);
+        //gt_timer1_.stop();
+
+        //gt_timer1_.start("2");
+        Gadgetron::add(*this->acquired_points_, this->kspace_, this->kspace_);
+        //gt_timer1_.stop();
 
         // x to image domain
+        //gt_timer1_.start("3");
         GADGET_CHECK_RETURN_FALSE(this->convertToImage(this->kspace_, this->complexIm_));
+        //gt_timer1_.stop();
+
+        //gt_timer1_.start("4");
 
         // apply kernel and sum
         size_t RO = x.get_size(0);
@@ -224,7 +234,12 @@ bool gtPlusSPIRIT2DTOperator<T>::grad(const hoNDArray<T>& x, hoNDArray<T>& g)
         size_t dstCHA = this->adjoint_forward_kernel_->get_size(3);
         size_t kernelN = this->adjoint_forward_kernel_->get_size(4);
 
-        this->res_after_apply_kernel_sum_over_.create(RO, E1, dstCHA, N);
+        if ( this->res_after_apply_kernel_sum_over_.get_number_of_elements() < RO*E1*dstCHA*N )
+        {
+            this->res_after_apply_kernel_sum_over_.create(RO, E1, dstCHA, N);
+        }
+
+        //Gadgetron::imageDomainUnwrapping2DT(this->complexIm_, *(this->adjoint_forward_kernel_), this->res_after_apply_kernel_sum_over_, g);
 
         size_t n;
         for ( n=0; n<N; n++)
@@ -248,14 +263,22 @@ bool gtPlusSPIRIT2DTOperator<T>::grad(const hoNDArray<T>& x, hoNDArray<T>& g)
             GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOverSecondLastDimension(this->res_after_apply_kernel_, sumResCurr));
         }
 
+        //gt_timer1_.stop();
+
         // go back to kspace 
+        //gt_timer1_.start("5");
         GADGET_CHECK_RETURN_FALSE(this->convertToKSpace(this->res_after_apply_kernel_sum_over_, g));
+        //gt_timer1_.stop();
 
         // apply Dc
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::multiply(this->unacquired_points_indicator_, g, g));
+        //gt_timer1_.start("6");
+        Gadgetron::multiply(this->unacquired_points_indicator_, g, g);
+        //gt_timer1_.stop();
 
         // multiply by 2
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal(T(2.0), g));
+        //gt_timer1_.start("7");
+        Gadgetron::scal(T(2.0), g);
+        //gt_timer1_.stop();
     }
     catch(...)
     {
@@ -275,8 +298,8 @@ bool gtPlusSPIRIT2DTOperator<T>::obj(const hoNDArray<T>& x, T& obj)
         // ||(G-I)(D'y+Dc'x)||2
 
         // D'y+Dc'x
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::multiply(this->unacquired_points_indicator_, x, this->kspace_));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::add(*this->acquired_points_, this->kspace_, this->kspace_));
+        Gadgetron::multiply(this->unacquired_points_indicator_, x, this->kspace_);
+        Gadgetron::add(*this->acquired_points_, this->kspace_, this->kspace_);
 
         // x to image domain
         GADGET_CHECK_RETURN_FALSE(this->convertToImage(this->kspace_, this->complexIm_));
@@ -290,7 +313,12 @@ bool gtPlusSPIRIT2DTOperator<T>::obj(const hoNDArray<T>& x, T& obj)
         size_t dstCHA = this->forward_kernel_->get_size(3);
         size_t kernelN = this->forward_kernel_->get_size(4);
 
-        this->res_after_apply_kernel_sum_over_.create(RO, E1, dstCHA, N);
+        if ( this->res_after_apply_kernel_sum_over_.get_number_of_elements() < RO*E1*dstCHA*N )
+        {
+            this->res_after_apply_kernel_sum_over_.create(RO, E1, dstCHA, N);
+        }
+
+        //Gadgetron::imageDomainUnwrapping2DT(this->complexIm_, *(this->forward_kernel_), this->res_after_apply_kernel_sum_over_, this->kspace_);
 
         size_t n;
         for ( n=0; n<N; n++)
@@ -315,7 +343,7 @@ bool gtPlusSPIRIT2DTOperator<T>::obj(const hoNDArray<T>& x, T& obj)
         }
 
         // L2 norm
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::dotc(this->res_after_apply_kernel_sum_over_, this->res_after_apply_kernel_sum_over_, obj));
+        Gadgetron::dotc(this->res_after_apply_kernel_sum_over_, this->res_after_apply_kernel_sum_over_, obj);
     }
     catch(...)
     {
diff --git a/toolboxes/gtplus/algorithm/gtPlusSPIRITNoNullSpace2DTOperator.h b/toolboxes/gtplus/algorithm/gtPlusSPIRITNoNullSpace2DTOperator.h
index 132f172..8db66c5 100644
--- a/toolboxes/gtplus/algorithm/gtPlusSPIRITNoNullSpace2DTOperator.h
+++ b/toolboxes/gtplus/algorithm/gtPlusSPIRITNoNullSpace2DTOperator.h
@@ -219,7 +219,7 @@ bool gtPlusSPIRITNoNullSpace2DTOperator<T>::grad(const hoNDArray<T>& x, hoNDArra
         GADGET_CHECK_RETURN_FALSE(this->convertToKSpace(this->res_after_apply_kernel_sum_over_, g));
 
         // multiply by 2
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal(T(2.0), g));
+        Gadgetron::scal(T(2.0), g);
     }
     catch(...)
     {
@@ -275,7 +275,7 @@ bool gtPlusSPIRITNoNullSpace2DTOperator<T>::obj(const hoNDArray<T>& x, T& obj)
         }
 
         // L2 norm
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::dotc(this->res_after_apply_kernel_sum_over_, this->res_after_apply_kernel_sum_over_, obj));
+        Gadgetron::dotc(this->res_after_apply_kernel_sum_over_, this->res_after_apply_kernel_sum_over_, obj);
     }
     catch(...)
     {
diff --git a/toolboxes/gtplus/algorithm/gtPlusSPIRITNoNullSpaceOperator.h b/toolboxes/gtplus/algorithm/gtPlusSPIRITNoNullSpaceOperator.h
index 578e3ac..5d1f621 100644
--- a/toolboxes/gtplus/algorithm/gtPlusSPIRITNoNullSpaceOperator.h
+++ b/toolboxes/gtplus/algorithm/gtPlusSPIRITNoNullSpaceOperator.h
@@ -89,7 +89,7 @@ bool gtPlusSPIRITNoNullSpaceOperator<T>::grad(const hoNDArray<T>& x, hoNDArray<T
         GADGET_CHECK_RETURN_FALSE(this->convertToKSpace(this->res_after_apply_kernel_sum_over_, g));
 
         // multiply by 2
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal(T(2.0), g));
+        Gadgetron::scal(T(2.0), g);
     }
     catch(...)
     {
@@ -116,7 +116,7 @@ bool gtPlusSPIRITNoNullSpaceOperator<T>::obj(const hoNDArray<T>& x, T& obj)
         GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOverSecondLastDimension(this->res_after_apply_kernel_, this->res_after_apply_kernel_sum_over_));
 
         // L2 norm
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::dotc(this->res_after_apply_kernel_sum_over_, this->res_after_apply_kernel_sum_over_, obj));
+        Gadgetron::dotc(this->res_after_apply_kernel_sum_over_, this->res_after_apply_kernel_sum_over_, obj);
     }
     catch(...)
     {
diff --git a/toolboxes/gtplus/algorithm/gtPlusSPIRITOperator.h b/toolboxes/gtplus/algorithm/gtPlusSPIRITOperator.h
index 89eb95a..976da73 100644
--- a/toolboxes/gtplus/algorithm/gtPlusSPIRITOperator.h
+++ b/toolboxes/gtplus/algorithm/gtPlusSPIRITOperator.h
@@ -45,6 +45,10 @@ public:
     // compute cost value of L2 norm ||(G-I)(Dc'x+D'y)||2
     virtual bool obj(const hoNDArray<T>& x, T& obj);
 
+    // indicate the operator is unitary or not
+    // unitary operator, AA' = I
+    virtual bool unitary() const { return false; }
+
     // convert to image domain or back to kspace
     virtual bool convertToImage(const hoNDArray<T>& x, hoNDArray<T>& im) = 0;
     virtual bool convertToKSpace(const hoNDArray<T>& im, hoNDArray<T>& x) = 0;
@@ -336,11 +340,11 @@ bool gtPlusSPIRITOperator<T>::computeRighHandSide(const hoNDArray<T>& x, hoNDArr
         // apply Dc
         if ( use_symmetric_spirit_ )
         {
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::multiply(unacquired_points_indicator_, b, b));
+            Gadgetron::multiply(unacquired_points_indicator_, b, b);
         }
 
         // multiply by -1
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal( (typename realType<T>::Type)(-1.0), b));
+        Gadgetron::scal( (typename realType<T>::Type)(-1.0), b);
     }
     catch(...)
     {
@@ -360,8 +364,8 @@ bool gtPlusSPIRITOperator<T>::grad(const hoNDArray<T>& x, hoNDArray<T>& g)
         // 2*Dc*(G-I)'(G-I)(D'y+Dc'x)
 
         // D'y+Dc'x
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::multiply(unacquired_points_indicator_, x, kspace_));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::add(*acquired_points_, kspace_, kspace_));
+        Gadgetron::multiply(unacquired_points_indicator_, x, kspace_);
+        Gadgetron::add(*acquired_points_, kspace_, kspace_);
 
         // x to image domain
         GADGET_CHECK_RETURN_FALSE(this->convertToImage(kspace_, complexIm_));
@@ -374,10 +378,10 @@ bool gtPlusSPIRITOperator<T>::grad(const hoNDArray<T>& x, hoNDArray<T>& g)
         GADGET_CHECK_RETURN_FALSE(this->convertToKSpace(res_after_apply_kernel_sum_over_, g));
 
         // apply Dc
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::multiply(unacquired_points_indicator_, g, g));
+        Gadgetron::multiply(unacquired_points_indicator_, g, g);
 
         // multiply by 2
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal( (typename realType<T>::Type)(2.0), g));
+        Gadgetron::scal( (typename realType<T>::Type)(2.0), g);
     }
     catch(...)
     {
@@ -397,8 +401,8 @@ bool gtPlusSPIRITOperator<T>::obj(const hoNDArray<T>& x, T& obj)
         // ||(G-I)(D'y+Dc'x)||2
 
         // D'y+Dc'x
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::multiply(unacquired_points_indicator_, x, kspace_));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::add(*acquired_points_, kspace_, kspace_));
+        Gadgetron::multiply(unacquired_points_indicator_, x, kspace_);
+        Gadgetron::add(*acquired_points_, kspace_, kspace_);
 
         // x to image domain
         GADGET_CHECK_RETURN_FALSE(this->convertToImage(kspace_, complexIm_));
@@ -408,7 +412,7 @@ bool gtPlusSPIRITOperator<T>::obj(const hoNDArray<T>& x, T& obj)
         GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOverSecondLastDimension(res_after_apply_kernel_, res_after_apply_kernel_sum_over_));
 
         // L2 norm
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::dotc(res_after_apply_kernel_sum_over_, res_after_apply_kernel_sum_over_, obj));
+        Gadgetron::dotc(res_after_apply_kernel_sum_over_, res_after_apply_kernel_sum_over_, obj);
     }
     catch(...)
     {
diff --git a/toolboxes/gtplus/algorithm/gtPlusWavelet2DOperator.h b/toolboxes/gtplus/algorithm/gtPlusWavelet2DOperator.h
index 79a63dd..8fe07d2 100644
--- a/toolboxes/gtplus/algorithm/gtPlusWavelet2DOperator.h
+++ b/toolboxes/gtplus/algorithm/gtPlusWavelet2DOperator.h
@@ -17,18 +17,19 @@ class gtPlusWavelet2DOperator : public gtPlusWaveletOperator<T>
 public:
 
     typedef gtPlusWaveletOperator<T> BaseClass;
+    typedef typename BaseClass::value_type value_type;
 
     gtPlusWavelet2DOperator();
     virtual ~gtPlusWavelet2DOperator();
 
     virtual void printInfo(std::ostream& os);
 
-    // forward operator
+    // forward operator, perform wavelet transform
     // x: [RO E1 ...]
     // y: [RO E1 W ...]
     virtual bool forwardOperator(const hoNDArray<T>& x, hoNDArray<T>& y);
 
-    // adjoint operator
+    // adjoint operator, perform inverse transform
     // x: [RO E1 W ...]
     // y: [RO E1 ...]
     virtual bool adjointOperator(const hoNDArray<T>& x, hoNDArray<T>& y);
@@ -41,6 +42,10 @@ public:
     // in : [RO E1 1+3*level], out : [RO E1]
     bool idwtRedundantHaar(const hoNDArray<T>& in, hoNDArray<T>& out, size_t level);
 
+    virtual bool unitary() const { return true; }
+
+    using BaseClass::scale_factor_first_dimension_;
+    using BaseClass::scale_factor_second_dimension_;
     using BaseClass::numOfWavLevels_;
     using BaseClass::with_approx_coeff_;
     using BaseClass::gt_timer1_;
@@ -121,10 +126,10 @@ forwardOperator(const hoNDArray<T>& x, hoNDArray<T>& y)
         T* pX = const_cast<T*>(x.begin());
         T* pY = y.begin();
 
-        int t;
+        long long t;
 
         #pragma omp parallel for default(none) private(t) shared(num, RO, E1, W, pX, pY)
-        for ( t=0; t<num; t++ )
+        for ( t=0; t<(long long)num; t++ )
         {
             hoNDArray<T> in(RO, E1, pX+t*RO*E1);
             hoNDArray<T> out(RO, E1, W, pY+t*RO*E1*W);
@@ -172,10 +177,10 @@ adjointOperator(const hoNDArray<T>& x, hoNDArray<T>& y)
         T* pX = const_cast<T*>(x.begin());
         T* pY = y.begin();
 
-        int t;
+        long long t;
 
         #pragma omp parallel for default(none) private(t) shared(num, RO, E1, W, pX, pY)
-        for ( t=0; t<num; t++ )
+        for ( t=0; t<(long long)num; t++ )
         {
             hoNDArray<T> in(RO, E1, W, pX+t*RO*E1*W);
             hoNDArray<T> out(RO, E1, pY+t*RO*E1);
@@ -208,14 +213,14 @@ dwtRedundantHaar(const hoNDArray<T>& in, hoNDArray<T>& out, size_t level)
         {
             T* LH = pOut + (3*n+1)*RO*E1;
 
-            int ro;
+            long long ro;
             #pragma omp parallel for default(none) private(ro) shared(RO, E1, pOut, LH)
-            for (ro=0; ro<RO; ro++)
+            for (ro=0; ro<(long long)RO; ro++)
             {
                 T v1 = pOut[ro];
 
-                int ii=ro, e1;
-                for (e1=0; e1<E1-1; e1++)
+                long long ii=ro, e1;
+                for (e1=0; e1<(long long)E1-1; e1++)
                 {
                     LH[ii] = pOut[ii] - pOut[ii+RO];
                     pOut[ii] += pOut[ii+RO];
@@ -226,21 +231,21 @@ dwtRedundantHaar(const hoNDArray<T>& in, hoNDArray<T>& out, size_t level)
                 pOut[ii] += v1;
             }
 
-            Gadgetron::scal( scaleFactor, pOut, RO*E1);
-            Gadgetron::scal( scaleFactor, LH, RO*E1);
+            this->scal( RO*E1, scaleFactor, pOut );
+            this->scal( RO*E1, scaleFactor, LH );
 
             T* HL = LH + RO*E1;
             T* HH = HL + RO*E1;
 
-            int e1;
+            long long e1;
             #pragma omp parallel for default(none) private(e1) shared(RO, E1, pOut, LH, HL, HH)
-            for (e1=0; e1<E1; e1++)
+            for (e1=0; e1<(long long)E1; e1++)
             {
                 T v1 = pOut[e1*RO];
                 T v2 = LH[e1*RO];
 
                 size_t ii = e1*RO;
-                for (int ro=0; ro<RO-1; ro++)
+                for (long long ro=0; ro<(long long)RO-1; ro++)
                 {
                     HH[ii] = LH[ii] - LH[ii+1];
                     LH[ii] += LH[ii+1];
@@ -258,10 +263,10 @@ dwtRedundantHaar(const hoNDArray<T>& in, hoNDArray<T>& out, size_t level)
                 pOut[ii] += v1;
             }
 
-            Gadgetron::scal( scaleFactor, pOut, RO*E1);
-            Gadgetron::scal( scaleFactor, LH, RO*E1);
-            Gadgetron::scal( scaleFactor, HL, RO*E1);
-            Gadgetron::scal( scaleFactor, HH, RO*E1);
+            this->scal( RO*E1, scaleFactor, pOut);
+            this->scal( RO*E1, scaleFactor, LH);
+            this->scal( RO*E1, scaleFactor, HL);
+            this->scal( RO*E1, scaleFactor, HH);
         }
     }
     catch (...)
@@ -290,16 +295,16 @@ idwtRedundantHaar(const hoNDArray<T>& in, hoNDArray<T>& out, size_t level)
 
         T scaleFactor = 0.5;
 
-        int n;
-        for (n=level-1; n>=0; n--)
+        long long n;
+        for (n=(long long)level-1; n>=0; n--)
         {
             T* LH = pIn + (3*n+1)*RO*E1;
             T* HL = LH + RO*E1;
             T* HH = HL + RO*E1;
 
-            int e1;
+            long long e1;
             #pragma omp parallel for default(none) private(e1) shared(RO, E1, pOut, LH, HL, HH, pTmp)
-            for (e1=0; e1<E1; e1++)
+            for (e1=0; e1<(long long)E1; e1++)
             {
                 size_t ii = e1*RO+RO-1;
 
@@ -308,7 +313,7 @@ idwtRedundantHaar(const hoNDArray<T>& in, hoNDArray<T>& out, size_t level)
                 T vHL = HL[ii];
                 T vHH = HH[ii];
 
-                for (int ro=RO-1; ro>0; ro--)
+                for (long long ro=RO-1; ro>0; ro--)
                 {
                     // ii = e1*RO + ro;
                     pOut[ii] += pOut[ii-1] + HL[ii] - HL[ii-1];
@@ -325,18 +330,18 @@ idwtRedundantHaar(const hoNDArray<T>& in, hoNDArray<T>& out, size_t level)
                 pTmp [ii] = LH[ii] + vLH + HH[ii] - vHH;
             }
 
-            Gadgetron::scal( scaleFactor, pOut, RO*E1);
-            Gadgetron::scal( scaleFactor, pTmp, RO*E1);
+            this->scal( RO*E1, scaleFactor, pOut );
+            this->scal( RO*E1, scaleFactor, pTmp );
 
-            int ro;
+            long long ro;
             #pragma omp parallel for default(none) private(ro) shared(RO, E1, pOut, pTmp)
-            for (ro=0; ro<RO; ro++)
+            for (ro=0; ro<(long long)RO; ro++)
             {
                 size_t ii = (E1-1)*RO+ro;
                 T vLL = pOut[ii];
                 T vLH = pTmp [ii];
 
-                for (int e1=E1-1; e1>0; e1--)
+                for (long long e1=E1-1; e1>0; e1--)
                 {
                     // ii = e1*RO + ro;
                     pOut[ii] += pTmp[ii] + pOut[ii-RO] - pTmp[ii-RO];
@@ -346,7 +351,7 @@ idwtRedundantHaar(const hoNDArray<T>& in, hoNDArray<T>& out, size_t level)
                 pOut[ro] += pTmp[ro] + vLL - vLH;
             }
 
-            Gadgetron::scal( scaleFactor, pOut, RO*E1);
+            this->scal( RO*E1, scaleFactor, pOut );
         }
     }
     catch (...)
diff --git a/toolboxes/gtplus/algorithm/gtPlusWavelet3DOperator.h b/toolboxes/gtplus/algorithm/gtPlusWavelet3DOperator.h
index c9d4b5d..e1b8e85 100644
--- a/toolboxes/gtplus/algorithm/gtPlusWavelet3DOperator.h
+++ b/toolboxes/gtplus/algorithm/gtPlusWavelet3DOperator.h
@@ -17,6 +17,7 @@ class gtPlusWavelet3DOperator : public gtPlusWaveletOperator<T>
 public:
 
     typedef gtPlusWaveletOperator<T> BaseClass;
+    typedef typename BaseClass::value_type value_type;
 
     gtPlusWavelet3DOperator();
     virtual ~gtPlusWavelet3DOperator();
@@ -39,6 +40,8 @@ public:
     // in : [RO E1 E2 1+7*level], out : [RO E1 E2]
     bool idwtRedundantHaar(const hoNDArray<T>& in, hoNDArray<T>& out, size_t level);
 
+    virtual bool unitary() const { return true; }
+
     // compute L1 norm of wavelet coefficients across CHA
     // waveCoeff: [RO E1 E2 W CHA ...], W is the wavelet coefficient dimension (e.g. for 1 level wavelet decomposition, W=4 for 2D and W=8 for 3D)
     // the W=1 wavelet coefficient is the most low frequent coefficients
@@ -50,7 +53,8 @@ public:
 
     // soft-threshold or shrink the wavelet coefficients
     // the really applied threshold is mask.*thres
-    virtual bool shrinkWavCoeff(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, T thres, const hoNDArray<T>& mask, bool processApproxCoeff=false);
+    virtual bool shrinkWavCoeff(hoNDArray<T>& wavCoeff, const hoNDArray<value_type>& wavCoeffNorm, value_type thres, const hoNDArray<T>& mask, bool processApproxCoeff=false);
+    virtual bool proximity(hoNDArray<T>& wavCoeff, value_type thres);
 
     // if the sensitivity S is set, compute gradient of ||wav*F'*S'*(Dc'x+D'y)||1
     // if not, compute gradient of ||wav*F'*(Dc'x+D'y)||1
@@ -74,10 +78,13 @@ public:
     // More generally, a weighting matrix can be concatenated with wavelet coefficients to enhance or suppress regularization effects as needed
     // the regularization term can become ||W*wav*F'*(Dc'x+D'y)||1, W is the general weighting matrix
     // in the next version, we shall extend this class with more geneal weighting strategy
-    T scale_factor_first_dimension_;
-    T scale_factor_second_dimension_;
     T scale_factor_third_dimension_;
 
+    // in some cases, the boundary high frequency coefficients of the 3rd dimension should not be changed
+    bool change_coeffcients_third_dimension_boundary_;
+
+    using BaseClass::scale_factor_first_dimension_;
+    using BaseClass::scale_factor_second_dimension_;
     using BaseClass::numOfWavLevels_;
     using BaseClass::with_approx_coeff_;
     using BaseClass::gt_timer1_;
@@ -90,7 +97,7 @@ public:
     using BaseClass::gtPlus_util_complex_;
     using BaseClass::gtPlus_mem_manager_;
 
-protected:
+public:
 
     // compute gradient on the assembled kspace
     virtual bool gradTask(const hoNDArray<T>& x, hoNDArray<T>& g);
@@ -98,6 +105,11 @@ protected:
     // compute the obj on the assembled kspace
     virtual bool objTask(const hoNDArray<T>& x, T& obj);
 
+    // help memory
+    hoNDArray<T> mask_;
+    hoNDArray<T> forward_buf_;
+    hoNDArray<T> adjoint_buf_;
+
     using BaseClass::acquired_points_;
     using BaseClass::acquired_points_indicator_;
     using BaseClass::unacquired_points_indicator_;
@@ -111,7 +123,11 @@ protected:
     using BaseClass::res_after_apply_kernel_sum_over_;
 
     using BaseClass::wav_coeff_norm_;
+    using BaseClass::wav_coeff_norm_mag_;
     using BaseClass::wav_coeff_norm_approx_;
+
+    hoNDArray<value_type> wav_coeff_norm_mag_sumCHA_;
+
     using BaseClass::kspace_wav_;
     using BaseClass::complexIm_wav_;
 
@@ -123,9 +139,8 @@ protected:
 
 template <typename T> 
 gtPlusWavelet3DOperator<T>::gtPlusWavelet3DOperator() : 
-        scale_factor_first_dimension_(1.0), 
-        scale_factor_second_dimension_(1.0), 
         scale_factor_third_dimension_(1.0), 
+        change_coeffcients_third_dimension_boundary_(true), 
         BaseClass()
 {
 
@@ -188,19 +203,23 @@ forwardOperator(const hoNDArray<T>& x, hoNDArray<T>& y)
         }
         else
         {
-            #pragma omp parallel default(none) private(t) shared(num, RO, E1, CHA, E2, W, pX, pY) if ( num > 1 )
+            // #pragma omp parallel default(none) private(t) shared(num, RO, E1, CHA, E2, W, pX, pY) if ( num > 1 )
             {
-                hoNDArray<T> inPermute(RO, E1, E2, CHA);
+                // hoNDArray<T> inPermute(RO, E1, E2, CHA);
+                forward_buf_.create(RO, E1, E2, CHA);
 
-                #pragma omp for
+                // #pragma omp for
                 for ( t=0; t<num; t++ )
                 {
                     hoNDArray<T> in(RO, E1, CHA, E2, pX+t*RO*E1*CHA*E2);
-                    Gadgetron::permuteLastTwoDimensions(in, inPermute);
+                    Gadgetron::permuteLastTwoDimensions(in, forward_buf_);
+
+                    long long cha;
 
-                    for ( size_t cha=0; cha<CHA; cha++ )
+                    #pragma omp parallel for default(none) private(cha) shared(num, RO, E1, CHA, E2, W, pY, t) if ( CHA > 4 )
+                    for ( cha=0; cha<(long long)CHA; cha++ )
                     {
-                        hoNDArray<T> in_dwt(RO, E1, E2, inPermute.begin()+cha*RO*E1*E2);
+                        hoNDArray<T> in_dwt(RO, E1, E2, forward_buf_.begin()+cha*RO*E1*E2);
                         hoNDArray<T> out(RO, E1, E2, W, pY+t*RO*E1*E2*W*CHA+cha*RO*E1*E2*W);
 
                         this->dwtRedundantHaar(in_dwt, out, numOfWavLevels_);
@@ -268,24 +287,27 @@ adjointOperator(const hoNDArray<T>& x, hoNDArray<T>& y)
         }
         else
         {
-            #pragma omp parallel default(none) private(t) shared(num, RO, E1, CHA, E2, W, pX, pY) if ( num > 1 ) num_threads( ((num>16) ? 16 : num))
+            // #pragma omp parallel default(none) private(t) shared(num, RO, E1, CHA, E2, W, pX, pY) if ( num > 1 ) num_threads( (int)((num>16) ? 16 : num))
             {
-                hoNDArray<T> outPermute(RO, E1, E2, CHA);
+                // hoNDArray<T> outPermute(RO, E1, E2, CHA);
+                adjoint_buf_.create(RO, E1, E2, CHA);
 
-                #pragma omp for
+                // #pragma omp for
                 for ( t=0; t<num; t++ )
                 {
                     hoNDArray<T> out(RO, E1, CHA, E2, pY+t*RO*E1*CHA*E2);
 
-                    for ( size_t cha=0; cha<CHA; cha++ )
+                    long long cha;
+                    #pragma omp parallel for default(none) private(cha) shared(RO, E1, CHA, E2, W, pX) if ( CHA > 4 )
+                    for ( cha=0; cha<(long long)CHA; cha++ )
                     {
                         hoNDArray<T> in(RO, E1, E2, W, pX+cha*RO*E1*E2*W);
-                        hoNDArray<T> out_idwt(RO, E1, E2, outPermute.begin()+cha*RO*E1*E2);
+                        hoNDArray<T> out_idwt(RO, E1, E2, adjoint_buf_.begin()+cha*RO*E1*E2);
 
                         this->idwtRedundantHaar(in, out_idwt, numOfWavLevels_);
                     }
 
-                    Gadgetron::permuteLastTwoDimensions(outPermute, out);
+                    Gadgetron::permuteLastTwoDimensions(adjoint_buf_, out);
                 }
             }
         }
@@ -321,7 +343,7 @@ L1Norm(const hoNDArray<T>& wavCoeff, hoNDArray<T>& wavCoeffNorm)
         size_t CHA = (*dims)[4];
 
         // square the coefficients
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::multiplyConj(wavCoeff, wavCoeff, complexIm_norm_));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::multiplyConj(wavCoeff, wavCoeff, complexIm_norm_));
         // sum over CHA
         GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOver5thDimension(complexIm_norm_, wavCoeffNorm));
     }
@@ -339,11 +361,11 @@ divideWavCoeffByNorm(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, T
 {
     try
     {
-        size_t RO = wavCoeff.get_size(0);
-        size_t E1 = wavCoeff.get_size(1);
-        size_t E2 = wavCoeff.get_size(2);
-        size_t W = wavCoeff.get_size(3);
-        size_t CHA = wavCoeff.get_size(4);
+        long long RO = (long long)wavCoeff.get_size(0);
+        long long E1 = (long long)wavCoeff.get_size(1);
+        long long E2 = (long long)wavCoeff.get_size(2);
+        long long W = (long long)wavCoeff.get_size(3);
+        long long CHA = (long long)wavCoeff.get_size(4);
 
         if ( !wav_coeff_norm_approx_.dimensions_equal( &wavCoeffNorm ) )
         {
@@ -361,7 +383,7 @@ divideWavCoeffByNorm(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, T
             #pragma omp parallel for default(none) private(ii) shared(N, pBuf, pCoeffNorm, mu)
             for ( ii=0; ii<N; ii++ )
             {
-                pBuf[ii] = 1.0 / std::sqrt( pCoeffNorm[ii].real() + mu.real() );
+                pBuf[ii] = (value_type)( 1.0 / std::sqrt( pCoeffNorm[ii].real() + mu.real() ) );
             }
         }
         else
@@ -369,7 +391,7 @@ divideWavCoeffByNorm(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, T
             #pragma omp parallel for default(none) private(ii) shared(N, pBuf, pCoeffNorm, mu, p)
             for ( ii=0; ii<N; ii++ )
             {
-                pBuf[ii] = std::pow( (double)(pCoeffNorm[ii].real() + mu.real()), (double)(p.real()/2.0-1.0) );
+                pBuf[ii] = (value_type)std::pow( (double)(pCoeffNorm[ii].real() + mu.real()), (double)(p.real()/2.0-1.0) );
             }
         }
 
@@ -380,7 +402,7 @@ divideWavCoeffByNorm(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, T
         else
         {
             // GADGET_CHECK_RETURN_FALSE(Gadgetron::multiplyOver5thDimensionExcept(wav_coeff_norm_approx_, wavCoeff, 0, wavCoeff, true));
-            size_t num = wavCoeff.get_number_of_elements()/(RO*E1*E2*W*CHA);
+            long long num = wavCoeff.get_number_of_elements()/(RO*E1*E2*W*CHA);
 
             #ifdef GCC_OLD_FLAG
                 #pragma omp parallel default(none) private(ii) shared(RO, E1, E2, num, W, CHA) if ( num > 1 )
@@ -394,7 +416,7 @@ divideWavCoeffByNorm(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, T
                 {
                     hoNDArray<T> wavCoeffNormCurr(RO, E1, E2, W-1, wav_coeff_norm_approx_.begin()+ii*RO*E1*E2*W+RO*E1*E2);
 
-                    for ( size_t cha=0; cha<CHA; cha++ )
+                    for ( long long cha=0; cha<CHA; cha++ )
                     {
                         hoNDArray<T> wavCoeffCurr(RO, E1, E2, W-1, wavCoeff.begin()+ii*RO*E1*E2*W*CHA+cha*RO*E1*E2*W+RO*E1*E2);
                         Gadgetron::multiply(wavCoeffNormCurr, wavCoeffCurr, wavCoeffCurr);
@@ -413,27 +435,67 @@ divideWavCoeffByNorm(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, T
 
 template <typename T> 
 bool gtPlusWavelet3DOperator<T>::
-shrinkWavCoeff(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, T thres, const hoNDArray<T>& mask, bool processApproxCoeff)
+proximity(hoNDArray<T>& wavCoeff, value_type thres)
+{
+    try
+    {
+        // GADGET_CHECK_RETURN_FALSE(this->L1Norm(wavCoeff, wav_coeff_norm_));
+
+        // GADGET_CHECK_RETURN_FALSE(Gadgetron::multiplyConj(wavCoeff, wavCoeff, wav_coeff_norm_));
+        Gadgetron::abs(wavCoeff, wav_coeff_norm_mag_);
+
+        if ( !mask_.dimensions_equal(&wavCoeff) )
+        {
+            mask_.create(wavCoeff.get_dimensions());
+        }
+
+        Gadgetron::fill(mask_, T(thres) );
+
+        if ( GT_ABS(std::abs(scale_factor_first_dimension_)-1.0) > 1e-6 )
+        {
+            GADGET_CHECK_RETURN_FALSE(this->firstDimensionScale(mask_, scale_factor_first_dimension_));
+        }
+
+        if ( GT_ABS(std::abs(scale_factor_second_dimension_)-1.0) > 1e-6 )
+        {
+            GADGET_CHECK_RETURN_FALSE(this->secondDimensionScale(mask_, scale_factor_second_dimension_));
+        }
+
+        if ( GT_ABS(std::abs(scale_factor_third_dimension_)-1.0) > 1e-6 )
+        {
+            GADGET_CHECK_RETURN_FALSE(this->thirdDimensionScale(mask_, scale_factor_third_dimension_));
+        }
+
+        GADGET_CHECK_RETURN_FALSE(this->shrinkWavCoeff(wavCoeff, wav_coeff_norm_mag_, thres, mask_, this->with_approx_coeff_));
+    }
+    catch (...)
+    {
+        GADGET_ERROR_MSG("Errors in gtPlusWavelet3DOperator<T>::proximity(hoNDArray<T>& wavCoeff, T thres) ... ");
+        return false;
+    }
+    return true;
+}
+
+template <typename T> 
+bool gtPlusWavelet3DOperator<T>::
+shrinkWavCoeff(hoNDArray<T>& wavCoeff, const hoNDArray<value_type>& wavCoeffNorm, value_type thres, const hoNDArray<T>& mask, bool processApproxCoeff)
 {
     try
     {
         boost::shared_ptr< std::vector<size_t> > dims = wavCoeff.get_dimensions();
 
-        size_t RO = (*dims)[0];
-        size_t E1 = (*dims)[1];
-        size_t E2 = (*dims)[2];
-        size_t W = (*dims)[3];
-        size_t CHA = (*dims)[4];
+        long long RO = (long long)(*dims)[0];
+        long long E1 = (long long)(*dims)[1];
+        long long E2 = (long long)(*dims)[2];
+        long long W = (long long)(*dims)[3];
+        long long CHA = (long long)(*dims)[4];
 
         if ( !wav_coeff_norm_approx_.dimensions_equal(&wavCoeffNorm) )
         {
             wav_coeff_norm_approx_.create(wavCoeffNorm.get_dimensions());
         }
 
-        if ( !res_after_apply_kernel_.dimensions_equal(&wavCoeffNorm) )
-        {
-            res_after_apply_kernel_.create(wavCoeffNorm.get_dimensions());
-        }
+        // GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOver5thDimension(wavCoeffNorm, wav_coeff_norm_mag_sumCHA_));
 
         long long ii;
         long long N = (long long)wavCoeffNorm.get_number_of_elements();
@@ -441,29 +503,59 @@ shrinkWavCoeff(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, T thres
 
         long long num = N/N4D;
 
-        const T* pCoeffNorm = wavCoeffNorm.begin();
+        value_type* pCoeffNorm = const_cast<value_type*>(wavCoeffNorm.begin());
         T* pMag = wav_coeff_norm_approx_.begin();
-        T* pMagInv = res_after_apply_kernel_.begin();
 
-        #pragma omp parallel for default(none) private(ii) shared(N, pMag, pMagInv, pCoeffNorm)
-        for ( ii=0; ii<N; ii++ )
+        if ( wavCoeffNorm.dimensions_equal(&wavCoeff) )
         {
-            pMag[ii] = std::sqrt( pCoeffNorm[ii].real() );
-            pMagInv[ii] = 1.0/(pMag[ii].real()+DBL_EPSILON);
+            #pragma omp parallel for default(none) private(ii) shared(N, pMag, pCoeffNorm)
+            for ( ii=0; ii<N; ii++ )
+            {
+                pMag[ii] = pCoeffNorm[ii];
+            }
+
+            Gadgetron::divide(wavCoeff, wav_coeff_norm_approx_, complexIm_);
         }
+        else
+        {
+            if ( !res_after_apply_kernel_.dimensions_equal(&wavCoeffNorm) )
+            {
+                res_after_apply_kernel_.create(wavCoeffNorm.get_dimensions());
+            }
+
+            T* pMagInv = res_after_apply_kernel_.begin();
+
+            #pragma omp parallel for default(none) private(ii) shared(N, pMag, pMagInv, pCoeffNorm)
+            for ( ii=0; ii<N; ii++ )
+            {
+                pMag[ii] = pCoeffNorm[ii];
+                pMagInv[ii] = 1/(pCoeffNorm[ii]+FLT_EPSILON);
+            }
+
+            // Gadgetron::inv(wav_coeff_norm_approx_, res_after_apply_kernel_);
 
-        // phase does not change
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::multiplyOver5thDimension(res_after_apply_kernel_, wavCoeff, complexIm_));
+            // phase does not change
+            if ( res_after_apply_kernel_.dimensions_equal(&wavCoeff) )
+            {
+                Gadgetron::multiply(res_after_apply_kernel_, wavCoeff, complexIm_);
+            }
+            else
+            {
+                GADGET_CHECK_RETURN_FALSE(Gadgetron::multiplyOver5thDimension(res_after_apply_kernel_, wavCoeff, complexIm_));
+            }
+        }
 
         // shrink the magnitude
         if ( mask.dimensions_equal(&wavCoeffNorm) )
         {
             const T* pMask = mask.begin();
 
+            // value_type* pMagCHA = wav_coeff_norm_mag_sumCHA_.begin();
+
             long long n = 0;
             for ( n=0; n<num; n++ )
             {
-                long long s=RO*E1; 
+                long long s=RO*E1*E2; 
                 if ( processApproxCoeff )
                 {
                     s = 0;
@@ -472,18 +564,74 @@ shrinkWavCoeff(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, T thres
                 const T* pMaskCurr = pMask + n*N4D;
                 T* pMagCurr = pMag + n*N4D;
 
-                long long nn;
-
-                #pragma omp parallel for private(nn) shared(s, N4D, pMagCurr, pMaskCurr, thres)
-                for ( nn=s; nn<N4D; nn++ )
+                if ( change_coeffcients_third_dimension_boundary_ )
+                {
+                    long long nn;
+                    #pragma omp parallel for private(nn) shared(s, N4D, pMagCurr, pMaskCurr, thres)
+                    for ( nn=s; nn<N4D; nn++ )
+                    {
+                        // if ( std::abs(pMagCurr[nn]) < std::abs(thres*pMaskCurr[nn]) )
+                        if ( pMagCurr[nn].real() < pMaskCurr[nn].real() )
+                        // if ( pMagCHA[nn] < pMaskCurr[nn].real() )
+                        {
+                            pMagCurr[nn] = 0;
+                        }
+                        else
+                        {
+                            pMagCurr[nn] -= pMaskCurr[nn];
+                        }
+                    }
+                }
+                else
                 {
-                    if ( std::abs(pMagCurr[nn]) < std::abs(thres*pMaskCurr[nn]) )
+                    // approx coefficients
+                    long long nn;
+                    #pragma omp parallel for private(nn) shared(s, N4D, pMagCurr, pMaskCurr, thres)
+                    for ( nn=s; nn<RO*E1*E2; nn++ )
                     {
-                        pMagCurr[nn] = 0;
+                        //if ( std::abs(pMagCurr[nn]) < std::abs(thres*pMaskCurr[nn]) )
+                        if ( pMagCurr[nn].real() < pMaskCurr[nn].real() )
+                        {
+                            pMagCurr[nn] = 0;
+                        }
+                        else
+                        {
+                            pMagCurr[nn] -= pMaskCurr[nn];
+                        }
                     }
-                    else
+
+                    size_t level;
+                    for ( level=0; level<numOfWavLevels_; level++ )
                     {
-                        pMagCurr[nn] -= thres;
+                        size_t start = RO*E1*E2 + 7*level;
+
+                        size_t w;
+                        for ( w=0; w<7; w++ )
+                        {
+                            size_t startW = start+w*RO*E1*E2;
+                            size_t endW = startW+RO*E1*E2;
+
+                            if ( w >= 3 )
+                            {
+                                startW += RO*E1;
+                                endW -= RO*E1;
+                            }
+
+                            long long nn;
+                            #pragma omp parallel for private(nn) shared(s, N4D, pMagCurr, pMaskCurr, thres)
+                            for ( nn=(long long)startW; nn<(long long)endW; nn++ )
+                            {
+                                // if ( std::abs(pMagCurr[nn]) < std::abs(thres*pMaskCurr[nn]) )
+                                if ( pMagCurr[nn].real() < pMaskCurr[nn].real() )
+                                {
+                                    pMagCurr[nn] = 0;
+                                }
+                                else
+                                {
+                                    pMagCurr[nn] -= pMaskCurr[nn];
+                                }
+                            }
+                        }
                     }
                 }
             }
@@ -493,7 +641,7 @@ shrinkWavCoeff(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, T thres
             long long n = 0;
             for ( n=0; n<num; n++ )
             {
-                long long s=RO*E1; 
+                long long s=RO*E1*E2; 
                 if ( processApproxCoeff )
                 {
                     s = 0;
@@ -501,17 +649,73 @@ shrinkWavCoeff(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, T thres
 
                 T* pMagCurr = pMag + n*N4D;
 
-                long long nn;
-                #pragma omp parallel for private(nn) shared(s, N4D, pMagCurr, thres)
-                for ( nn=s; nn<N4D; nn++ )
+                if ( change_coeffcients_third_dimension_boundary_ )
                 {
-                    if ( std::abs(pMagCurr[nn]) < std::abs(thres) )
+                    long long nn;
+                    #pragma omp parallel for private(nn) shared(s, N4D, pMagCurr, thres)
+                    for ( nn=s; nn<N4D; nn++ )
                     {
-                        pMagCurr[nn] = 0;
+                        // if ( std::abs(pMagCurr[nn]) < std::abs(thres) )
+                        if ( pMagCurr[nn].real() < thres )
+                        {
+                            pMagCurr[nn] = 0;
+                        }
+                        else
+                        {
+                            pMagCurr[nn] -= thres;
+                        }
                     }
-                    else
+                }
+                else
+                {
+                    // approx coefficients
+                    long long nn;
+                    #pragma omp parallel for private(nn) shared(s, N4D, pMagCurr, thres)
+                    for ( nn=s; nn<RO*E1*E2; nn++ )
+                    {
+                        // if ( std::abs(pMagCurr[nn]) < std::abs(thres) )
+                        if ( pMagCurr[nn].real() < thres )
+                        {
+                            pMagCurr[nn] = 0;
+                        }
+                        else
+                        {
+                            pMagCurr[nn] -= thres;
+                        }
+                    }
+
+                    size_t level;
+                    for ( level=0; level<numOfWavLevels_; level++ )
                     {
-                        pMagCurr[nn] -= thres;
+                        size_t start = RO*E1*E2 + 7*level;
+
+                        size_t w;
+                        for ( w=0; w<7; w++ )
+                        {
+                            size_t startW = start+w*RO*E1*E2;
+                            size_t endW = startW+RO*E1*E2;
+
+                            if ( w >= 3 )
+                            {
+                                startW += RO*E1;
+                                endW -= RO*E1;
+                            }
+
+                            long long nn;
+                            #pragma omp parallel for private(nn) shared(s, N4D, pMagCurr, thres)
+                            for ( nn=(long long)startW; nn<(long long)endW; nn++ )
+                            {
+                                // if ( std::abs(pMagCurr[nn]) < std::abs(thres) )
+                                if ( pMagCurr[nn].real() < thres )
+                                {
+                                    pMagCurr[nn] = 0;
+                                }
+                                else
+                                {
+                                    pMagCurr[nn] -= thres;
+                                }
+                            }
+                        }
                     }
                 }
             }
@@ -519,32 +723,62 @@ shrinkWavCoeff(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, T thres
 
         if ( processApproxCoeff )
         {
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::multiplyOver5thDimension(wav_coeff_norm_approx_, complexIm_, wavCoeff));
+            if ( wav_coeff_norm_approx_.dimensions_equal(&complexIm_) )
+            {
+                Gadgetron::multiply(wav_coeff_norm_approx_, complexIm_, wavCoeff);
+            }
+            else
+            {
+                GADGET_CHECK_RETURN_FALSE(Gadgetron::multiplyOver5thDimension(wav_coeff_norm_approx_, complexIm_, wavCoeff));
+            }
         }
         else
         {
             // GADGET_CHECK_RETURN_FALSE(Gadgetron::multiplyOver5thDimensionExcept(wav_coeff_norm_approx_, complexIm_, 0, wavCoeff, false));
 
-            size_t num = wavCoeff.get_number_of_elements()/(RO*E1*E2*W*CHA);
+            if ( wav_coeff_norm_approx_.dimensions_equal(&wavCoeff) )
+            {
+                #ifdef GCC_OLD_FLAG
+                    #pragma omp parallel default(none) private(ii) shared(RO, E1, E2, W, CHA) if ( CHA > 1 )
+                #else
+                    #pragma omp parallel default(none) private(ii) shared(RO, E1, E2, wavCoeffNorm, wavCoeff, W, CHA) if ( CHA > 1 )
+                #endif
+                {
 
-            #ifdef GCC_OLD_FLAG
-                #pragma omp parallel default(none) private(ii) shared(RO, E1, E2, num, W, CHA) if ( num > 1 )
-            #else
-                #pragma omp parallel default(none) private(ii) shared(RO, E1, E2, num, wavCoeffNorm, wavCoeff, W, CHA) if ( num > 1 )
-            #endif
+                    #pragma omp for
+                    for ( ii=0; ii<CHA; ii++ )
+                    {
+                        hoNDArray<T> magCurr(RO, E1, E2, W-1, wav_coeff_norm_approx_.begin()+ii*RO*E1*E2*W+RO*E1*E2);
+                        hoNDArray<T> phaseCurr(RO, E1, E2, W-1, complexIm_.begin()+ii*RO*E1*E2*W+RO*E1*E2);
+                        hoNDArray<T> wavCoeffCurr(RO, E1, E2, W-1, wavCoeff.begin()+ii*RO*E1*E2*W+RO*E1*E2);
+
+                        Gadgetron::multiply(magCurr, phaseCurr, wavCoeffCurr);
+                    }
+                }
+            }
+            else
             {
+                long long num = wavCoeff.get_number_of_elements()/(RO*E1*E2*W*CHA);
 
-                #pragma omp for
-                for ( ii=0; ii<num; ii++ )
+                #ifdef GCC_OLD_FLAG
+                    #pragma omp parallel default(none) private(ii) shared(RO, E1, E2, num, W, CHA) if ( num > 1 )
+                #else
+                    #pragma omp parallel default(none) private(ii) shared(RO, E1, E2, num, wavCoeffNorm, wavCoeff, W, CHA) if ( num > 1 )
+                #endif
                 {
-                    hoNDArray<T> magCurr(RO, E1, E2, W-1, wav_coeff_norm_approx_.begin()+ii*RO*E1*E2*W+RO*E1*E2);
 
-                    for ( size_t cha=0; cha<CHA; cha++ )
+                    #pragma omp for
+                    for ( ii=0; ii<num; ii++ )
                     {
-                        hoNDArray<T> phaseCurr(RO, E1, E2, W-1, complexIm_.begin()+ii*RO*E1*E2*W*CHA+cha*RO*E1*E2*W+RO*E1*E2);
-                        hoNDArray<T> wavCoeffCurr(RO, E1, E2, W-1, wavCoeff.begin()+ii*RO*E1*E2*W*CHA+cha*RO*E1*E2*W+RO*E1*E2);
+                        hoNDArray<T> magCurr(RO, E1, E2, W-1, wav_coeff_norm_approx_.begin()+ii*RO*E1*E2*W+RO*E1*E2);
 
-                        Gadgetron::multiply(magCurr, phaseCurr, wavCoeffCurr);
+                        for ( long long cha=0; cha<CHA; cha++ )
+                        {
+                            hoNDArray<T> phaseCurr(RO, E1, E2, W-1, complexIm_.begin()+ii*RO*E1*E2*W*CHA+cha*RO*E1*E2*W+RO*E1*E2);
+                            hoNDArray<T> wavCoeffCurr(RO, E1, E2, W-1, wavCoeff.begin()+ii*RO*E1*E2*W*CHA+cha*RO*E1*E2*W+RO*E1*E2);
+
+                            Gadgetron::multiply(magCurr, phaseCurr, wavCoeffCurr);
+                        }
                     }
                 }
             }
@@ -564,15 +798,15 @@ dwtRedundantHaar(const hoNDArray<T>& in, hoNDArray<T>& out, size_t level)
 {
     try
     {
-        size_t RO = in.get_size(0);
-        size_t E1 = in.get_size(1);
-        size_t E2 = in.get_size(2);
+        long long RO = (long long)in.get_size(0);
+        long long E1 = (long long)in.get_size(1);
+        long long E2 = (long long)in.get_size(2);
 
         T* pOut = out.begin();
         memcpy(pOut, in.begin(), sizeof(T)*RO*E1*E2);
 
-        size_t N2D = RO*E1;
-        size_t N3D = RO*E1*E2;
+        long long N2D = RO*E1;
+        long long N3D = RO*E1*E2;
 
         for (size_t n=0; n<level; n++)
         {
@@ -607,8 +841,8 @@ dwtRedundantHaar(const hoNDArray<T>& in, hoNDArray<T>& out, size_t level)
                 }
             }
 
-            Gadgetron::scal( 0.5, lll, N3D);
-            Gadgetron::scal( 0.5, llh, N3D);
+            this->scal( N3D, T(0.5), lll);
+            this->scal( N3D, T(0.5), llh );
 
             #pragma omp parallel for default(none) private(e2) shared(RO, E1, E2, N2D, lll, llh, lhh, lhl)
             for (e2=0; e2<E2; e2++)
@@ -642,16 +876,16 @@ dwtRedundantHaar(const hoNDArray<T>& in, hoNDArray<T>& out, size_t level)
             #pragma omp parallel sections
             {
                 #pragma omp section
-                Gadgetron::scal( 0.5, lll, N3D);
+                this->scal( N3D, T(0.5), lll );
 
                 #pragma omp section
-                Gadgetron::scal( 0.5, lhl, N3D);
+                this->scal( N3D, T(0.5), lhl );
 
                 #pragma omp section
-                Gadgetron::scal( 0.5, llh, N3D);
+                this->scal( N3D, T(0.5), llh );
 
                 #pragma omp section
-                Gadgetron::scal( 0.5, lhh, N3D);
+                this->scal( N3D, T(0.5), lhh );
             }
 
             long long e1;
@@ -705,28 +939,28 @@ dwtRedundantHaar(const hoNDArray<T>& in, hoNDArray<T>& out, size_t level)
             #pragma omp parallel sections
             {
                 #pragma omp section
-                Gadgetron::scal( 0.5, lll, N3D);
+                this->scal( N3D, T(0.5), lll);
 
                 #pragma omp section
-                Gadgetron::scal( 0.5, hll, N3D);
+                this->scal( N3D, T(0.5), hll);
 
                 #pragma omp section
-                Gadgetron::scal( 0.5, lhl, N3D);
+                this->scal( N3D, T(0.5), lhl);
 
                 #pragma omp section
-                Gadgetron::scal( 0.5, hhl, N3D);
+                this->scal( N3D, T(0.5), hhl);
 
                 #pragma omp section
-                Gadgetron::scal( 0.5, llh, N3D);
+                this->scal( N3D, T(0.5), llh);
 
                 #pragma omp section
-                Gadgetron::scal( 0.5, hlh, N3D);
+                this->scal( N3D, T(0.5), hlh);
 
                 #pragma omp section
-                Gadgetron::scal( 0.5, lhh, N3D);
+                this->scal( N3D, T(0.5), lhh);
 
                 #pragma omp section
-                Gadgetron::scal( 0.5, hhh, N3D);
+                this->scal( N3D, T(0.5), hhh);
             }
         }
     }
@@ -744,16 +978,16 @@ idwtRedundantHaar(const hoNDArray<T>& in, hoNDArray<T>& out, size_t level)
 {
     try
     {
-        size_t RO = in.get_size(0);
-        size_t E1 = in.get_size(1);
-        size_t E2 = in.get_size(2);
+        long long RO = (long long)in.get_size(0);
+        long long E1 = (long long)in.get_size(1);
+        long long E2 = (long long)in.get_size(2);
 
         T* pIn = const_cast<T*>(in.begin());
         T* pOut = out.begin();
         memcpy(pOut, in.begin(), sizeof(T)*RO*E1*E2);
 
-        size_t N2D = RO*E1;
-        size_t N3D = RO*E1*E2;
+        long long N2D = RO*E1;
+        long long N3D = RO*E1*E2;
 
         hoNDArray<T> LL(N3D);
         T* pLL = LL.begin();
@@ -767,8 +1001,8 @@ idwtRedundantHaar(const hoNDArray<T>& in, hoNDArray<T>& out, size_t level)
         hoNDArray<T> HH(N3D);
         T* pHH = HH.begin();
 
-        int n;
-        for (n=level-1; n>=0; n--)
+        long long n;
+        for (n=(long long)level-1; n>=0; n--)
         {
             T* lll = pOut;
             T* llh = pIn + n*7*N3D + N3D;
@@ -791,19 +1025,19 @@ idwtRedundantHaar(const hoNDArray<T>& in, hoNDArray<T>& out, size_t level)
                     for (long long e2=E2-1; e2>0; e2--)
                     {
                         ind = ind2D + e2*N2D;
-                        pLL[ind] = lll[ind]+lll[ind-N2D] + hll[ind]-hll[ind-N2D];
-                        pHL[ind] = lhl[ind]+lhl[ind-N2D] + hhl[ind]-hhl[ind-N2D];
-                        pLH[ind] = llh[ind]+llh[ind-N2D] + hlh[ind]-hlh[ind-N2D];
-                        pHH[ind] = lhh[ind]+lhh[ind-N2D] + hhh[ind]-hhh[ind-N2D];
+                        pLL[ind] = (lll[ind]+lll[ind-N2D]) + (hll[ind]-hll[ind-N2D]);
+                        pHL[ind] = (lhl[ind]+lhl[ind-N2D]) + (hhl[ind]-hhl[ind-N2D]);
+                        pLH[ind] = (llh[ind]+llh[ind-N2D]) + (hlh[ind]-hlh[ind-N2D]);
+                        pHH[ind] = (lhh[ind]+lhh[ind-N2D]) + (hhh[ind]-hhh[ind-N2D]);
                     }
 
                     if ( E2 > 1 )
                     {
                         ind = ind2D + (E2-1)*N2D;
-                        pLL[ind2D] = lll[ind2D]+lll[ind] + hll[ind2D]-hll[ind];
-                        pHL[ind2D] = lhl[ind2D]+lhl[ind] + hhl[ind2D]-hhl[ind];
-                        pLH[ind2D] = llh[ind2D]+llh[ind] + hlh[ind2D]-hlh[ind];
-                        pHH[ind2D] = lhh[ind2D]+lhh[ind] + hhh[ind2D]-hhh[ind];
+                        pLL[ind2D] = (lll[ind2D]+lll[ind]) + (hll[ind2D]-hll[ind]);
+                        pHL[ind2D] = (lhl[ind2D]+lhl[ind]) + (hhl[ind2D]-hhl[ind]);
+                        pLH[ind2D] = (llh[ind2D]+llh[ind]) + (hlh[ind2D]-hlh[ind]);
+                        pHH[ind2D] = (lhh[ind2D]+lhh[ind]) + (hhh[ind2D]-hhh[ind]);
                     }
                 }
             }
@@ -811,63 +1045,63 @@ idwtRedundantHaar(const hoNDArray<T>& in, hoNDArray<T>& out, size_t level)
             #pragma omp parallel sections
             {
                 #pragma omp section
-                Gadgetron::scal( 0.5, pLL, N3D);
+                this->scal( N3D, T(0.5), pLL );
 
                 #pragma omp section
-                Gadgetron::scal( 0.5, pHL, N3D);
+                this->scal( N3D, T(0.5), pHL );
 
                 #pragma omp section
-                Gadgetron::scal( 0.5, pLH, N3D);
+                this->scal( N3D, T(0.5), pLH );
 
                 #pragma omp section
-                Gadgetron::scal( 0.5, pHH, N3D);
+                this->scal( N3D, T(0.5), pHH );
             }
 
             long long e2;
             #pragma omp parallel for default(none) private(e2) shared(RO, E1, E2, N2D, pLL, pHL, pLH, pHH) 
-            for (e2=0; e2<E2; e2++)
+            for (e2=0; e2<(long long)E2; e2++)
             {
-                int ind3D = e2*N2D;
-                for (long long e1=0; e1<E1; e1++)
+                long long ind3D = e2*N2D;
+                for (long long e1=0; e1<(long long)E1; e1++)
                 {
-                    int ind = e1*RO + RO-1 + ind3D;
+                    long long ind = e1*RO + RO-1 + ind3D;
 
                     T v1 = pLL[ind];
                     T v2 = pLH[ind];
 
-                    for (long long ro=RO-1; ro>0; ro--)
+                    for (long long ro=(long long)RO-1; ro>0; ro--)
                     {
-                        pLL[ind] = pLL[ind]+pLL[ind-1] + pHL[ind]-pHL[ind-1];
-                        pLH[ind] = pLH[ind]+pLH[ind-1] + pHH[ind]-pHH[ind-1];
+                        pLL[ind] = (pLL[ind]+pLL[ind-1]) + (pHL[ind]-pHL[ind-1]);
+                        pLH[ind] = (pLH[ind]+pLH[ind-1]) + (pHH[ind]-pHH[ind-1]);
                         ind--;
                     }
 
-                    pLL[ind] = pLL[ind]+v1 + pHL[ind]-pHL[ind+RO-1];
-                    pLH[ind] = pLH[ind]+v2 + pHH[ind]-pHH[ind+RO-1];
+                    pLL[ind] = (pLL[ind]+v1) + (pHL[ind]-pHL[ind+RO-1]);
+                    pLH[ind] = (pLH[ind]+v2) + (pHH[ind]-pHH[ind+RO-1]);
                 }
             }
 
-            Gadgetron::scal( 0.5, pLL, N3D);
-            Gadgetron::scal( 0.5, pLH, N3D);
+            this->scal( N3D, T(0.5), pLL );
+            this->scal( N3D, T(0.5), pLH );
 
             #pragma omp parallel for default(none) private(e2) shared(RO, E1, E2, N2D, pLL,pLH, pOut) 
-            for (e2=0; e2<E2; e2++)
+            for (e2=0; e2<(long long)E2; e2++)
             {
-                int ind3D = e2*N2D;
-                for (long long ro=0; ro<RO; ro++)
+                long long ind3D = e2*N2D;
+                for (long long ro=0; ro<(long long)RO; ro++)
                 {
-                    int ind = (E1-1)*RO + ro + ind3D;
-                    for (long long e1=E1-1; e1>0; e1--)
+                    long long ind = (E1-1)*RO + ro + ind3D;
+                    for (long long e1=(long long)E1-1; e1>0; e1--)
                     {
-                        pOut[ind] = pLL[ind]+pLL[ind-RO] + pLH[ind]-pLH[ind-RO];
+                        pOut[ind] = (pLL[ind]+pLL[ind-RO]) + (pLH[ind]-pLH[ind-RO]);
                         ind -= RO;
                     }
 
-                    pOut[ind] = pLL[ind]+pLL[ind+(E1-1)*RO] + pLH[ind]-pLH[ind+(E1-1)*RO];
+                    pOut[ind] = (pLL[ind]+pLL[ind+(E1-1)*RO]) + (pLH[ind]-pLH[ind+(E1-1)*RO]);
                 }
             }
 
-            Gadgetron::scal( 0.5, pOut, N3D);
+            this->scal( N3D, T(0.5), pOut );
         }
     }
     catch (...)
@@ -922,7 +1156,7 @@ gradTask(const hoNDArray<T>& x, hoNDArray<T>& g)
         //gt_timer2_.stop();
 
         //gt_timer2_.start("8");
-        GADGET_CHECK_RETURN_FALSE(this->divideWavCoeffByNorm(res_after_apply_kernel_sum_over_, wav_coeff_norm_, T(1e-15), T(1.0), with_approx_coeff_));
+        GADGET_CHECK_RETURN_FALSE(this->divideWavCoeffByNorm(res_after_apply_kernel_sum_over_, wav_coeff_norm_, (value_type)(1e-15), (value_type)(1.0), with_approx_coeff_));
         //gt_timer2_.stop();
 
         // first dimension scaling
diff --git a/toolboxes/gtplus/algorithm/gtPlusWaveletNoNullSpace2DOperator.h b/toolboxes/gtplus/algorithm/gtPlusWaveletNoNullSpace2DOperator.h
index 3ca1628..73f6630 100644
--- a/toolboxes/gtplus/algorithm/gtPlusWaveletNoNullSpace2DOperator.h
+++ b/toolboxes/gtplus/algorithm/gtPlusWaveletNoNullSpace2DOperator.h
@@ -21,15 +21,17 @@ public:
 
     virtual void printInfo(std::ostream& os);
 
-    // if the sensitivity S is set, compute gradient of ||wav*F'*S'*x||1
+    // if the sensitivity S is set, compute gradient of ||wav*S'*F'*x||1
     // if not, compute gradient of ||wav*F'*x||1
     // x represents the unacquired kspace points [RO E1 CHA]
     virtual bool grad(const hoNDArray<T>& x, hoNDArray<T>& g);
 
-    // if the sensitivity S is set, compute cost value of L2 norm ||wav*F'*S'*x||1
+    // if the sensitivity S is set, compute cost value of L2 norm ||wav*S'*F'*x||1
     // if not, compute cost value of L2 norm ||wav*F'*x||1
     virtual bool obj(const hoNDArray<T>& x, T& obj);
 
+    using BaseClass::scale_factor_first_dimension_;
+    using BaseClass::scale_factor_second_dimension_;
     using BaseClass::numOfWavLevels_;
     using BaseClass::with_approx_coeff_;
     using BaseClass::gt_timer1_;
diff --git a/toolboxes/gtplus/algorithm/gtPlusWaveletNoNullSpace3DOperator.h b/toolboxes/gtplus/algorithm/gtPlusWaveletNoNullSpace3DOperator.h
index fcd0c47..98e92ae 100644
--- a/toolboxes/gtplus/algorithm/gtPlusWaveletNoNullSpace3DOperator.h
+++ b/toolboxes/gtplus/algorithm/gtPlusWaveletNoNullSpace3DOperator.h
@@ -21,16 +21,19 @@ public:
 
     virtual void printInfo(std::ostream& os);
 
-    // if the sensitivity S is set, compute gradient of ||wav*F'*S'*x||1
+    // if the sensitivity S is set, compute gradient of ||wav*S'*F'*x||1
     // if not, compute gradient of ||wav*F'*x||1
     // x represents the unacquired kspace points [RO E1 CHA E2]
     virtual bool grad(const hoNDArray<T>& x, hoNDArray<T>& g);
 
-    // if the sensitivity S is set, compute cost value of L2 norm ||wav*F'*S'*x||1
+    // if the sensitivity S is set, compute cost value of L2 norm ||wav*S'*F'*x||1
     // if not, compute cost value of L2 norm ||wav*F'*x||1
     virtual bool obj(const hoNDArray<T>& x, T& obj);
 
+    using BaseClass::scale_factor_first_dimension_;
+    using BaseClass::scale_factor_second_dimension_;
     using BaseClass::scale_factor_third_dimension_;
+    using BaseClass::change_coeffcients_third_dimension_boundary_;
     using BaseClass::numOfWavLevels_;
     using BaseClass::with_approx_coeff_;
     using BaseClass::gt_timer1_;
diff --git a/toolboxes/gtplus/algorithm/gtPlusWaveletOperator.h b/toolboxes/gtplus/algorithm/gtPlusWaveletOperator.h
index f16ab24..717489d 100644
--- a/toolboxes/gtplus/algorithm/gtPlusWaveletOperator.h
+++ b/toolboxes/gtplus/algorithm/gtPlusWaveletOperator.h
@@ -15,6 +15,7 @@ class gtPlusWaveletOperator : public gtPlusOperator<T>
 public:
 
     typedef gtPlusOperator<T> BaseClass;
+    typedef typename BaseClass::value_type value_type;
 
     gtPlusWaveletOperator();
     virtual ~gtPlusWaveletOperator();
@@ -39,14 +40,15 @@ public:
 
     // soft-threshold or shrink the wavelet coefficients
     // the really applied threshold is mask.*thres
-    virtual bool shrinkWavCoeff(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, T thres, const hoNDArray<T>& mask, bool processApproxCoeff=false);
+    virtual bool shrinkWavCoeff(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, value_type thres, const hoNDArray<T>& mask, bool processApproxCoeff=false);
+    virtual bool proximity(hoNDArray<T>& wavCoeff, value_type thres);
 
-    // if the sensitivity S is set, compute gradient of ||wav*F'*S'*(Dc'x+D'y)||1
+    // if the sensitivity S is set, compute gradient of ||wav*S'*F'*(Dc'x+D'y)||1
     // if not, compute gradient of ||wav*F'*(Dc'x+D'y)||1
     // x represents the unacquired kspace points [RO E1 CHA]
     virtual bool grad(const hoNDArray<T>& x, hoNDArray<T>& g);
 
-    // if the sensitivity S is set, compute cost value of L2 norm ||wav*F'*S'*(Dc'x+D'y)||1
+    // if the sensitivity S is set, compute cost value of L2 norm ||wav*S'*F'*(Dc'x+D'y)||1
     // if not, compute cost value of L2 norm ||wav*F'*(Dc'x+D'y)||1
     virtual bool obj(const hoNDArray<T>& x, T& obj);
 
@@ -56,6 +58,9 @@ public:
     // whether to include low frequency approximation coefficients
     bool with_approx_coeff_;
 
+    T scale_factor_first_dimension_;
+    T scale_factor_second_dimension_;
+
     using BaseClass::gt_timer1_;
     using BaseClass::gt_timer2_;
     using BaseClass::gt_timer3_;
@@ -66,7 +71,7 @@ public:
     using BaseClass::gtPlus_util_complex_;
     using BaseClass::gtPlus_mem_manager_;
 
-protected:
+public:
 
     // convert to image domain or back to kspace
     virtual bool convertToImage(const hoNDArray<T>& x, hoNDArray<T>& im);
@@ -78,6 +83,12 @@ protected:
     // compute the obj on the assembled kspace
     virtual bool objTask(const hoNDArray<T>& x, T& obj);
 
+    // utility function
+    void scal(size_t N, float a, float* x);
+    void scal(size_t N, double a, double* x);
+    void scal(size_t N, std::complex<float> a, std::complex<float>* x);
+    void scal(size_t N, std::complex<double> a, std::complex<double>* x);
+
     using BaseClass::acquired_points_;
     using BaseClass::acquired_points_indicator_;
     using BaseClass::unacquired_points_indicator_;
@@ -92,6 +103,8 @@ protected:
     hoNDArray<T> wav_coeff_norm_;
     hoNDArray<T> wav_coeff_norm_approx_;
 
+    hoNDArray<value_type> wav_coeff_norm_mag_;
+
     hoNDArray<T> kspace_wav_;
     hoNDArray<T> complexIm_wav_;
     hoNDArray<T> complexIm_norm_;
@@ -103,7 +116,7 @@ protected:
 };
 
 template <typename T> 
-gtPlusWaveletOperator<T>::gtPlusWaveletOperator() : numOfWavLevels_(1), with_approx_coeff_(false), BaseClass()
+gtPlusWaveletOperator<T>::gtPlusWaveletOperator() : numOfWavLevels_(1), with_approx_coeff_(false), scale_factor_first_dimension_(1.0), scale_factor_second_dimension_(1.0), BaseClass() // defaults: one wavelet level, approximation coefficients excluded, both dimension scale factors set to 1.0
 {
 
 }
@@ -114,6 +127,68 @@ gtPlusWaveletOperator<T>::~gtPlusWaveletOperator()
 }
 
 template <typename T> 
+void gtPlusWaveletOperator<T>::scal(size_t N, float a, float* x) // in-place scaling of a float buffer: x[n] *= a for every n in [0, N)
+{
+    long long n; // loop index; listed in private(n) of the OpenMP pragma below
+    #pragma omp parallel for default(none) private(n) shared(N, x, a) if (N>64*1024)
+    for (n = 0; n < (long long)N; n++) // the if() clause above enables threading only when N exceeds 64*1024 elements
+    {
+        x[n] *= a;
+    }
+}
+
+template <typename T> 
+void gtPlusWaveletOperator<T>::scal(size_t N, double a, double* x) // in-place scaling of a double buffer: x[n] *= a for every n in [0, N)
+{
+    long long n; // loop index; listed in private(n) of the OpenMP pragma below
+    #pragma omp parallel for default(none) private(n) shared(N, x, a) if (N>64*1024)
+    for (n = 0; n < (long long)N; n++) // the if() clause above enables threading only when N exceeds 64*1024 elements
+    {
+        x[n] *= a;
+    }
+}
+
+template <typename T> 
+void gtPlusWaveletOperator<T>::scal(size_t N,  std::complex<float>  a,  std::complex<float> * x) // in-place complex scaling: x[n] = x[n] * a for every n in [0, N)
+{
+    long long n; // loop index; listed in private(n) of the OpenMP pragma below
+    // the if() clause of the pragma enables threading only when N exceeds 64*1024 elements
+    #pragma omp parallel for default(none) private(n) shared(N, x, a) if (N>64*1024)
+    for (n = 0; n < (long long)N; n++)
+    {
+        const  std::complex<float> & c = x[n]; // both parts are copied into locals below before x[n] is overwritten
+        const float re = c.real();
+        const float im = c.imag();
+        // real and imaginary parts of the scale factor a
+        const float ar = a.real();
+        const float ai = a.imag();
+        // (re + i*im)*(ar + i*ai) = (re*ar - im*ai) + i*(re*ai + im*ar); stored through a float[2] view: [0] real, [1] imaginary
+        reinterpret_cast<float(&)[2]>(x[n])[0] = re*ar-im*ai;
+        reinterpret_cast<float(&)[2]>(x[n])[1] = re*ai+im*ar;
+    }
+}
+
+template <typename T> 
+void gtPlusWaveletOperator<T>::scal(size_t N,  std::complex<double>  a,  std::complex<double> * x) // in-place complex scaling: x[n] = x[n] * a for every n in [0, N)
+{
+    long long n; // loop index; listed in private(n) of the OpenMP pragma below
+    // the if() clause of the pragma enables threading only when N exceeds 64*1024 elements
+    #pragma omp parallel for default(none) private(n) shared(N, x, a) if (N>64*1024)
+    for (n = 0; n < (long long)N; n++)
+    {
+        const  std::complex<double> & c = x[n]; // both parts are copied into locals below before x[n] is overwritten
+        const double re = c.real();
+        const double im = c.imag();
+        // real and imaginary parts of the scale factor a
+        const double ar = a.real();
+        const double ai = a.imag();
+        // (re + i*im)*(ar + i*ai) = (re*ar - im*ai) + i*(re*ai + im*ar); stored through a double[2] view: [0] real, [1] imaginary
+        reinterpret_cast<double(&)[2]>(x[n])[0] = re*ar-im*ai;
+        reinterpret_cast<double(&)[2]>(x[n])[1] = re*ai+im*ar;
+    }
+}
+
+template <typename T> 
 bool gtPlusWaveletOperator<T>::
 L1Norm(const hoNDArray<T>& wavCoeff, hoNDArray<T>& wavCoeffNorm)
 {
@@ -135,7 +210,7 @@ L1Norm(const hoNDArray<T>& wavCoeff, hoNDArray<T>& wavCoeffNorm)
         size_t CHA = (*dims)[3];
 
         // square the coefficients
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::multiplyConj(wavCoeff, wavCoeff, complexIm_norm_));
+        Gadgetron::multiplyConj(wavCoeff, wavCoeff, complexIm_norm_);
         // sum over CHA
         GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOver4thDimension(complexIm_norm_, wavCoeffNorm));
     }
@@ -155,7 +230,7 @@ L1NormTotal(const hoNDArray<T>& wavCoeff, hoNDArray<T>& wavCoeffNorm, T& L1Coeff
     {
         GADGET_CHECK_RETURN_FALSE(this->L1Norm(wavCoeff, wavCoeffNorm));
 
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::sqrt(wavCoeffNorm, wav_coeff_norm_approx_));
+        Gadgetron::sqrt(wavCoeffNorm, wav_coeff_norm_approx_);
 
         L1CoeffNorm = Gadgetron::asum(&wav_coeff_norm_approx_);
     }
@@ -194,7 +269,7 @@ divideWavCoeffByNorm(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, T
             #pragma omp parallel for default(none) private(ii) shared(N, pBuf, pCoeffNorm, mu)
             for ( ii=0; ii<N; ii++ )
             {
-                pBuf[ii] = 1.0 / std::sqrt( pCoeffNorm[ii].real() + mu.real() );
+                pBuf[ii] = (value_type)(1.0 / std::sqrt( pCoeffNorm[ii].real() + mu.real() ));
             }
         }
         else
@@ -202,7 +277,7 @@ divideWavCoeffByNorm(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, T
             #pragma omp parallel for default(none) private(ii) shared(N, pBuf, pCoeffNorm, mu, p)
             for ( ii=0; ii<N; ii++ )
             {
-                pBuf[ii] = std::pow( (double)(pCoeffNorm[ii].real() + mu.real()), (double)(p.real()/2.0-1.0) );
+                pBuf[ii] = (value_type)std::pow( (double)(pCoeffNorm[ii].real() + mu.real()), (double)(p.real()/2.0-1.0) );
             }
         }
 
@@ -223,7 +298,7 @@ divideWavCoeffByNorm(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, T
             {
 
                 #pragma omp for
-                for ( ii=0; ii<num; ii++ )
+                for ( ii=0; ii<(long long)num; ii++ )
                 {
                     hoNDArray<T> wavCoeffNormCurr(RO, E1, W-1, wav_coeff_norm_approx_.begin()+ii*RO*E1*W+RO*E1);
 
@@ -246,7 +321,26 @@ divideWavCoeffByNorm(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, T
 
 template <typename T> 
 bool gtPlusWaveletOperator<T>::
-shrinkWavCoeff(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, T thres, const hoNDArray<T>& mask, bool processApproxCoeff)
+proximity(hoNDArray<T>& wavCoeff, value_type thres) // runs L1Norm() on wavCoeff, then shrinkWavCoeff() with threshold thres; returns true only if no step failed
+{
+    try
+    {
+        GADGET_CHECK_RETURN_FALSE(this->L1Norm(wavCoeff, wav_coeff_norm_)); // fills the member wav_coeff_norm_; NOTE(review): macro presumably returns false from here on failure - confirm
+        hoNDArray<T> mask; // default-constructed; NOTE(review): presumably makes shrinkWavCoeff() take its unmasked path - confirm
+        // wavCoeff is handed over as a non-const reference together with the norm computed above and the with_approx_coeff_ member.
+        GADGET_CHECK_RETURN_FALSE(this->shrinkWavCoeff(wavCoeff, wav_coeff_norm_, thres, mask, with_approx_coeff_));
+    }
+    catch (...) // any exception is logged and reported to the caller as failure
+    {
+        GADGET_ERROR_MSG("Errors in gtPlusWaveletOperator<T>::proximity(hoNDArray<T>& wavCoeff, value_type thres) ... ");
+        return false;
+    }
+    return true;
+}
+
+template <typename T> 
+bool gtPlusWaveletOperator<T>::
+shrinkWavCoeff(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, value_type thres, const hoNDArray<T>& mask, bool processApproxCoeff)
 {
     try
     {
@@ -278,8 +372,8 @@ shrinkWavCoeff(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, T thres
         #pragma omp parallel for default(none) private(ii) shared(N, pMag, pMagInv, pCoeffNorm)
         for ( ii=0; ii<N; ii++ )
         {
-            pMag[ii] = std::sqrt( pCoeffNorm[ii].real() );
-            pMagInv[ii] = 1.0/(pMag[ii].real()+DBL_EPSILON);
+            pMag[ii] = (value_type)std::sqrt( pCoeffNorm[ii].real() );
+            pMagInv[ii] = (value_type)(1.0/(pMag[ii].real()+DBL_EPSILON));
         }
 
         // phase does not change
@@ -307,13 +401,14 @@ shrinkWavCoeff(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, T thres
                 #pragma omp parallel for private(nn) shared(s, N3D, pMagCurr, pMaskCurr, thres)
                 for ( nn=s; nn<N3D; nn++ )
                 {
-                    if ( std::abs(pMagCurr[nn]) < std::abs(thres*pMaskCurr[nn]) )
+                    // if ( std::abs(pMagCurr[nn]) < std::abs(thres*pMaskCurr[nn]) )
+                    if ( pMagCurr[nn].real() < thres*pMaskCurr[nn].real() )
                     {
                         pMagCurr[nn] = 0;
                     }
                     else
                     {
-                        pMagCurr[nn] -= thres;
+                        pMagCurr[nn] -= thres*pMaskCurr[nn];
                     }
                 }
             }
@@ -335,7 +430,8 @@ shrinkWavCoeff(hoNDArray<T>& wavCoeff, const hoNDArray<T>& wavCoeffNorm, T thres
                 #pragma omp parallel for private(nn) shared(s, N3D, pMagCurr, thres)
                 for ( nn=s; nn<N3D; nn++ )
                 {
-                    if ( std::abs(pMagCurr[nn]) < std::abs(thres) )
+                    // if ( std::abs(pMagCurr[nn]) < std::abs(thres) )
+                    if ( pMagCurr[nn].real() < thres )
                     {
                         pMagCurr[nn] = 0;
                     }
@@ -394,15 +490,11 @@ grad(const hoNDArray<T>& x, hoNDArray<T>& g)
     {
         // D'y+Dc'x
         //gt_timer1_.start("1");
-        //vcMul(unacquired_points_indicator_.get_number_of_elements(), 
-        //    reinterpret_cast<MKL_Complex8*>(unacquired_points_indicator_.begin()), 
-        //    reinterpret_cast<const MKL_Complex8*>(x.begin()), 
-        //    reinterpret_cast<MKL_Complex8*>(kspace_.begin()));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::multiply(unacquired_points_indicator_, x, kspace_));
+        Gadgetron::multiply(unacquired_points_indicator_, x, kspace_);
         //gt_timer1_.stop();
 
         //gt_timer1_.start("2");
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::add(*acquired_points_, kspace_, kspace_));
+        Gadgetron::add(*acquired_points_, kspace_, kspace_);
         //gt_timer1_.stop();
 
         // compute the gradient on assembled kspace
@@ -410,7 +502,7 @@ grad(const hoNDArray<T>& x, hoNDArray<T>& g)
 
         // only unacquired points are kept
         //gt_timer1_.start("12");
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::multiply(unacquired_points_indicator_, g, g));
+        Gadgetron::multiply(unacquired_points_indicator_, g, g);
         //gt_timer1_.stop();
     }
     catch (...)
@@ -430,11 +522,11 @@ obj(const hoNDArray<T>& x, T& obj)
     {
         // D'y+Dc'x
         //gt_timer1_.start("1");
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::multiply(unacquired_points_indicator_, x, kspace_));
+        Gadgetron::multiply(unacquired_points_indicator_, x, kspace_);
         //gt_timer1_.stop();
 
         //gt_timer1_.start("2");
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::add(*acquired_points_, kspace_, kspace_));
+        Gadgetron::add(*acquired_points_, kspace_, kspace_);
         //gt_timer1_.stop();
 
         // compute the objective function on assembled kspace
@@ -492,7 +584,7 @@ gradTask(const hoNDArray<T>& x, hoNDArray<T>& g)
         //gt_timer2_.stop();
 
         //gt_timer2_.start("8");
-        GADGET_CHECK_RETURN_FALSE(this->divideWavCoeffByNorm(res_after_apply_kernel_sum_over_, wav_coeff_norm_, T(1e-15), T(1.0), with_approx_coeff_));
+        GADGET_CHECK_RETURN_FALSE(this->divideWavCoeffByNorm(res_after_apply_kernel_sum_over_, wav_coeff_norm_, T( (value_type)1e-15), T( (value_type)1.0 ), with_approx_coeff_));
         //gt_timer2_.stop();
 
         // go back to image
diff --git a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Cartesian.xml b/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Cartesian.xml
deleted file mode 100644
index fb0185e..0000000
--- a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Cartesian.xml
+++ /dev/null
@@ -1,798 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-        xmlns="http://gadgetron.sf.net/gadgetron"
-        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-
-    <!-- 
-        GT Plus configuratin file for general 2D or 2D+T cartesian reconstruction
-
-        Author: Hui Xue
-        Magnetic Resonance Technology Program
-        National Heart, Lung and Blood Institute
-        National Institutes of Health
-        10 Center Drive, Bethesda
-        MD 20814
-        USA
-        Email: hui.xue at nih.gov
-    -->
-
-    <!-- reader -->
-    <reader>
-        <slot>1008</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-    </reader>
-
-    <!-- writer -->
-    <writer>
-        <slot>1004</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterCPLX</classname>
-    </writer>
-    <writer>
-        <slot>1005</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterFLOAT</classname>
-    </writer>
-    <writer>
-        <slot>1006</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterUSHORT</classname>
-    </writer>
-
-    <!-- RO asymmetric echo handling -->
-    <gadget>
-        <name>PartialFourierAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>PartialFourierAdjustROGadget</classname>
-    </gadget>
-
-    <!-- Noise prewhitening -->
-    <gadget>
-        <name>NoiseAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>NoiseAdjustGadget</classname>
-    </gadget>
-
-    <!-- RO oversampling removal -->
-    <gadget>
-        <name>RemoveROOversampling</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>RemoveROOversamplingGadget</classname>
-    </gadget>
-
-    <!-- Data accumulation and trigger gadget -->
-    <gadget>
-        <name>Acc</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
-
-        <!-- debug and info mode -->
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>noacceleration_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>noacceleration_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>noacceleration_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>interleaved_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>interleaved_numOfKSpace_triggerDim1</name>
-            <value>4</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>embedded_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>embedded_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>separate_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>separate_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- if 'other' kspace data presents, enforce its dimension matches the image data-->
-        <property>
-            <name>other_kspace_matching_Dim</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-    </gadget>
-
-    <!--Recon computation for 2DT cases -->
-    <gadget>
-        <name>Recon</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusRecon2DTGadget</classname>
-
-        <!-- kspace data -->
-        <property>
-            <name>dim_4th</name>
-            <value>DIM_NONE</value>
-        </property>
-        <property>
-            <name>dim_5th</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <!-- work flow -->
-        <property>
-            <name>workOrder_ShareDim</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>no_acceleration_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>no_acceleration_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>interleaved_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>interleaved_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap_useHighestSignal</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>embedded_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_ref_fillback</name>
-            <value>true</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>separate_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- coil compression -->
-        <property>
-            <name>same_coil_compression_coeff_allS</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>downstream_coil_compression</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>coil_compression_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>coil_compression_num_modesKept</name>
-            <value>8</value>
-        </property>
-
-        <!-- parameters for coil map estimation 
-            enum ISMRMRDCOILMAPALGO
-            {
-                ISMRMRD_SOUHEIL,
-                ISMRMRD_SOUHEIL_ITER
-            };
-        -->
-        <property>
-            <name>coil_map_algorithm</name>
-            <value>ISMRMRD_SOUHEIL</value>
-        </property>
-        <property>
-            <name>csm_kSize</name>
-            <value>7</value>
-        </property>
-
-        <property>
-            <name>csm_powermethod_num</name>
-            <value>3</value>
-        </property>
-
-        <property>
-            <name>csm_true_3D</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>csm_iter_num</name>
-            <value>5</value>
-        </property>
-
-        <property>
-            <name>csm_iter_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>csm_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- algorithm -->
-        <property>
-            <name>recon_algorithm</name>
-            <value>ISMRMRD_GRAPPA</value>
-        </property>
-
-        <property>
-            <name>recon_kspace_needed</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>recon_auto_parameters</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_GRAPPA -->
-        <property>
-            <name>grappa_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E1</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E2</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_reg_lamda</name>
-            <value>0.0005</value>
-        </property>
-        <property>
-            <name>grappa_calib_over_determine_ratio</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>grappa_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_SPIRIT -->
-        <property>
-            <name>spirit_kSize_RO</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E1</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>spirit_reg_lamda</name>
-            <value>0.005</value>
-        </property>
-        <property>
-            <name>spirit_use_gpu</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_calib_over_determine_ratio</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>spirit_solve_symmetric</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_iter_max</name>
-            <value>90</value>
-        </property>
-        <property>
-            <name>spirit_iter_thres</name>
-            <value>0.0015</value>
-        </property>
-        <property>
-            <name>spirit_print_iter</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_L1SPIRIT -->
-        <property>
-            <name>spirit_perform_linear</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_perform_nonlinear</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_parallel_imaging_lamda</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_image_reg_lamda</name>
-            <value>0.0025</value>
-        </property>
-        <property>
-            <name>spirit_data_fidelity_lamda</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>spirit_ncg_iter_max</name>
-            <value>10</value>
-        </property>
-        <property>
-            <name>spirit_ncg_iter_thres</name>
-            <value>0.0001</value>
-        </property>
-        <property>
-            <name>spirit_ncg_print_iter</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_use_coil_sen_map</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_use_moco_enhancement</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_recon_moco_images</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_RO_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_E1_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_E2_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_temporal_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_2D_scale_per_chunk</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for scaling and image sending -->
-        <property>
-            <name>min_intensity_value</name>
-            <value>64</value>
-        </property>
-
-        <property>
-            <name>max_intensity_value</name>
-            <value>4095</value>
-        </property>
-
-        <property>
-            <name>scalingFactor</name>
-            <value>-1.0</value>
-        </property>
-
-        <property>
-            <name>use_constant_scalingFactor</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for kspace filter, image data -->
-        <property>
-            <name>filterRO</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterRO_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE1</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE1_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE2</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE2_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, ref data -->
-        <property>
-            <name>filterRefRO</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE1</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE2</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
-        <property>
-            <name>filterPartialFourierRO</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE1</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE2</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
-        <property>
-            <name>partialFourier_algo</name>
-            <value>ISMRMRD_PF_FENGHUANG</value>
-        </property>
-
-        <!-- parameters for partial fourier homodyne algorithm -->
-        <property>
-            <name>partialFourier_homodyne_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_densityComp</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for partial fourier POCS algorithm -->
-        <property>
-            <name>partialFourier_POCS_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for partial fourier FengHuang algorithm -->
-        <property>
-            <name>partialFourier_FengHuang_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E1</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_thresReg</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_sameKernel_allN</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for debug and timing -->
-        <property>
-            <name>debugFolder</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>debugFolder2</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>cloudNodeFile</name>
-            <value>myCloud_2DT.txt</value>
-        </property>
-
-        <property>
-            <name>performTiming</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for system acquisition -->
-        <property>
-            <name>timeStampResolution</name>
-            <value>0.0025</value>
-        </property>
-
-        <!-- parameters for recon job split -->
-        <property>
-            <name>job_split_by_S</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>job_num_of_N</name>
-            <value>32</value>
-        </property>
-        <property>
-            <name>job_max_Megabytes</name>
-            <value>10240</value>
-        </property>
-        <property>
-            <name>job_overlap</name>
-            <value>2</value>
-        </property>
-        <property>
-            <name>job_perform_on_control_node</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for the cloud computation 
-             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
-        -->
-        <property>
-            <name>CloudComputing</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>CloudSize</name>
-            <value>1</value>
-        </property>
-
-        <!-- node 0 -->
-        <property>
-            <name>CloudNode0_IP</name>
-            <value>localhost</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_Port</name>
-            <value>9003</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_XMLConfiguration</name>
-            <value>GadgetronProgram_gtPlus_2DT_Cartesian_CloudNode.xml</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_ComputingPowerIndex</name>
-            <value>1</value>
-        </property>
-
-    </gadget>
-
-    <!-- after recon processing -->
-    <gadget>
-        <name>FloatToShort</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>FloatToUShortGadget</classname>
-    </gadget>
-
-    <!--
-    <gadget>
-      <name>ImageFinishCPLX</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetCPLX</classname>
-    </gadget>
-    -->
-
-    <!--
-    <gadget>
-      <name>ImageFinishFLOAT</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetFLOAT</classname>
-    </gadget>
-    -->
-
-    <gadget>
-        <name>ImageFinishUSHORT</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>ImageFinishGadgetUSHORT</classname>
-    </gadget>
-
-</gadgetronStreamConfiguration>
diff --git a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Cartesian_CloudNode.xml b/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Cartesian_CloudNode.xml
deleted file mode 100644
index 3be2ad1..0000000
--- a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Cartesian_CloudNode.xml
+++ /dev/null
@@ -1,67 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-        xmlns="http://gadgetron.sf.net/gadgetron"
-        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-
-    <!-- 
-        GT Plus configuration file for 2DT cartesian reconstruction on GtPlus Cloud
-        This configuration file configures one gadget to perform the reconstruction for
-        2DT job packages
-
-        Depending on the incoming algorithm parameters, both linear and non-linear reconstruction
-        can be performed
-
-        Author: Hui Xue
-        Magnetic Resonance Technology Program
-        National Heart, Lung and Blood Institute
-        National Institutes of Health
-        10 Center Drive, Bethesda
-        MD 20814
-        USA
-        Email: hui.xue at nih.gov
-    -->
-
-    <!-- reader -->
-    <reader>
-        <slot>1013</slot>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusCloudJobMessageReaderCPFL</classname>
-    </reader>
-
-    <!-- writer -->
-    <writer>
-        <slot>1013</slot>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusCloudJobMessageWriterCPFL</classname>
-    </writer>
-
-    <!--
-    Recon computation for 2DT/3DT cases, process one job
-    a gtPlusReconJob2DT job consists of kspace, kernel and parameters
-    kspace: [RO E1 CHA E2/PHS]
-    -->
-
-    <gadget>
-        <name>Recon</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusReconJob2DTGadget</classname>
-
-        <!-- parameters for debug and timing -->
-        <property>
-            <name>debugFolder</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>performTiming</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-    </gadget>
-
-</gadgetronStreamConfiguration>
diff --git a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Cartesian_DualLayer_Gateway_L1SPIRIT.xml b/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Cartesian_DualLayer_Gateway_L1SPIRIT.xml
deleted file mode 100644
index 32d6ae5..0000000
--- a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Cartesian_DualLayer_Gateway_L1SPIRIT.xml
+++ /dev/null
@@ -1,808 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-        xmlns="http://gadgetron.sf.net/gadgetron"
-        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-
-    <!-- 
-        GT Plus configuration file for general 2D cartesian reconstruction using L1 SPIRIT
-        The GtPlus cloud computing by default is turned on in this configuration file
-        The dual-layer cloud topology is used here, therefore every incoming SLICE is sent
-        to one first layer GtPlus cloud node. This first layer node can further split the job and
-        process the SLICE with one or more second layer nodes.
-
-        Author: Hui Xue
-        Magnetic Resonance Technology Program
-        National Heart, Lung and Blood Institute
-        National Institutes of Health
-        10 Center Drive, Bethesda
-        MD 20814
-        USA
-        Email: hui.xue at nih.gov
-
-        Ref to: 
-
-        Hui Xue, Souheil Inati, Thomas Sangild Sorensen, Peter Kellman, Michael S. Hansen. 
-        Distributed MRI Reconstruction using Gadgetron based Cloud Computing. Submitted to
-        Magnetic Resonance in Medicine on Dec 2013.
-    -->
-
-    <!-- reader -->
-    <reader>
-        <slot>1008</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-    </reader>
-
-    <!-- writer -->
-    <writer>
-        <slot>1004</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterCPLX</classname>
-    </writer>
-    <writer>
-        <slot>1005</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterFLOAT</classname>
-    </writer>
-    <writer>
-        <slot>1006</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterUSHORT</classname>
-    </writer>
-
-    <!-- RO asymmetric echo handling -->
-    <gadget>
-        <name>PartialFourierAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>PartialFourierAdjustROGadget</classname>
-    </gadget>
-
-    <!-- Noise prewhitening -->
-    <gadget>
-        <name>NoiseAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>NoiseAdjustGadget</classname>
-    </gadget>
-
-    <!-- RO oversampling removal -->
-    <gadget>
-        <name>RemoveROOversampling</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>RemoveROOversamplingGadget</classname>
-    </gadget>
-
-    <!-- Data accumulation and trigger gadget -->
-    <gadget>
-        <name>Acc</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
-
-        <!-- debug and info mode -->
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>noacceleration_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>noacceleration_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>noacceleration_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>interleaved_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>interleaved_numOfKSpace_triggerDim1</name>
-            <value>0</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>embedded_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>embedded_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>separate_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>separate_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
-        <property>
-            <name>other_kspace_matching_Dim</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-    </gadget>
-
-    <!-- Recon computation for 2DT cases -->
-    <gadget>
-        <name>Recon</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusRecon2DTGadgetCloud</classname>
-
-        <!-- kspace data -->
-        <property>
-            <name>dim_4th</name>
-            <value>DIM_Phase</value>
-        </property>
-        <property>
-            <name>dim_5th</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <!-- work flow -->
-        <property>
-            <name>workOrder_ShareDim</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>no_acceleration_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>no_acceleration_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>interleaved_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>interleaved_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap_useHighestSignal</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>embedded_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_ref_fillback</name>
-            <value>true</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>separate_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- coil compression -->
-        <property>
-            <name>same_coil_compression_coeff_allS</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>downstream_coil_compression</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>coil_compression_thres</name>
-            <value>0.005</value>
-        </property>
-
-        <property>
-            <name>coil_compression_num_modesKept</name>
-            <value>-1</value>
-        </property>
-
-        <!-- parameters for coil map estimation 
-            enum ISMRMRDCOILMAPALGO
-            {
-                ISMRMRD_SOUHEIL,
-                ISMRMRD_SOUHEIL_ITER
-            };
-        -->
-        <property>
-            <name>coil_map_algorithm</name>
-            <value>ISMRMRD_SOUHEIL</value>
-        </property>
-        <property>
-            <name>csm_kSize</name>
-            <value>7</value>
-        </property>
-
-        <property>
-            <name>csm_powermethod_num</name>
-            <value>3</value>
-        </property>
-
-        <property>
-            <name>csm_true_3D</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>csm_iter_num</name>
-            <value>5</value>
-        </property>
-
-        <property>
-            <name>csm_iter_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>csm_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- algorithm -->
-        <property>
-            <name>recon_algorithm</name>
-            <value>ISMRMRD_L1SPIRIT</value>
-        </property>
-
-        <property>
-            <name>recon_kspace_needed</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>recon_auto_parameters</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_GRAPPA -->
-        <property>
-            <name>grappa_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E1</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E2</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_reg_lamda</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>grappa_calib_over_determine_ratio</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>grappa_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_SPIRIT -->
-        <property>
-            <name>spirit_kSize_RO</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E1</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>spirit_reg_lamda</name>
-            <value>0.005</value>
-        </property>
-        <property>
-            <name>spirit_use_gpu</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_calib_over_determine_ratio</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>spirit_solve_symmetric</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_iter_max</name>
-            <value>90</value>
-        </property>
-        <property>
-            <name>spirit_iter_thres</name>
-            <value>0.0015</value>
-        </property>
-        <property>
-            <name>spirit_print_iter</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_L1SPIRIT -->
-        <property>
-            <name>spirit_perform_linear</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_perform_nonlinear</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_parallel_imaging_lamda</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_image_reg_lamda</name>
-            <value>0.001</value>
-        </property>
-        <property>
-            <name>spirit_data_fidelity_lamda</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>spirit_ncg_iter_max</name>
-            <value>10</value>
-        </property>
-        <property>
-            <name>spirit_ncg_iter_thres</name>
-            <value>0.0001</value>
-        </property>
-        <property>
-            <name>spirit_ncg_print_iter</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_use_coil_sen_map</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_use_moco_enhancement</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_recon_moco_images</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_RO_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_E1_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_E2_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_temporal_enhancement_ratio</name>
-            <value>20.0</value>
-        </property>
-        <property>
-            <name>spirit_2D_scale_per_chunk</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for scaling and image sending -->
-        <property>
-            <name>min_intensity_value</name>
-            <value>64</value>
-        </property>
-
-        <property>
-            <name>max_intensity_value</name>
-            <value>4095</value>
-        </property>
-
-        <property>
-            <name>scalingFactor</name>
-            <value>-1.0</value>
-        </property>
-
-        <property>
-            <name>use_constant_scalingFactor</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for kspace filter, image data -->
-        <property>
-            <name>filterRO</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterRO_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE1</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE1_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE2</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE2_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, ref data -->
-        <property>
-            <name>filterRefRO</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE1</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE2</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
-        <property>
-            <name>filterPartialFourierRO</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE1</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE2</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
-        <property>
-            <name>partialFourier_algo</name>
-            <value>ISMRMRD_PF_FENGHUANG</value>
-        </property>
-
-        <!-- parameters for partial fourier homodyne algorithm -->
-        <property>
-            <name>partialFourier_homodyne_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_densityComp</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for partial fourier POCS algorithm -->
-        <property>
-            <name>partialFourier_POCS_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for partial fourier FengHuang algorithm -->
-        <property>
-            <name>partialFourier_FengHuang_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E1</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_thresReg</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_sameKernel_allN</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for debug and timing -->
-        <property>
-            <name>debugFolder</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>debugFolder2</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>cloudNodeFile</name>
-            <value>myCloud_2DT_DualLayer.txt</value>
-        </property>
-
-        <property>
-            <name>performTiming</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>verboseMode</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for system acquisition -->
-        <property>
-            <name>timeStampResolution</name>
-            <value>0.0025</value>
-        </property>
-
-        <!-- parameters for recon job split -->
-        <property>
-            <name>job_split_by_S</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>job_num_of_N</name>
-            <value>48</value>
-        </property>
-        <property>
-            <name>job_max_Megabytes</name>
-            <value>2048</value>
-        </property>
-        <property>
-            <name>job_overlap</name>
-            <value>1</value>
-        </property>
-        <property>
-            <name>job_perform_on_control_node</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for the cloud computation 
-             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
-        -->
-        <property>
-            <name>CloudComputing</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>CloudSize</name>
-            <value>1</value>
-        </property>
-
-        <!-- node 0 -->
-        <property>
-            <name>CloudNode0_IP</name>
-            <value>localhost</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_Port</name>
-            <value>9003</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_XMLConfiguration</name>
-            <value>GadgetronProgram_gtPlus_2DT_Cartesian_FirstLayer_CloudNode.xml</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_ComputingPowerIndex</name>
-            <value>1</value>
-        </property>
-
-    </gadget>
-
-    <!-- after recon processing -->
-    <gadget>
-        <name>FloatToShort</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>FloatToUShortGadget</classname>
-    </gadget>
-
-    <!--
-    <gadget>
-      <name>ImageFinishCPLX</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetCPLX</classname>
-    </gadget>
-    -->
-
-    <!--
-    <gadget>
-      <name>ImageFinishFLOAT</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetFLOAT</classname>
-    </gadget>
-    -->
-
-    <gadget>
-        <name>ImageFinishUSHORT</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>ImageFinishGadgetUSHORT</classname>
-    </gadget>
-
-</gadgetronStreamConfiguration>
diff --git a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Cartesian_DualLayer_Gateway_SPIRIT.xml b/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Cartesian_DualLayer_Gateway_SPIRIT.xml
deleted file mode 100644
index 52e6269..0000000
--- a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Cartesian_DualLayer_Gateway_SPIRIT.xml
+++ /dev/null
@@ -1,808 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-        xmlns="http://gadgetron.sf.net/gadgetron"
-        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-
-    <!-- 
-        GT Plus configuration file for general 2D cartesian reconstruction using linear SPIRIT
-        The GtPlus cloud computing by default is turned on in this configuration file
-        The dual-layer cloud topology is used here, therefore every incoming SLICE is sent
-        to one first layer GtPlus cloud node. This first layer node can further split the job and
-        process the SLICE with one or more second layer nodes.
-
-        Author: Hui Xue
-        Magnetic Resonance Technology Program
-        National Heart, Lung and Blood Institute
-        National Institutes of Health
-        10 Center Drive, Bethesda
-        MD 20814
-        USA
-        Email: hui.xue at nih.gov
-
-        Ref to: 
-
-        Hui Xue, Souheil Inati, Thomas Sangild Sorensen, Peter Kellman, Michael S. Hansen. 
-        Distributed MRI Reconstruction using Gadgetron based Cloud Computing. Submitted to
-        Magnetic Resonance in Medicine on Dec 2013.
-    -->
-
-    <!-- reader -->
-    <reader>
-        <slot>1008</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-    </reader>
-
-    <!-- writer -->
-    <writer>
-        <slot>1004</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterCPLX</classname>
-    </writer>
-    <writer>
-        <slot>1005</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterFLOAT</classname>
-    </writer>
-    <writer>
-        <slot>1006</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterUSHORT</classname>
-    </writer>
-
-    <!-- RO asymmetric echo handling -->
-    <gadget>
-        <name>PartialFourierAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>PartialFourierAdjustROGadget</classname>
-    </gadget>
-
-    <!-- Noise prewhitening -->
-    <gadget>
-        <name>NoiseAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>NoiseAdjustGadget</classname>
-    </gadget>
-
-    <!-- RO oversampling removal -->
-    <gadget>
-        <name>RemoveROOversampling</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>RemoveROOversamplingGadget</classname>
-    </gadget>
-
-    <!-- Data accumulation and trigger gadget -->
-    <gadget>
-        <name>Acc</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
-
-        <!-- debug and info mode -->
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>noacceleration_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>noacceleration_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>noacceleration_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>interleaved_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>interleaved_numOfKSpace_triggerDim1</name>
-            <value>0</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>embedded_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>embedded_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>separate_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>separate_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
-        <property>
-            <name>other_kspace_matching_Dim</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-    </gadget>
-
-    <!-- Recon computation for 2DT cases -->
-    <gadget>
-        <name>Recon</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusRecon2DTGadgetCloud</classname>
-
-        <!-- kspace data -->
-        <property>
-            <name>dim_4th</name>
-            <value>DIM_Phase</value>
-        </property>
-        <property>
-            <name>dim_5th</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <!-- work flow -->
-        <property>
-            <name>workOrder_ShareDim</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>no_acceleration_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>no_acceleration_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>interleaved_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>interleaved_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap_useHighestSignal</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>embedded_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_ref_fillback</name>
-            <value>true</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>separate_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- coil compression -->
-        <property>
-            <name>same_coil_compression_coeff_allS</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>downstream_coil_compression</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>coil_compression_thres</name>
-            <value>0.005</value>
-        </property>
-
-        <property>
-            <name>coil_compression_num_modesKept</name>
-            <value>-1</value>
-        </property>
-
-        <!-- parameters for coil map estimation 
-            enum ISMRMRDCOILMAPALGO
-            {
-                ISMRMRD_SOUHEIL,
-                ISMRMRD_SOUHEIL_ITER
-            };
-        -->
-        <property>
-            <name>coil_map_algorithm</name>
-            <value>ISMRMRD_SOUHEIL</value>
-        </property>
-        <property>
-            <name>csm_kSize</name>
-            <value>7</value>
-        </property>
-
-        <property>
-            <name>csm_powermethod_num</name>
-            <value>3</value>
-        </property>
-
-        <property>
-            <name>csm_true_3D</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>csm_iter_num</name>
-            <value>5</value>
-        </property>
-
-        <property>
-            <name>csm_iter_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>csm_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- algorithm -->
-        <property>
-            <name>recon_algorithm</name>
-            <value>ISMRMRD_GRAPPA</value>
-        </property>
-
-        <property>
-            <name>recon_kspace_needed</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>recon_auto_parameters</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_GRAPPA -->
-        <property>
-            <name>grappa_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E1</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E2</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_reg_lamda</name>
-            <value>0.0005</value>
-        </property>
-        <property>
-            <name>grappa_calib_over_determine_ratio</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>grappa_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_SPIRIT -->
-        <property>
-            <name>spirit_kSize_RO</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E1</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>spirit_reg_lamda</name>
-            <value>0.005</value>
-        </property>
-        <property>
-            <name>spirit_use_gpu</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_calib_over_determine_ratio</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>spirit_solve_symmetric</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_iter_max</name>
-            <value>90</value>
-        </property>
-        <property>
-            <name>spirit_iter_thres</name>
-            <value>0.0015</value>
-        </property>
-        <property>
-            <name>spirit_print_iter</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_L1SPIRIT -->
-        <property>
-            <name>spirit_perform_linear</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_perform_nonlinear</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_parallel_imaging_lamda</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_image_reg_lamda</name>
-            <value>0.0025</value>
-        </property>
-        <property>
-            <name>spirit_data_fidelity_lamda</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>spirit_ncg_iter_max</name>
-            <value>10</value>
-        </property>
-        <property>
-            <name>spirit_ncg_iter_thres</name>
-            <value>0.0001</value>
-        </property>
-        <property>
-            <name>spirit_ncg_print_iter</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_use_coil_sen_map</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_use_moco_enhancement</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_recon_moco_images</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_RO_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_E1_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_E2_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_temporal_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_2D_scale_per_chunk</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for scaling and image sending -->
-        <property>
-            <name>min_intensity_value</name>
-            <value>64</value>
-        </property>
-
-        <property>
-            <name>max_intensity_value</name>
-            <value>4095</value>
-        </property>
-
-        <property>
-            <name>scalingFactor</name>
-            <value>-1.0</value>
-        </property>
-
-        <property>
-            <name>use_constant_scalingFactor</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for kspace filter, image data -->
-        <property>
-            <name>filterRO</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterRO_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE1</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE1_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE2</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE2_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, ref data -->
-        <property>
-            <name>filterRefRO</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE1</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE2</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
-        <property>
-            <name>filterPartialFourierRO</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE1</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE2</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
-        <property>
-            <name>partialFourier_algo</name>
-            <value>ISMRMRD_PF_FENGHUANG</value>
-        </property>
-
-        <!-- parameters for partial fourier homodyne algorithm -->
-        <property>
-            <name>partialFourier_homodyne_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_densityComp</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for partial fourier POCS algorithm -->
-        <property>
-            <name>partialFourier_POCS_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for partial fourier FengHuang algorithm -->
-        <property>
-            <name>partialFourier_FengHuang_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E1</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_thresReg</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_sameKernel_allN</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for debug and timing -->
-        <property>
-            <name>debugFolder</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>debugFolder2</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>cloudNodeFile</name>
-            <value>myCloud_2DT_DualLayer.txt</value>
-        </property>
-
-        <property>
-            <name>performTiming</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for system acquisition -->
-        <property>
-            <name>timeStampResolution</name>
-            <value>0.0025</value>
-        </property>
-
-        <!-- parameters for recon job split -->
-        <property>
-            <name>job_split_by_S</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>job_num_of_N</name>
-            <value>32</value>
-        </property>
-        <property>
-            <name>job_max_Megabytes</name>
-            <value>2048</value>
-        </property>
-        <property>
-            <name>job_overlap</name>
-            <value>2</value>
-        </property>
-        <property>
-            <name>job_perform_on_control_node</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for the cloud computation 
-             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
-        -->
-        <property>
-            <name>CloudComputing</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>CloudSize</name>
-            <value>1</value>
-        </property>
-
-        <!-- node 0 -->
-        <property>
-            <name>CloudNode0_IP</name>
-            <value>localhost</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_Port</name>
-            <value>9003</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_XMLConfiguration</name>
-            <value>GadgetronProgram_gtPlus_2DT_Cartesian_FirstLayer_CloudNode.xml</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_ComputingPowerIndex</name>
-            <value>1</value>
-        </property>
-
-    </gadget>
-
-    <!-- after recon processing -->
-    <gadget>
-        <name>FloatToShort</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>FloatToUShortGadget</classname>
-    </gadget>
-
-    <!--
-    <gadget>
-      <name>ImageFinishCPLX</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetCPLX</classname>
-    </gadget>
-    -->
-
-    <!--
-    <gadget>
-      <name>ImageFinishFLOAT</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetFLOAT</classname>
-    </gadget>
-    -->
-
-    <gadget>
-        <name>ImageFinishUSHORT</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>ImageFinishGadgetUSHORT</classname>
-    </gadget>
-
-</gadgetronStreamConfiguration>
diff --git a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Cartesian_FirstLayer_CloudNode.xml b/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Cartesian_FirstLayer_CloudNode.xml
deleted file mode 100644
index aaf305a..0000000
--- a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Cartesian_FirstLayer_CloudNode.xml
+++ /dev/null
@@ -1,269 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-        xmlns="http://gadgetron.sf.net/gadgetron"
-        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-
-    <!-- 
-        GT Plus configuration file for general 2D cartesian reconstruction using linear or non-linear SPIRIT
-        The dual-layer cloud topology is used here, therefore every incoming SLICE is sent
-        to one first layer GtPlus cloud node. This first layer node can further split the job and
-        process the SLICE with one or more second layer nodes.
-
-        This configuration file is for the first layer GtPlus cloud node.
-
-        Author: Hui Xue
-        Magnetic Resonance Technology Program
-        National Heart, Lung and Blood Institute
-        National Institutes of Health
-        10 Center Drive, Bethesda
-        MD 20814
-        USA
-        Email: hui.xue at nih.gov
-
-        Ref to: 
-
-        Hui Xue, Souheil Inati, Thomas Sangild Sorensen, Peter Kellman, Michael S. Hansen. 
-        Distributed MRI Reconstruction using Gadgetron based Cloud Computing. Submitted to
-        Magnetic Resonance in Medicine on Dec 2013.
-    -->
-
-    <!-- reader -->
-    <reader>
-        <slot>1014</slot>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlus2DTGadgetCloudJobMessageReaderCPFL</classname>
-    </reader>
-
-    <!-- writer -->
-    <writer>
-        <slot>1014</slot>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlus2DTGadgetCloudJobMessageWriterCPFL</classname>
-    </writer>
-
-    <!--
-    Recon computation for 2DT cases
-    -->
-
-    <gadget>
-        <name>Recon</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusReconJob2DTGadgetCloud</classname>
-
-        <!-- parameters for kspace filter, image data -->
-        <property>
-            <name>filterRO</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterRO_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE1</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE1_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE2</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE2_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, ref data -->
-        <property>
-            <name>filterRefRO</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE1</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE2</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
-        <property>
-            <name>filterPartialFourierRO</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE1</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE2</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for recon job split -->
-        <property>
-            <name>job_split_by_S</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>job_num_of_N</name>
-            <value>48</value>
-        </property>
-        <property>
-            <name>job_max_Megabytes</name>
-            <value>2048</value>
-        </property>
-        <property>
-            <name>job_overlap</name>
-            <value>1</value>
-        </property>
-        <property>
-            <name>job_perform_on_control_node</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for the cloud computation 
-             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
-        -->
-        <property>
-            <name>CloudComputing</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>CloudSize</name>
-            <value>1</value>
-        </property>
-
-        <!-- node 0 -->
-        <property>
-            <name>CloudNode0_IP</name>
-            <value>localhost</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_Port</name>
-            <value>9004</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_XMLConfiguration</name>
-            <value>GadgetronProgram_gtPlus_2DT_Cartesian_CloudNode.xml</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_ComputingPowerIndex</name>
-            <value>1</value>
-        </property>
-
-        <!-- parameters for debug and timing -->
-        <property>
-            <name>debugFolder</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>debugFolder2</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>cloudNodeFile</name>
-            <value>myCloud_2DT_DualLayer_FirstLayer.txt</value>
-        </property>
-
-        <property>
-            <name>performTiming</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-    </gadget>
-
-</gadgetronStreamConfiguration>
diff --git a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Cartesian_L1SPIRIT.xml b/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Cartesian_L1SPIRIT.xml
deleted file mode 100644
index 5f83c31..0000000
--- a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Cartesian_L1SPIRIT.xml
+++ /dev/null
@@ -1,799 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-        xmlns="http://gadgetron.sf.net/gadgetron"
-        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-
-    <!-- 
-        GT Plus configuratin file for general 2D cartesian reconstruction using L1 SPIRIT
-        The GtPlus cloud computing can be turned on in this configuration file
-
-        Author: Hui Xue
-        Magnetic Resonance Technology Program
-        National Heart, Lung and Blood Institute
-        National Institutes of Health
-        10 Center Drive, Bethesda
-        MD 20814
-        USA
-        Email: hui.xue at nih.gov
-    -->
-
-    <!-- reader -->
-    <reader>
-        <slot>1008</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-    </reader>
-
-    <!-- writer -->
-    <writer>
-        <slot>1004</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterCPLX</classname>
-    </writer>
-    <writer>
-        <slot>1005</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterFLOAT</classname>
-    </writer>
-    <writer>
-        <slot>1006</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterUSHORT</classname>
-    </writer>
-
-    <!-- RO asymmetric echo handling -->
-    <gadget>
-        <name>PartialFourierAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>PartialFourierAdjustROGadget</classname>
-    </gadget>
-
-    <!-- Noise prewhitening -->
-    <gadget>
-        <name>NoiseAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>NoiseAdjustGadget</classname>
-    </gadget>
-
-    <!-- RO oversampling removal -->
-    <gadget>
-        <name>RemoveROOversampling</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>RemoveROOversamplingGadget</classname>
-    </gadget>
-
-    <!-- Data accumulation and trigger gadget -->
-    <gadget>
-        <name>Acc</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
-
-        <!-- debug and info mode -->
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>noacceleration_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>noacceleration_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>noacceleration_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>interleaved_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>interleaved_numOfKSpace_triggerDim1</name>
-            <value>8</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>embedded_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>embedded_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>separate_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>separate_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- if 'other' kspace data presents, enforce its dimension matches the image data-->
-        <property>
-            <name>other_kspace_matching_Dim</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-    </gadget>
-
-    <!-- Recon computation for 2DT cases -->
-    <gadget>
-        <name>Recon</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusRecon2DTGadget</classname>
-
-        <!-- kspace data -->
-        <property>
-            <name>dim_4th</name>
-            <value>DIM_NONE</value>
-        </property>
-        <property>
-            <name>dim_5th</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <!-- work flow -->
-        <property>
-            <name>workOrder_ShareDim</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>no_acceleration_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>no_acceleration_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>interleaved_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>interleaved_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap_useHighestSignal</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>embedded_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_ref_fillback</name>
-            <value>true</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>separate_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- coil compression -->
-        <property>
-            <name>same_coil_compression_coeff_allS</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>downstream_coil_compression</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>coil_compression_thres</name>
-            <value>0.005</value>
-        </property>
-
-        <property>
-            <name>coil_compression_num_modesKept</name>
-            <value>-1</value>
-        </property>
-
-        <!-- parameters for coil map estimation 
-            enum ISMRMRDCOILMAPALGO
-            {
-                ISMRMRD_SOUHEIL,
-                ISMRMRD_SOUHEIL_ITER
-            };
-        -->
-        <property>
-            <name>coil_map_algorithm</name>
-            <value>ISMRMRD_SOUHEIL</value>
-        </property>
-        <property>
-            <name>csm_kSize</name>
-            <value>7</value>
-        </property>
-
-        <property>
-            <name>csm_powermethod_num</name>
-            <value>3</value>
-        </property>
-
-        <property>
-            <name>csm_true_3D</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>csm_iter_num</name>
-            <value>5</value>
-        </property>
-
-        <property>
-            <name>csm_iter_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>csm_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- algorithm -->
-        <property>
-            <name>recon_algorithm</name>
-            <value>ISMRMRD_L1SPIRIT</value>
-        </property>
-
-        <property>
-            <name>recon_kspace_needed</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>recon_auto_parameters</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_GRAPPA -->
-        <property>
-            <name>grappa_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E1</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E2</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_reg_lamda</name>
-            <value>0.0005</value>
-        </property>
-        <property>
-            <name>grappa_calib_over_determine_ratio</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>grappa_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_SPIRIT -->
-        <property>
-            <name>spirit_kSize_RO</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E1</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>spirit_reg_lamda</name>
-            <value>0.005</value>
-        </property>
-        <property>
-            <name>spirit_use_gpu</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_calib_over_determine_ratio</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>spirit_solve_symmetric</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_iter_max</name>
-            <value>90</value>
-        </property>
-        <property>
-            <name>spirit_iter_thres</name>
-            <value>0.0015</value>
-        </property>
-        <property>
-            <name>spirit_print_iter</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_L1SPIRIT -->
-        <property>
-            <name>spirit_perform_linear</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_perform_nonlinear</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_parallel_imaging_lamda</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_image_reg_lamda</name>
-            <value>0.0025</value>
-        </property>
-        <property>
-            <name>spirit_data_fidelity_lamda</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>spirit_ncg_iter_max</name>
-            <value>10</value>
-        </property>
-        <property>
-            <name>spirit_ncg_iter_thres</name>
-            <value>0.0001</value>
-        </property>
-        <property>
-            <name>spirit_ncg_print_iter</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_use_coil_sen_map</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_use_moco_enhancement</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_recon_moco_images</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_RO_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_E1_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_E2_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_temporal_enhancement_ratio</name>
-            <value>20.0</value>
-        </property>
-        <property>
-            <name>spirit_2D_scale_per_chunk</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for scaling and image sending -->
-        <property>
-            <name>min_intensity_value</name>
-            <value>64</value>
-        </property>
-
-        <property>
-            <name>max_intensity_value</name>
-            <value>4095</value>
-        </property>
-
-        <property>
-            <name>scalingFactor</name>
-            <value>-1.0</value>
-        </property>
-
-        <property>
-            <name>use_constant_scalingFactor</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for kspace filter, image data -->
-        <property>
-            <name>filterRO</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterRO_sigma</name>
-            <value>0.5</value>
-        </property>
-        <property>
-            <name>filterRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE1</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE1_sigma</name>
-            <value>0.5</value>
-        </property>
-        <property>
-            <name>filterE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE2</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE2_sigma</name>
-            <value>0.5</value>
-        </property>
-        <property>
-            <name>filterE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, ref data -->
-        <property>
-            <name>filterRefRO</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE1</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE2</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
-        <property>
-            <name>filterPartialFourierRO</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE1</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE2</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
-        <property>
-            <name>partialFourier_algo</name>
-            <value>ISMRMRD_PF_FENGHUANG</value>
-        </property>
-
-        <!-- parameters for partial fourier homodyne algorithm -->
-        <property>
-            <name>partialFourier_homodyne_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_densityComp</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for partial fourier POCS algorithm -->
-        <property>
-            <name>partialFourier_POCS_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for partial fourier FengHuang algorithm -->
-        <property>
-            <name>partialFourier_FengHuang_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E1</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_thresReg</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_sameKernel_allN</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for debug and timing -->
-        <property>
-            <name>debugFolder</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>debugFolder2</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>cloudNodeFile</name>
-            <value>myCloud_2DT.txt</value>
-        </property>
-
-        <property>
-            <name>performTiming</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for system acquisition -->
-        <property>
-            <name>timeStampResolution</name>
-            <value>0.0025</value>
-        </property>
-
-        <!-- parameters for recon job split -->
-        <property>
-            <name>job_split_by_S</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>job_num_of_N</name>
-            <value>48</value>
-        </property>
-        <property>
-            <name>job_max_Megabytes</name>
-            <value>8192</value>
-        </property>
-        <property>
-            <name>job_overlap</name>
-            <value>2</value>
-        </property>
-        <property>
-            <name>job_perform_on_control_node</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for the cloud computation 
-             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
-        -->
-        <property>
-            <name>CloudComputing</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>CloudSize</name>
-            <value>1</value>
-        </property>
-
-        <!-- node 0 -->
-        <property>
-            <name>CloudNode0_IP</name>
-            <value>localhost</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_Port</name>
-            <value>9003</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_XMLConfiguration</name>
-            <value>GadgetronProgram_gtPlus_2DT_Cartesian_CloudNode.xml</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_ComputingPowerIndex</name>
-            <value>1</value>
-        </property>
-
-    </gadget>
-
-    <!-- after recon processing -->
-    <gadget>
-        <name>FloatToShort</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>FloatToUShortGadget</classname>
-    </gadget>
-
-    <!--
-    <gadget>
-      <name>ImageFinishCPLX</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetCPLX</classname>
-    </gadget>
-    -->
-
-    <!--
-    <gadget>
-      <name>ImageFinishFLOAT</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetFLOAT</classname>
-    </gadget>
-    -->
-
-    <gadget>
-        <name>ImageFinishUSHORT</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>ImageFinishGadgetUSHORT</classname>
-    </gadget>
-
-</gadgetronStreamConfiguration>
diff --git a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Cartesian_SPIRIT.xml b/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Cartesian_SPIRIT.xml
deleted file mode 100644
index 22dc832..0000000
--- a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Cartesian_SPIRIT.xml
+++ /dev/null
@@ -1,799 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-        xmlns="http://gadgetron.sf.net/gadgetron"
-        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-
-    <!-- 
-        GT Plus configuratin file for general 2D cartesian reconstruction using linear SPIRIT
-        The GtPlus cloud computing can be turned on in this configuration file
-
-        Author: Hui Xue
-        Magnetic Resonance Technology Program
-        National Heart, Lung and Blood Institute
-        National Institutes of Health
-        10 Center Drive, Bethesda
-        MD 20814
-        USA
-        Email: hui.xue at nih.gov
-    -->
-
-    <!-- reader -->
-    <reader>
-        <slot>1008</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-    </reader>
-
-    <!-- writer -->
-    <writer>
-        <slot>1004</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterCPLX</classname>
-    </writer>
-    <writer>
-        <slot>1005</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterFLOAT</classname>
-    </writer>
-    <writer>
-        <slot>1006</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterUSHORT</classname>
-    </writer>
-
-    <!-- RO asymmetric echo handling -->
-    <gadget>
-        <name>PartialFourierAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>PartialFourierAdjustROGadget</classname>
-    </gadget>
-
-    <!-- Noise prewhitening -->
-    <gadget>
-        <name>NoiseAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>NoiseAdjustGadget</classname>
-    </gadget>
-
-    <!-- RO oversampling removal -->
-    <gadget>
-        <name>RemoveROOversampling</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>RemoveROOversamplingGadget</classname>
-    </gadget>
-
-    <!-- Data accumulation and trigger gadget -->
-    <gadget>
-        <name>Acc</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
-
-        <!-- debug and info mode -->
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>noacceleration_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>noacceleration_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>noacceleration_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>interleaved_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>interleaved_numOfKSpace_triggerDim1</name>
-            <value>8</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>embedded_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>embedded_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>separate_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>separate_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- if 'other' kspace data presents, enforce its dimension matches the image data-->
-        <property>
-            <name>other_kspace_matching_Dim</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-    </gadget>
-
-    <!-- Recon computation for 2DT cases -->
-    <gadget>
-        <name>Recon</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusRecon2DTGadget</classname>
-
-        <!-- kspace data -->
-        <property>
-            <name>dim_4th</name>
-            <value>DIM_NONE</value>
-        </property>
-        <property>
-            <name>dim_5th</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <!-- work flow -->
-        <property>
-            <name>workOrder_ShareDim</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>no_acceleration_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>no_acceleration_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>interleaved_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>interleaved_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap_useHighestSignal</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>embedded_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_ref_fillback</name>
-            <value>true</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>separate_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- coil compression -->
-        <property>
-            <name>same_coil_compression_coeff_allS</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>downstream_coil_compression</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>coil_compression_thres</name>
-            <value>0.005</value>
-        </property>
-
-        <property>
-            <name>coil_compression_num_modesKept</name>
-            <value>-1</value>
-        </property>
-
-        <!-- parameters for coil map estimation 
-            enum ISMRMRDCOILMAPALGO
-            {
-                ISMRMRD_SOUHEIL,
-                ISMRMRD_SOUHEIL_ITER
-            };
-        -->
-        <property>
-            <name>coil_map_algorithm</name>
-            <value>ISMRMRD_SOUHEIL</value>
-        </property>
-        <property>
-            <name>csm_kSize</name>
-            <value>7</value>
-        </property>
-
-        <property>
-            <name>csm_powermethod_num</name>
-            <value>3</value>
-        </property>
-
-        <property>
-            <name>csm_true_3D</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>csm_iter_num</name>
-            <value>5</value>
-        </property>
-
-        <property>
-            <name>csm_iter_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>csm_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- algorithm -->
-        <property>
-            <name>recon_algorithm</name>
-            <value>ISMRMRD_SPIRIT</value>
-        </property>
-
-        <property>
-            <name>recon_kspace_needed</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>recon_auto_parameters</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_GRAPPA -->
-        <property>
-            <name>grappa_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E1</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E2</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_reg_lamda</name>
-            <value>0.0005</value>
-        </property>
-        <property>
-            <name>grappa_calib_over_determine_ratio</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>grappa_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_SPIRIT -->
-        <property>
-            <name>spirit_kSize_RO</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E1</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>spirit_reg_lamda</name>
-            <value>0.005</value>
-        </property>
-        <property>
-            <name>spirit_use_gpu</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_calib_over_determine_ratio</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>spirit_solve_symmetric</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_iter_max</name>
-            <value>90</value>
-        </property>
-        <property>
-            <name>spirit_iter_thres</name>
-            <value>0.0015</value>
-        </property>
-        <property>
-            <name>spirit_print_iter</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_L1SPIRIT -->
-        <property>
-            <name>spirit_perform_linear</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_perform_nonlinear</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_parallel_imaging_lamda</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_image_reg_lamda</name>
-            <value>0.0025</value>
-        </property>
-        <property>
-            <name>spirit_data_fidelity_lamda</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>spirit_ncg_iter_max</name>
-            <value>10</value>
-        </property>
-        <property>
-            <name>spirit_ncg_iter_thres</name>
-            <value>0.0001</value>
-        </property>
-        <property>
-            <name>spirit_ncg_print_iter</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_use_coil_sen_map</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_use_moco_enhancement</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_recon_moco_images</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_RO_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_E1_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_E2_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_temporal_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_2D_scale_per_chunk</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for scaling and image sending -->
-        <property>
-            <name>min_intensity_value</name>
-            <value>64</value>
-        </property>
-
-        <property>
-            <name>max_intensity_value</name>
-            <value>4095</value>
-        </property>
-
-        <property>
-            <name>scalingFactor</name>
-            <value>-1.0</value>
-        </property>
-
-        <property>
-            <name>use_constant_scalingFactor</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for kspace filter, image data -->
-        <property>
-            <name>filterRO</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterRO_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE1</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE1_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE2</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE2_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, ref data -->
-        <property>
-            <name>filterRefRO</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE1</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE2</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
-        <property>
-            <name>filterPartialFourierRO</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE1</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE2</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
-        <property>
-            <name>partialFourier_algo</name>
-            <value>ISMRMRD_PF_FENGHUANG</value>
-        </property>
-
-        <!-- parameters for partial fourier homodyne algorithm -->
-        <property>
-            <name>partialFourier_homodyne_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_densityComp</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for partial fourier POCS algorithm -->
-        <property>
-            <name>partialFourier_POCS_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for partial fourier FengHuang algorithm -->
-        <property>
-            <name>partialFourier_FengHuang_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E1</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_thresReg</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_sameKernel_allN</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for debug and timing -->
-        <property>
-            <name>debugFolder</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>debugFolder2</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>cloudNodeFile</name>
-            <value>myCloud_2DT.txt</value>
-        </property>
-
-        <property>
-            <name>performTiming</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for system acquisition -->
-        <property>
-            <name>timeStampResolution</name>
-            <value>0.0025</value>
-        </property>
-
-        <!-- parameters for recon job split -->
-        <property>
-            <name>job_split_by_S</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>job_num_of_N</name>
-            <value>48</value>
-        </property>
-        <property>
-            <name>job_max_Megabytes</name>
-            <value>8192</value>
-        </property>
-        <property>
-            <name>job_overlap</name>
-            <value>1</value>
-        </property>
-        <property>
-            <name>job_perform_on_control_node</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for the cloud computation 
-             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
-        -->
-        <property>
-            <name>CloudComputing</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>CloudSize</name>
-            <value>1</value>
-        </property>
-
-        <!-- node 0 -->
-        <property>
-            <name>CloudNode0_IP</name>
-            <value>localhost</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_Port</name>
-            <value>9003</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_XMLConfiguration</name>
-            <value>GadgetronProgram_gtPlus_2DT_Cartesian_CloudNode.xml</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_ComputingPowerIndex</name>
-            <value>1</value>
-        </property>
-
-    </gadget>
-
-    <!-- after recon processing -->
-    <gadget>
-        <name>FloatToShort</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>FloatToUShortGadget</classname>
-    </gadget>
-
-    <!--
-    <gadget>
-      <name>ImageFinishCPLX</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetCPLX</classname>
-    </gadget>
-    -->
-
-    <!--
-    <gadget>
-      <name>ImageFinishFLOAT</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetFLOAT</classname>
-    </gadget>
-    -->
-
-    <gadget>
-        <name>ImageFinishUSHORT</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>ImageFinishGadgetUSHORT</classname>
-    </gadget>
-
-</gadgetronStreamConfiguration>
diff --git a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_FatWater.xml b/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_FatWater.xml
deleted file mode 100644
index b084345..0000000
--- a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_FatWater.xml
+++ /dev/null
@@ -1,654 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-        xmlns="http://gadgetron.sf.net/gadgetron"
-        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-
-    <!-- 
-        GT Plus configuratin file for 2DT cartesian reconstruction on fat water multi-contrast application
-
-        Author: Hui Xue
-        Magnetic Resonance Technology Program
-        National Heart, Lung and Blood Institute
-        National Institutes of Health
-        10 Center Drive, Bethesda
-        MD 20814
-        USA
-        Email: hui.xue at nih.gov
-    -->
-
-    <!-- reader -->
-    <reader>
-        <slot>1008</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-    </reader>
-
-    <!-- writer -->
-    <writer>
-        <slot>1004</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterCPLX</classname>
-    </writer>
-    <writer>
-        <slot>1005</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterFLOAT</classname>
-    </writer>
-    <writer>
-        <slot>1006</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterUSHORT</classname>
-    </writer>
-
-    <!-- RO asymmetric echo handling -->
-    <gadget>
-        <name>PartialFourierAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>PartialFourierAdjustROGadget</classname>
-    </gadget>
-
-    <!-- Noise prewhitening -->
-    <gadget>
-        <name>NoiseAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>NoiseAdjustGadget</classname>
-    </gadget>
-
-    <!-- RO oversampling removal -->
-    <gadget>
-        <name>RemoveROOversampling</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>RemoveROOversamplingGadget</classname>
-    </gadget>
-
-    <!-- Data accumulation and trigger gadget -->
-    <gadget>
-        <name>Acc</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
-
-        <!-- debug and info mode -->
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>noacceleration_triggerDim1</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-        <property>
-            <name>noacceleration_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>noacceleration_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>interleaved_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>interleaved_numOfKSpace_triggerDim1</name>
-            <value>4</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>embedded_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>embedded_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>separate_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>separate_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- if 'other' kspace data presents, enforce its dimension matches the image data-->
-        <property>
-            <name>other_kspace_matching_Dim</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-    </gadget>
-
-    <!-- Recon computation for 2DT cases -->
-    <gadget>
-        <name>Recon</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusRecon2DTGadget</classname>
-
-        <!-- kspace data -->
-        <property>
-            <name>dim_4th</name>
-            <value>DIM_Repetition</value>
-        </property>
-        <property>
-            <name>dim_5th</name>
-            <value>DIM_Contrast</value>
-        </property>
-
-        <!-- work flow -->
-        <property>
-            <name>workOrder_ShareDim</name>
-            <value>DIM_Set</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>no_acceleration_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>no_acceleration_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_whichS_combinationcoeff</name>
-            <value>1</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>interleaved_whichS_combinationcoeff</name>
-            <value>1</value>
-        </property>
-        <property>
-            <name>interleaved_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap_useHighestSignal</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>embedded_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_whichS_combinationcoeff</name>
-            <value>1</value>
-        </property>
-        <property>
-            <name>embedded_ref_fillback</name>
-            <value>true</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>separate_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_whichS_combinationcoeff</name>
-            <value>1</value>
-        </property>
-
-        <!-- coil compression -->
-        <property>
-            <name>same_coil_compression_coeff_allS</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>downstream_coil_compression</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>coil_compression_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>coil_compression_num_modesKept</name>
-            <value>8</value>
-        </property>
-
-        <!-- parameters for coil map estimation 
-            enum ISMRMRDCOILMAPALGO
-            {
-                ISMRMRD_SOUHEIL,
-                ISMRMRD_SOUHEIL_ITER
-            };
-        -->
-        <property>
-            <name>coil_map_algorithm</name>
-            <value>ISMRMRD_SOUHEIL</value>
-        </property>
-        <property>
-            <name>csm_kSize</name>
-            <value>7</value>
-        </property>
-
-        <property>
-            <name>csm_powermethod_num</name>
-            <value>3</value>
-        </property>
-
-        <property>
-            <name>csm_true_3D</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>csm_iter_num</name>
-            <value>5</value>
-        </property>
-
-        <property>
-            <name>csm_iter_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>csm_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- algorithm -->
-        <property>
-            <name>recon_algorithm</name>
-            <value>ISMRMRD_GRAPPA</value>
-        </property>
-
-        <property>
-            <name>recon_kspace_needed</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_GRAPPA -->
-        <property>
-            <name>grappa_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E1</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E2</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_reg_lamda</name>
-            <value>0.0005</value>
-        </property>
-        <property>
-            <name>grappa_calib_over_determine_ratio</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>grappa_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_SPIRIT -->
-        <property>
-            <name>spirit_kSize_RO</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E1</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>spirit_reg_lamda</name>
-            <value>0.005</value>
-        </property>
-        <property>
-            <name>spirit_use_gpu</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_iter_max</name>
-            <value>70</value>
-        </property>
-        <property>
-            <name>spirit_iter_thres</name>
-            <value>1e-5</value>
-        </property>
-
-        <!-- parameters for scaling and image sending -->
-        <property>
-            <name>min_intensity_value</name>
-            <value>64</value>
-        </property>
-
-        <property>
-            <name>max_intensity_value</name>
-            <value>4095</value>
-        </property>
-
-        <property>
-            <name>scalingFactor</name>
-            <value>-1.0</value>
-        </property>
-
-        <property>
-            <name>use_constant_scalingFactor</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for kspace filter, image data -->
-        <property>
-            <name>filterRO</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterRO_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE1</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE1_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE2</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE2_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, ref data -->
-        <property>
-            <name>filterRefRO</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE1</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE2</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
-        <property>
-            <name>filterPartialFourierRO</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE1</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE2</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
-        <property>
-            <name>partialFourier_algo</name>
-            <value>ISMRMRD_PF_FENGHUANG</value>
-        </property>
-
-        <!-- parameters for partial fourier homodyne algorithm -->
-        <property>
-            <name>partialFourier_homodyne_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_densityComp</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for partial fourier POCS algorithm -->
-        <property>
-            <name>partialFourier_POCS_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for partial fourier FengHuang algorithm -->
-        <property>
-            <name>partialFourier_FengHuang_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E1</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_thresReg</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_sameKernel_allN</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for debug and timing -->
-        <property>
-            <name>debugFolder</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>debugFolder2</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>performTiming</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for system acquisition -->
-        <property>
-            <name>timeStampResolution</name>
-            <value>0.0025</value>
-        </property>
-
-    </gadget>
-
-    <!-- after recon processing -->
-    <gadget>
-        <name>FloatToShort</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>FloatToUShortGadget</classname>
-    </gadget>
-
-    <!--
-    <gadget>
-      <name>ImageFinishCPLX</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetCPLX</classname>
-    </gadget>
-    -->
-
-    <!--
-    <gadget>
-      <name>ImageFinishFLOAT</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetFLOAT</classname>
-    </gadget>
-    -->
-
-    <gadget>
-        <name>ImageFinishUSHORT</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>ImageFinishGadgetUSHORT</classname>
-    </gadget>
-
-</gadgetronStreamConfiguration>
diff --git a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_LGE.xml b/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_LGE.xml
deleted file mode 100644
index 5009962..0000000
--- a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_LGE.xml
+++ /dev/null
@@ -1,654 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-        xmlns="http://gadgetron.sf.net/gadgetron"
-        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-
-    <!-- 
-        GT Plus configuration file for 2DT cartesian reconstruction on cardiac LGE imaging
-
-        Author: Hui Xue
-        Magnetic Resonance Technology Program
-        National Heart, Lung and Blood Institute
-        National Institutes of Health
-        10 Center Drive, Bethesda
-        MD 20814
-        USA
-        Email: hui.xue at nih.gov
-    -->
-
-    <!-- reader -->
-    <reader>
-        <slot>1008</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-    </reader>
-
-    <!-- writer -->
-    <writer>
-        <slot>1004</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterCPLX</classname>
-    </writer>
-    <writer>
-        <slot>1005</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterFLOAT</classname>
-    </writer>
-    <writer>
-        <slot>1006</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterUSHORT</classname>
-    </writer>
-
-    <!-- RO asymmetric echo handling -->
-    <gadget>
-        <name>PartialFourierAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>PartialFourierAdjustROGadget</classname>
-    </gadget>
-
-    <!-- Noise prewhitening -->
-    <gadget>
-        <name>NoiseAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>NoiseAdjustGadget</classname>
-    </gadget>
-
-    <!-- RO oversampling removal -->
-    <gadget>
-        <name>RemoveROOversampling</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>RemoveROOversamplingGadget</classname>
-    </gadget>
-
-    <!-- Data accumulation and trigger gadget -->
-    <gadget>
-        <name>Acc</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
-
-        <!-- debug and info mode -->
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>noacceleration_triggerDim1</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-        <property>
-            <name>noacceleration_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>noacceleration_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_triggerDim1</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-        <property>
-            <name>interleaved_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>interleaved_numOfKSpace_triggerDim1</name>
-            <value>4</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_triggerDim1</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-        <property>
-            <name>embedded_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>embedded_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_triggerDim1</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-        <property>
-            <name>separate_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>separate_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
-        <property>
-            <name>other_kspace_matching_Dim</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-    </gadget>
-
-    <!-- Recon computation for 2DT cases -->
-    <gadget>
-        <name>Recon</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusRecon2DTGadget</classname>
-
-        <!-- kspace data -->
-        <property>
-            <name>dim_4th</name>
-            <value>DIM_Repetition</value>
-        </property>
-        <property>
-            <name>dim_5th</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <!-- work flow -->
-        <property>
-            <name>workOrder_ShareDim</name>
-            <value>DIM_Set</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>no_acceleration_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>no_acceleration_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_whichS_combinationcoeff</name>
-            <value>1</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>interleaved_whichS_combinationcoeff</name>
-            <value>1</value>
-        </property>
-        <property>
-            <name>interleaved_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap_useHighestSignal</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>embedded_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_whichS_combinationcoeff</name>
-            <value>1</value>
-        </property>
-        <property>
-            <name>embedded_ref_fillback</name>
-            <value>true</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>separate_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>separate_whichS_combinationcoeff</name>
-            <value>1</value>
-        </property>
-
-        <!-- coil compression -->
-        <property>
-            <name>same_coil_compression_coeff_allS</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>downstream_coil_compression</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>coil_compression_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>coil_compression_num_modesKept</name>
-            <value>8</value>
-        </property>
-
-        <!-- parameters for coil map estimation 
-            enum ISMRMRDCOILMAPALGO
-            {
-                ISMRMRD_SOUHEIL,
-                ISMRMRD_SOUHEIL_ITER
-            };
-        -->
-        <property>
-            <name>coil_map_algorithm</name>
-            <value>ISMRMRD_SOUHEIL</value>
-        </property>
-        <property>
-            <name>csm_kSize</name>
-            <value>7</value>
-        </property>
-
-        <property>
-            <name>csm_powermethod_num</name>
-            <value>3</value>
-        </property>
-
-        <property>
-            <name>csm_true_3D</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>csm_iter_num</name>
-            <value>5</value>
-        </property>
-
-        <property>
-            <name>csm_iter_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>csm_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- algorithm -->
-        <property>
-            <name>recon_algorithm</name>
-            <value>ISMRMRD_GRAPPA</value>
-        </property>
-
-        <property>
-            <name>recon_kspace_needed</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_GRAPPA -->
-        <property>
-            <name>grappa_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E1</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E2</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_reg_lamda</name>
-            <value>0.0005</value>
-        </property>
-        <property>
-            <name>grappa_calib_over_determine_ratio</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>grappa_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_SPIRIT -->
-        <property>
-            <name>spirit_kSize_RO</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E1</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>spirit_reg_lamda</name>
-            <value>0.005</value>
-        </property>
-        <property>
-            <name>spirit_use_gpu</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_iter_max</name>
-            <value>70</value>
-        </property>
-        <property>
-            <name>spirit_iter_thres</name>
-            <value>1e-5</value>
-        </property>
-
-        <!-- parameters for scaling and image sending -->
-        <property>
-            <name>min_intensity_value</name>
-            <value>64</value>
-        </property>
-
-        <property>
-            <name>max_intensity_value</name>
-            <value>4095</value>
-        </property>
-
-        <property>
-            <name>scalingFactor</name>
-            <value>-1.0</value>
-        </property>
-
-        <property>
-            <name>use_constant_scalingFactor</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for kspace filter, image data -->
-        <property>
-            <name>filterRO</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterRO_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE1</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE1_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE2</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE2_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, ref data -->
-        <property>
-            <name>filterRefRO</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE1</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE2</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
-        <property>
-            <name>filterPartialFourierRO</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE1</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE2</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
-        <property>
-            <name>partialFourier_algo</name>
-            <value>ISMRMRD_PF_FENGHUANG</value>
-        </property>
-
-        <!-- parameters for partial fourier homodyne algorithm -->
-        <property>
-            <name>partialFourier_homodyne_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_densityComp</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for partial fourier POCS algorithm -->
-        <property>
-            <name>partialFourier_POCS_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for partial fourier FengHuang algorithm -->
-        <property>
-            <name>partialFourier_FengHuang_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E1</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_thresReg</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_sameKernel_allN</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for debug and timing -->
-        <property>
-            <name>debugFolder</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>debugFolder2</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>performTiming</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for system acquisition -->
-        <property>
-            <name>timeStampResolution</name>
-            <value>0.0025</value>
-        </property>
-
-    </gadget>
-
-    <!-- after recon processing -->
-    <gadget>
-        <name>FloatToShort</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>FloatToUShortGadget</classname>
-    </gadget>
-
-    <!--
-    <gadget>
-      <name>ImageFinishCPLX</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetCPLX</classname>
-    </gadget>
-    -->
-
-    <!--
-    <gadget>
-      <name>ImageFinishFLOAT</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetFLOAT</classname>
-    </gadget>
-    -->
-
-    <gadget>
-        <name>ImageFinishUSHORT</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>ImageFinishGadgetUSHORT</classname>
-    </gadget>
-
-</gadgetronStreamConfiguration>
diff --git a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_MOLLI.xml b/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_MOLLI.xml
deleted file mode 100644
index 300705e..0000000
--- a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_MOLLI.xml
+++ /dev/null
@@ -1,654 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-        xmlns="http://gadgetron.sf.net/gadgetron"
-        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-
-    <!-- 
-        GT Plus configuration file for 2DT cartesian reconstruction on cardiac MOLLI T1 mapping
-
-        Author: Hui Xue
-        Magnetic Resonance Technology Program
-        National Heart, Lung and Blood Institute
-        National Institutes of Health
-        10 Center Drive, Bethesda
-        MD 20814
-        USA
-        Email: hui.xue at nih.gov
-    -->
-
-    <!-- reader -->
-    <reader>
-        <slot>1008</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-    </reader>
-
-    <!-- writer -->
-    <writer>
-        <slot>1004</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterCPLX</classname>
-    </writer>
-    <writer>
-        <slot>1005</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterFLOAT</classname>
-    </writer>
-    <writer>
-        <slot>1006</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterUSHORT</classname>
-    </writer>
-
-    <!-- RO asymmetric echo handling -->
-    <gadget>
-        <name>PartialFourierAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>PartialFourierAdjustROGadget</classname>
-    </gadget>
-
-    <!-- Noise prewhitening -->
-    <gadget>
-        <name>NoiseAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>NoiseAdjustGadget</classname>
-    </gadget>
-
-    <!-- RO oversampling removal -->
-    <gadget>
-        <name>RemoveROOversampling</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>RemoveROOversamplingGadget</classname>
-    </gadget>
-
-    <!-- Data accumulation and trigger gadget -->
-    <gadget>
-        <name>Acc</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
-
-        <!-- debug and info mode -->
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>noacceleration_triggerDim1</name>
-            <value>DIM_Set</value>
-        </property>
-
-        <property>
-            <name>noacceleration_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>noacceleration_numOfKSpace_triggerDim1</name>
-            <value>5</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>interleaved_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>interleaved_numOfKSpace_triggerDim1</name>
-            <value>4</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>embedded_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>embedded_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>separate_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>separate_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
-        <property>
-            <name>other_kspace_matching_Dim</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-    </gadget>
-
-    <!-- Recon computation for 2DT cases -->
-    <gadget>
-        <name>Recon</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusRecon2DTGadget</classname>
-
-        <!-- kspace data -->
-        <property>
-            <name>dim_4th</name>
-            <value>DIM_Set</value>
-        </property>
-        <property>
-            <name>dim_5th</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <!-- work flow -->
-        <property>
-            <name>workOrder_ShareDim</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>no_acceleration_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_ref_numOfModes</name>
-            <value>3</value>
-        </property>
-        <property>
-            <name>no_acceleration_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>no_acceleration_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>interleaved_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>interleaved_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_ref_numOfModes</name>
-            <value>3</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap_useHighestSignal</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>embedded_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_ref_fillback</name>
-            <value>true</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_ref_numOfModes</name>
-            <value>3</value>
-        </property>
-        <property>
-            <name>separate_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>separate_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- coil compression -->
-        <property>
-            <name>same_coil_compression_coeff_allS</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>downstream_coil_compression</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>coil_compression_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>coil_compression_num_modesKept</name>
-            <value>8</value>
-        </property>
-
-        <!-- parameters for coil map estimation 
-            enum ISMRMRDCOILMAPALGO
-            {
-                ISMRMRD_SOUHEIL,
-                ISMRMRD_SOUHEIL_ITER
-            };
-        -->
-        <property>
-            <name>coil_map_algorithm</name>
-            <value>ISMRMRD_SOUHEIL</value>
-        </property>
-        <property>
-            <name>csm_kSize</name>
-            <value>7</value>
-        </property>
-
-        <property>
-            <name>csm_powermethod_num</name>
-            <value>3</value>
-        </property>
-
-        <property>
-            <name>csm_true_3D</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>csm_iter_num</name>
-            <value>5</value>
-        </property>
-
-        <property>
-            <name>csm_iter_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>csm_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- algorithm -->
-        <property>
-            <name>recon_algorithm</name>
-            <value>ISMRMRD_GRAPPA</value>
-        </property>
-
-        <property>
-            <name>recon_kspace_needed</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_GRAPPA -->
-        <property>
-            <name>grappa_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E1</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E2</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_reg_lamda</name>
-            <value>0.0005</value>
-        </property>
-        <property>
-            <name>grappa_calib_over_determine_ratio</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>grappa_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_SPIRIT -->
-        <property>
-            <name>spirit_kSize_RO</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E1</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>spirit_reg_lamda</name>
-            <value>0.005</value>
-        </property>
-        <property>
-            <name>spirit_use_gpu</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_iter_max</name>
-            <value>70</value>
-        </property>
-        <property>
-            <name>spirit_iter_thres</name>
-            <value>1e-5</value>
-        </property>
-
-        <!-- parameters for scaling and image sending -->
-        <property>
-            <name>min_intensity_value</name>
-            <value>64</value>
-        </property>
-
-        <property>
-            <name>max_intensity_value</name>
-            <value>4095</value>
-        </property>
-
-        <property>
-            <name>scalingFactor</name>
-            <value>-1.0</value>
-        </property>
-
-        <property>
-            <name>use_constant_scalingFactor</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for kspace filter, image data -->
-        <property>
-            <name>filterRO</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterRO_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE1</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE1_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE2</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE2_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, ref data -->
-        <property>
-            <name>filterRefRO</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE1</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE2</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
-        <property>
-            <name>filterPartialFourierRO</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE1</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE2</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
-        <property>
-            <name>partialFourier_algo</name>
-            <value>ISMRMRD_PF_FENGHUANG</value>
-        </property>
-
-        <!-- parameters for partial fourier homodyne algorithm -->
-        <property>
-            <name>partialFourier_homodyne_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_densityComp</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for partial fourier POCS algorithm -->
-        <property>
-            <name>partialFourier_POCS_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for partial fourier FengHuang algorithm -->
-        <property>
-            <name>partialFourier_FengHuang_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E1</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_thresReg</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_sameKernel_allN</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for debug and timing -->
-        <property>
-            <name>debugFolder</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>debugFolder2</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>performTiming</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for system acquisition -->
-        <property>
-            <name>timeStampResolution</name>
-            <value>0.0025</value>
-        </property>
-
-    </gadget>
-
-    <!-- after recon processing -->
-    <gadget>
-        <name>FloatToShort</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>FloatToUShortGadget</classname>
-    </gadget>
-
-    <!--
-    <gadget>
-      <name>ImageFinishCPLX</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetCPLX</classname>
-    </gadget>
-    -->
-
-    <!--
-    <gadget>
-      <name>ImageFinishFLOAT</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetFLOAT</classname>
-    </gadget>
-    -->
-
-    <gadget>
-        <name>ImageFinishUSHORT</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>ImageFinishGadgetUSHORT</classname>
-    </gadget>
-
-</gadgetronStreamConfiguration>
diff --git a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Perfusion.xml b/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Perfusion.xml
deleted file mode 100644
index 2606ef8..0000000
--- a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_Perfusion.xml
+++ /dev/null
@@ -1,655 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-        xmlns="http://gadgetron.sf.net/gadgetron"
-        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-
-    <!-- 
-        GT Plus configuratin file for 2DT cartesian reconstruction on cardiac perfusion mapping
-        The support for AIF acquisition is implemented.
-
-        Author: Hui Xue
-        Magnetic Resonance Technology Program
-        National Heart, Lung and Blood Institute
-        National Institutes of Health
-        10 Center Drive, Bethesda
-        MD 20814
-        USA
-        Email: hui.xue at nih.gov
-    -->
-
-    <!-- reader -->
-    <reader>
-        <slot>1008</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-    </reader>
-
-    <!-- writer -->
-    <writer>
-        <slot>1004</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterCPLX</classname>
-    </writer>
-    <writer>
-        <slot>1005</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterFLOAT</classname>
-    </writer>
-    <writer>
-        <slot>1006</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterUSHORT</classname>
-    </writer>
-
-    <!-- RO asymmetric echo handling -->
-    <gadget>
-        <name>PartialFourierAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>PartialFourierAdjustROGadget</classname>
-    </gadget>
-
-    <!-- Noise prewhitening -->
-    <gadget>
-        <name>NoiseAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>NoiseAdjustGadget</classname>
-    </gadget>
-
-    <!-- RO oversampling removal -->
-    <gadget>
-        <name>RemoveROOversampling</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>RemoveROOversamplingGadget</classname>
-    </gadget>
-
-    <!-- Data accumulation and trigger gadget -->
-    <gadget>
-        <name>Acc</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
-
-        <!-- debug and info mode -->
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>noacceleration_triggerDim1</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-        <property>
-            <name>noacceleration_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>noacceleration_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_triggerDim1</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-        <property>
-            <name>interleaved_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>interleaved_numOfKSpace_triggerDim1</name>
-            <value>4</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_triggerDim1</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-        <property>
-            <name>embedded_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>embedded_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_triggerDim1</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-        <property>
-            <name>separate_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>separate_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- if 'other' kspace data presents, enforce its dimension matches the image data-->
-        <property>
-            <name>other_kspace_matching_Dim</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-    </gadget>
-
-    <!-- Recon computation for 2DT cases -->
-    <gadget>
-        <name>Recon</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusRecon2DTGadget</classname>
-
-        <!-- kspace data -->
-        <property>
-            <name>dim_4th</name>
-            <value>DIM_Repetition</value>
-        </property>
-        <property>
-            <name>dim_5th</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <!-- work flow -->
-        <property>
-            <name>workOrder_ShareDim</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>no_acceleration_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>no_acceleration_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>no_acceleration_whichS_combinationcoeff</name>
-            <value>1</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>interleaved_whichS_combinationcoeff</name>
-            <value>1</value>
-        </property>
-        <property>
-            <name>interleaved_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap_useHighestSignal</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>embedded_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>embedded_whichS_combinationcoeff</name>
-            <value>1</value>
-        </property>
-        <property>
-            <name>embedded_ref_fillback</name>
-            <value>true</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>separate_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>separate_whichS_combinationcoeff</name>
-            <value>1</value>
-        </property>
-
-        <!-- coil compression -->
-        <property>
-            <name>same_coil_compression_coeff_allS</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>downstream_coil_compression</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>coil_compression_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>coil_compression_num_modesKept</name>
-            <value>8</value>
-        </property>
-
-        <!-- parameters for coil map estimation 
-            enum ISMRMRDCOILMAPALGO
-            {
-                ISMRMRD_SOUHEIL,
-                ISMRMRD_SOUHEIL_ITER
-            };
-        -->
-        <property>
-            <name>coil_map_algorithm</name>
-            <value>ISMRMRD_SOUHEIL</value>
-        </property>
-        <property>
-            <name>csm_kSize</name>
-            <value>7</value>
-        </property>
-
-        <property>
-            <name>csm_powermethod_num</name>
-            <value>3</value>
-        </property>
-
-        <property>
-            <name>csm_true_3D</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>csm_iter_num</name>
-            <value>5</value>
-        </property>
-
-        <property>
-            <name>csm_iter_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>csm_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- algorithm -->
-        <property>
-            <name>recon_algorithm</name>
-            <value>ISMRMRD_GRAPPA</value>
-        </property>
-
-        <property>
-            <name>recon_kspace_needed</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_GRAPPA -->
-        <property>
-            <name>grappa_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E1</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E2</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_reg_lamda</name>
-            <value>0.0005</value>
-        </property>
-        <property>
-            <name>grappa_calib_over_determine_ratio</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>grappa_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_SPIRIT -->
-        <property>
-            <name>spirit_kSize_RO</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E1</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>spirit_reg_lamda</name>
-            <value>0.005</value>
-        </property>
-        <property>
-            <name>spirit_use_gpu</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_iter_max</name>
-            <value>70</value>
-        </property>
-        <property>
-            <name>spirit_iter_thres</name>
-            <value>1e-5</value>
-        </property>
-
-        <!-- parameters for scaling and image sending -->
-        <property>
-            <name>min_intensity_value</name>
-            <value>64</value>
-        </property>
-
-        <property>
-            <name>max_intensity_value</name>
-            <value>4095</value>
-        </property>
-
-        <property>
-            <name>scalingFactor</name>
-            <value>-1.0</value>
-        </property>
-
-        <property>
-            <name>use_constant_scalingFactor</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for kspace filter, image data -->
-        <property>
-            <name>filterRO</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterRO_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE1</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE1_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE2</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE2_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, ref data -->
-        <property>
-            <name>filterRefRO</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE1</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE2</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
-        <property>
-            <name>filterPartialFourierRO</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE1</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE2</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
-        <property>
-            <name>partialFourier_algo</name>
-            <value>ISMRMRD_PF_FENGHUANG</value>
-        </property>
-
-        <!-- parameters for partial fourier homodyne algorithm -->
-        <property>
-            <name>partialFourier_homodyne_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_densityComp</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for partial fourier POCS algorithm -->
-        <property>
-            <name>partialFourier_POCS_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for partial fourier FengHuang algorithm -->
-        <property>
-            <name>partialFourier_FengHuang_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E1</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_thresReg</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_sameKernel_allN</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for debug and timing -->
-        <property>
-            <name>debugFolder</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>debugFolder2</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>performTiming</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for system acquisition -->
-        <property>
-            <name>timeStampResolution</name>
-            <value>0.0025</value>
-        </property>
-
-    </gadget>
-
-    <!-- after recon processing -->
-    <gadget>
-        <name>FloatToShort</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>FloatToUShortGadget</classname>
-    </gadget>
-
-    <!--
-    <gadget>
-      <name>ImageFinishCPLX</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetCPLX</classname>
-    </gadget>
-    -->
-
-    <!--
-    <gadget>
-      <name>ImageFinishFLOAT</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetFLOAT</classname>
-    </gadget>
-    -->
-
-    <gadget>
-        <name>ImageFinishUSHORT</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>ImageFinishGadgetUSHORT</classname>
-    </gadget>
-
-</gadgetronStreamConfiguration>
diff --git a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_RealTimeCine.xml b/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_RealTimeCine.xml
deleted file mode 100644
index 107775e..0000000
--- a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_RealTimeCine.xml
+++ /dev/null
@@ -1,741 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-        xmlns="http://gadgetron.sf.net/gadgetron"
-        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-
-    <!-- 
-        GT Plus configuratin file for 2DT cartesian reconstruction on real-time cine imaging
-        The GtPlus supports the on-the-fly reconstruction, therefore the reconstruction starts 
-        whenever sufficient data is received. The reconstructed images are sent out once the 
-        computation is finished.
-
-        Author: Hui Xue
-        Magnetic Resonance Technology Program
-        National Heart, Lung and Blood Institute
-        National Institutes of Health
-        10 Center Drive, Bethesda
-        MD 20814
-        USA
-        Email: hui.xue at nih.gov
-    -->
-
-    <!-- reader -->
-    <reader>
-        <slot>1008</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-    </reader>
-
-    <!-- writer -->
-    <writer>
-        <slot>1004</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterCPLX</classname>
-    </writer>
-    <writer>
-        <slot>1005</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterFLOAT</classname>
-    </writer>
-    <writer>
-        <slot>1006</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterUSHORT</classname>
-    </writer>
-
-    <!-- RO asymmetric echo handling -->
-    <gadget>
-        <name>PartialFourierAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>PartialFourierAdjustROGadget</classname>
-    </gadget>
-
-    <!-- Noise prewhitening -->
-    <gadget>
-        <name>NoiseAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>NoiseAdjustGadget</classname>
-    </gadget>
-
-    <!-- RO oversampling removal -->
-    <gadget>
-        <name>RemoveROOversampling</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>RemoveROOversamplingGadget</classname>
-    </gadget>
-
-    <!-- Data accumulation and trigger gadget -->
-    <gadget>
-        <name>Acc</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
-
-        <!-- debug and info mode -->
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>noacceleration_triggerDim1</name>
-            <value>DIM_Phase</value>
-        </property>
-
-        <property>
-            <name>noacceleration_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>noacceleration_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_triggerDim1</name>
-            <value>DIM_Phase</value>
-        </property>
-
-        <property>
-            <name>interleaved_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>interleaved_numOfKSpace_triggerDim1</name>
-            <value>8</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_triggerDim1</name>
-            <value>DIM_Phase</value>
-        </property>
-
-        <property>
-            <name>embedded_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>embedded_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_triggerDim1</name>
-            <value>DIM_Phase</value>
-        </property>
-
-        <property>
-            <name>separate_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>separate_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- if 'other' kspace data presents, enforce its dimension matches the image data-->
-        <property>
-            <name>other_kspace_matching_Dim</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-    </gadget>
-
-    <!-- Recon computation for 2DT cases -->
-    <gadget>
-        <name>Recon</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusRecon2DTGadget</classname>
-
-        <!-- kspace data -->
-        <property>
-            <name>dim_4th</name>
-            <value>DIM_Phase</value>
-        </property>
-        <property>
-            <name>dim_5th</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <!-- work flow -->
-        <property>
-            <name>workOrder_ShareDim</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>no_acceleration_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>no_acceleration_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>no_acceleration_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>interleaved_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>interleaved_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap_useHighestSignal</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>embedded_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>embedded_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_ref_fillback</name>
-            <value>true</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>separate_fullres_coilmap</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>separate_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>separate_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- coil compression -->
-        <property>
-            <name>same_coil_compression_coeff_allS</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>downstream_coil_compression</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>coil_compression_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>coil_compression_num_modesKept</name>
-            <value>8</value>
-        </property>
-
-        <!-- parameters for coil map estimation 
-            enum ISMRMRDCOILMAPALGO
-            {
-                ISMRMRD_SOUHEIL,
-                ISMRMRD_SOUHEIL_ITER
-            };
-        -->
-        <property>
-            <name>coil_map_algorithm</name>
-            <value>ISMRMRD_SOUHEIL</value>
-        </property>
-        <property>
-            <name>csm_kSize</name>
-            <value>7</value>
-        </property>
-
-        <property>
-            <name>csm_powermethod_num</name>
-            <value>3</value>
-        </property>
-
-        <property>
-            <name>csm_true_3D</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>csm_iter_num</name>
-            <value>5</value>
-        </property>
-
-        <property>
-            <name>csm_iter_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>csm_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- algorithm -->
-        <property>
-            <name>recon_algorithm</name>
-            <value>ISMRMRD_GRAPPA</value>
-        </property>
-
-        <property>
-            <name>recon_kspace_needed</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_GRAPPA -->
-        <property>
-            <name>grappa_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E1</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E2</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_reg_lamda</name>
-            <value>0.0005</value>
-        </property>
-        <property>
-            <name>grappa_calib_over_determine_ratio</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>grappa_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_SPIRIT -->
-        <property>
-            <name>spirit_kSize_RO</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E1</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>spirit_reg_lamda</name>
-            <value>0.005</value>
-        </property>
-        <property>
-            <name>spirit_use_gpu</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_iter_max</name>
-            <value>100</value>
-        </property>
-        <property>
-            <name>spirit_iter_thres</name>
-            <value>1e-5</value>
-        </property>
-        <property>
-            <name>spirit_print_iter</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_L1SPIRIT -->
-        <property>
-            <name>spirit_perform_linear</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_perform_nonlinear</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_parallel_imaging_lamda</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_image_reg_lamda</name>
-            <value>0.001</value>
-        </property>
-        <property>
-            <name>spirit_data_fidelity_lamda</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>spirit_ncg_iter_max</name>
-            <value>10</value>
-        </property>
-        <property>
-            <name>spirit_ncg_iter_thres</name>
-            <value>0.001</value>
-        </property>
-        <property>
-            <name>spirit_ncg_print_iter</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_use_coil_sen_map</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_use_moco_enhancement</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_recon_moco_images</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_RO_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_E1_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_E2_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_temporal_enhancement_ratio</name>
-            <value>5.0</value>
-        </property>
-
-        <!-- parameters for recon job split -->
-        <property>
-            <name>job_split_by_S</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>job_num_of_N</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>job_max_Megabytes</name>
-            <value>20480</value>
-        </property>
-        <property>
-            <name>job_overlap</name>
-            <value>2</value>
-        </property>
-
-        <!-- parameters for scaling and image sending -->
-        <property>
-            <name>min_intensity_value</name>
-            <value>64</value>
-        </property>
-
-        <property>
-            <name>max_intensity_value</name>
-            <value>4095</value>
-        </property>
-
-        <property>
-            <name>scalingFactor</name>
-            <value>-1.0</value>
-        </property>
-
-        <property>
-            <name>use_constant_scalingFactor</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for kspace filter, image data -->
-        <property>
-            <name>filterRO</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterRO_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE1</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE1_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE2</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE2_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, ref data -->
-        <property>
-            <name>filterRefRO</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE1</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE2</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
-        <property>
-            <name>filterPartialFourierRO</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE1</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE2</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
-        <property>
-            <name>partialFourier_algo</name>
-            <value>ISMRMRD_PF_FENGHUANG</value>
-        </property>
-
-        <!-- parameters for partial fourier homodyne algorithm -->
-        <property>
-            <name>partialFourier_homodyne_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_densityComp</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for partial fourier POCS algorithm -->
-        <property>
-            <name>partialFourier_POCS_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for partial fourier FengHuang algorithm -->
-        <property>
-            <name>partialFourier_FengHuang_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E1</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_thresReg</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_sameKernel_allN</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for debug and timing -->
-        <property>
-            <name>debugFolder</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>debugFolder2</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>performTiming</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for system acquisition -->
-        <property>
-            <name>timeStampResolution</name>
-            <value>0.0025</value>
-        </property>
-
-    </gadget>
-
-    <!-- after recon processing -->
-    <gadget>
-        <name>FloatToShort</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>FloatToUShortGadget</classname>
-    </gadget>
-
-    <!--
-    <gadget>
-      <name>ImageFinishCPLX</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetCPLX</classname>
-    </gadget>
-    -->
-
-    <!--
-    <gadget>
-      <name>ImageFinishFLOAT</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetFLOAT</classname>
-    </gadget>
-    -->
-
-    <gadget>
-        <name>ImageFinishUSHORT</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>ImageFinishGadgetUSHORT</classname>
-    </gadget>
-
-</gadgetronStreamConfiguration>
diff --git a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_RealTimeFlow.xml b/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_RealTimeFlow.xml
deleted file mode 100644
index 684b9e7..0000000
--- a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_RealTimeFlow.xml
+++ /dev/null
@@ -1,689 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-        xmlns="http://gadgetron.sf.net/gadgetron"
-        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-
-    <!-- 
-        GT Plus configuration file for 2DT Cartesian reconstruction of real-time flow imaging.
-        GtPlus supports on-the-fly reconstruction, so the reconstruction starts
-        whenever sufficient data has been received. The reconstructed images are sent out once the
-        computation is finished.
-
-        Author: Hui Xue
-        Magnetic Resonance Technology Program
-        National Heart, Lung and Blood Institute
-        National Institutes of Health
-        10 Center Drive, Bethesda
-        MD 20814
-        USA
-        Email: hui.xue at nih.gov
-    -->
-
-    <!-- reader -->
-    <reader>
-        <slot>1008</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-    </reader>
-
-    <!-- writer -->
-    <writer>
-        <slot>1004</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterCPLX</classname>
-    </writer>
-    <writer>
-        <slot>1005</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterFLOAT</classname>
-    </writer>
-    <writer>
-        <slot>1006</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterUSHORT</classname>
-    </writer>
-
-    <!-- RO asymmetric echo handling -->
-    <gadget>
-        <name>PartialFourierAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>PartialFourierAdjustROGadget</classname>
-    </gadget>
-
-    <!-- Noise prewhitening -->
-    <gadget>
-        <name>NoiseAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>NoiseAdjustGadget</classname>
-    </gadget>
-
-    <!-- RO oversampling removal -->
-    <gadget>
-        <name>RemoveROOversampling</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>RemoveROOversamplingGadget</classname>
-    </gadget>
-
-    <gadget>
-        <name>Acc</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
-
-        <!-- debug and info mode -->
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>noacceleration_triggerDim1</name>
-            <value>DIM_Phase</value>
-        </property>
-
-        <property>
-            <name>noacceleration_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>noacceleration_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_triggerDim1</name>
-            <value>DIM_Phase</value>
-        </property>
-
-        <property>
-            <name>interleaved_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>interleaved_numOfKSpace_triggerDim1</name>
-            <value>4</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_triggerDim1</name>
-            <value>DIM_Phase</value>
-        </property>
-
-        <property>
-            <name>embedded_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>embedded_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_triggerDim1</name>
-            <value>DIM_Phase</value>
-        </property>
-
-        <property>
-            <name>separate_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>separate_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
-        <property>
-            <name>other_kspace_matching_Dim</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-    </gadget>
-
-    <!--
-    Recon computation for 2DT cases
-
-    kspace_: [RO E1 CHA N S], for 2D recon, N can be 1
-    ref_: [RO E1 CHA M S], M can be equal to N, 1, or another value
-    fullkspace_: [RO E1 CHA N S]
-    complexIm_: [RO E1 N S], after coil combination
-    coilMap_: [RO E1 CHA 1 or N S]
-    gfactor_: [RO E1 CHA 1 or N S]
-
-    the 4th and 5th dimensions (N and S) need to be specified. For example,
-    for real-time cine, N = DIM_Phase and S=DIM_Slice
-
-    default behavior
-    a) the coil compression coefficients are computed once across all S
-    b) the kernel or coil sensitivity are estimated for every S
-
-    embedded mode
-    a) perform recon and estimate kernel/coil sensitivity for every 2D kspace [RO E1 CHA]
-    b) coil combination uses different coil maps for every S
-    c) if the kspace recon is performed, the coil combination map is reestimated on the fullkspace for every 2D image
-    d) the ref lines are filled back to fullkspace_
-
-    separate mode
-    a) perform recon and estimate kernel/coil sensitivity for every 2D kspace [RO E1 CHA] if M==N
-    b) if M==1, the kernel is only estimated once for every S
-    c) coil combination uses different coil maps for every S
-    d) if the kspace recon is performed, the coil combination map is reestimated on the fullkspace for every 2D image
-
-    interleave
-    a) the average-all ref is used
-    b) kernel/coil sensitivity is estimated once for every S
-    -->
-
-    <gadget>
-        <name>Recon</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusRecon2DTGadget</classname>
-
-        <!-- kspace data -->
-        <property>
-            <name>dim_4th</name>
-            <value>DIM_Phase</value>
-        </property>
-        <property>
-            <name>dim_5th</name>
-            <value>DIM_Set</value>
-        </property>
-
-        <!-- work flow -->
-        <property>
-            <name>workOrder_ShareDim</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>no_acceleration_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>no_acceleration_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>interleaved_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>interleaved_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap_useHighestSignal</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>embedded_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_ref_fillback</name>
-            <value>true</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>separate_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_same_combinationcoeff_allS</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- coil compression -->
-        <property>
-            <name>same_coil_compression_coeff_allS</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>downstream_coil_compression</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>coil_compression_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>coil_compression_num_modesKept</name>
-            <value>8</value>
-        </property>
-
-        <!-- parameters for coil map estimation 
-            enum ISMRMRDCOILMAPALGO
-            {
-                ISMRMRD_SOUHEIL,
-                ISMRMRD_SOUHEIL_ITER
-            };
-        -->
-        <property>
-            <name>coil_map_algorithm</name>
-            <value>ISMRMRD_SOUHEIL</value>
-        </property>
-        <property>
-            <name>csm_kSize</name>
-            <value>7</value>
-        </property>
-
-        <property>
-            <name>csm_powermethod_num</name>
-            <value>3</value>
-        </property>
-
-        <property>
-            <name>csm_true_3D</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>csm_iter_num</name>
-            <value>5</value>
-        </property>
-
-        <property>
-            <name>csm_iter_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>csm_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- algorithm -->
-        <property>
-            <name>recon_algorithm</name>
-            <value>ISMRMRD_GRAPPA</value>
-        </property>
-
-        <property>
-            <name>recon_kspace_needed</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_GRAPPA -->
-        <property>
-            <name>grappa_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E1</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E2</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_reg_lamda</name>
-            <value>0.0005</value>
-        </property>
-        <property>
-            <name>grappa_calib_over_determine_ratio</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>grappa_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_SPIRIT -->
-        <property>
-            <name>spirit_kSize_RO</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E1</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>spirit_reg_lamda</name>
-            <value>0.005</value>
-        </property>
-        <property>
-            <name>spirit_use_gpu</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_iter_max</name>
-            <value>70</value>
-        </property>
-        <property>
-            <name>spirit_iter_thres</name>
-            <value>1e-5</value>
-        </property>
-
-        <!-- parameters for scaling and image sending -->
-        <property>
-            <name>min_intensity_value</name>
-            <value>64</value>
-        </property>
-
-        <property>
-            <name>max_intensity_value</name>
-            <value>4095</value>
-        </property>
-
-        <property>
-            <name>scalingFactor</name>
-            <value>-1.0</value>
-        </property>
-
-        <property>
-            <name>use_constant_scalingFactor</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for kspace filter, image data -->
-        <property>
-            <name>filterRO</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterRO_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE1</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE1_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE2</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE2_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, ref data -->
-        <property>
-            <name>filterRefRO</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE1</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE2</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
-        <property>
-            <name>filterPartialFourierRO</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE1</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE2</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
-        <property>
-            <name>partialFourier_algo</name>
-            <value>ISMRMRD_PF_FENGHUANG</value>
-        </property>
-
-        <!-- parameters for partial fourier homodyne algorithm -->
-        <property>
-            <name>partialFourier_homodyne_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_densityComp</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for partial fourier POCS algorithm -->
-        <property>
-            <name>partialFourier_POCS_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for partial fourier FengHuang algorithm -->
-        <property>
-            <name>partialFourier_FengHuang_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E1</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_thresReg</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_sameKernel_allN</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for debug and timing -->
-        <property>
-            <name>debugFolder</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>debugFolder2</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>performTiming</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for system acquisition -->
-        <property>
-            <name>timeStampResolution</name>
-            <value>0.0025</value>
-        </property>
-
-    </gadget>
-
-    <!-- after recon processing -->
-    <gadget>
-        <name>FloatToShort</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>FloatToUShortGadget</classname>
-    </gadget>
-
-    <!--
-    <gadget>
-      <name>ImageFinishCPLX</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetCPLX</classname>
-    </gadget>
-    -->
-
-    <!--
-    <gadget>
-      <name>ImageFinishFLOAT</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetFLOAT</classname>
-    </gadget>
-    -->
-
-    <gadget>
-        <name>ImageFinishUSHORT</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>ImageFinishGadgetUSHORT</classname>
-    </gadget>
-
-</gadgetronStreamConfiguration>
diff --git a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_T2W.xml b/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_T2W.xml
deleted file mode 100644
index 2c30202..0000000
--- a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_2DT_T2W.xml
+++ /dev/null
@@ -1,654 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-        xmlns="http://gadgetron.sf.net/gadgetron"
-        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-
-    <!-- 
-        GT Plus configuration file for 2DT cartesian reconstruction on T2 weighted cardiac imaging
-
-        Author: Hui Xue
-        Magnetic Resonance Technology Program
-        National Heart, Lung and Blood Institute
-        National Institutes of Health
-        10 Center Drive, Bethesda
-        MD 20814
-        USA
-        Email: hui.xue at nih.gov
-    -->
-
-    <!-- reader -->
-    <reader>
-        <slot>1008</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-    </reader>
-
-    <!-- writer -->
-    <writer>
-        <slot>1004</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterCPLX</classname>
-    </writer>
-    <writer>
-        <slot>1005</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterFLOAT</classname>
-    </writer>
-    <writer>
-        <slot>1006</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterUSHORT</classname>
-    </writer>
-
-    <!-- RO asymmetric echo handling -->
-    <gadget>
-        <name>PartialFourierAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>PartialFourierAdjustROGadget</classname>
-    </gadget>
-
-    <!-- Noise prewhitening -->
-    <gadget>
-        <name>NoiseAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>NoiseAdjustGadget</classname>
-    </gadget>
-
-    <!-- RO oversampling removal -->
-    <gadget>
-        <name>RemoveROOversampling</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>RemoveROOversamplingGadget</classname>
-    </gadget>
-
-    <!-- Data accumulation and trigger gadget -->
-    <gadget>
-        <name>Acc</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
-
-        <!-- debug and info mode -->
-        <property>
-            <name>verboseMode</name>
-            <value>true</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>noacceleration_triggerDim1</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-        <property>
-            <name>noacceleration_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>noacceleration_numOfKSpace_triggerDim1</name>
-            <value>5</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_triggerDim1</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-        <property>
-            <name>interleaved_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>interleaved_numOfKSpace_triggerDim1</name>
-            <value>4</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_triggerDim1</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-        <property>
-            <name>embedded_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>embedded_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_triggerDim1</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-        <property>
-            <name>separate_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>separate_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
-        <property>
-            <name>other_kspace_matching_Dim</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-    </gadget>
-
-    <!-- Recon computation for 2DT cases -->
-    <gadget>
-        <name>Recon</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusRecon2DTGadget</classname>
-
-        <!-- kspace data -->
-        <property>
-            <name>dim_4th</name>
-            <value>DIM_Repetition</value>
-        </property>
-        <property>
-            <name>dim_5th</name>
-            <value>DIM_Set</value>
-        </property>
-
-        <!-- work flow -->
-        <property>
-            <name>workOrder_ShareDim</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>no_acceleration_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>no_acceleration_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>no_acceleration_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>interleaved_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>interleaved_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap_useHighestSignal</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>embedded_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>embedded_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_ref_fillback</name>
-            <value>true</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_ref_numOfModes</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>separate_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_same_combinationcoeff_allS</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>separate_whichS_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- coil compression -->
-        <property>
-            <name>same_coil_compression_coeff_allS</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>downstream_coil_compression</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>coil_compression_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>coil_compression_num_modesKept</name>
-            <value>8</value>
-        </property>
-
-        <!-- parameters for coil map estimation 
-            enum ISMRMRDCOILMAPALGO
-            {
-                ISMRMRD_SOUHEIL,
-                ISMRMRD_SOUHEIL_ITER
-            };
-        -->
-        <property>
-            <name>coil_map_algorithm</name>
-            <value>ISMRMRD_SOUHEIL</value>
-        </property>
-        <property>
-            <name>csm_kSize</name>
-            <value>7</value>
-        </property>
-
-        <property>
-            <name>csm_powermethod_num</name>
-            <value>3</value>
-        </property>
-
-        <property>
-            <name>csm_true_3D</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>csm_iter_num</name>
-            <value>5</value>
-        </property>
-
-        <property>
-            <name>csm_iter_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>csm_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- algorithm -->
-        <property>
-            <name>recon_algorithm</name>
-            <value>ISMRMRD_GRAPPA</value>
-        </property>
-
-        <property>
-            <name>recon_kspace_needed</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_GRAPPA -->
-        <property>
-            <name>grappa_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E1</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E2</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_reg_lamda</name>
-            <value>0.0005</value>
-        </property>
-        <property>
-            <name>grappa_calib_over_determine_ratio</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>grappa_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_SPIRIT -->
-        <property>
-            <name>spirit_kSize_RO</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E1</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>spirit_reg_lamda</name>
-            <value>0.005</value>
-        </property>
-        <property>
-            <name>spirit_use_gpu</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_iter_max</name>
-            <value>70</value>
-        </property>
-        <property>
-            <name>spirit_iter_thres</name>
-            <value>1e-5</value>
-        </property>
-
-        <!-- parameters for scaling and image sending -->
-        <property>
-            <name>min_intensity_value</name>
-            <value>64</value>
-        </property>
-
-        <property>
-            <name>max_intensity_value</name>
-            <value>4095</value>
-        </property>
-
-        <property>
-            <name>scalingFactor</name>
-            <value>-1.0</value>
-        </property>
-
-        <property>
-            <name>use_constant_scalingFactor</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for kspace filter, image data -->
-        <property>
-            <name>filterRO</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterRO_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE1</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE1_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE2</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE2_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, ref data -->
-        <property>
-            <name>filterRefRO</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE1</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE2</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
-        <property>
-            <name>filterPartialFourierRO</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE1</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE2</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_HOMODYNE, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
-        <property>
-            <name>partialFourier_algo</name>
-            <value>ISMRMRD_PF_FENGHUANG</value>
-        </property>
-
-        <!-- parameters for partial fourier homodyne algorithm -->
-        <property>
-            <name>partialFourier_homodyne_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_homodyne_densityComp</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for partial fourier POCS algorithm -->
-        <property>
-            <name>partialFourier_POCS_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for partial fourier FengHuang algorithm -->
-        <property>
-            <name>partialFourier_FengHuang_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E1</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_thresReg</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_sameKernel_allN</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_transitBand</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for debug and timing -->
-        <property>
-            <name>debugFolder</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>debugFolder2</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>performTiming</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for system acquisition -->
-        <property>
-            <name>timeStampResolution</name>
-            <value>0.0025</value>
-        </property>
-
-    </gadget>
-
-    <!-- after recon processing -->
-    <gadget>
-        <name>FloatToShort</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>FloatToUShortGadget</classname>
-    </gadget>
-
-    <!--
-    <gadget>
-      <name>ImageFinishCPLX</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetCPLX</classname>
-    </gadget>
-    -->
-
-    <!--
-    <gadget>
-      <name>ImageFinishFLOAT</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetFLOAT</classname>
-    </gadget>
-    -->
-
-    <gadget>
-        <name>ImageFinishUSHORT</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>ImageFinishGadgetUSHORT</classname>
-    </gadget>
-
-</gadgetronStreamConfiguration>
diff --git a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_3DT_Cartesian.xml b/toolboxes/gtplus/config/GadgetronProgram_gtPlus_3DT_Cartesian.xml
deleted file mode 100644
index 7fbab73..0000000
--- a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_3DT_Cartesian.xml
+++ /dev/null
@@ -1,787 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-        xmlns="http://gadgetron.sf.net/gadgetron"
-        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-
-    <!-- 
-        GT Plus configuration file for general 3D or 3D+T cartesian reconstruction
-
-        Author: Hui Xue
-        Magnetic Resonance Technology Program
-        National Heart, Lung and Blood Institute
-        National Institutes of Health
-        10 Center Drive, Bethesda
-        MD 20814
-        USA
-        Email: hui.xue at nih.gov
-    -->
-
-    <!-- reader -->
-    <reader>
-        <slot>1008</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-    </reader>
-
-    <!-- writer -->
-    <writer>
-        <slot>1004</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterCPLX</classname>
-    </writer>
-    <writer>
-        <slot>1005</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterFLOAT</classname>
-    </writer>
-    <writer>
-        <slot>1006</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterUSHORT</classname>
-    </writer>
-
-    <!-- RO asymmetric echo handling -->
-    <gadget>
-        <name>PartialFourierAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>PartialFourierAdjustROGadget</classname>
-    </gadget>
-
-    <!-- Noise prewhitening -->
-    <gadget>
-        <name>NoiseAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>NoiseAdjustGadget</classname>
-    </gadget>
-
-    <!-- RO oversampling removal -->
-    <gadget>
-        <name>RemoveROOversampling</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>RemoveROOversamplingGadget</classname>
-    </gadget>
-
-    <!-- Data accumulation and trigger gadget -->
-    <gadget>
-        <name>Acc</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
-
-        <!-- debug and info mode -->
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>noacceleration_triggerDim1</name>
-            <value>DIM_Encoding2</value>
-        </property>
-
-        <property>
-            <name>noacceleration_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>noacceleration_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>interleaved_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>interleaved_numOfKSpace_triggerDim1</name>
-            <value>4</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>embedded_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>embedded_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>separate_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>separate_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
-        <property>
-            <name>other_kspace_matching_Dim</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-    </gadget>
-
-    <!-- Recon computation for 3DT cases -->
-    <gadget>
-        <name>Recon</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusRecon3DTGadget</classname>
-
-        <!-- kspace data -->
-        <property>
-            <name>dim_5th</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <!-- work flow -->
-        <property>
-            <name>workOrder_ShareDim</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>no_acceleration_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_same_combinationcoeff_allN</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>no_acceleration_whichN_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_same_combinationcoeff_allN</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>interleaved_whichN_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_same_combinationcoeff_allN</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_whichN_combinationcoeff</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_ref_fillback</name>
-            <value>true</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_same_combinationcoeff_allN</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_whichN_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- coil compression -->
-        <property>
-            <name>same_coil_compression_coeff_allN</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>upstream_coil_compression</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>upstream_coil_compression_thres</name>
-            <value>0.01</value>
-        </property>
-
-        <property>
-            <name>upstream_coil_compression_num_modesKept</name>
-            <value>-1</value>
-        </property>
-
-        <property>
-            <name>downstream_coil_compression</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>coil_compression_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>coil_compression_num_modesKept</name>
-            <value>8</value>
-        </property>
-
-        <!-- parameters for coil map estimation 
-            enum ISMRMRDCOILMAPALGO
-            {
-                ISMRMRD_SOUHEIL,
-                ISMRMRD_SOUHEIL_ITER
-            };
-        -->
-        <property>
-            <name>coil_map_algorithm</name>
-            <value>ISMRMRD_SOUHEIL_ITER</value>
-        </property>
-        <property>
-            <name>csm_kSize</name>
-            <value>7</value>
-        </property>
-
-        <property>
-            <name>csm_powermethod_num</name>
-            <value>3</value>
-        </property>
-
-        <property>
-            <name>csm_true_3D</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>csm_iter_num</name>
-            <value>3</value>
-        </property>
-
-        <property>
-            <name>csm_iter_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>csm_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- algorithm -->
-        <property>
-            <name>recon_algorithm</name>
-            <value>ISMRMRD_GRAPPA</value>
-        </property>
-
-        <property>
-            <name>recon_kspace_needed</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>recon_auto_parameters</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_GRAPPA -->
-        <property>
-            <name>grappa_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E1</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E2</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_reg_lamda</name>
-            <value>0.0005</value>
-        </property>
-        <property>
-            <name>grappa_calib_over_determine_ratio</name>
-            <value>45</value>
-        </property>
-        <property>
-            <name>grappa_use_gpu</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_SPIRIT -->
-        <property>
-            <name>spirit_kSize_RO</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E1</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>spirit_reg_lamda</name>
-            <value>0.005</value>
-        </property>
-        <property>
-            <name>spirit_use_gpu</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_calib_over_determine_ratio</name>
-            <value>15</value>
-        </property>
-        <property>
-            <name>spirit_solve_symmetric</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_iter_max</name>
-            <value>70</value>
-        </property>
-        <property>
-            <name>spirit_iter_thres</name>
-            <value>0.001</value>
-        </property>
-        <property>
-            <name>spirit_print_iter</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_L1SPIRIT -->
-        <property>
-            <name>spirit_perform_linear</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_perform_nonlinear</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_parallel_imaging_lamda</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_image_reg_lamda</name>
-            <value>0.001</value>
-        </property>
-        <property>
-            <name>spirit_data_fidelity_lamda</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>spirit_ncg_iter_max</name>
-            <value>10</value>
-        </property>
-        <property>
-            <name>spirit_ncg_iter_thres</name>
-            <value>0.001</value>
-        </property>
-        <property>
-            <name>spirit_ncg_print_iter</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_use_coil_sen_map</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_use_moco_enhancement</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_recon_moco_images</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_temporal_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_2D_scale_per_chunk</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_RO_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_E1_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_E2_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_3D_scale_per_chunk</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for scaling and image sending -->
-        <property>
-            <name>min_intensity_value</name>
-            <value>64</value>
-        </property>
-
-        <property>
-            <name>max_intensity_value</name>
-            <value>4095</value>
-        </property>
-
-        <property>
-            <name>scalingFactor</name>
-            <value>-1.0</value>
-        </property>
-
-        <property>
-            <name>use_constant_scalingFactor</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for kspace filter, image data -->
-        <property>
-            <name>filterRO</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterRO_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE1</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE1_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE2</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE2_sigma</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>filterE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, ref data -->
-        <property>
-            <name>filterRefRO</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE1</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE2</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
-        <property>
-            <name>filterPartialFourierRO</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE1</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE2</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
-        <property>
-            <name>partialFourier_algo</name>
-            <value>ISMRMRD_PF_POCS</value>
-        </property>
-
-        <!-- parameters for partial fourier POCS algorithm -->
-        <property>
-            <name>partialFourier_POCS_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_transitBand</name>
-            <value>24</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_transitBand_E2</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for partial fourier FengHuang algorithm -->
-        <property>
-            <name>partialFourier_FengHuang_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E1</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_thresReg</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_sameKernel_allN</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_transitBand</name>
-            <value>24</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_transitBand_E2</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for debug and timing -->
-        <property>
-            <name>debugFolder</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>debugFolder2</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>cloudNodeFile</name>
-            <value>myCloud_3DT.txt</value>
-        </property>
-
-        <property>
-            <name>performTiming</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>verboseMode</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for system acquisition -->
-        <property>
-            <name>timeStampResolution</name>
-            <value>0.0025</value>
-        </property>
-
-        <!-- parameters for recon job split -->
-        <property>
-            <name>job_split_by_S</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>job_num_of_N</name>
-            <value>32</value>
-        </property>
-        <property>
-            <name>job_max_Megabytes</name>
-            <value>13000</value>
-        </property>
-        <property>
-            <name>job_overlap</name>
-            <value>2</value>
-        </property>
-        <property>
-            <name>job_perform_on_control_node</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for the cloud computation 
-             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
-        -->
-        <property>
-            <name>CloudComputing</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>CloudSize</name>
-            <value>1</value>
-        </property>
-
-        <!-- node 0 -->
-        <property>
-            <name>CloudNode0_IP</name>
-            <value>localhost</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_Port</name>
-            <value>9003</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_XMLConfiguration</name>
-            <value>GadgetronProgram_gtPlus_3DT_Cartesian_CloudNode.xml</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_ComputingPowerIndex</name>
-            <value>1</value>
-        </property>
-
-    </gadget>
-
-    <!-- after recon processing -->
-    <gadget>
-        <name>FloatToShort</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>FloatToUShortGadget</classname>
-    </gadget>
-
-    <!--
-    <gadget>
-      <name>ImageFinishCPLX</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetCPLX</classname>
-    </gadget>
-    -->
-
-    <!--
-    <gadget>
-      <name>ImageFinishFLOAT</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetFLOAT</classname>
-    </gadget>
-    -->
-
-    <gadget>
-        <name>ImageFinishUSHORT</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>ImageFinishGadgetUSHORT</classname>
-    </gadget>
-
-</gadgetronStreamConfiguration>
diff --git a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_3DT_Cartesian_CloudNode.xml b/toolboxes/gtplus/config/GadgetronProgram_gtPlus_3DT_Cartesian_CloudNode.xml
deleted file mode 100644
index 8c4b2b0..0000000
--- a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_3DT_Cartesian_CloudNode.xml
+++ /dev/null
@@ -1,72 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-        xmlns="http://gadgetron.sf.net/gadgetron"
-        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-
-    <!-- 
-        GT Plus configuration file for 3D cartesian reconstruction on GtPlus Cloud
-        This configuration file configures one gadget to perform the reconstruction for
-        3DT job packages
-
-        Depending on the incoming algorithm parameters, both linear and non-linear reconstruction
-        can be performed
-
-        Author: Hui Xue
-        Magnetic Resonance Technology Program
-        National Heart, Lung and Blood Institute
-        National Institutes of Health
-        10 Center Drive, Bethesda
-        MD 20814
-        USA
-        Email: hui.xue at nih.gov
-    -->
-
-    <!-- reader -->
-    <reader>
-        <slot>1013</slot>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusCloudJobMessageReaderCPFL</classname>
-    </reader>
-
-    <!-- writer -->
-    <writer>
-        <slot>1013</slot>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusCloudJobMessageWriterCPFL</classname>
-    </writer>
-
-    <!--
-    Recon computation for 2DT/3DT cases, process one job
-    a gtPlusReconJob2DT job consists of kspace, kernel and parameters
-    kspace: [RO E1 CHA E2/PHS]
-    -->
-
-    <gadget>
-        <name>Recon</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusReconJob3DTGadget</classname>
-
-        <!-- parameters for debug and timing -->
-        <property>
-            <name>debugFolder</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>debugFolder2</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>performTiming</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-    </gadget>
-
-</gadgetronStreamConfiguration>
diff --git a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_3DT_Cartesian_L1SPIRIT.xml b/toolboxes/gtplus/config/GadgetronProgram_gtPlus_3DT_Cartesian_L1SPIRIT.xml
deleted file mode 100644
index 2f21ed7..0000000
--- a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_3DT_Cartesian_L1SPIRIT.xml
+++ /dev/null
@@ -1,816 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-        xmlns="http://gadgetron.sf.net/gadgetron"
-        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-
-    <!-- 
-        GT Plus configuration file for general 3D or 3D+T cartesian reconstruction using L1 SPIRIT
-        The GtPlus cloud computing can be turned on in this configuration file
-        The single-layer cloud topology is used here.
-
-        Author: Hui Xue
-        Magnetic Resonance Technology Program
-        National Heart, Lung and Blood Institute
-        National Institutes of Health
-        10 Center Drive, Bethesda
-        MD 20814
-        USA
-        Email: hui.xue at nih.gov
-
-        Ref to: 
-
-        Hui Xue, Souheil Inati, Thomas Sangild Sorensen, Peter Kellman, Michael S. Hansen. 
-        Distributed MRI Reconstruction using Gadgetron based Cloud Computing. Submitted to
-        Magnetic Resonance in Medicine on Dec 2013.
-    -->
-
-    <!-- reader -->
-    <reader>
-        <slot>1008</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-    </reader>
-
-    <!-- writer -->
-    <writer>
-        <slot>1004</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterCPLX</classname>
-    </writer>
-    <writer>
-        <slot>1005</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterFLOAT</classname>
-    </writer>
-    <writer>
-        <slot>1006</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterUSHORT</classname>
-    </writer>
-
-    <!-- RO asymmetric echo handling -->
-    <gadget>
-        <name>PartialFourierAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>PartialFourierAdjustROGadget</classname>
-    </gadget>
-
-    <!-- Noise prewhitening -->
-    <gadget>
-        <name>NoiseAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>NoiseAdjustGadget</classname>
-    </gadget>
-
-    <!-- RO oversampling removal -->
-    <gadget>
-        <name>RemoveROOversampling</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>RemoveROOversamplingGadget</classname>
-    </gadget>
-
-    <!-- Data accumulation and trigger gadget -->
-    <gadget>
-        <name>Acc</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
-
-        <!-- debug and info mode -->
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>noacceleration_triggerDim1</name>
-            <value>DIM_Encoding2</value>
-        </property>
-
-        <property>
-            <name>noacceleration_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>noacceleration_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>interleaved_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>interleaved_numOfKSpace_triggerDim1</name>
-            <value>8</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>embedded_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>embedded_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>separate_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>separate_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- if 'other' kspace data is present, enforce that its dimension matches the image data -->
-        <property>
-            <name>other_kspace_matching_Dim</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-    </gadget>
-
-    <!-- Recon computation for 3DT cases -->
-    <gadget>
-        <name>Recon</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusRecon3DTGadget</classname>
-
-        <!-- kspace data -->
-        <property>
-            <name>dim_5th</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <!-- work flow -->
-        <property>
-            <name>workOrder_ShareDim</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>no_acceleration_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_same_combinationcoeff_allN</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>no_acceleration_whichN_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_same_combinationcoeff_allN</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>interleaved_whichN_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_same_combinationcoeff_allN</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_whichN_combinationcoeff</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_ref_fillback</name>
-            <value>true</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_same_combinationcoeff_allN</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_whichN_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- coil compression -->
-        <property>
-            <name>same_coil_compression_coeff_allN</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>upstream_coil_compression</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>upstream_coil_compression_thres</name>
-            <value>0.01</value>
-        </property>
-
-        <property>
-            <name>upstream_coil_compression_num_modesKept</name>
-            <value>-1</value>
-        </property>
-
-        <property>
-            <name>downstream_coil_compression</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>coil_compression_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>coil_compression_num_modesKept</name>
-            <value>-1</value>
-        </property>
-
-        <!-- parameters for coil map estimation 
-            enum ISMRMRDCOILMAPALGO
-            {
-                ISMRMRD_SOUHEIL,
-                ISMRMRD_SOUHEIL_ITER
-            };
-        -->
-        <property>
-            <name>coil_map_algorithm</name>
-            <value>ISMRMRD_SOUHEIL</value>
-        </property>
-        <property>
-            <name>csm_kSize</name>
-            <value>7</value>
-        </property>
-
-        <property>
-            <name>csm_powermethod_num</name>
-            <value>3</value>
-        </property>
-
-        <property>
-            <name>csm_true_3D</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>csm_iter_num</name>
-            <value>3</value>
-        </property>
-
-        <property>
-            <name>csm_iter_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>csm_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- algorithm -->
-        <property>
-            <name>recon_algorithm</name>
-            <value>ISMRMRD_L1SPIRIT</value>
-        </property>
-
-        <property>
-            <name>recon_kspace_needed</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>recon_auto_parameters</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_GRAPPA -->
-        <property>
-            <name>grappa_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E1</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E2</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_reg_lamda</name>
-            <value>0.0005</value>
-        </property>
-        <property>
-            <name>grappa_calib_over_determine_ratio</name>
-            <value>45</value>
-        </property>
-        <property>
-            <name>grappa_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_SPIRIT -->
-        <property>
-            <name>spirit_kSize_RO</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E1</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>spirit_reg_lamda</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>spirit_use_gpu</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_calib_over_determine_ratio</name>
-            <value>15</value>
-        </property>
-        <property>
-            <name>spirit_solve_symmetric</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_iter_max</name>
-            <value>100</value>
-        </property>
-        <property>
-            <name>spirit_iter_thres</name>
-            <value>0.0025</value>
-        </property>
-        <property>
-            <name>spirit_print_iter</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_L1SPIRIT -->
-        <property>
-            <name>spirit_perform_linear</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_perform_nonlinear</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_parallel_imaging_lamda</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_image_reg_lamda</name>
-            <value>0.002</value>
-        </property>
-        <property>
-            <name>spirit_data_fidelity_lamda</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>spirit_ncg_iter_max</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>spirit_ncg_iter_thres</name>
-            <value>0.001</value>
-        </property>
-        <property>
-            <name>spirit_ncg_print_iter</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_use_coil_sen_map</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_use_moco_enhancement</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_recon_moco_images</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_temporal_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_2D_scale_per_chunk</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_RO_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_E1_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_E2_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_3D_scale_per_chunk</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for scaling and image sending -->
-        <property>
-            <name>min_intensity_value</name>
-            <value>64</value>
-        </property>
-
-        <property>
-            <name>max_intensity_value</name>
-            <value>4095</value>
-        </property>
-
-        <property>
-            <name>scalingFactor</name>
-            <value>-1.0</value>
-        </property>
-
-        <property>
-            <name>use_constant_scalingFactor</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for kspace filter, image data -->
-        <property>
-            <name>filterRO</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterRO_sigma</name>
-            <value>0.5</value>
-        </property>
-        <property>
-            <name>filterRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE1</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE1_sigma</name>
-            <value>0.5</value>
-        </property>
-        <property>
-            <name>filterE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE2</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE2_sigma</name>
-            <value>0.5</value>
-        </property>
-        <property>
-            <name>filterE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, ref data -->
-        <property>
-            <name>filterRefRO</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE1</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE2</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
-        <property>
-            <name>filterPartialFourierRO</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE1</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE2</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
-        <property>
-            <name>partialFourier_algo</name>
-            <value>ISMRMRD_PF_POCS</value>
-        </property>
-
-        <!-- parameters for partial fourier POCS algorithm -->
-        <property>
-            <name>partialFourier_POCS_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_transitBand</name>
-            <value>24</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_transitBand_E2</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for partial fourier FengHuang algorithm -->
-        <property>
-            <name>partialFourier_FengHuang_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E1</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_thresReg</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_sameKernel_allN</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_transitBand</name>
-            <value>24</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_transitBand_E2</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for debug and timing -->
-        <property>
-            <name>debugFolder</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>debugFolder2</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>cloudNodeFile</name>
-            <value>myCloud_3DT.txt</value>
-        </property>
-
-        <property>
-            <name>performTiming</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>verboseMode</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for system acquisition -->
-        <property>
-            <name>timeStampResolution</name>
-            <value>0.0025</value>
-        </property>
-
-        <!-- parameters for recon job split -->
-        <property>
-            <name>job_split_by_S</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>job_num_of_N</name>
-            <value>32</value>
-        </property>
-        <property>
-            <name>job_max_Megabytes</name>
-            <value>2499</value>
-        </property>
-        <property>
-            <name>job_overlap</name>
-            <value>1</value>
-        </property>
-        <property>
-            <name>job_perform_on_control_node</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for the cloud computation 
-             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
-        -->
-        <property>
-            <name>CloudComputing</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>CloudSize</name>
-            <value>2</value>
-        </property>
-
-        <!-- node 0 -->
-        <property>
-            <name>CloudNode0_IP</name>
-            <value>localhost</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_Port</name>
-            <value>9003</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_XMLConfiguration</name>
-            <value>GadgetronProgram_gtPlus_3DT_Cartesian_CloudNode.xml</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_ComputingPowerIndex</name>
-            <value>1</value>
-        </property>
-
-        <!-- node 1 -->
-        <property>
-            <name>CloudNode1_IP</name>
-            <value>localhost</value>
-        </property>
-
-        <property>
-            <name>CloudNode1_Port</name>
-            <value>9004</value>
-        </property>
-
-        <property>
-            <name>CloudNode1_XMLConfiguration</name>
-            <value>GadgetronProgram_gtPlus_3DT_Cartesian_CloudNode.xml</value>
-        </property>
-
-        <property>
-            <name>CloudNode1_ComputingPowerIndex</name>
-            <value>1</value>
-        </property>
-
-    </gadget>
-
-    <!-- after recon processing -->
-    <gadget>
-        <name>FloatToShort</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>FloatToUShortGadget</classname>
-    </gadget>
-
-    <!--
-    <gadget>
-      <name>ImageFinishCPLX</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetCPLX</classname>
-    </gadget>
-    -->
-
-    <!--
-    <gadget>
-      <name>ImageFinishFLOAT</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetFLOAT</classname>
-    </gadget>
-    -->
-
-    <gadget>
-        <name>ImageFinishUSHORT</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>ImageFinishGadgetUSHORT</classname>
-    </gadget>
-
-</gadgetronStreamConfiguration>
diff --git a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_3DT_Cartesian_SPIRIT.xml b/toolboxes/gtplus/config/GadgetronProgram_gtPlus_3DT_Cartesian_SPIRIT.xml
deleted file mode 100644
index b1c8bbe..0000000
--- a/toolboxes/gtplus/config/GadgetronProgram_gtPlus_3DT_Cartesian_SPIRIT.xml
+++ /dev/null
@@ -1,795 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<gadgetronStreamConfiguration xsi:schemaLocation="http://gadgetron.sf.net/gadgetron gadgetron.xsd"
-        xmlns="http://gadgetron.sf.net/gadgetron"
-        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-
-    <!-- 
-        GT Plus configuratin file for general 3D or 3D+T cartesian reconstruction using linear SPIRIT
-        The GtPlus cloud computing can be turned on in this configuration file
-        The single-layer cloud topology is used here.
-
-        Author: Hui Xue
-        Magnetic Resonance Technology Program
-        National Heart, Lung and Blood Institute
-        National Institutes of Health
-        10 Center Drive, Bethesda
-        MD 20814
-        USA
-        Email: hui.xue at nih.gov
-
-        Ref to: 
-
-        Hui Xue, Souheil Inati, Thomas Sangild Sorensen, Peter Kellman, Michael S. Hansen. 
-        Distributed MRI Reconstruction using Gadgetron based Cloud Computing. Submitted to
-        Magenetic Resonance in Medicine on Dec 2013.
-    -->
-
-    <!-- reader -->
-    <reader>
-        <slot>1008</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>GadgetIsmrmrdAcquisitionMessageReader</classname>
-    </reader>
-
-    <!-- writer -->
-    <writer>
-        <slot>1004</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterCPLX</classname>
-    </writer>
-    <writer>
-        <slot>1005</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterFLOAT</classname>
-    </writer>
-    <writer>
-        <slot>1006</slot>
-        <dll>gadgetron_mricore</dll>
-        <classname>MRIImageWriterUSHORT</classname>
-    </writer>
-
-    <!-- RO asymmetric echo handling -->
-    <gadget>
-        <name>PartialFourierAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>PartialFourierAdjustROGadget</classname>
-    </gadget>
-
-    <!-- Noise prewhitening -->
-    <gadget>
-        <name>NoiseAdjust</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>NoiseAdjustGadget</classname>
-    </gadget>
-
-    <!-- RO oversampling removal -->
-    <gadget>
-        <name>RemoveROOversampling</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>RemoveROOversamplingGadget</classname>
-    </gadget>
-
-    <!-- Data accumulation and trigger gadget -->
-    <gadget>
-        <name>Acc</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusAccumulatorWorkOrderTriggerGadget</classname>
-
-        <!-- debug and info mode -->
-        <property>
-            <name>verboseMode</name>
-            <value>false</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>noacceleration_triggerDim1</name>
-            <value>DIM_Encoding2</value>
-        </property>
-
-        <property>
-            <name>noacceleration_triggerDim2</name>
-            <value>DIM_Slice</value>
-        </property>
-
-        <property>
-            <name>noacceleration_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>interleaved_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>interleaved_numOfKSpace_triggerDim1</name>
-            <value>8</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>embedded_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>embedded_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_triggerDim1</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>separate_triggerDim2</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <property>
-            <name>separate_numOfKSpace_triggerDim1</name>
-            <value>1</value>
-        </property>
-
-        <!-- if 'other' kspace data presents, enforce its dimension matches the image data-->
-        <property>
-            <name>other_kspace_matching_Dim</name>
-            <value>DIM_Repetition</value>
-        </property>
-
-    </gadget>
-
-    <!-- Recon computation for 3DT cases -->
-    <gadget>
-        <name>Recon</name>
-        <dll>gadgetronPlus</dll>
-        <classname>GtPlusRecon3DTGadget</classname>
-
-        <!-- kspace data -->
-        <property>
-            <name>dim_5th</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <!-- work flow -->
-        <property>
-            <name>workOrder_ShareDim</name>
-            <value>DIM_NONE</value>
-        </property>
-
-        <!-- No acceleration mode -->
-        <property>
-            <name>no_acceleration_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>no_acceleration_same_combinationcoeff_allN</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>no_acceleration_whichN_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- Interleaved mode -->
-        <property>
-            <name>interleaved_same_combinationcoeff_allN</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>interleaved_whichN_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- Embedded mode -->
-        <property>
-            <name>embedded_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_same_combinationcoeff_allN</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>embedded_whichN_combinationcoeff</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>embedded_ref_fillback</name>
-            <value>true</value>
-        </property>
-
-        <!-- Separate mode -->
-        <property>
-            <name>separate_averageall_ref</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_fullres_coilmap</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_same_combinationcoeff_allN</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>separate_whichN_combinationcoeff</name>
-            <value>0</value>
-        </property>
-
-        <!-- coil compression -->
-        <property>
-            <name>same_coil_compression_coeff_allN</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>upstream_coil_compression</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>upstream_coil_compression_thres</name>
-            <value>0.005</value>
-        </property>
-
-        <property>
-            <name>upstream_coil_compression_num_modesKept</name>
-            <value>-1</value>
-        </property>
-
-        <property>
-            <name>downstream_coil_compression</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>coil_compression_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>coil_compression_num_modesKept</name>
-            <value>-1</value>
-        </property>
-
-        <!-- parameters for coil map estimation 
-            enum ISMRMRDCOILMAPALGO
-            {
-                ISMRMRD_SOUHEIL,
-                ISMRMRD_SOUHEIL_ITER
-            };
-        -->
-        <property>
-            <name>coil_map_algorithm</name>
-            <value>ISMRMRD_SOUHEIL</value>
-        </property>
-        <property>
-            <name>csm_kSize</name>
-            <value>7</value>
-        </property>
-
-        <property>
-            <name>csm_powermethod_num</name>
-            <value>3</value>
-        </property>
-
-        <property>
-            <name>csm_true_3D</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>csm_iter_num</name>
-            <value>3</value>
-        </property>
-
-        <property>
-            <name>csm_iter_thres</name>
-            <value>0.001</value>
-        </property>
-
-        <property>
-            <name>csm_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- algorithm -->
-        <property>
-            <name>recon_algorithm</name>
-            <value>ISMRMRD_SPIRIT</value>
-        </property>
-
-        <property>
-            <name>recon_kspace_needed</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>recon_auto_parameters</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_GRAPPA -->
-        <property>
-            <name>grappa_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E1</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_kSize_E2</name>
-            <value>4</value>
-        </property>
-        <property>
-            <name>grappa_reg_lamda</name>
-            <value>0.0005</value>
-        </property>
-        <property>
-            <name>grappa_calib_over_determine_ratio</name>
-            <value>45</value>
-        </property>
-        <property>
-            <name>grappa_use_gpu</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_SPIRIT -->
-        <property>
-            <name>spirit_kSize_RO</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E1</name>
-            <value>7</value>
-        </property>
-        <property>
-            <name>spirit_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>spirit_reg_lamda</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>spirit_use_gpu</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_calib_over_determine_ratio</name>
-            <value>15</value>
-        </property>
-        <property>
-            <name>spirit_solve_symmetric</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_iter_max</name>
-            <value>100</value>
-        </property>
-        <property>
-            <name>spirit_iter_thres</name>
-            <value>0.0025</value>
-        </property>
-        <property>
-            <name>spirit_print_iter</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for ISMRMRD_L1SPIRIT -->
-        <property>
-            <name>spirit_perform_linear</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_perform_nonlinear</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_parallel_imaging_lamda</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_image_reg_lamda</name>
-            <value>0.001</value>
-        </property>
-        <property>
-            <name>spirit_data_fidelity_lamda</name>
-            <value>0</value>
-        </property>
-        <property>
-            <name>spirit_ncg_iter_max</name>
-            <value>10</value>
-        </property>
-        <property>
-            <name>spirit_ncg_iter_thres</name>
-            <value>0.001</value>
-        </property>
-        <property>
-            <name>spirit_ncg_print_iter</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_use_coil_sen_map</name>
-            <value>true</value>
-        </property>
-        <property>
-            <name>spirit_use_moco_enhancement</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_recon_moco_images</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_temporal_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_2D_scale_per_chunk</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>spirit_RO_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_E1_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_E2_enhancement_ratio</name>
-            <value>1.0</value>
-        </property>
-        <property>
-            <name>spirit_3D_scale_per_chunk</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for scaling and image sending -->
-        <property>
-            <name>min_intensity_value</name>
-            <value>64</value>
-        </property>
-
-        <property>
-            <name>max_intensity_value</name>
-            <value>4095</value>
-        </property>
-
-        <property>
-            <name>scalingFactor</name>
-            <value>-1.0</value>
-        </property>
-
-        <property>
-            <name>use_constant_scalingFactor</name>
-            <value>false</value>
-        </property>
-
-        <!-- parameters for kspace filter, image data -->
-        <property>
-            <name>filterRO</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterRO_sigma</name>
-            <value>0.5</value>
-        </property>
-        <property>
-            <name>filterRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE1</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE1_sigma</name>
-            <value>0.5</value>
-        </property>
-        <property>
-            <name>filterE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterE2</name>
-            <value>ISMRMRD_FILTER_GAUSSIAN</value>
-        </property>
-        <property>
-            <name>filterE2_sigma</name>
-            <value>0.5</value>
-        </property>
-        <property>
-            <name>filterE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, ref data -->
-        <property>
-            <name>filterRefRO</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefRO_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE1</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE1_width</name>
-            <value>0.15</value>
-        </property>
-
-        <property>
-            <name>filterRefE2</name>
-            <value>ISMRMRD_FILTER_HANNING</value>
-        </property>
-        <property>
-            <name>filterRefE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterRefE2_width</name>
-            <value>0.15</value>
-        </property>
-
-        <!-- parameters for kspace filter, partial fourier/asymmetric echo filter -->
-        <property>
-            <name>filterPartialFourierRO</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierRO_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE1</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE1_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>filterPartialFourierE2</name>
-            <value>ISMRMRD_FILTER_TAPERED_HANNING</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_sigma</name>
-            <value>1.5</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_width</name>
-            <value>0.15</value>
-        </property>
-        <property>
-            <name>filterPartialFourierE2_densityComp</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for partial fourier handling algorithm, ISMRMRD_PF_POCS, ISMRMRD_PF_FENGHUANG, ISMRMRD_PF_ZEROFILLING_FILTER, ISMRMRD_PF_ZEROFILLING -->
-        <property>
-            <name>partialFourier_algo</name>
-            <value>ISMRMRD_PF_POCS</value>
-        </property>
-
-        <!-- parameters for partial fourier POCS algorithm -->
-        <property>
-            <name>partialFourier_POCS_iters</name>
-            <value>6</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_thres</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_transitBand</name>
-            <value>24</value>
-        </property>
-        <property>
-            <name>partialFourier_POCS_transitBand_E2</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for partial fourier FengHuang algorithm -->
-        <property>
-            <name>partialFourier_FengHuang_kSize_RO</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E1</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_kSize_E2</name>
-            <value>5</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_thresReg</name>
-            <value>0.01</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_sameKernel_allN</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_transitBand</name>
-            <value>24</value>
-        </property>
-        <property>
-            <name>partialFourier_FengHuang_transitBand_E2</name>
-            <value>24</value>
-        </property>
-
-        <!-- parameters for debug and timing -->
-        <property>
-            <name>debugFolder</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>debugFolder2</name>
-            <value></value>
-        </property>
-
-        <property>
-            <name>cloudNodeFile</name>
-            <value>myCloud_3DT.txt</value>
-        </property>
-
-        <property>
-            <name>performTiming</name>
-            <value>true</value>
-        </property>
-
-        <property>
-            <name>verboseMode</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for system acquisition -->
-        <property>
-            <name>timeStampResolution</name>
-            <value>0.0025</value>
-        </property>
-
-        <!-- parameters for recon job split -->
-        <property>
-            <name>job_split_by_S</name>
-            <value>false</value>
-        </property>
-        <property>
-            <name>job_num_of_N</name>
-            <value>48</value>
-        </property>
-        <property>
-            <name>job_max_Megabytes</name>
-            <value>2499</value>
-        </property>
-        <property>
-            <name>job_overlap</name>
-            <value>2</value>
-        </property>
-        <property>
-            <name>job_perform_on_control_node</name>
-            <value>true</value>
-        </property>
-
-        <!-- parameters for the cloud computation 
-             The cloud should be defined as the following: CloudNodeX_IP/Port/XMLConfiguration etc.
-        -->
-        <property>
-            <name>CloudComputing</name>
-            <value>false</value>
-        </property>
-
-        <property>
-            <name>CloudSize</name>
-            <value>1</value>
-        </property>
-
-        <!-- node 0 -->
-        <property>
-            <name>CloudNode0_IP</name>
-            <value>localhost</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_Port</name>
-            <value>9003</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_XMLConfiguration</name>
-            <value>GadgetronProgram_gtPlus_3DT_Cartesian_CloudNode.xml</value>
-        </property>
-
-        <property>
-            <name>CloudNode0_ComputingPowerIndex</name>
-            <value>1</value>
-        </property>
-
-    </gadget>
-
-    <!-- after recon processing -->
-    <gadget>
-        <name>FloatToShort</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>FloatToUShortGadget</classname>
-    </gadget>
-
-    <!--
-    <gadget>
-      <name>ImageFinishCPLX</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetCPLX</classname>
-    </gadget>
-    -->
-
-    <!--
-    <gadget>
-      <name>ImageFinishFLOAT</name>
-      <dll>gadgetron_mricore</dll>
-      <classname>ImageFinishGadgetFLOAT</classname>
-    </gadget>
-    -->
-
-    <gadget>
-        <name>ImageFinishUSHORT</name>
-        <dll>gadgetron_mricore</dll>
-        <classname>ImageFinishGadgetUSHORT</classname>
-    </gadget>
-
-</gadgetronStreamConfiguration>
diff --git a/toolboxes/gtplus/config/gtCloud/myCloud_2DT.txt b/toolboxes/gtplus/config/gtCloud/myCloud_2DT.txt
deleted file mode 100644
index b11be4a..0000000
--- a/toolboxes/gtplus/config/gtCloud/myCloud_2DT.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-localhost
-9002
-1
-localhost
-9003
-GadgetronProgram_gtPlus_2DT_Cartesian_CloudNode.xml
-1
-0
diff --git a/toolboxes/gtplus/config/gtCloud/myCloud_2DT_DualLayer.txt b/toolboxes/gtplus/config/gtCloud/myCloud_2DT_DualLayer.txt
deleted file mode 100644
index 69be64c..0000000
--- a/toolboxes/gtplus/config/gtCloud/myCloud_2DT_DualLayer.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-localhost
-9002
-1
-localhost
-9003
-GadgetronProgram_gtPlus_2DT_Cartesian_FirstLayer_CloudNode.xml
-1
-0
diff --git a/toolboxes/gtplus/config/gtCloud/myCloud_2DT_DualLayer_FirstLayer.txt b/toolboxes/gtplus/config/gtCloud/myCloud_2DT_DualLayer_FirstLayer.txt
deleted file mode 100644
index 789fa76..0000000
--- a/toolboxes/gtplus/config/gtCloud/myCloud_2DT_DualLayer_FirstLayer.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-localhost
-9003
-1
-localhost
-9004
-GadgetronProgram_gtPlus_2DT_Cartesian_CloudNode.xml
-1
-0
diff --git a/toolboxes/gtplus/config/gtCloud/myCloud_3DT.txt b/toolboxes/gtplus/config/gtCloud/myCloud_3DT.txt
deleted file mode 100644
index 2bfe3f1..0000000
--- a/toolboxes/gtplus/config/gtCloud/myCloud_3DT.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-localhost
-9002
-2
-localhost
-9003
-GadgetronProgram_gtPlus_3DT_Cartesian_CloudNode.xml
-1
-localhost
-9004
-GadgetronProgram_gtPlus_3DT_Cartesian_CloudNode.xml
-1
-0
diff --git a/toolboxes/gtplus/matlab/CMakeLists.txt b/toolboxes/gtplus/matlab/CMakeLists.txt
deleted file mode 100644
index 1553c7f..0000000
--- a/toolboxes/gtplus/matlab/CMakeLists.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-
-set( FILEs  
-    FtkMatlabConverterBase.h
-    FtkMatlabConverter.h 
-    FtkMatlabConverterComplex.h 
-    FtkMatlabMacros.h 
-    FtkMatlabEngineBase.h 
-    FtkMatlabEngineBase.cpp 
-    FtkMexExport.h 
-    mexFtk.h )
diff --git a/toolboxes/gtplus/matlab/FtkMatlabConverterBase.h b/toolboxes/gtplus/matlab/FtkMatlabConverterBase.h
deleted file mode 100644
index c03c09f..0000000
--- a/toolboxes/gtplus/matlab/FtkMatlabConverterBase.h
+++ /dev/null
@@ -1,569 +0,0 @@
-/**
-*  @file    FtkMatlabConverterBase.h
-*  @brief   Ftk and Matlab converter base
-*  @author  Hui Xue
-*  @date    July 18, 2011
-*  @Site    SCR, Princeton
-*
-*  Copyright (C) Siemens Corporate Research, Inc. 2011 All Rights Reserved
-**/
-
-#ifdef FTK_MATLAB_SUPPORT
-
-#ifndef FTK_FTKMATLABCONVERTERBASE_H
-#define FTK_FTKMATLABCONVERTERBASE_H
-
-#include <interface/matlab/FtkMexExport.h>
-
-#include <vector>
-#include <typeinfo>
-#include <core/basic/Clock.h>
-#include <core/basic/Common.h>
-#include <core/basic/Exception.h>
-#include <core/math/MathMacros.h>
-#include <core/basic/Allocate.h>
-#include <core/basic/Array1d.h> 
-#include <core/basic/RealMatrix.h> 
-#include <core/image/Image.h>
-#include "core/image/container/ImageContainerArray.h"
-#include "core/image/container/ImageContainerMatrix.h"
-#include "core/image/container/ImageContainerAllocation.h"
-
-BEGIN_NAMESPACE_1(ftk)
-
-struct ImageInfo
-{
-    int sizeX;
-    int sizeY;
-    int sizeZ;
-    int sizeT;
-    int sizeN;
-    int sizeM;
-
-    double spacingX;
-    double spacingY;
-    double spacingZ;
-    double spacingT;
-    double spacingN;
-    double spacingM;
-
-    double positionPatient[3];
-    double orientationPatient[3][3];
-
-    /// number of fields
-    FTK_STATIC_CONST( numOfFields, IndexType, 20 );
-
-    ImageInfo() 
-    {
-        initialize();
-    }
-
-    ImageInfo(const ImageBase<2>& aImage) 
-    {
-        initialize();
-
-        sizeX = aImage.getSize(0);
-        sizeY = aImage.getSize(1);
-
-        spacingX = aImage.getPixelSpacing(0);
-        spacingY = aImage.getPixelSpacing(1);
-
-        positionPatient[0] = aImage.getPosition(0);
-        positionPatient[1] = aImage.getPosition(1);
-        positionPatient[2] = aImage.getPosition(2);
-
-        orientationPatient[0][0] = aImage.getOrient3D(0, 0);
-        orientationPatient[0][1] = aImage.getOrient3D(0, 1);
-        orientationPatient[0][2] = aImage.getOrient3D(0, 2);
-
-        orientationPatient[1][0] = aImage.getOrient3D(1, 0);
-        orientationPatient[1][1] = aImage.getOrient3D(1, 1);
-        orientationPatient[1][2] = aImage.getOrient3D(1, 2);
-
-        orientationPatient[2][0] = aImage.getOrient3D(2, 0);
-        orientationPatient[2][1] = aImage.getOrient3D(2, 1);
-        orientationPatient[2][2] = aImage.getOrient3D(2, 2);
-    }
-
-    ImageInfo(const ImageBase<3>& aImage) 
-    {
-        initialize();
-
-        sizeX = aImage.getSize(0);
-        sizeY = aImage.getSize(1);
-        sizeZ = aImage.getSize(2);
-
-        spacingX = aImage.getPixelSpacing(0);
-        spacingY = aImage.getPixelSpacing(1);
-        spacingZ = aImage.getPixelSpacing(2);
-
-        positionPatient[0] = aImage.getPosition(0);
-        positionPatient[1] = aImage.getPosition(1);
-        positionPatient[2] = aImage.getPosition(2);
-
-        orientationPatient[0][0] = aImage.getOrient3D(0, 0);
-        orientationPatient[0][1] = aImage.getOrient3D(0, 1);
-        orientationPatient[0][2] = aImage.getOrient3D(0, 2);
-
-        orientationPatient[1][0] = aImage.getOrient3D(1, 0);
-        orientationPatient[1][1] = aImage.getOrient3D(1, 1);
-        orientationPatient[1][2] = aImage.getOrient3D(1, 2);
-
-        orientationPatient[2][0] = aImage.getOrient3D(2, 0);
-        orientationPatient[2][1] = aImage.getOrient3D(2, 1);
-        orientationPatient[2][2] = aImage.getOrient3D(2, 2);
-    }
-
-    ImageInfo(const ImageBase<4>& aImage) 
-    {
-        initialize();
-
-        sizeX = aImage.getSize(0);
-        sizeY = aImage.getSize(1);
-        sizeZ = aImage.getSize(2);
-        sizeT = aImage.getSize(3);
-
-        spacingX = aImage.getPixelSpacing(0);
-        spacingY = aImage.getPixelSpacing(1);
-        spacingZ = aImage.getPixelSpacing(2);
-        spacingT = aImage.getPixelSpacing(3);
-
-        positionPatient[0] = aImage.getPosition(0);
-        positionPatient[1] = aImage.getPosition(1);
-        positionPatient[2] = aImage.getPosition(2);
-
-        orientationPatient[0][0] = aImage.getOrient3D(0, 0);
-        orientationPatient[0][1] = aImage.getOrient3D(0, 1);
-        orientationPatient[0][2] = aImage.getOrient3D(0, 2);
-
-        orientationPatient[1][0] = aImage.getOrient3D(1, 0);
-        orientationPatient[1][1] = aImage.getOrient3D(1, 1);
-        orientationPatient[1][2] = aImage.getOrient3D(1, 2);
-
-        orientationPatient[2][0] = aImage.getOrient3D(2, 0);
-        orientationPatient[2][1] = aImage.getOrient3D(2, 1);
-        orientationPatient[2][2] = aImage.getOrient3D(2, 2);
-    }
-
-    ImageInfo(const ImageBase<5>& aImage) 
-    {
-        initialize();
-
-        sizeX = aImage.getSize(0);
-        sizeY = aImage.getSize(1);
-        sizeZ = aImage.getSize(2);
-        sizeT = aImage.getSize(3);
-        sizeN = aImage.getSize(4);
-
-        spacingX = aImage.getPixelSpacing(0);
-        spacingY = aImage.getPixelSpacing(1);
-        spacingZ = aImage.getPixelSpacing(2);
-        spacingT = aImage.getPixelSpacing(3);
-        spacingN = aImage.getPixelSpacing(4);
-
-        positionPatient[0] = aImage.getPosition(0);
-        positionPatient[1] = aImage.getPosition(1);
-        positionPatient[2] = aImage.getPosition(2);
-
-        orientationPatient[0][0] = aImage.getOrient3D(0, 0);
-        orientationPatient[0][1] = aImage.getOrient3D(0, 1);
-        orientationPatient[0][2] = aImage.getOrient3D(0, 2);
-
-        orientationPatient[1][0] = aImage.getOrient3D(1, 0);
-        orientationPatient[1][1] = aImage.getOrient3D(1, 1);
-        orientationPatient[1][2] = aImage.getOrient3D(1, 2);
-
-        orientationPatient[2][0] = aImage.getOrient3D(2, 0);
-        orientationPatient[2][1] = aImage.getOrient3D(2, 1);
-        orientationPatient[2][2] = aImage.getOrient3D(2, 2);
-    }
-
-    ImageInfo(const ImageBase<6>& aImage) 
-    {
-        initialize();
-
-        sizeX = aImage.getSize(0);
-        sizeY = aImage.getSize(1);
-        sizeZ = aImage.getSize(2);
-        sizeT = aImage.getSize(3);
-        sizeN = aImage.getSize(4);
-        sizeM = aImage.getSize(5);
-
-        spacingX = aImage.getPixelSpacing(0);
-        spacingY = aImage.getPixelSpacing(1);
-        spacingZ = aImage.getPixelSpacing(2);
-        spacingT = aImage.getPixelSpacing(3);
-        spacingN = aImage.getPixelSpacing(4);
-        spacingM = aImage.getPixelSpacing(5);
-
-        positionPatient[0] = aImage.getPosition(0);
-        positionPatient[1] = aImage.getPosition(1);
-        positionPatient[2] = aImage.getPosition(2);
-
-        orientationPatient[0][0] = aImage.getOrient3D(0, 0);
-        orientationPatient[0][1] = aImage.getOrient3D(0, 1);
-        orientationPatient[0][2] = aImage.getOrient3D(0, 2);
-
-        orientationPatient[1][0] = aImage.getOrient3D(1, 0);
-        orientationPatient[1][1] = aImage.getOrient3D(1, 1);
-        orientationPatient[1][2] = aImage.getOrient3D(1, 2);
-
-        orientationPatient[2][0] = aImage.getOrient3D(2, 0);
-        orientationPatient[2][1] = aImage.getOrient3D(2, 1);
-        orientationPatient[2][2] = aImage.getOrient3D(2, 2);
-    }
-
-    ~ImageInfo() {}
-
-    void initialize()
-    {
-        sizeX = 1;
-        sizeY = 1;
-        sizeZ = 1;
-        sizeT = 1;
-        sizeN = 1;
-        sizeM = 1;
-
-        spacingX = 1.0;
-        spacingY = 1.0;
-        spacingZ = 1.0;
-        spacingT = 1.0;
-        spacingN = 1.0;
-        spacingM = 1.0;
-
-        positionPatient[0] = 0.0;
-        positionPatient[1] = 0.0;
-        positionPatient[2] = 0.0;
-
-        orientationPatient[0][0] = 1.0;
-        orientationPatient[0][1] = 0.0;
-        orientationPatient[0][2] = 0.0;
-
-        orientationPatient[1][0] = 0.0;
-        orientationPatient[1][1] = 1.0;
-        orientationPatient[1][2] = 0.0;
-
-        orientationPatient[2][0] = 0.0;
-        orientationPatient[2][1] = 0.0;
-        orientationPatient[2][2] = 1.0;
-
-        int ind = 0;
-        fieldnames.resize(numOfFields);
-        fieldnames[ind++] = "sizeX";
-        fieldnames[ind++] = "sizeY";
-        fieldnames[ind++] = "sizeZ";
-        fieldnames[ind++] = "sizeT";
-        fieldnames[ind++] = "sizeN";
-        fieldnames[ind++] = "sizeM";
-        fieldnames[ind++] = "spacingX";
-        fieldnames[ind++] = "spacingY";
-        fieldnames[ind++] = "spacingZ";
-        fieldnames[ind++] = "spacingT";
-        fieldnames[ind++] = "spacingN";
-        fieldnames[ind++] = "spacingM";
-        fieldnames[ind++] = "positionPatient";
-        fieldnames[ind++] = "orientationPatient";
-        fieldnames[ind++] = "xsize";
-        fieldnames[ind++] = "ysize";
-        fieldnames[ind++] = "zsize";
-        fieldnames[ind++] = "xvoxelsize";
-        fieldnames[ind++] = "yvoxelsize";
-        fieldnames[ind++] = "zvoxelsize";
-    }
-
-    mxArray* convertToMatlab() const 
-    {
-        try
-        {
-            mwSize num[2] = {1, 1};
-            mxArray* info = mxCreateStructArray(2, num, numOfFields, const_cast<const char**>(&fieldnames[0]));
-
-            int ind = 0;
-
-            mxSetField(info, 0, fieldnames[ind++], mxCreateDoubleScalar(sizeX));
-            mxSetField(info, 0, fieldnames[ind++], mxCreateDoubleScalar(sizeY));
-            mxSetField(info, 0, fieldnames[ind++], mxCreateDoubleScalar(sizeZ));
-            mxSetField(info, 0, fieldnames[ind++], mxCreateDoubleScalar(sizeT));
-            mxSetField(info, 0, fieldnames[ind++], mxCreateDoubleScalar(sizeN));
-            mxSetField(info, 0, fieldnames[ind++], mxCreateDoubleScalar(sizeM));
-
-            mxSetField(info, 0, fieldnames[ind++], mxCreateDoubleScalar(spacingX));
-            mxSetField(info, 0, fieldnames[ind++], mxCreateDoubleScalar(spacingY));
-            mxSetField(info, 0, fieldnames[ind++], mxCreateDoubleScalar(spacingZ));
-            mxSetField(info, 0, fieldnames[ind++], mxCreateDoubleScalar(spacingT));
-            mxSetField(info, 0, fieldnames[ind++], mxCreateDoubleScalar(spacingN));
-            mxSetField(info, 0, fieldnames[ind++], mxCreateDoubleScalar(spacingM));
-
-            mxArray* mxPositionPatient = mxCreateDoubleMatrix(1, 3, mxREAL);
-            double* pPositionData = mxGetPr(mxPositionPatient);
-            pPositionData[0] = positionPatient[0];
-            pPositionData[1] = positionPatient[1];
-            pPositionData[2] = positionPatient[2];
-            mxSetField(info, 0, fieldnames[ind++], mxPositionPatient);
-
-            mxArray* mxOrientationPatient = mxCreateDoubleMatrix(3, 3, mxREAL);
-            double* pOrientationData = mxGetPr(mxOrientationPatient);
-            pOrientationData[0] = orientationPatient[0][0];
-            pOrientationData[1] = orientationPatient[1][0];
-            pOrientationData[2] = orientationPatient[2][0];
-            pOrientationData[3] = orientationPatient[0][1];
-            pOrientationData[4] = orientationPatient[1][1];
-            pOrientationData[5] = orientationPatient[2][1];
-            pOrientationData[6] = orientationPatient[0][2];
-            pOrientationData[7] = orientationPatient[1][2];
-            pOrientationData[8] = orientationPatient[2][2];
-            mxSetField(info, 0, fieldnames[ind++], mxOrientationPatient);
-
-            mxSetField(info, 0, fieldnames[ind++], mxCreateDoubleScalar(sizeX));
-            mxSetField(info, 0, fieldnames[ind++], mxCreateDoubleScalar(sizeY));
-            mxSetField(info, 0, fieldnames[ind++], mxCreateDoubleScalar(sizeZ));
-
-            mxSetField(info, 0, fieldnames[ind++], mxCreateDoubleScalar(spacingX));
-            mxSetField(info, 0, fieldnames[ind++], mxCreateDoubleScalar(spacingY));
-            mxSetField(info, 0, fieldnames[ind++], mxCreateDoubleScalar(spacingZ));
-
-            return info;
-        }
-        catch(...)
-        {
-            mexErrMsgTxt("Exceptions happened in ImageInfo::convertToMatlab() ... ");
-            throw;
-        }
-
-        return NULL;
-    }
-
-    bool convertFromMatlab(const mxArray* info)
-    {
-        try
-        {
-            int ind = 0;
-            sizeX = static_cast<int>(mxGetScalar(mxGetField(info, 0, fieldnames[ind++])));
-            sizeY = static_cast<int>(mxGetScalar(mxGetField(info, 0, fieldnames[ind++])));
-            sizeZ = static_cast<int>(mxGetScalar(mxGetField(info, 0, fieldnames[ind++])));
-            sizeT = static_cast<int>(mxGetScalar(mxGetField(info, 0, fieldnames[ind++])));
-            sizeN = static_cast<int>(mxGetScalar(mxGetField(info, 0, fieldnames[ind++])));
-            sizeM = static_cast<int>(mxGetScalar(mxGetField(info, 0, fieldnames[ind++])));
-
-            spacingX = mxGetScalar(mxGetField(info, 0, fieldnames[ind++]));
-            spacingY = mxGetScalar(mxGetField(info, 0, fieldnames[ind++]));
-            spacingZ = mxGetScalar(mxGetField(info, 0, fieldnames[ind++]));
-            spacingT = mxGetScalar(mxGetField(info, 0, fieldnames[ind++]));
-            spacingN = mxGetScalar(mxGetField(info, 0, fieldnames[ind++]));
-            spacingM = mxGetScalar(mxGetField(info, 0, fieldnames[ind++]));
-
-            mxArray* mxPositionPatient = mxGetField(info, 0, fieldnames[ind++]);
-            double* pPositionData = mxGetPr(mxPositionPatient);
-            positionPatient[0] = pPositionData[0];
-            positionPatient[1] = pPositionData[1];
-            positionPatient[2] = pPositionData[2];
-            
-            mxArray* mxOrientationPatient = mxGetField(info, 0, fieldnames[ind++]);
-            double* pOrientationData = mxGetPr(mxOrientationPatient);
-            orientationPatient[0][0] = pOrientationData[0];
-            orientationPatient[1][0] = pOrientationData[1];
-            orientationPatient[2][0] = pOrientationData[2];
-            orientationPatient[0][1] = pOrientationData[3];
-            orientationPatient[1][1] = pOrientationData[4];
-            orientationPatient[2][1] = pOrientationData[5];
-            orientationPatient[0][2] = pOrientationData[6];
-            orientationPatient[1][2] = pOrientationData[7];
-            orientationPatient[2][2] = pOrientationData[8];
-        }
-        catch(...)
-        {
-            mexErrMsgTxt("Exceptions happened in ImageInfo::convertFromMatlab() ... ");
-            return false;
-        }
-
-        return true;
-    }
-
-    Size<IndexType, 2> getSize2D() const { return Size<IndexType, 2>(sizeX, sizeY); }
-    Spacing<2> getSpacing2D() const { return Spacing<2>(spacingX, spacingY); }
-
-    Size<IndexType, 3> getSize3D() const { return Size<IndexType, 3>(sizeX, sizeY, sizeZ); }
-    Spacing<3> getSpacing3D() const { return Spacing<3>(spacingX, spacingY, spacingZ); }
-
-    Size<IndexType, 4> getSize4D() const 
-    { 
-        Size<IndexType, 4> aSize; 
-        aSize[0] = sizeX;
-        aSize[1] = sizeY;
-        aSize[2] = sizeZ;
-        aSize[3] = sizeT;
-        return aSize;
-    }
-
-    Size<IndexType, 5> getSize5D() const 
-    { 
-        Size<IndexType, 5> aSize; 
-        aSize[0] = sizeX;
-        aSize[1] = sizeY;
-        aSize[2] = sizeZ;
-        aSize[3] = sizeT;
-        aSize[4] = sizeN;
-        return aSize;
-    }
-
-    Size<IndexType, 6> getSize6D() const 
-    { 
-        Size<IndexType, 6> aSize; 
-        aSize[0] = sizeX;
-        aSize[1] = sizeY;
-        aSize[2] = sizeZ;
-        aSize[3] = sizeT;
-        aSize[4] = sizeN;
-        aSize[5] = sizeM;
-        return aSize;
-    }
-
-    Spacing<4> getSpacing4D() const 
-    { 
-        Spacing<4> spacing;
-        spacing[0] = spacingX;
-        spacing[1] = spacingY;
-        spacing[2] = spacingZ;
-        spacing[3] = spacingT;
-
-        return spacing; 
-    }
-
-    Spacing<5> getSpacing5D() const 
-    { 
-        Spacing<5> spacing;
-        spacing[0] = spacingX;
-        spacing[1] = spacingY;
-        spacing[2] = spacingZ;
-        spacing[3] = spacingT;
-        spacing[4] = spacingN;
-
-        return spacing; 
-    }
-
-    Spacing<6> getSpacing6D() const 
-    { 
-        Spacing<6> spacing;
-        spacing[0] = spacingX;
-        spacing[1] = spacingY;
-        spacing[2] = spacingZ;
-        spacing[3] = spacingT;
-        spacing[4] = spacingN;
-        spacing[5] = spacingM;
-
-        return spacing; 
-    }
-
-    Point3d<double> getPosition() const { return Point3d<double>(positionPatient[0], positionPatient[1], positionPatient[2]); }
-    Point3d<double> getOrient3D(int i) const { return Point3d<double>(orientationPatient[i][0], orientationPatient[i][1], orientationPatient[i][2]); }
-
-protected: 
-
-    std::vector<char*> fieldnames;
-};
-
-template <typename ValueType> 
-class FtkMatlabConverterBase : public Object
-{
-public:
-    
-    /** @name Typedefs */
-    //@{
-    /// 2D Image 
-    typedef Image<ValueType, 2> Image2DType;
-    /// 3D Image 
-    typedef Image<ValueType, 3> Image3DType;
-    /// 4D Image 
-    typedef Image<ValueType, 4> Image4DType;
-    /// 5D Image 
-    typedef Image<ValueType, 5> Image5DType;
-    /// 6D Image 
-    typedef Image<ValueType, 6> Image6DType;
-    /// Image array 
-    typedef ImageContainerArray<Image2DType> ImageContainerArrayType;
-    /// Image matrix
-    typedef ImageContainerMatrix<ImageContainerArrayType> ImageContainerMatrixType;
-    /// std vector type
-    typedef std::vector<ValueType> StdVectorType;
-    /// ftk vector type
-    typedef Array1d<ValueType> FtkVectorType;
-    /// ftk matrix type
-    typedef RealMatrix<ValueType> FtkMatrixType;
-    //@}
-
-    /** @name Constructors and destructor */
-    //@{
-    FtkMatlabConverterBase() {}
-    virtual ~FtkMatlabConverterBase() {}
-    //@}
-
-    /** @name functions to convert ftk to/from Matlab */
-    //@{
-    // 2D image
-    virtual bool convertToMatlab(const Image2DType& aImage, mxArray*& aMxImage, mxArray*& aHeader) = 0;
-    virtual bool convertFromMatlab(const mxArray* aMxImage, const mxArray* aHeader, Image2DType& aImage) = 0;
-    // 3D image
-    virtual bool convertToMatlab(const Image3DType& aImage, mxArray*& aMxImage, mxArray*& aHeader) = 0;
-    virtual bool convertFromMatlab(const mxArray* aMxImage, const mxArray* aHeader, Image3DType& aImage) = 0;
-    // image array
-    virtual bool convertToMatlab(const ImageContainerArrayType& aImageArray, mxArray*& aMxImage, mxArray*& aHeader) = 0;
-    virtual bool convertFromMatlab(const mxArray* aMxImage, const mxArray* aHeader, ImageContainerArrayType& aImage) = 0;
-    // image matrix
-    virtual bool convertToMatlab(const ImageContainerMatrixType& aImageMatrix, mxArray*& aMxImage, mxArray*& aHeader) = 0;
-    virtual bool convertFromMatlab(const mxArray* aMxImage, const mxArray* aHeader, ImageContainerMatrixType& aImageMatrix) = 0;
-    // ftk vector
-    virtual bool convertToMatlab(const FtkVectorType& vec, mxArray*& aMxArray) = 0;
-    virtual bool convertFromMatlab(const mxArray* aMxArray, FtkVectorType& vec) = 0;
-    // ftk matrix
-    virtual bool convertToMatlab(const FtkMatrixType& vec, mxArray*& aMxArray) = 0;
-    virtual bool convertFromMatlab(const mxArray* aMxArray, FtkMatrixType& vec) = 0;
-
-    // std vector
-    virtual bool convertToMatlab(const StdVectorType& vec, mxArray*& aMxArray) = 0;
-    virtual bool convertFromMatlab(const mxArray* aMxArray, StdVectorType& vec) = 0;
-    // std string
-    virtual bool convertToMatlab(const std::string& str, mxArray*& aMxStr);
-    virtual bool convertFromMatlab(const mxArray* aMxStr, std::string& str);
-    //@}
-
-    virtual void print(std::ostream& os) const = 0;
-
-protected:
-    
-};
-
-// -------------------------------------------------------
-// std string
-// -------------------------------------------------------
-
-template <typename ValueType> 
-bool FtkMatlabConverterBase<ValueType>::
-convertToMatlab(const std::string& str, mxArray*& aMxStr)
-{
-    aMxStr = mxCreateString(str.c_str());
-    return (aMxStr != NULL);
-}
-
-template <typename ValueType> 
-bool FtkMatlabConverterBase<ValueType>::
-convertFromMatlab(const mxArray* aMxStr, std::string& str)
-{
-    FTK_CHECK_RETURN_FALSE(aMxStr!=NULL);
-
-    int buflen = mxGetNumberOfElements(aMxStr) + 1;
-
-    std::vector<char> buf(buflen, '\0');
-
-    if (mxGetString(aMxStr, &buf[0], buflen) != 0)
-        return false;
-
-    str = std::string(&buf[0]);
-
-    return true;
-}
-
-END_NAMESPACE_1(ftk)
-
-#endif // FTK_FTKMATLABCONVERTERBASE_H 
-
-#endif // FTK_MATLAB_SUPPORT 
diff --git a/toolboxes/gtplus/matlab/gtMatlab.h b/toolboxes/gtplus/matlab/gtMatlab.h
index fa1b65f..61fbef8 100644
--- a/toolboxes/gtplus/matlab/gtMatlab.h
+++ b/toolboxes/gtplus/matlab/gtMatlab.h
@@ -8,6 +8,7 @@
 
 #pragma once 
 
+#include <sstream>
 #include <strstream>
 
 #ifdef GADGET_MSG
@@ -35,3 +36,53 @@
 #endif // _DEBUG
 
 #define GADGET_ERROR_MSG(message) GADGET_MSG(message) 
+
+#ifdef GADGET_CHECK_RETURN_FALSE
+    #undef GADGET_CHECK_RETURN_FALSE
+#endif // GADGET_CHECK_RETURN_FALSE
+#define GADGET_CHECK_RETURN_FALSE(con) { if ( ! (con) ) { return false; } }
+
+#ifdef GADGET_DEBUG_MODE
+#define GADGET_DEBUG_CHECK_THROW(con) GADGET_CHECK_THROW(con)
+#define GADGET_DEBUG_CHECK_RETURN(con, value) GADGET_CHECK_RETURN(con, value)
+#define GADGET_DEBUG_CHECK_RETURN_FALSE(con) GADGET_CHECK_RETURN_FALSE(con)
+#else
+#define GADGET_DEBUG_CHECK_THROW(con)
+#define GADGET_DEBUG_CHECK_RETURN(con, value)
+#define GADGET_DEBUG_CHECK_RETURN_FALSE(con)
+#endif // GADGET_DEBUG_MODE
+
+// Formats obj with obj.print() and emits the resulting text through GADGET_MSG.
+template <typename ObjType> void matlab_printInfo(const ObjType& obj)
+{
+    std::ostringstream outs; // owns its buffer; ostrstream::str() froze the buffer and leaked it
+    obj.print(outs);
+    const std::string msg(outs.str()); // separate local: GADGET_MSG is a macro, keep its argument simple
+    GADGET_MSG(msg.c_str());
+}
+
+inline void printAuthorInfo(std::stringstream& outs) // appends the author/contact banner below to outs
+{
+    using namespace std; // function-local: only needed for the unqualified endl below
+    outs << "---------------------------------------------------------------------" << endl;
+    outs << "This software is made by: " << endl;
+    outs << endl;
+    outs << "\t\tHui Xue " << endl;
+    outs << "Magnetic Resonance Technology Program" << endl;
+    outs << "National Heart, Lung and Blood Institute" << endl;
+    outs << "National Institutes of Health" << endl;
+    outs << "Email: hui.xue at nih.gov" << endl;
+    outs << endl;
+    outs << "\t\tPeter Kellman " << endl;
+    outs << "Medical Signal and Image Processing Program" << endl;
+    outs << "National Heart, Lung and Blood Institute" << endl;
+    outs << "National Institutes of Health" << endl;
+    outs << "Email: kellmanp at nhlbi.nih.gov" << endl;
+    outs << endl;
+    outs << "\t\tMichael Hansen " << endl;
+    outs << "Medical Signal and Image Processing Program" << endl;
+    outs << "National Heart, Lung and Blood Institute" << endl;
+    outs << "National Institutes of Health" << endl;
+    outs << "Email: michael.hansen at nih.gov" << endl;
+    outs << "---------------------------------------------------------------------" << endl;
+}
diff --git a/toolboxes/gtplus/matlab/gtMatlabConverter.h b/toolboxes/gtplus/matlab/gtMatlabConverter.h
index 3006369..64b81bb 100644
--- a/toolboxes/gtplus/matlab/gtMatlabConverter.h
+++ b/toolboxes/gtplus/matlab/gtMatlabConverter.h
@@ -8,17 +8,7 @@
 
 #pragma once
 
-#include <matrix.h>
-#include <mat.h>
-#include <mexGT.h>
-#include <cmath>
-#include <vector>
-#include <string>
-#include <iostream>
-#include <strstream>
-
-#include "hoNDArray.h"
-#include "gtMatlab.h"
+#include "gtMatlabImage.h"
 
 namespace Gadgetron
 {
@@ -42,6 +32,47 @@ public:
     virtual bool Str2Matlab(const std::string& str, mxArray*& aMx);
     virtual bool Matlab2Str(const mxArray* aMx, std::string& str);
 
+    template <unsigned int D> 
+    bool hoNDImage2Matlab(const hoNDImage<T, D>& a, mxArray*& aMx, mxArray*& aHeader)
+    { // Converts image a into Matlab data aMx plus header aHeader; returns false if either step fails.
+        std::vector<size_t> dim(D);
+        a.get_dimensions(dim);
+        // pixels -> aMx: wrap the image buffer in an hoNDArray (last arg false: presumably non-owning -- confirm)
+        hoNDArray<T> buf(dim, const_cast<T*>(a.get_data_ptr()), false);
+        GADGET_CHECK_RETURN_FALSE(hoNDArray2Matlab(buf, aMx));
+        // geometry -> aHeader: the header object is constructed from the image itself
+        gtMatlabImageHeader<T, D> header(a);
+        GADGET_CHECK_RETURN_FALSE(header.toMatlab(aHeader));
+
+        return true;
+    }
+
+    template <unsigned int D> 
+    bool Matlab2hoNDImage(const mxArray* aMx, const mxArray* aHeader, hoNDImage<T, D>& a)
+    { // Fills image a from Matlab data aMx and header aHeader; returns false when any check fails.
+        mwSize ndim = mxGetNumberOfDimensions(aMx);
+        if ( ndim != D ) return false;
+        // pixels: convert aMx into a temporary array, then load it into the image
+        hoNDArray<T> buf;
+        GADGET_CHECK_RETURN_FALSE(Matlab2hoNDArray(aMx, buf));
+        GADGET_CHECK_RETURN_FALSE(buf.get_number_of_dimensions()<=D); // NOTE(review): complex converter checks ==D -- confirm intent
+
+        a.from_NDArray(buf);
+        // geometry: parse the Matlab header, then apply it one dimension at a time
+        gtMatlabImageHeader<T, D> header;
+        GADGET_CHECK_RETURN_FALSE(header.fromMatlab(aHeader));
+
+        unsigned int ii;
+        for ( ii=0; ii<D; ii++ )
+        {
+            a.set_pixel_size(ii, header.pixelSize_[ii]);
+            a.set_origin(ii, header.origin_[ii]);
+            a.set_axis(ii, header.axis_[ii]);
+        }
+
+        return true;
+    }
+
     virtual void printInfo(std::ostream& os) const;
 };
 
@@ -53,16 +84,16 @@ hoNDArray2Matlab(const hoNDArray<T>& a, mxArray*& aMx)
     {
         boost::shared_ptr< std::vector<size_t> > dim = a.get_dimensions();
 
-        int ndim = dim->size();
+        mwSize ndim = dim->size();
         mwSize* dims = new mwSize[ndim];
 
-        size_t ii;
+        mwSize ii;
         for ( ii=0; ii<ndim; ii++ )
         {
             dims[ii] = static_cast<mwSize>( (*dim)[ii] );
         }
 
-        size_t N = a.get_number_of_elements();
+        mwSize N = a.get_number_of_elements();
         const T* pA = a.begin();
 
         if ( typeid(T) == typeid(float) )
@@ -72,7 +103,7 @@ hoNDArray2Matlab(const hoNDArray<T>& a, mxArray*& aMx)
 
             for ( ii=0; ii<N; ii++ )
             {
-                ptr[ii] = pA[ii];
+                ptr[ii] = (float)(pA[ii]);
             }
         }
         else
@@ -82,7 +113,7 @@ hoNDArray2Matlab(const hoNDArray<T>& a, mxArray*& aMx)
 
             for ( ii=0; ii<N; ii++ )
             {
-                ptr[ii] = pA[ii];
+                ptr[ii] = (float)(pA[ii]);
             }
         }
     }
@@ -211,7 +242,7 @@ template <typename T>
 bool gtMatlabConverter<T>::
 Matlab2Str(const mxArray* aMx, std::string& str)
 {
-    int N = mxGetNumberOfElements(aMx) + 1;
+    mwSize N = mxGetNumberOfElements(aMx) + 1;
 
     std::vector<char> buf(N, '\0');
     if (mxGetString(aMx, &buf[0], N) != 0)
diff --git a/toolboxes/gtplus/matlab/gtMatlabConverterComplex.h b/toolboxes/gtplus/matlab/gtMatlabConverterComplex.h
index e9c404b..dfda4b7 100644
--- a/toolboxes/gtplus/matlab/gtMatlabConverterComplex.h
+++ b/toolboxes/gtplus/matlab/gtMatlabConverterComplex.h
@@ -8,16 +8,7 @@
 
 #pragma once
 
-#include <matrix.h>
-#include <mat.h>
-#include <mexGT.h>
-#include <cmath>
-#include <vector>
-#include <string>
-#include <iostream>
-#include <strstream>
-
-#include "hoNDArray.h"
+#include "gtMatlabImage.h"
 
 namespace Gadgetron
 {
@@ -33,6 +24,44 @@ public:
     virtual bool hoNDArray2Matlab(const hoNDArray<T>& a, mxArray*& aMx);
     virtual bool Matlab2hoNDArray(const mxArray* aMx, hoNDArray<T>& a);
 
+    template <unsigned int D> 
+    bool hoNDImage2Matlab(const hoNDImage<T, D>& a, mxArray*& aMx, mxArray*& aHeader)
+    { // Converts image a into Matlab data aMx plus header aHeader; returns false if either step fails.
+        std::vector<size_t> dim(D);
+        a.get_dimensions(dim);
+        // pixels -> aMx: wrap the image buffer in an hoNDArray (last arg false: presumably non-owning -- confirm)
+        hoNDArray<T> buf(dim, const_cast<T*>(a.get_data_ptr()), false);
+        GADGET_CHECK_RETURN_FALSE(hoNDArray2Matlab(buf, aMx));
+        // geometry -> aHeader: the header object is constructed from the image itself
+        gtMatlabImageHeader<T, D> header(a);
+        GADGET_CHECK_RETURN_FALSE(header.toMatlab(aHeader));
+
+        return true;
+    }
+
+    template <unsigned int D> 
+    bool Matlab2hoNDImage(const mxArray* aMx, const mxArray* aHeader, hoNDImage<T, D>& a)
+    { // Fills image a from Matlab data aMx and header aHeader; returns false when any check fails.
+        hoNDArray<T> buf;
+        GADGET_CHECK_RETURN_FALSE(Matlab2hoNDArray(aMx, buf));
+        GADGET_CHECK_RETURN_FALSE(buf.get_number_of_dimensions()==D); // converted array must have exactly D dimensions
+
+        a.from_NDArray(buf);
+        // geometry: parse the Matlab header, then apply it one dimension at a time
+        gtMatlabImageHeader<T, D> header;
+        GADGET_CHECK_RETURN_FALSE(header.fromMatlab(aHeader));
+
+        unsigned int ii;
+        for ( ii=0; ii<D; ii++ )
+        {
+            a.set_pixel_size(ii, header.pixelSize_[ii]);
+            a.set_origin(ii, header.origin_[ii]);
+            a.set_axis(ii, header.axis_[ii]);
+        }
+
+        return true;
+    }
+
     virtual void printInfo(std::ostream& os) const;
 
 protected:
diff --git a/toolboxes/gtplus/matlab/gtMatlabImage.h b/toolboxes/gtplus/matlab/gtMatlabImage.h
new file mode 100644
index 0000000..f3bf7b8
--- /dev/null
+++ b/toolboxes/gtplus/matlab/gtMatlabImage.h
@@ -0,0 +1,255 @@
+/********************************************************************
+    created:    2014/02/25
+    author:     Hui Xue
+
+    purpose:    Gadgetron data structure for ND image matlab conversion
+*********************************************************************/
+
+#pragma once
+
+#include <matrix.h>
+#include <mat.h>
+#include <mexGT.h>
+#include <cmath>
+#include <vector>
+#include <string>
+#include <iostream>
+#include <strstream>
+
+#include "hoNDArray.h"
+#include "hoNDImage.h"
+#include "gtMatlab.h"
+
+namespace Gadgetron
+{
+
+/// Geometry header (origin, pixel size, axes) of a hoNDImage<T, D> for MATLAB exchange.
+template <typename T, unsigned int D>
+class gtMatlabImageHeader
+{
+public:
+
+    typedef hoNDImage<T, D> ImageType;
+
+    typedef typename ImageType::value_type value_type;
+    typedef typename ImageType::coord_type coord_type;
+    typedef typename ImageType::a_axis_type a_axis_type;
+    typedef typename ImageType::axis_type axis_type;
+
+    coord_type pixelSize_[D];           ///< pixel size along each dimension
+    coord_type origin_[D];              ///< origin coordinate along each dimension
+    hoNDPoint<coord_type, D> axis_[D];  ///< one axis (orientation) vector per dimension
+
+    gtMatlabImageHeader();                     ///< unit pixel size, zero origin, identity axes
+    gtMatlabImageHeader(const ImageType& im);  ///< copies the geometry of im
+    virtual ~gtMatlabImageHeader();
+
+    /// for the axis, it will be a D*D rotation matrix
+    /// every column is an orientation vector for a dimension
+    virtual bool toMatlab(mxArray*& header);
+    virtual bool fromMatlab(const mxArray* header);
+
+protected:
+
+    // the header field names; const char* because the entries point at string literals
+    std::vector<const char*> header_fields_;
+
+    void set_header_fields()
+    {
+        header_fields_.resize(3); // origin, pixelSize, axis
+        header_fields_[0] = "origin";
+        header_fields_[1] = "pixelSize";
+        header_fields_[2] = "axis";
+    }
+};
+
+/// Default header: unit pixel size, zero origin and identity axis matrix.
+template <typename T, unsigned int D>
+gtMatlabImageHeader<T, D>::gtMatlabImageHeader()
+{
+    for (unsigned int dim=0; dim<D; ++dim)
+    {
+        pixelSize_[dim] = 1;
+        origin_[dim] = 0;
+        axis_[dim].fill(0);                  // clear the vector first ...
+        axis_[dim][dim] = coord_type(1.0);   // ... then set its diagonal entry
+    }
+
+    this->set_header_fields();
+}
+
+/// Build the header from an existing image by copying its pixel size, origin and axes.
+template <typename T, unsigned int D>
+gtMatlabImageHeader<T, D>::gtMatlabImageHeader(const ImageType& im)
+{
+    // fetch the three geometry attributes from the image
+    std::vector<coord_type> imPixelSize;
+    std::vector<coord_type> imOrigin;
+    axis_type imAxis;
+    im.get_pixel_size(imPixelSize);
+    im.get_origin(imOrigin);
+    im.get_axis(imAxis);
+
+    // assumes each getter delivers at least D entries -- TODO confirm against hoNDImage
+    for (unsigned int dim=0; dim<D; ++dim)
+    {
+        pixelSize_[dim] = imPixelSize[dim];
+        origin_[dim] = imOrigin[dim];
+        axis_[dim] = imAxis[dim];
+    }
+
+    this->set_header_fields();
+}
+
+/// Empty destructor: the class holds no manually managed resources.
+template <typename T, unsigned int D>
+gtMatlabImageHeader<T, D>::~gtMatlabImageHeader()
+{
+}
+
+/// Pack the header into a 1x1 MATLAB struct with the fields "origin", "pixelSize" and "axis".
+/// All three fields are written as real single-precision arrays; returns false on failure.
+template <typename T, unsigned int D>
+bool gtMatlabImageHeader<T, D>::toMatlab(mxArray*& header)
+{
+    try
+    {
+        unsigned int ii, jj;
+        mwSize num[2] = {1, 1};
+        header = mxCreateStructArray(2, num, (int)header_fields_.size(), const_cast<const char**>(&header_fields_[0]));
+        GADGET_CHECK_RETURN_FALSE(header!=NULL); // the mxCreate* calls can return NULL outside of MEX
+
+        mwSize dims[1];
+        dims[0] = D;
+        // origin: D-element vector
+        mxArray* aMx = mxCreateNumericArray(1, dims, mxSINGLE_CLASS, mxREAL);
+        GADGET_CHECK_RETURN_FALSE(aMx!=NULL);
+        float* pr = static_cast<float*>(mxGetData(aMx));
+        for ( ii=0; ii<D; ii++ )
+        {
+            pr[ii] = origin_[ii];
+        }
+        mxSetField(header, 0, header_fields_[0], aMx);
+
+        // pixelSize: D-element vector
+        aMx = mxCreateNumericArray(1, dims, mxSINGLE_CLASS, mxREAL);
+        GADGET_CHECK_RETURN_FALSE(aMx!=NULL);
+        pr = static_cast<float*>(mxGetData(aMx));
+        for ( ii=0; ii<D; ii++ )
+        {
+            pr[ii] = pixelSize_[ii];
+        }
+        mxSetField(header, 0, header_fields_[1], aMx);
+
+        // axis: D x D matrix; element (jj, ii) of the matrix holds axis_[jj][ii]
+        // NOTE(review): this puts axis_[jj] in row jj, while the class comment speaks of columns -- confirm
+        aMx = mxCreateNumericMatrix(D, D, mxSINGLE_CLASS, mxREAL);
+        GADGET_CHECK_RETURN_FALSE(aMx!=NULL);
+        pr = static_cast<float*>(mxGetData(aMx));
+        for ( jj=0; jj<D; jj++ )
+        {
+            for ( ii=0; ii<D; ii++ )
+            {
+                pr[jj + ii*D] = axis_[jj][ii]; // MATLAB storage is column-major
+            }
+        }
+        mxSetField(header, 0, header_fields_[2], aMx);
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in gtMatlabImageHeader<T, D>::toMatlab(mxArray*& header) ... ");
+        return false;
+    }
+    return true;
+}
+
+/// Helper for fromMatlab: read one element of a real single/double mxArray.
+/// The caller must have checked that aMx is non-NULL and of class single or double.
+/// @param aMx  array to read from
+/// @param ind  linear (column-major) element index
+/// @return the element converted to C
+template <typename C>
+inline C gtMatlabHeaderValue(const mxArray* aMx, size_t ind)
+{
+    if ( mxIsSingle(aMx) )
+    {
+        return (C)( static_cast<const float*>(mxGetData(aMx))[ind] );
+    }
+
+    // not single, hence double by the caller's contract
+    return (C)( static_cast<const double*>(mxGetData(aMx))[ind] );
+}
+
+/// Fill the header from a MATLAB struct with fields "origin", "pixelSize" and "axis".
+///
+/// Every field must be present and of class single or double, otherwise false is returned.
+/// A vector field shorter than D only overwrites the leading entries; extra entries are ignored.
+/// The axis matrix is indexed with its own row/column counts, so a matrix smaller than D x D
+/// only overwrites the top-left part of the stored axes.
+/// Entries that are not overwritten keep the values they had before the call.
+///
+/// @param header  MATLAB struct array; only its element 0 is read
+/// @return true on success; false if a check fails or an exception is caught
+template <typename T, unsigned int D>
+bool gtMatlabImageHeader<T, D>::fromMatlab(const mxArray* header)
+{
+    try
+    {
+        GADGET_CHECK_RETURN_FALSE(header!=NULL);
+        GADGET_CHECK_RETURN_FALSE(mxIsStruct(header));
+
+        unsigned int ii, jj;
+
+        // header_fields_ holds "origin", "pixelSize", "axis" (filled by set_header_fields)
+        // ---- origin ----
+        // mxGetField yields NULL for a missing field, and any class other than
+        // single/double would be misread through the float/double pointers
+        const mxArray* aMx = mxGetField(header, 0, header_fields_[0]);
+        GADGET_CHECK_RETURN_FALSE(aMx!=NULL);
+        GADGET_CHECK_RETURN_FALSE(mxIsSingle(aMx) || mxIsDouble(aMx));
+
+        size_t N = mxGetNumberOfElements(aMx);
+        for ( ii=0; ii<GT_MIN(D, N); ii++ )
+        {
+            origin_[ii] = gtMatlabHeaderValue<coord_type>(aMx, ii);
+        }
+
+        // ---- pixelSize ----
+        aMx = mxGetField(header, 0, header_fields_[1]);
+        GADGET_CHECK_RETURN_FALSE(aMx!=NULL);
+        GADGET_CHECK_RETURN_FALSE(mxIsSingle(aMx) || mxIsDouble(aMx));
+
+        N = mxGetNumberOfElements(aMx);
+        for ( ii=0; ii<GT_MIN(D, N); ii++ )
+        {
+            pixelSize_[ii] = gtMatlabHeaderValue<coord_type>(aMx, ii);
+        }
+
+        // ---- axis ----
+        // bounds and stride come from the matrix itself rather than from the
+        // pixelSize element count, so a matrix that is not D x D is never overrun
+        aMx = mxGetField(header, 0, header_fields_[2]);
+        GADGET_CHECK_RETURN_FALSE(aMx!=NULL);
+        GADGET_CHECK_RETURN_FALSE(mxIsSingle(aMx) || mxIsDouble(aMx));
+
+        const size_t rows = mxGetM(aMx);
+        const size_t cols = mxGetN(aMx);
+        for ( jj=0; jj<GT_MIN(D, rows); jj++ )
+        {
+            for ( ii=0; ii<GT_MIN(D, cols); ii++ )
+            {
+                // column-major storage: element (jj, ii) sits at jj + ii*rows
+                axis_[jj][ii] = gtMatlabHeaderValue<coord_type>(aMx, jj + ii*rows);
+            }
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in gtMatlabImageHeader<T, D>::fromMatlab(const mxArray* header) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+}
diff --git a/toolboxes/gtplus/solver/gtPlusLSQRSolver.h b/toolboxes/gtplus/solver/gtPlusLSQRSolver.h
index 56b1fd8..bed2eb4 100644
--- a/toolboxes/gtplus/solver/gtPlusLSQRSolver.h
+++ b/toolboxes/gtplus/solver/gtPlusLSQRSolver.h
@@ -61,7 +61,7 @@ gtPlusLSQRSolver<Array_Type_I, Array_Type_O, Oper_Type>::
 gtPlusLSQRSolver() : BaseClass()
 {
     iterMax_ = 70;
-    thres_ = 1e-4;
+    thres_ = (value_type)1e-4;
 }
 
 template <typename Array_Type_I, typename Array_Type_O, typename Oper_Type>
@@ -93,7 +93,7 @@ solve(const Array_Type_I& b, Array_Type_O& x)
         // u = u - A(x, varargin{:}, 'notransp');
         // u = b - A*x0
         GADGET_CHECK_RETURN_FALSE(oper_->forwardOperator(x, u));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::subtract(b, u, u));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::subtract(b, u, u));
 
         value_type beta;
         Gadgetron::norm2(u, beta);
@@ -101,25 +101,25 @@ solve(const Array_Type_I& b, Array_Type_O& x)
         value_type normr(beta);
         if (std::abs(beta)>0)
         {
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::scal( value_type(1.0)/beta, u));
+            GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::scal( value_type(1.0)/beta, u));
         }
 
-        double c = 1;
-        double s = 0;
+        value_type c = 1;
+        value_type s = 0;
         value_type phibar = beta;
 
         // v = A(u, varargin{:},'transp');
-        Array_Type_I v(b);
+        Array_Type_I v(x);
         GADGET_CHECK_RETURN_FALSE(oper_->adjointOperator(u, v));
 
         value_type alpha;
         Gadgetron::norm2(v, alpha);
         if (std::abs(alpha)>0)
         {
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::scal( value_type(1.0)/alpha, v));
+            GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::scal( value_type(1.0)/alpha, v));
         }
 
-        Array_Type_I d(b);
+        Array_Type_I d(x);
         Gadgetron::clear(d);
 
         value_type normar;
@@ -140,7 +140,7 @@ solve(const Array_Type_I& b, Array_Type_O& x)
 
         // loop over maxit iterations (unless convergence or failure)
 
-        Array_Type_I z(v), dtmp(d), ztmp(d), vt(v);
+        Array_Type_I z(v), dtmp(d), ztmp(v), vt(v), utmp(u);
         Array_Type_I normaVec(3);
 
         value_type thet, rhot, rho, phi, tmp, tmp2;
@@ -152,12 +152,12 @@ solve(const Array_Type_I& b, Array_Type_O& x)
             memcpy(z.begin(), v.begin(), v.get_number_of_bytes());
 
             // u = A(z, varargin{:},'notransp') - alpha*u;
-            GADGET_CHECK_RETURN_FALSE(oper_->forwardOperator(z, dtmp));
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::scal( alpha, u));
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::subtract( dtmp, u, u));
+            GADGET_CHECK_RETURN_FALSE(oper_->forwardOperator(z, utmp));
+            Gadgetron::scal( alpha, u);
+            Gadgetron::subtract( utmp, u, u);
 
             Gadgetron::norm2(u, beta);
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::scal( value_type(1.0)/beta, u));
+            Gadgetron::scal( value_type(1.0)/beta, u);
 
             normaVec(0) = norma;
             normaVec(1) = alpha;
@@ -180,9 +180,9 @@ solve(const Array_Type_I& b, Array_Type_O& x)
             // d = (z - thet * d) / rho;
             //dtmp = d;
             memcpy(dtmp.begin(), d.begin(), d.get_number_of_bytes());
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::scal( thet, dtmp));
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::subtract( z, dtmp, ztmp));
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::scal( value_type(1.0)/rho, ztmp));
+            Gadgetron::scal( thet, dtmp);
+            Gadgetron::subtract( z, dtmp, ztmp);
+            Gadgetron::scal( value_type(1.0)/rho, ztmp);
             //d = ztmp;
             memcpy(d.begin(), ztmp.begin(), d.get_number_of_bytes());
 
@@ -230,21 +230,21 @@ solve(const Array_Type_I& b, Array_Type_O& x)
             // x = x + phi * d;
             //dtmp = d;
             memcpy(dtmp.begin(), d.begin(), d.get_number_of_bytes());
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::scal( phi, dtmp));
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::add( x, dtmp, x));
+            Gadgetron::scal( phi, dtmp);
+            Gadgetron::add( x, dtmp, x);
 
-            normr = std::abs( (double)s) * normr;
+            normr = (value_type)(std::abs( (double)s) * normr);
 
             // vt = A(u, varargin{:},'transp');
             GADGET_CHECK_RETURN_FALSE(oper_->adjointOperator(u, vt));
 
             // v = vt - beta * v;
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::scal( beta, v));
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::subtract( vt, v, v));
+            Gadgetron::scal( beta, v);
+            Gadgetron::subtract( vt, v, v);
 
             Gadgetron::norm2(v, alpha);
 
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::scal( value_type(1.0)/alpha, v));
+            Gadgetron::scal( value_type(1.0)/alpha, v);
 
             normar = alpha * std::abs( (value_type)s * phi);
         }
diff --git a/toolboxes/gtplus/solver/gtPlusLinearSolver.h b/toolboxes/gtplus/solver/gtPlusLinearSolver.h
index 00cb5c3..49ccdc1 100644
--- a/toolboxes/gtplus/solver/gtPlusLinearSolver.h
+++ b/toolboxes/gtplus/solver/gtPlusLinearSolver.h
@@ -17,6 +17,7 @@ public:
     typedef gtPlusSolver<Array_Type_I, Array_Type_O> BaseClass;
 
     typedef typename BaseClass::ValueType ValueType;
+    typedef typename realType<ValueType>::Type value_type;
 
     gtPlusLinearSolver();
     virtual ~gtPlusLinearSolver();
@@ -32,7 +33,7 @@ public:
     size_t iterMax_;
 
     // threshold for detla change of residual
-    double thres_;
+    value_type thres_;
 
     // initial guess for the solver
     Array_Type_O* x0_;
diff --git a/toolboxes/gtplus/solver/gtPlusNCGSolver.h b/toolboxes/gtplus/solver/gtPlusNCGSolver.h
index 4055bf6..400f0f8 100644
--- a/toolboxes/gtplus/solver/gtPlusNCGSolver.h
+++ b/toolboxes/gtplus/solver/gtPlusNCGSolver.h
@@ -24,7 +24,7 @@ public:
 
     typedef gtPlusNonLinearSolver<Array_Type_I, Array_Type_O, Oper_Type> BaseClass;
     typedef typename BaseClass::ValueType ValueType;
-    typedef typename realType<ValueType>::Type value_type;
+    typedef typename BaseClass::value_type value_type;
     typedef typename BaseClass::Oper_Elem_Type Oper_Elem_Type;
     typedef typename BaseClass::Oper_List_Type Oper_List_Type;
 
@@ -42,26 +42,26 @@ public:
     size_t iterMax_;
 
     /// threshold for detla change of gradient
-    double gradThres_;
+    value_type gradThres_;
 
     /// threshold for detla change of objective function
-    double objThres_;
+    value_type objThres_;
 
     /// scale factor of initial step size of linear search 
-    double beta_;
+    value_type beta_;
 
     /// initial step size of linear search
-    double t0_;
+    value_type t0_;
 
     /// number of max linear search iterations (secant linear search)
     size_t secantIterMax_;
 
     /// gradient threshold for secant linear search
-    double secantThres_;
+    value_type secantThres_;
 
     /// sometimes the secantThres can increase during line search
     /// the maximal allowed secantThres increments compared to previous secant iteration
-    double secantRatio_;
+    value_type secantRatio_;
 
     /// initial guess for the solver
     Array_Type_O* x0_;
@@ -90,12 +90,12 @@ gtPlusNCGSolver<Array_Type_I, Array_Type_O, Oper_Type>::
 gtPlusNCGSolver() : BaseClass()
 {
     iterMax_ = 10;
-    gradThres_ = 1e-4;
-    objThres_ = 0.1;
-    beta_ = 0.5;
-    t0_ = 2.0;
+    gradThres_ = (value_type)1e-4;
+    objThres_ = (value_type)0.1;
+    beta_ = (value_type)0.5;
+    t0_ = (value_type)2.0;
     secantIterMax_ = 10;
-    secantThres_ = 1e-3;
+    secantThres_ = (value_type)1e-3;
     secantRatio_ = 2;
 }
 
@@ -118,13 +118,13 @@ grad(const Array_Type_I& x, Array_Type_I& g)
         if ( N == 0 ) return true;
 
         GADGET_CHECK_RETURN_FALSE(operList_[0].first->grad(x, g));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal(operList_[0].second, g));
+        Gadgetron::scal(operList_[0].second, g);
 
         for ( size_t op=1; op<N; op++ )
         {
             GADGET_CHECK_RETURN_FALSE(operList_[op].first->grad(x, gradBuf_));
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::scal(operList_[op].second, gradBuf_));
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::add(gradBuf_, g, g));
+            Gadgetron::scal(operList_[op].second, gradBuf_);
+            Gadgetron::add(gradBuf_, g, g);
         }
     }
     catch(...)
@@ -176,8 +176,6 @@ solve(const Array_Type_I& /*b*/, Array_Type_O& x)
     {
         if ( operList_.empty() ) return true;
 
-        value_type v;
-
         // initial gradient
         Array_Type_I g0(*x0_);
         GADGET_CHECK_RETURN_FALSE(this->grad(*x0_, g0));
@@ -187,7 +185,7 @@ solve(const Array_Type_I& /*b*/, Array_Type_O& x)
 
         // dx = -g0;
         Array_Type_I dx(g0);
-        GADGET_CHECK_RETURN_FALSE( Gadgetron::scal( (value_type)(-1), dx ) );
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE( Gadgetron::scal( (value_type)(-1), dx ) );
 
         //Gadgetron::norm2(dx, v); GADGET_MSG(v);
 
@@ -215,11 +213,25 @@ solve(const Array_Type_I& /*b*/, Array_Type_O& x)
         }
 
         unsigned int numOfTries = 0;
-        while ( (std::abs(currF.real() - oriF.real())/currF.real() < 0.05) && (numOfTries < 3) )
+
+        value_type changeRatio = std::abs(currF.real() - oriF.real())/currF.real();
+        value_type changeRatio2 = std::abs(currF.real() - oriF.real())/oriF.real();
+        value_type minChangeRatio = (value_type)0.05;
+        value_type maxChangeRatio = (value_type)6.0;
+        unsigned int maxNumOfTries = 4;
+
+        while ( ( (changeRatio<minChangeRatio)||(changeRatio2>maxChangeRatio) ) && (numOfTries < maxNumOfTries) )
         {
             numOfTries++;
 
-            t0 /= beta_;
+            if ( changeRatio<minChangeRatio )
+            {
+                t0 /= beta_;
+            }
+            else if ( changeRatio2>maxChangeRatio )
+            {
+                t0 *= beta_;
+            }
 
             dxTmp = dx;
             Gadgetron::scal(t0, dxTmp);
@@ -229,6 +241,9 @@ solve(const Array_Type_I& /*b*/, Array_Type_O& x)
 
             GADGET_MSG("t0 is " << t0 << " ... ");
             GADGET_MSG("To determine t0, --- ori and curr obj: " << oriF << " - " << currF << " ... ");
+
+            changeRatio = std::abs(currF.real() - oriF.real())/currF.real();
+            changeRatio2 = std::abs(currF.real() - oriF.real())/oriF.real();
         }
 
         prevF = oriF;
@@ -308,7 +323,7 @@ solve(const Array_Type_I& /*b*/, Array_Type_O& x)
             // Fletcher - Reeves updates
             Gadgetron::dotc(g1, g1, v1);
             Gadgetron::dotc(g0, g0, v2);
-            bk = v1.real()/(v2.real()+DBL_EPSILON);
+            bk = (value_type)(v1.real()/(v2.real()+DBL_EPSILON));
 
             g0 = g1;
 
diff --git a/toolboxes/gtplus/solver/gtPlusNonLinearSolver.h b/toolboxes/gtplus/solver/gtPlusNonLinearSolver.h
index ba31d33..fed39a8 100644
--- a/toolboxes/gtplus/solver/gtPlusNonLinearSolver.h
+++ b/toolboxes/gtplus/solver/gtPlusNonLinearSolver.h
@@ -17,6 +17,7 @@ public:
     typedef gtPlusSolver<Array_Type_I, Array_Type_O> BaseClass;
 
     typedef typename BaseClass::ValueType ValueType;
+    typedef typename realType<ValueType>::Type value_type;
 
     // one operator is related to a weight
     typedef std::pair<Oper_Type*, ValueType> Oper_Elem_Type;
diff --git a/toolboxes/gtplus/solver/gtPlusSolver.h b/toolboxes/gtplus/solver/gtPlusSolver.h
index d4d9e79..6e73c56 100644
--- a/toolboxes/gtplus/solver/gtPlusSolver.h
+++ b/toolboxes/gtplus/solver/gtPlusSolver.h
@@ -5,7 +5,7 @@
 
 #pragma once
 
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
 #include "GadgetronTimer.h"
 #include "gtPlusISMRMRDReconUtil.h"
 #include "gtPlusIOAnalyze.h"
diff --git a/toolboxes/gtplus/ut/CMakeLists.txt b/toolboxes/gtplus/ut/CMakeLists.txt
index 44319d1..bcad9e3 100644
--- a/toolboxes/gtplus/ut/CMakeLists.txt
+++ b/toolboxes/gtplus/ut/CMakeLists.txt
@@ -4,23 +4,22 @@ if(WIN32)
     link_directories(${Boost_LIBRARY_DIRS})
 endif(WIN32)
 
-include_directories( ${GTEST_INCLUDE_DIRS} 
+include_directories( ${GTEST_INCLUDE_DIRS}
+                     ${CMAKE_BINARY_DIR}/apps/gadgetron
                      ${CMAKE_SOURCE_DIR}/gadgets/core 
                      ${ACE_INCLUDE_DIR} 
                      ${Boost_INCLUDE_DIR}
                      ${FFTW3_INCLUDE_DIR}
                      ${ISMRMRD_INCLUDE_DIR}
-                     ${ISMRMRD_XSD_INCLUDE_DIR}
-                     ${XSD_INCLUDE_DIR}
-                     ${XERCESC_INCLUDE_DIR}
                      ${CMAKE_SOURCE_DIR}/dependencies/tinyxml
                      ${CMAKE_SOURCE_DIR}/toolboxes/mri/pmri/gpu
                      ${CMAKE_SOURCE_DIR}/toolboxes/nfft/gpu
                      ${CMAKE_SOURCE_DIR}/toolboxes/core
                      ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
                      ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
+                     ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
                      ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/hostutils
-                     ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/arma_math
+                     ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
                      ${CMAKE_SOURCE_DIR}/toolboxes/operators
                      ${CMAKE_SOURCE_DIR}/toolboxes/operators/cpu
                      ${CMAKE_SOURCE_DIR}/toolboxes/solvers
@@ -30,45 +29,32 @@ include_directories( ${GTEST_INCLUDE_DIRS}
                      ${CMAKE_SOURCE_DIR}/toolboxes/core
                      ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
                      ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/hostutils
-                     ${CMAKE_SOURCE_DIR}/toolboxes/gadgettools )
-
-if (MKL_FOUND)
-    MESSAGE("MKL Found for gtPlus ... ")
-    ADD_DEFINITIONS(-DUSE_MKL)
-    list(APPEND EXTRA_MKL_LIBRARIES mkl_core)
-    if ( USE_OPENMP )
-        list(APPEND EXTRA_MKL_LIBRARIES mkl_intel_thread)
-    endif ( USE_OPENMP )
-
-    INCLUDE_DIRECTORIES( ${MKL_INCLUDE_DIR} )
-    LINK_DIRECTORIES( ${MKL_LIB_DIR} ${MKL_COMPILER_LIB_DIR} )
-endif (MKL_FOUND)
+                     ${CMAKE_SOURCE_DIR}/toolboxes/gadgettools 
+                     ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow
+                     ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow/cpu
+                     ${CMAKE_SOURCE_DIR}/toolboxes/gadgettools
+                     ${CMAKE_SOURCE_DIR}/toolboxes/gadgettools/ismrmrd )
 
 link_libraries(optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY} 
                 ${GTEST_LIBRARIES} 
                 ${Boost_LIBRARIES} 
                 ${ISMRMRD_LIBRARIES} 
-                ${MKL_LIBRARIES} 
-                cpucore 
-                cpucore_math 
-                gtplus 
-                gadgettools 
+                gadgetron_toolbox_cpucore 
+                gadgetron_toolbox_cpucore_math 
+                # cpureg
+                gadgetron_toolbox_cpufft
+                gadgetron_toolbox_gtplus 
+                gadgetron_toolbox_gadgettools 
                 gadgetronPlus)
 
 if (CUDA_FOUND)
-    link_libraries(gtplus gpuparallelmri gpucore)
+    link_libraries(gadgetron_toolbox_gtplus gadgetron_toolbox_gpuparallelmri gadgetron_toolbox_gpucore)
 endif (CUDA_FOUND)
 
-add_executable(gtplus_ut_util 
-    gtplus_ut.cpp 
-    util_test.cpp )
-
-add_executable(gtplus_ut_grappa 
-    gtplus_ut.cpp 
-    grappa_test.cpp )
-
-add_executable(gtplus_ut_spirit 
-    gtplus_ut.cpp 
-    spirit_test.cpp )
+if (CUDA_FOUND)
+    add_executable(gtplus_ut_grappa 
+        gtplus_ut.cpp 
+        grappa_test.cpp )
+endif (CUDA_FOUND)
 
 #add_test(gtplus_ut gtplus_ut_util)
diff --git a/toolboxes/gtplus/ut/grappa_test.cpp b/toolboxes/gtplus/ut/grappa_test.cpp
index 65a61a1..d6ad7c1 100644
--- a/toolboxes/gtplus/ut/grappa_test.cpp
+++ b/toolboxes/gtplus/ut/grappa_test.cpp
@@ -4,8 +4,7 @@
 #endif // USE_OMP
 
 #include "Gadget.h"
-#include "ismrmrd.h"
-#include "hoNDArray_elemwise.h"
+#include "ismrmrd/ismrmrd.h"
 #include "complext.h"
 
 #include <gtest/gtest.h>
@@ -94,14 +93,14 @@ TYPED_TEST_CASE(gtPlus_grappa_Test, cpfloatImplementations);
 
 TYPED_TEST(gtPlus_grappa_Test, reconWorker2DTGRAPPA_SNRUnit)
 {
-    typedef GT_Complex8 T;
+    typedef std::complex<float> T;
 
     gtPlusIOAnalyze gt_io;
 
     float v;
 
     // image data
-    hoNDArray<GT_Complex8> data;
+    hoNDArray<std::complex<float> > data;
     gt_io.importArrayComplex(data, this->gtPluse_ut_data_folder_ + "StdandardDataR2_Kspace_real", 
         this->gtPluse_ut_data_folder_ + "StdandardDataR2_Kspace_imag");
     data.print(std::cout);
@@ -121,7 +120,7 @@ TYPED_TEST(gtPlus_grappa_Test, reconWorker2DTGRAPPA_SNRUnit)
     unsigned long long SET = 1;
     unsigned long long SEG = 1;
 
-    hoNDArray<GT_Complex8> kspace(RO, E1, CHA, SLC, E2, CON, PHS);
+    hoNDArray<std::complex<float> > kspace(RO, E1, CHA, SLC, E2, CON, PHS);
     memcpy(kspace.begin(), data.begin(), data.get_number_of_bytes());
 
     Gadgetron::norm2(kspace, v);
@@ -280,7 +279,7 @@ TYPED_TEST(gtPlus_grappa_Test, reconWorker2DTGRAPPA_SNRUnit)
 
 TYPED_TEST(gtPlus_grappa_Test, reconWorker2DTGRAPPA)
 {
-    typedef GT_Complex8 T;
+    typedef std::complex<float> T;
 
     gtPlusIOAnalyze gt_io;
 
@@ -297,12 +296,13 @@ TYPED_TEST(gtPlus_grappa_Test, reconWorker2DTGRAPPA)
     gt_io.importArray(imag_data, filename);
     imag_data.print(std::cout);
 
-    boost::shared_ptr< hoNDArray<GT_Complex8> > tmp = real_imag_to_complex<GT_Complex8>(&real_data, &imag_data);
+    hoNDArray<std::complex<float> > tmp;
+    Gadgetron::real_imag_to_complex<std::complex<float> >(real_data, imag_data, tmp);
 
-    unsigned long long RO = tmp->get_size(0);
-    unsigned long long E1 = tmp->get_size(1);
-    unsigned long long CHA = tmp->get_size(2);
-    unsigned long long PHS = tmp->get_size(3);
+    unsigned long long RO = tmp.get_size(0);
+    unsigned long long E1 = tmp.get_size(1);
+    unsigned long long CHA = tmp.get_size(2);
+    unsigned long long PHS = tmp.get_size(3);
 
     unsigned long long reconE1 = 120;
 
@@ -314,7 +314,7 @@ TYPED_TEST(gtPlus_grappa_Test, reconWorker2DTGRAPPA)
     unsigned long long SET = 1;
     unsigned long long SEG = 1;
 
-    hoNDArray<GT_Complex8> kspace(RO, E1, CHA, SLC, E2, CON, PHS, tmp->begin());
+    hoNDArray<std::complex<float> > kspace(RO, E1, CHA, SLC, E2, CON, PHS, tmp.begin());
 
     Gadgetron::norm2(kspace, v);
     GADGET_MSG("kspace = " << v);
@@ -331,7 +331,7 @@ TYPED_TEST(gtPlus_grappa_Test, reconWorker2DTGRAPPA)
     imag_ref.print(std::cout);
 
     hoNDArray<T> ref;
-    real_imag_to_complex<GT_Complex8>(real_ref, imag_ref, ref);
+    real_imag_to_complex<std::complex<float> >(real_ref, imag_ref, ref);
 
     Gadgetron::norm2(ref, v);
     GADGET_MSG("ref = " << v);
@@ -411,7 +411,7 @@ TYPED_TEST(gtPlus_grappa_Test, reconWorker2DTGRAPPA)
 
     workOrder->CalibMode_ = ISMRMRD_separate;
     workOrder->start_RO_ = 34;
-    workOrder->end_RO_ = RO-1;
+    workOrder->end_RO_ = (int)RO-1;
     workOrder->acceFactorE1_ = 4;
     workOrder->acceFactorE2_ = 1;
 
@@ -466,7 +466,7 @@ TYPED_TEST(gtPlus_grappa_Test, reconWorker2DTGRAPPA)
 
 TYPED_TEST(gtPlus_grappa_Test, grappa2D)
 {
-    typedef GT_Complex8 T;
+    typedef std::complex<float> T;
 
     gtPlusIOAnalyze gt_io;
 
@@ -483,14 +483,15 @@ TYPED_TEST(gtPlus_grappa_Test, grappa2D)
     gt_io.importArray(imag_data, filename);
     imag_data.print(std::cout);
 
-    boost::shared_ptr< hoNDArray<GT_Complex8> > tmp = real_imag_to_complex<GT_Complex8>(&real_data, &imag_data);
+    hoNDArray<std::complex<float> > tmp;
+    Gadgetron::real_imag_to_complex<std::complex<float> >(real_data, imag_data, tmp);
 
-    unsigned long long RO = tmp->get_size(0);
-    unsigned long long E1 = tmp->get_size(1);
-    unsigned long long CHA = tmp->get_size(2);
-    unsigned long long PHS = tmp->get_size(3);
+    unsigned long long RO = tmp.get_size(0);
+    unsigned long long E1 = tmp.get_size(1);
+    unsigned long long CHA = tmp.get_size(2);
+    unsigned long long PHS = tmp.get_size(3);
 
-    hoNDArray<GT_Complex8> kspace(RO, E1, CHA, PHS, tmp->begin());
+    hoNDArray<std::complex<float> > kspace(RO, E1, CHA, PHS, tmp.begin());
 
     // ref
     hoNDArray<float> real_ref;
@@ -504,17 +505,17 @@ TYPED_TEST(gtPlus_grappa_Test, grappa2D)
     imag_ref.print(std::cout);
 
     hoNDArray<T> ref;
-    real_imag_to_complex<GT_Complex8>(real_ref, imag_ref, ref);
+    real_imag_to_complex<std::complex<float> >(real_ref, imag_ref, ref);
 
     Gadgetron::norm2(ref, v);
     GADGET_MSG("ref = " << v);
 
     // recon
-    gtPlusISMRMRDReconUtil<GT_Complex8> util;
-    gtPlusISMRMRDReconUtilComplex<GT_Complex8> utilCplx;
+    gtPlusISMRMRDReconUtil<std::complex<float> > util;
+    gtPlusISMRMRDReconUtilComplex<std::complex<float> > utilCplx;
 
     // sum of square
-    hoNDArray<GT_Complex8> complexIm, sosIm;
+    hoNDArray<std::complex<float> > complexIm, sosIm;
 
     GadgetronTimer timer(false);
     timer.start("ifft2c");
@@ -527,7 +528,7 @@ TYPED_TEST(gtPlus_grappa_Test, grappa2D)
 
     hoNDArray<float> magSoS;
     timer.start("absolute");
-    Gadgetron::absolute(sosIm, magSoS);
+    Gadgetron::abs(sosIm, magSoS);
     timer.stop();
 
     filename = this->gtPluse_ut_res_folder_ + "SoS";
@@ -539,7 +540,7 @@ TYPED_TEST(gtPlus_grappa_Test, grappa2D)
     filename = this->gtPluse_ut_res_folder_ + "complexIm";
     gt_io.export3DArrayComplex(complexIm, filename);
 
-    hoNDArray<GT_Complex8> coilMap;
+    hoNDArray<std::complex<float> > coilMap;
     timer.start("coilMap2DNIH");
     utilCplx.coilMap2DNIH(complexIm, coilMap, ISMRMRD_SOUHEIL, 7, 3, 3, true);
     timer.stop();
@@ -571,7 +572,7 @@ TYPED_TEST(gtPlus_grappa_Test, grappa2D)
 
     ho5DArray<T> ker(kRO, kNE1, srcCHA, dstCHA, oE1.size());
     timer.start("grappa.calib");
-    grappa.grappa_.calib(acsSrc, acsDst, grappa_reg_lamda_, kRO, kE1, oE1, ker);
+    grappa.grappa_.calib(acsSrc, acsDst, grappa_reg_lamda_, (int)kRO, kE1, oE1, ker);
     timer.stop();
 
     Gadgetron::norm2(ker, v);
@@ -580,7 +581,7 @@ TYPED_TEST(gtPlus_grappa_Test, grappa2D)
 
     ho4DArray<T> kIm(RO, E1, srcCHA, dstCHA);
     timer.start("grappa.imageDomainKernel");
-    grappa.grappa_.imageDomainKernel(ker, kRO, kE1, oE1, RO, E1, kIm);
+    grappa.grappa_.imageDomainKernel(ker, (int)kRO, kE1, oE1, (int)RO, (int)E1, kIm);
     timer.stop();
     gt_io.exportArrayComplex(kIm, this->gtPluse_ut_res_folder_ + "kIm");
 
diff --git a/toolboxes/gtplus/ut/spirit_test.cpp b/toolboxes/gtplus/ut/spirit_test.cpp
deleted file mode 100644
index 554285f..0000000
--- a/toolboxes/gtplus/ut/spirit_test.cpp
+++ /dev/null
@@ -1,425 +0,0 @@
-
-#ifdef USE_OMP
-#include "omp.h"
-#endif // USE_OMP
-
-#include "Gadget.h"
-#include "ismrmrd.h"
-#include "hoNDArray_elemwise.h"
-#include "complext.h"
-
-#include <gtest/gtest.h>
-
-#include "hoNDArray_utils.h"
-
-#include "gtPlusIOAnalyze.h"
-#include "gtPlusISMRMRDReconUtil.h"
-// #include "gtPlusISMRMRDReconWorkOrder.h"
-#include "gtPlusISMRMRDReconWorker2DTGRAPPA.h"
-#include "gtPlusISMRMRDReconWorker2DTSPIRIT.h"
-#include "gtPlusISMRMRDReconWorker2DTL1SPIRITNCG.h"
-#include "gtPlusISMRMRDReconWorker3DTSPIRIT.h"
-#include "gtPlusISMRMRDReconWorkFlowCartesian2DT.h"
-#include "gtPlusISMRMRDReconWorkFlowCartesian3DT.h"
-#include "gtPlusMemoryManager.h"
-#include "hoNDArrayMemoryManaged.h"
-#include "gtPlusSPIRIT2DOperator.h"
-#include "gtPlusSPIRIT2DTOperator.h"
-#include "gtPlusSPIRIT3DOperator.h"
-#include "gtPlusSPIRITNoNullSpace2DOperator.h"
-#include "gtPlusSPIRITNoNullSpace2DTOperator.h"
-#include "gtPlusSPIRITNoNullSpace3DOperator.h"
-#include "gtPlusLSQRSolver.h"
-#include "gtPlusNCGSolver.h"
-#include "gtPlusWavelet2DOperator.h"
-#include "gtPlusWavelet3DOperator.h"
-#include "gtPlusWaveletNoNullSpace2DOperator.h"
-#include "gtPlusWaveletNoNullSpace3DOperator.h"
-#include "gtPlusDataFidelityOperator.h"
-#include "gtPlusISMRMRDReconWorkFlowCartesian3DT.h"
-#include "gtPlusISMRMRDReconWorker3DTGRAPPA.h"
-#include "gtPlusISMRMRDReconWorker3DTNoAcceleration.h"
-#include "gtPlusISMRMRDReconWorker3DTSPIRIT.h"
-#include "gtPlusISMRMRDReconWorker3DTL1SPIRITNCG.h"
-#include "gtPlusMemoryManager.h"
-
-#include "GadgetronTimer.h"
-
-#include <boost/thread/mutex.hpp>
-
-#ifdef max
-#undef max
-#endif // max
-
-using namespace Gadgetron;
-using namespace Gadgetron::gtPlus;
-using testing::Types;
-
-template <typename T> class gtPlus_spirit_Test : public ::testing::Test 
-{
-protected:
-    virtual void SetUp()
-    {
-        GADGET_MSG("=============================================================================================");
-        gtPluse_ut_folder_ = std::string(::getenv("GTPLUS_UNITTEST_DIRECTORY"));
-        GADGET_MSG("=============================================================================================");
-        GADGET_MSG("Unit Test for GtPlus");
-        gtPluse_ut_data_folder_ = gtPluse_ut_folder_ + "/data/";
-        gtPluse_ut_res_folder_ = gtPluse_ut_folder_ + "/result/";
-        GADGET_MSG("gtPluse_ut_data_folder_ is " << gtPluse_ut_data_folder_);
-        GADGET_MSG("gtPluse_ut_res_folder_ is " << gtPluse_ut_res_folder_);
-
-        timer_.set_timing_in_destruction(false);
-
-#ifdef WIN32
-    #ifdef USE_OMP
-        /// lock the threads
-        #pragma omp parallel default(shared)
-        {
-            int tid = omp_get_thread_num();
-            // std::cout << tid << std::endl;
-            DWORD_PTR mask = (1 << tid);
-            SetThreadAffinityMask( GetCurrentThread(), mask );
-        }
-    #endif // USE_OMP
-#endif // WIN32
-    }
-
-    std::string gtPluse_ut_folder_;
-    std::string gtPluse_ut_data_folder_;
-    std::string gtPluse_ut_res_folder_;
-
-    gtPlusIOAnalyze gt_io_;
-    gtPlusISMRMRDReconUtil<T> util_;
-    gtPlusISMRMRDReconUtilComplex<T> utilCplx_;
-    GadgetronTimer timer_;
-};
-
-typedef Types<float, double> realImplementations;
-
-typedef Types< std::complex<float> > cpfloatImplementations;
-
-typedef Types<std::complex<float>, std::complex<double>, float_complext, double_complext> cplxImplementations;
-typedef Types<std::complex<float>, std::complex<double> > stdCplxImplementations;
-typedef Types<float_complext, double_complext> cplxtImplementations;
-
-TYPED_TEST_CASE(gtPlus_spirit_Test, cpfloatImplementations);
-
-TYPED_TEST(gtPlus_spirit_Test, reconWorker2DTSPIRIT)
-{
-    typedef GT_Complex8 T;
-
-    gtPlusIOAnalyze gt_io;
-
-    float v;
-
-    // image data
-    hoNDArray<float> real_data;
-    std::string filename = this->gtPluse_ut_data_folder_ + "underSampledKSpace_real";
-    gt_io.importArray(real_data, filename);
-    real_data.print(std::cout);
-
-    hoNDArray<float> imag_data;
-    filename = this->gtPluse_ut_data_folder_ + "underSampledKSpace_imag";
-    gt_io.importArray(imag_data, filename);
-    imag_data.print(std::cout);
-
-    boost::shared_ptr< hoNDArray<GT_Complex8> > tmp = real_imag_to_complex<GT_Complex8>(&real_data, &imag_data);
-
-    unsigned long long RO = tmp->get_size(0);
-    unsigned long long E1 = tmp->get_size(1);
-    unsigned long long CHA = tmp->get_size(2);
-    unsigned long long PHS = tmp->get_size(3);
-
-    unsigned long long reconE1 = 120;
-
-    // [Ro E1 Cha Slice E2 Con Phase Rep Set Seg]
-    unsigned long long SLC = 1;
-    unsigned long long E2 = 1;
-    unsigned long long CON = 1;
-    unsigned long long REP = 1;
-    unsigned long long SET = 1;
-    unsigned long long SEG = 1;
-
-    hoNDArray<GT_Complex8> kspace(RO, E1, CHA, SLC, E2, CON, PHS, tmp->begin());
-
-    Gadgetron::norm2(kspace, v);
-    GADGET_MSG("kspace = " << v);
-
-    // ref
-    hoNDArray<float> real_ref;
-    filename = this->gtPluse_ut_data_folder_ + "ref_real";
-    gt_io.importArray(real_ref, filename);
-    real_ref.print(std::cout);
-
-    hoNDArray<float> imag_ref;
-    filename = this->gtPluse_ut_data_folder_ + "ref_imag";
-    gt_io.importArray(imag_ref, filename);
-    imag_ref.print(std::cout);
-
-    hoNDArray<T> ref;
-    real_imag_to_complex<GT_Complex8>(real_ref, imag_ref, ref);
-
-    Gadgetron::norm2(ref, v);
-    GADGET_MSG("ref = " << v);
-
-    // call the recon
-    typedef std::complex<float> ValueType;
-    typedef Gadgetron::gtPlus::gtPlusReconWorkOrder2DT<ValueType> WorkOrderType;
-    typedef std::pair<Gadgetron::gtPlus::ISMRMRDDIM, unsigned long long> DimensionRecordType;
-
-    WorkOrderType* workOrder = new WorkOrderType;
-
-    workOrder->data_ = kspace;
-    workOrder->ref_ = ref;
-
-    boost::shared_ptr< std::vector<size_t> > dims = workOrder->data_.get_dimensions();
-
-    GADGET_MSG("[Ro E1 Cha Slice E2 Con Phase Rep Set Seg] = [" 
-        << (*dims)[0] << " " << (*dims)[1] << " " << (*dims)[2] << " " << (*dims)[3] << " " << (*dims)[4] 
-        << " " << (*dims)[5] << " " << (*dims)[6] << " " << 1 << " " << 1 << " " << 1 << "]");
-
-    std::vector<size_t> dimensions_ = *dims;
-
-        // work flow
-    Gadgetron::gtPlus::gtPlusISMRMRDReconWorkFlowCartesian2DT<ValueType> workflow_;
-
-    // worker
-    Gadgetron::gtPlus::gtPlusReconWorker2DTSPIRIT<ValueType> worker_spirit_;
-
-    // parameters
-    Gadgetron::gtPlus::ISMRMRDDIM dim_4th_ = DIM_Phase;
-    Gadgetron::gtPlus::ISMRMRDDIM dim_5th_ = DIM_Slice;
-    Gadgetron::gtPlus::ISMRMRDDIM workOrder_ShareDim_ = DIM_NONE;
-
-    bool interleaved_same_combinationcoeff_allS_ = false;
-    int interleaved_whichS_combinationcoeff_ = 0;
-
-    bool embedded_averageall_ref_ = false;
-    bool embedded_fullres_coilmap_ = true;
-    bool embedded_same_combinationcoeff_allS_ = false;
-    int embedded_whichS_combinationcoeff_ = 0;
-    bool embedded_ref_fillback_ = true;
-
-    bool separate_averageall_ref_ = false;
-    bool separate_fullres_coilmap_ = true;
-    bool separate_same_combinationcoeff_allS_ = false;
-    int separate_whichS_combinationcoeff_ = 0;
-
-    bool same_coil_compression_coeff_allS_ = true;
-    bool downstream_coil_compression_ = true;
-    double coil_compression_thres_ = 1e-3;
-    int coil_compression_num_modesKept_ = -1;
-
-    unsigned long long csm_kSize_ = 7;
-    unsigned long long csm_powermethod_num_ = 3;
-
-    Gadgetron::gtPlus::ISMRMRDALGO recon_algorithm_ = ISMRMRD_SPIRIT;
-    bool recon_kspace_needed_ = true;
-
-    unsigned long long spirit_kSize_RO_ = 5;
-    unsigned long long spirit_kSize_E1_ = 5;
-    unsigned long long spirit_kSize_E2_ = 5;
-
-    double spirit_reg_lamda_ = 0.005;
-    unsigned long long spirit_iter_max_ = 100;
-    double spirit_iter_thres_ = 1e-5;
-
-    // recon
-    workflow_.setDataArray(kspace);
-    workflow_.setRefArray(ref);
-
-    Gadgetron::norm2(workOrder->data_, v); GADGET_MSG("workOrder->data_ = " << v);
-    Gadgetron::norm2(workOrder->ref_, v); GADGET_MSG("workOrder->ref_ = " << v);
-
-    workflow_.reconSizeRO_ = RO;
-    workflow_.reconSizeE1_ = reconE1;
-    workflow_.reconSizeE2_ = 1;
-    // workflow_.dataDimStartingIndexes_ = workOrder->dataDimStartingIndexes_;
-    workflow_.dim4th_ = dim_4th_;
-    workflow_.dim5th_ = dim_5th_;
-
-    workOrder->CalibMode_ = ISMRMRD_separate;
-    workOrder->start_RO_ = 34;
-    workOrder->end_RO_ = RO-1;
-    workOrder->acceFactorE1_ = 4;
-    workOrder->acceFactorE2_ = 1;
-
-    workOrder->downstream_coil_compression_ = downstream_coil_compression_;
-    workOrder->coil_compression_thres_ = coil_compression_thres_;
-    workOrder->coil_compression_num_modesKept_ = coil_compression_num_modesKept_;
-    workOrder->csm_kSize_ = csm_kSize_;
-    workOrder->csm_powermethod_num_ = csm_powermethod_num_;;
-
-    workOrder->recon_algorithm_ = recon_algorithm_;
-
-    workOrder->spirit_kSize_RO_ = spirit_kSize_RO_;
-    workOrder->spirit_kSize_E1_ = spirit_kSize_E1_;
-    workOrder->spirit_kSize_E2_ = spirit_kSize_E2_;
-    workOrder->spirit_reg_lamda_ = spirit_reg_lamda_;
-    workOrder->spirit_iter_max_ = spirit_iter_max_;
-    workOrder->spirit_iter_thres_ = spirit_iter_thres_;
-
-    workOrder->recon_kspace_needed_ = recon_kspace_needed_;
-
-    if ( coil_compression_thres_>0 || coil_compression_num_modesKept_>0 )
-    {
-        workOrder->coil_compression_ = true;
-    }
-    else
-    {
-        workOrder->coil_compression_ = false;
-    }
-
-    workOrder->same_coil_compression_coeff_allS_ = same_coil_compression_coeff_allS_;
-    workOrder->embedded_averageall_ref_ = embedded_averageall_ref_;
-    workOrder->embedded_fullres_coilmap_ = embedded_fullres_coilmap_;
-    workOrder->embedded_same_combinationcoeff_allS_ = embedded_same_combinationcoeff_allS_;
-    workOrder->embedded_whichS_combinationcoeff_ = embedded_whichS_combinationcoeff_;
-    workOrder->embedded_ref_fillback_ = embedded_ref_fillback_;
-    workOrder->separate_averageall_ref_ = separate_averageall_ref_;
-    workOrder->separate_fullres_coilmap_ = separate_fullres_coilmap_;
-    workOrder->separate_same_combinationcoeff_allS_ = separate_same_combinationcoeff_allS_;
-    workOrder->separate_whichS_combinationcoeff_ = separate_whichS_combinationcoeff_;
-    workOrder->interleaved_same_combinationcoeff_allS_ = interleaved_same_combinationcoeff_allS_;
-    workOrder->interleaved_whichS_combinationcoeff_ = interleaved_whichS_combinationcoeff_;
-
-    worker_spirit_.performTiming_ = true;
-    worker_spirit_.debugFolder_ = this->gtPluse_ut_res_folder_;
-
-    workflow_.debugFolder_ = this->gtPluse_ut_res_folder_;
-    workflow_.worker_ = &worker_spirit_;
-    workflow_.workOrder_ = workOrder;
-
-    gt_io.exportArrayComplex(workflow_.workOrder_->ref_, this->gtPluse_ut_res_folder_+"ref");
-
-    boost::shared_ptr<Gadgetron::gtPlus::gtPlusMemoryManager> mem_manager_(new Gadgetron::gtPlus::gtPlusMemoryManager(4, 640*1024*1024));
-    worker_spirit_.gtPlus_mem_manager_ = mem_manager_;
-
-    workflow_.preProcessing();
-    workflow_.recon();
-    workflow_.postProcessing();
-
-    gt_io.exportArrayComplex(workflow_.res_, this->gtPluse_ut_res_folder_+"spirit2D_gtPlus_res");
-}
-
-TYPED_TEST(gtPlus_spirit_Test, testNCGSolver2DTSPIRIT_neuro_3by3)
-{
-    typedef GT_Complex8 T;
-
-    gtPlusIOAnalyze gt_io;
-
-    float v;
-
-    // image data
-    hoNDArray<float> real_data;
-    std::string filename = this->gtPluse_ut_data_folder_ + "Job2DT_kspace_ID6_REAL";
-    gt_io.importArray(real_data, filename);
-    real_data.print(std::cout);
-
-    hoNDArray<float> imag_data;
-    filename = this->gtPluse_ut_data_folder_ + "Job2DT_kspace_ID6_IMAG";
-    gt_io.importArray(imag_data, filename);
-    imag_data.print(std::cout);
-
-    boost::shared_ptr< hoNDArray<GT_Complex8> > kspace = real_imag_to_complex<GT_Complex8>(&real_data, &imag_data);
-
-    hoNDArray<float> real_ker;
-    filename = this->gtPluse_ut_data_folder_ + "Job2DT_ker_ID6_REAL";
-    gt_io.importArray(real_ker, filename);
-    real_ker.print(std::cout);
-
-    hoNDArray<float> imag_ker;
-    filename = this->gtPluse_ut_data_folder_ + "Job2DT_ker_ID6_IMAG";
-    gt_io.importArray(imag_ker, filename);
-    imag_ker.print(std::cout);
-
-    boost::shared_ptr< hoNDArray<GT_Complex8> > ker = real_imag_to_complex<GT_Complex8>(&real_ker, &imag_ker);
-
-    hoNDArray<float> real_kspaceLinear;
-    filename = this->gtPluse_ut_data_folder_ + "Job2DT_kspaceLinear_ID6_REAL";
-    gt_io.importArray(real_kspaceLinear, filename);
-    real_kspaceLinear.print(std::cout);
-
-    hoNDArray<float> imag_kspaceLinear;
-    filename = this->gtPluse_ut_data_folder_ + "Job2DT_kspaceLinear_ID6_IMAG";
-    gt_io.importArray(imag_kspaceLinear, filename);
-    imag_kspaceLinear.print(std::cout);
-
-    boost::shared_ptr< hoNDArray<GT_Complex8> > kspaceLinear = real_imag_to_complex<GT_Complex8>(&real_kspaceLinear, &imag_kspaceLinear);
-
-    Gadgetron::gtPlus::gtPlusReconWorker3DTL1SPIRITNCG<GT_Complex8> worker_spirit_L1_ncg_;
-    worker_spirit_L1_ncg_.performTiming_ = true;
-    worker_spirit_L1_ncg_.debugFolder_ = this->gtPluse_ut_res_folder_;
-
-    Gadgetron::gtPlus::gtPlusReconJob2DT< std::complex<float> > job;
-
-    job.kspace = *kspace;
-    job.ker = *ker;
-
-    job.workOrder2DT.CalibMode_ = ISMRMRD_embedded;
-    job.workOrder2DT.InterleaveDim_ = DIM_Phase;
-
-    job.workOrder2DT.acceFactorE1_ = 3;
-    job.workOrder2DT.acceFactorE2_ = 3;
-
-    job.workOrder2DT.kSpaceCenterRO_ = 128;
-    job.workOrder2DT.kSpaceCenterEncode1_ = 127;
-    job.workOrder2DT.kSpaceCenterEncode2_ = 96;
-
-    job.workOrder2DT.kSpaceMaxRO_ = 256;
-    job.workOrder2DT.kSpaceMaxEncode1_ = 255;
-    job.workOrder2DT.kSpaceMaxEncode2_ = 191;
-
-    job.workOrder2DT.recon_algorithm_ = ISMRMRD_L1SPIRIT;
-    job.workOrder2DT.recon_auto_parameters_ = false;
-
-    job.workOrder2DT.spirit_kSize_RO_ = 7;
-    job.workOrder2DT.spirit_kSize_E1_ = 7;
-    job.workOrder2DT.spirit_kSize_E2_ = 5;
-
-    job.workOrder2DT.spirit_reg_lamda_ = 0.01;
-    job.workOrder2DT.spirit_calib_over_determine_ratio_ = 15;
-
-    job.workOrder2DT.spirit_solve_symmetric_ = false;
-
-    job.workOrder2DT.spirit_iter_max_ = 100;
-    job.workOrder2DT.spirit_iter_thres_ = 0.005;
-    job.workOrder2DT.spirit_print_iter_ = true;
-
-    job.workOrder2DT.spirit_perform_linear_ = true;
-    job.workOrder2DT.spirit_perform_nonlinear_ = true;
-
-    job.workOrder2DT.spirit_parallel_imaging_lamda_ = 1;
-    job.workOrder2DT.spirit_image_reg_lamda_ = 0.0025;
-    job.workOrder2DT.spirit_data_fidelity_lamda_ = 0;
-
-    job.workOrder2DT.spirit_ncg_iter_max_ = 10;
-    job.workOrder2DT.spirit_ncg_iter_thres_ = 0.001;
-    job.workOrder2DT.spirit_ncg_print_iter_ = true;
-    job.workOrder2DT.spirit_ncg_scale_factor_ = 1;
-
-    job.workOrder2DT.spirit_use_coil_sen_map_ = false;
-    job.workOrder2DT.spirit_use_moco_enhancement_ = false;
-    job.workOrder2DT.spirit_recon_moco_images_ = false;
-
-    job.workOrder2DT.spirit_temporal_enhancement_ratio_ = 5;
-    job.workOrder2DT.spirit_2D_scale_per_chunk_ = false;
-
-    job.workOrder2DT.spirit_E2_enhancement_ratio_ = 1.0;
-    job.workOrder2DT.spirit_3D_scale_per_chunk_ = false;
-
-    bool succeed = true;
-    GADGET_START_TIMING_CONDITION(this->timer_, "Recon 2DT job ... ", true);
-
-    job.res = job.kspace;
-
-    worker_spirit_L1_ncg_.performUnwarppingImplROPermuted(&(job.workOrder2DT), job.kspace, job.ker, *job.workOrder2DT.coilMap_, job.res);
-    // worker_spirit_L1_ncg_.performUnwarppingImplROPermuted(&(job.workOrder2DT), job.kspace, job.ker, *job.workOrder2DT.coilMap_, *kspaceLinear, job.res);
-
-    // succeed = worker_spirit_L1_ncg_.performUnwarppingImpl(job);
-
-    GADGET_STOP_TIMING_CONDITION(this->timer_, true);
-
-    gt_io.exportArrayComplex(job.res, this->gtPluse_ut_res_folder_+"NCGSolver2DTSPIRIT_neuro_3by3_res");
-}
diff --git a/toolboxes/gtplus/ut/util_test.cpp b/toolboxes/gtplus/ut/util_test.cpp
deleted file mode 100644
index c36ed82..0000000
--- a/toolboxes/gtplus/ut/util_test.cpp
+++ /dev/null
@@ -1,1195 +0,0 @@
-
-#ifdef USE_OMP
-#include "omp.h"
-#endif // USE_OMP
-
-#include "Gadget.h"
-#include "Gadgetron.h"
-#include "ismrmrd.h"
-#include "hoNDArray_elemwise.h"
-#include "complext.h"
-
-#include <gtest/gtest.h>
-
-#include "hoNDArray_utils.h"
-
-#include "gtPlusIOAnalyze.h"
-#include "gtPlusISMRMRDReconUtil.h"
-// #include "gtPlusISMRMRDReconWorkOrder.h"
-#include "gtPlusISMRMRDReconWorker2DTGRAPPA.h"
-#include "gtPlusISMRMRDReconWorker2DTSPIRIT.h"
-#include "gtPlusISMRMRDReconWorker3DTSPIRIT.h"
-#include "gtPlusISMRMRDReconWorkFlowCartesian2DT.h"
-#include "gtPlusISMRMRDReconWorkFlowCartesian3DT.h"
-#include "gtPlusMemoryManager.h"
-#include "hoNDArrayMemoryManaged.h"
-#include "gtPlusSPIRIT2DOperator.h"
-#include "gtPlusSPIRIT2DTOperator.h"
-#include "gtPlusSPIRIT3DOperator.h"
-#include "gtPlusSPIRITNoNullSpace2DOperator.h"
-#include "gtPlusSPIRITNoNullSpace2DTOperator.h"
-#include "gtPlusSPIRITNoNullSpace3DOperator.h"
-#include "gtPlusNCGSolver.h"
-
-#include "GadgetronTimer.h"
-
-#include <boost/thread/mutex.hpp>
-
-#ifdef max
-#undef max
-#endif // max
-
-using namespace Gadgetron;
-using namespace Gadgetron::gtPlus;
-using testing::Types;
-
-template <typename T> class gtPlus_IO_Test : public ::testing::Test 
-{
-protected:
-    virtual void SetUp()
-    {
-        GADGET_MSG("=============================================================================================");
-        gtPluse_ut_folder_ = std::string(::getenv("GTPLUS_UNITTEST_DIRECTORY"));
-        GADGET_MSG("=============================================================================================");
-        GADGET_MSG("Unit Test for GtPlus");
-        gtPluse_ut_data_folder_ = gtPluse_ut_folder_ + "/data/";
-        gtPluse_ut_res_folder_ = gtPluse_ut_folder_ + "/result/";
-        GADGET_MSG("gtPluse_ut_data_folder_ is " << gtPluse_ut_data_folder_);
-        GADGET_MSG("gtPluse_ut_res_folder_ is " << gtPluse_ut_res_folder_);
-
-        timer_.set_timing_in_destruction(false);
-
-#ifdef WIN32
-    #ifdef USE_OMP
-        /// lock the threads
-        #pragma omp parallel default(shared)
-        {
-            int tid = omp_get_thread_num();
-            // std::cout << tid << std::endl;
-            DWORD_PTR mask = (1 << tid);
-            SetThreadAffinityMask( GetCurrentThread(), mask );
-        }
-    #endif // USE_OMP
-#endif // WIN32
-    }
-
-    std::string gtPluse_ut_folder_;
-    std::string gtPluse_ut_data_folder_;
-    std::string gtPluse_ut_res_folder_;
-
-    gtPlusIOAnalyze gt_io_;
-    gtPlusISMRMRDReconUtil<T> util_;
-    gtPlusISMRMRDReconUtilComplex<T> utilCplx_;
-    GadgetronTimer timer_;
-};
-
-typedef Types<float, double> realImplementations;
-
-typedef Types< std::complex<float> > cpfloatImplementations;
-
-typedef Types<std::complex<float>, std::complex<double>, float_complext, double_complext> cplxImplementations;
-typedef Types<std::complex<float>, std::complex<double> > stdCplxImplementations;
-typedef Types<float_complext, double_complext> cplxtImplementations;
-
-TYPED_TEST_CASE(gtPlus_IO_Test, cpfloatImplementations);
-
-TYPED_TEST(gtPlus_IO_Test, recon2DCoilMapGPU)
-{
-    typedef GT_Complex8 T;
-
-    gtPlusIOAnalyze gt_io;
-
-    float v;
-
-    // image data
-    hoNDArray<GT_Complex8> data;
-    // gt_io.importArrayComplex(data, this->gtPluse_ut_data_folder_ + "fullkspace__REAL", this->gtPluse_ut_data_folder_ + "fullkspace__IMAG");
-    gt_io.importArrayComplex(data, this->gtPluse_ut_data_folder_ + "aveComplexIm_REAL", this->gtPluse_ut_data_folder_ + "aveComplexIm_IMAG");
-    data.print(std::cout);
-
-    data.squeeze();
-
-    GadgetronTimer timer(false);
-
-    unsigned int RO = data.get_size(0);
-    unsigned int E1 = data.get_size(1);
-    unsigned int CHA = data.get_size(2);
-    unsigned int N = data.get_size(3);
-
-    Gadgetron::norm2(data, v);
-    GADGET_MSG("data = " << v);
-
-    {
-        GPUTimer t("all steps");
-    }
-
-    hoNDArray<GT_Complex8> data2D(RO, E1, CHA, data.begin());
-
-    hoNDArray<T> CoilMap2D;
-    timer.start("coilMap2DNIHGPU 2D");
-    gtPlusISMRMRDReconUtilComplex<T>().coilMap2DNIHGPU(data2D, CoilMap2D, ISMRMRD_SOUHEIL, 7, 3, 3, 1e-3);
-    timer.stop();
-    GADGET_EXPORT_ARRAY_COMPLEX(this->gtPluse_ut_res_folder_, gt_io, CoilMap2D, "CoilMap2D_1");
-
-    {
-    // call the old coil map code
-    timer.start("coilMap2DNIHGPU 2D old");
-    hoNDArray<float_complext> host_data(RO, E1, CHA, reinterpret_cast<float_complext*>(data2D.begin()));
-    cuNDArray<float_complext> device_data(host_data);
-    boost::shared_ptr< cuNDArray<float_complext> > csm = Gadgetron::estimate_b1_map<float, 2>( &device_data, CHA);
-    boost::shared_ptr< hoNDArray<float_complext> > csm_host = csm->to_host();
-    memcpy(CoilMap2D.begin(), csm_host->begin(), csm_host->get_number_of_bytes());
-    timer.stop();
-    GADGET_EXPORT_ARRAY_COMPLEX(this->gtPluse_ut_res_folder_, gt_io, CoilMap2D, "CoilMap2D_1_old");
-    }
-
-    hoNDArray<T> CoilMap;
-    timer.start("coilMap2DNIHGPU");
-    gtPlusISMRMRDReconUtilComplex<T>().coilMap2DNIHGPU(data, CoilMap, ISMRMRD_SOUHEIL, 7, 3, 3, 1e-3);
-    timer.stop();
-    GADGET_EXPORT_ARRAY_COMPLEX(this->gtPluse_ut_res_folder_, gt_io, CoilMap, "CoilMap2D");
-
-    hoNDArray<T> CoilMap2;
-    timer.start("coilMap2DNIH");
-    gtPlusISMRMRDReconUtilComplex<T>().coilMap2DNIH(data2D, CoilMap2, ISMRMRD_SOUHEIL, 7, 3, 3, 1e-3, false);
-    timer.stop();
-    GADGET_EXPORT_ARRAY_COMPLEX(this->gtPluse_ut_res_folder_, gt_io, CoilMap2, "CoilMap2D_2");
-
-    hoNDArray<T> combined;
-    timer.start("coil combine");
-    gtPlusISMRMRDReconUtilComplex<T>().coilCombine(data, CoilMap, combined);
-    timer.stop();
-    GADGET_EXPORT_ARRAY_COMPLEX(this->gtPluse_ut_res_folder_, gt_io, combined, "combined2D");
-
-    cudaDeviceReset();
-}
-
-TYPED_TEST(gtPlus_IO_Test, recon3DCoilMapGPU)
-{
-    typedef GT_Complex8 T;
-
-    gtPlusIOAnalyze gt_io;
-
-    float v;
-
-    // image data
-    hoNDArray<GT_Complex8> data;
-    gt_io.importArrayComplex(data, this->gtPluse_ut_data_folder_ + "fullkspace__REAL", this->gtPluse_ut_data_folder_ + "fullkspace__IMAG");
-    data.print(std::cout);
-
-    data.squeeze();
-
-    GadgetronTimer timer(false);
-
-    unsigned int RO = data.get_size(0);
-    unsigned int E1 = data.get_size(1);
-    unsigned int E2 = data.get_size(2);
-    unsigned int CHA = data.get_size(3);
-
-    Gadgetron::norm2(data, v);
-    GADGET_MSG("data = " << v);
-
-    {
-        GPUTimer t("all steps");
-    }
-
-    hoNDArray<GT_Complex8> Im2;
-    timer.start("ifft3c");
-    hoNDFFT<float>::instance()->ifft3c(data, Im2);
-    timer.stop();
-    GADGET_EXPORT_ARRAY_COMPLEX(this->gtPluse_ut_res_folder_, gt_io, Im2, "Im2");
-
-    hoNDArray<T> CoilMap;
-    timer.start("coilMap3DNIHGPU");
-    gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIHGPU_FullResMap(Im2, CoilMap, ISMRMRD_SOUHEIL, 7, 3, true);
-    timer.stop();
-    GADGET_EXPORT_ARRAY_COMPLEX(this->gtPluse_ut_res_folder_, gt_io, CoilMap, "CoilMap");
-
-    omp_set_nested(1);
-
-    hoNDArray<T> CoilMap2;
-    timer.start("coilMap3DNIH");
-    gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIH(Im2, CoilMap2, ISMRMRD_SOUHEIL, 7, 3, true);
-    timer.stop();
-    GADGET_EXPORT_ARRAY_COMPLEX(this->gtPluse_ut_res_folder_, gt_io, CoilMap2, "CoilMap2");
-
-    hoNDArray<T> combined;
-    timer.start("coil combine");
-    gtPlusISMRMRDReconUtilComplex<T>().coilCombine3D(Im2, CoilMap, combined);
-    timer.stop();
-    GADGET_EXPORT_ARRAY_COMPLEX(this->gtPluse_ut_res_folder_, gt_io, combined, "combined");
-
-    cudaDeviceReset();
-}
-
-//TYPED_TEST(gtPlus_IO_Test, reconCoilCompression)
-//{
-//    typedef float T;
-//    typedef std::complex<T> TValueType;
-//
-//    gtPlusIOAnalyze gt_io;
-//    GadgetronTimer timer(false);
-//
-//    gtPlusISMRMRDReconUtil<TValueType> util;
-//    gtPlusISMRMRDReconUtilComplex<TValueType> utilCplx;
-//    std::string filename;
-//
-//    hoNDArray<GT_Complex8> data;
-//    gt_io.importArrayComplex(data, this->gtPluse_ut_data_folder_ + "refRecon_REAL", this->gtPluse_ut_data_folder_ + "refRecon_IMAG");
-//    data.print(std::cout);
-//
-//    // export images
-//    hoNDArray<GT_Complex8> complexIm;
-//    Gadgetron::hoNDFFT<T>::instance()->ifft2c(data, complexIm);
-//
-//    hoNDArray<TValueType> sos;
-//    utilCplx.sumOfSquare(complexIm, sos);
-//
-//    filename = this->gtPluse_ut_res_folder_ + "refRecon_SoS";
-//    gt_io.exportArrayComplex(sos, filename);
-//
-//    hoMatrix<GT_Complex8> coeff, eigenValues;
-//    utilCplx.computeKLCoilCompressionCoeff(data, 1e-3, coeff, eigenValues);
-//    eigenValues.print(std::cout);
-//}
-//
-//TYPED_TEST(gtPlus_IO_Test, MatrixComputation)
-//{
-//    MKL_INT n = 4, nrhs = 2, ldb = 2;
-//
-//    /* Local arrays */
-//    //MKL_Complex8 a[16] = 
-//    //{
-//    //    { 5.96f,  0.00f}, { 0.40f,  -1.19f}, { -0.83f, -0.48f}, { -0.57f, 0.40f},
-//    //    { 0.40f,  1.19f}, { 7.95f,  0.00f}, { 0.33f,  0.09f}, { 0.22f, 0.74f},
-//    //    {-0.83f,  0.48f}, { 0.33f, -0.09f}, { 4.43f,  0.00f}, { -1.09f, 0.32f},
-//    //    {-0.57f, -0.40f}, { 0.22f, -0.74f}, {-1.09f, -0.32f}, { 3.46f,  0.00f}
-//    //};
-//
-//    //MKL_Complex8 b[8] = 
-//    //{
-//    //    {-2.94f,  5.79f}, { 8.44f,  3.07f},
-//    //    { 8.12f, -9.12f}, { 1.00f, -4.62f},
-//    //    { 9.09f, -5.03f}, { 3.64f, -2.33f},
-//    //    { 7.36f,  6.77f}, { 8.04f,  2.87f}
-//    //};
-//
-//    MKL_Complex8 a[16] = 
-//    {
-//        { 5.96f,  0.00f},   { 0.40f,  1.19f},   { -0.83f, 0.48f},   { -0.57f, -0.40f},
-//        { 0.40f,  -1.19f},  { 7.95f,  0.00f},   { 0.33f,  -0.09f},  { 0.22f, -0.74f},
-//        {-0.83f,  -0.48f},  { 0.33f, 0.09f},    { 4.43f,  0.00f},   { -1.09f, -0.32f},
-//        {-0.57f,  0.40f},   { 0.22f, 0.74f},    {-1.09f, 0.32f},    { 3.46f,  0.00f}
-//    };
-//
-//    MKL_Complex8 b[8] = 
-//    {
-//        {-2.94f,  5.79f}, { 8.12f, -9.12f}, { 9.09f, -5.03f}, { 7.36f,  6.77f}, 
-//        { 8.44f,  3.07f}, { 1.00f, -4.62f}, { 3.64f, -2.33f}, { 8.04f,  2.87f}
-//    };
-//
-//    hoMatrix< std::complex<float> > A(n, n, reinterpret_cast<std::complex<float>*>(a));
-//    hoMatrix< std::complex<float> > B(n, ldb, reinterpret_cast<std::complex<float>*>(b));
-//
-//    hoMatrix< std::complex<float> > AB;
-//    GeneralMatrixProduct_gemm(AB, A, false, B, false);
-//    AB.print(std::cout);
-//
-//    GeneralMatrixProduct_gemm(AB, A, true, B, false);
-//    AB.print(std::cout);
-//
-//    //A*B
-//    //ans =
-//    //                   -41.9895 +               20.0944i                    35.3342 +                17.026i
-//    //                    56.5497 -               67.5926i                     8.7286 -               19.3177i
-//    //                    31.5997 -               37.2643i                    -2.1214 -               10.9889i
-//    //                    12.9773 +               15.8586i                    16.3236 +                4.4228i
-//
-//    hoMatrix< std::complex<float> > A2(A);
-//    hoMatrix< std::complex<float> > B2(B);
-//
-//    SymmetricHermitianPositiveDefiniteLinearSystem_posv(A2, B2);
-//
-//    A2.print(std::cout);
-//    B2.print(std::cout);
-//
-//    //    Solution
-//    //    (  0.80,  1.62) (  2.52,  0.61)
-//    //    (  1.26, -1.78) (  0.01, -1.38)
-//    //    (  3.38, -0.29) (  2.42, -0.52)
-//    //    (  3.46,  2.92) (  3.77,  1.37)
-//
-//    //    Details of Cholesky factorization
-//    //    (  2.44,  0.00) (  0.00,  0.00) (  0.00,  0.00) (  0.00,  0.00)
-//    //    (  0.16,  0.49) (  2.77,  0.00) (  0.00,  0.00) (  0.00,  0.00)
-//    //    ( -0.34,  0.20) (  0.10, -0.10) (  2.06,  0.00) (  0.00,  0.00)
-//    //    ( -0.23, -0.16) (  0.12, -0.30) ( -0.57, -0.20) (  1.71,  0.00)
-//
-//    A2 = A;
-//    CholeskyHermitianPositiveDefinite_potrf(A2, 'L');
-//    A2.print(std::cout);
-//
-//    A2 = A;
-//    A2.print(std::cout);
-//
-//    hoMatrix< std::complex<float> > eigenValue;
-//    EigenAnalysis_syev_heev2(A2, eigenValue);
-//    A2.print(std::cout);
-//    eigenValue.print(std::cout);
-//
-//    hoMatrix< std::complex<float> > C;
-//    GeneralMatrixProduct_gemm(C, A2, false, A2, true);
-//    C.print(std::cout);
-//
-//    A2 = A;
-//    B2 = B;
-//    hoMatrix< std::complex<float> > x;
-//    double lamda = 1e-4;
-//    SolveLinearSystem_Tikhonov(A2, B2, x, lamda);
-//    x.print(std::cout);
-//}
-//
-//TYPED_TEST(gtPlus_IO_Test, memoryManager)
-//{
-//    typedef GT_Complex8 T;
-//
-//    unsigned int RO = 256;
-//    unsigned int E1 = 256;
-//    unsigned int E2 = 256;
-//    unsigned int CHA = 32;
-//
-//    size_t num = (size_t)RO*E1*E2*CHA*sizeof(T);
-//    std::cout << "Allocate " << num/1024/1024 << " MegaBytes ..." << std::endl;
-//
-//    GadgetronTimer timer(false);
-//
-//    timer.start("Allocate 2D array...");
-//    hoNDArray<T> a2D(RO, E1);
-//    timer.stop();
-//
-//    timer.start("Allocate 3D array...");
-//    hoNDArray<T> a3D(RO, E1, E2);
-//    timer.stop();
-//
-//    timer.start("Allocate 3D array...");
-//    T* p3D = new T[RO*E1*E2];
-//    timer.stop();
-//    memset(p3D, 0, sizeof(T)*RO*E1*E2);
-//    delete [] p3D;
-//
-//    timer.start("Allocate 3D array...");
-//    p3D = (T*)mkl_malloc(sizeof(T)*RO*E1*E2, 4);
-//    timer.stop();
-//    p3D[12] = T(2.3);
-//    memset(p3D, 0, sizeof(T)*RO*E1*E2);
-//
-//    timer.start("Allocate 4D array...");
-//    hoNDArray<T> a4D(RO, E1, E2, CHA);
-//    timer.stop();
-//
-//    timer.start("Allocate 4D array...");
-//    T* p4D = new T[RO*E1*E2*CHA];
-//    timer.stop();
-//    memset(p4D, 0, sizeof(T)*RO*E1*E2*CHA);
-//    delete [] p4D;
-//
-//    timer.start("Allocate 4D array...");
-//    p4D = (T*)mkl_malloc(sizeof(T)*RO*E1*E2*CHA, 4);
-//    timer.stop();
-//    p4D[12560] = T(2.3);
-//    timer.start("Allocate 4D array...");
-//    memset(p4D, 0, sizeof(T)*RO*E1*E2*CHA);
-//    timer.stop();
-//
-//    timer.start("Allocate ...");
-//    boost::shared_ptr<gtPlusMemoryManager> memMagnager(new gtPlusMemoryManager(4, num));
-//    timer.stop();
-//
-//    timer.start("Allocate 3 pieces ...");
-//    void* ptr = memMagnager->allocate(num/2);
-//    ptr = memMagnager->allocate(num/4);
-//    ptr = memMagnager->allocate(num/8);
-//    timer.stop();
-//
-//    memMagnager->printInfo(std::cout);
-//
-//    boost::shared_ptr<gtPlusMemoryManager> memMagnager2;
-//
-//    if ( memMagnager2 )
-//    {
-//        std::cout << "Test " << std::endl;
-//    }
-//
-//    if ( memMagnager )
-//    {
-//        std::cout << "Test " << std::endl;
-//    }
-//
-//    boost::mutex mutex_;
-//
-//    timer.start("mutex cost ...");
-//    mutex_.lock();
-//    mutex_.unlock();
-//    timer.stop();
-//
-//    std::cout << memMagnager.use_count() << std::endl;
-//
-//    timer.start("Allocate hoNDArrayMemoryManaged ...");
-//    mutex_.lock();
-//    Gadgetron::hoNDArrayMemoryManaged<T> a(256, 256, 128, memMagnager);
-//    mutex_.unlock();
-//    timer.stop();
-//
-//    std::cout << memMagnager.use_count() << std::endl;
-//
-//    memMagnager->printInfo(std::cout);
-//
-//    a.clear();
-//
-//    memMagnager->printInfo(std::cout);
-//
-//    int ii;
-//    #pragma omp parallel
-//    {
-//        Gadgetron::hoNDArrayMemoryManaged<T> b(256, 256, memMagnager);
-//    }
-//
-//    //timer.start("Allocate hoNDArrayMemoryManaged 2...");
-//    //Gadgetron::hoNDArrayMemoryManaged<T> b(256, 256, 128, *memMagnager);
-//    //timer.stop();
-//}
-//
-//TYPED_TEST(gtPlus_IO_Test, kspaceFilter)
-//{
-//    typedef GT_Complex8 T;
-//
-//    gtPlusIOAnalyze gt_io;
-//
-//    gtPlusISMRMRDReconUtil<T> util;
-//
-//    hoNDArray<T> filter;
-//
-//    unsigned int len = 12;
-//    double sigma = 1.5;
-//    unsigned int width = len*0.15;
-//
-//    ISMRMRDKSPACEFILTER filterType = ISMRMRD_FILTER_NONE;
-//    util.generateSymmetricFilter(len, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_GAUSSIAN;
-//    util.generateSymmetricFilter(len, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_HANNING;
-//    util.generateSymmetricFilter(len, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_TUKEY;
-//    util.generateSymmetricFilter(len, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_TAPERED_HANNING;
-//    util.generateSymmetricFilter(len, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    GADGET_MSG("------------------------------------------------");
-//
-//    len = 13;
-//
-//    filterType = ISMRMRD_FILTER_NONE;
-//    util.generateSymmetricFilter(len, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_GAUSSIAN;
-//    util.generateSymmetricFilter(len, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_HANNING;
-//    util.generateSymmetricFilter(len, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_TUKEY;
-//    util.generateSymmetricFilter(len, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_TAPERED_HANNING;
-//    util.generateSymmetricFilter(len, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    GADGET_MSG("------------------------------------------------");
-//
-//    len = 13;
-//    unsigned int start = 0;
-//    unsigned int end = 9;
-//
-//    filterType = ISMRMRD_FILTER_NONE;
-//    util.generateAsymmetricFilter(len, start, end, filter, filterType, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_TAPERED_HANNING;
-//    util.generateAsymmetricFilter(len, start, end, filter, filterType, width);
-//    filter.printContent(std::cout);
-//
-//    GADGET_MSG("------------------------------------------------");
-//
-//    start = 4;
-//    end = 12;
-//
-//    filterType = ISMRMRD_FILTER_NONE;
-//    util.generateAsymmetricFilter(len, start, end, filter, filterType, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_TAPERED_HANNING;
-//    util.generateAsymmetricFilter(len, start, end, filter, filterType, width);
-//    filter.printContent(std::cout);
-//
-//    GADGET_MSG("------------------------------------------------");
-//
-//    len = 12;
-//
-//    start = 0;
-//    end = 9;
-//    filterType = ISMRMRD_FILTER_NONE;
-//    util.generateSymmetricFilterForRef(len, start, end, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_GAUSSIAN;
-//    util.generateSymmetricFilterForRef(len, start, end, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_HANNING;
-//    util.generateSymmetricFilterForRef(len, start, end, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_TUKEY;
-//    util.generateSymmetricFilterForRef(len, start, end, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_TAPERED_HANNING;
-//    util.generateSymmetricFilterForRef(len, start, end, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    GADGET_MSG("------------------------------------------------");
-//
-//    start = 4;
-//    end = len-1;
-//
-//    filterType = ISMRMRD_FILTER_NONE;
-//    util.generateSymmetricFilterForRef(len, start, end, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_GAUSSIAN;
-//    util.generateSymmetricFilterForRef(len, start, end, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_HANNING;
-//    util.generateSymmetricFilterForRef(len, start, end, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_TUKEY;
-//    util.generateSymmetricFilterForRef(len, start, end, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_TAPERED_HANNING;
-//    util.generateSymmetricFilterForRef(len, start, end, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    GADGET_MSG("------------------------------------------------");
-//
-//    len = 13;
-//
-//    start = 0;
-//    end = 9;
-//    filterType = ISMRMRD_FILTER_NONE;
-//    util.generateSymmetricFilterForRef(len, start, end, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_GAUSSIAN;
-//    util.generateSymmetricFilterForRef(len, start, end, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_HANNING;
-//    util.generateSymmetricFilterForRef(len, start, end, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_TUKEY;
-//    util.generateSymmetricFilterForRef(len, start, end, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_TAPERED_HANNING;
-//    util.generateSymmetricFilterForRef(len, start, end, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    GADGET_MSG("------------------------------------------------");
-//
-//    start = 4;
-//    end = len-1;
-//
-//    filterType = ISMRMRD_FILTER_NONE;
-//    util.generateSymmetricFilterForRef(len, start, end, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_GAUSSIAN;
-//    util.generateSymmetricFilterForRef(len, start, end, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_HANNING;
-//    util.generateSymmetricFilterForRef(len, start, end, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_TUKEY;
-//    util.generateSymmetricFilterForRef(len, start, end, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    filterType = ISMRMRD_FILTER_TAPERED_HANNING;
-//    util.generateSymmetricFilterForRef(len, start, end, filter, filterType, sigma, width);
-//    filter.printContent(std::cout);
-//
-//    GADGET_MSG("------------------------------------------------");
-//}
-//
-//TYPED_TEST(gtPlus_IO_Test, FFT)
-//{
-//    {
-//        hoNDArray< std::complex<float> > A1D(7);
-//        for ( unsigned int ii=0; ii<7; ii++ )
-//        {
-//            A1D(ii) = ii;
-//        }
-//
-//        A1D.print(std::cout);
-//
-//        hoNDArray< std::complex<float> > A1Ds;
-//        hoNDFFT<float>::instance()->ifftshift1D(A1D, A1Ds);
-//        A1Ds.print(std::cout);
-//
-//        hoNDFFT<float>::instance()->fftshift1D(A1D, A1Ds);
-//        A1Ds.print(std::cout);
-//
-//        hoNDFFT<float>::instance()->ifftshift1D(A1Ds, A1D);
-//        A1D.print(std::cout);
-//
-//        hoNDArray< std::complex<float> > AR(A1D);
-//        hoNDFFT<float>::instance()->fft1(A1D, AR);
-//        AR.print(std::cout);
-//
-//        //0 = (7.937254,0.000000)
-//        //1 = (-1.322875,2.746980)
-//        //2 = (-1.322876,1.054958)
-//        //3 = (-1.322875,0.301938)
-//        //4 = (-1.322875,-0.301938)
-//        //5 = (-1.322876,-1.054958)
-//        //6 = (-1.322875,-2.746980)
-//
-//        hoNDFFT<float>::instance()->ifft1(A1D, AR);
-//        AR.print(std::cout);
-//
-//        //0 = (7.937254,0.000000)
-//        //1 = (-1.322875,-2.746980)
-//        //2 = (-1.322876,-1.054958)
-//        //3 = (-1.322875,-0.301938)
-//        //4 = (-1.322875,0.301938)
-//        //5 = (-1.322876,1.054958)
-//        //6 = (-1.322875,2.746980)
-//
-//        hoNDFFT<float>::instance()->fft1c(A1D, AR);
-//        AR.print(std::cout);
-//
-//        //0 = (0.000000,1.356896)
-//        //1 = (0.000000,-1.692022)
-//        //2 = (0.000000,3.048917)
-//        //3 = (7.937254,0.000000)
-//        //4 = (0.000000,-3.048917)
-//        //5 = (0.000000,1.692022)
-//        //6 = (0.000000,-1.356896)
-//
-//        hoNDFFT<float>::instance()->ifft1c(A1D, AR);
-//        AR.print(std::cout);
-//
-//        //0 = (0.000000,-1.356896)
-//        //1 = (0.000000,1.692022)
-//        //2 = (0.000000,-3.048917)
-//        //3 = (7.937254,0.000000)
-//        //4 = (0.000000,3.048917)
-//        //5 = (0.000000,-1.692022)
-//        //6 = (0.000000,1.356896)
-//    }
-//
-//    {
-//        int nx = 5, ny = 6, nz = 3;
-//
-//        ho3DArray< std::complex<float> > A(nx, ny, nz);
-//        A.fill(2.0);
-//        A(1, 4, 2) = std::complex<float>(12, -5.0);
-//        A.print(std::cout);
-//
-//        ho3DArray< std::complex<float> > AR(A);
-//        hoNDFFT<float>::instance()->fft2(AR);
-//        AR.print(std::cout);
-//
-//        //AR(:,:,1) =
-//        //    10.9545         0         0         0         0
-//        //    0         0         0         0         0
-//        //    0         0         0         0         0
-//        //    0         0         0         0         0
-//        //    0         0         0         0         0
-//        //    0         0         0         0         0
-//        //    AR(:,:,2) =
-//        //    10.9545         0         0         0         0
-//        //    0         0         0         0         0
-//        //    0         0         0         0         0
-//        //    0         0         0         0         0
-//        //    0         0         0         0         0
-//        //    0         0         0         0         0
-//        //    AR(:,:,3) =
-//        //    12.7802 - 0.9129i  -0.3040 - 2.0185i  -2.0136 - 0.3346i  -0.9405 + 1.8117i   1.4324 + 1.4543i
-//        //    -0.1223 + 2.0376i   1.9001 + 0.7460i   1.2966 - 1.5765i  -1.0987 - 1.7203i  -1.9756 + 0.5133i
-//        //    -1.7034 - 1.1247i  -1.5960 + 1.2725i   0.7170 + 1.9112i   2.0392 - 0.0914i   0.5433 - 1.9676i
-//        //    1.8257 - 0.9129i  -0.3040 - 2.0185i  -2.0136 - 0.3346i  -0.9405 + 1.8117i   1.4324 + 1.4543i
-//        //    -0.1223 + 2.0376i   1.9001 + 0.7460i   1.2966 - 1.5765i  -1.0987 - 1.7203i  -1.9756 + 0.5133i
-//        //    -1.7034 - 1.1247i  -1.5960 + 1.2725i   0.7170 + 1.9112i   2.0392 - 0.0914i   0.5433 - 1.9676i
-//
-//        ho3DArray< std::complex<float> > AR_I(A);
-//        hoNDFFT<float>::instance()->ifft2(AR_I);
-//        AR_I.print(std::cout);
-//
-//        //AR_I(:,:,1) =
-//        //10.9545         0         0         0         0
-//        //0         0         0         0         0
-//        //0         0         0         0         0
-//        //0         0         0         0         0
-//        //0         0         0         0         0
-//        //0         0         0         0         0
-//        //AR_I(:,:,2) =
-//        //10.9545         0         0         0         0
-//        //0         0         0         0         0
-//        //0         0         0         0         0
-//        //0         0         0         0         0
-//        //0         0         0         0         0
-//        //0         0         0         0         0
-//        //AR_I(:,:,3) =
-//        //12.7802 - 0.9129i   1.4324 + 1.4543i  -0.9405 + 1.8117i  -2.0136 - 0.3346i  -0.3040 - 2.0185i
-//        //-1.7034 - 1.1247i   0.5433 - 1.9676i   2.0392 - 0.0914i   0.7170 + 1.9112i  -1.5960 + 1.2725i
-//        //-0.1223 + 2.0376i  -1.9756 + 0.5133i  -1.0987 - 1.7203i   1.2966 - 1.5765i   1.9001 + 0.7460i
-//        //1.8257 - 0.9129i   1.4324 + 1.4543i  -0.9405 + 1.8117i  -2.0136 - 0.3346i  -0.3040 - 2.0185i
-//        //-1.7034 - 1.1247i   0.5433 - 1.9676i   2.0392 - 0.0914i   0.7170 + 1.9112i  -1.5960 + 1.2725i
-//        //-0.1223 + 2.0376i  -1.9756 + 0.5133i  -1.0987 - 1.7203i   1.2966 - 1.5765i   1.9001 + 0.7460i
-//
-//        ho3DArray< std::complex<float> > ARc(A);
-//        hoNDFFT<float>::instance()->fft2c(ARc);
-//        ARc.print(std::cout);
-//
-//        //ARc(:,:,1) =
-//        //0         0         0         0         0
-//        //0         0         0         0         0
-//        //0         0         0         0         0
-//        //0         0   10.9545         0         0
-//        //0         0         0         0         0
-//        //0         0         0         0         0
-//        //ARc(:,:,2) =
-//        //0         0         0         0         0
-//        //0         0         0         0         0
-//        //0         0         0         0         0
-//        //0         0   10.9545         0         0
-//        //0         0         0         0         0
-//        //0         0         0         0         0
-//        //ARc(:,:,3) =
-//        //2.0136 + 0.3346i   0.3040 + 2.0185i  -1.8257 + 0.9129i  -1.4324 - 1.4543i   0.9405 - 1.8117i
-//        //1.2966 - 1.5765i   1.9001 + 0.7460i  -0.1223 + 2.0376i  -1.9756 + 0.5133i  -1.0987 - 1.7203i
-//        //-0.7170 - 1.9112i   1.5960 - 1.2725i   1.7034 + 1.1247i  -0.5433 + 1.9676i  -2.0392 + 0.0914i
-//        //-2.0136 - 0.3346i  -0.3040 - 2.0185i  12.7802 - 0.9129i   1.4324 + 1.4543i  -0.9405 + 1.8117i
-//        //-1.2966 + 1.5765i  -1.9001 - 0.7460i   0.1223 - 2.0376i   1.9756 - 0.5133i   1.0987 + 1.7203i
-//        //0.7170 + 1.9112i  -1.5960 + 1.2725i  -1.7034 - 1.1247i   0.5433 - 1.9676i   2.0392 - 0.0914i
-//
-//        ARc = A;
-//        hoNDFFT<float>::instance()->ifft2c(ARc);
-//        ARc.print(std::cout);
-//
-//        //ARc(:,:,1) =
-//        //0         0         0         0         0
-//        //0         0         0         0         0
-//        //0         0         0         0         0
-//        //0         0   10.9545         0         0
-//        //0         0         0         0         0
-//        //0         0         0         0         0
-//        //ARc(:,:,2) =
-//        //0         0         0         0         0
-//        //0         0         0         0         0
-//        //0         0         0         0         0
-//        //0         0   10.9545         0         0
-//        //0         0         0         0         0
-//        //0         0         0         0         0
-//        //ARc(:,:,3) =
-//        //0.9405 - 1.8117i  -1.4324 - 1.4543i  -1.8257 + 0.9129i   0.3040 + 2.0185i   2.0136 + 0.3346i
-//        //2.0392 - 0.0914i   0.5433 - 1.9676i  -1.7034 - 1.1247i  -1.5960 + 1.2725i   0.7170 + 1.9112i
-//        //1.0987 + 1.7203i   1.9756 - 0.5133i   0.1223 - 2.0376i  -1.9001 - 0.7460i  -1.2966 + 1.5765i
-//        //-0.9405 + 1.8117i   1.4324 + 1.4543i  12.7802 - 0.9129i  -0.3040 - 2.0185i  -2.0136 - 0.3346i
-//        //-2.0392 + 0.0914i  -0.5433 + 1.9676i   1.7034 + 1.1247i   1.5960 - 1.2725i  -0.7170 - 1.9112i
-//        //-1.0987 - 1.7203i  -1.9756 + 0.5133i  -0.1223 + 2.0376i   1.9001 + 0.7460i   1.2966 - 1.5765i
-//    }
-//
-//    {
-//        int nx = 5, ny = 6, nz = 3;
-//
-//        ho3DArray< std::complex<float> > A(nx, ny, nz);
-//        A.fill(2.0);
-//        A(1, 4, 2) = std::complex<float>(12, -5.0);
-//        A.print(std::cout);
-//
-//        ho3DArray< std::complex<float> > AR(A);
-//        hoNDFFT<float>::instance()->fft3(AR);
-//        AR.print(std::cout);
-//
-//        hoNDFFT<float>::instance()->ifft3(AR);
-//        AR.print(std::cout);
-//
-//        ho3DArray< std::complex<float> > AR_I(A);
-//        hoNDFFT<float>::instance()->ifft3(AR_I);
-//        AR_I.print(std::cout);
-//
-//        ho3DArray< std::complex<float> > ARc(A);
-//        hoNDFFT<float>::instance()->fft3c(ARc);
-//        ARc.print(std::cout);
-//
-//        ARc = A;
-//        hoNDFFT<float>::instance()->ifft3c(ARc);
-//        ARc.print(std::cout);
-//    }
-//}
-//
-//TYPED_TEST(gtPlus_IO_Test, recon3D)
-//{
-//    typedef GT_Complex8 T;
-//
-//    gtPlusIOAnalyze gt_io;
-//
-//    float v;
-//
-//    std::string debugFolder;
-//
-//    // image data
-//    hoNDArray<GT_Complex8> data;
-//    gt_io.importArrayComplex(data, debugFolder + "data_dst__REAL", 
-//        debugFolder + "data_dst__IMAG");
-//    data.print(std::cout);
-//
-//    GadgetronTimer timer(false);
-//
-//    unsigned int RO = data.get_size(0);
-//    unsigned int E1 = data.get_size(1);
-//    unsigned int E2 = data.get_size(2);
-//    unsigned int CHA = data.get_size(3);
-//
-//    Gadgetron::norm2(data, v);
-//    GADGET_MSG("data = " << v);
-//
-//    hoNDArray<GT_Complex8> Im;
-//    hoNDFFT<float>::instance()->ifft3(data, Im);
-//
-//    GADGET_EXPORT_ARRAY_COMPLEX(debugFolder, gt_io, Im, "Im");
-//
-//    hoNDArray<GT_Complex8> Im2;
-//    timer.start("ifft3c");
-//    hoNDFFT<float>::instance()->ifft3c(data, Im2);
-//    timer.stop();
-//    GADGET_EXPORT_ARRAY_COMPLEX(debugFolder, gt_io, Im2, "Im2");
-//
-//    hoNDArray<GT_Complex8> Im3(RO, E1, 4, CHA);
-//    memcpy(Im3.begin(), Im2.begin(), Im3.get_number_of_bytes());
-//    GADGET_EXPORT_ARRAY_COMPLEX(debugFolder, gt_io, Im3, "Im3");
-//
-//    hoNDArray<T> CoilMap;
-//    timer.start("coilMap3DNIH");
-//    gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIH(Im3, CoilMap, ISMRMRD_SOUHEIL, 7, 3, true);
-//    timer.stop();
-//    GADGET_EXPORT_ARRAY_COMPLEX(debugFolder, gt_io, CoilMap, "CoilMap");
-//}
-//
-//TYPED_TEST(gtPlus_IO_Test, KLTransform)
-//{
-//    typedef GT_Complex8 T;
-//
-//    gtPlusIOAnalyze gt_io;
-//
-//    float v;
-//
-//    // image data
-//    hoNDArray<float> real_data;
-//    std::string filename = this->gtPluse_ut_data_folder_ + "fullkspace_REAL";
-//    gt_io.importArray(real_data, filename);
-//    real_data.print(std::cout);
-//
-//    hoNDArray<float> imag_data;
-//    filename = this->gtPluse_ut_data_folder_ + "fullkspace_IMAG";
-//    gt_io.importArray(imag_data, filename);
-//    imag_data.print(std::cout);
-//
-//    boost::shared_ptr< hoNDArray<GT_Complex8> > tmp = real_imag_to_complex<GT_Complex8>(&real_data, &imag_data);
-//
-//    unsigned int RO = tmp->get_size(0);
-//    unsigned int E1 = tmp->get_size(1);
-//    unsigned int CHA = tmp->get_size(2);
-//    unsigned int PHS = tmp->get_size(3);
-//
-//    hoNDArray<GT_Complex8> kspace(RO, E1, CHA, PHS, tmp->begin());
-//
-//    gtPlusISMRMRDReconUtil<GT_Complex8> util;
-//    gtPlusISMRMRDReconUtilComplex<GT_Complex8> utilCplx;
-//
-//    hoNDArray<GT_Complex8> complexIm;
-//    Gadgetron::hoNDFFT<float>::instance()->ifft2c(kspace, complexIm);
-//
-//    hoNDArray<GT_Complex8> complexImSoS;
-//    utilCplx.sumOfSquare(complexIm, complexImSoS);
-//
-//    gt_io.export3DArrayComplex(complexImSoS, this->gtPluse_ut_res_folder_+"complexImSoS");
-//
-//    unsigned int numOfModes = 10;
-//
-//    hoNDArray<GT_Complex8> complexImKLF;
-//    util.computeKLFilter(complexIm, numOfModes, complexImKLF);
-//
-//    utilCplx.sumOfSquare(complexImKLF, complexImSoS);
-//
-//    gt_io.export3DArrayComplex(complexImSoS, this->gtPluse_ut_res_folder_+"complexImKLFSoS");
-//}
-//
-//TYPED_TEST(gtPlus_IO_Test, reconRemoveROOversampling)
-//{
-//    typedef float T;
-//    typedef std::complex<T> TValueType;
-//
-//    gtPlusIOAnalyze gt_io;
-//    GadgetronTimer timer(false);
-//
-//    gtPlusISMRMRDReconUtil<TValueType> util;
-//    gtPlusISMRMRDReconUtilComplex<TValueType> utilCplx;
-//    std::string filename;
-//
-//    hoNDArray<GT_Complex8> data;
-//    gt_io.importArrayComplex(data, this->gtPluse_ut_data_folder_ + "kspace_DownSampleFE_real", this->gtPluse_ut_data_folder_ + "kspace_DownSampleFE_imag");
-//    // real_imag_to_complex<GT_Complex8>(real_data, imag_data, data);
-//    data.print(std::cout);
-//
-//    // export images
-//    hoNDArray<GT_Complex8> complexIm;
-//    Gadgetron::hoNDFFT<T>::instance()->ifft2c(data, complexIm);
-//
-//    hoNDArray<TValueType> sos;
-//    utilCplx.sumOfSquare(complexIm, sos);
-//
-//    filename = this->gtPluse_ut_res_folder_ + "kspace_DownSampleFE_SoS";
-//    gt_io.exportArrayComplex(sos, filename);
-//
-//    // cut down RO oversampling
-//    hoNDArray<TValueType> dataCut;
-//    Gadgetron::hoNDFFT<T>::instance()->ifft1c(data);
-//    utilCplx.cutpad2D(data, data.get_size(0)/2, data.get_size(1), dataCut);
-//    Gadgetron::hoNDFFT<T>::instance()->fft1c(dataCut);
-//
-//    Gadgetron::hoNDFFT<T>::instance()->ifft2c(dataCut, complexIm);
-//    utilCplx.sumOfSquare(complexIm, sos);
-//
-//    filename = this->gtPluse_ut_res_folder_ + "kspace_DownSampleFE_SoS_CutRO";
-//    gt_io.exportArrayComplex(sos, filename);
-//}
-//
-//TYPED_TEST(gtPlus_IO_Test, reconNoisePrewhitening)
-//{
-//    typedef float T;
-//    typedef std::complex<T> TValueType;
-//
-//    gtPlusIOAnalyze gt_io;
-//    GadgetronTimer timer(false);
-//
-//    hoNDArray<float> real_noise;
-//    std::string filename = this->gtPluse_ut_data_folder_ + "Noise_real";
-//    gt_io.importArray(real_noise, filename);
-//    real_noise.print(std::cout);
-//
-//    hoNDArray<float> imag_noise;
-//    filename = this->gtPluse_ut_data_folder_ + "Noise_imag";
-//    gt_io.importArray(imag_noise, filename);
-//    imag_noise.print(std::cout);
-//
-//    ho3DArray<GT_Complex8> noise;
-//    real_imag_to_complex<GT_Complex8>(real_noise, imag_noise, noise);
-//
-//    int COL = noise.get_size(0);
-//    int E1 = noise.get_size(1);
-//    int CHA = noise.get_size(2);
-//
-//    GADGET_MSG(noise(12, 0, 10));
-//
-//    // compute noise prewhitener
-//    double rxDwellTimeData = 2100;
-//    hoMatrix<TValueType> noisePrewhitener(CHA, CHA);
-//
-//    gtPlusISMRMRDReconUtilComplex<TValueType> utilCplx;
-//
-//    double noiseBandWidth = 130;
-//    double receiverBWRatio = 0.79;
-//    double ADCSamplingTimeinSecond = 2100/1e9;
-//
-//    hoMatrix<TValueType> prewhiteningMatrix;
-//
-//    GADGET_START_TIMING(timer, "computeNoisePrewhiteningMatrix");
-//    utilCplx.computeNoisePrewhiteningMatrix(noise, noiseBandWidth, receiverBWRatio, ADCSamplingTimeinSecond, prewhiteningMatrix);
-//    GADGET_STOP_TIMING(timer);
-//    // prewhiteningMatrix.print(std::cout);
-//
-//    EXPECT_NEAR(prewhiteningMatrix(0, 0).real(), 5.1331672e+004, 0.01);
-//    EXPECT_NEAR(prewhiteningMatrix(0, 0).imag(), 0.0, 0.01);
-//
-//    EXPECT_NEAR(prewhiteningMatrix(1, 0).real(), -5791.2319, 0.01);
-//    EXPECT_NEAR(prewhiteningMatrix(1, 0).imag(), -1603.6230, 0.01);
-//
-//    EXPECT_NEAR(prewhiteningMatrix(2, 1).real(), -9597.3955, 0.01);
-//    EXPECT_NEAR(prewhiteningMatrix(2, 1).imag(), 4500.7114, 0.01);
-//
-//    EXPECT_NEAR(prewhiteningMatrix(4, 3).real(), -7718.3286, 0.01);
-//    EXPECT_NEAR(prewhiteningMatrix(4, 3).imag(), -3565.7336, 0.01);
-//
-//    EXPECT_NEAR(prewhiteningMatrix(31, 31).real(), 60350.840, 0.01);
-//    EXPECT_NEAR(prewhiteningMatrix(31, 31).imag(), 0.0, 0.01);
-//
-//    /// load the data scan
-//    hoNDArray<float> real_data;
-//    filename = this->gtPluse_ut_data_folder_ + "noisePrewhitening_DataScan_real";
-//    gt_io.importArray(real_data, filename);
-//    real_data.print(std::cout);
-//
-//    hoNDArray<float> imag_data;
-//    filename = this->gtPluse_ut_data_folder_ + "noisePrewhitening_DataScan_imag";
-//    gt_io.importArray(imag_data, filename);
-//    imag_data.print(std::cout);
-//
-//    ho3DArray<GT_Complex8> data;
-//    real_imag_to_complex<GT_Complex8>(real_data, imag_data, data);
-//
-//    GADGET_MSG(data(42, 12, 10));
-//
-//    // apply the noise matrix
-//    GADGET_START_TIMING(timer, "performNoisePrewhitening");
-//    utilCplx.performNoisePrewhitening(data, prewhiteningMatrix);
-//    GADGET_STOP_TIMING(timer);
-//    GADGET_MSG(data(42, 12, 10));
-//    EXPECT_LE(std::abs(data(42, 12, 10)-TValueType(-0.068069, -0.185625)), 1e-6);
-//}
-//
-//
-//TYPED_TEST(gtPlus_IO_Test, IOTest)
-//{
-//    typedef GT_Complex8 T;
-//
-//    gtPlusIOAnalyze gt_io;
-//
-//    hoNDArray<float> real_Im;
-//    std::string filename = this->gtPluse_ut_data_folder_ + "KSpaceBinning_IncomingKSpace_real";
-//    gt_io.importArray(real_Im, filename);
-//    real_Im.print(std::cout);
-//
-//    hoNDArray<float> imag_Im;
-//    filename = this->gtPluse_ut_data_folder_ + "KSpaceBinning_IncomingKSpace_imag";
-//    gt_io.importArray(imag_Im, filename);
-//    imag_Im.print(std::cout);
-//
-//    filename = this->gtPluse_ut_res_folder_ + "KSpaceBinning_IncomingKSpace_real2";
-//    gt_io.exportArray(real_Im, filename);
-//
-//    filename = this->gtPluse_ut_res_folder_ + "KSpaceBinning_IncomingKSpace_imag2";
-//    gt_io.exportArray(imag_Im, filename);
-//
-//    boost::shared_ptr< hoNDArray<GT_Complex8> > tmp = real_imag_to_complex<GT_Complex8>(&real_Im, &imag_Im);
-//
-//    unsigned int RO = tmp->get_size(0);
-//    unsigned int E1 = tmp->get_size(1);
-//    unsigned int CHA = tmp->get_size(2);
-//    unsigned int PHS = tmp->get_size(3);
-//
-//    hoNDArray<GT_Complex8> kspace(RO, E1, CHA, PHS, tmp->begin());
-//
-//    float nrm2;
-//    Gadgetron::norm2(kspace, nrm2);
-//    GADGET_MSG("nrm2 = " << nrm2);
-//
-//    gtPlusISMRMRDReconUtil<GT_Complex8> util;
-//    gtPlusISMRMRDReconUtilComplex<GT_Complex8> utilCplx;
-//
-//    // sum of square
-//    hoNDArray<GT_Complex8> complexIm, sosIm;
-//
-//    GadgetronTimer timer(false);
-//    timer.start("ifft2c");
-//    hoNDFFT<float>::instance()->ifft2c(kspace, complexIm);
-//    timer.stop();
-//
-//    timer.start("sumOfSquare");
-//    utilCplx.sumOfSquare(complexIm, sosIm);
-//    timer.stop();
-//
-//    hoNDArray<float> magSoS;
-//    timer.start("absolute");
-//    Gadgetron::absolute(sosIm, magSoS);
-//    timer.stop();
-//
-//    filename = this->gtPluse_ut_res_folder_ + "KSpaceBinning_IncomingKSpace_SoS";
-//    gt_io.exportArray(magSoS, filename);
-//
-//    // coil map estimation
-//
-//    hoNDArray<GT_Complex8> meanKSpace;
-//    sumOverLastDimension(kspace, meanKSpace);
-//
-//    filename = this->gtPluse_ut_res_folder_ + "KSpaceBinning_IncomingKSpace_mean";
-//    gt_io.export3DArrayComplex(meanKSpace, filename);
-//
-//    Gadgetron::norm2(meanKSpace, nrm2);
-//    GADGET_MSG("nrm2 = " << nrm2);
-//
-//    hoNDArray<GT_Complex8> meanIm;
-//    hoNDFFT<float>::instance()->ifft2c(meanKSpace, meanIm);
-//    Gadgetron::norm2(meanIm, nrm2);
-//    GADGET_MSG("nrm2 = " << nrm2);
-//
-//    filename = this->gtPluse_ut_res_folder_ + "KSpaceBinning_IncomingKSpace_meanIm";
-//    gt_io.export3DArrayComplex(meanIm, filename);
-//
-//    hoNDArray<GT_Complex8> coilMap;
-//    timer.start("coilMap2DNIH");
-//    utilCplx.coilMap2DNIH(meanIm, coilMap, ISMRMRD_SOUHEIL, 7, 3);
-//    timer.stop();
-//
-//    filename = this->gtPluse_ut_res_folder_ + "KSpaceBinning_IncomingKSpace_meanIm_coilMap";
-//    gt_io.export3DArrayComplex(coilMap, filename);
-//
-//    hoNDArray<GT_Complex8> combined;
-//    timer.start("coilCombine");
-//    utilCplx.coilCombine(meanIm, coilMap, combined);
-//    timer.stop();
-//
-//    gt_io.export3DArrayComplex(combined, this->gtPluse_ut_res_folder_ + "KSpaceBinning_IncomingKSpace_meanIm_coilMap_combined");
-//
-//    // KLT
-//    hoMatrix<T> coeff, eigenValues;
-//    timer.start("computeKLTCoeff");
-//    util.computeKLTCoeff(meanKSpace, coeff, eigenValues);
-//    timer.stop();
-//    eigenValues.print(std::cout);
-//
-//    double thres = 0.001;
-//    timer.start("computeKLCoilCompressionCoeff, thres");
-//    util.computeKLCoilCompressionCoeff(meanKSpace, thres, coeff, eigenValues);
-//    timer.stop();
-//    eigenValues.print(std::cout);
-//
-//    hoNDArray<T> dataEigen;
-//    int numOfModeKept = 20;
-//    util.computeKLCoilCompression(meanKSpace, numOfModeKept, coeff, eigenValues, dataEigen);
-//    Gadgetron::norm2(dataEigen, nrm2);
-//    GADGET_MSG("nrm2 = " << nrm2);
-//
-//    hoNDFFT<float>::instance()->ifft2c(dataEigen, meanIm);
-//    gt_io.export3DArrayComplex(meanIm, this->gtPluse_ut_res_folder_ + "KSpaceBinning_IncomingKSpace_meanIm_dataEigen");
-//}
diff --git a/toolboxes/gtplus/util/gtPlusIOAnalyze.cpp b/toolboxes/gtplus/util/gtPlusIOAnalyze.cpp
index 43571a8..9176331 100644
--- a/toolboxes/gtplus/util/gtPlusIOAnalyze.cpp
+++ b/toolboxes/gtplus/util/gtPlusIOAnalyze.cpp
@@ -13,87 +13,36 @@
 
 namespace Gadgetron { namespace gtPlus {
 
-gtPlusIOAnalyze::gtPlusIOAnalyze(float px, float py)
+gtPlusIOAnalyze::gtPlusIOAnalyze() : BaseClass()
 {
-    pixelSize_.resize(2);
-    pixelSize_[0] = px;
-    pixelSize_[1] = py;
 }
 
-gtPlusIOAnalyze::gtPlusIOAnalyze(float px, float py, float pz)
+gtPlusIOAnalyze::gtPlusIOAnalyze(float px, float py) : BaseClass(px, py)
 {
-    pixelSize_.resize(3);
-    pixelSize_[0] = px;
-    pixelSize_[1] = py;
-    pixelSize_[2] = pz;
 }
 
-gtPlusIOAnalyze::gtPlusIOAnalyze(float px, float py, float pz, float pt)
+gtPlusIOAnalyze::gtPlusIOAnalyze(float px, float py, float pz) : BaseClass(px, py, pz)
 {
-    pixelSize_.resize(4);
-    pixelSize_[0] = px;
-    pixelSize_[1] = py;
-    pixelSize_[2] = pz;
-    pixelSize_[3] = pt;
 }
 
-gtPlusIOAnalyze::gtPlusIOAnalyze(float px, float py, float pz, float pt, float pr)
+gtPlusIOAnalyze::gtPlusIOAnalyze(float px, float py, float pz, float pt) : BaseClass(px, py, pz, pt)
 {
-    pixelSize_.resize(5);
-    pixelSize_[0] = px;
-    pixelSize_[1] = py;
-    pixelSize_[2] = pz;
-    pixelSize_[3] = pt;
-    pixelSize_[4] = pr;
 }
 
-gtPlusIOAnalyze::gtPlusIOAnalyze(float px, float py, float pz, float pt, float pr, float ps)
+gtPlusIOAnalyze::gtPlusIOAnalyze(float px, float py, float pz, float pt, float pr) : BaseClass(px, py, pz, pt, pr)
 {
-    pixelSize_.resize(6);
-    pixelSize_[0] = px;
-    pixelSize_[1] = py;
-    pixelSize_[2] = pz;
-    pixelSize_[3] = pt;
-    pixelSize_[4] = pr;
-    pixelSize_[5] = ps;
 }
 
-gtPlusIOAnalyze::gtPlusIOAnalyze(float px, float py, float pz, float pt, float pr, float ps, float pp)
+gtPlusIOAnalyze::gtPlusIOAnalyze(float px, float py, float pz, float pt, float pr, float ps) : BaseClass(px, py, pz, pt, pr, ps)
 {
-    pixelSize_.resize(7);
-    pixelSize_[0] = px;
-    pixelSize_[1] = py;
-    pixelSize_[2] = pz;
-    pixelSize_[3] = pt;
-    pixelSize_[4] = pr;
-    pixelSize_[5] = ps;
-    pixelSize_[6] = pp;
 }
 
-gtPlusIOAnalyze::gtPlusIOAnalyze(float px, float py, float pz, float pt, float pr, float ps, float pp, float pq)
+gtPlusIOAnalyze::gtPlusIOAnalyze(float px, float py, float pz, float pt, float pr, float ps, float pp) : BaseClass(px, py, pz, pt, pr, ps, pp)
 {
-    pixelSize_.resize(8);
-    pixelSize_[0] = px;
-    pixelSize_[1] = py;
-    pixelSize_[2] = pz;
-    pixelSize_[3] = pt;
-    pixelSize_[4] = pr;
-    pixelSize_[5] = ps;
-    pixelSize_[6] = pp;
-    pixelSize_[7] = pq;
 }
 
-void gtPlusIOAnalyze::setPixelSize(float px, float py, float pz, float pt, float pr, float ps, float pp, float pq)
+gtPlusIOAnalyze::gtPlusIOAnalyze(float px, float py, float pz, float pt, float pr, float ps, float pp, float pq) : BaseClass(px, py, pz, pt, pr, ps, pp, pq)
 {
-    pixelSize_.resize(8);
-    pixelSize_[0] = px;
-    pixelSize_[1] = py;
-    pixelSize_[2] = pz;
-    pixelSize_[3] = pt;
-    pixelSize_[4] = pr;
-    pixelSize_[5] = ps;
-    pixelSize_[6] = pp;
-    pixelSize_[7] = pq;
 }
 
 void gtPlusIOAnalyze::printInfo(std::ostream& os)
@@ -104,108 +53,7 @@ void gtPlusIOAnalyze::printInfo(std::ostream& os)
     os << "--------------------------------------------------------------------------" << endl;
 }
 
-std::string gtPlusIOAnalyze::getRTTIFromAnalyzeDataType(AnalyzeDataType aDT)
-{
-    std::string rttiID;
-
-    switch (aDT)
-    {
-    case DT_UNSIGNED_CHAR :
-        rttiID = typeid(unsigned char).name();
-        break;
-
-    case DT_SIGNED_SHORT :
-        rttiID = typeid(short).name();
-        break;
-
-    case DT_UNSIGNED_SHORT :
-        rttiID = typeid(unsigned short).name();
-        break;
-
-    case DT_SIGNED_INT :
-        rttiID = typeid(int).name();
-        break;
-
-    case DT_UNSIGNED_INT :
-        rttiID = typeid(size_t).name();
-        break;
-
-    case DT_FLOAT :
-        rttiID = typeid(float).name();
-        break;
-
-    case DT_DOUBLE :
-        rttiID = typeid(double).name();
-        break;
-
-    case DT_COMPLEX :
-        rttiID = typeid(GT_Complex8).name();
-        break;
-
-    case DT_DOUBLECOMPLEX :
-        rttiID = typeid(GT_Complex16).name();
-        break;
-
-    default:
-        rttiID = "UNKOWN TYPE";
-    }
-
-    return rttiID;
-}
-
-AnalyzeDataType gtPlusIOAnalyze::getAnalyzeDataTypeFromRTTI(const std::string& name)
-{
-    AnalyzeDataType analyzeDT = DT_ANA_UNKNOWN;
-
-    if ( name == typeid(unsigned char).name() )
-    {
-        analyzeDT = DT_UNSIGNED_CHAR;
-    }
-
-    if ( name == typeid(short).name() )
-    {
-        analyzeDT = DT_SIGNED_SHORT;
-    }
-
-    if ( name == typeid(unsigned short).name() )
-    {
-        analyzeDT = DT_UNSIGNED_SHORT;
-    }
-
-    if ( name == typeid(int).name() )
-    {
-        analyzeDT = DT_SIGNED_INT;
-    }
-
-    if ( name == typeid(size_t).name() )
-    {
-        analyzeDT = DT_UNSIGNED_INT;
-    }
-
-    if ( name == typeid(float).name() )
-    {
-        analyzeDT = DT_FLOAT;
-    }
-
-    if ( name == typeid(double).name() )
-    {
-        analyzeDT = DT_DOUBLE;
-    }
-
-    if ( name == typeid(GT_Complex8).name() )
-    {
-        analyzeDT = DT_COMPLEX;
-    }
-
-    if ( name == typeid(GT_Complex16).name() )
-    {
-        analyzeDT = DT_DOUBLECOMPLEX;
-    }
-
-    return analyzeDT;
-}
-
-bool gtPlusIOAnalyze::readAnalyzeHeader(const std::string& filename, dsr& header)
+bool gtPlusIOAnalyze::readHeader(const std::string& filename, HeaderType& header)
 {
     try
     {
@@ -220,14 +68,14 @@ bool gtPlusIOAnalyze::readAnalyzeHeader(const std::string& filename, dsr& header
     }
     catch(...)
     {
-        GADGET_ERROR_MSG("Errors in gtPlusIOAnalyze::readAnalyzeHeader(const std::string& filename, dsr& header) ... ");
+        GADGET_ERROR_MSG("Errors in gtPlusIOAnalyze::readHeader(const std::string& filename, dsr& header) ... ");
         return false;
     }
 
     return true;
 }
 
-bool gtPlusIOAnalyze::writeAnalyzeHeader(const std::string& filename, const dsr& header)
+bool gtPlusIOAnalyze::writeHeader(const std::string& filename, const HeaderType& header)
 {
     try
     {
@@ -242,7 +90,7 @@ bool gtPlusIOAnalyze::writeAnalyzeHeader(const std::string& filename, const dsr&
     }
     catch(...)
     {
-        GADGET_ERROR_MSG("Errors in gtPlusIOAnalyze::writeAnalyzeHeader(const std::string& filename, const dsr& header) ... ");
+        GADGET_ERROR_MSG("Errors in gtPlusIOAnalyze::writeHeader(const std::string& filename, const dsr& header) ... ");
         return false;
     }
 
diff --git a/toolboxes/gtplus/util/gtPlusIOAnalyze.h b/toolboxes/gtplus/util/gtPlusIOAnalyze.h
index 4cfe5f9..26e22a9 100644
--- a/toolboxes/gtplus/util/gtPlusIOAnalyze.h
+++ b/toolboxes/gtplus/util/gtPlusIOAnalyze.h
@@ -17,23 +17,6 @@
 
 // the following Analyze75 data structured is defined as this online document eeg.sourceforge.net/ANALYZE75.pdf‎
 
-enum AnalyzeDataType
-{
-    DT_ANA_UNKNOWN=0,
-    DT_BINARY=1, 
-    DT_UNSIGNED_CHAR=2,
-    DT_SIGNED_SHORT=4,
-    DT_UNSIGNED_SHORT=5,
-    DT_SIGNED_INT=8,
-    DT_UNSIGNED_INT=9,
-    DT_FLOAT=16,
-    DT_COMPLEX=32,
-    DT_DOUBLE=64,
-    DT_DOUBLECOMPLEX=96, // this type is added to support complex doulbe
-    DT_RGB=128,
-    DT_ALL=255
-};
-
 // the official definition of Analyze 7.5 file format
 struct header_key
 {
@@ -104,11 +87,14 @@ struct dsr
 
 namespace Gadgetron { namespace gtPlus {
 
-class EXPORTGTPLUS gtPlusIOAnalyze
+class EXPORTGTPLUSIO gtPlusIOAnalyze : public gtPlusIOBase<dsr>
 {
 public:
 
-    gtPlusIOAnalyze() { pixelSize_.resize(10, 1.0); }
+    typedef gtPlusIOBase<dsr> BaseClass;
+    typedef BaseClass::THeaderType HeaderType;
+
+    gtPlusIOAnalyze();
     gtPlusIOAnalyze(float px, float py);
     gtPlusIOAnalyze(float px, float py, float pz);
     gtPlusIOAnalyze(float px, float py, float pz, float pt);
@@ -117,310 +103,395 @@ public:
     gtPlusIOAnalyze(float px, float py, float pz, float pt, float pr, float ps, float pp);
     gtPlusIOAnalyze(float px, float py, float pz, float pt, float pr, float ps, float pp, float pq);
 
-    void setPixelSize(float px, float py, float pz=1.0f, float pt=1.0f, float pr=1.0f, float ps=1.0f, float pp=1.0f, float pq=1.0f);
-
     virtual ~gtPlusIOAnalyze() {}
 
-public:
+    virtual void printInfo(std::ostream& os);
+
+    virtual bool exportArray(const hoNDArray<short>& a, const std::string& filename) { return this->exportArrayImpl(a, filename); }
+
+    virtual bool exportArray(const hoNDArray<unsigned short>& a, const std::string& filename) { return this->exportArrayImpl(a, filename); }
+    virtual bool exportArray(const hoNDArray<int>& a, const std::string& filename) { return this->exportArrayImpl(a, filename); }
+    virtual bool exportArray(const hoNDArray<unsigned int>& a, const std::string& filename) { return this->exportArrayImpl(a, filename); }
+    virtual bool exportArray(const hoNDArray<size_t>& a, const std::string& filename) { return this->exportArrayImpl(a, filename); }
+    virtual bool exportArray(const hoNDArray<float>& a, const std::string& filename) { return this->exportArrayImpl(a, filename); }
+    virtual bool exportArray(const hoNDArray<double>& a, const std::string& filename) { return this->exportArrayImpl(a, filename); }
+    virtual bool exportArray(const hoNDArray< std::complex<float> >& a, const std::string& filename) { return this->exportArrayImpl(a, filename); }
+    virtual bool exportArray(const hoNDArray< std::complex<double> >& a, const std::string& filename) { return this->exportArrayImpl(a, filename); }
+
+    virtual bool importArray(hoNDArray<short>& a, const std::string& filename) { return this->importArrayImpl(a, filename); }
+    virtual bool importArray(hoNDArray<unsigned short>& a, const std::string& filename) { return this->importArrayImpl(a, filename); }
+    virtual bool importArray(hoNDArray<int>& a, const std::string& filename) { return this->importArrayImpl(a, filename); }
+    virtual bool importArray(hoNDArray<unsigned int>& a, const std::string& filename) { return this->importArrayImpl(a, filename); }
+    virtual bool importArray(hoNDArray<float>& a, const std::string& filename) { return this->importArrayImpl(a, filename); }
+    virtual bool importArray(hoNDArray<double>& a, const std::string& filename) { return this->importArrayImpl(a, filename); }
+    virtual bool importArray(hoNDArray< std::complex<float> >& a, const std::string& filename) { return this->importArrayImpl(a, filename); }
+    virtual bool importArray(hoNDArray< std::complex<double> >& a, const std::string& filename) { return this->importArrayImpl(a, filename); }
+
+    template <typename T> 
+    bool exportArrayImpl(const hoNDArray<T>& a, const std::string& filename)
+    {
+        try
+        {
+            HeaderType header;
+            GADGET_CHECK_RETURN_FALSE(this->array2Header(a, header));
+            GADGET_CHECK_RETURN_FALSE(this->writeHeader(filename, header));
 
-    void printInfo(std::ostream& os);
+            std::string filenameData = filename;
+            filenameData.append(".img");
+            GADGET_CHECK_RETURN_FALSE(this->writeData(filenameData, a.begin(), a.get_number_of_bytes()));
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in gtPlusIOAnalyze::exportArrayImpl(const hoNDArray<T>& a, const std::string& filename) ... ");
+            return false;
+        }
 
-    // export/input for 2D/3D/4D array
-    // filename should be given without .hdr extension
-    // the .hdr and .img extension will be added internally
+        return true;
+    }
 
-    template <typename T> bool exportArray(const hoNDArray<T>& a, const std::string& filename);
-    template <typename T> bool importArray(hoNDArray<T>& a, const std::string& filename);
+    template <typename T> 
+    bool importArrayImpl(hoNDArray<T>& a, const std::string& filename)
+    {
+        try
+        {
+            HeaderType header;
+            GADGET_CHECK_RETURN_FALSE(this->readHeader(filename, header));
+            GADGET_CHECK_RETURN_FALSE(this->header2Array(a, header));
 
-    template <typename T> bool exportArrayComplex(const hoNDArray<T>& a, const std::string& filename);
-    template <typename T> bool importArrayComplex(hoNDArray<T>& a, const std::string& filename);
+            std::string filenameData = filename;
+            filenameData.append(".img");
+            GADGET_CHECK_RETURN_FALSE(this->readData(filenameData, a.begin(), a.get_number_of_bytes()));
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in gtPlusIOAnalyze::importArrayImpl(const hoNDArray<T>& a, const std::string& filename) ... ");
+            return false;
+        }
 
-    template <typename T> bool importArrayComplex(hoNDArray<T>& a, const std::string& filename_real, const std::string& filename_imag);
+        return true;
+    }
 
-    // 2D array is exported as a 2D image
-    template <typename T> bool export2DArray(const hoNDArray<T>& a, const std::string& filename);
-    template <typename T> bool import2DArray(hoNDArray<T>& a, const std::string& filename);
+    template <typename T, unsigned int D> 
+    bool exportImage(const hoNDImage<T,D>& a, const std::string& filename)
+    {
+        try
+        {
+            HeaderType header;
+            GADGET_CHECK_RETURN_FALSE(this->image2Header(a, header));
+            GADGET_CHECK_RETURN_FALSE(this->writeHeader(filename, header));
 
-    template <typename T> bool export2DArrayComplex(const hoNDArray<T>& a, const std::string& filename);
-    template <typename T> bool import2DArrayComplex(hoNDArray<T>& a, const std::string& filename);
+            std::string filenameData = filename;
+            filenameData.append(".img");
+            GADGET_CHECK_RETURN_FALSE(this->writeData(filenameData, a.begin(), a.get_number_of_bytes()));
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in exportImage(const hoNDImage<T,D>& a, const std::string& filename) ... ");
+            return false;
+        }
 
-    // 3D array is exported as a 3D volume
-    template <typename T> bool export3DArray(const hoNDArray<T>& a, const std::string& filename);
-    template <typename T> bool import3DArray(hoNDArray<T>& a, const std::string& filename);
+        return true;
+    }
 
-    template <typename T> bool export3DArrayComplex(const hoNDArray<T>& a, const std::string& filename);
-    template <typename T> bool import3DArrayComplex(hoNDArray<T>& a, const std::string& filename);
+    template <typename T, unsigned int D> 
+    bool importImage(hoNDImage<T,D>& a, const std::string& filename)
+    {
+        try
+        {
+            HeaderType header;
+            GADGET_CHECK_RETURN_FALSE(this->readHeader(filename, header));
+            GADGET_CHECK_RETURN_FALSE(this->header2Image(a, header));
 
-    // 4D array is exported as multiple 3D volume
-    template <typename T> bool export4DArray(const hoNDArray<T>& a, const std::string& filename);
-    template <typename T> bool export4DArrayComplex(const hoNDArray<T>& a, const std::string& filename);
+            std::string filenameData = filename;
+            filenameData.append(".img");
+            GADGET_CHECK_RETURN_FALSE(this->readData(filenameData, a.begin(), a.get_number_of_bytes()));
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in importImage(const hoNDImage<T,D>& a, const std::string& filename) ... ");
+            return false;
+        }
 
-protected:
+        return true;
+    }
 
-    std::vector<float> pixelSize_;
+/// image functions
 
-    template <typename T> bool array2Analyze(const hoNDArray<T>& a, dsr& header);
-    template <typename T> bool analyze2Array(hoNDArray<T>& a, const dsr& header);
+    template <typename T, unsigned int D> 
+    bool exportImageComplex(const hoNDImage<T,D>& a, const std::string& filename)
+    {
+        try
+        {
+            typedef typename Gadgetron::realType<T>::Type value_type;
 
-    // get the run-time type ID from analyze data type or vice versa
-    std::string getRTTIFromAnalyzeDataType(AnalyzeDataType aDT);
-    AnalyzeDataType getAnalyzeDataTypeFromRTTI(const std::string& name);
+            //hoNDImage<value_type, D> buf;
+            //GADGET_CHECK_RETURN_FALSE(Gadgetron::complex_to_real(a, buf));
 
-    // read/write the analyze header
-    bool readAnalyzeHeader(const std::string& filename, dsr& header);
-    bool writeAnalyzeHeader(const std::string& filename, const dsr& header);
+            //std::string filenameReal = filename;
+            //filenameReal.append("_REAL");
+            //GADGET_CHECK_RETURN_FALSE(exportImage(buf, filenameReal));
 
-    // read/write the analyze data file
-    // len is the number of bytes
-    template <typename T> bool readAnalyzeData(const std::string& filename, T* data, long long len);
-    template <typename T> bool writeAnalyzeData(const std::string& filename, const T* data, long long len);
-};
+            //GADGET_CHECK_RETURN_FALSE(Gadgetron::complex_to_imag(a, buf));
+            //std::string filenameImag = filename;
+            //filenameImag.append("_IMAG");
+            //GADGET_CHECK_RETURN_FALSE(exportImage(buf, filenameImag));
 
-template <typename T> 
-bool gtPlusIOAnalyze::exportArray(const hoNDArray<T>& a, const std::string& filename)
-{
-    try
-    {
-        dsr header;
-        GADGET_CHECK_RETURN_FALSE(array2Analyze(a, header));
-        GADGET_CHECK_RETURN_FALSE(writeAnalyzeHeader(filename, header));
-        GADGET_CHECK_RETURN_FALSE(writeAnalyzeData(filename, a.begin(), a.get_number_of_bytes()));
-    }
-    catch(...)
-    {
-        GADGET_ERROR_MSG("Errors in gtPlusIOAnalyze::exportArray(const hoNDArray<T>& a, const std::string& filename) ... ");
-        return false;
-    }
+            //GADGET_CHECK_RETURN_FALSE(Gadgetron::abs(a, buf));
+            //std::string filenameMag = filename;
+            //filenameMag.append("_MAG");
+            //GADGET_CHECK_RETURN_FALSE(exportImage(buf, filenameMag));
 
-    return true;
-}
+            //GADGET_CHECK_RETURN_FALSE(Gadgetron::argument(a, buf));
+            //std::string filenamePhase = filename;
+            //filenamePhase.append("_PHASE");
+            //GADGET_CHECK_RETURN_FALSE(exportImage(buf, filenamePhase));
 
-template <typename T> 
-bool gtPlusIOAnalyze::importArray(hoNDArray<T>& a, const std::string& filename)
-{
-    try
-    {
-        dsr header;
-        GADGET_CHECK_RETURN_FALSE(readAnalyzeHeader(filename, header));
-        GADGET_CHECK_RETURN_FALSE(analyze2Array(a, header));
-        GADGET_CHECK_RETURN_FALSE(readAnalyzeData(filename, a.begin(), a.get_number_of_bytes()));
-    }
-    catch(...)
-    {
-        GADGET_ERROR_MSG("Errors in gtPlusIOAnalyze::importArray(const hoNDArray<T>& a, const std::string& filename) ... ");
-        return false;
-    }
+            long long num = (long long)a.get_number_of_elements();
 
-    return true;
-}
+            long long n;
 
-template <typename T> 
-bool gtPlusIOAnalyze::exportArrayComplex(const hoNDArray<T>& a, const std::string& filename)
-{
-    try
-    {
-        typedef typename Gadgetron::realType<T>::Type value_type;
+            hoNDImage<value_type, D> rpart, ipart, mag, phs;
+            rpart.create( *a.get_dimensions() );
+            ipart.create( *a.get_dimensions() );
+            mag.create( *a.get_dimensions() );
+            phs.create( *a.get_dimensions() );
 
-        hoNDArray<value_type> buf;
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::complex_to_real(a, buf));
+            const T* pA = a.begin();
 
-        std::string filenameReal = filename;
-        filenameReal.append("_REAL");
-        GADGET_CHECK_RETURN_FALSE(exportArray(buf, filenameReal));
+            #pragma omp parallel for default(none) private(n) shared(num, pA, rpart, ipart, mag, phs)
+            for ( n=0; n<num; n++ )
+            {
+                rpart(n) = pA[n].real();
+                ipart(n) = pA[n].imag();
+                mag(n) = std::abs( pA[n] );
+                phs(n) = std::arg( pA[n] );
+            }
+
+            std::string filenameReal = filename;
+            filenameReal.append("_REAL");
+            GADGET_CHECK_RETURN_FALSE(exportImage(rpart, filenameReal));
 
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::complex_to_imag(a, buf));
-        std::string filenameImag = filename;
-        filenameImag.append("_IMAG");
-        GADGET_CHECK_RETURN_FALSE(exportArray(buf, filenameImag));
+            std::string filenameImag = filename;
+            filenameImag.append("_IMAG");
+            GADGET_CHECK_RETURN_FALSE(exportImage(ipart, filenameImag));
 
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::absolute(a, buf));
-        std::string filenameMag = filename;
-        filenameMag.append("_MAG");
-        GADGET_CHECK_RETURN_FALSE(exportArray(buf, filenameMag));
+            std::string filenameMag = filename;
+            filenameMag.append("_MAG");
+            GADGET_CHECK_RETURN_FALSE(exportImage(mag, filenameMag));
+
+            std::string filenamePhase = filename;
+            filenamePhase.append("_PHASE");
+            GADGET_CHECK_RETURN_FALSE(exportImage(phs, filenamePhase));
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in exportImageComplex(const hoNDImage<T,D>& a, const std::string& filename) ... ");
+            return false;
+        }
 
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::argument(a, buf));
-        std::string filenamePhase = filename;
-        filenamePhase.append("_PHASE");
-        GADGET_CHECK_RETURN_FALSE(exportArray(buf, filenamePhase));
+        return true;
     }
-    catch(...)
+
+    template <typename T, unsigned int D> 
+    bool importImageComplex(hoNDImage<T,D>& a, const std::string& filename)
     {
-        GADGET_ERROR_MSG("Errors in gtPlusIOAnalyze::exportArrayComplex(const hoNDArray<T>& a, const std::string& filename) ... ");
-        return false;
-    }
+        try
+        {
+            typedef typename T::value_type value_type;
+            hoNDImage<value_type, D> real, imag;
 
-    return true;
-}
+            std::string filenameReal = filename;
+            filenameReal.append("_REAL");
+            GADGET_CHECK_RETURN_FALSE(importImage(real, filenameReal));
 
-template <typename T> 
-bool gtPlusIOAnalyze::importArrayComplex(hoNDArray<T>& a, const std::string& filename)
-{
-    try
-    {
-        typedef typename T::value_type value_type;
-        hoNDArray<value_type> real, imag;
+            std::string filenameImag = filename;
+            filenameImag.append("_IMAG");
+            GADGET_CHECK_RETURN_FALSE(importImage(imag, filenameImag));
 
-        std::string filenameReal = filename;
-        filenameReal.append("_REAL");
-        GADGET_CHECK_RETURN_FALSE(importArray(real, filenameReal));
+            a.create(real.get_dimensions());
+            long long num = (long long)a.get_number_of_elements();
 
-        std::string filenameImag = filename;
-        filenameImag.append("_IMAG");
-        GADGET_CHECK_RETURN_FALSE(importArray(imag, filenameImag));
+            long long n;
+            T* pA = a.begin();
 
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::real_imag_to_complex(real, imag, a));
+            #pragma omp parallel for default(none) private(n) shared(num, pA, real, imag)
+            for ( n=0; n<num; n++ )
+            {
+                pA[n] = T( real(n), imag(n) );
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in importImageComplex(const hoNDImage<T,D>& a, const std::string& filename) ... ");
+            return false;
+        }
+
+        return true;
     }
-    catch(...)
+
+    template <typename T, unsigned int D> 
+    bool importImageComplex(hoNDImage<T,D>& a, const std::string& filename_real, const std::string& filename_imag)
     {
-        GADGET_ERROR_MSG("Errors in gtPlusIOAnalyze::importArrayComplex(const hoNDArray<T>& a, const std::string& filename) ... ");
-        return false;
-    }
+        try
+        {
+            typedef typename realType<T>::Type value_type;
+            hoNDImage<value_type, D> real, imag;
 
-    return true;
-}
+            GADGET_CHECK_RETURN_FALSE(importImage(real, filename_real));
+            GADGET_CHECK_RETURN_FALSE(importImage(imag, filename_imag));
 
-template <typename T> 
-bool gtPlusIOAnalyze::importArrayComplex(hoNDArray<T>& a, const std::string& filename_real, const std::string& filename_imag)
-{
-    try
-    {
-        typedef typename realType<T>::Type value_type;
-        hoNDArray<value_type> real, imag;
+            a.create(real.get_dimensions());
+            long long num = (long long)a.get_number_of_elements();
+
+            long long n;
+            T* pA = a.begin();
 
-        GADGET_CHECK_RETURN_FALSE(importArray(real, filename_real));
-        GADGET_CHECK_RETURN_FALSE(importArray(imag, filename_imag));
+            #pragma omp parallel for default(none) private(n) shared(num, pA, real, imag)
+            for ( n=0; n<num; n++ )
+            {
+                pA[n] = T( real(n), imag(n) );
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in importImageComplex(hoNDImage<T,D>& a, const std::string& filename_real, const std::string& filename_imag) ... ");
+            return false;
+        }
 
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::real_imag_to_complex(real, imag, a));
+        return true;
     }
-    catch(...)
+
+    template <typename T> 
+    bool export2DImage(const hoNDImage<T,2>& a, const std::string& filename)
     {
-        GADGET_ERROR_MSG("Errors in gtPlusIOAnalyze::importArrayComplex(hoNDArray<T>& a, const std::string& filename_real, const std::string& filename_imag) ... ");
-        return false;
+        return exportImage(a, filename);
     }
 
-    return true;
-}
-
-template <typename T> 
-bool gtPlusIOAnalyze::export2DArray(const hoNDArray<T>& a, const std::string& filename)
-{
-    return exportArray(a, filename);
-}
-
-template <typename T> 
-bool gtPlusIOAnalyze::import2DArray(hoNDArray<T>& a, const std::string& filename)
-{
-    return importArray(a, filename);
-}
-
-template <typename T> 
-bool gtPlusIOAnalyze::export2DArrayComplex(const hoNDArray<T>& a, const std::string& filename)
-{
-    return exportArrayComplex(a, filename);
-}
+    template <typename T> 
+    bool import2DImage(hoNDImage<T,2>& a, const std::string& filename)
+    {
+        return importImage(a, filename);
+    }
 
-template <typename T> 
-bool gtPlusIOAnalyze::import2DArrayComplex(hoNDArray<T>& a, const std::string& filename)
-{
-    return importArrayComplex(a, filename);
-}
+    template <typename T> 
+    bool export2DImageComplex(const hoNDImage<T,2>& a, const std::string& filename)
+    {
+        return exportImageComplex(a, filename);
+    }
 
-template <typename T> 
-bool gtPlusIOAnalyze::export3DArray(const hoNDArray<T>& a, const std::string& filename)
-{
-    return exportArray(a, filename);
-}
+    template <typename T> 
+    bool import2DImageComplex(hoNDImage<T,2>& a, const std::string& filename)
+    {
+        return importImageComplex(a, filename);
+    }
 
-template <typename T> 
-bool gtPlusIOAnalyze::import3DArray(hoNDArray<T>& a, const std::string& filename)
-{
-    return importArray(a, filename);
-}
+    template <typename T> 
+    bool export3DImage(const hoNDImage<T,3>& a, const std::string& filename)
+    {
+        return exportImage(a, filename);
+    }
 
-template <typename T> 
-bool gtPlusIOAnalyze::export3DArrayComplex(const hoNDArray<T>& a, const std::string& filename)
-{
-    return exportArrayComplex(a, filename);
-}
+    template <typename T> 
+    bool import3DImage(hoNDImage<T,3>& a, const std::string& filename)
+    {
+        return importImage(a, filename);
+    }
 
-template <typename T> 
-bool gtPlusIOAnalyze::import3DArrayComplex(hoNDArray<T>& a, const std::string& filename)
-{
-    return importArrayComplex(a, filename);
-}
+    template <typename T> 
+    bool export3DImageComplex(const hoNDImage<T,3>& a, const std::string& filename)
+    {
+        return exportImageComplex(a, filename);
+    }
 
-template <typename T> 
-bool gtPlusIOAnalyze::export4DArray(const hoNDArray<T>& a, const std::string& filename)
-{
-    try
+    template <typename T> 
+    bool import3DImageComplex(hoNDImage<T,3>& a, const std::string& filename)
     {
-        size_t RO     = a.get_size(0);
-        size_t E1     = a.get_size(1);
-        size_t CHA    = a.get_size(2);
-        size_t N      = a.get_size(3);
+        return importImageComplex(a, filename);
+    }
 
-        size_t ii;
-        for (ii=0; ii<N; ii++ )
+    template <typename T> 
+    bool export4DImage(const hoNDImage<T,4>& a, const std::string& filename)
+    {
+        try
         {
-            std::vector<size_t> dim(3);
-            dim[0] = RO;
-            dim[1] = E1;
-            dim[2] = CHA;
+            size_t RO     = a.get_size(0);
+            size_t E1     = a.get_size(1);
+            size_t CHA    = a.get_size(2);
+            size_t N      = a.get_size(3);
+
+            size_t ii;
+            for (ii=0; ii<N; ii++ )
+            {
+                std::vector<size_t> dim(3);
+                dim[0] = RO;
+                dim[1] = E1;
+                dim[2] = CHA;
 
-            boost::shared_ptr< std::vector<size_t> > sDim(&dim);
-            hoNDArray<T> a3D(sDim, const_cast<T*>(a.begin()+ii*RO*E1*CHA), false);
+                hoNDImage<T, 3> a3D(dim, const_cast<T*>(a.begin()+ii*RO*E1*CHA), false);
 
-            std::ostringstream ostr;
-            ostr << filename << "_" << ii << std::ends;
-            GADGET_CHECK_RETURN_FALSE(export3DArray(a3D, ostr.str()));
+                std::ostringstream ostr;
+                ostr << filename << "_" << ii << std::ends;
+                GADGET_CHECK_RETURN_FALSE(export3DImage(a3D, ostr.str()));
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in export4DImage(const hoNDImage<T>& a, const std::string& filename) ... ");
+            return false;
         }
-    }
-    catch(...)
-    {
-        GADGET_ERROR_MSG("Errors in gtPlusIOAnalyze::export4DArray(const hoNDArray<T>& a, const std::string& filename) ... ");
-        return false;
-    }
 
-    return true;
-}
+        return true;
+    }
 
-template <typename T> 
-bool gtPlusIOAnalyze::export4DArrayComplex(const hoNDArray<T>& a, const std::string& filename)
-{
-    try
+    template <typename T> 
+    bool export4DImageComplex(const hoNDImage<T,4>& a, const std::string& filename)
     {
-        size_t RO     = a.get_size(0);
-        size_t E1     = a.get_size(1);
-        size_t CHA    = a.get_size(2);
-        size_t N      = a.get_size(3);
-
-        size_t ii;
-        for (ii=0; ii<N; ii++ )
+        try
         {
-            std::vector<size_t> dim(3);
-            dim[0] = RO;
-            dim[1] = E1;
-            dim[2] = CHA;
+            size_t RO     = a.get_size(0);
+            size_t E1     = a.get_size(1);
+            size_t CHA    = a.get_size(2);
+            size_t N      = a.get_size(3);
 
-            boost::shared_ptr< std::vector<size_t> > sDim(&dim);
-            hoNDArray<T> a3D(sDim, const_cast<T*>(a.begin()+ii*RO*E1*CHA), false);
+            size_t ii;
+            for (ii=0; ii<N; ii++ )
+            {
+                std::vector<size_t> dim(3);
+                dim[0] = RO;
+                dim[1] = E1;
+                dim[2] = CHA;
 
-            std::ostringstream ostr;
-            ostr << filename << "_" << ii << std::ends;
-            GADGET_CHECK_RETURN_FALSE(export3DArrayComplex(a3D, ostr.str()));
+                hoNDImage<T, 3> a3D(dim, const_cast<T*>(a.begin()+ii*RO*E1*CHA), false);
+
+                std::ostringstream ostr;
+                ostr << filename << "_" << ii << std::ends;
+                GADGET_CHECK_RETURN_FALSE(export3DImageComplex(a3D, ostr.str()));
+            }
         }
-    }
-    catch(...)
-    {
-        GADGET_ERROR_MSG("Errors in gtPlusIOAnalyze::export4DArrayComplex(const hoNDArray<T>& a, const std::string& filename) ... ");
-        return false;
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in export4DImageComplex(const hoNDImage<T>& a, const std::string& filename) ... ");
+            return false;
+        }
+
+        return true;
     }
 
-    return true;
-}
+protected:
+
+    template <typename T> bool array2Header(const hoNDArray<T>& a, HeaderType& header);
+    template <typename T> bool header2Array(hoNDArray<T>& a, const HeaderType& header);
+
+    template <typename T, unsigned int D> bool image2Header(const hoNDImage<T, D>& a, HeaderType& header);
+    template <typename T, unsigned int D> bool header2Image(hoNDImage<T, D>& a, const HeaderType& header);
+
+    // read/write the analyze header
+    bool readHeader(const std::string& filename, HeaderType& header);
+    bool writeHeader(const std::string& filename, const HeaderType& header);
+};
 
 template <typename T> 
-bool gtPlusIOAnalyze::array2Analyze(const hoNDArray<T>& a, dsr& header)
+bool gtPlusIOAnalyze::array2Header(const hoNDArray<T>& a, HeaderType& header)
 {
     try
     {
@@ -494,7 +565,7 @@ bool gtPlusIOAnalyze::array2Analyze(const hoNDArray<T>& a, dsr& header)
         header.dime.unused14 = 0;
 
         std::string rttiID = std::string(typeid(T).name());
-        header.dime.datatype = (short)getAnalyzeDataTypeFromRTTI(rttiID);
+        header.dime.datatype = (short)getDataTypeFromRTTI(rttiID);
         header.dime.bitpix = (short)(8*sizeof(T));
         header.dime.dim_un0 = 0;
 
@@ -556,12 +627,12 @@ bool gtPlusIOAnalyze::array2Analyze(const hoNDArray<T>& a, dsr& header)
 }
 
 template <typename T> 
-bool gtPlusIOAnalyze::analyze2Array(hoNDArray<T>& a, const dsr& header)
+bool gtPlusIOAnalyze::header2Array(hoNDArray<T>& a, const HeaderType& header)
 {
     try
     {
         std::string rttiID = std::string(typeid(T).name());
-        GADGET_CHECK_RETURN_FALSE(rttiID==getRTTIFromAnalyzeDataType( (AnalyzeDataType)header.dime.datatype));
+        GADGET_CHECK_RETURN_FALSE(rttiID==getRTTIFromDataType( (GtDataType)header.dime.datatype));
 
         std::vector<size_t> dim(header.dime.dim[0]);
         size_t ii;
@@ -605,44 +676,258 @@ bool gtPlusIOAnalyze::analyze2Array(hoNDArray<T>& a, const dsr& header)
     return true;
 }
 
-template <typename T> 
-bool gtPlusIOAnalyze::readAnalyzeData(const std::string& filename, T* data, long long len)
+template <typename T, unsigned int D> 
+bool gtPlusIOAnalyze::image2Header(const hoNDImage<T,D>& a, HeaderType& header)
 {
     try
     {
-        std::string filenameData = filename;
-        filenameData.append(".img");
-        gtPlusIOWorker ioworker(filenameData, true);
+        typedef typename hoNDImage<T,D>::coord_type coord_type;
+
+        // set everything to zero
+        memset(&header, 0, sizeof(dsr));
+
+        // header_key
+        header.hk.sizeof_hdr = 348;
+        size_t i;
+        for (i=0; i<10; i++ ) header.hk.data_type[i] = 0;
+        for (i=0; i<18; i++ ) header.hk.db_name[i] = 0;
+        header.hk.extents = 16384;
+        header.hk.session_error = 0;
+        header.hk.regular = 'r';
+        header.hk.hkey_un0 = 0;
+
+        // image_dimension
+        size_t NDim = D;
+
+        header.dime.dim[0] = (short)(NDim);
+        header.dime.dim[1] = (short)(a.get_size(0));
+
+        if ( NDim > 1 )
+            header.dime.dim[2] = (short)(a.get_size(1));
+        else
+            header.dime.dim[2] = 1;
+
+        if ( NDim > 2 )
+            header.dime.dim[3] = (short)(a.get_size(2));
+        else
+            header.dime.dim[3] = 1;
+
+        if ( NDim > 3 )
+            header.dime.dim[4] = (short)(a.get_size(3));
+        else
+            header.dime.dim[4] = 1;
+
+        if ( NDim > 4 )
+            header.dime.dim[5] = (short)(a.get_size(4));
+        else
+            header.dime.dim[5] = 1;
+
+        if ( NDim > 5 )
+            header.dime.dim[6] = (short)(a.get_size(5));
+        else
+            header.dime.dim[6] = 1;
+
+        if ( NDim > 6 )
+            header.dime.dim[7] = (short)(a.get_size(6));
+        else
+            header.dime.dim[7] = 1;
+
+        if ( NDim > 7 )
+            header.dime.unused8 = (short)(a.get_size(7));
+        else
+            header.dime.unused8 = 1;
 
-        GADGET_CHECK_RETURN_FALSE(ioworker.open());
-        GADGET_CHECK_RETURN_FALSE(ioworker.read(reinterpret_cast<char*>(data), len));
-        GADGET_CHECK_RETURN_FALSE(ioworker.close());
+        if ( NDim > 8 )
+            header.dime.unused9 = (short)(a.get_size(8));
+        else
+            header.dime.unused9 = 1;
+
+        if ( NDim > 9 )
+            header.dime.unused10 = (short)(a.get_size(9));
+        else
+            header.dime.unused10 = 1;
+
+        header.dime.unused11 = 0;
+        header.dime.unused12 = 0;
+        header.dime.unused13 = 0;
+        header.dime.unused14 = 0;
+
+        std::string rttiID = std::string(typeid(T).name());
+        header.dime.datatype = (short)getDataTypeFromRTTI(rttiID);
+        header.dime.bitpix = (short)(8*sizeof(T));
+        header.dime.dim_un0 = 0;
+
+        header.dime.pixdim[0] = 0;
+        header.dime.pixdim[1] = a.get_pixel_size(0);
+        header.dime.pixdim[2] = 1;
+        header.dime.pixdim[3] = 1;
+        if ( NDim > 1 )
+            header.dime.pixdim[2] = a.get_pixel_size(1);
+        if ( NDim > 2 )
+            header.dime.pixdim[3] = a.get_pixel_size(2);
+        if ( NDim > 3 )
+            header.dime.pixdim[4] = a.get_pixel_size(3);
+        if ( NDim > 4 )
+            header.dime.pixdim[5] = a.get_pixel_size(4);
+        if ( NDim > 5 )
+            header.dime.pixdim[6] = a.get_pixel_size(5);
+        if ( NDim > 6 )
+            header.dime.pixdim[7] = a.get_pixel_size(6);
+
+        header.dime.vox_offset = 0;
+        header.dime.funused1 = 0;
+        header.dime.funused2 = 0;
+        header.dime.funused3 = 0;
+        header.dime.cal_max = 0;
+        header.dime.cal_min = 0;
+        header.dime.compressed = 0;
+        header.dime.verified = 0;
+        header.dime.glmax = 0;
+        header.dime.glmin = 0;
+
+        // data history
+        for (i=0; i<80; i++ ) header.hist.descrip[i] = 0;
+        for (i=0; i<24; i++ ) header.hist.aux_file[i] = 0;
+        header.hist.orient = 0;
+        for (i=0; i<10; i++ ) header.hist.originator[i] = 0;
+        for (i=0; i<10; i++ ) header.hist.generated[i] = 0;
+        for (i=0; i<10; i++ ) header.hist.scannum[i] = 0;
+        for (i=0; i<10; i++ ) header.hist.patient_id[i] = 0;
+        for (i=0; i<10; i++ ) header.hist.exp_date[i] = 0;
+        for (i=0; i<10; i++ ) header.hist.exp_time[i] = 0;
+        for (i=0; i<3; i++ ) header.hist.hist_un0[i] = 0;
+        header.hist.views = 0;
+        header.hist.vols_added = 0;
+        header.hist.start_field = 0;
+        header.hist.field_skip = 0;
+        header.hist.omax = 0;
+        header.hist.omin = 0;
+        header.hist.smax = 0;
+        header.hist.smin = 0;
+
+        // store image origin and axis
+        // total number of bytes are
+        size_t numOfBytes = sizeof(float)*(D+D*D);
+        if ( numOfBytes <= sizeof(data_history) )
+        {
+            std::vector<float> buf(D+D*D, 0);
+
+            unsigned int ii;
+            for ( ii=0; ii<D; ii++ )
+            {
+                buf[ii] = (float)a.get_origin(ii);
+            }
+
+            unsigned int jj;
+            for ( ii=0; ii<D; ii++ )
+            {
+                for ( jj=0; jj<D; jj++ )
+                {
+                    buf[D+ii*D+jj] = (float)a.get_axis(ii, jj);
+                }
+            }
+
+            memcpy(&header.hist, &buf[0], numOfBytes);
+        }
     }
     catch(...)
     {
-        GADGET_ERROR_MSG("Errors in gtPlusIOAnalyze::readAnalyzeData(const std::string& filename, T* data, long long len) ... ");
+        GADGET_ERROR_MSG("Errors in gtPlusIOAnalyze::image2Analyze(const hoNDImage<T>& a, dsr& header) ... ");
         return false;
     }
 
     return true;
 }
 
-template <typename T> 
-bool gtPlusIOAnalyze::writeAnalyzeData(const std::string& filename, const T* data, long long len)
+template <typename T, unsigned int D> 
+bool gtPlusIOAnalyze::header2Image(hoNDImage<T,D>& a, const HeaderType& header)
 {
     try
     {
-        std::string filenameData = filename;
-        filenameData.append(".img");
-        gtPlusIOWorker ioworker(filenameData, false);
+        std::string rttiID = std::string(typeid(T).name());
+        GADGET_CHECK_RETURN_FALSE(rttiID==getRTTIFromDataType( (GtDataType)header.dime.datatype));
+
+        std::vector<size_t> dim(header.dime.dim[0]);
+
+        if ( D > dim.size() ) return false;
 
-        GADGET_CHECK_RETURN_FALSE(ioworker.open());
-        GADGET_CHECK_RETURN_FALSE(ioworker.write(reinterpret_cast<const char*>(data), len));
-        GADGET_CHECK_RETURN_FALSE(ioworker.close());
+        size_t ii;
+        for ( ii=0; ii<dim.size(); ii++ )
+        {
+            if ( ii == 7 )
+            {
+                dim[ii] = header.dime.unused8;
+            }
+            else if ( ii == 8 )
+            {
+                dim[ii] = header.dime.unused9;
+            }
+            else if ( ii == 9 ) 
+            {
+                dim[ii] = header.dime.unused10;
+            }
+            else
+            {
+                dim[ii] = header.dime.dim[ii+1];
+            }
+        }
+
+        a.create(dim);
+
+        for ( ii=0; ii<dim.size(); ii++ )
+        {
+            if ( ii < 7 )
+            {
+                a.set_pixel_size(ii, header.dime.pixdim[ii+1]);
+            }
+        }
+
+        // get origin and axis
+        size_t numOfBytes = sizeof(float)*(D+D*D);
+        if ( numOfBytes <= sizeof(data_history) )
+        {
+            std::vector<float> buf(D+D*D);
+            memcpy(&buf[0], &header.hist, numOfBytes);
+
+            unsigned int ii;
+            for ( ii=0; ii<D; ii++ )
+            {
+                a.set_origin(ii, buf[ii]);
+            }
+
+            unsigned int jj;
+            for ( ii=0; ii<D; ii++ )
+            {
+                typename hoNDImage<T,D>::coord_type v(0);
+                typename hoNDImage<T,D>::coord_type mag(0);
+
+                for ( jj=0; jj<D; jj++ )
+                {
+                    v = buf[D+ii*D+jj];
+                    mag += v*v;
+                    a.set_axis(ii, jj, v);
+                }
+
+                if ( mag < FLT_EPSILON )
+                {
+                    for ( jj=0; jj<D; jj++ )
+                    {
+                        if ( ii != jj )
+                        {
+                            a.set_axis(ii, jj, 0);
+                        }
+                        else
+                        {
+                            a.set_axis(ii, jj, (typename hoNDImage<T,D>::coord_type)(1.0) );
+                        }
+                    }
+                }
+            }
+        }
     }
     catch(...)
     {
-        GADGET_ERROR_MSG("Errors in gtPlusIOAnalyze::writeAnalyzeData(const std::string& filename, const T* data, long long len) ... ");
+        GADGET_ERROR_MSG("Errors in gtPlusIOAnalyze::analyze2Image(hoNDImage<T,D>& a, const dsr& header) ... ");
         return false;
     }
 
diff --git a/toolboxes/gtplus/util/gtPlusIOBase.cpp b/toolboxes/gtplus/util/gtPlusIOBase.cpp
index b08c445..69d5b53 100644
--- a/toolboxes/gtplus/util/gtPlusIOBase.cpp
+++ b/toolboxes/gtplus/util/gtPlusIOBase.cpp
@@ -76,10 +76,10 @@ long gtPlusIOWorker::tell()
 
     if ( readFlag_ )
     {
-        return fid_.tellg();
+        return (long)fid_.tellg();
     }
 
-    return fid_.tellp();
+    return (long)fid_.tellp();
 }
 
 bool gtPlusIOWorker::seek(long long offset)
@@ -125,76 +125,76 @@ bool gtPlusIOWorker::write(const char* data, long long len)
 
 // --------------------------------------------------------------------------
 
-void gtPlusIOBase::printInfo(std::ostream& os)
-{
-    using namespace std;
-
-    os << "-------------- GTPlus IO Util ---------------" << endl;
-    os << "Implementation of file input/output operations" << endl;
-    os << "---------------------------------------------" << endl;
-}
-
-bool gtPlusIOBase::readFromFile(const std::string& filename, char*& data, long long& length)
-{
-    try
-    {
-        if (data!=NULL) delete [] data;
-
-        gtPlusIOWorker ioworker_(filename, true);
-
-        GADGET_CHECK_RETURN_FALSE(ioworker_.open());
-
-        // read the total length
-        long long totalLen;
-        GADGET_CHECK_RETURN_FALSE(ioworker_.read(reinterpret_cast<char*>(&totalLen), sizeof(long long)));
-
-        length = totalLen - sizeof(long long);
-
-        data = new char[length];
-        GADGET_CHECK_RETURN_FALSE(data!=NULL);
-
-        GADGET_CHECK_RETURN_FALSE(ioworker_.read(data, length));
-
-        GADGET_CHECK_RETURN_FALSE(ioworker_.close());
-    }
-    catch (...)
-    {
-        GADGET_ERROR_MSG("Errors in gtPlusIOBase::readFromFile(const std::string& filename, char*& data, long long& length) ... ");
-        return false;
-    }
-
-    return true;
-}
-
-bool gtPlusIOBase::writeToFile(const std::string& filename, char* data, long long length)
-{
-    try
-    {
-        if ( length == 0 ) return true;
-
-        GADGET_CHECK_RETURN_FALSE(data!=NULL);
-
-        gtPlusIOWorker ioworker_(filename, false);
-
-        GADGET_CHECK_RETURN_FALSE(ioworker_.open());
-
-        // write the total lengh
-        const long long totalLen = length+sizeof(long long);
-        GADGET_CHECK_RETURN_FALSE(ioworker_.write(reinterpret_cast<const char*>(&totalLen), sizeof(long long)));
-
-        // write the data
-        GADGET_CHECK_RETURN_FALSE(ioworker_.write(data, length));
-
-        // close the file
-        GADGET_CHECK_RETURN_FALSE(ioworker_.close());
-    }
-    catch (...)
-    {
-        GADGET_ERROR_MSG("Errors in gtPlusIOBase::writeToFile(const std::string& filename, char* data, long long length) ... ");
-        return false;
-    }
-
-    return true;
-}
+//void gtPlusIOBase::printInfo(std::ostream& os)
+//{
+//    using namespace std;
+//
+//    os << "-------------- GTPlus IO Util ---------------" << endl;
+//    os << "Implementation of file input/output operations" << endl;
+//    os << "---------------------------------------------" << endl;
+//}
+//
+//bool gtPlusIOBase::readFromFile(const std::string& filename, char*& data, long long& length)
+//{
+//    try
+//    {
+//        if (data!=NULL) delete [] data;
+//
+//        gtPlusIOWorker ioworker_(filename, true);
+//
+//        GADGET_CHECK_RETURN_FALSE(ioworker_.open());
+//
+//        // read the total length
+//        long long totalLen;
+//        GADGET_CHECK_RETURN_FALSE(ioworker_.read(reinterpret_cast<char*>(&totalLen), sizeof(long long)));
+//
+//        length = totalLen - sizeof(long long);
+//
+//        data = new char[length];
+//        GADGET_CHECK_RETURN_FALSE(data!=NULL);
+//
+//        GADGET_CHECK_RETURN_FALSE(ioworker_.read(data, length));
+//
+//        GADGET_CHECK_RETURN_FALSE(ioworker_.close());
+//    }
+//    catch (...)
+//    {
+//        GADGET_ERROR_MSG("Errors in gtPlusIOBase::readFromFile(const std::string& filename, char*& data, long long& length) ... ");
+//        return false;
+//    }
+//
+//    return true;
+//}
+//
+//bool gtPlusIOBase::writeToFile(const std::string& filename, char* data, long long length)
+//{
+//    try
+//    {
+//        if ( length == 0 ) return true;
+//
+//        GADGET_CHECK_RETURN_FALSE(data!=NULL);
+//
+//        gtPlusIOWorker ioworker_(filename, false);
+//
+//        GADGET_CHECK_RETURN_FALSE(ioworker_.open());
+//
+//        // write the total length
+//        const long long totalLen = length+sizeof(long long);
+//        GADGET_CHECK_RETURN_FALSE(ioworker_.write(reinterpret_cast<const char*>(&totalLen), sizeof(long long)));
+//
+//        // write the data
+//        GADGET_CHECK_RETURN_FALSE(ioworker_.write(data, length));
+//
+//        // close the file
+//        GADGET_CHECK_RETURN_FALSE(ioworker_.close());
+//    }
+//    catch (...)
+//    {
+//        GADGET_ERROR_MSG("Errors in gtPlusIOBase::writeToFile(const std::string& filename, char* data, long long length) ... ");
+//        return false;
+//    }
+//
+//    return true;
+//}
 
 }}
diff --git a/toolboxes/gtplus/util/gtPlusIOBase.h b/toolboxes/gtplus/util/gtPlusIOBase.h
index 59cc02f..ff359a3 100644
--- a/toolboxes/gtplus/util/gtPlusIOBase.h
+++ b/toolboxes/gtplus/util/gtPlusIOBase.h
@@ -8,41 +8,21 @@
 #include <iostream>
 #include <typeinfo>
 
-#include "GtPlusExport.h"
+#include "GtPlusIOExport.h"
+#include "GadgetronCommon.h"
+
 #include "NDArray.h"
 #include "complext.h"
-#include "vector_td.h"
 #include "GadgetronException.h"
-#include "GadgetronCommon.h"
-
-#include <mkl.h>
 
 #include "hoNDArray.h"
-#include "ho2DArray.h"
-#include "ho3DArray.h"
-#include "ho4DArray.h"
-#include "ho5DArray.h"
-#include "ho6DArray.h"
-#include "ho7DArray.h"
+#include "hoNDImage.h"
 
 #include "hoNDArray_fileio.h"
-#include "hoNDArray_elemwise.h"
-
-// the file input/output utility functions
-
-#ifdef GT_Complex8
-    #undef GT_Complex8
-#endif // GT_Complex8
-typedef std::complex<float> GT_Complex8;
-
-#ifdef GT_Complex16
-    #undef GT_Complex16
-#endif // GT_Complex16
-typedef std::complex<double> GT_Complex16;
 
 namespace Gadgetron { namespace gtPlus {
 
-class EXPORTGTPLUS gtPlusIOWorker
+class EXPORTGTPLUSIO gtPlusIOWorker
 {
 public:
 
@@ -80,50 +60,756 @@ protected:
     bool readFlag_;
 };
 
-class EXPORTGTPLUS gtPlusIOBase
+#ifdef DT_UNKNOWN
+    #undef DT_UNKNOWN
+#endif // DT_UNKNOWN
+
+enum GtDataType
 {
-public:
+    DT_ANA_UNKNOWN=0,
+    //DT_BINARY=1, 
+    //DT_UNSIGNED_CHAR=2,
+    //DT_SIGNED_SHORT=4,
+    //DT_UNSIGNED_SHORT=5,
+    //DT_SIGNED_INT=8,
+    //DT_UNSIGNED_INT=9,
+    //DT_FLOAT=16,
+    //DT_COMPLEX=32,
+    //DT_DOUBLE=64,
+    //DT_DOUBLECOMPLEX=96, // this type is added to support complex double
+    //DT_RGB=128,
+    //DT_ALL=255
+
+    DT_NONE                    =0,
+    DT_UNKNOWN                 =0,     /* what it says, dude           */
+    DT_BINARY                  =1,     /* binary (1 bit/voxel)         */
+    DT_UNSIGNED_CHAR           =2,     /* unsigned char (8 bits/voxel) */
+    DT_SIGNED_SHORT            =4,     /* signed short (16 bits/voxel) */
+    DT_UNSIGNED_SHORT          =5,
+    DT_SIGNED_INT              =8,     /* signed int (32 bits/voxel)   */
+    DT_UNSIGNED_INT            =9,
+    DT_FLOAT                  =16,     /* float (32 bits/voxel)        */
+    DT_COMPLEX                =32,     /* complex (64 bits/voxel)      */
+    DT_DOUBLE                 =64,     /* double (64 bits/voxel)       */
+    DT_RGB                   =128,     /* RGB triple (24 bits/voxel)   */
+    DT_ALL                   =255,     /* not very useful (?)          */
 
-    gtPlusIOBase() {}
-    virtual ~gtPlusIOBase() {}
+                                /*----- another set of names for the same ---*/
+    DT_UINT8                   =2,
+    DT_INT16                   =4,
+    DT_INT32                   =8,
+    DT_FLOAT32                =16,
+    DT_COMPLEX64              =32,
+    DT_FLOAT64                =64,
+    DT_RGB24                 =128,
 
+                                /*------------------- new codes for NIFTI ---*/
+    DT_INT8                  =256,     /* signed char (8 bits)         */
+    DT_UINT16                =512,     /* unsigned short (16 bits)     */
+    DT_UINT32                =768,     /* unsigned int (32 bits)       */
+    DT_INT64                =1024,     /* long long (64 bits)          */
+    DT_UINT64               =1280,     /* unsigned long long (64 bits) */
+    DT_FLOAT128             =1536,     /* long double (128 bits)       */
+    DT_COMPLEX128           =1792,     /* double pair (128 bits)       */
+    DT_COMPLEX256           =2048,     /* long double pair (256 bits)  */
+    DT_RGBA32               =2304,     /* 4 byte RGBA (32 bits/voxel)  */
+};
+
+template <typename HeaderType>
+class gtPlusIOBase
+{
 public:
 
-    void printInfo(std::ostream& os);
+    typedef HeaderType THeaderType;
 
-    // buffer read/write functions
-    // length: number of bytes
-    bool readFromFile(const std::string& filename, char*& data, long long& length);
-    bool writeToFile(const std::string& filename, char* data, long long length);
+    gtPlusIOBase()
+    {
+        pixelSize_.resize(10, 1.0); // default: ten pixel-size entries, each set to 1.0
+    }
 
-    // general export/input for ND array
-    //template <typename T> bool exportNDArray(const hoNDArray<T>& a, const std::string& filename) const;
-    //template <typename T> bool importNDArray(hoNDArray<T>& a, std::string& filename) const;
-};
+    gtPlusIOBase(float px, float py) // stores exactly two pixel sizes, in argument order
+    {
+        pixelSize_.resize(2);
+        pixelSize_[0] = px;
+        pixelSize_[1] = py;
+    }
 
-/*template <typename T>
-bool gtPlusIOBase::exportNDArray(const hoNDArray<T>& a, const std::string& filename) const
-{
-    GADGET_CHECK_RETURN_FALSE( Gadgetron::write_nd_array(const_cast<hoNDArray<T>* >(&a), filename) == 0 );
-    return true;
-}
+    gtPlusIOBase(float px, float py, float pz) // stores exactly three pixel sizes, in argument order
+    {
+        pixelSize_.resize(3);
+        pixelSize_[0] = px;
+        pixelSize_[1] = py;
+        pixelSize_[2] = pz;
+    }
 
-template <typename T> 
-bool gtPlusIOBase::importNDArray(hoNDArray<T>& a, std::string& filename) const
-{
-    try
+    gtPlusIOBase(float px, float py, float pz, float pt) // stores exactly four pixel sizes, in argument order
+    {
+        pixelSize_.resize(4);
+        pixelSize_[0] = px;
+        pixelSize_[1] = py;
+        pixelSize_[2] = pz;
+        pixelSize_[3] = pt;
+    }
+
+    gtPlusIOBase(float px, float py, float pz, float pt, float pr) // stores exactly five pixel sizes, in argument order
+    {
+        pixelSize_.resize(5);
+        pixelSize_[0] = px;
+        pixelSize_[1] = py;
+        pixelSize_[2] = pz;
+        pixelSize_[3] = pt;
+        pixelSize_[4] = pr;
+    }
+
+    gtPlusIOBase(float px, float py, float pz, float pt, float pr, float ps) // stores exactly six pixel sizes, in argument order
+    {
+        pixelSize_.resize(6);
+        pixelSize_[0] = px;
+        pixelSize_[1] = py;
+        pixelSize_[2] = pz;
+        pixelSize_[3] = pt;
+        pixelSize_[4] = pr;
+        pixelSize_[5] = ps;
+    }
+
+    gtPlusIOBase(float px, float py, float pz, float pt, float pr, float ps, float pp) // stores exactly seven pixel sizes, in argument order
+    {
+        pixelSize_.resize(7);
+        pixelSize_[0] = px;
+        pixelSize_[1] = py;
+        pixelSize_[2] = pz;
+        pixelSize_[3] = pt;
+        pixelSize_[4] = pr;
+        pixelSize_[5] = ps;
+        pixelSize_[6] = pp;
+    }
+
+    gtPlusIOBase(float px, float py, float pz, float pt, float pr, float ps, float pp, float pq) // stores exactly eight pixel sizes, in argument order
+    {
+        pixelSize_.resize(8);
+        pixelSize_[0] = px;
+        pixelSize_[1] = py;
+        pixelSize_[2] = pz;
+        pixelSize_[3] = pt;
+        pixelSize_[4] = pr;
+        pixelSize_[5] = ps;
+        pixelSize_[6] = pp;
+        pixelSize_[7] = pq;
+    }
+
+    void setPixelSize(float px, float py, float pz=1.0f, float pt=1.0f, float pr=1.0f, float ps=1.0f, float pp=1.0f, float pq=1.0f)
+    {
+        pixelSize_.resize(8); // always eight entries, whatever size the constructor chose; omitted arguments are 1.0f
+        pixelSize_[0] = px;
+        pixelSize_[1] = py;
+        pixelSize_[2] = pz;
+        pixelSize_[3] = pt;
+        pixelSize_[4] = pr;
+        pixelSize_[5] = ps;
+        pixelSize_[6] = pp;
+        pixelSize_[7] = pq;
+    }
+
+    void setPixelSize(double px, double py, double pz=1.0, double pt=1.0, double pr=1.0, double ps=1.0, double pp=1.0, double pq=1.0)
+    {
+        pixelSize_.resize(8); // always eight entries; omitted arguments are 1.0
+        pixelSize_[0] = (float)px; // each double argument is narrowed to float before it is stored
+        pixelSize_[1] = (float)py;
+        pixelSize_[2] = (float)pz;
+        pixelSize_[3] = (float)pt;
+        pixelSize_[4] = (float)pr;
+        pixelSize_[5] = (float)ps;
+        pixelSize_[6] = (float)pp;
+        pixelSize_[7] = (float)pq;
+    }
+
+    void printInfo(std::ostream& os) // writes a fixed two-line banner to os; reads no object state
+    {
+        using namespace std;
+
+        os << "-------------- GTPlus Array/Image input/output to medical image format -------------" << endl;
+        os << "--------------------------------------------------------------------------" << endl;
+    }
+
+    virtual ~gtPlusIOBase() // virtual: this class declares pure virtual exportArray/importArray and is meant to be derived from
+    {
+    }
+
+    /// export/import for 2D/3D/4D array
+    /// filename should be given without extension
+
+    virtual bool exportArray(const hoNDArray<short>& a, const std::string& filename) = 0;
+    virtual bool exportArray(const hoNDArray<unsigned short>& a, const std::string& filename) = 0;
+    virtual bool exportArray(const hoNDArray<int>& a, const std::string& filename) = 0;
+    virtual bool exportArray(const hoNDArray<unsigned int>& a, const std::string& filename) = 0;
+    virtual bool exportArray(const hoNDArray<float>& a, const std::string& filename) = 0;
+    virtual bool exportArray(const hoNDArray<double>& a, const std::string& filename) = 0;
+    virtual bool exportArray(const hoNDArray< std::complex<float> >& a, const std::string& filename) = 0;
+    virtual bool exportArray(const hoNDArray< std::complex<double> >& a, const std::string& filename) = 0;
+
+    virtual bool importArray(hoNDArray<short>& a, const std::string& filename) = 0;
+    virtual bool importArray(hoNDArray<unsigned short>& a, const std::string& filename) = 0;
+    virtual bool importArray(hoNDArray<int>& a, const std::string& filename) = 0;
+    virtual bool importArray(hoNDArray<unsigned int>& a, const std::string& filename) = 0;
+    virtual bool importArray(hoNDArray<float>& a, const std::string& filename) = 0;
+    virtual bool importArray(hoNDArray<double>& a, const std::string& filename) = 0;
+    virtual bool importArray(hoNDArray< std::complex<float> >& a, const std::string& filename) = 0;
+    virtual bool importArray(hoNDArray< std::complex<double> >& a, const std::string& filename) = 0;
+
+    template <typename T> // T must provide real() and imag(); realType<T>::Type is the scalar type that gets exported
+    bool exportArrayComplexRealImag(const hoNDArray<T>& a, const std::string& filename) // false if an export fails or an exception is caught
+    {
+        try
+        {
+            typedef typename Gadgetron::realType<T>::Type value_type;
+            // one buffer with the dimensions of a, filled twice: real parts first, imaginary parts second
+            hoNDArray<value_type> buf(a.get_dimensions());
+
+            long long num = (long long)a.get_number_of_elements();
+
+            long long n;
+            #pragma omp parallel for default(none) private(n) shared(num, a, buf)
+            for ( n=0; n<num; n++ )
+            {
+                buf(n) = a(n).real();
+            }
+            // the real parts are exported under "<filename>_REAL"
+            std::string filenameReal = filename;
+            filenameReal.append("_REAL");
+            GADGET_CHECK_RETURN_FALSE(exportArray(buf, filenameReal));
+            // buf is overwritten only after the real parts have been exported
+            #pragma omp parallel for default(none) private(n) shared(num, a, buf)
+            for ( n=0; n<num; n++ )
+            {
+                buf(n) = a(n).imag();
+            }
+            // the imaginary parts are exported under "<filename>_IMAG"
+            std::string filenameImag = filename;
+            filenameImag.append("_IMAG");
+            GADGET_CHECK_RETURN_FALSE(exportArray(buf, filenameImag));
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in exportArrayComplexRealImag(const hoNDArray<T>& a, const std::string& filename) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template <typename T> // T must provide real() and imag() and be accepted by std::abs / std::arg
+    bool exportArrayComplex(const hoNDArray<T>& a, const std::string& filename) // false if an export fails or an exception is caught
+    {
+        try
+        {
+            typedef typename Gadgetron::realType<T>::Type value_type;
+            // disabled variant that went through Gadgetron::complex_to_real / complex_to_imag / abs / argument:
+            /*hoNDArray<value_type> buf;
+            GADGET_CHECK_RETURN_FALSE(Gadgetron::complex_to_real(a, buf));
+
+            std::string filenameReal = filename;
+            filenameReal.append("_REAL");
+            GADGET_CHECK_RETURN_FALSE(exportArray(buf, filenameReal));
+
+            GADGET_CHECK_RETURN_FALSE(Gadgetron::complex_to_imag(a, buf));
+            std::string filenameImag = filename;
+            filenameImag.append("_IMAG");
+            GADGET_CHECK_RETURN_FALSE(exportArray(buf, filenameImag));
+
+            GADGET_CHECK_RETURN_FALSE(Gadgetron::abs(a, buf));
+            std::string filenameMag = filename;
+            filenameMag.append("_MAG");
+            GADGET_CHECK_RETURN_FALSE(exportArray(buf, filenameMag));
+
+            GADGET_CHECK_RETURN_FALSE(Gadgetron::argument(a, buf));
+            std::string filenamePhase = filename;
+            filenamePhase.append("_PHASE");
+            GADGET_CHECK_RETURN_FALSE(exportArray(buf, filenamePhase));*/
+            // active variant: four output arrays with the dimensions of a, all filled in a single pass
+            hoNDArray<value_type> rpart, ipart, mag, phs;
+            rpart.create(a.get_dimensions());
+            ipart.create(a.get_dimensions());
+            mag.create(a.get_dimensions());
+            phs.create(a.get_dimensions());
+
+            long long num = (long long)a.get_number_of_elements();
+
+            long long n;
+
+            #pragma omp parallel for default(none) private(n) shared(num, a, rpart, ipart, mag, phs)
+            for ( n=0; n<num; n++ )
+            {
+                rpart(n) = a(n).real();
+                ipart(n) = a(n).imag();
+                mag(n) = std::abs( a(n) );
+                phs(n) = std::arg( a(n) );
+            }
+            // each component goes to its own file: "<filename>_REAL", "_IMAG", "_MAG", "_PHASE"
+            std::string filenameReal = filename;
+            filenameReal.append("_REAL");
+            GADGET_CHECK_RETURN_FALSE(exportArray(rpart, filenameReal));
+
+            std::string filenameImag = filename;
+            filenameImag.append("_IMAG");
+            GADGET_CHECK_RETURN_FALSE(exportArray(ipart, filenameImag));
+
+            std::string filenameMag = filename;
+            filenameMag.append("_MAG");
+            GADGET_CHECK_RETURN_FALSE(exportArray(mag, filenameMag));
+
+            std::string filenamePhase = filename;
+            filenamePhase.append("_PHASE");
+            GADGET_CHECK_RETURN_FALSE(exportArray(phs, filenamePhase));
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in exportArrayComplex(const hoNDArray<T>& a, const std::string& filename) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Build the complex array a from two real arrays imported from "<filename>_REAL" and "<filename>_IMAG".
+    /// Returns false if either import fails, the two parts differ in element count, or an exception is caught.
+    template <typename T> 
+    bool importArrayComplex(hoNDArray<T>& a, const std::string& filename)
+    {
+        try
+        {
+            typedef typename T::value_type value_type;
+            hoNDArray<value_type> real, imag;
+
+            const std::string filenameReal = filename + "_REAL";
+            GADGET_CHECK_RETURN_FALSE(importArray(real, filenameReal));
+            const std::string filenameImag = filename + "_IMAG";
+            GADGET_CHECK_RETURN_FALSE(importArray(imag, filenameImag));
+            // the loop below indexes both parts with the same n, so a shorter imag would be read out of range
+            GADGET_CHECK_RETURN_FALSE(imag.get_number_of_elements()==real.get_number_of_elements());
+
+            a.create(real.get_dimensions());
+            long long num = (long long)real.get_number_of_elements();
+
+            long long n; // declared before the pragma because its private clause names it
+            #pragma omp parallel for private(n) shared(num, a, real, imag)
+            for ( n=0; n<num; n++ )
+            {
+                a(n) = T(real(n), imag(n));
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in importArrayComplex(const hoNDArray<T>& a, const std::string& filename) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Build a from the real part in filename_real and the imaginary part in filename_imag; false on import failure, size mismatch or exception.
+    template <typename T> 
+    bool importArrayComplex(hoNDArray<T>& a, const std::string& filename_real, const std::string& filename_imag)
+    {
+        try
+        {
+            typedef typename realType<T>::Type value_type;
+            hoNDArray<value_type> real, imag;
+            GADGET_CHECK_RETURN_FALSE(importArray(real, filename_real));
+            GADGET_CHECK_RETURN_FALSE(importArray(imag, filename_imag));
+            // the loop below indexes both parts with the same n, so a shorter imag would be read out of range
+            GADGET_CHECK_RETURN_FALSE(imag.get_number_of_elements()==real.get_number_of_elements());
+
+            a.create(real.get_dimensions());
+            long long num = (long long)real.get_number_of_elements();
+            long long n; // declared before the pragma because its private clause names it
+            #pragma omp parallel for private(n) shared(num, a, real, imag)
+            for ( n=0; n<num; n++ )
+            {
+                a(n) = T(real(n), imag(n));
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in importArrayComplex(hoNDArray<T>& a, const std::string& filename_real, const std::string& filename_imag) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template <typename T> 
+    bool export2DArray(const hoNDArray<T>& a, const std::string& filename)
+    {
+        return exportArray(a, filename); // no 2D-specific handling: forwards unchanged to exportArray
+    }
+
+    template <typename T> 
+    bool import2DArray(hoNDArray<T>& a, const std::string& filename)
+    {
+        return importArray(a, filename); // no 2D-specific handling: forwards unchanged to importArray
+    }
+
+    template <typename T> 
+    bool export2DArrayComplex(const hoNDArray<T>& a, const std::string& filename)
+    {
+        return exportArrayComplex(a, filename); // no 2D-specific handling: forwards unchanged to exportArrayComplex
+    }
+
+    template <typename T> 
+    bool import2DArrayComplex(hoNDArray<T>& a, const std::string& filename)
+    {
+        return importArrayComplex(a, filename); // forwards to the single-filename importArrayComplex overload
+    }
+
+    template <typename T> 
+    bool export3DArray(const hoNDArray<T>& a, const std::string& filename)
+    {
+        return exportArray(a, filename); // no 3D-specific handling: forwards unchanged to exportArray
+    }
+
+    template <typename T> 
+    bool import3DArray(hoNDArray<T>& a, const std::string& filename)
+    {
+        return importArray(a, filename); // no 3D-specific handling: forwards unchanged to importArray
+    }
+
+    template <typename T> 
+    bool export3DArrayComplex(const hoNDArray<T>& a, const std::string& filename)
+    {
+        return exportArrayComplex(a, filename); // no 3D-specific handling: forwards unchanged to exportArrayComplex
+    }
+
+    template <typename T> 
+    bool import3DArrayComplex(hoNDArray<T>& a, const std::string& filename)
+    {
+        return importArrayComplex(a, filename); // forwards to the single-filename importArrayComplex overload
+    }
+
+    /// Export a 4D array as one 3D volume per index of dimension 3; volume ii is passed to export3DArray as "<filename>_<ii>".
+    /// Returns false if export3DArray fails for any volume or an exception is caught.
+    template <typename T> 
+    bool export4DArray(const hoNDArray<T>& a, const std::string& filename)
+    {
+        try
+        {
+            const size_t RO  = a.get_size(0);
+            const size_t E1  = a.get_size(1);
+            const size_t CHA = a.get_size(2);
+            const size_t N   = a.get_size(3);
+            const size_t volumeSize = RO*E1*CHA; // number of elements in one 3D volume
+            for ( size_t ii=0; ii<N; ii++ )
+            {
+                // sDim must own heap storage: a shared_ptr built from the address of a stack vector would call delete on it
+                boost::shared_ptr< std::vector<size_t> > sDim(new std::vector<size_t>(3));
+                (*sDim)[0] = RO;
+                (*sDim)[1] = E1;
+                (*sDim)[2] = CHA;
+                hoNDArray<T> a3D(sDim, const_cast<T*>(a.begin()+ii*volumeSize), false); // built on the data pointer of volume ii
+
+                std::ostringstream ostr;
+                ostr << filename << "_" << ii; // no std::ends: it would insert a '\0' character into the file name
+                GADGET_CHECK_RETURN_FALSE(export3DArray(a3D, ostr.str()));
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in export4DArray(const hoNDArray<T>& a, const std::string& filename) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template <typename T> // Exports each 3D volume a(:,:,:,ii) of a 4D array through export3DArrayComplex() as "<filename>_<ii>".
+    bool export4DArrayComplex(const hoNDArray<T>& a, const std::string& filename)
+    {   // Returns false as soon as one volume fails to export or an exception is caught; true otherwise.
+        try
+        {
+            const size_t RO  = a.get_size(0);
+            const size_t E1  = a.get_size(1);
+            const size_t CHA = a.get_size(2);
+            const size_t N   = a.get_size(3);
+
+            std::vector<size_t> dim(3); // shape shared by every exported volume
+            dim[0] = RO; dim[1] = E1; dim[2] = CHA;
+
+            for ( size_t ii=0; ii<N; ii++ )
+            {
+                // The shared_ptr must own a heap copy of the shape: built from &dim (a stack object)
+                // it would call delete on stack memory when its last reference is released.
+                boost::shared_ptr< std::vector<size_t> > sDim(new std::vector<size_t>(dim));
+                // 3D array over the ii-th volume of a; the 'false' presumably means the data is not owned.
+                hoNDArray<T> a3D(sDim, const_cast<T*>(a.begin()+ii*RO*E1*CHA), false);
+                // No std::ends: on an ostringstream it appends a '\0' character to the file name.
+                std::ostringstream ostr;
+                ostr << filename << "_" << ii;
+                GADGET_CHECK_RETURN_FALSE(export3DArrayComplex(a3D, ostr.str()));
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in export4DArrayComplex(const hoNDArray<T>& a, const std::string& filename) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    static bool readFromFile(const std::string& filename, char*& data, long long& length)
+    {   // Reads a length-prefixed file into a new[] buffer; the caller owns 'data' and frees it with delete [].
+        try
+        {
+            // Release any previous buffer and reset the pointer, so that no failure path below
+            // hands a dangling pointer back to the caller (delete [] on NULL is a no-op).
+            delete [] data; data = NULL;
+
+            gtPlusIOWorker ioworker_(filename, true);
+            GADGET_CHECK_RETURN_FALSE(ioworker_.open());
+
+            // The file starts with the total length, which counts this length field itself.
+            long long totalLen = 0;
+            GADGET_CHECK_RETURN_FALSE(ioworker_.read(reinterpret_cast<char*>(&totalLen), sizeof(long long)));
+            // Reject corrupt headers: a total shorter than the field itself would give a negative payload size.
+            GADGET_CHECK_RETURN_FALSE(totalLen >= static_cast<long long>(sizeof(long long)));
+            length = totalLen - static_cast<long long>(sizeof(long long));
+
+            data = new char[length]; // throws on failure (caught below) instead of returning NULL
+            GADGET_CHECK_RETURN_FALSE(ioworker_.read(data, length)); // on failure the buffer stays with the caller
+
+            GADGET_CHECK_RETURN_FALSE(ioworker_.close());
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in gtPlusIOBase::readFromFile(const std::string& filename, char*& data, long long& length) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    static bool writeToFile(const std::string& filename, char* data, long long length) // writes 'length' bytes of 'data' behind a long long length prefix
+    {
+        try
+        {
+            if ( length == 0 ) return true; // empty payload: reported as success without opening the file
+
+            GADGET_CHECK_RETURN_FALSE(data!=NULL); // NOTE(review): a negative length is not rejected anywhere in this function
+
+            gtPlusIOWorker ioworker_(filename, false); // readFromFile passes true here - presumably a read/write mode flag
+
+            GADGET_CHECK_RETURN_FALSE(ioworker_.open());
+
+            // write the total length: payload size plus the size of this length field itself
+            const long long totalLen = length+sizeof(long long);
+            GADGET_CHECK_RETURN_FALSE(ioworker_.write(reinterpret_cast<const char*>(&totalLen), sizeof(long long)));
+
+            // write the data
+            GADGET_CHECK_RETURN_FALSE(ioworker_.write(data, length));
+
+            // close the file explicitly so that a failing close is reported to the caller
+            GADGET_CHECK_RETURN_FALSE(ioworker_.close());
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in gtPlusIOBase::writeToFile(const std::string& filename, char* data, long long length) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+protected:
+
+    std::vector<float> pixelSize_;
+
+    // Data type code -> typeid(...).name() of the matching C++ type; getDataTypeFromRTTI is the reverse lookup.
+    std::string getRTTIFromDataType(GtDataType aDT)
+    {
+        const std::type_info* matched = NULL; // set by every recognised code; name() is taken once at the end
+
+        switch (aDT)
+        {
+        case DT_INT8 :
+            matched = &typeid(char);
+            break;
+
+        case DT_UNSIGNED_CHAR :
+            matched = &typeid(unsigned char);
+            break;
+
+        case DT_SIGNED_SHORT :
+            matched = &typeid(short);
+            break;
+
+        case DT_UNSIGNED_SHORT :
+        case DT_UINT16 :
+            matched = &typeid(unsigned short);
+            break;
+
+        case DT_SIGNED_INT :
+            matched = &typeid(int);
+            break;
+
+        case DT_UINT32 :
+            matched = &typeid(unsigned int);
+            break;
+
+        case DT_INT64 :
+            matched = &typeid(long long);
+            break;
+
+        case DT_UINT64 :
+            matched = &typeid(unsigned long long);
+            break;
+
+        case DT_FLOAT :
+            matched = &typeid(float);
+            break;
+
+        case DT_DOUBLE :
+            matched = &typeid(double);
+            break;
+
+        case DT_FLOAT128 :
+            matched = &typeid(long double);
+            break;
+
+        case DT_COMPLEX :
+            matched = &typeid( std::complex<float> );
+            break;
+
+        case DT_COMPLEX128 :
+            matched = &typeid( std::complex<double> );
+            break;
+
+        case DT_COMPLEX256 :
+            matched = &typeid( std::complex<long double> );
+            break;
+
+        case DT_RGB :
+            matched = &typeid( Gadgetron::rgb_type );
+            break;
+
+        case DT_RGBA32 :
+            matched = &typeid( Gadgetron::rgba_type );
+            break;
+
+        default:
+            return "UNKOWN TYPE"; // unrecognised code; this exact text is what callers receive
+        }
+
+        return matched->name();
+    }
+
+    GtDataType getDataTypeFromRTTI(const std::string& name) // reverse lookup: typeid(...).name() string -> data type code
     {
-        boost::shared_ptr< hoNDArray<T> > aRead;
-        aRead = Gadgetron::read_nd_array(filename);
-        a = *aRead;
+        GtDataType analyzeDT = DT_ANA_UNKNOWN; // returned when none of the comparisons below matches
+
+        if ( name == typeid(unsigned char).name() ) // NOTE(review): char, long long and unsigned long long (DT_INT8, DT_INT64, DT_UINT64 in getRTTIFromDataType) have no entry here
+        {
+            analyzeDT = DT_UNSIGNED_CHAR;
+        }
+
+        if ( name == typeid(short).name() ) // the tests are independent ifs, not else-ifs: every one is evaluated
+        {
+            analyzeDT = DT_SIGNED_SHORT;
+        }
+
+        if ( name == typeid(unsigned short).name() )
+        {
+            analyzeDT = DT_UINT16; // getRTTIFromDataType maps both DT_UNSIGNED_SHORT and DT_UINT16 to unsigned short; only DT_UINT16 comes back
+        }
+
+        if ( name == typeid(int).name() )
+        {
+            analyzeDT = DT_SIGNED_INT;
+        }
+
+        if ( name == typeid(unsigned int).name() )
+        {
+            analyzeDT = DT_UINT32;
+        }
+
+        if ( name == typeid(float).name() )
+        {
+            analyzeDT = DT_FLOAT;
+        }
+
+        if ( name == typeid(double).name() )
+        {
+            analyzeDT = DT_DOUBLE;
+        }
+
+        if ( name == typeid(long double).name() )
+        {
+            analyzeDT = DT_FLOAT128;
+        }
+
+        if ( name == typeid( std::complex<float> ).name() )
+        {
+            analyzeDT = DT_COMPLEX;
+        }
+
+        if ( name == typeid( std::complex<double> ).name() )
+        {
+            analyzeDT = DT_COMPLEX128;
+        }
+
+        if ( name == typeid(std::complex<long double>).name() )
+        {
+            analyzeDT = DT_COMPLEX256;
+        }
+
+        if ( name == typeid(Gadgetron::rgb_type).name() )
+        {
+            analyzeDT = DT_RGB;
+        }
+
+        if ( name == typeid(Gadgetron::rgba_type).name() )
+        {
+            analyzeDT = DT_RGBA32;
+        }
+
+        return analyzeDT;
     }
-    catch(...)
+
+    template <typename T> // reads 'len' units from the start of 'filename' straight into 'data' (no length prefix, unlike readFromFile)
+    bool readData(const std::string& filename, T* data, long long len)
     {
-        GADGET_ERROR_MSG("Errors in gtPlusIOBase::importNDArray(hoNDArray<T>& a, const std::string& filename) ... ");
-        return false;
+        try
+        {
+            gtPlusIOWorker ioworker(filename, true); // writeData passes false here - presumably a read/write mode flag
+
+            GADGET_CHECK_RETURN_FALSE(ioworker.open());
+            GADGET_CHECK_RETURN_FALSE(ioworker.read(reinterpret_cast<char*>(data), len)); // len is handed on unscaled - presumably a byte count, not a count of T; confirm
+            GADGET_CHECK_RETURN_FALSE(ioworker.close());
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in readData(const std::string& filename, T* data, long long len) ... ");
+            return false;
+        }
+
+        return true;
     }
 
-    return true;
-}*/
+    template <typename T> // writes 'len' units of 'data' to 'filename' with no length prefix (unlike writeToFile)
+    bool writeData(const std::string& filename, const T* data, long long len)
+    {
+        try
+        {
+            gtPlusIOWorker ioworker(filename, false); // readData passes true here - presumably a read/write mode flag
+
+            GADGET_CHECK_RETURN_FALSE(ioworker.open());
+            GADGET_CHECK_RETURN_FALSE(ioworker.write(reinterpret_cast<const char*>(data), len)); // len is handed on unscaled - presumably a byte count, not a count of T; confirm
+            GADGET_CHECK_RETURN_FALSE(ioworker.close());
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in writeData(const std::string& filename, const T* data, long long len) ... ");
+            return false;
+        }
+
+        return true;
+    }
+};
 
 }}
diff --git a/toolboxes/gtplus/util/gtPlusMemoryManager.cpp b/toolboxes/gtplus/util/gtPlusMemoryManager.cpp
index 078bf5d..4ddd6b9 100644
--- a/toolboxes/gtplus/util/gtPlusMemoryManager.cpp
+++ b/toolboxes/gtplus/util/gtPlusMemoryManager.cpp
@@ -312,25 +312,12 @@ bool gtPlusMemoryManager::increase(size_t added_bytes)
 
 void gtPlusMemoryManager::_allocate_memory( size_t size, void*& data )
 {
-    #ifdef USE_MKL
-        data = mkl_calloc(size, 1, aligned_bytes_);
-    #else
-        data = calloc(size, 1);
-    #endif // USE_MKL
-
-    // data = reinterpret_cast<void*>(new char[size]);
-    //memset(data, 0, size);
+    data = calloc(size, 1); // 'size' zero-filled bytes; NULL on failure is not checked here. The MKL branch that passed aligned_bytes_ is gone.
 }
 
 void gtPlusMemoryManager::_deallocate_memory( void* data )
 {
-    #ifdef USE_MKL
-        mkl_free(data);
-    #else
-         free(data);
-    #endif // USE_MKL
-
-    // delete [] data;
+    free(data); // counterpart of the calloc() in _allocate_memory; the mkl_free() branch is gone
 }
 
 size_t gtPlusMemoryManager::totalFreeMemory() const
diff --git a/toolboxes/gtplus/util/gtPlusMemoryManager.h b/toolboxes/gtplus/util/gtPlusMemoryManager.h
index f2b09ac..19a9387 100644
--- a/toolboxes/gtplus/util/gtPlusMemoryManager.h
+++ b/toolboxes/gtplus/util/gtPlusMemoryManager.h
@@ -21,8 +21,6 @@
 #include <string>
 #include <limits>
 
-#include <mkl.h>
-
 // the memory manager for large chunk allocation
 
 namespace Gadgetron { namespace gtPlus {
diff --git a/toolboxes/gtplus/util/gtPlusUtil.h b/toolboxes/gtplus/util/gtPlusUtil.h
new file mode 100644
index 0000000..f9c2c5d
--- /dev/null
+++ b/toolboxes/gtplus/util/gtPlusUtil.h
@@ -0,0 +1,96 @@
+/** \file   gtPlusUtil.h
+    \brief  Define the symbols and implement common functionalities for GtPlus toolbox
+
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "GtPlusExport.h"
+
+#include "ho2DArray.h"
+#include "ho3DArray.h"
+#include "ho4DArray.h"
+#include "ho5DArray.h"
+#include "ho6DArray.h"
+#include "ho7DArray.h"
+#include "hoMatrix.h"
+#include "hoNDFFT.h"
+#include "hoNDArray_utils.h"
+#include "hoNDArray_elemwise.h"
+#include "hoNDImage_util.h"
+#include "gtPlusIOAnalyze.h"
+#include "hoNDArrayMemoryManaged.h"
+#include "GadgetronTimer.h"
+
+#ifdef _WIN32
+    #include <random>
+    #include <array>
+#endif // _WIN32
+
+#ifdef USE_OMP
+    #include <omp.h>
+#endif // USE_OMP
+
+#include "GtPlusDefinition.h"
+
+namespace Gadgetron { namespace gtPlus {
+
+// ------------------------------------------------------------------------
+// random generator
+// ------------------------------------------------------------------------
+
+#ifdef _WIN32
+
+/// Normal (Gaussian) distribution random number generator; declared only when _WIN32 is defined
+template <typename T> 
+class gtPlusRandNorm
+{
+public:
+
+    typedef std::mt19937 RandomGeneratorType;
+
+    gtPlusRandNorm(); // default engine seed, mean 0 and sigma 1
+    gtPlusRandNorm(long long seed, T mean=0, T sigma=1); // NOTE(review): seed is long long here but unsigned long in seed() below, so it is narrowed
+    ~gtPlusRandNorm();
+
+    void seed(unsigned long seed); // re-seeds the engine
+    void setPara(T mean=0, T sigma=1); // replaces the distribution parameters
+
+    RandomGeneratorType& getRandomer() { return rng_; } // direct access to the underlying engine
+    const RandomGeneratorType& getRandomer() const { return rng_; }
+
+    bool gen(hoNDArray<T>& randNum); // overwrites every element with one draw; false if an exception was caught
+    bool gen(hoNDArray< std::complex<T> >& randNum); // real and imaginary parts are drawn separately
+
+protected:
+
+    RandomGeneratorType rng_; // engine shared by both gen() overloads
+    std::normal_distribution<T> dist_norm_; // parameters are set through setPara()
+};
+
+#endif // _WIN32
+
+template <typename T> 
+class gtPlusUtil
+{
+public:
+
+    gtPlusUtil() {}
+    ~gtPlusUtil() {}
+
+    // ------------------------------------------------------------------------
+    // utility functions for various things (neither declaration below uses T)
+    // ------------------------------------------------------------------------
+
+    /// get the current system time as UTC calendar fields (filled from gmtime)
+    /// time stores year, month (1-12), day of month, hour, minute and second, in that order
+    bool getCurrentTime(size_t time[6]);
+
+    /// convert the six calendar fields to seconds via mktime; NOTE(review): mktime reads the fields as local time - confirm
+    bool convertTimeToUTC(size_t time[6], double& tmUTC);
+};
+
+}}
+
+#include "gtPlusUtil.hxx"
diff --git a/toolboxes/gtplus/util/gtPlusUtil.hxx b/toolboxes/gtplus/util/gtPlusUtil.hxx
new file mode 100644
index 0000000..45ca4df
--- /dev/null
+++ b/toolboxes/gtplus/util/gtPlusUtil.hxx
@@ -0,0 +1,149 @@
+
+#include "gtPlusUtil.h"
+
+namespace Gadgetron { namespace gtPlus {
+
+// ------------------------------------------------------------------------
+// random generator
+// ------------------------------------------------------------------------
+
+#ifdef _WIN32
+
+template <typename T> 
+gtPlusRandNorm<T>::gtPlusRandNorm()
+{
+    rng_.seed();         // engine's default seed
+    setPara(T(0), T(1)); // mean 0, sigma 1
+}
+
+template <typename T> 
+gtPlusRandNorm<T>::gtPlusRandNorm(long long s, T mean, T sigma)
+{
+    seed(static_cast<unsigned long>(s)); // spells out the narrowing that the call already performed implicitly
+    setPara(mean, sigma);
+}
+
+template <typename T> 
+gtPlusRandNorm<T>::~gtPlusRandNorm()
+{   // intentionally empty: rng_ and dist_norm_ are value members
+}
+
+template <typename T> 
+void gtPlusRandNorm<T>::seed(unsigned long s)
+{
+    rng_.seed(s); // re-seeds the engine only; the distribution parameters set by setPara() are left alone
+}
+
+template <typename T> 
+void gtPlusRandNorm<T>::setPara(T mean, T sigma)
+{
+    // swap in a fresh parameter set; the engine state (rng_) is not touched
+    dist_norm_.param(typename std::normal_distribution<T>::param_type(mean, sigma));
+}
+
+template <typename T> 
+inline bool gtPlusRandNorm<T>::gen(hoNDArray<T>& randNum)
+{
+    // Overwrites every element of randNum, in linear index order, with one draw from dist_norm_.
+    try
+    {
+        const size_t count = randNum.get_number_of_elements();
+        for ( size_t idx=0; idx<count; ++idx )
+        {
+            randNum(idx) = dist_norm_(rng_);
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors in gtPlusRandNorm<T>::gen(hoNDArray<T>& randNum) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T> 
+inline bool gtPlusRandNorm<T>::gen(hoNDArray< std::complex<T> >& randNum)
+{
+    // For every element the real part is drawn before the imaginary part; the two draws stay
+    // separate statements so that their order is fixed.
+    try
+    {
+        const size_t count = randNum.get_number_of_elements();
+
+        for ( size_t idx=0; idx<count; ++idx )
+        {
+            const T re = dist_norm_(rng_);
+            const T im = dist_norm_(rng_);
+
+            randNum(idx) = std::complex<T>(re, im);
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors in gtPlusRandNorm<T>::gen(hoNDArray< std::complex<T> >& randNum) ... ");
+        return false;
+    }
+
+    return true;
+}
+#endif // _WIN32
+
+// ------------------------------------------------------------------------
+// utility functions for various things
+// ------------------------------------------------------------------------
+
+template <typename T> 
+bool gtPlusUtil<T>::getCurrentTime(size_t time[6])
+{   // Fills time[0..5] with the current UTC year, month (1-12), day, hour, minute, second; false on failure.
+    try
+    {
+        time_t rawtime;
+        std::time(&rawtime);
+
+        // gmtime() returns NULL when the conversion fails; it was dereferenced unchecked before.
+        const struct tm* timeinfo = std::gmtime(&rawtime);
+        GADGET_CHECK_RETURN_FALSE(timeinfo!=NULL);
+        time[0] = timeinfo->tm_year+1900;
+        time[1] = timeinfo->tm_mon+1;
+        time[2] = timeinfo->tm_mday;
+        time[3] = timeinfo->tm_hour;
+        time[4] = timeinfo->tm_min;
+        time[5] = timeinfo->tm_sec;
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in gtPlusUtil<T>::getCurrentTime(size_t time[6]) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T> 
+bool gtPlusUtil<T>::convertTimeToUTC(size_t time[6], double& tmUTC)
+{   // Packs time[0..5] (year, month 1-12, day, hour, minute, second) into a struct tm; tmUTC receives mktime() of it.
+    try
+    {
+        // NOTE(review): mktime() reads the fields as local time, while getCurrentTime() fills them from gmtime() - confirm.
+        struct tm fields;
+        fields.tm_year  = static_cast<int>(time[0]-1900);
+        fields.tm_mon   = static_cast<int>(time[1] - 1);
+        fields.tm_mday  = static_cast<int>(time[2]);
+        fields.tm_hour  = static_cast<int>(time[3]);
+        fields.tm_min   = static_cast<int>(time[4]);
+        fields.tm_sec   = static_cast<int>(time[5]);
+        fields.tm_isdst = 0;
+
+        tmUTC = static_cast<double>(mktime(&fields));
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Error happened in gtPlusUtil<T>::convertTimeToUTC(size_t time[6], double& tmUTC) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+}}
diff --git a/toolboxes/gtplus/workflow/gtPlusCloudScheduler.cpp b/toolboxes/gtplus/workflow/gtPlusCloudScheduler.cpp
index 3958ecd..745c99f 100644
--- a/toolboxes/gtplus/workflow/gtPlusCloudScheduler.cpp
+++ b/toolboxes/gtplus/workflow/gtPlusCloudScheduler.cpp
@@ -43,7 +43,7 @@ void gtPlusCloudScheduler::setUpNodes(size_t numOfNodes)
         node_id_computing_power_indexes_.resize(num_of_nodes_);
         for ( size_t ii=0; ii<num_of_nodes_; ii++ )
         {
-            node_id_computing_power_indexes_[ii].first = ii;
+            node_id_computing_power_indexes_[ii].first = (int)ii;
             node_id_computing_power_indexes_[ii].second = 1.0;
         }
     }
diff --git a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconCoilMapEstimation.h b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconCoilMapEstimation.h
new file mode 100644
index 0000000..1ce83a1
--- /dev/null
+++ b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconCoilMapEstimation.h
@@ -0,0 +1,137 @@
+/** \file   gtPlusISMRMRDReconCoilMapEstimation.h
+    \brief  Implement coil map estimation methods.
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "GtPlusExport.h"
+#include "gtPlusISMRMRDReconUtil.h"
+#include "gtPlusSPIRIT.h"
+
+namespace Gadgetron { namespace gtPlus {
+
+// ================================================================================================== //
+
+template <typename T> 
+class gtPlusISMRMRDReconCoilMapEstimation
+{
+public:
+
+    typedef typename realType<T>::Type value_type; // used for the threshold and for the eigD output
+
+    gtPlusISMRMRDReconCoilMapEstimation();
+    virtual ~gtPlusISMRMRDReconCoilMapEstimation();
+
+    void printInfo(std::ostream& os); // NOTE(review): declared here, but no definition appears in this header
+
+    // compute dual coil map via SPIRIT calibration and a per-pixel eigen-decomposition
+    // data : ref kspace [RO E1 CHA]
+    // coilMap : [RO E1 CHA 2]; its incoming RO and E1 sizes set the output size. eigD is created as [RO E1 2].
+    bool coilMap2DSPIRIT(const hoNDArray<T>& data, hoNDArray<T>& coilMap, hoNDArray<value_type>& eigD, size_t kRO, size_t kE1, value_type thres=0.01);
+};
+
+template <typename T> 
+gtPlusISMRMRDReconCoilMapEstimation<T>::gtPlusISMRMRDReconCoilMapEstimation()
+{   // intentionally empty: the class declares no data members
+}
+
+template <typename T> 
+gtPlusISMRMRDReconCoilMapEstimation<T>::~gtPlusISMRMRDReconCoilMapEstimation()
+{   // intentionally empty: the class declares no data members
+}
+
+template <typename T> 
+bool gtPlusISMRMRDReconCoilMapEstimation<T>::coilMap2DSPIRIT(const hoNDArray<T>& data, hoNDArray<T>& coilMap, hoNDArray<value_type>& eigD, size_t kRO, size_t kE1, value_type thres)
+{
+    try
+    {
+        gtPlusSPIRIT<T> spirit;
+
+        size_t oRO = 1;
+        size_t oE1 = 1;
+
+        size_t RO = coilMap.get_size(0); // output size comes from the incoming coilMap, read before it is re-created below
+        size_t E1 = coilMap.get_size(1);
+        size_t CHA = data.get_size(2);
+
+        ho3DArray<T> acsSrc(data.get_size(0), data.get_size(1), CHA, const_cast<T*>(data.begin())); // source and destination are both
+        ho3DArray<T> acsDst(data.get_size(0), data.get_size(1), CHA, const_cast<T*>(data.begin())); // built over the same 'data' buffer
+
+        ho6DArray<T> ker(kRO, kE1, CHA, CHA, oRO, oE1);
+
+        GADGET_CHECK_RETURN_FALSE(spirit.calib(acsSrc, acsDst, thres, kRO, kE1, oRO, oE1, ker));
+
+        // std::string debugFolder_ = "D:/gtuser/mrprogs/gadgetron/toolboxes/gtplus/ut/result/";
+        // Gadgetron::gtPlus::gtPlusIOAnalyze gt_exporter_;
+        // GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, ker, "ker");
+
+        bool minusI = false; // the identity is subtracted by hand in the pixel loop below instead
+        hoNDArray<T> kIm(RO, E1, CHA, CHA);
+        GADGET_CHECK_RETURN_FALSE(spirit.imageDomainKernel(ker, kRO, kE1, oRO, oE1, RO, E1, kIm, minusI));
+        T* pkIm = kIm.begin();
+
+        // GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, kIm, "kIm");
+
+        coilMap.create(RO, E1, CHA, 2);
+        eigD.create(RO, E1, 2);
+
+        long long ro, e1, scha, dcha; // signed loop counters; they are compared against the size_t bounds RO, E1, CHA
+
+        #pragma omp parallel default(none) private(ro, e1, scha, dcha) shared(RO, E1, CHA, pkIm, coilMap, eigD)
+        {
+            hoMatrix<T> R(CHA, CHA), RC(CHA, CHA), RRT(CHA, CHA); // declared inside the parallel region: one set per thread
+            Gadgetron::clear(RRT);
+
+            hoMatrix<value_type> eigenValue;
+
+            #pragma omp for 
+            for ( e1=0; e1<E1; e1++ )
+            {
+                for ( ro=0; ro<RO; ro++ )
+                {
+                    const size_t offset = e1*RO + ro; // linear index of pixel (ro, e1) within one RO*E1 plane
+
+                    for ( dcha=0; dcha<CHA; dcha++ )
+                    {
+                        for ( scha=0; scha<CHA; scha++ )
+                        {
+                            // T v = kIm(ro, e1, scha, dcha);
+                            T v = pkIm[dcha*RO*E1*CHA + scha*RO*E1 + offset];
+                            if ( scha == dcha )
+                            {
+                                v -= 1; // R is the CHA x CHA kernel at this pixel minus the identity
+                            }
+
+                            R(scha, dcha) = v;
+                        }
+                    }
+
+                    memcpy(RC.begin(), R.begin(), sizeof(T)*CHA*CHA); // RC is a copy of R used as the left operand
+                    Gadgetron::gemm(RRT, RC, false, R, true); // presumably RRT = RC * R^H (flags false/true) - confirm against gemm
+
+                    Gadgetron::heev(RRT, eigenValue); // presumably leaves eigenvectors in RRT and eigenvalues in eigenValue - confirm
+
+                    for ( scha=0; scha<CHA; scha++ )
+                    {
+                        coilMap(ro, e1, scha, 0) = RRT(scha, 0); // columns 0 and 1 are read, so CHA must be at least 2
+                        coilMap(ro, e1, scha, 1) = RRT(scha, 1);
+                        eigD(ro, e1, 0) = 1.0 - eigenValue(0, 0); // independent of scha: the same value is rewritten CHA times
+                        eigD(ro, e1, 1) = 1.0 - eigenValue(1, 0);
+                    }
+                }
+            }
+        }
+
+        Gadgetron::conjugate(coilMap, coilMap); // same array as input and output
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors in gtPlusISMRMRDReconCoilMapEstimation<T>::coilMap2DSPIRIT(...) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+}}
diff --git a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconUtil.cpp b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconUtil.cpp
index a82824a..649963c 100644
--- a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconUtil.cpp
+++ b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconUtil.cpp
@@ -9,10 +9,2552 @@ namespace Gadgetron { namespace gtPlus {
 
 template EXPORTGTPLUS class gtPlusISMRMRDReconUtil<float>;
 template EXPORTGTPLUS class gtPlusISMRMRDReconUtil<double>;
-template EXPORTGTPLUS class gtPlusISMRMRDReconUtil<GT_Complex8>;
-template EXPORTGTPLUS class gtPlusISMRMRDReconUtil<GT_Complex16>;
+template EXPORTGTPLUS class gtPlusISMRMRDReconUtil< std::complex<float> >;
+template EXPORTGTPLUS class gtPlusISMRMRDReconUtil< std::complex<double> >;
 
-template EXPORTGTPLUS class gtPlusISMRMRDReconUtilComplex<GT_Complex8>;
-template EXPORTGTPLUS class gtPlusISMRMRDReconUtilComplex<GT_Complex16>;
+template EXPORTGTPLUS class gtPlusISMRMRDReconUtilComplex< std::complex<float> >;
+template EXPORTGTPLUS class gtPlusISMRMRDReconUtilComplex< std::complex<double> >;
 
 }}
+
+namespace Gadgetron {
+
+    // ----------------------------------------------------------------------------------------
+    // templated functions
+    // ----------------------------------------------------------------------------------------
+
+    template<typename T> 
+    bool sumOverLastDimension(const hoNDArray<T>& x, hoNDArray<T>& r)
+    {   // r = sum of x over its last dimension; r is given x's leading NDim-1 dimensions.
+        try
+        {
+            const boost::shared_ptr< std::vector<size_t> > dimX = x.get_dimensions();
+            const size_t NDim = dimX->size();
+
+            // shape of the result: the shape of x without its last entry
+            std::vector<size_t> dimR(NDim-1);
+            for ( size_t k=0; k<NDim-1; k++ )
+            {
+                dimR[k] = (*dimX)[k];
+            }
+
+            // r is only re-created when its current shape differs
+            if ( !r.dimensions_equal(&dimR) )
+            {
+                r.create(&dimR);
+            }
+
+            const size_t lastDim = x.get_size(NDim-1);
+            if ( lastDim <= 1 )
+            {
+                // nothing to accumulate: r is a plain copy of x
+                memcpy(r.begin(), x.begin(), x.get_number_of_bytes());
+                return true;
+            }
+
+            const size_t NR = r.get_number_of_elements();
+            T* const pX = const_cast<T*>(x.begin());
+
+            // seed r with the first block of NR elements ...
+            memcpy(r.begin(), pX, sizeof(T)*NR);
+
+            // ... then add each of the remaining lastDim-1 blocks onto it
+            hoNDArray<T> slab;
+            for ( size_t s=1; s<lastDim; s++ )
+            {
+                // slab is re-created over the s-th block of x on every pass
+                slab.create(&dimR, pX+s*NR);
+                add(slab, r, r);
+            }
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in sumOverLastDimension(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    template<typename T> 
+    bool sumOverSecondLastDimension(const hoNDArray<T>& x, hoNDArray<T>& r) // sums x over its second-to-last dimension; r keeps the other dimensions
+    {
+        try
+        {
+            boost::shared_ptr< std::vector<size_t> > dim = x.get_dimensions();
+            size_t NDim = dim->size();
+
+            if ( NDim < 2 ) return true; // NOTE(review): reports success but leaves r untouched
+
+            std::vector<size_t> dimR(NDim-1); // shape of r: the shape of x with the second-to-last entry removed
+            std::vector<size_t> dimRInternal(NDim-2); // shape of one block: the leading NDim-2 dimensions of x
+
+            size_t d;
+            for ( d=0; d<NDim-2; d++ )
+            {
+                dimR[d] = (*dim)[d];
+                dimRInternal[d] = (*dim)[d];
+            }
+            dimR[NDim-2] = (*dim)[NDim-1];
+
+            if ( !r.dimensions_equal(&dimR) )
+            {
+                r.create(&dimR);
+            }
+
+            if ( x.get_size(NDim-2) <= 1 )
+            {
+                memcpy(r.begin(), x.begin(), x.get_number_of_bytes()); // nothing to sum: r is a plain copy of x
+                return true;
+            }
+
+            size_t lastDim = x.get_size(NDim-1); // NOTE(review): a zero here would divide by zero two lines below
+            size_t secondLastDim = x.get_size(NDim-2);
+            size_t NS = x.get_number_of_elements()/lastDim; // elements of x per index of the last dimension
+            size_t NR = r.get_number_of_elements()/lastDim; // elements of r per index of the last dimension
+            T* pA = const_cast<T*>(x.begin());
+            T* pR = r.begin();
+
+            int l;
+            #pragma omp parallel default(none) private(l) shared(lastDim, secondLastDim, NS, NR, pA, pR, dimRInternal)
+            {
+                hoNDArray<T> tmp, tmp2; // declared inside the parallel region: one pair per thread
+
+                #pragma omp for
+                for ( l=0; l<(int)lastDim; l++ )
+                {
+                    memcpy(pR+l*NR, pA+l*NS, sizeof(T)*NR); // start the l-th output block from the first input block
+                    tmp.create(&dimRInternal, pR+l*NR); // presumably wraps r's memory without copying - confirm
+                    for ( size_t s=1; s<secondLastDim; s++ )
+                    {
+                        tmp2.create(&dimRInternal, pA+l*NS+s*NR);
+                        add(tmp, tmp2, tmp); // adds input block s onto tmp
+                    }
+                }
+            }
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in sumOverSecondLastDimension(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    // e.g. x is 3D and y is 4D array, r(:,:,:,n) = y(:,:,:,n) .* x
+    template<typename T> 
+    bool multiplyOverLastDimension(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r)
+    {   // x must have one dimension fewer than y; r takes the shape of y. Returns false on a failed check or exception.
+        try
+        {
+            boost::shared_ptr< std::vector<size_t> > dimX = x.get_dimensions();
+            boost::shared_ptr< std::vector<size_t> > dimY = y.get_dimensions();
+
+            size_t NDim = dimY->size();
+
+            GADGET_CHECK_RETURN_FALSE(dimX->size()==NDim-1);
+
+            if ( !r.dimensions_equal(dimY.get()) )
+            {
+                r.create(dimY);
+            }
+
+            if ( y.get_size(NDim-1) <= 1 )
+            {
+                multiply(x, y, r); // a single block: no per-block loop needed
+                return true;
+            }
+
+            size_t lastDim = y.get_size(NDim-1);
+            size_t N = x.get_number_of_elements(); // elements per block of y and r
+            // x goes to multiply() as a whole, so only y and r need raw block pointers (unused pX removed)
+            const T* pY = y.begin();
+            T* pR = r.begin();
+
+            int d;
+
+            #ifdef GCC_OLD_FLAG
+                #pragma omp parallel default(none) private(d) shared(dimX, lastDim, N, pY, pR)
+            #else
+                #pragma omp parallel default(none) private(d) shared(x, dimX, lastDim, N, pY, pR)
+            #endif
+            {
+                hoNDArray<T> tmpY, tmpR; // declared inside the parallel region: one pair per thread
+
+                #pragma omp for
+                for ( d=0; d<(int)lastDim; d++ )
+                {
+                    tmpY.create(dimX.get(), const_cast<T*>(pY+d*N));
+                    tmpR.create(dimX.get(), pR+d*N);
+                    multiply(x, tmpY, tmpR); // d-th block of r = x times d-th block of y
+                }
+            }
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in multiplyOverLastDimension(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    // e.g. x is 3D and y is 4D array, r(:,:,:,n) = y(:,:,:,n) ./ x
+    template<typename T> 
+    bool divideOverLastDimension(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r) // x must have one dimension fewer than y; r takes the shape of y
+    {
+        try
+        {
+            boost::shared_ptr< std::vector<size_t> > dimX = x.get_dimensions();
+            boost::shared_ptr< std::vector<size_t> > dimY = y.get_dimensions();
+
+            size_t NDim = dimY->size();
+
+            GADGET_CHECK_RETURN_FALSE(dimX->size()==NDim-1);
+
+            if ( !r.dimensions_equal(dimY.get()) )
+            {
+                r.create(dimY);
+            }
+
+            if ( y.get_size(NDim-1) <= 1 )
+            {
+                divide(y, x, r); // a single block: y is the dividend and x the divisor, as in the loop below
+                return true;
+            }
+
+            size_t lastDim = y.get_size(NDim-1);
+            size_t N = x.get_number_of_elements(); // elements per block of y and r
+            T* pY = const_cast<T*>(y.begin());
+            T* pR = r.begin();
+
+            int d;
+
+            #ifdef GCC_OLD_FLAG
+                #pragma omp parallel default(none) private(d) shared(dimX, lastDim, N, pY, pR)
+            #else
+                #pragma omp parallel default(none) private(d) shared(x, dimX, lastDim, N, pY, pR)
+            #endif
+            {
+                hoNDArray<T> tmpY, tmpR; // declared inside the parallel region: one pair per thread
+
+                #pragma omp for
+                for ( d=0; d<(int)lastDim; d++ )
+                {
+                    tmpY.create(dimX, pY+d*N); // passes the shared_ptr itself; multiplyOverLastDimension passes dimX.get()
+                    tmpR.create(dimX, pR+d*N);
+                    divide(tmpY, x, tmpR); // d-th block of r = d-th block of y divided by x
+                }
+            }
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in divideOverLastDimension(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    template<typename T> 
+    bool sumOver1stDimension(const hoNDArray<T>& x, hoNDArray<T>& r)
+    {   // r gets the shape of x with dimension 0 set to 1; each entry is the sum of one run of RO consecutive elements.
+        try
+        {
+            size_t RO = x.get_size(0);
+            GADGET_CHECK_RETURN_FALSE(RO>0); // an empty first dimension would divide by zero on the next line
+            size_t num = x.get_number_of_elements()/(RO);
+            boost::shared_ptr< std::vector<size_t> > dim = x.get_dimensions();
+
+            std::vector<size_t> dimAve(*dim);
+            dimAve[0] = 1;
+            r.create(&dimAve);
+
+            const T* pX = x.begin();
+            T* pR = r.begin();
+
+            int n;
+            #pragma omp parallel for default(none) private(n) shared(RO, num, pX, pR)
+            for ( n=0; n<(int)num; n++ )
+            {
+                T xsum = pX[n*RO]; // start from the first element of the n-th run
+                for (size_t ro=1; ro<RO; ro++ )
+                {
+                    xsum += pX[n*RO+ro];
+                }
+
+                pR[n] = xsum;
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in sumOver1stDimension(...) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template<typename T> 
+    bool sumOver2ndDimension(const hoNDArray<T>& x, hoNDArray<T>& r)
+    {   // r gets the shape of x with dimension 1 set to 1; each RO-long line of r is the sum of E1 lines of x.
+        try
+        {
+            size_t NDim = x.get_number_of_dimensions();
+
+            if ( NDim < 2 )
+            {
+                r = x; // no second dimension to sum over: r becomes a copy of x
+                return true;
+            }
+
+            size_t RO = x.get_size(0);
+            size_t E1 = x.get_size(1);
+            GADGET_CHECK_RETURN_FALSE(RO*E1>0); // a zero-sized RO or E1 would divide by zero on the next line
+            size_t num = x.get_number_of_elements()/(RO*E1);
+
+            boost::shared_ptr< std::vector<size_t> > dim = x.get_dimensions();
+
+            std::vector<size_t> dimAve(*dim);
+            dimAve[1] = 1;
+            r.create(&dimAve);
+
+            int n;
+            #ifdef GCC_OLD_FLAG
+                #pragma omp parallel for default(none) private(n) shared(RO, E1, num)
+            #else
+                #pragma omp parallel for default(none) private(n) shared(RO, E1, num, x, r)
+            #endif
+            for ( n=0; n<(int)num; n++ )
+            {
+                hoNDArray<T> xsum(RO, const_cast<T*>(r.begin()+n*RO)); // presumably wraps the n-th line of r without copying - confirm
+                memcpy(xsum.begin(), x.begin()+n*RO*E1, xsum.get_number_of_bytes()); // start from the first line of the n-th plane
+
+                for (size_t e1=1; e1<E1; e1++ )
+                {
+                    hoNDArray<T> x1D(RO, const_cast<T*>(x.begin()+n*RO*E1+e1*RO));
+                    Gadgetron::add(x1D, xsum, xsum); // accumulate line e1 of the n-th plane
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in sumOver2ndDimension(...) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    // Sum x over its third dimension.
+    // r is re-created with x's dimensions except that the third one is 1.
+    // If x has fewer than three dimensions it is copied to r unchanged.
+    // Returns true on success, false (after logging) if an exception was caught.
+    template<typename T> 
+    bool sumOver3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& r)
+    {
+        try
+        {
+            size_t NDim = x.get_number_of_dimensions();
+
+            if ( NDim < 3 )
+            {
+                r = x;
+                return true;
+            }
+
+            size_t RO = x.get_size(0);
+            size_t E1 = x.get_size(1);
+            size_t CHA = x.get_size(2);
+
+            size_t num = x.get_number_of_elements()/(RO*E1*CHA);
+
+            boost::shared_ptr< std::vector<size_t> > dim = x.get_dimensions();
+
+            std::vector<size_t> dimAve(*dim);
+            dimAve[2] = 1;
+            r.create(&dimAve);
+
+            // Share raw base pointers instead of the array references, as
+            // sumOver1stDimension does; one pragma then serves every compiler.
+            const T* pX = x.begin();
+            T* pR = r.begin();
+
+            // signed 64-bit counter: an int would truncate num for very large arrays
+            long long n;
+            #pragma omp parallel default(none) private(n) shared(RO, E1, CHA, num, pX, pR) if (num>1)
+            {
+                // declared inside the parallel region: one pair per thread,
+                // re-pointed with create() on every iteration
+                hoNDArray<T> xsum;
+                hoNDArray<T> x2D;
+
+                #pragma omp for
+                for ( n=0; n<(long long)num; n++ )
+                {
+                    // accumulate straight into the n-th [RO E1] slab of r
+                    xsum.create(RO, E1, pR+n*RO*E1);
+                    memcpy(xsum.begin(), pX+n*RO*E1*CHA, xsum.get_number_of_bytes());
+
+                    for (size_t cha=1; cha<CHA; cha++ )
+                    {
+                        // create() takes T*, hence the cast; x2D is only read
+                        x2D.create(RO, E1, const_cast<T*>(pX+n*RO*E1*CHA+cha*RO*E1));
+                        Gadgetron::add(x2D, xsum, xsum);
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in sumOver3rdDimension(...) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    // Sum x over its fourth dimension.
+    // r is re-created with x's dimensions except that the fourth one is 1.
+    // If x has fewer than four dimensions it is copied to r unchanged.
+    // Returns true on success, false (after logging) if an exception was caught.
+    template<typename T> bool sumOver4thDimension(const hoNDArray<T>& x, hoNDArray<T>& r)
+    {
+        try
+        {
+            size_t NDim = x.get_number_of_dimensions();
+
+            if ( NDim < 4 )
+            {
+                r = x;
+                return true;
+            }
+
+            size_t RO = x.get_size(0);
+            size_t E1 = x.get_size(1);
+            size_t CHA = x.get_size(2);
+            size_t N = x.get_size(3);
+
+            size_t num = x.get_number_of_elements()/(RO*E1*CHA*N);
+
+            boost::shared_ptr< std::vector<size_t> > dim = x.get_dimensions();
+
+            std::vector<size_t> dimAve(*dim);
+            dimAve[3] = 1;
+            r.create(&dimAve);
+
+            // Share raw base pointers instead of the array references, as
+            // sumOver1stDimension does; one pragma then serves every compiler.
+            const T* pX = x.begin();
+            T* pR = r.begin();
+
+            // signed 64-bit counter: an int would truncate num for very large arrays
+            long long n;
+            #pragma omp parallel for default(none) private(n) shared(RO, E1, CHA, N, num, pX, pR)
+            for ( n=0; n<(long long)num; n++ )
+            {
+                // accumulate straight into the n-th [RO E1 CHA] slab of r
+                hoNDArray<T> xsum(RO, E1, CHA, pR+n*RO*E1*CHA);
+                memcpy(xsum.begin(), pX+n*RO*E1*CHA*N, xsum.get_number_of_bytes());
+
+                for (size_t nn=1; nn<N; nn++ )
+                {
+                    // the pointer constructor takes T*, hence the cast; x3D is only read
+                    hoNDArray<T> x3D(RO, E1, CHA, const_cast<T*>(pX+n*RO*E1*CHA*N+nn*RO*E1*CHA));
+                    Gadgetron::add(x3D, xsum, xsum);
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in sumOver4thDimension(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    // Sum x over its fifth dimension.
+    // r is re-created with x's dimensions except that the fifth one is 1.
+    // If x has fewer than five dimensions it is copied to r unchanged.
+    // Returns true on success, false (after logging) if an exception was caught.
+    template<typename T> bool sumOver5thDimension(const hoNDArray<T>& x, hoNDArray<T>& r)
+    {
+        try
+        {
+            size_t NDim = x.get_number_of_dimensions();
+
+            if ( NDim < 5 )
+            {
+                r = x;
+                return true;
+            }
+
+            size_t RO = x.get_size(0);
+            size_t E1 = x.get_size(1);
+            size_t CHA = x.get_size(2);
+            size_t N = x.get_size(3);
+            size_t S = x.get_size(4);
+
+            size_t num = x.get_number_of_elements()/(RO*E1*CHA*N*S);
+
+            boost::shared_ptr< std::vector<size_t> > dim = x.get_dimensions();
+
+            std::vector<size_t> dimAve(*dim);
+            dimAve[4] = 1;
+            r.create(&dimAve);
+
+            // Share raw base pointers instead of the array references, as
+            // sumOver1stDimension does; one pragma then serves every compiler.
+            const T* pX = x.begin();
+            T* pR = r.begin();
+
+            // signed 64-bit counter: an int would truncate num for very large arrays
+            long long n;
+            #pragma omp parallel for default(none) private(n) shared(RO, E1, CHA, N, S, num, pX, pR) if (num > 4)
+            for ( n=0; n<(long long)num; n++ )
+            {
+                // accumulate straight into the n-th [RO E1 CHA N] slab of r
+                hoNDArray<T> xsum(RO, E1, CHA, N, pR+n*RO*E1*CHA*N);
+                memcpy(xsum.begin(), pX+n*RO*E1*CHA*N*S, xsum.get_number_of_bytes());
+
+                for (size_t s=1; s<S; s++ )
+                {
+                    // the pointer constructor takes T*, hence the cast; x4D is only read
+                    hoNDArray<T> x4D(RO, E1, CHA, N, const_cast<T*>(pX+n*RO*E1*CHA*N*S+s*RO*E1*CHA*N));
+                    Gadgetron::add(x4D, xsum, xsum);
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in sumOver5thDimension(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    // Multiply every 2D frame of y4D with the matching 2D frame of x3D.
+    // With x3D laid out as [RO E1 T] and y4D as [RO E1 N T] (T collects all
+    // trailing dimensions of y4D): r(:,:,n,t) = y4D(:,:,n,t) .* x3D(:,:,t)
+    // r is re-created with y4D's dimensions when its own differ.
+    // Returns false if a dimension check fails or an exception is caught.
+    template<typename T> 
+    bool multiplyOver3rdDimension(const hoNDArray<T>& x3D, const hoNDArray<T>& y4D, hoNDArray<T>& r)
+    {
+        try
+        {
+            boost::shared_ptr< std::vector<size_t> > dimX = x3D.get_dimensions();
+            boost::shared_ptr< std::vector<size_t> > dimY = y4D.get_dimensions();
+
+            size_t NDim = dimY->size();
+
+            GADGET_CHECK_RETURN_FALSE(dimX->size()>=3);
+            GADGET_CHECK_RETURN_FALSE(NDim>=4);
+            GADGET_CHECK_RETURN_FALSE((*dimX)[0]==(*dimY)[0]);
+            GADGET_CHECK_RETURN_FALSE((*dimX)[1]==(*dimY)[1]);
+
+            if ( !r.dimensions_equal(dimY.get()) )
+            {
+                r.create(dimY);
+            }
+
+            // N2D: elements per 2D frame; sz: frames along y4D's 3rd dimension;
+            // st: number of x3D frames consumed by the loop below
+            long long t, N2D = (long long)x3D.get_size(0)*x3D.get_size(1);
+            long long sz = (long long)y4D.get_size(2);
+            long long st = (long long)y4D.get_number_of_elements()/(N2D*sz);
+
+            if ( sz == 1 )
+            {
+                // nothing to repeat over: plain element-wise product
+                multiply(x3D, y4D, r);
+                return true;
+            }
+
+            // The loop reads st consecutive 2D frames from x3D; reject an x3D
+            // that is too small instead of reading past its end.
+            GADGET_CHECK_RETURN_FALSE((long long)x3D.get_number_of_elements() >= N2D*st);
+
+            const T* pX = x3D.begin();
+            const T* pY = y4D.begin();
+            T* pR = r.begin();
+
+            std::vector<size_t> dim2D(2);
+            dim2D[0] = (*dimY)[0];
+            dim2D[1] = (*dimY)[1];
+
+            #pragma omp parallel for default(none) private(t) shared(N2D, sz, st, dim2D, pX, pY, pR)
+            for ( t=0; t<st; t++ )
+            {
+                // 2D arrays built on existing storage via create(dims, pointer)
+                hoNDArray<T> tmpX, tmpY, tmpR;
+                tmpX.create(&dim2D, const_cast<T*>(pX+t*N2D));
+
+                for ( long long z=0; z<sz; z++ )
+                {
+                    tmpY.create(&dim2D, const_cast<T*>(pY+t*N2D*sz+z*N2D));
+                    tmpR.create(&dim2D, pR+t*N2D*sz+z*N2D);
+                    multiply(tmpX, tmpY, tmpR);
+                }
+            }
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in multiplyOver3rdDimension(const hoNDArray<float>& x3D, const hoNDArray<float>& y4D, hoNDArray<float>& r) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    // Multiply every 3D block of y5D with the matching 3D block of x4D.
+    // With x4D laid out as [RO E1 CHA T] and y5D as [RO E1 CHA N T]:
+    // r(:,:,:,n,t) = y5D(:,:,:,n,t) .* x4D(:,:,:,t)
+    // r is re-created with y5D's dimensions when its own differ.
+    // Returns false if a dimension check fails or an exception is caught.
+    template<typename T> 
+    bool multiplyOver4thDimension(const hoNDArray<T>& x4D, const hoNDArray<T>& y5D, hoNDArray<T>& r)
+    {
+        try
+        {
+            boost::shared_ptr< std::vector<size_t> > dimX = x4D.get_dimensions();
+            boost::shared_ptr< std::vector<size_t> > dimY = y5D.get_dimensions();
+
+            size_t NDim = dimY->size();
+
+            GADGET_CHECK_RETURN_FALSE(dimX->size()>=4);
+            // (*dimY)[3] is read below, so y5D needs at least four dimensions
+            GADGET_CHECK_RETURN_FALSE(NDim>=4);
+            GADGET_CHECK_RETURN_FALSE((*dimX)[0]==(*dimY)[0]);
+            GADGET_CHECK_RETURN_FALSE((*dimX)[1]==(*dimY)[1]);
+            GADGET_CHECK_RETURN_FALSE((*dimX)[2]==(*dimY)[2]);
+
+            if ( !r.dimensions_equal(dimY.get()) )
+            {
+                r.create(dimY);
+            }
+
+            size_t RO = (*dimX)[0];
+            size_t E1 = (*dimX)[1];
+            size_t CHA = (*dimX)[2];
+
+            long long t, N3D = (long long)RO*E1*CHA;
+
+            size_t N = (*dimY)[3];
+            size_t num = x4D.get_number_of_elements()/(RO*E1*CHA);
+
+            // The loop touches num*N blocks of RO*E1*CHA elements in y5D and r;
+            // reject a y5D that is too small instead of running past its end.
+            GADGET_CHECK_RETURN_FALSE(y5D.get_number_of_elements() >= num*N*RO*E1*CHA);
+
+            const T* pX = x4D.begin();
+            const T* pY = y5D.begin();
+            T* pR = r.begin();
+
+            std::vector<size_t> dim3D(3);
+            dim3D[0] = RO;
+            dim3D[1] = E1;
+            dim3D[2] = CHA;
+
+            #pragma omp parallel for default(none) private(t) shared(N3D, N, dim3D, pX, pY, pR, num)
+            for ( t=0; t<(long long)num; t++ )
+            {
+                // 3D arrays built on existing storage via create(dims, pointer)
+                hoNDArray<T> tmpX, tmpY, tmpR;
+                tmpX.create(&dim3D, const_cast<T*>(pX+t*N3D));
+
+                for ( size_t n=0; n<N; n++ )
+                {
+                    tmpY.create(&dim3D, const_cast<T*>(pY+t*N3D*N+n*N3D));
+                    tmpR.create(&dim3D, pR+t*N3D*N+n*N3D);
+                    multiply(tmpX, tmpY, tmpR);
+                }
+            }
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in multiplyOver4thDimension(const hoNDArray<float>& x4D, const hoNDArray<float>& y5D, hoNDArray<float>& r) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    // Same product as multiplyOver4thDimension, but the block at index n of the
+    // 4th dimension is not multiplied:
+    //   r(:,:,:,z,t) = y5D(:,:,:,z,t) .* x4D(:,:,:,t)   for z != n
+    // For z == n the block is copied from y5D to r when copyY2R is true and r
+    // does not share y5D's storage; otherwise it is left untouched.
+    // r may be y5D itself (in-place); it is re-created with y5D's dimensions
+    // only when it is a different buffer whose dimensions differ.
+    // Returns false if a dimension check fails or an exception is caught.
+    template<typename T> 
+    bool multiplyOver4thDimensionExcept(const hoNDArray<T>& x4D, const hoNDArray<T>& y5D, size_t n, hoNDArray<T>& r, bool copyY2R)
+    {
+        try
+        {
+            boost::shared_ptr< std::vector<size_t> > dimX = x4D.get_dimensions();
+            boost::shared_ptr< std::vector<size_t> > dimY = y5D.get_dimensions();
+
+            size_t NDim = dimY->size();
+
+            GADGET_CHECK_RETURN_FALSE(dimX->size()>=4);
+            // (*dimY)[3] is read below, so y5D needs at least four dimensions
+            GADGET_CHECK_RETURN_FALSE(NDim>=4);
+            GADGET_CHECK_RETURN_FALSE((*dimX)[0]==(*dimY)[0]);
+            GADGET_CHECK_RETURN_FALSE((*dimX)[1]==(*dimY)[1]);
+            GADGET_CHECK_RETURN_FALSE((*dimX)[2]==(*dimY)[2]);
+
+            const T* pX = x4D.begin();
+            const T* pY = y5D.begin();
+            T* pR = r.begin();
+
+            if ( (pR!=pY) && (!r.dimensions_equal(dimY.get())) )
+            {
+                r.create(dimY);
+                pR = r.begin();
+            }
+
+            size_t RO = (*dimX)[0];
+            size_t E1 = (*dimX)[1];
+            size_t CHA = (*dimX)[2];
+
+            long long t, N3D = (long long)RO*E1*CHA;
+
+            long long N = (long long)(*dimY)[3];
+            long long num = (long long)x4D.get_number_of_elements()/(RO*E1*CHA);
+
+            // The loop touches num*N blocks of N3D elements in y5D and r;
+            // reject a y5D that is too small instead of running past its end.
+            GADGET_CHECK_RETURN_FALSE((long long)y5D.get_number_of_elements() >= num*N*N3D);
+
+            std::vector<size_t> dim3D(3);
+            dim3D[0] = RO;
+            dim3D[1] = E1;
+            dim3D[2] = CHA;
+
+            #pragma omp parallel for default(none) private(t) shared(N3D, N, dim3D, pX, pY, pR, num, n, copyY2R)
+            for ( t=0; t<num; t++ )
+            {
+                // 3D arrays built on existing storage via create(dims, pointer)
+                hoNDArray<T> tmpX, tmpY, tmpR;
+                tmpX.create(&dim3D, const_cast<T*>(pX+t*N3D));
+
+                for ( long long z=0; z<N; z++ )
+                {
+                    if ( (size_t)z != n )
+                    {
+                        tmpY.create(&dim3D, const_cast<T*>(pY+t*N3D*N+z*N3D));
+                        tmpR.create(&dim3D, pR+t*N3D*N+z*N3D);
+                        multiply(tmpX, tmpY, tmpR);
+                    }
+                    else if ( (pR != pY) && copyY2R )
+                    {
+                        // excluded block: pass y5D through unchanged
+                        memcpy(pR+t*N3D*N+z*N3D, pY+t*N3D*N+z*N3D, sizeof(T)*N3D);
+                    }
+                }
+            }
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in multiplyOver4thDimensionExcept(const hoNDArray<float>& x4D, const hoNDArray<float>& y5D, size_t n, hoNDArray<float>& r, bool copyY2R) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    // Multiply every 4D block of y with the matching 4D block of x.
+    // With x laid out as [RO E1 E2 CHA T] and y as [RO E1 E2 CHA N T]:
+    // r(:,:,:,:,n,t) = y(:,:,:,:,n,t) .* x(:,:,:,:,t)
+    // r is re-created with y's dimensions when its own differ.
+    // Returns false if a dimension check fails or an exception is caught.
+    template<typename T>
+    bool multiplyOver5thDimension(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r)
+    {
+        try
+        {
+            boost::shared_ptr< std::vector<size_t> > dimX = x.get_dimensions();
+            boost::shared_ptr< std::vector<size_t> > dimY = y.get_dimensions();
+
+            size_t NDim = dimY->size();
+
+            GADGET_CHECK_RETURN_FALSE(dimX->size()>=5);
+            // (*dimY)[4] is read below, so y needs at least five dimensions
+            GADGET_CHECK_RETURN_FALSE(NDim>=5);
+            GADGET_CHECK_RETURN_FALSE((*dimX)[0]==(*dimY)[0]);
+            GADGET_CHECK_RETURN_FALSE((*dimX)[1]==(*dimY)[1]);
+            GADGET_CHECK_RETURN_FALSE((*dimX)[2]==(*dimY)[2]);
+            GADGET_CHECK_RETURN_FALSE((*dimX)[3]==(*dimY)[3]);
+
+            if ( !r.dimensions_equal(dimY.get()) )
+            {
+                r.create(dimY);
+            }
+
+            size_t RO = (*dimX)[0];
+            size_t E1 = (*dimX)[1];
+            size_t E2 = (*dimX)[2];
+            size_t CHA = (*dimX)[3];
+
+            // signed 64-bit counter: an int would truncate num for very large arrays
+            long long t;
+            size_t N4D = RO*E1*E2*CHA;
+
+            size_t N = (*dimY)[4];
+            size_t num = x.get_number_of_elements()/N4D;
+
+            // The loop touches num*N blocks of N4D elements in y and r;
+            // reject a y that is too small instead of running past its end.
+            GADGET_CHECK_RETURN_FALSE(y.get_number_of_elements() >= num*N*N4D);
+
+            const T* pX = x.begin();
+            const T* pY = y.begin();
+            T* pR = r.begin();
+
+            std::vector<size_t> dim4D(4);
+            dim4D[0] = RO;
+            dim4D[1] = E1;
+            dim4D[2] = E2;
+            dim4D[3] = CHA;
+
+            #pragma omp parallel for default(none) private(t) shared(N4D, N, dim4D, pX, pY, pR, num)
+            for ( t=0; t<(long long)num; t++ )
+            {
+                // 4D arrays built on existing storage via create(dims, pointer)
+                hoNDArray<T> tmpX, tmpY, tmpR;
+                tmpX.create(&dim4D, const_cast<T*>(pX+t*N4D));
+
+                for ( size_t n=0; n<N; n++ )
+                {
+                    tmpY.create(&dim4D, const_cast<T*>(pY+t*N4D*N+n*N4D));
+                    tmpR.create(&dim4D, pR+t*N4D*N+n*N4D);
+                    multiply(tmpX, tmpY, tmpR);
+                }
+            }
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in multiplyOver5thDimension(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    // Same product as multiplyOver5thDimension, but the block at index n of the
+    // 5th dimension is not multiplied:
+    //   r(:,:,:,:,z,t) = y(:,:,:,:,z,t) .* x(:,:,:,:,t)   for z != n
+    // For z == n the block is copied from y to r when copyY2R is true and r does
+    // not share y's storage; otherwise it is left untouched.
+    // r may be y itself (in-place); it is re-created with y's dimensions only
+    // when it is a different buffer whose dimensions differ.
+    // Returns false if a dimension check fails or an exception is caught.
+    template<typename T> 
+    bool multiplyOver5thDimensionExcept(const hoNDArray<T>& x, const hoNDArray<T>& y, size_t n, hoNDArray<T>& r, bool copyY2R)
+    {
+        try
+        {
+            boost::shared_ptr< std::vector<size_t> > dimX = x.get_dimensions();
+            boost::shared_ptr< std::vector<size_t> > dimY = y.get_dimensions();
+
+            size_t NDim = dimY->size();
+
+            GADGET_CHECK_RETURN_FALSE(dimX->size()>=5);
+            // (*dimY)[4] is read below, so y needs at least five dimensions
+            GADGET_CHECK_RETURN_FALSE(NDim>=5);
+            GADGET_CHECK_RETURN_FALSE((*dimX)[0]==(*dimY)[0]);
+            GADGET_CHECK_RETURN_FALSE((*dimX)[1]==(*dimY)[1]);
+            GADGET_CHECK_RETURN_FALSE((*dimX)[2]==(*dimY)[2]);
+            GADGET_CHECK_RETURN_FALSE((*dimX)[3]==(*dimY)[3]);
+
+            const T* pX = x.begin();
+            const T* pY = y.begin();
+            T* pR = r.begin();
+
+            if ( (pR!=pY) && (!r.dimensions_equal(dimY.get())) )
+            {
+                r.create(dimY);
+                pR = r.begin();
+            }
+
+            size_t RO = (*dimX)[0];
+            size_t E1 = (*dimX)[1];
+            size_t E2 = (*dimX)[2];
+            size_t CHA = (*dimX)[3];
+
+            // signed 64-bit counter: an int would truncate num for very large arrays
+            long long t;
+            size_t N4D = RO*E1*E2*CHA;
+
+            size_t N = (*dimY)[4];
+            size_t num = x.get_number_of_elements()/N4D;
+
+            // The loop touches num*N blocks of N4D elements in y and r;
+            // reject a y that is too small instead of running past its end.
+            GADGET_CHECK_RETURN_FALSE(y.get_number_of_elements() >= num*N*N4D);
+
+            std::vector<size_t> dim4D(4);
+            dim4D[0] = RO;
+            dim4D[1] = E1;
+            dim4D[2] = E2;
+            dim4D[3] = CHA;
+
+            #pragma omp parallel for default(none) private(t) shared(N4D, dim4D, pX, pY, pR, num, n, N, copyY2R)
+            for ( t=0; t<(long long)num; t++ )
+            {
+                // 4D arrays built on existing storage via create(dims, pointer)
+                hoNDArray<T> tmpX, tmpY, tmpR;
+                tmpX.create(&dim4D, const_cast<T*>(pX+t*N4D));
+
+                for ( size_t z=0; z<N; z++ )
+                {
+                    if ( z != n )
+                    {
+                        tmpY.create(&dim4D, const_cast<T*>(pY+t*N4D*N+z*N4D));
+                        tmpR.create(&dim4D, pR+t*N4D*N+z*N4D);
+                        multiply(tmpX, tmpY, tmpR);
+                    }
+                    else if ( (pR != pY) && copyY2R )
+                    {
+                        // excluded block: pass y through unchanged
+                        memcpy(pR+t*N4D*N+z*N4D, pY+t*N4D*N+z*N4D, sizeof(T)*N4D);
+                    }
+                }
+            }
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in multiplyOver5thDimensionExcept(const hoNDArray<T>& x, const hoNDArray<T>& y, size_t n, hoNDArray<T>& r, bool copyY2R) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    // Add x to every consecutive chunk of y:
+    //   r[n*Nx + i] = x[i] + y[n*Nx + i],  Nx = number of elements of x,
+    //   n = 0 .. (number of elements of y)/Nx - 1
+    // If r does not already hold as many elements as y, it is first assigned y.
+    // Returns false when x is empty (y could not be split into chunks), and, in
+    // builds where the debug check is active, when x has more elements than y.
+    template <typename T> 
+    bool multipleAdd(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r)
+    {
+        GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()<=y.get_number_of_elements());
+        if ( r.get_number_of_elements()!=y.get_number_of_elements())
+        {
+            r = y;
+        }
+
+        long long Nx = x.get_number_of_elements();
+
+        // Nx is the divisor below; an empty x would be an integer division by zero
+        if ( Nx == 0 )
+        {
+            GADGET_ERROR_MSG("multipleAdd : x is empty");
+            return false;
+        }
+
+        long long N = y.get_number_of_elements() / Nx;
+
+        // Raw base pointers are shared with the parallel loop rather than the
+        // array references themselves.
+        const T* pX = x.begin();
+        const T* pYAll = y.begin();
+        T* pRAll = r.begin();
+
+        long long n;
+        #pragma omp parallel for default(none) private(n) shared(pX, pYAll, pRAll, Nx, N)
+        for ( n=0; n<N; n++ )
+        {
+            const T* pY = pYAll + n*Nx;
+            T* pR = pRAll + n*Nx;
+
+            long long ii;
+            for ( ii=0; ii<Nx; ii++ )
+            {
+                pR[ii] = pX[ii] + pY[ii];
+            }
+        }
+
+        return true;
+    }
+
+    // Element-wise complex product, single precision: r[k] = x[k] * y[k] for
+    // k in [0, N). Real and imaginary parts are formed directly from the four
+    // component products. All four components are read before r[k] is written,
+    // so r may be the same buffer as x or y. The loop is parallelised only when
+    // N exceeds 64*1024.
+    inline void multiplyCplx(size_t N, const  std::complex<float> * x, const  std::complex<float> * y,  std::complex<float> * r)
+    {
+        long long k;
+        #pragma omp parallel for default(none) private(k) shared(N, x, y, r) if (N>64*1024)
+        for (k = 0; k < (long long)N; k++)
+        {
+            const float xr = x[k].real();
+            const float xi = x[k].imag();
+            const float yr = y[k].real();
+            const float yi = y[k].imag();
+
+            r[k] = std::complex<float>(xr*yr-xi*yi, xr*yi+xi*yr);
+        }
+    }
+
+    // Element-wise complex product, double precision: r[k] = x[k] * y[k] for
+    // k in [0, N). Real and imaginary parts are formed directly from the four
+    // component products. All four components are read before r[k] is written,
+    // so r may be the same buffer as x or y. The loop is parallelised only when
+    // N exceeds 64*1024.
+    inline void multiplyCplx(size_t N, const  std::complex<double> * x, const  std::complex<double> * y,  std::complex<double> * r)
+    {
+        long long k;
+        #pragma omp parallel for default(none) private(k) shared(N, x, y, r) if (N>64*1024)
+        for (k = 0; k < (long long)N; k++)
+        {
+            const double xr = x[k].real();
+            const double xi = x[k].imag();
+            const double yr = y[k].real();
+            const double yi = y[k].imag();
+
+            r[k] = std::complex<double>(xr*yr-xi*yi, xr*yi+xi*yr);
+        }
+    }
+
+    // Multiply every consecutive chunk of y element-wise by x:
+    //   r[n*Nx + i] = x[i] * y[n*Nx + i],  Nx = number of elements of x,
+    //   n = 0 .. (number of elements of y)/Nx - 1
+    // If r does not already hold as many elements as y, it is first assigned y.
+    // Supported element types: float, double, std::complex<float> and
+    // std::complex<double>; the complex types go through multiplyCplx.
+    // Returns false when x is empty, when T is not one of the supported types,
+    // and, in builds where the debug check is active, when x has more elements
+    // than y.
+    template <typename T> 
+    bool multipleMultiply(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r)
+    {
+        GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()<=y.get_number_of_elements());
+        if ( r.get_number_of_elements()!=y.get_number_of_elements())
+        {
+            r = y;
+        }
+
+        long long Nx = x.get_number_of_elements();
+
+        // Nx is the divisor below; an empty x would be an integer division by zero
+        if ( Nx == 0 )
+        {
+            GADGET_ERROR_MSG("multipleMultiply : x is empty");
+            return false;
+        }
+
+        long long N = y.get_number_of_elements() / Nx;
+
+        // Raw base pointers are shared with the parallel loops rather than the
+        // array references themselves, so one pragma serves every compiler.
+        const T* pX = x.begin();
+        const T* pYAll = y.begin();
+        T* pRAll = r.begin();
+
+        long long n;
+
+        if ( typeid(T)==typeid(float) || typeid(T)==typeid(double) )
+        {
+            // real types: plain element-wise product
+            #pragma omp parallel for default(none) private(n) shared(pX, pYAll, pRAll, Nx, N)
+            for ( n=0; n<N; n++ )
+            {
+                const T* pY = pYAll + n*Nx;
+                T* pR = pRAll + n*Nx;
+
+                long long ii;
+                for ( ii=0; ii<Nx; ii++ )
+                {
+                    pR[ii] = pX[ii] * pY[ii];
+                }
+            }
+        }
+        else if ( typeid(T)==typeid( std::complex<float> ) )
+        {
+            // The casts are no-ops when this branch runs (T is std::complex<float>);
+            // they exist so the branch still compiles for every other T.
+            #pragma omp parallel for default(none) private(n) shared(pX, pYAll, pRAll, Nx, N)
+            for ( n=0; n<N; n++ )
+            {
+                multiplyCplx((size_t)Nx, reinterpret_cast<const std::complex<float>*>(pX), reinterpret_cast<const std::complex<float>*>(pYAll+n*Nx), reinterpret_cast<std::complex<float>*>(pRAll+n*Nx));
+            }
+        }
+        else if ( typeid(T)==typeid( std::complex<double> ) )
+        {
+            // Same reasoning as above, for std::complex<double>.
+            #pragma omp parallel for default(none) private(n) shared(pX, pYAll, pRAll, Nx, N)
+            for ( n=0; n<N; n++ )
+            {
+                multiplyCplx((size_t)Nx, reinterpret_cast<const std::complex<double>*>(pX), reinterpret_cast<const std::complex<double>*>(pYAll+n*Nx), reinterpret_cast<std::complex<double>*>(pRAll+n*Nx));
+            }
+        }
+        else
+        {
+            GADGET_ERROR_MSG("multipleMultiply : unsupported type " << typeid(T).name());
+            return false;
+        }
+
+        return true;
+    }
+
+    // Copy a rectangular sub-region of x (up to 11 dimensions) into r.
+    //   x       : source array
+    //   r       : destination; re-created here with the extents given in size
+    //   startND : first index of the region along each dimension
+    //   size    : extent of the region along each dimension (same length as startND)
+    // Dimensions beyond startND.size() use start = end = 0, i.e. a single index.
+    // Returns false if startND and size differ in length, have more than 11
+    // entries, or the region extends past x along any listed dimension.
+    template <typename T> 
+    bool cropUpTo11DArray(const hoNDArray<T>& x, hoNDArray<T>& r, const std::vector<size_t>& startND, std::vector<size_t>& size)
+    {
+        GADGET_CHECK_RETURN_FALSE( startND.size() == size.size() );
+        GADGET_CHECK_RETURN_FALSE( startND.size() <= 11 );
+
+        r.create(&size);
+        // Shortcut: same element count as x, so take a full copy of x.
+        // NOTE(review): this runs before the bounds checks below and does not
+        // look at startND; r also ends up as a copy of x rather than the array
+        // just created from size - confirm this is intended.
+        if ( r.get_number_of_elements() == x.get_number_of_elements() )
+        {
+            r = x;
+            return true;
+        }
+
+        // inclusive [start, end] index range per dimension, padded to 11 entries
+        std::vector<size_t> start(11, 0);
+        std::vector<size_t> end(11, 0);
+
+        size_t ii;
+        for ( ii=0; ii<startND.size(); ii++ )
+        {
+            start[ii] = startND[ii];
+            end[ii] = start[ii] + size[ii] - 1;
+            GADGET_CHECK_RETURN_FALSE(end[ii] < x.get_size(ii));
+        }
+
+        // [Ro E1 Cha Slice E2 Con Phase Rep Set Seg Ave]
+        size_t e1, cha, n, s, con, phs, rep, set, seg, ave;
+
+        // srcInd indexes into x, dstInd into r (shifted so the region starts at 0)
+        std::vector<size_t> srcInd(11), dstInd(11);
+
+        // One loop per dimension 10..1; dimension 0 is copied as a contiguous run.
+        for ( ave=start[10]; ave<=end[10]; ave++ )
+        {
+            srcInd[10] = ave; dstInd[10] = ave-start[10];
+
+            for ( seg=start[9]; seg<=end[9]; seg++ )
+            {
+                srcInd[9] = seg; dstInd[9] = seg-start[9];
+
+                for ( set=start[8]; set<=end[8]; set++ )
+                {
+                    srcInd[8] = set; dstInd[8] = set-start[8];
+
+                    for ( rep=start[7]; rep<=end[7]; rep++ )
+                    {
+                        srcInd[7] = rep; dstInd[7] = rep-start[7];
+
+                        for ( phs=start[6]; phs<=end[6]; phs++ )
+                        {
+                            srcInd[6] = phs; dstInd[6] = phs-start[6];
+
+                            for ( con=start[5]; con<=end[5]; con++ )
+                            {
+                                srcInd[5] = con; dstInd[5] = con-start[5];
+
+                                for ( s=start[4]; s<=end[4]; s++ )
+                                {
+                                    srcInd[4] = s; dstInd[4] = s-start[4];
+
+                                    for ( n=start[3]; n<=end[3]; n++ )
+                                    {
+                                        srcInd[3] = n; dstInd[3] = n-start[3];
+
+                                        for ( cha=start[2]; cha<=end[2]; cha++ )
+                                        {
+                                            srcInd[2] = cha; dstInd[2] = cha-start[2];
+
+                                            for ( e1=start[1]; e1<=end[1]; e1++ )
+                                            {
+                                                srcInd[1] = e1; dstInd[1] = e1-start[1];
+
+                                                srcInd[0] = start[0];
+                                                dstInd[0] = 0;
+
+                                                size_t offsetSrc = x.calculate_offset(srcInd);
+                                                size_t offsetDst = r.calculate_offset(dstInd);
+
+                                                // copy the whole first-dimension run [start[0], end[0]] at once
+                                                memcpy(r.begin()+offsetDst, x.begin()+offsetSrc, sizeof(T)*(end[0]-start[0]+1));
+
+                                                /*for ( ro=start[0]; ro<=end[0]; ro++ )
+                                                {
+                                                    srcInd[0] = ro;
+                                                    dstInd[0] = ro-start[0];
+
+                                                    int offsetSrc = x.calculate_offset(srcInd);
+                                                    int offsetDst = r.calculate_offset(dstInd);
+
+                                                    r(offsetDst) = x(offsetSrc);
+                                                }*/
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        return true;
+    }
+
+    // Write x into a rectangular sub-region of r (up to 11 dimensions); the
+    // index mapping is the reverse of cropUpTo11DArray.
+    //   x       : source array, indexed from 0 along every dimension
+    //   r       : destination array; only the addressed region is written
+    //   startND : first index of the region in r along each dimension
+    //   size    : extent of the region along each dimension (same length as startND)
+    // Dimensions beyond startND.size() use start = end = 0, i.e. a single index.
+    // Returns false if startND and size differ in length, have more than 11
+    // entries, or the region extends past r along any listed dimension.
+    // NOTE(review): x's own dimensions are never compared against size here -
+    // confirm that callers guarantee x covers the requested region.
+    template <typename T> 
+    bool setSubArrayUpTo11DArray(const hoNDArray<T>& x, hoNDArray<T>& r, const std::vector<size_t>& startND, std::vector<size_t>& size)
+    {
+        GADGET_CHECK_RETURN_FALSE( startND.size() == size.size() );
+        GADGET_CHECK_RETURN_FALSE( startND.size() <= 11 );
+
+        // Shortcut: same element count, so r simply becomes a copy of x.
+        // NOTE(review): this runs before the bounds checks below and does not
+        // look at startND or size - confirm this is intended.
+        if ( r.get_number_of_elements() == x.get_number_of_elements() )
+        {
+            r = x;
+            return true;
+        }
+
+        // inclusive [start, end] index range per dimension, padded to 11 entries
+        std::vector<size_t> start(11, 0);
+        std::vector<size_t> end(11, 0);
+
+        size_t ii;
+        for ( ii=0; ii<startND.size(); ii++ )
+        {
+            start[ii] = startND[ii];
+            end[ii] = start[ii] + size[ii] - 1;
+            GADGET_CHECK_RETURN_FALSE(end[ii] < r.get_size(ii));
+        }
+
+        // [Ro E1 Cha Slice E2 Con Phase Rep Set Seg Ave]
+        size_t e1, cha, n, s, con, phs, rep, set, seg, ave;
+
+        // dstInd indexes into r, srcInd into x (shifted so the region starts at 0)
+        std::vector<size_t> srcInd(11), dstInd(11);
+
+        // One loop per dimension 10..1; dimension 0 is copied as a contiguous run.
+        for ( ave=start[10]; ave<=end[10]; ave++ )
+        {
+            dstInd[10] = ave; srcInd[10] = ave-start[10];
+
+            for ( seg=start[9]; seg<=end[9]; seg++ )
+            {
+                dstInd[9] = seg; srcInd[9] = seg-start[9];
+
+                for ( set=start[8]; set<=end[8]; set++ )
+                {
+                    dstInd[8] = set; srcInd[8] = set-start[8];
+
+                    for ( rep=start[7]; rep<=end[7]; rep++ )
+                    {
+                        dstInd[7] = rep; srcInd[7] = rep-start[7];
+
+                        for ( phs=start[6]; phs<=end[6]; phs++ )
+                        {
+                            dstInd[6] = phs; srcInd[6] = phs-start[6];
+
+                            for ( con=start[5]; con<=end[5]; con++ )
+                            {
+                                dstInd[5] = con; srcInd[5] = con-start[5];
+
+                                for ( s=start[4]; s<=end[4]; s++ )
+                                {
+                                    dstInd[4] = s; srcInd[4] = s-start[4];
+
+                                    for ( n=start[3]; n<=end[3]; n++ )
+                                    {
+                                        dstInd[3] = n; srcInd[3] = n-start[3];
+
+                                        for ( cha=start[2]; cha<=end[2]; cha++ )
+                                        {
+                                            dstInd[2] = cha; srcInd[2] = cha-start[2];
+
+                                            for ( e1=start[1]; e1<=end[1]; e1++ )
+                                            {
+                                                dstInd[1] = e1; srcInd[1] = e1-start[1];
+
+                                                dstInd[0] = start[0];
+                                                srcInd[0] = 0;
+
+                                                size_t offsetSrc = x.calculate_offset(srcInd);
+                                                size_t offsetDst = r.calculate_offset(dstInd);
+
+                                                // copy the whole first-dimension run [start[0], end[0]] at once
+                                                memcpy(r.begin()+offsetDst, x.begin()+offsetSrc, sizeof(T)*(end[0]-start[0]+1));
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        return true;
+    }
+
+    // Gathers the sampled readout lines of x into the compact array r.
+    //
+    // Both arrays use the layout [Ro E1 Cha Slice E2 Con Phase Rep Set Seg Ave].
+    // A line (e1, e2) is treated as sampled when timeStamp, read at Ro=0 and
+    // Cha=0, is > 0. Sampled lines are packed in ascending order: the i-th
+    // sampled E1 line of a partition lands at E1 index i, and the j-th partition
+    // containing at least one sampled line lands at E2 index j.
+    //
+    // r is (re)created and zero-filled. Its E1 (E2) extent is E1/acceFactorE1 + 1
+    // (E2/acceFactorE2 + 1) when the respective factor is > 1 and the dimension
+    // is > 1; otherwise the extent of x is kept. No check is made that the number
+    // of sampled lines actually fits into these extents.
+    //
+    // Returns false if an exception is caught, true otherwise.
+    template<typename T> 
+    bool extractSampledLinesUpTo11DArray(const hoNDArray<T>& x, hoNDArray<T>& r, const hoNDArray<float>& timeStamp, double acceFactorE1, double acceFactorE2)
+    {
+        try
+        {
+            std::vector<size_t> dim;
+            x.get_dimensions(dim);
+
+            size_t RO = x.get_size(0);
+            size_t E1 = x.get_size(1);
+            size_t CHA = x.get_size(2);
+            size_t SLC = x.get_size(3);
+            size_t E2 = x.get_size(4);
+            size_t CON = x.get_size(5);
+            size_t PHS = x.get_size(6);
+            size_t REP = x.get_size(7);
+            size_t SET = x.get_size(8);
+            size_t SEG = x.get_size(9);
+            size_t AVE = x.get_size(10);
+
+            // One work item per (SLC, CON, PHS, REP, SET, SEG, AVE) combination.
+            // This must match the element count of dummyArray below, so CON has
+            // to be part of the product; leaving it out skips work when CON > 1.
+            size_t Num = AVE*SEG*SET*REP*PHS*CON*SLC;
+
+            std::vector<size_t> dimRes(dim);
+
+            if ( acceFactorE1>1 && E1>1 )
+            {
+                dimRes[1] = (size_t)(E1/acceFactorE1) + 1;
+            }
+
+            size_t dstE1 = dimRes[1];
+
+            if ( acceFactorE2>1 && E2>1 )
+            {
+                dimRes[4] = (size_t)(E2/acceFactorE2) + 1;
+            }
+
+            r.create(&dimRes);
+            Gadgetron::clear(r);
+
+            // [Ro E1 Cha Slice E2 Con Phase Rep Set Seg Ave]
+
+            size_t ROLen = sizeof(T)*RO;
+
+            // only used to turn the linear work-item index n into sub-indices
+            hoNDArray<T> dummyArray(SLC, CON, PHS, REP, SET, SEG, AVE);
+
+            long long n;
+            #pragma omp parallel default(none) private(n) shared(Num, dummyArray, RO, E1, CHA, SLC, E2, CON, PHS, REP, SET, SEG, AVE, timeStamp, x, r, ROLen, dstE1)
+            {
+
+                // per-thread scratch; index 0 (Ro) stays 0 throughout
+                std::vector<size_t> indN;
+                std::vector<size_t> srcInd(11, 0), dstInd(11, 0);
+                size_t e1, cha, slc, e2, con, rep, phs, set, seg, ave;
+
+                #pragma omp for
+                for ( n=0; n<(long long)Num; n++ )
+                {
+                    indN = dummyArray.calculate_index(n);
+
+                    ave = indN[6];
+                    seg = indN[5];
+                    set = indN[4];
+                    rep = indN[3];
+                    phs = indN[2];
+                    con = indN[1];
+                    slc = indN[0];
+
+                    srcInd[10] = ave; dstInd[10] = ave;
+                    srcInd[9] = seg; dstInd[9] = seg;
+                    srcInd[8] = set; dstInd[8] = set;
+                    srcInd[7] = rep; dstInd[7] = rep;
+                    srcInd[6] = phs; dstInd[6] = phs;
+                    srcInd[5] = con; dstInd[5] = con;
+
+                    // Slice is dimension 3 and E2 is dimension 4 of the layout.
+                    srcInd[3] = slc; dstInd[3] = slc;
+
+                    // ------------------
+                    // next free E2 slot in r
+                    size_t indE2(0);
+                    for ( e2=0; e2<E2; e2++ )
+                    {
+                        srcInd[4] = e2; dstInd[4] = indE2;
+
+                        // next free E1 slot in r for this partition
+                        size_t indE1(0);
+                        for ( e1=0; e1<E1; e1++ )
+                        {
+                            srcInd[1] = e1;
+
+                            srcInd[2] = 0;
+                            if ( timeStamp(srcInd) > 0 )
+                            {
+                                dstInd[1] = indE1;
+                                indE1++;
+
+                                // ------------------
+                                srcInd[2] = 0; dstInd[2] = 0;
+                                size_t offsetSrc = x.calculate_offset(srcInd);
+                                size_t offsetDst = r.calculate_offset(dstInd);
+
+                                // copy the readout line of every channel; channels
+                                // are RO*E1 apart in x and RO*dstE1 apart in r
+                                for ( cha=0; cha<CHA; cha++ )
+                                {
+                                    memcpy(r.begin()+offsetDst, x.begin()+offsetSrc, ROLen);
+
+                                    offsetSrc += RO*E1;
+                                    offsetDst += RO*dstE1;
+                                }
+                                // ------------------
+                            }
+                        }
+
+                        // Advance the E2 slot only after the whole partition has
+                        // been written, so that two sampled partitions never end
+                        // up in the same slot.
+                        if ( indE1 > 0 )
+                        {
+                            indE2++;
+                        }
+                        // ------------------
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in extractSampledLinesUpTo11DArray(...) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    // Scatters the compacted lines of x back to their sampled positions in r.
+    //
+    // timeStamp defines the full-size geometry and the sampling pattern; the
+    // layout is [Ro E1 Cha Slice E2 Con Phase Rep Set Seg Ave]. r is (re)created
+    // with the dimensions of timeStamp, except that Ro and Cha are taken from x,
+    // and is zero-filled. A position (e1, e2) is treated as sampled when
+    // timeStamp, read at Ro=0 and Cha=0, is > 0.
+    //
+    // Lines are read from x in packed order: the i-th sampled E1 line of a
+    // partition comes from E1 index i of x, and the j-th partition containing at
+    // least one sampled line comes from E2 index j of x. No check is made that x
+    // actually holds that many lines.
+    //
+    // Returns false if an exception is caught, true otherwise.
+    template<typename T> 
+    bool fillSampledLinesUpTo11DArray(const hoNDArray<T>& x, hoNDArray<T>& r, const hoNDArray<float>& timeStamp)
+    {
+        try
+        {
+            size_t RO = x.get_size(0);
+            size_t E1 = timeStamp.get_size(1);
+            size_t CHA = x.get_size(2);
+            size_t SLC = timeStamp.get_size(3);
+            size_t E2 = timeStamp.get_size(4);
+            size_t CON = timeStamp.get_size(5);
+            size_t PHS = timeStamp.get_size(6);
+            size_t REP = timeStamp.get_size(7);
+            size_t SET = timeStamp.get_size(8);
+            size_t SEG = timeStamp.get_size(9);
+            size_t AVE = timeStamp.get_size(10);
+
+            size_t srcE1 = x.get_size(1);
+
+            // One work item per (SLC, CON, PHS, REP, SET, SEG, AVE) combination.
+            // This must match the element count of dummyArray below, so CON has
+            // to be part of the product; leaving it out skips work when CON > 1.
+            size_t Num = AVE*SEG*SET*REP*PHS*CON*SLC;
+
+            std::vector<size_t> dimRes;
+            timeStamp.get_dimensions(dimRes);
+
+            dimRes[0] = RO;
+            dimRes[2] = CHA;
+            r.create(&dimRes);
+            Gadgetron::clear(r);
+
+            size_t ROLen = sizeof(T)*RO;
+
+            // only used to turn the linear work-item index n into sub-indices
+            hoNDArray<T> dummyArray(SLC, CON, PHS, REP, SET, SEG, AVE);
+
+            long long n;
+            #pragma omp parallel default(none) private(n) shared(Num, dummyArray, RO, E1, CHA, SLC, E2, CON, PHS, REP, SET, SEG, AVE, timeStamp, x, r, ROLen, srcE1)
+            {
+
+                // per-thread scratch; index 0 (Ro) stays 0 throughout
+                std::vector<size_t> indN;
+                std::vector<size_t> srcInd(11, 0), dstInd(11, 0);
+                size_t e1, cha, slc, e2, con, rep, phs, set, seg, ave;
+
+                #pragma omp for
+                for ( n=0; n<(long long)Num; n++ )
+                {
+                    indN = dummyArray.calculate_index(n);
+
+                    ave = indN[6];
+                    seg = indN[5];
+                    set = indN[4];
+                    rep = indN[3];
+                    phs = indN[2];
+                    con = indN[1];
+                    slc = indN[0];
+
+                    srcInd[10] = ave; dstInd[10] = ave;
+                    srcInd[9] = seg; dstInd[9] = seg;
+                    srcInd[8] = set; dstInd[8] = set;
+                    srcInd[7] = rep; dstInd[7] = rep;
+                    srcInd[6] = phs; dstInd[6] = phs;
+                    srcInd[5] = con; dstInd[5] = con;
+
+                    // Slice is dimension 3 and E2 is dimension 4 of the layout.
+                    srcInd[3] = slc; dstInd[3] = slc;
+
+                    // ------------------
+                    // next packed E2 slot to read from x
+                    size_t indE2(0);
+                    for ( e2=0; e2<E2; e2++ )
+                    {
+                        srcInd[4] = indE2; dstInd[4] = e2;
+
+                        // next packed E1 slot to read from x for this partition
+                        size_t indE1(0);
+                        for ( e1=0; e1<E1; e1++ )
+                        {
+                            dstInd[1] = e1;
+
+                            dstInd[2] = 0;
+                            if ( timeStamp(dstInd) > 0 )
+                            {
+                                srcInd[1] = indE1;
+                                indE1++;
+
+                                // ------------------
+
+                                srcInd[2] = 0; dstInd[2] = 0;
+                                size_t offsetSrc = x.calculate_offset(srcInd);
+                                size_t offsetDst = r.calculate_offset(dstInd);
+
+                                // copy the readout line of every channel; channels
+                                // are RO*srcE1 apart in x and RO*E1 apart in r
+                                for ( cha=0; cha<CHA; cha++ )
+                                {
+                                    memcpy(r.begin()+offsetDst, x.begin()+offsetSrc, ROLen);
+
+                                    offsetSrc += RO*srcE1;
+                                    offsetDst += RO*E1;
+                                }
+                                // ------------------
+                            }
+                        }
+
+                        // Advance the packed E2 slot only after the whole
+                        // partition has been read, so that two sampled partitions
+                        // never read from the same slot.
+                        if ( indE1 > 0 )
+                        {
+                            indE2++;
+                        }
+                        // ------------------
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in fillSampledLinesUpTo11DArray(...) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    // Computes the element-wise standard deviation of x across its 3rd dimension.
+    //
+    // x is viewed as a sequence of [RO E1 CHA] blocks. For every (ro, e1)
+    // position the mean over CHA is formed first, then
+    // sqrt( sum_cha |x - mean|^2 / S ) is stored, with S = CHA-1 when NMinusOne
+    // is true and S = CHA otherwise. The result array has the dimensions of x
+    // with the 3rd dimension removed.
+    //
+    // Returns false if an exception is caught (or the debug dimension check
+    // fails), true otherwise.
+    template<typename T> 
+    bool stdOver3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& std, bool NMinusOne)
+    {
+        try
+        {
+            typedef typename realType<T>::Type value_type;
+
+            GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_dimensions() >= 3);
+
+            size_t RO = x.get_size(0);
+            size_t E1 = x.get_size(1);
+            size_t CHA = x.get_size(2);
+
+            // number of [RO E1 CHA] blocks in x
+            long long num = (long long)x.get_number_of_elements() / (RO*E1*CHA);
+
+            boost::shared_ptr< std::vector<size_t> > dim = x.get_dimensions();
+
+            std::vector<size_t> dimStd(*dim);
+            dimStd.erase(dimStd.begin()+2);
+            std.create(&dimStd);
+
+            // divisor of the variance: CHA, or CHA-1 if requested
+            T S( (value_type)CHA );
+            if ( NMinusOne )
+            {
+                S = T( (value_type)CHA-1);
+            }
+
+            // weight used to accumulate the mean
+            T S3 = T( (value_type)1.0 )/T( (value_type)CHA );
+
+            long long n;
+
+            #ifdef GCC_OLD_FLAG
+                #pragma omp parallel for default(none) private(n) shared(num, RO, E1, CHA, S, S3)
+            #else
+                #pragma omp parallel for default(none) private(n) shared(num, RO, E1, CHA, x, std, S, S3)
+            #endif
+            for ( n=0; n<num; n++ )
+            {
+                // non-owning view on the n-th [RO E1 CHA] block of x
+                hoNDArray<T> xTmp(RO, E1, CHA, const_cast<T*>(x.begin()+n*RO*E1*CHA));
+                hoNDArray<T> mean(RO, E1);
+                Gadgetron::clear(mean);
+
+                size_t ro, e1, cha;
+                for ( cha=0; cha<CHA; cha++ )
+                {
+                    for ( e1=0; e1<E1; e1++ )
+                    {
+                        for ( ro=0; ro<RO; ro++ )
+                        {
+                            mean(ro+e1*RO) += xTmp(cha*RO*E1+e1*RO+ro)*S3;
+                        }
+                    }
+                }
+
+                for ( e1=0; e1<E1; e1++ )
+                {
+                    for ( ro=0; ro<RO; ro++ )
+                    {
+                        size_t ind = e1*RO+ro;
+
+                        // The accumulators are declared here so that each thread
+                        // owns its copy; sharing them between threads is a data race.
+                        T v(0), v1(0);
+                        for ( cha=0; cha<CHA; cha++ )
+                        {
+                            v1 = std::abs(xTmp(cha*RO*E1+ind)-mean(ind));
+                            v += v1*v1;
+                        }
+
+                        v /= S;
+                        std(ind+n*RO*E1) = std::sqrt(v);
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in stdOver3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& std, bool NMinusOne) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    // Copies the slabs start..end (inclusive) of the 3rd dimension of x into r.
+    //
+    // If x has at most two dimensions, or the requested range is at least as
+    // long as the 3rd dimension, r becomes a plain copy of x. Otherwise r is
+    // created with the dimensions of x, the 3rd one replaced by end-start+1.
+    //
+    // Returns false if end lies outside the 3rd dimension or an exception is
+    // caught, true otherwise.
+    template<typename T> 
+    bool cropOver3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& r, size_t start, size_t end)
+    {
+        try
+        {
+            boost::shared_ptr< std::vector<size_t> > dimX = x.get_dimensions();
+
+            size_t NDim = dimX->size();
+
+            if ( NDim <= 2 )
+            {
+                r = x;
+                return true;
+            }
+
+            size_t RO = x.get_size(0);
+            size_t E1 = x.get_size(1);
+            size_t E2 = x.get_size(2);
+
+            size_t E2_R = end-start+1;
+
+            if ( E2 <= E2_R )
+            {
+                r = x;
+                return true;
+            }
+
+            // Without this check the copy below would read past the end of x.
+            GADGET_CHECK_RETURN_FALSE( end < E2 );
+
+            std::vector<size_t> dimR(*dimX);
+            dimR[2] = E2_R;
+
+            r.create(&dimR);
+
+            size_t N2D = RO*E1;
+            size_t N3D = RO*E1*E2;
+            size_t N3D_R = RO*E1*E2_R;
+
+            // number of 3D blocks stacked along the higher dimensions
+            size_t N = x.get_number_of_elements()/N3D;
+
+            const T* pX = x.begin();
+            T* pR = r.begin();
+
+            size_t n;
+            for ( n=0; n<N; n++ )
+            {
+                long long e2;
+                #pragma omp parallel for default(none) private(e2) shared(N2D, N3D, N3D_R, pX, pR, RO, E1, E2, n, start, end)
+                for ( e2=(long long)start; e2<=(long long)end; e2++ )
+                {
+                    // one [RO E1] slab per iteration
+                    memcpy(pR+n*N3D_R+(e2-start)*N2D, pX+n*N3D+e2*N2D, sizeof(T)*N2D);
+                }
+            }
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in cropOver3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& r, size_t start, size_t end) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    // Writes x into the slabs start..end (inclusive) of the 3rd dimension of r.
+    //
+    // The 3rd dimension of x must equal end-start+1. If r has at most two
+    // dimensions, or x covers the whole 3rd dimension of r, r is overwritten
+    // with a copy of x. Otherwise only the addressed slabs of r are replaced;
+    // the in-plane size and the number of 3D blocks are taken from r.
+    //
+    // Returns false if the sizes are inconsistent, end lies outside the 3rd
+    // dimension of r, x holds too few elements, or an exception is caught.
+    template<typename T> bool setSubArrayOver3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& r, size_t start, size_t end)
+    {
+        try
+        {
+            boost::shared_ptr< std::vector<size_t> > dimR = r.get_dimensions();
+
+            size_t NDim = dimR->size();
+
+            if ( NDim <= 2 )
+            {
+                r = x;
+                return true;
+            }
+
+            size_t RO = r.get_size(0);
+            size_t E1 = r.get_size(1);
+            size_t E2 = r.get_size(2);
+
+            size_t E2_X = end-start+1;
+            GADGET_CHECK_RETURN_FALSE( E2_X == x.get_size(2) );
+
+            if ( E2_X >= E2 )
+            {
+                r = x;
+                return true;
+            }
+
+            // Without this check the copy below would write past the end of r.
+            GADGET_CHECK_RETURN_FALSE( end < E2 );
+
+            size_t N2D = RO*E1;
+            size_t N3D = RO*E1*E2;
+            size_t N3D_X = RO*E1*E2_X;
+
+            // number of 3D blocks stacked along the higher dimensions of r
+            size_t N = r.get_number_of_elements()/N3D;
+
+            // x must provide one sub-block per 3D block of r, otherwise the
+            // copy below would read past the end of x
+            GADGET_CHECK_RETURN_FALSE( x.get_number_of_elements() >= N*N3D_X );
+
+            const T* pX = x.begin();
+            T* pR = r.begin();
+
+            size_t n;
+            for ( n=0; n<N; n++ )
+            {
+                long long e2;
+                #pragma omp parallel for default(none) private(e2) shared(N2D, N3D, N3D_X, pX, pR, RO, E1, E2, n, start, end)
+                for ( e2=(long long)start; e2<=(long long)end; e2++ )
+                {
+                    // one [RO E1] slab per iteration
+                    memcpy(pR+n*N3D+e2*N2D, pX+n*N3D_X+(e2-start)*N2D, sizeof(T)*N2D);
+                }
+            }
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in setSubArrayOver3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& r, size_t start, size_t end) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    // Reorders x from [RO E1 CHA SLC E2 ...] to r = [RO E1 E2 CHA SLC ...].
+    //
+    // Dimensions beyond the fifth are left in place. If x has fewer than five
+    // dimensions there is no E2 dimension to move and r becomes a copy of x.
+    //
+    // Returns false if an exception is caught, true otherwise.
+    template<typename T> 
+    bool permuteE2To3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& r)
+    {
+        try
+        {
+            boost::shared_ptr< std::vector<size_t> > dimX = x.get_dimensions();
+
+            size_t NDim = dimX->size();
+
+            // A 5D array already carries all of RO, E1, CHA, SLC and E2, so it
+            // must be permuted as well; only arrays with fewer dimensions are
+            // passed through unchanged.
+            if ( NDim < 5 )
+            {
+                r = x;
+                return true;
+            }
+
+            size_t RO = x.get_size(0);
+            size_t E1 = x.get_size(1);
+            size_t CHA = x.get_size(2);
+            size_t SLC = x.get_size(3);
+            size_t E2 = x.get_size(4);
+
+            std::vector<size_t> dimR(*dimX);
+            dimR[2] = E2;
+            dimR[3] = CHA;
+            dimR[4] = SLC;
+
+            r.create(&dimR);
+
+            size_t N2D = RO*E1;
+            size_t N5D = RO*E1*CHA*E2*SLC;
+
+            // number of 5D blocks stacked along the higher dimensions
+            size_t N = x.get_number_of_elements()/N5D;
+
+            const T* pX = x.begin();
+            T* pR = r.begin();
+
+            size_t n;
+            for ( n=0; n<N; n++ )
+            {
+                // signed 64-bit loop index, cast bound: avoids the int/size_t comparison
+                long long e2;
+                #pragma omp parallel for default(none) private(e2) shared(N5D, N2D, pX, pR, CHA, SLC, E2, n)
+                for ( e2=0; e2<(long long)E2; e2++ )
+                {
+                    for ( size_t slc=0; slc<SLC; slc++ )
+                    {
+                        for ( size_t cha=0; cha<CHA; cha++ )
+                        {
+                            // destination order is (e2, cha, slc), source order is (cha, slc, e2)
+                            memcpy(pR+n*N5D+slc*CHA*E2*N2D+cha*E2*N2D+e2*N2D, pX+n*N5D+e2*SLC*CHA*N2D+slc*CHA*N2D+cha*N2D, sizeof(T)*N2D);
+                        }
+                    }
+                }
+            }
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in permuteE2To3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    // Reorders x from [RO E1 E2 CHA SLC ...] to r = [RO E1 CHA SLC E2 ...].
+    //
+    // Dimensions beyond the fifth are left in place. If x has fewer than five
+    // dimensions r becomes a copy of x.
+    //
+    // Returns false if an exception is caught, true otherwise.
+    template<typename T> 
+    bool permuteE2To5thDimension(const hoNDArray<T>& x, hoNDArray<T>& r)
+    {
+        try
+        {
+            boost::shared_ptr< std::vector<size_t> > dimX = x.get_dimensions();
+
+            size_t NDim = dimX->size();
+
+            if ( NDim < 5 )
+            {
+                r = x;
+                return true;
+            }
+
+            size_t RO = x.get_size(0);
+            size_t E1 = x.get_size(1);
+            size_t E2 = x.get_size(2);
+            size_t CHA = x.get_size(3);
+            size_t SLC = x.get_size(4);
+
+            std::vector<size_t> dimR(*dimX);
+            dimR[2] = CHA;
+            dimR[3] = SLC;
+            dimR[4] = E2;
+
+            r.create(&dimR);
+
+            size_t N2D = RO*E1;
+            size_t N5D = RO*E1*CHA*E2*SLC;
+
+            // number of 5D blocks stacked along the higher dimensions
+            size_t N = x.get_number_of_elements()/N5D;
+
+            const T* pX = x.begin();
+            T* pR = r.begin();
+
+            size_t n;
+            for ( n=0; n<N; n++ )
+            {
+                // signed 64-bit loop index, cast bound: avoids the int/size_t comparison
+                long long e2;
+                #pragma omp parallel for default(none) private(e2) shared(N5D, N2D, pX, pR, CHA, SLC, E2, n)
+                for ( e2=0; e2<(long long)E2; e2++ )
+                {
+                    for ( size_t slc=0; slc<SLC; slc++ )
+                    {
+                        for ( size_t cha=0; cha<CHA; cha++ )
+                        {
+                            // destination order is (cha, slc, e2), source order is (e2, cha, slc)
+                            memcpy(pR+n*N5D+e2*SLC*CHA*N2D+slc*CHA*N2D+cha*N2D, pX+n*N5D+slc*CHA*E2*N2D+cha*E2*N2D+e2*N2D, sizeof(T)*N2D);
+                        }
+                    }
+                }
+            }
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in permuteE2To5thDimension(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    // Reorders x from [RO E1 E2 ...] to r = [E1 E2 RO ...].
+    //
+    // Higher dimensions keep their position. With fewer than three dimensions
+    // r is simply a copy of x. Returns false only if an exception is caught.
+    template<typename T> 
+    bool permuteROTo3rdDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r)
+    {
+        try
+        {
+            boost::shared_ptr< std::vector<size_t> > dimX = x.get_dimensions();
+
+            if ( dimX->size() < 3 )
+            {
+                r = x;
+                return true;
+            }
+
+            size_t RO = x.get_size(0);
+            size_t E1 = x.get_size(1);
+            size_t E2 = x.get_size(2);
+
+            std::vector<size_t> dimR(*dimX);
+            dimR[0] = E1;
+            dimR[1] = E2;
+            dimR[2] = RO;
+
+            r.create(&dimR);
+
+            // size of one 3D block and number of such blocks
+            size_t N3D = RO*E1*E2;
+            size_t N = x.get_number_of_elements()/N3D;
+
+            const T* pX = x.begin();
+            T* pR = r.begin();
+
+            long long n;
+
+            #pragma omp parallel for default(none) private(n) shared(RO, E1, E2, N, pR, N3D, pX)
+            for ( n=0; n<(long long)N; n++ )
+            {
+                const T* srcBlock = pX + n*N3D;
+                T* dstBlock = pR + n*N3D;
+
+                for ( size_t e2=0; e2<E2; e2++ )
+                {
+                    for ( size_t e1=0; e1<E1; e1++ )
+                    {
+                        // contiguous readout line in x, strided by E1*E2 in r
+                        const T* srcLine = srcBlock + e1*RO + e2*RO*E1;
+                        T* dstLine = dstBlock + e1 + e2*E1;
+
+                        for ( size_t ro=0; ro<RO; ro++ )
+                        {
+                            dstLine[ro*E1*E2] = srcLine[ro];
+                        }
+                    }
+                }
+            }
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in permuteROTo3rdDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    // Reorders x from [RO E1 E2 CHA ...] to r = [E1 E2 CHA RO ...].
+    //
+    // Higher dimensions keep their position. With fewer than four dimensions
+    // r is simply a copy of x. Returns false only if an exception is caught.
+    template<typename T> 
+    bool permuteROTo4thDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r)
+    {
+        try
+        {
+            boost::shared_ptr< std::vector<size_t> > dimX = x.get_dimensions();
+
+            if ( dimX->size() < 4 )
+            {
+                r = x;
+                return true;
+            }
+
+            size_t RO = x.get_size(0);
+            size_t E1 = x.get_size(1);
+            size_t E2 = x.get_size(2);
+            size_t CHA = x.get_size(3);
+
+            std::vector<size_t> dimR(*dimX);
+            dimR[0] = E1;
+            dimR[1] = E2;
+            dimR[2] = CHA;
+            dimR[3] = RO;
+
+            r.create(&dimR);
+
+            // size of one 4D block and number of such blocks
+            size_t N4D = RO*E1*E2*CHA;
+            size_t N = x.get_number_of_elements()/N4D;
+
+            const T* pX = x.begin();
+            T* pR = r.begin();
+
+            for ( long long blk=0; blk<(long long)N; blk++ )
+            {
+                const T* srcBlock = pX + blk*N4D;
+                T* dstBlock = pR + blk*N4D;
+
+                long long c;
+
+                // channels are independent, so they are distributed over threads
+                #pragma omp parallel for default(none) private(c) shared(RO, E1, E2, CHA, srcBlock, dstBlock)
+                for ( c=0; c<(long long)CHA; c++ )
+                {
+                    for ( size_t e2=0; e2<E2; e2++ )
+                    {
+                        for ( size_t e1=0; e1<E1; e1++ )
+                        {
+                            // contiguous readout line in x, strided by E1*E2*CHA in r
+                            const T* srcLine = srcBlock + e1*RO + e2*RO*E1 + c*RO*E1*E2;
+                            T* dstLine = dstBlock + e1 + e2*E1 + c*E1*E2;
+
+                            for ( size_t ro=0; ro<RO; ro++ )
+                            {
+                                dstLine[ro*E1*E2*CHA] = srcLine[ro];
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in permuteROTo4thDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    // Reorders x from [E1 E2 CHA RO ...] to r = [RO E1 E2 CHA ...].
+    //
+    // Higher dimensions keep their position. With fewer than four dimensions
+    // r is simply a copy of x. Returns false only if an exception is caught.
+    template<typename T> 
+    bool permuteROTo1stDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r)
+    {
+        try
+        {
+            boost::shared_ptr< std::vector<size_t> > dimX = x.get_dimensions();
+
+            if ( dimX->size() < 4 )
+            {
+                r = x;
+                return true;
+            }
+
+            size_t E1 = x.get_size(0);
+            size_t E2 = x.get_size(1);
+            size_t CHA = x.get_size(2);
+            size_t RO = x.get_size(3);
+
+            std::vector<size_t> dimR(*dimX);
+            dimR[0] = RO;
+            dimR[1] = E1;
+            dimR[2] = E2;
+            dimR[3] = CHA;
+
+            r.create(&dimR);
+
+            // size of one 4D block and number of such blocks
+            size_t N4D = RO*E1*E2*CHA;
+            size_t N = x.get_number_of_elements()/N4D;
+
+            const T* pX = x.begin();
+            T* pR = r.begin();
+
+            for ( long long blk=0; blk<(long long)N; blk++ )
+            {
+                const T* srcBlock = pX + blk*N4D;
+                T* dstBlock = pR + blk*N4D;
+
+                long long c;
+
+                // channels are independent, so they are distributed over threads
+                #pragma omp parallel for default(none) private(c) shared(RO, E1, E2, CHA, srcBlock, dstBlock)
+                for ( c=0; c<(long long)CHA; c++ )
+                {
+                    for ( size_t e2=0; e2<E2; e2++ )
+                    {
+                        for ( size_t e1=0; e1<E1; e1++ )
+                        {
+                            // readout line is contiguous in r and strided by E1*E2*CHA in x
+                            T* dstLine = dstBlock + (e1*RO+e2*RO*E1+c*RO*E1*E2);
+                            const T* srcLine = srcBlock + (e1+e2*E1+c*E1*E2);
+
+                            for ( size_t ro=0; ro<RO; ro++ )
+                            {
+                                dstLine[ro] = srcLine[ro*E1*E2*CHA];
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in permuteROTo1stDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    // Reorders x from [RO E1 E2 ...] to r = [E2 RO E1 ...].
+    //
+    // Higher dimensions keep their position. With fewer than three dimensions
+    // r is simply a copy of x. Returns false only if an exception is caught.
+    template<typename T> 
+    bool permute3rdDimensionTo1stDimension(const hoNDArray<T>& x, hoNDArray<T>& r)
+    {
+        try
+        {
+            boost::shared_ptr< std::vector<size_t> > dimX = x.get_dimensions();
+
+            if ( dimX->size() < 3 )
+            {
+                r = x;
+                return true;
+            }
+
+            size_t RO = x.get_size(0);
+            size_t E1 = x.get_size(1);
+            size_t E2 = x.get_size(2);
+
+            std::vector<size_t> dimR(*dimX);
+            dimR[0] = E2;
+            dimR[1] = RO;
+            dimR[2] = E1;
+
+            r.create(&dimR);
+
+            // size of one 3D block and number of such blocks
+            size_t N3D = RO*E1*E2;
+            size_t N = x.get_number_of_elements()/N3D;
+
+            const T* pX = x.begin();
+            T* pR = r.begin();
+
+            long long slab;
+            for ( long long blk=0; blk<(long long)N; blk++ )
+            {
+                const T* srcBlock = pX + blk*N3D;
+                T* dstBlock = pR + blk*N3D;
+
+                // every e2 slab writes to its own positions in r
+                #pragma omp parallel for default(none) private(slab) shared(RO, E1, E2, srcBlock, dstBlock)
+                for ( slab=0; slab<(long long)E2; slab++ )
+                {
+                    for ( size_t e1=0; e1<E1; e1++ )
+                    {
+                        // contiguous readout line in x, strided by E2 in r
+                        const T* srcLine = srcBlock + (e1*RO+slab*RO*E1);
+                        T* dstLine = dstBlock + (slab+e1*E2*RO);
+
+                        for ( size_t ro=0; ro<RO; ro++ )
+                        {
+                            dstLine[ro*E2] = srcLine[ro];
+                        }
+                    }
+                }
+            }
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in permute3rdDimensionTo1stDimension(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    // Reorders x from [RO E1 E2 srcCHA dstCHA ...] to r = [E1 E2 srcCHA dstCHA RO ...].
+    //
+    // Higher dimensions keep their position. If x has fewer than five
+    // dimensions r becomes a copy of x.
+    //
+    // Returns false if an exception is caught, true otherwise.
+    template<typename T> 
+    bool permuteROTo5thDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r)
+    {
+        try
+        {
+            boost::shared_ptr< std::vector<size_t> > dimX = x.get_dimensions();
+
+            size_t NDim = dimX->size();
+
+            if ( NDim < 5 )
+            {
+                r = x;
+                return true;
+            }
+
+            size_t RO = x.get_size(0);
+            size_t E1 = x.get_size(1);
+            size_t E2 = x.get_size(2);
+            size_t srcCHA = x.get_size(3);
+            size_t dstCHA = x.get_size(4);
+
+            std::vector<size_t> dimR(*dimX);
+            dimR[0] = E1;
+            dimR[1] = E2;
+            dimR[2] = srcCHA;
+            dimR[3] = dstCHA;
+            dimR[4] = RO;
+
+            r.create(&dimR);
+
+            size_t N5D = RO*E1*E2*srcCHA*dstCHA;
+
+            // number of 5D blocks stacked along the higher dimensions
+            size_t N = x.get_number_of_elements()/N5D;
+
+            const T* pX = x.begin();
+            T* pR = r.begin();
+
+            long long n;
+            for ( n=0; n<(long long)N; n++ )
+            {
+                T* pRn = pR + n*N5D;
+                T* pXn = const_cast<T*>(pX) + n*N5D;
+
+                long long dcha;
+
+                #pragma omp parallel for default(none) private(dcha) shared(RO, E1, E2, srcCHA, dstCHA, pXn, pRn)
+                for ( dcha=0; dcha<(long long)dstCHA; dcha++ )
+                {
+                    // compare size_t against size_t; no narrowing cast on the bound
+                    for ( size_t scha=0; scha<srcCHA; scha++ )
+                    {
+                        for ( size_t e2=0; e2<E2; e2++ )
+                        {
+                            for ( size_t e1=0; e1<E1; e1++ )
+                            {
+                                // offsets of the line start in r and in x
+                                size_t indRn = e1+e2*E1+scha*E1*E2+dcha*E1*E2*srcCHA;
+                                size_t indXn = e1*RO+e2*RO*E1+scha*RO*E1*E2+dcha*RO*E1*E2*srcCHA;
+                                for ( size_t ro=0; ro<RO; ro++ )
+                                {
+                                    pRn[indRn+ro*E1*E2*srcCHA*dstCHA] = pXn[ro+indXn];
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in permuteROTo5thDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    // Applies an image-domain unwrapping kernel to a multi-channel 2D image.
+    //
+    // x is read as [ro e1 srcCHA] and kernel as [ro e1 srcCHA dstCHA]. For each
+    // destination channel the element-wise product of x with the matching
+    // kernel block is formed by multiplyCplx into buf and then summed over
+    // srcCHA into y, which is laid out as [ro e1 dstCHA].
+    //
+    // buf is scratch space and is (re)created when it holds fewer than
+    // ro*e1*srcCHA elements; y is (re)created when it holds fewer than
+    // ro*e1*dstCHA elements, so the writes below always stay inside y.
+    //
+    // Returns false if an exception is caught, true otherwise.
+    template<typename T> 
+    bool imageDomainUnwrapping2D(const hoNDArray<T>& x, const hoNDArray<T>& kernel, hoNDArray<T>& buf, hoNDArray<T>& y)
+    {
+        try
+        {
+            size_t ro = x.get_size(0);
+            size_t e1 = x.get_size(1);
+            size_t srcCHA = x.get_size(2);
+            size_t dstCHA = kernel.get_size(3);
+
+            size_t N2D = ro*e1;
+            size_t N3D = ro*e1*srcCHA;
+
+            if ( buf.get_number_of_elements() < N3D )
+            {
+                buf.create(ro, e1, srcCHA);
+            }
+
+            if ( y.get_number_of_elements() < N2D*dstCHA )
+            {
+                y.create(ro, e1, dstCHA);
+            }
+
+            // fetch the raw pointers only after the arrays may have been reallocated
+            T* pX = const_cast<T*>(x.begin());
+            T* ker = const_cast<T*>(kernel.begin());
+            T* pBuf = buf.begin();
+            T* pY = y.begin();
+
+            // The loop runs serially: every iteration reuses the single scratch
+            // buffer pBuf, so it cannot be parallelized as written.
+            for ( size_t dCha=0; dCha<dstCHA; dCha++ )
+            {
+                multiplyCplx(N3D, pX, ker+dCha*N3D, pBuf);
+
+                T* pY2D = pY+dCha*N2D;
+
+                // start the sum with source channel 0, then add the others
+                memcpy(pY2D, pBuf, sizeof(T)*N2D);
+                for ( size_t sCha=1; sCha<srcCHA; sCha++ )
+                {
+                    T* pBuf2D = pBuf+sCha*N2D;
+
+                    for ( size_t ii=0; ii<N2D; ii++ )
+                    {
+                        pY2D[ii] += pBuf2D[ii];
+                    }
+                }
+            }
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in imageDomainUnwrapping2D(const hoNDArray<T>& x, const hoNDArray<T>& ker, hoNDArray<T>& buf, hoNDArray<T>& y) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    /**
+    * @brief apply image domain unwrapping kernels to a series of 2D multi-channel images
+    *
+    *        For every frame n and every destination channel dCha this computes
+    *            y(:,:,dCha,n) = sum over sCha of x(:,:,sCha,n) .* kernel(:,:,sCha,dCha,kn)
+    *        where kn = n if n < kerN, otherwise kn = kerN-1 (the last kernel frame is reused).
+    *
+    *        All arrays are addressed through their raw data pointers as contiguous blocks of
+    *        RO*E1 elements per channel. The first three dimensions of kernel are assumed to
+    *        match those of x; only the total element counts of kernel and y are verified.
+    *
+    * @tparam T      element type; instantiated in this file for std::complex<float> and
+    *                std::complex<double>
+    * @param x       input images, addressed as [RO E1 srcCHA N]
+    * @param kernel  unwrapping kernels, addressed as [RO E1 srcCHA dstCHA kerN]
+    * @param buf     scratch array; re-created as [RO E1 srcCHA] if it holds fewer elements
+    * @param y       result, addressed as [RO E1 dstCHA N]; y is NOT allocated or resized here,
+    *                the caller must provide at least RO*E1*dstCHA*N elements
+    * @return true on success (including empty input), false if a size check fails or an
+    *         exception was caught
+    */
+    template<typename T>
+    bool imageDomainUnwrapping2DT(const hoNDArray<T>& x, const hoNDArray<T>& kernel, hoNDArray<T>& buf, hoNDArray<T>& y)
+    {
+        try
+        {
+            long long ro = (long long)x.get_size(0);
+            long long e1 = (long long)x.get_size(1);
+            long long srcCHA = (long long)x.get_size(2);
+            long long N = (long long)x.get_size(3);
+
+            long long dstCHA = (long long)kernel.get_size(3);
+            long long kerN = (long long)kernel.get_size(4);
+
+            // number of elements in one 2D image and in one [RO E1 srcCHA] frame
+            const long long N2D = ro*e1;
+            const long long N3D = N2D*srcCHA;
+
+            // nothing to unwrap; this also keeps the memcpy below from reading an empty buffer
+            if ( N3D==0 || N==0 || dstCHA==0 )
+            {
+                return true;
+            }
+
+            // kernel and y are accessed through raw pointers below; refuse to run on
+            // undersized arrays instead of reading or writing out of bounds
+            GADGET_CHECK_RETURN_FALSE( kerN >= 1 );
+            GADGET_CHECK_RETURN_FALSE( (long long)kernel.get_number_of_elements() >= N3D*dstCHA*kerN );
+            GADGET_CHECK_RETURN_FALSE( (long long)y.get_number_of_elements() >= N2D*dstCHA*N );
+
+            // make sure the scratch buffer can hold the products of one frame
+            if ( (long long)buf.get_number_of_elements() < N3D )
+            {
+                buf.create(ro, e1, srcCHA);
+            }
+
+            const T* pXN = x.begin();
+            const T* pKerN = kernel.begin();
+            T* pYN = y.begin();
+
+            // Bug fix: the scratch buffer is only guaranteed to hold ONE [RO E1 srcCHA] frame
+            // (see the size check above), and the products of a frame are consumed right
+            // after they are computed. The same frame-sized scratch area is therefore reused
+            // for every (dCha, n) pair. The previous code offset the scratch pointer by
+            // n*ro*e1*srcCHA, which wrote past the end of buf as soon as N > 1 while buf held
+            // a single frame.
+            //
+            // NOTE: the loops below are serial. If they are ever parallelized (the OpenMP
+            // pragmas were already disabled in the original code), every thread needs its
+            // own scratch frame.
+            T* pBuf = buf.begin();
+
+            long long n, dCha, sCha, ii;
+
+            // loop order kept as before: destination channel outside, frame inside
+            for ( dCha=0; dCha<dstCHA; dCha++ )
+            {
+                for ( n=0; n<N; n++ )
+                {
+                    // kernel frame used for image frame n; the last kernel frame is reused
+                    // when the kernel has fewer frames than the images
+                    const long long kn = ( kerN <= n ) ? (kerN-1) : n;
+
+                    const T* pX = pXN + n*N3D;
+                    const T* ker = pKerN + kn*N3D*dstCHA + dCha*N3D;
+
+                    // element-wise product of all source channels with the kernel of dCha
+                    multiplyCplx(N3D, pX, ker, pBuf);
+
+                    // sum the products over the source channels into y(:,:,dCha,n):
+                    // start from source channel 0, then accumulate the remaining channels
+                    T* pY2D = pYN + n*N2D*dstCHA + dCha*N2D;
+                    memcpy(pY2D, pBuf, sizeof(T)*N2D);
+
+                    for ( sCha=1; sCha<srcCHA; sCha++ )
+                    {
+                        const T* pBuf2D = pBuf + sCha*N2D;
+
+                        for ( ii=0; ii<N2D; ii++ )
+                        {
+                            pY2D[ii] += pBuf2D[ii];
+                        }
+                    }
+                }
+            }
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in imageDomainUnwrapping2DT(const hoNDArray<T>& x, const hoNDArray<T>& ker, hoNDArray<T>& buf, hoNDArray<T>& y) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    /**
+    * @brief frame sampled values with two boundary points that share one periodic value
+    *        vx = [start, x(0), ..., x(N-1), end]; every column m of vy is y(:,m) framed by one
+    *        value, interpolated linearly between y(N-1,m) and y(0,m) across the wrap-around gap
+    * @param x      N sample coordinates; only x(0) and x(N-1) are compared against start/end
+    * @param y      sample values, [N M]
+    * @param start  lower boundary, must be <= x(0)
+    * @param end    upper boundary, must be >= x(N-1)
+    * @param vx     output coordinates, created as [N+2]
+    * @param vy     output values, created as [N+2 M]
+    * @return true on success, false if a check fails or an exception was caught
+    */
+    template<typename CoordType, typename T>
+    bool computePeriodicBoundaryValues(const hoNDArray<CoordType>& x, const hoNDArray<T>& y, CoordType start, CoordType end, hoNDArray<CoordType>& vx, hoNDArray<T>& vy)
+    {
+        try
+        {
+            typedef typename realType<T>::Type real_value_type;
+            const size_t N = x.get_size(0);
+            const size_t M = y.get_size(1);
+
+            // x(0) and x(N-1) are read below; N-1 would wrap around for an empty x
+            GADGET_CHECK_RETURN_FALSE(N>0);
+            GADGET_CHECK_RETURN_FALSE(y.get_size(0)==N);
+            GADGET_CHECK_RETURN_FALSE(start<=x(0));
+            GADGET_CHECK_RETURN_FALSE(end>=x(N-1));
+
+            vx.create(N+2);
+            vy.create(N+2, M);
+
+            // gaps between the boundaries and the first/last sample
+            const CoordType dS = x(0) - start;
+            const CoordType dE = end - x(N-1);
+
+            // weight of y(0,m) in the boundary value; FLT_EPSILON replaces a vanishing dE+dS
+            const CoordType wS = ( dE+dS > FLT_EPSILON ) ? dE/(dE+dS) : dE/FLT_EPSILON;
+
+            vx(0) = start;
+            vx(N+1) = end;
+            for ( size_t m=0; m<M; m++ )
+            {
+                const T a = y(0, m);
+                const T b = y(N-1, m);
+                vy(0, m) = b + (real_value_type)wS * ( a - b );
+                vy(N+1, m) = vy(0, m);
+            }
+
+            // interior points are copied unchanged
+            for ( size_t n=0; n<N; n++ )
+            {
+                vx(n+1) = x(n);
+                for ( size_t m=0; m<M; m++ ) vy(n+1, m) = y(n, m);
+            }
+        }
+        catch (...)
+        {
+            GADGET_ERROR_MSG("Errors in computePeriodicBoundaryValues(const hoNDArray<CoordType>& x, const hoNDArray<T>& y, CoordType start, CoordType end, hoNDArray<CoordType>& vx, hoNDArray<T>& vy) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    template EXPORTGTPLUS bool sumOverLastDimension(const hoNDArray<float>& x, hoNDArray<float>& r);
+    template EXPORTGTPLUS bool sumOverLastDimension(const hoNDArray<double>& x, hoNDArray<double>& r);
+    template EXPORTGTPLUS bool sumOverLastDimension(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r);
+    template EXPORTGTPLUS bool sumOverLastDimension(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r);
+
+    template EXPORTGTPLUS bool sumOverSecondLastDimension(const hoNDArray<float>& x, hoNDArray<float>& r);
+    template EXPORTGTPLUS bool sumOverSecondLastDimension(const hoNDArray<double>& x, hoNDArray<double>& r);
+    template EXPORTGTPLUS bool sumOverSecondLastDimension(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r);
+    template EXPORTGTPLUS bool sumOverSecondLastDimension(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r);
+
+    template EXPORTGTPLUS bool multiplyOverLastDimension(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r);
+    template EXPORTGTPLUS bool multiplyOverLastDimension(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
+    template EXPORTGTPLUS bool multiplyOverLastDimension(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, hoNDArray< std::complex<float> >& r);
+    template EXPORTGTPLUS bool multiplyOverLastDimension(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, hoNDArray< std::complex<double> >& r);
+
+    template EXPORTGTPLUS bool divideOverLastDimension(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r);
+    template EXPORTGTPLUS bool divideOverLastDimension(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
+    template EXPORTGTPLUS bool divideOverLastDimension(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, hoNDArray< std::complex<float> >& r);
+    template EXPORTGTPLUS bool divideOverLastDimension(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, hoNDArray< std::complex<double> >& r);
+
+    template EXPORTGTPLUS bool sumOver1stDimension(const hoNDArray<float>& x, hoNDArray<float>& r);
+    template EXPORTGTPLUS bool sumOver1stDimension(const hoNDArray<double>& x, hoNDArray<double>& r);
+    template EXPORTGTPLUS bool sumOver1stDimension(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r);
+    template EXPORTGTPLUS bool sumOver1stDimension(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r);
+
+    template EXPORTGTPLUS bool sumOver2ndDimension(const hoNDArray<float>& x, hoNDArray<float>& r);
+    template EXPORTGTPLUS bool sumOver2ndDimension(const hoNDArray<double>& x, hoNDArray<double>& r);
+    template EXPORTGTPLUS bool sumOver2ndDimension(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r);
+    template EXPORTGTPLUS bool sumOver2ndDimension(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r);
+
+    template EXPORTGTPLUS bool sumOver3rdDimension(const hoNDArray<float>& x, hoNDArray<float>& r);
+    template EXPORTGTPLUS bool sumOver3rdDimension(const hoNDArray<double>& x, hoNDArray<double>& r);
+    template EXPORTGTPLUS bool sumOver3rdDimension(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r);
+    template EXPORTGTPLUS bool sumOver3rdDimension(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r);
+
+    template EXPORTGTPLUS bool sumOver4thDimension(const hoNDArray<float>& x, hoNDArray<float>& r);
+    template EXPORTGTPLUS bool sumOver4thDimension(const hoNDArray<double>& x, hoNDArray<double>& r);
+    template EXPORTGTPLUS bool sumOver4thDimension(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r);
+    template EXPORTGTPLUS bool sumOver4thDimension(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r);
+
+    template EXPORTGTPLUS bool sumOver5thDimension(const hoNDArray<float>& x, hoNDArray<float>& r);
+    template EXPORTGTPLUS bool sumOver5thDimension(const hoNDArray<double>& x, hoNDArray<double>& r);
+    template EXPORTGTPLUS bool sumOver5thDimension(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r);
+    template EXPORTGTPLUS bool sumOver5thDimension(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r);
+
+    template EXPORTGTPLUS bool multiplyOver3rdDimension(const hoNDArray<float>& x3D, const hoNDArray<float>& y4D, hoNDArray<float>& r);
+    template EXPORTGTPLUS bool multiplyOver3rdDimension(const hoNDArray<double>& x3D, const hoNDArray<double>& y4D, hoNDArray<double>& r);
+    template EXPORTGTPLUS bool multiplyOver3rdDimension(const hoNDArray< std::complex<float> >& x3D, const hoNDArray< std::complex<float> >& y4D, hoNDArray< std::complex<float> >& r);
+    template EXPORTGTPLUS bool multiplyOver3rdDimension(const hoNDArray< std::complex<double> >& x3D, const hoNDArray< std::complex<double> >& y4D, hoNDArray< std::complex<double> >& r);
+
+    template EXPORTGTPLUS bool multiplyOver4thDimension(const hoNDArray<float>& x4D, const hoNDArray<float>& y5D, hoNDArray<float>& r);
+    template EXPORTGTPLUS bool multiplyOver4thDimension(const hoNDArray<double>& x4D, const hoNDArray<double>& y5D, hoNDArray<double>& r);
+    template EXPORTGTPLUS bool multiplyOver4thDimension(const hoNDArray< std::complex<float> >& x4D, const hoNDArray< std::complex<float> >& y5D, hoNDArray< std::complex<float> >& r);
+    template EXPORTGTPLUS bool multiplyOver4thDimension(const hoNDArray< std::complex<double> >& x4D, const hoNDArray< std::complex<double> >& y5D, hoNDArray< std::complex<double> >& r);
+
+    template EXPORTGTPLUS bool multiplyOver4thDimensionExcept(const hoNDArray<float>& x4D, const hoNDArray<float>& y5D, size_t n, hoNDArray<float>& r, bool copyY2R);
+    template EXPORTGTPLUS bool multiplyOver4thDimensionExcept(const hoNDArray<double>& x4D, const hoNDArray<double>& y5D, size_t n, hoNDArray<double>& r, bool copyY2R);
+    template EXPORTGTPLUS bool multiplyOver4thDimensionExcept(const hoNDArray< std::complex<float> >& x4D, const hoNDArray< std::complex<float> >& y5D, size_t n, hoNDArray< std::complex<float> >& r, bool copyY2R);
+    template EXPORTGTPLUS bool multiplyOver4thDimensionExcept(const hoNDArray< std::complex<double> >& x4D, const hoNDArray< std::complex<double> >& y5D, size_t n, hoNDArray< std::complex<double> >& r, bool copyY2R);
+
+    template EXPORTGTPLUS bool multiplyOver5thDimension(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r);
+    template EXPORTGTPLUS bool multiplyOver5thDimension(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
+    template EXPORTGTPLUS bool multiplyOver5thDimension(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, hoNDArray< std::complex<float> >& r);
+    template EXPORTGTPLUS bool multiplyOver5thDimension(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, hoNDArray< std::complex<double> >& r);
+
+    template EXPORTGTPLUS bool multiplyOver5thDimensionExcept(const hoNDArray<float>& x, const hoNDArray<float>& y, size_t n, hoNDArray<float>& r, bool copyY2R);
+    template EXPORTGTPLUS bool multiplyOver5thDimensionExcept(const hoNDArray<double>& x, const hoNDArray<double>& y, size_t n, hoNDArray<double>& r, bool copyY2R);
+    template EXPORTGTPLUS bool multiplyOver5thDimensionExcept(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, size_t n, hoNDArray< std::complex<float> >& r, bool copyY2R);
+    template EXPORTGTPLUS bool multiplyOver5thDimensionExcept(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, size_t n, hoNDArray< std::complex<double> >& r, bool copyY2R);
+
+    template EXPORTGTPLUS bool multipleAdd(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r);
+    template EXPORTGTPLUS bool multipleAdd(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
+    template EXPORTGTPLUS bool multipleAdd(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, hoNDArray< std::complex<float> >& r);
+    template EXPORTGTPLUS bool multipleAdd(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, hoNDArray< std::complex<double> >& r);
+
+    template EXPORTGTPLUS bool multipleMultiply(const hoNDArray<float>& x, const hoNDArray<float>& y, hoNDArray<float>& r);
+    template EXPORTGTPLUS bool multipleMultiply(const hoNDArray<double>& x, const hoNDArray<double>& y, hoNDArray<double>& r);
+    template EXPORTGTPLUS bool multipleMultiply(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& y, hoNDArray< std::complex<float> >& r);
+    template EXPORTGTPLUS bool multipleMultiply(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& y, hoNDArray< std::complex<double> >& r);
+
+    template EXPORTGTPLUS bool cropUpTo11DArray(const hoNDArray<short>& x, hoNDArray<short>& r, const std::vector<size_t>& start, std::vector<size_t>& size);
+    template EXPORTGTPLUS bool cropUpTo11DArray(const hoNDArray<unsigned short>& x, hoNDArray<unsigned short>& r, const std::vector<size_t>& start, std::vector<size_t>& size);
+    template EXPORTGTPLUS bool cropUpTo11DArray(const hoNDArray<float>& x, hoNDArray<float>& r, const std::vector<size_t>& start, std::vector<size_t>& size);
+    template EXPORTGTPLUS bool cropUpTo11DArray(const hoNDArray<double>& x, hoNDArray<double>& r, const std::vector<size_t>& start, std::vector<size_t>& size);
+    template EXPORTGTPLUS bool cropUpTo11DArray(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r, const std::vector<size_t>& start, std::vector<size_t>& size);
+    template EXPORTGTPLUS bool cropUpTo11DArray(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r, const std::vector<size_t>& start, std::vector<size_t>& size);
+
+    template EXPORTGTPLUS bool setSubArrayUpTo11DArray(const hoNDArray<short>& x, hoNDArray<short>& r, const std::vector<size_t>& start, std::vector<size_t>& size);
+    template EXPORTGTPLUS bool setSubArrayUpTo11DArray(const hoNDArray<unsigned short>& x, hoNDArray<unsigned short>& r, const std::vector<size_t>& start, std::vector<size_t>& size);
+    template EXPORTGTPLUS bool setSubArrayUpTo11DArray(const hoNDArray<float>& x, hoNDArray<float>& r, const std::vector<size_t>& start, std::vector<size_t>& size);
+    template EXPORTGTPLUS bool setSubArrayUpTo11DArray(const hoNDArray<double>& x, hoNDArray<double>& r, const std::vector<size_t>& start, std::vector<size_t>& size);
+    template EXPORTGTPLUS bool setSubArrayUpTo11DArray(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r, const std::vector<size_t>& start, std::vector<size_t>& size);
+    template EXPORTGTPLUS bool setSubArrayUpTo11DArray(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r, const std::vector<size_t>& start, std::vector<size_t>& size);
+
+    template EXPORTGTPLUS bool extractSampledLinesUpTo11DArray(const hoNDArray<short>& x, hoNDArray<short>& r, const hoNDArray<float>& timeStamp, double acceFactorE1, double acceFactorE2);
+    template EXPORTGTPLUS bool extractSampledLinesUpTo11DArray(const hoNDArray<unsigned short>& x, hoNDArray<unsigned short>& r, const hoNDArray<float>& timeStamp, double acceFactorE1, double acceFactorE2);
+    template EXPORTGTPLUS bool extractSampledLinesUpTo11DArray(const hoNDArray<float>& x, hoNDArray<float>& r, const hoNDArray<float>& timeStamp, double acceFactorE1, double acceFactorE2);
+    template EXPORTGTPLUS bool extractSampledLinesUpTo11DArray(const hoNDArray<double>& x, hoNDArray<double>& r, const hoNDArray<float>& timeStamp, double acceFactorE1, double acceFactorE2);
+    template EXPORTGTPLUS bool extractSampledLinesUpTo11DArray(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r, const hoNDArray<float>& timeStamp, double acceFactorE1, double acceFactorE2);
+    template EXPORTGTPLUS bool extractSampledLinesUpTo11DArray(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r, const hoNDArray<float>& timeStamp, double acceFactorE1, double acceFactorE2);
+
+    template EXPORTGTPLUS bool fillSampledLinesUpTo11DArray(const hoNDArray<short>& x, hoNDArray<short>& r, const hoNDArray<float>& timeStamp);
+    template EXPORTGTPLUS bool fillSampledLinesUpTo11DArray(const hoNDArray<unsigned short>& x, hoNDArray<unsigned short>& r, const hoNDArray<float>& timeStamp);
+    template EXPORTGTPLUS bool fillSampledLinesUpTo11DArray(const hoNDArray<float>& x, hoNDArray<float>& r, const hoNDArray<float>& timeStamp);
+    template EXPORTGTPLUS bool fillSampledLinesUpTo11DArray(const hoNDArray<double>& x, hoNDArray<double>& r, const hoNDArray<float>& timeStamp);
+    template EXPORTGTPLUS bool fillSampledLinesUpTo11DArray(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r, const hoNDArray<float>& timeStamp);
+    template EXPORTGTPLUS bool fillSampledLinesUpTo11DArray(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r, const hoNDArray<float>& timeStamp);
+
+    template EXPORTGTPLUS bool cropOver3rdDimension(const hoNDArray<short>& x, hoNDArray<short>& r, size_t start, size_t end);
+    template EXPORTGTPLUS bool cropOver3rdDimension(const hoNDArray<unsigned short>& x, hoNDArray<unsigned short>& r, size_t start, size_t end);
+    template EXPORTGTPLUS bool cropOver3rdDimension(const hoNDArray<float>& x, hoNDArray<float>& r, size_t start, size_t end);
+    template EXPORTGTPLUS bool cropOver3rdDimension(const hoNDArray<double>& x, hoNDArray<double>& r, size_t start, size_t end);
+    template EXPORTGTPLUS bool cropOver3rdDimension(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r, size_t start, size_t end);
+    template EXPORTGTPLUS bool cropOver3rdDimension(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r, size_t start, size_t end);
+
+    template EXPORTGTPLUS bool setSubArrayOver3rdDimension(const hoNDArray<short>& x, hoNDArray<short>& r, size_t start, size_t end);
+    template EXPORTGTPLUS bool setSubArrayOver3rdDimension(const hoNDArray<unsigned short>& x, hoNDArray<unsigned short>& r, size_t start, size_t end);
+    template EXPORTGTPLUS bool setSubArrayOver3rdDimension(const hoNDArray<float>& x, hoNDArray<float>& r, size_t start, size_t end);
+    template EXPORTGTPLUS bool setSubArrayOver3rdDimension(const hoNDArray<double>& x, hoNDArray<double>& r, size_t start, size_t end);
+    template EXPORTGTPLUS bool setSubArrayOver3rdDimension(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r, size_t start, size_t end);
+    template EXPORTGTPLUS bool setSubArrayOver3rdDimension(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r, size_t start, size_t end);
+
+    template EXPORTGTPLUS bool stdOver3rdDimension(const hoNDArray<float>& x, hoNDArray<float>& std, bool NMinusOne);
+    template EXPORTGTPLUS bool stdOver3rdDimension(const hoNDArray<double>& x, hoNDArray<double>& std, bool NMinusOne);
+    template EXPORTGTPLUS bool stdOver3rdDimension(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& std, bool NMinusOne);
+    template EXPORTGTPLUS bool stdOver3rdDimension(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& std, bool NMinusOne);
+
+    template EXPORTGTPLUS bool permuteE2To3rdDimension(const hoNDArray<float>& x, hoNDArray<float>& r);
+    template EXPORTGTPLUS bool permuteE2To3rdDimension(const hoNDArray<double>& x, hoNDArray<double>& r);
+    template EXPORTGTPLUS bool permuteE2To3rdDimension(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r);
+    template EXPORTGTPLUS bool permuteE2To3rdDimension(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r);
+
+    template EXPORTGTPLUS bool permuteE2To5thDimension(const hoNDArray<float>& x, hoNDArray<float>& r);
+    template EXPORTGTPLUS bool permuteE2To5thDimension(const hoNDArray<double>& x, hoNDArray<double>& r);
+    template EXPORTGTPLUS bool permuteE2To5thDimension(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r);
+    template EXPORTGTPLUS bool permuteE2To5thDimension(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r);
+
+    template EXPORTGTPLUS bool permuteROTo3rdDimensionFor3DRecon(const hoNDArray<float>& x, hoNDArray<float>& r);
+    template EXPORTGTPLUS bool permuteROTo3rdDimensionFor3DRecon(const hoNDArray<double>& x, hoNDArray<double>& r);
+    template EXPORTGTPLUS bool permuteROTo3rdDimensionFor3DRecon(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r);
+    template EXPORTGTPLUS bool permuteROTo3rdDimensionFor3DRecon(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r);
+
+    template EXPORTGTPLUS bool permuteROTo4thDimensionFor3DRecon(const hoNDArray<float>& x, hoNDArray<float>& r);
+    template EXPORTGTPLUS bool permuteROTo4thDimensionFor3DRecon(const hoNDArray<double>& x, hoNDArray<double>& r);
+    template EXPORTGTPLUS bool permuteROTo4thDimensionFor3DRecon(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r);
+    template EXPORTGTPLUS bool permuteROTo4thDimensionFor3DRecon(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r);
+
+    template EXPORTGTPLUS bool permuteROTo1stDimensionFor3DRecon(const hoNDArray<float>& x, hoNDArray<float>& r);
+    template EXPORTGTPLUS bool permuteROTo1stDimensionFor3DRecon(const hoNDArray<double>& x, hoNDArray<double>& r);
+    template EXPORTGTPLUS bool permuteROTo1stDimensionFor3DRecon(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r);
+    template EXPORTGTPLUS bool permuteROTo1stDimensionFor3DRecon(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r);
+
+    template EXPORTGTPLUS bool permute3rdDimensionTo1stDimension(const hoNDArray<float>& x, hoNDArray<float>& r);
+    template EXPORTGTPLUS bool permute3rdDimensionTo1stDimension(const hoNDArray<double>& x, hoNDArray<double>& r);
+    template EXPORTGTPLUS bool permute3rdDimensionTo1stDimension(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r);
+    template EXPORTGTPLUS bool permute3rdDimensionTo1stDimension(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r);
+
+    template EXPORTGTPLUS bool permuteROTo5thDimensionFor3DRecon(const hoNDArray<float>& x, hoNDArray<float>& r);
+    template EXPORTGTPLUS bool permuteROTo5thDimensionFor3DRecon(const hoNDArray<double>& x, hoNDArray<double>& r);
+    template EXPORTGTPLUS bool permuteROTo5thDimensionFor3DRecon(const hoNDArray< std::complex<float> >& x, hoNDArray< std::complex<float> >& r);
+    template EXPORTGTPLUS bool permuteROTo5thDimensionFor3DRecon(const hoNDArray< std::complex<double> >& x, hoNDArray< std::complex<double> >& r);
+
+    template EXPORTGTPLUS bool imageDomainUnwrapping2D(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& ker, hoNDArray< std::complex<float> >& buf, hoNDArray< std::complex<float> >& y);
+    template EXPORTGTPLUS bool imageDomainUnwrapping2D(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& ker, hoNDArray< std::complex<double> >& buf, hoNDArray< std::complex<double> >& y);
+
+    template EXPORTGTPLUS bool imageDomainUnwrapping2DT(const hoNDArray< std::complex<float> >& x, const hoNDArray< std::complex<float> >& ker, hoNDArray< std::complex<float> >& buf, hoNDArray< std::complex<float> >& y);
+    template EXPORTGTPLUS bool imageDomainUnwrapping2DT(const hoNDArray< std::complex<double> >& x, const hoNDArray< std::complex<double> >& ker, hoNDArray< std::complex<double> >& buf, hoNDArray< std::complex<double> >& y);
+
+    template EXPORTGTPLUS bool computePeriodicBoundaryValues(const hoNDArray<float>& x, const hoNDArray<float>& y, float start, float end, hoNDArray<float>& vx, hoNDArray<float>& vy);
+    template EXPORTGTPLUS bool computePeriodicBoundaryValues(const hoNDArray<float>& x, const hoNDArray<double>& y, float start, float end, hoNDArray<float>& vx, hoNDArray<double>& vy);
+    template EXPORTGTPLUS bool computePeriodicBoundaryValues(const hoNDArray<float>& x, const hoNDArray< std::complex<float> >& y, float start, float end, hoNDArray<float>& vx, hoNDArray< std::complex<float> >& vy);
+    template EXPORTGTPLUS bool computePeriodicBoundaryValues(const hoNDArray<float>& x, const hoNDArray< std::complex<double> >& y, float start, float end, hoNDArray<float>& vx, hoNDArray< std::complex<double> >& vy);
+
+    template EXPORTGTPLUS bool computePeriodicBoundaryValues(const hoNDArray<double>& x, const hoNDArray<double>& y, double start, double end, hoNDArray<double>& vx, hoNDArray<double>& vy);
+    template EXPORTGTPLUS bool computePeriodicBoundaryValues(const hoNDArray<double>& x, const hoNDArray<float>& y, double start, double end, hoNDArray<double>& vx, hoNDArray<float>& vy);
+    template EXPORTGTPLUS bool computePeriodicBoundaryValues(const hoNDArray<double>& x, const hoNDArray< std::complex<float> >& y, double start, double end, hoNDArray<double>& vx, hoNDArray< std::complex<float> >& vy);
+    template EXPORTGTPLUS bool computePeriodicBoundaryValues(const hoNDArray<double>& x, const hoNDArray< std::complex<double> >& y, double start, double end, hoNDArray<double>& vx, hoNDArray< std::complex<double> >& vy);
+}
diff --git a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconUtil.h b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconUtil.h
index 0cac782..3cf3657 100644
--- a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconUtil.h
+++ b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconUtil.h
@@ -4,7 +4,7 @@
             The ISMRMRD format is fully supported in this toolbox.
 
             Other functinalities implemented here include:
-            Karhunen-Lo�ve Transform (KLT) or Principle Component Analysis (PCA)
+            Karhunen-Loeve Transform (KLT) or Principal Component Analysis (PCA)
             KSpace filter
             Several MR sensitivity map estimation methods
 
@@ -16,13 +16,15 @@
 
                 Inati SJ, Hansen MS, Kellman P. 
                 A solution to the phase problem in adaptive coil combination. 
-                In: ISMRM proceeding; 20�26 april; salt lake city, utah, USA. ; 2013. 2672.
+                In: ISMRM Proceedings; 20-26 April; Salt Lake City, Utah, USA; 2013. p. 2672.
 
                 Kellman P, McVeigh ER. 
                 Image reconstruction in SNR units: A general method for SNR measurement. 
                 Magnetic Resonance in Medicine 2005;54(6):1439-1447.
 
-            ISMRMRD_SOUHEIL_ITER coil map estimation is not implemented yet.
+            ISMRMRD_SOUHEIL_ITER coil map estimation is based on:
+
+                Inati SJ, Hansen MS, Kellman P. Unpublished algorithm.
 
     \author Hui Xue
 */
@@ -31,7 +33,8 @@
 
 #include "GtPlusExport.h"
 
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
+#include "ismrmrd/meta.h"
 
 #include "boost/tuple/tuple.hpp"
 #include "boost/tuple/tuple_comparison.hpp"
@@ -44,12 +47,14 @@
 #include "ho6DArray.h"
 #include "ho7DArray.h"
 #include "hoMatrix.h"
+#include "hoNDArray_linalg.h"
 #include "hoNDFFT.h"
 #include "hoNDArray_utils.h"
-#include "hoNDArray_blas.h"
 #include "hoNDArray_elemwise.h"
-#include "hoNDArray_operators.h"
-#include "util/gtPlusIOAnalyze.h"
+#include "hoNDImage_util.h"
+#include "hoNDArray_reductions.h"
+#include "hoNDArray_linalg.h"
+#include "gtPlusIOAnalyze.h"
 #include "hoNDArrayMemoryManaged.h"
 #include "GadgetronTimer.h"
 
@@ -57,90 +62,207 @@
     #include <omp.h>
 #endif // USE_OMP
 
-#ifdef USE_CUDA
-    #include "GPUTimer.h"
-    #include "b1_map.h"
-    #include "cudaDeviceManager.h"
-    #include "cuNDArray_elemwise.h"
-#endif // USE_CUDA
+#include "GtPlusDefinition.h"
+
+namespace Gadgetron {
+
+    /**
+    * @brief sum over last dimension of an array
+             e.g. for a 4D array, sum over the 4th dimension and get a 3D array
+    */
+    template<typename T> EXPORTGTPLUS bool sumOverLastDimension(const hoNDArray<T>& x, hoNDArray<T>& r); // 
+
+    /**
+    * @brief sum over the second last dimension of an array
+             e.g. for a 4D array, sum over the 3rd dimension and get a 3D array
+    */
+    template<typename T> EXPORTGTPLUS bool sumOverSecondLastDimension(const hoNDArray<T>& x, hoNDArray<T>& r);
+
+    /**
+    * @brief multiply over the last dimension of y by x
+             e.g. x is 3D and y is 4D array, r(:,:,:,n) = y(:,:,:,n) .* x
+    */
+    template<typename T> EXPORTGTPLUS bool multiplyOverLastDimension(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r);
+
+    /**
+    * @brief divide the last dimension of y by x
+             e.g. x is 3D and y is 4D array, r(:,:,:,n) = y(:,:,:,n) ./ x
+    */
+    template<typename T> EXPORTGTPLUS bool divideOverLastDimension(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r);
+
+    /**
+    * @brief sum over the 1st dimension of an array
+             e.g. for a 2D array, sum over the 1st dimension and get an array of [1 E1]
+    */
+    template<typename T> EXPORTGTPLUS bool sumOver1stDimension(const hoNDArray<T>& x, hoNDArray<T>& r);
+
+    /**
+    * @brief sum over the 2nd dimension of an array
+             e.g. for a 3D array, sum over the 2nd dimension and get an array of [RO 1 CHA]
+    */
+    template<typename T> EXPORTGTPLUS bool sumOver2ndDimension(const hoNDArray<T>& x, hoNDArray<T>& r);
+
+    /**
+    * @brief sum over the 3rd dimension of an array
+             e.g. for a 4D array, sum over the 3rd dimension and get an array of [RO E1 1 N]
+    */
+    template<typename T> EXPORTGTPLUS bool sumOver3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& r);
+
+    /**
+    * @brief sum over the 4th dimension of an array
+             e.g. for a 5D array [RO E1 CHA N S], sum over the 4th dimension and get an array of [RO E1 CHA 1 S]
+    */
+    template<typename T> EXPORTGTPLUS bool sumOver4thDimension(const hoNDArray<T>& x, hoNDArray<T>& r);
+
+    /**
+    * @brief sum over the 5th dimension of an array
+             e.g. for a 6D array, sum over the 5th dimension and get an array [RO E1 CHA N 1 P]
+    */
+    template<typename T> EXPORTGTPLUS bool sumOver5thDimension(const hoNDArray<T>& x, hoNDArray<T>& r);
+
+    /**
+    * @brief multiply over the 3rd/4th/5th dimension of y by x
+             e.g. x is 3D and y is 4D array, r(:,:,n,:) = y(:,:,n,:) .* x
+             e.g. x is 4D and y is 5D array, r(:,:,:,n,:) = y(:,:,:,n,:) .* x
+             e.g. x is 5D and y is 6D array, r(:,:,:,:, n,:) = y(:,:,:,:,n,:) .* x
+    */
+    template<typename T> EXPORTGTPLUS bool multiplyOver3rdDimension(const hoNDArray<T>& x3D, const hoNDArray<T>& y4D, hoNDArray<T>& r);
+    template<typename T> EXPORTGTPLUS bool multiplyOver4thDimension(const hoNDArray<T>& x4D, const hoNDArray<T>& y5D, hoNDArray<T>& r);
+    template<typename T> EXPORTGTPLUS bool multiplyOver5thDimension(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r);
+
+    /**
+    * @brief multiply over the 4th/5th dimension of y by x except for dimension index n
+             e.g. x is 4D and y is 5D array, r(:,:,:,t,:) = y(:,:,:,t,:) .* x, except for r(:,:,:,n,:) = y(:,:,:,n,:)
+             e.g. x is 5D and y is 6D array, r(:,:,:,:,t,:) = y(:,:,:,:,t,:) .* x, except for r(:,:,:,:,n,:) = y(:,:,:,:,n,:)
+    */
+    template<typename T> EXPORTGTPLUS bool multiplyOver4thDimensionExcept(const hoNDArray<T>& x4D, const hoNDArray<T>& y5D, size_t n, hoNDArray<T>& r, bool copyY2R=true);
+    template<typename T> EXPORTGTPLUS bool multiplyOver5thDimensionExcept(const hoNDArray<T>& x, const hoNDArray<T>& y, size_t n, hoNDArray<T>& r, bool copyY2R=true);
+
+    /**
+    * @brief r = x add/multiply/divide y for every part of y
+    */
+    template<typename T> EXPORTGTPLUS bool multipleAdd(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r);
+    template<typename T> EXPORTGTPLUS bool multipleMultiply(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r);
+    template<typename T> EXPORTGTPLUS bool multipleDivide(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r);
+
+    /**
+    * @brief copy the sub-array of x to r
+             the sub-array is defined by its starting index and array size
+    */
+    template<typename T> EXPORTGTPLUS bool cropUpTo11DArray(const hoNDArray<T>& x, hoNDArray<T>& r, const std::vector<size_t>& startND, std::vector<size_t>& size);
+
+    /**
+    * @brief set the sub-array of r from x
+             the sub-array is defined by its starting index and array size
+    */
+    template<typename T> EXPORTGTPLUS bool setSubArrayUpTo11DArray(const hoNDArray<T>& x, hoNDArray<T>& r, const std::vector<size_t>& startND, std::vector<size_t>& size);
+
+    /**
+    * @brief extract sampled lines from an NDArray
+             timeStamp indicates sampled lines; -1 for unsampled lines
+             x : [Ro E1 Cha Slice E2 Con Phase Rep Set Seg AVE]
+             timeStamp: [1 E1 1 Slice E2 Con Phase Rep Set Seg AVE]
+    */
+    template<typename T> EXPORTGTPLUS bool extractSampledLinesUpTo11DArray(const hoNDArray<T>& x, hoNDArray<T>& r, const hoNDArray<float>& timeStamp, double acceFactorE1, double acceFactorE2);
+
+    /**
+    * @brief fill sampled lines to an NDArray
+             timeStamp indicates sampled lines; -1 for unsampled lines
+    */
+    template<typename T> EXPORTGTPLUS bool fillSampledLinesUpTo11DArray(const hoNDArray<T>& x, hoNDArray<T>& r, const hoNDArray<float>& timeStamp);
+
+    /**
+    * @brief copy the sub-array of x to r only along the 3rd dimension
+             e.g. x is [RO E1 D3 ...], r will be [RO E1 end-start+1 ... ]
+    */
+    template<typename T> EXPORTGTPLUS bool cropOver3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& r, size_t start, size_t end);
+
+    /**
+    * @brief set the sub-array of r from x only along the 3rd dimension
+             e.g. r(:, :, start:end, :, ...) will be replaced by x
+    */
+    template<typename T> EXPORTGTPLUS bool setSubArrayOver3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& r, size_t start, size_t end);
+
+    /**
+    * @brief compute the standard deviation along the 3rd dimension, if NMinusOne == true, divided by N-1; otherwise, divided by N
+    */
+    template<typename T> EXPORTGTPLUS bool stdOver3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& std, bool NMinusOne);
+
+    /**
+    * @brief permute E2 dimension of x : [RO E1 CHA SLC E2 ...] to r: [RO E1 E2 CHA SLC ...]
+    */
+    template<typename T> EXPORTGTPLUS bool permuteE2To3rdDimension(const hoNDArray<T>& x, hoNDArray<T>& r);
+
+    /**
+    * @brief permute E2 dimension of x : [RO E1 E2 CHA SLC ...] to r: [RO E1 CHA SLC E2 ...]
+    */
+    template<typename T> EXPORTGTPLUS bool permuteE2To5thDimension(const hoNDArray<T>& x, hoNDArray<T>& r);
+
+    /**
+    * @brief permute RO dimension of x to the 3rd dimension
+             x : [RO E1 E2 ...], r: [E1 E2 RO ...]
+    */
+    template<typename T> EXPORTGTPLUS bool permuteROTo3rdDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r);
+
+    /**
+    * @brief permute RO dimension of x to the 4th dimension
+             x : [RO E1 E2 CHA ...], r: [E1 E2 CHA RO ...]
+    */
+    template<typename T> EXPORTGTPLUS bool permuteROTo4thDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r);
+
+    /**
+    * @brief permute RO dimension of x back to the 1st dimension
+             x : [E1 E2 CHA RO ...], r: [RO E1 E2 CHA ...]
+    */
+    template<typename T> EXPORTGTPLUS bool permuteROTo1stDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r);
+
+    /**
+    * @brief permute the 3rd dimension of x to the 1st dimension
+             x : [RO E1 E2 CHA ...], r: [E2 RO E1 CHA ...]
+    */
+    template<typename T> EXPORTGTPLUS bool permute3rdDimensionTo1stDimension(const hoNDArray<T>& x, hoNDArray<T>& r);
+
+    /**
+    * @brief permute RO dimension of x to the 5th dimension
+             x : [RO E1 E2 srcCHA dstCHA ...], r: [E1 E2 srcCHA dstCHA RO ...]
+    */
+    template<typename T> EXPORTGTPLUS bool permuteROTo5thDimensionFor3DRecon(const hoNDArray<T>& x, hoNDArray<T>& r);
+
+    /**
+    * @brief Image domain unwrapping for 2D
+             x : [RO E1 srcCHA], ker [RO E1 srcCHA dstCHA]
+             buf is a buffer for computation and needs to be pre-allocated as [RO E1 srcCHA]; y is [RO E1 dstCHA]
+             for the sake of speed, no check is made in this function
+    */
+    template<typename T> EXPORTGTPLUS bool imageDomainUnwrapping2D(const hoNDArray<T>& x, const hoNDArray<T>& ker, hoNDArray<T>& buf, hoNDArray<T>& y);
+
+    /**
+    * @brief Image domain unwrapping for 2D
+             x : [RO E1 srcCHA N], ker [RO E1 srcCHA dstCHA 1 or N], 
+             buf is a buffer for computation and needs to be pre-allocated as [RO E1 srcCHA]; y is [RO E1 dstCHA N]
+             for the sake of speed, no check is made in this function
+    */
+    template<typename T> EXPORTGTPLUS bool imageDomainUnwrapping2DT(const hoNDArray<T>& x, const hoNDArray<T>& ker, hoNDArray<T>& buf, hoNDArray<T>& y);
+
+    /**
+    * @brief compute periodic boundary values for an array
+             x : [N 1] the data point location, y[N M] data point values at x
+             r : [N+2 M], the data point values with computed boundaries
+    */
+    template<typename CoordType, typename T> EXPORTGTPLUS bool computePeriodicBoundaryValues(const hoNDArray<CoordType>& x, const hoNDArray<T>& y, CoordType start, CoordType end, hoNDArray<CoordType>& vx, hoNDArray<T>& vy);
+}
 
 namespace Gadgetron { namespace gtPlus {
 
-// define the dimensions of ISMRMRD
-enum ISMRMRDDIM
-{
-    DIM_ReadOut = 32,
-    DIM_Encoding1,
-    DIM_Channel,
-    DIM_Slice,
-    DIM_Encoding2,
-    DIM_Contrast,
-    DIM_Phase,
-    DIM_Repetition,
-    DIM_Set,
-    DIM_Segment,
-    DIM_Average,
-    DIM_other1,
-    DIM_other2,
-    DIM_other3,
-    DIM_NONE
-};
-
-// define the reconstruction algorithms
-enum ISMRMRDALGO
-{
-    ISMRMRD_GRAPPA = 64,
-    ISMRMRD_SENSE,
-    ISMRMRD_SPIRIT,
-    ISMRMRD_L1SPIRIT,
-    ISMRMRD_SOFTSENSE,
-    ISMRMRD_L1SOFTSENSE,
-    ISMRMRD_NONE
-};
-
-// define the coil sensitivity map estimation algorithms
-enum ISMRMRDCOILMAPALGO
-{
-    ISMRMRD_SOUHEIL = 96,
-    ISMRMRD_SOUHEIL_ITER
-};
-
-// define the partial fourier/asymmetric echo handling algorithms
-enum ISMRMRDPFALGO
-{
-    ISMRMRD_PF_HOMODYNE = 128,          // iterative homodyne
-    ISMRMRD_PF_POCS,                    // POCS
-    ISMRMRD_PF_FENGHUANG,               // convolution based method
-    ISMRMRD_PF_ZEROFILLING_FILTER,      // zero-filling with partial fourier filter
-    ISMRMRD_PF_ZEROFILLING              // zero-filling without partial fourier filter
-};
-
-// define the kspace filter type
-enum ISMRMRDKSPACEFILTER
-{
-    ISMRMRD_FILTER_GAUSSIAN = 160,
-    ISMRMRD_FILTER_HANNING,
-    ISMRMRD_FILTER_TUKEY,
-    ISMRMRD_FILTER_TAPERED_HANNING,
-    ISMRMRD_FILTER_NONE
-};
-
-// define the calibration mode of ISMRMRD
-enum ISMRMRDCALIBMODE
-{
-    ISMRMRD_embedded = 256,
-    ISMRMRD_interleaved,
-    ISMRMRD_separate,
-    ISMRMRD_external,
-    ISMRMRD_other,
-    ISMRMRD_noacceleration
-};
+// ================================================================================================== //
 
 template <typename T> 
 class gtPlusISMRMRDReconUtil
 {
 public:
 
+    typedef typename realType<T>::Type value_type;
+
     gtPlusISMRMRDReconUtil();
     virtual ~gtPlusISMRMRDReconUtil();
 
@@ -224,12 +346,12 @@ public:
     // kspace filter
     // ------------------------------------------------------------------------
     bool compute2DFilterFromTwo1D(const hoNDArray<T>& fx, const hoNDArray<T>& fy, hoNDArray<T>& fxy);
-    bool compute2DFilterFromTwo1D(const hoNDArray<float>& fx, const hoNDArray<float>& fy, hoNDArray<GT_Complex8>& fxy);
-    bool compute2DFilterFromTwo1D(const hoNDArray<double>& fx, const hoNDArray<double>& fy, hoNDArray<GT_Complex16>& fxy);
+    bool compute2DFilterFromTwo1D(const hoNDArray<float>& fx, const hoNDArray<float>& fy, hoNDArray< std::complex<float> >& fxy);
+    bool compute2DFilterFromTwo1D(const hoNDArray<double>& fx, const hoNDArray<double>& fy, hoNDArray< std::complex<double> >& fxy);
 
     bool compute3DFilterFromThree1D(const hoNDArray<T>& fx, const hoNDArray<T>& fy, const hoNDArray<T>& fz, hoNDArray<T>& fxyz);
-    bool compute3DFilterFromThree1D(const hoNDArray<float>& fx, const hoNDArray<float>& fy, const hoNDArray<float>& fz, hoNDArray<GT_Complex8>& fxyz);
-    bool compute3DFilterFromThree1D(const hoNDArray<double>& fx, const hoNDArray<double>& fy, const hoNDArray<double>& fz, hoNDArray<GT_Complex16>& fxyz);
+    bool compute3DFilterFromThree1D(const hoNDArray<float>& fx, const hoNDArray<float>& fy, const hoNDArray<float>& fz, hoNDArray< std::complex<float> >& fxyz);
+    bool compute3DFilterFromThree1D(const hoNDArray<double>& fx, const hoNDArray<double>& fy, const hoNDArray<double>& fz, hoNDArray< std::complex<double> >& fxyz);
 
     // data: in kspace, [RO E1 E2 CHA SLC CON PHS REP SET]
     bool kspacefilterRO(hoNDArray<T>& data, const hoNDArray<T>& fRO);
@@ -258,7 +380,7 @@ public:
     // symmetric filter, used for image filtering
     // sigma: for Gaussian, in the unit of pixel
     // width: for Tukey filter etc., the length of transition band
-    bool generateSymmetricFilter(size_t len, hoNDArray<T>& filter, ISMRMRDKSPACEFILTER filterType, double sigma, size_t width);
+    bool generateSymmetricFilter(size_t len, size_t start, size_t end, hoNDArray<T>& filter, ISMRMRDKSPACEFILTER filterType, double sigma, size_t width);
 
     // asymmetric filter, used for partial fourier/asymmetric echo filtering
     // start, end: the data range
@@ -370,9 +492,15 @@ public:
     // get the partial fourier/asymmetric echo handling algorithm from name
     ISMRMRDPFALGO getISMRMRDPartialFourierReconAlgoFromName(const std::string& name);
 
+    // get the partial fourier/asymmetric echo handling algorithm name from algorithm
+    std::string getNameFromISMRMRDPartialFourierReconAlgo(ISMRMRDPFALGO algo);
+
     // get the kspace filter algorithm from name
     ISMRMRDKSPACEFILTER getISMRMRDKSpaceFilterFromName(const std::string& name);
 
+    // get retro-gating interpolation method from name
+    ISMRMRDINTERPRETROGATING getISMRMRDRetroGatingInterpFromName(const std::string& name);
+
     // extract sub array for a dimension
     // if lessEqual ==  true, [0:value] are extracted for dim
     bool extractSubArrayForDim(const hoNDArray<T>& x, hoNDArray<T>& r, ISMRMRDDIM& dim, size_t value, bool lessEqual);
@@ -407,15 +535,74 @@ public:
     void findStartEndROAfterZeroFilling(size_t centre_column, size_t samples_zerofilled, int& startRO, int& endRO);
 
     // ------------------------------------------------------------------------
+    // ISMRMRD image header
+    // ------------------------------------------------------------------------
+    // set the meta attributes from the ISMRMRD image header
+    bool setMetaAttributesFromImageHeaderISMRMRD(const ISMRMRD::ImageHeader& imgHeader, ISMRMRD::MetaContainer& attrib);
+
+    // set the ISMRMRD image header from the meta attributes
+    bool setImageHeaderISMRMRDFromMetaAttributes(const ISMRMRD::MetaContainer& attrib, ISMRMRD::ImageHeader& imgHeader);
+
+    // ------------------------------------------------------------------------
     // utility functions for various things
     // ------------------------------------------------------------------------
     // jobSchedule : for every valid device, it records the job allocated to it
     // what is stored are valid device id and job packages allocated to it
     // for one valid device, multiple job packages can be given to it
-    #ifdef USE_CUDA
-        bool cudaJobSplitter(const std::vector<unsigned int>& jobIDs, size_t jobSize, size_t minimalMemoryForValidDevice, std::vector< std::pair<unsigned int, std::vector<std::vector<unsigned int> > > >& jobSchedule);
-        bool cudaJobSplitter(unsigned int numOfJobs, size_t jobSize, size_t minimalMemoryForValidDevice, std::vector< std::pair<unsigned int, std::vector<std::vector<unsigned int> > > >& jobSchedule);
-    #endif // USE_CUDA
+
+    // load two hoNDArray and compute differences
+    void compareAgainstGroundTruthArray(const std::string& gt_filename, const hoNDArray<T>& x, typename realType<T>::Type& normDiff, typename realType<T>::Type& maxNormDiff);
+    void compareAgainstGroundTruthArray(const hoNDArray<T>& gt, const hoNDArray<T>& x, typename realType<T>::Type& normDiff, typename realType<T>::Type& maxNormDiff);
+
+    template <typename T2, unsigned int D> void compareAgainstGroundTruthImage(const std::string& gt_filename, const hoNDImage<T2, D>& x, typename realType<T2>::Type& normDiff, typename realType<T2>::Type& maxNormDiff)
+    {
+        hoNDImage<T2, D> gt;
+
+        gtPlusIOAnalyze gt_io;
+        gt_io.importImage(gt, gt_filename);
+
+        compareAgainstGroundTruthImage(gt, x, normDiff, maxNormDiff);
+    }
+
+    template <typename T2, unsigned int D> void compareAgainstGroundTruthImage(const hoNDImage<T2, D>& gt, const hoNDImage<T2, D>& x, typename realType<T2>::Type& normDiff, typename realType<T2>::Type& maxNormDiff)
+    {
+        hoNDImage<T2, D> diff(x);
+        Gadgetron::subtract(gt, x, diff);
+
+        hoNDImage<T2, D> gtEps(gt);
+        Gadgetron::addEpsilon(gtEps);
+
+        Gadgetron::norm2(diff, normDiff);
+
+        Gadgetron::divide(diff, gtEps, diff);
+
+        T2 maxV;
+        size_t ind;
+        Gadgetron::maxAbsolute(diff, maxV, ind);
+        maxNormDiff = std::abs(maxV);
+    }
+
+    void getCurrentMoment(std::string& procTime)
+    {
+        char timestamp[100];
+        time_t mytime;
+        struct tm *mytm;
+        mytime=time(NULL);
+        mytm=localtime(&mytime);
+        strftime(timestamp, sizeof(timestamp),"%a, %b %d %Y, %H:%M:%S",mytm);
+        procTime = timestamp;
+    }
+
+    void getCurrentMomentForFileName(std::string& procTime)
+    {
+        char timestamp[100];
+        time_t mytime;
+        struct tm *mytm;
+        mytime=time(NULL);
+        mytm=localtime(&mytime);
+        strftime(timestamp, sizeof(timestamp),"%a_%b_%d_%Y_%H_%M_%S",mytm);
+        procTime = timestamp;
+    }
 };
 
 // utility functions only meaningful for complex data type
@@ -424,6 +611,8 @@ class gtPlusISMRMRDReconUtilComplex : public gtPlusISMRMRDReconUtil<T>
 {
 public:
 
+    typedef typename realType<T>::Type value_type;
+
     gtPlusISMRMRDReconUtilComplex();
     virtual ~gtPlusISMRMRDReconUtilComplex();
 
@@ -486,23 +675,25 @@ public:
     // coil estimation using NIH method
     // data: in image domain, at least 3D [RO E1 CHA], the coil map will be estimated for every 2D kspace
     bool coilMap2DNIH(const hoNDArray<T>& data, hoNDArray<T>& coilMap, ISMRMRDCOILMAPALGO algo, size_t ks=11, size_t power=3, size_t iterNum=5, typename realType<T>::Type thres=1e-3, bool useGPU=true);
-    bool coilMap2DNIHGPU(const hoNDArray<T>& data, hoNDArray<T>& coilMap, ISMRMRDCOILMAPALGO algo, size_t ks=11, size_t power=3, size_t iterNum=5, typename realType<T>::Type thres=1e-3);
 
     // data: in image domain, at least 4D [RO E1 E2 CHA], the coil map will be estimated for every 2D kspace [RO E1 CHA] across E2
     bool coilMap3DNIH(const hoNDArray<T>& data, hoNDArray<T>& coilMap, ISMRMRDCOILMAPALGO algo, size_t ks=7, size_t power=3, size_t iterNum=5, typename realType<T>::Type thres=1e-3, bool true3D=false);
-    // a gpu version of coil map 3D estimation, this function should only be used for the full-res coil map estimation
-    // if gpu is not available, it calls coilMap3DNIH
-    bool coilMap3DNIHGPU_FullResMap(const hoNDArray<T>& data, hoNDArray<T>& coilMap, ISMRMRDCOILMAPALGO algo, size_t ks=7, size_t power=3, size_t iterNum=5, typename realType<T>::Type thres=1e-3, bool true3D=false);
 
     // the Souheil method
     // data: [RO E1 CHA], only 3D array
     // these functions are using 2D data correlation matrix
     bool coilMap2DNIHInner(const hoNDArray<T>& data, hoNDArray<T>& coilMap, size_t ks, size_t power);
-    bool coilMap2DNIHInner_2(const hoNDArray<T>& data, hoNDArray<T>& coilMap, size_t ks, size_t power);
 
     // data: [RO E1 E2 CHA], this functions uses true 3D data correlation matrix
     bool coilMap3DNIHInner(const hoNDArray<T>& data, hoNDArray<T>& coilMap, size_t ks, size_t power);
 
+    // the Souheil iteration method
+    // data: [RO E1 CHA], only 3D array
+    bool coilMap2DNIH2Inner(const hoNDArray<T>& data, hoNDArray<T>& coilMap, size_t ks, size_t iterNum, typename realType<T>::Type thres);
+
+    // data: [RO E1 E2 CHA], true 3D coil map estimation
+    bool coilMap3DNIH2Inner(const hoNDArray<T>& data, hoNDArray<T>& coilMap, size_t ks, size_t kz, size_t iterNum, typename realType<T>::Type thres);
+
     // sum of square coil combination
     // data: in image domain, at least 3D [RO E1 CHA]
     bool sumOfSquare(const hoNDArray<T>& data, hoNDArray<T>& sos);
diff --git a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconUtil.hxx b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconUtil.hxx
index f52f34c..a08fba3 100644
--- a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconUtil.hxx
+++ b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconUtil.hxx
@@ -1,5 +1,6 @@
 
 #include "gtPlusISMRMRDReconUtil.h"
+#include "hoNDArray_elemwise.h"
 
 namespace Gadgetron { namespace gtPlus {
 
@@ -34,34 +35,45 @@ KLT_eigenAnalysis(const hoMatrix<T>& data, hoMatrix<T>& eigenVectors, hoMatrix<T
 
         GADGET_CHECK_RETURN_FALSE(eigenVectors.createMatrix(N, N));
         GADGET_CHECK_RETURN_FALSE(eigenValues.createMatrix(N, 1));
+        Gadgetron::clear(eigenVectors);
+        Gadgetron::clear(eigenValues);
 
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::GeneralMatrixProduct_gemm(eigenVectors, data, true, data, false));
+        //hoMatrix<T> dataCopy(data);
+        //GADGET_CHECK_RETURN_FALSE(Gadgetron::gemm(eigenVectors, data, true, dataCopy, false));
+
+        char uplo = 'L';
+        bool isAHA = true;
+        Gadgetron::herk(eigenVectors, data, uplo, isAHA);
+        eigenVectors.copyLowerTriToUpper();
 
         //eigenVectors.print(std::cout);
 
         hoMatrix<T> mean(N, 1);
         GADGET_CHECK_RETURN_FALSE(data.sumOverCol(mean));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal((ValueType)1.0/M, mean));
+        Gadgetron::scal((ValueType)1.0/M, mean);
 
         //mean.print(std::cout);
 
         hoMatrix<T> MMH(N, N);
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::GeneralMatrixProduct_gemm(MMH, mean, false, mean, true));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal((ValueType)M, MMH));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::subtract(eigenVectors, MMH, eigenVectors));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal((ValueType)1.0/(M-1), eigenVectors));
+        Gadgetron::clear(MMH);
+
+        hoMatrix<T> meanCopy(mean);
+        Gadgetron::gemm(MMH, meanCopy, false, mean, true);
+        Gadgetron::scal((ValueType)M, MMH);
+        Gadgetron::subtract(eigenVectors, MMH, eigenVectors);
+        Gadgetron::scal((ValueType)1.0/(M-1), eigenVectors);
 
         //MMH.print(std::cout);
         //eigenVectors.print(std::cout);
 
         hoMatrix<T> EH(eigenVectors);
-        GADGET_CHECK_RETURN_FALSE(conjugatetrans(eigenVectors, EH));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::add(eigenVectors, EH, eigenVectors));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal(0.5, eigenVectors));
+        conjugatetrans(eigenVectors, EH);
+        Gadgetron::add(eigenVectors, EH, eigenVectors);
+        Gadgetron::scal( (ValueType)(0.5), eigenVectors);
 
         //eigenVectors.print(std::cout);
 
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::EigenAnalysis_syev_heev2(eigenVectors, eigenValues));
+        Gadgetron::heev(eigenVectors, eigenValues);
     }
     catch(...)
     {
@@ -86,9 +98,10 @@ KLT_applyEigen(const hoMatrix<T>& data, hoMatrix<T>& dataEigen, const hoMatrix<T
         size_t K = eigenVectors.cols();
 
         GADGET_CHECK_RETURN_FALSE(dataEigen.createMatrix(M, K));
+        Gadgetron::clear(dataEigen);
 
         // M*N multiplies N*K
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::GeneralMatrixProduct_gemm(dataEigen, data, false, eigenVectors, false));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::gemm(dataEigen, data, false, eigenVectors, false));
     }
     catch(...)
     {
@@ -117,7 +130,8 @@ KLT_applyEigen(const hoNDArray<T>& data, hoNDArray<T>& dataEigen, const hoMatrix
         hoNDArray<T> eigenVec(eigenVectors.get_dimensions(), const_cast<T*>(eigenVectors.begin()));
 
         // M*N multiplies N*K
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::GeneralMatrixProduct_gemm(dataEigen, data, false, eigenVec, false));
+        Gadgetron::clear(dataEigen);
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::gemm(dataEigen, data, false, eigenVec, false));
     }
     catch(...)
     {
@@ -547,10 +561,11 @@ bool gtPlusISMRMRDReconUtil<T>::computeKLFilter(const hoNDArray<T>& data, size_t
         GADGET_CHECK_RETURN_FALSE(Gadgetron::conjugatetrans(eigenVectors, ET));
 
         hoMatrix<T> EET(M, M);
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::GeneralMatrixProduct_gemm(EET, E, false, ET, false));
+        Gadgetron::clear(EET);
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::gemm(EET, E, false, ET, false));
 
         hoMatrix<T> R(N, M, dataKLF.begin());
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::GeneralMatrixProduct_gemm(R, A, false, EET, false));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::gemm(R, A, false, EET, false));
     }
     catch(...)
     {
@@ -695,7 +710,7 @@ zeropad3D(const hoNDArray<T>& data, size_t sizeX, size_t sizeY, size_t sizeZ, ho
 
             long long z;
             // #pragma omp parallel for default(none) private(z) shared(pDst, pSrc, sE2, eE2, sE1, eE1, sRO, RO, E1, E2, sizeX, sizeY, sizeZ) num_threads(2)
-            for ( z=sE2; z<=eE2; z++ )
+            for ( z=(long long)sE2; z<=(long long)eE2; z++ )
             {
                 long long o1 = z*sizeX*sizeY + sRO;
                 long long o2 = (z-sE2)*RO*E1;
@@ -763,7 +778,7 @@ zeropad3DNoPresetZeros(const hoNDArray<T>& data, size_t sizeX, size_t sizeY, siz
 
             long long z;
             //#pragma omp parallel for default(none) private(z) shared(pDst, pSrc, sE2, eE2, sE1, eE1, sRO, RO, E1, E2, sizeX, sizeY, sizeZ) num_threads(2)
-            for ( z=sE2; z<=eE2; z++ )
+            for ( z=(long long)sE2; z<=(long long)eE2; z++ )
             {
                 long long o1 = z*sizeX*sizeY + sRO;
                 long long o2 = (z-sE2)*RO*E1;
@@ -930,7 +945,7 @@ compute2DFilterFromTwo1D(const hoNDArray<T>& fx, const hoNDArray<T>& fy, hoNDArr
 
 template <typename T> 
 bool gtPlusISMRMRDReconUtil<T>::
-compute2DFilterFromTwo1D(const hoNDArray<float>& fx, const hoNDArray<float>& fy, hoNDArray<GT_Complex8>& fxy)
+compute2DFilterFromTwo1D(const hoNDArray<float>& fx, const hoNDArray<float>& fy, hoNDArray< std::complex<float> >& fxy)
 {
     try
     {
@@ -938,7 +953,7 @@ compute2DFilterFromTwo1D(const hoNDArray<float>& fx, const hoNDArray<float>& fy,
         size_t E1 = fy.get_size(0);
 
         fxy.create(RO, E1);
-        GT_Complex8* pFxy = fxy.begin();
+         std::complex<float> * pFxy = fxy.begin();
 
         size_t x, y;
 
@@ -946,7 +961,7 @@ compute2DFilterFromTwo1D(const hoNDArray<float>& fx, const hoNDArray<float>& fy,
         {
             for ( x=0; x<RO; x++ )
             {
-                pFxy[y*RO+x] = GT_Complex8(fx(x) * fy(y));
+                pFxy[y*RO+x] =  std::complex<float> (fx(x) * fy(y));
             }
         }
     }
@@ -961,7 +976,7 @@ compute2DFilterFromTwo1D(const hoNDArray<float>& fx, const hoNDArray<float>& fy,
 
 template <typename T> 
 bool gtPlusISMRMRDReconUtil<T>::
-compute2DFilterFromTwo1D(const hoNDArray<double>& fx, const hoNDArray<double>& fy, hoNDArray<GT_Complex16>& fxy)
+compute2DFilterFromTwo1D(const hoNDArray<double>& fx, const hoNDArray<double>& fy, hoNDArray< std::complex<double> >& fxy)
 {
     try
     {
@@ -969,7 +984,7 @@ compute2DFilterFromTwo1D(const hoNDArray<double>& fx, const hoNDArray<double>& f
         size_t E1 = fy.get_size(0);
 
         fxy.create(RO, E1);
-        GT_Complex16* pFxy = fxy.begin();
+         std::complex<double> * pFxy = fxy.begin();
 
         size_t x, y;
 
@@ -977,7 +992,7 @@ compute2DFilterFromTwo1D(const hoNDArray<double>& fx, const hoNDArray<double>& f
         {
             for ( x=0; x<RO; x++ )
             {
-                pFxy[y*RO+x] = GT_Complex16(fx(x) * fy(y));
+                pFxy[y*RO+x] =  std::complex<double> (fx(x) * fy(y));
             }
         }
     }
@@ -1038,7 +1053,7 @@ compute3DFilterFromThree1D(const hoNDArray<T>& fx, const hoNDArray<T>& fy, const
 
 template <typename T> 
 bool gtPlusISMRMRDReconUtil<T>::
-compute3DFilterFromThree1D(const hoNDArray<float>& fx, const hoNDArray<float>& fy, const hoNDArray<float>& fz, hoNDArray<GT_Complex8>& fxyz)
+compute3DFilterFromThree1D(const hoNDArray<float>& fx, const hoNDArray<float>& fy, const hoNDArray<float>& fz, hoNDArray< std::complex<float> >& fxyz)
 {
     try
     {
@@ -1047,7 +1062,7 @@ compute3DFilterFromThree1D(const hoNDArray<float>& fx, const hoNDArray<float>& f
         size_t E2 = fz.get_size(0);
 
         fxyz.create(RO, E1, E2);
-        GT_Complex8* pFxyz = fxyz.begin();
+         std::complex<float> * pFxyz = fxyz.begin();
 
         size_t x, y, z;
 
@@ -1057,7 +1072,7 @@ compute3DFilterFromThree1D(const hoNDArray<float>& fx, const hoNDArray<float>& f
             {
                 for ( x=0; x<RO; x++ )
                 {
-                    pFxyz[z+RO*E1+y*RO+x] = GT_Complex8(fx(x)*fy(y)*fz(z));
+                    pFxyz[z+RO*E1+y*RO+x] =  std::complex<float> (fx(x)*fy(y)*fz(z));
                 }
             }
         }
@@ -1073,7 +1088,7 @@ compute3DFilterFromThree1D(const hoNDArray<float>& fx, const hoNDArray<float>& f
 
 template <typename T> 
 bool gtPlusISMRMRDReconUtil<T>::
-compute3DFilterFromThree1D(const hoNDArray<double>& fx, const hoNDArray<double>& fy, const hoNDArray<double>& fz, hoNDArray<GT_Complex16>& fxyz)
+compute3DFilterFromThree1D(const hoNDArray<double>& fx, const hoNDArray<double>& fy, const hoNDArray<double>& fz, hoNDArray< std::complex<double> >& fxyz)
 {
     try
     {
@@ -1082,7 +1097,7 @@ compute3DFilterFromThree1D(const hoNDArray<double>& fx, const hoNDArray<double>&
         size_t E2 = fz.get_size(0);
 
         fxyz.create(RO, E1, E2);
-        GT_Complex16* pFxyz = fxyz.begin();
+         std::complex<double> * pFxyz = fxyz.begin();
 
         size_t x, y, z;
 
@@ -1092,7 +1107,7 @@ compute3DFilterFromThree1D(const hoNDArray<double>& fx, const hoNDArray<double>&
             {
                 for ( x=0; x<RO; x++ )
                 {
-                    pFxyz[z+RO*E1+y*RO+x] = GT_Complex16(fx(x)*fy(y)*fz(z));
+                    pFxyz[z+RO*E1+y*RO+x] =  std::complex<double> (fx(x)*fy(y)*fz(z));
                 }
             }
         }
@@ -1493,14 +1508,23 @@ kspace3DfilterROE1E2(const hoNDArray<T>& data, const hoNDArray<T>& fRO, const ho
 
 template <typename T> 
 bool gtPlusISMRMRDReconUtil<T>::
-generateSymmetricFilter(size_t len, hoNDArray<T>& filter, ISMRMRDKSPACEFILTER filterType, double sigma, size_t width)
+generateSymmetricFilter(size_t len, size_t start, size_t end, hoNDArray<T>& filter, ISMRMRDKSPACEFILTER filterType, double sigma, size_t width)
 {
     try
     {
         if ( len == 0 ) return true;
 
+        if ( start > len-1 ) start = 0;
+        if ( end > len-1 ) end = len-1;
+
+        if ( start > end )
+        {
+            start = 0;
+            end = len-1;
+        }
+
         filter.create(len);
-        Gadgetron::fill(&filter, T(1.0));
+        Gadgetron::fill(filter, T(1.0));
 
         if ( width==0 || width>=len ) width = 1;
 
@@ -1524,7 +1548,7 @@ generateSymmetricFilter(size_t len, hoNDArray<T>& filter, ISMRMRDKSPACEFILTER fi
 
                         for ( ii=0; ii<len-1; ii++ )
                         {
-                            filter(ii+1) = T( std::exp(r*(x[ii]*x[ii])) );
+                            filter(ii+1) = T( (value_type)(std::exp(r*(x[ii]*x[ii]))) );
                         }
 
                         filter(0) = T(0);
@@ -1541,7 +1565,7 @@ generateSymmetricFilter(size_t len, hoNDArray<T>& filter, ISMRMRDKSPACEFILTER fi
 
                         for ( ii=0; ii<len; ii++ )
                         {
-                            filter(ii) = T( std::exp(r*(x[ii]*x[ii])) );
+                            filter(ii) = T( (value_type)(std::exp(r*(x[ii]*x[ii]))) );
                         }
                     }
                 }
@@ -1553,7 +1577,7 @@ generateSymmetricFilter(size_t len, hoNDArray<T>& filter, ISMRMRDKSPACEFILTER fi
 
                     for ( ii=1; ii<=width; ii++ )
                     {
-                        w(ii-1) = T(0.5 * ( 1 - std::cos( 2.0*GT_PI*ii/(2*width+1) ) ));
+                        w(ii-1) = T( (value_type)(0.5 * ( 1 - std::cos( 2.0*GT_PI*ii/(2*width+1) ) )) );
                     }
 
                     if ( len%2 == 0 )
@@ -1584,13 +1608,13 @@ generateSymmetricFilter(size_t len, hoNDArray<T>& filter, ISMRMRDKSPACEFILTER fi
                     if ( len%2 == 0 )
                     {
                         size_t N = len-1;
-                        double halfLen = (N+1)/2;
+                        double halfLen = (double)( (N+1)/2 );
                         for ( ii=1; ii<=halfLen; ii++ )
                         {
-                            filter(ii) = T(0.5 * ( 1 - std::cos( 2.0*GT_PI*ii/(N+1) ) ));
+                            filter(ii) = T( (value_type)(0.5 * ( 1 - std::cos( 2.0*GT_PI*ii/(N+1) ) )) );
                         }
 
-                        for ( ii=halfLen; ii<N; ii++ )
+                        for ( ii=(size_t)halfLen; ii<N; ii++ )
                         {
                             filter(ii+1) = filter(N-ii);
                         }
@@ -1599,13 +1623,13 @@ generateSymmetricFilter(size_t len, hoNDArray<T>& filter, ISMRMRDKSPACEFILTER fi
                     }
                     else
                     {
-                        double halfLen = (len+1)/2;
-                        for ( ii=1; ii<=halfLen; ii++ )
+                        double halfLen = (double)( (len+1)/2 );
+                        for ( ii=1; ii<=(size_t)halfLen; ii++ )
                         {
-                            filter(ii-1) = T(0.5 * ( 1 - std::cos( 2.0*GT_PI*ii/(len+1) ) ));
+                            filter(ii-1) = T( (value_type)(0.5 * ( 1 - std::cos( 2.0*GT_PI*ii/(len+1) ) )) );
                         }
 
-                        for ( ii=halfLen; ii<len; ii++ )
+                        for ( ii=(size_t)halfLen; ii<len; ii++ )
                         {
                             filter(ii) = filter(len-1-ii);
                         }
@@ -1622,7 +1646,8 @@ generateSymmetricFilter(size_t len, hoNDArray<T>& filter, ISMRMRDKSPACEFILTER fi
         {
             sos += filter(ii)*filter(ii);
         }
-        T r = 1.0/std::sqrt( std::abs(sos)/len );
+
+        T r = (value_type)( 1.0/std::sqrt( std::abs(sos)/(len) ) );
         for ( ii=0; ii<len; ii++ )
         {
             filter(ii) *= r;
@@ -1655,7 +1680,7 @@ generateAsymmetricFilter(size_t len, size_t start, size_t end, hoNDArray<T>& fil
         }
 
         filter.create(len);
-        Gadgetron::fill(&filter, T(0.0));
+        Gadgetron::clear(filter);
 
         size_t ii;
         for ( ii=start; ii<=end; ii++ )
@@ -1673,13 +1698,13 @@ generateAsymmetricFilter(size_t len, size_t start, size_t end, hoNDArray<T>& fil
                  {
                     for ( ii=1; ii<=width; ii++ )
                     {
-                        w(ii-1) = T(0.5 * ( 1 - std::cos( 2.0*GT_PI*ii/(2*width+1) ) ));
+                        w(ii-1) = T( (value_type)(0.5 * ( 1 - std::cos( 2.0*GT_PI*ii/(2*width+1) ) )) );
                     }
                 }
             break;
 
             default:
-                Gadgetron::fill(&w, T(1.0));
+                Gadgetron::fill(w, T(1.0));
             break;
         }
 
@@ -1815,7 +1840,9 @@ generateAsymmetricFilter(size_t len, size_t start, size_t end, hoNDArray<T>& fil
         {
             sos += filter(ii)*filter(ii);
         }
-        T r = 1.0/std::sqrt( std::abs(sos)/len );
+
+        // T r = 1.0/std::sqrt( std::abs(sos)/len );
+        T r = (value_type)( 1.0/std::sqrt( std::abs(sos)/(end-start+1) ) ); // SNR unit filter
         for ( ii=0; ii<len; ii++ )
         {
             filter(ii) *= r;
@@ -1843,7 +1870,7 @@ generateSymmetricFilterForRef(size_t len, size_t start, size_t end,
 
         if ( start==0 && end==len-1 )
         {
-            GADGET_CHECK_RETURN_FALSE(generateSymmetricFilter(len, filter, filterType, sigma, width));
+            GADGET_CHECK_RETURN_FALSE(generateSymmetricFilter(len, 0, len-1, filter, filterType, sigma, width));
             return true;
         }
 
@@ -1873,7 +1900,7 @@ generateSymmetricFilterForRef(size_t len, size_t start, size_t end,
         GADGET_CHECK_RETURN_FALSE(lenFilter>0);
 
         hoNDArray<T> filterSym(lenFilter);
-        GADGET_CHECK_RETURN_FALSE(generateSymmetricFilter(lenFilter, filterSym, filterType, sigma, width));
+        GADGET_CHECK_RETURN_FALSE(generateSymmetricFilter(lenFilter, 0, lenFilter-1, filterSym, filterType, sigma, width));
 
         filter.create(len);
         Gadgetron::clear(&filter);
@@ -1958,7 +1985,7 @@ bool gtPlusISMRMRDReconUtil<T>::computeFilterSNRUnitScaleFactor(const hoNDArray<
         sos += filter(ii)*filter(ii);
     }
 
-    scalFactor = T(1.0/std::sqrt( std::abs(sos)/len ));
+    scalFactor = (value_type)(1.0/std::sqrt( std::abs(sos)/len ));
 
     return true;
 }
@@ -1972,7 +1999,7 @@ detectSampledRegion2D(const hoNDArray<T>& data, size_t& startRO, size_t& endRO,
         size_t NDim = data.get_number_of_dimensions();
 
         hoNDArray<typename realType<T>::Type> mag(data.get_dimensions()), magSum, magSumE1, magSumRO;
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::absolute(data, mag));
+        Gadgetron::abs(data, mag);
 
         if ( NDim > 2 )
         {
@@ -2046,7 +2073,7 @@ detectSampledRegion3D(const hoNDArray<T>& data, size_t& startRO, size_t& endRO,
         size_t NDim = data.get_number_of_dimensions();
 
         hoNDArray<typename realType<T>::Type> mag(data.get_dimensions()), magSum, magSum2, magSumRO, magSumE1, magSumE2;
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::absolute(data, mag));
+        Gadgetron::abs(data, mag);
 
         if ( NDim > 5 )
         {
@@ -2149,7 +2176,7 @@ averageKSpace4D(const hoNDArray<T>& data, hoNDArray<T>& ave)
     try
     {
         GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOver4thDimension(data, ave));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal( (typename realType<T>::Type)(1.0/data.get_size(3)), ave));
+        Gadgetron::scal( (typename realType<T>::Type)(1.0/data.get_size(3)), ave);
     }
     catch(...)
     {
@@ -2198,12 +2225,12 @@ averageKSpace4D(const hoNDArray<T>& data, hoNDArray<T>& ave, std::vector<size_t>
         size_t ro, e1;
         for ( e1=0; e1<E1; e1++ )
         {
-            double t = sampledTimes[e1];
+            double t = (double)sampledTimes[e1];
             if ( t == 0 ) t = 1;
 
             for ( ro=0; ro<RO; ro++ )
             {
-                pTimes[e1*RO+ro] = T(1.0/t);
+                pTimes[e1*RO+ro] = (value_type)(1.0/t);
             }
         }
 
@@ -2225,7 +2252,7 @@ averageKSpace5D(const hoNDArray<T>& data, hoNDArray<T>& ave)
     try
     {
         GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOver5thDimension(data, ave));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal( (typename realType<T>::Type)(1.0/data.get_size(4)), ave));
+        Gadgetron::scal( (typename realType<T>::Type)(1.0/data.get_size(4)), ave);
     }
     catch(...)
     {
@@ -2269,12 +2296,12 @@ averageKSpace5D(const hoNDArray<T>& data, hoNDArray<T>& ave, hoNDArray<size_t>&
         {
             for ( e1=0; e1<E1; e1++ )
             {
-                double t = sampledTimes(e1+e2*E1);
+                double t = (double)sampledTimes(e1+e2*E1);
                 if ( t == 0 ) t = 1;
 
                 for ( ro=0; ro<RO; ro++ )
                 {
-                    pTimes[e2*RO*E1+e1*RO+ro] = T(1.0/t);
+                    pTimes[e2*RO*E1+e1*RO+ro] = (value_type)(1.0/t);
                 }
             }
         }
@@ -2305,7 +2332,7 @@ detectSampledTimesE1(const hoNDArray<T>& data4D, std::vector<size_t>& sampledTim
         size_t N = data4D.get_size(3);
 
         hoNDArray<typename realType<T>::Type> mag(data4D.get_dimensions());
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::absolute(data4D, mag));
+        Gadgetron::abs(data4D, mag);
 
         hoNDArray<typename realType<T>::Type> mag3D(RO, E1, 1, N);
         GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOver3rdDimension(mag, mag3D));
@@ -2393,7 +2420,7 @@ detectSampledTimesE1E2(const hoNDArray<T>& data5D, hoNDArray<size_t>& sampledTim
         hoNDArray<typename realType<T>::Type> mag(RO, E1, E2);
 
         hoNDArray<T> dataFirstChannel(RO, E1, E2, const_cast<T*>(data5D.begin()));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::absolute(dataFirstChannel, mag));
+        Gadgetron::abs(dataFirstChannel, mag);
 
         //hoNDArray<typename realType<T>::Type> mag4D(RO, E1, E2, 1, N);
         //GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOver4thDimension(mag, mag4D));
@@ -2741,11 +2768,11 @@ copyAlongROE1TransitionBand(const hoNDArray<T>& src, hoNDArray<T>& dst, size_t s
         // in this way, the SNR unit scale property is perserved
         T midValue = filter_src_RO(RO/2);
         T scalFactor = T(1.0)/midValue;
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal(scalFactor, filter_src_RO));
+        Gadgetron::scal(scalFactor, filter_src_RO);
 
         midValue = filter_src_E1(E1/2);
         scalFactor = T(1.0)/midValue;
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal(scalFactor, filter_src_E1));
+        Gadgetron::scal(scalFactor, filter_src_E1);
 
         hoNDArray<T> filter_dst_RO(RO), filter_dst_E1(E1);
 
@@ -2760,6 +2787,14 @@ copyAlongROE1TransitionBand(const hoNDArray<T>& src, hoNDArray<T>& dst, size_t s
             filter_dst_E1(ii) = T(1.0) - filter_src_E1(ii);
         }
 
+        //std::string debugFolder_ = "D:/software/Gadgetron/20130114/install/gadgetron/DebugOutput/";
+        //Gadgetron::gtPlus::gtPlusIOAnalyze gt_exporter_;
+
+        //GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, filter_src_RO, "filter_src_RO");
+        //GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, filter_dst_RO, "filter_dst_RO");
+        //GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, filter_src_E1, "filter_src_E1");
+        //GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, filter_dst_E1, "filter_dst_E1");
+
         hoNDArray<T> srcFiltered(src), dstFiltered(dst);
         if ( startRO==0 && endRO==RO-1 )
         {
@@ -2787,7 +2822,7 @@ copyAlongROE1TransitionBand(const hoNDArray<T>& src, hoNDArray<T>& dst, size_t s
             GADGET_CHECK_RETURN_FALSE(kspacefilterROE1(dst, fxy, dstFiltered));
         }
 
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::add(srcFiltered, dstFiltered, dst));
+        Gadgetron::add(srcFiltered, dstFiltered, dst);
     }
     catch(...)
     {
@@ -2908,15 +2943,15 @@ copyAlongROE1E2TransitionBand(const hoNDArray<T>& src, hoNDArray<T>& dst, size_t
         // in this way, the SNR unit scale property is perserved
         T midValue = filter_src_RO(RO/2);
         T scalFactor = T(1.0)/midValue;
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal(scalFactor, filter_src_RO));
+        Gadgetron::scal(scalFactor, filter_src_RO);
 
         midValue = filter_src_E1(E1/2);
         scalFactor = T(1.0)/midValue;
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal(scalFactor, filter_src_E1));
+        Gadgetron::scal(scalFactor, filter_src_E1);
 
         midValue = filter_src_E2(E2/2);
         scalFactor = T(1.0)/midValue;
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal(scalFactor, filter_src_E2));
+        Gadgetron::scal(scalFactor, filter_src_E2);
 
         hoNDArray<T> filter_dst_RO(RO), filter_dst_E1(E1), filter_dst_E2(E2);
 
@@ -2968,7 +3003,7 @@ copyAlongROE1E2TransitionBand(const hoNDArray<T>& src, hoNDArray<T>& dst, size_t
             GADGET_CHECK_RETURN_FALSE(kspace3DfilterROE1E2(dst, fxyz, dstFiltered));
         }
 
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::add(srcFiltered, dstFiltered, dst));
+        Gadgetron::add(srcFiltered, dstFiltered, dst);
     }
     catch(...)
     {
@@ -3115,6 +3150,10 @@ bool gtPlusISMRMRDReconUtil<T>::getISMRMRDDimIndex(const ISMRMRDDIM& dim, long l
             ind = 9;
         break;
 
+        case Gadgetron::gtPlus::DIM_Average:
+            ind = 10;
+        break;
+
         default:
             ind = -1;
     }
@@ -3149,6 +3188,10 @@ ISMRMRDALGO gtPlusISMRMRDReconUtil<T>::getISMRMRDReconAlgoFromName(const std::st
     if ( name == "ISMRMRD_L1SPIRIT" ) return ISMRMRD_L1SPIRIT;
     if ( name == "ISMRMRD_SOFTSENSE" ) return ISMRMRD_SOFTSENSE;
     if ( name == "ISMRMRD_L1SOFTSENSE" ) return ISMRMRD_L1SOFTSENSE;
+    if ( name == "ISMRMRD_2DTBINNING" ) return ISMRMRD_2DTBINNING;
+    if ( name == "ISMRMRD_2DTBINNING_FLOW" ) return ISMRMRD_2DTBINNING_FLOW;
+    if ( name == "ISMRMRD_L1SPIRIT_SLEP" ) return ISMRMRD_L1SPIRIT_SLEP;
+    if ( name == "ISMRMRD_L1SPIRIT_SLEP_MOTION_COMP" ) return ISMRMRD_L1SPIRIT_SLEP_MOTION_COMP;
 
     return ISMRMRD_NONE;
 }
@@ -3167,10 +3210,23 @@ ISMRMRDPFALGO gtPlusISMRMRDReconUtil<T>::getISMRMRDPartialFourierReconAlgoFromNa
 {
     if ( name == "ISMRMRD_PF_HOMODYNE" ) return ISMRMRD_PF_HOMODYNE;
     if ( name == "ISMRMRD_PF_FENGHUANG" ) return ISMRMRD_PF_FENGHUANG;
-    if ( name == "ISMRMRD_PF_ZEROFILLING_FILTER" ) return ISMRMRD_PF_ZEROFILLING_FILTER;
     if ( name == "ISMRMRD_PF_POCS" ) return ISMRMRD_PF_POCS;
+    if ( name == "ISMRMRD_PF_ZEROFILLING_FILTER" ) return ISMRMRD_PF_ZEROFILLING_FILTER;
+    if ( name == "ISMRMRD_PF_ZEROFILLING" ) return ISMRMRD_PF_ZEROFILLING;
+
+    return ISMRMRD_PF_NONE;
+}
+
+template <typename T> 
+std::string gtPlusISMRMRDReconUtil<T>::getNameFromISMRMRDPartialFourierReconAlgo(ISMRMRDPFALGO algo)
+{
+    if ( algo == ISMRMRD_PF_HOMODYNE ) return std::string("ISMRMRD_PF_HOMODYNE");
+    if ( algo == ISMRMRD_PF_FENGHUANG ) return std::string("ISMRMRD_PF_FENGHUANG");
+    if ( algo == ISMRMRD_PF_ZEROFILLING_FILTER ) return std::string("ISMRMRD_PF_ZEROFILLING_FILTER");
+    if ( algo == ISMRMRD_PF_POCS ) return std::string("ISMRMRD_PF_POCS");
+    if ( algo == ISMRMRD_PF_ZEROFILLING ) return std::string("ISMRMRD_PF_ZEROFILLING");
 
-    return ISMRMRD_PF_ZEROFILLING;
+    return std::string("ISMRMRD_PF_NONE");
 }
 
 template <typename T> 
@@ -3187,6 +3243,16 @@ getISMRMRDKSpaceFilterFromName(const std::string& name)
 }
 
 template <typename T> 
+ISMRMRDINTERPRETROGATING gtPlusISMRMRDReconUtil<T>::getISMRMRDRetroGatingInterpFromName(const std::string& name)
+{
+    if ( name == "ISMRMRD_INTERP_RETRO_GATING_LINEAR" ) return ISMRMRD_INTERP_RETRO_GATING_LINEAR;
+    if ( name == "ISMRMRD_INTERP_RETRO_GATING_CUBIC" ) return ISMRMRD_INTERP_RETRO_GATING_CUBIC;
+    if ( name == "ISMRMRD_INTERP_RETRO_GATING_BSPLINE" ) return ISMRMRD_INTERP_RETRO_GATING_BSPLINE;
+
+    return ISMRMRD_INTERP_RETRO_GATING_LINEAR;
+}
+
+template <typename T> 
 bool gtPlusISMRMRDReconUtil<T>::
 extractSubArrayForDim(const hoNDArray<T>& x, hoNDArray<T>& r, ISMRMRDDIM& dim, size_t value, bool lessEqual)
 {
@@ -3199,7 +3265,7 @@ extractSubArrayForDim(const hoNDArray<T>& x, hoNDArray<T>& r, ISMRMRDDIM& dim, s
 
         GADGET_CHECK_RETURN_FALSE(value<(*dimX)[dimInd]);
 
-        std::vector<size_t> crop_offset(10, 0);
+        std::vector<size_t> crop_offset(11, 0);
         crop_offset[0] = 0;
         crop_offset[1] = 0;
         crop_offset[2] = 0;
@@ -3210,8 +3276,9 @@ extractSubArrayForDim(const hoNDArray<T>& x, hoNDArray<T>& r, ISMRMRDDIM& dim, s
         crop_offset[7] = 0;
         crop_offset[8] = 0;
         crop_offset[9] = 0;
+        crop_offset[10] = 0;
 
-        std::vector<size_t> crop_size(10, 0);
+        std::vector<size_t> crop_size(11, 0);
         crop_size[0] = (*dimX)[0];
         crop_size[1] = (*dimX)[1];
         crop_size[2] = (*dimX)[2];
@@ -3222,6 +3289,7 @@ extractSubArrayForDim(const hoNDArray<T>& x, hoNDArray<T>& r, ISMRMRDDIM& dim, s
         crop_size[7] = (*dimX)[7];
         crop_size[8] = (*dimX)[8];
         crop_size[9] = (*dimX)[9];
+        crop_size[10] = (*dimX)[10];
 
         if ( lessEqual )
         {
@@ -3233,7 +3301,7 @@ extractSubArrayForDim(const hoNDArray<T>& x, hoNDArray<T>& r, ISMRMRDDIM& dim, s
             crop_size[dimInd] = 1;
         }
 
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::cropUpTo10DArray(x, r, crop_offset, crop_size));
+        GADGET_CHECK_RETURN_FALSE(Gadgetron::cropUpTo11DArray(x, r, crop_offset, crop_size));
     }
     catch(...)
     {
@@ -3259,7 +3327,7 @@ extractSubArrayForDim(const hoNDArray<T>& x, hoNDArray<T>& r, ISMRMRDDIM& dim1,
         GADGET_CHECK_RETURN_FALSE(value1<(*dimX)[dimInd1]);
         GADGET_CHECK_RETURN_FALSE(value2<(*dimX)[dimInd2]);
 
-        std::vector<size_t> crop_offset(10, 0);
+        std::vector<size_t> crop_offset(11, 0);
         crop_offset[0] = 0;
         crop_offset[1] = 0;
         crop_offset[2] = 0;
@@ -3270,8 +3338,9 @@ extractSubArrayForDim(const hoNDArray<T>& x, hoNDArray<T>& r, ISMRMRDDIM& dim1,
         crop_offset[7] = 0;
         crop_offset[8] = 0;
         crop_offset[9] = 0;
+        crop_offset[10] = 0;
 
-        std::vector<size_t> crop_size(10, 0);
+        std::vector<size_t> crop_size(11, 0);
         crop_size[0] = (*dimX)[0];
         crop_size[1] = (*dimX)[1];
         crop_size[2] = (*dimX)[2];
@@ -3282,6 +3351,7 @@ extractSubArrayForDim(const hoNDArray<T>& x, hoNDArray<T>& r, ISMRMRDDIM& dim1,
         crop_size[7] = (*dimX)[7];
         crop_size[8] = (*dimX)[8];
         crop_size[9] = (*dimX)[9];
+        crop_size[10] = (*dimX)[10];
 
         if ( lessEqual )
         {
@@ -3297,7 +3367,7 @@ extractSubArrayForDim(const hoNDArray<T>& x, hoNDArray<T>& r, ISMRMRDDIM& dim1,
             crop_size[dimInd2] = 1;
         }
 
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::cropUpTo10DArray(x, r, crop_offset, crop_size));
+        GADGET_CHECK_RETURN_FALSE(Gadgetron::cropUpTo11DArray(x, r, crop_offset, crop_size));
     }
     catch(...)
     {
@@ -3323,7 +3393,7 @@ extractSubArrayForDim1LessEqualDim2Equal(const hoNDArray<T>& x, hoNDArray<T>& r,
         GADGET_CHECK_RETURN_FALSE(value1<(*dimX)[dimInd1]);
         GADGET_CHECK_RETURN_FALSE(value2<(*dimX)[dimInd2]);
 
-        std::vector<size_t> crop_offset(10, 0);
+        std::vector<size_t> crop_offset(11, 0);
         crop_offset[0] = 0;
         crop_offset[1] = 0;
         crop_offset[2] = 0;
@@ -3334,8 +3404,9 @@ extractSubArrayForDim1LessEqualDim2Equal(const hoNDArray<T>& x, hoNDArray<T>& r,
         crop_offset[7] = 0;
         crop_offset[8] = 0;
         crop_offset[9] = 0;
+        crop_offset[10] = 0;
 
-        std::vector<size_t> crop_size(10, 0);
+        std::vector<size_t> crop_size(11, 0);
         crop_size[0] = (*dimX)[0];
         crop_size[1] = (*dimX)[1];
         crop_size[2] = (*dimX)[2];
@@ -3346,13 +3417,14 @@ extractSubArrayForDim1LessEqualDim2Equal(const hoNDArray<T>& x, hoNDArray<T>& r,
         crop_size[7] = (*dimX)[7];
         crop_size[8] = (*dimX)[8];
         crop_size[9] = (*dimX)[9];
+        crop_size[10] = (*dimX)[10]; // dimension 10 starts from its own extent, like dimensions 0-9 above
 
         crop_size[dimInd1] = value1+1;
 
         crop_offset[dimInd2] = value2;
         crop_size[dimInd2] = 1;
 
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::cropUpTo10DArray(x, r, crop_offset, crop_size));
+        GADGET_CHECK_RETURN_FALSE(Gadgetron::cropUpTo11DArray(x, r, crop_offset, crop_size));
     }
     catch(...)
     {
@@ -3371,7 +3443,7 @@ extractSubArrayForMaxEncodingCounters(const hoNDArray<T>& x, hoNDArray<T>& r, co
     {
         boost::shared_ptr< std::vector<size_t> > dimX = x.get_dimensions();
 
-        std::vector<size_t> crop_offset(10, 0);
+        std::vector<size_t> crop_offset(11, 0);
         crop_offset[0] = 0;
         crop_offset[1] = 0;
         crop_offset[2] = 0;
@@ -3382,9 +3454,10 @@ extractSubArrayForMaxEncodingCounters(const hoNDArray<T>& x, hoNDArray<T>& r, co
         crop_offset[7] = 0;
         crop_offset[8] = 0;
         crop_offset[9] = 0;
+        crop_offset[10] = 0;
 
-        // [RO E1 Cha Slice E2 Contrast Phase Rep Set Seg]
-        std::vector<size_t> crop_size(10, 0);
+        // [RO E1 Cha Slice E2 Contrast Phase Rep Set Seg Ave]
+        std::vector<size_t> crop_size(11, 0);
         crop_size[0] = (*dimX)[0];
         crop_size[1] = (*dimX)[1]; if ( maxIdx.kspace_encode_step_1 < crop_size[1]-1 ) crop_size[1] = maxIdx.kspace_encode_step_1+1;
         crop_size[2] = (*dimX)[2]; 
@@ -3395,8 +3468,9 @@ extractSubArrayForMaxEncodingCounters(const hoNDArray<T>& x, hoNDArray<T>& r, co
         crop_size[7] = (*dimX)[7]; if ( maxIdx.repetition           < crop_size[7]-1 ) crop_size[7] = maxIdx.repetition+1;
         crop_size[8] = (*dimX)[8]; if ( maxIdx.set                  < crop_size[8]-1 ) crop_size[8] = maxIdx.set+1;
         crop_size[9] = (*dimX)[9]; if ( maxIdx.segment              < crop_size[9]-1 ) crop_size[9] = maxIdx.segment+1;
+        crop_size[10] = (*dimX)[10]; if ( maxIdx.average            < crop_size[10]-1 ) crop_size[10] = maxIdx.average+1;
 
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::cropUpTo10DArray(x, r, crop_offset, crop_size));
+        GADGET_CHECK_RETURN_FALSE(Gadgetron::cropUpTo11DArray(x, r, crop_offset, crop_size));
     }
     catch(...)
     {
@@ -3418,13 +3492,13 @@ bool gtPlusISMRMRDReconUtil<T>::hasIdenticalGeometryISMRMRD(const ISMRMRD::Acqui
 {
     long long ii;
 
-    for ( ii=0; ii<ISMRMRD_POSITION_LENGTH; ii++ )
+    for ( ii=0; ii<ISMRMRD::ISMRMRD_POSITION_LENGTH; ii++ )
     {
         if ( std::abs(acqHeader1.position[ii]-acqHeader2.position[ii]) > GT_IMAGING_GEOMETRY_DELTA ) return false;
         if ( std::abs(acqHeader1.patient_table_position[ii]-acqHeader2.patient_table_position[ii]) > GT_IMAGING_GEOMETRY_DELTA ) return false;
     }
 
-    for ( ii=0; ii<ISMRMRD_DIRECTION_LENGTH; ii++ )
+    for ( ii=0; ii<ISMRMRD::ISMRMRD_DIRECTION_LENGTH; ii++ )
     {
         if ( std::abs(acqHeader1.read_dir[ii]-acqHeader2.read_dir[ii]) > GT_IMAGING_GEOMETRY_DELTA ) return false;
         if ( std::abs(acqHeader1.phase_dir[ii]-acqHeader2.phase_dir[ii]) > GT_IMAGING_GEOMETRY_DELTA ) return false;
@@ -3510,114 +3584,119 @@ void gtPlusISMRMRDReconUtil<T>::findStartEndROAfterZeroFilling(size_t centre_col
     return;
 }
 
-#ifdef USE_CUDA
-
 template <typename T> 
-bool gtPlusISMRMRDReconUtil<T>::
-cudaJobSplitter(const std::vector<unsigned int>& jobIDs, size_t jobSize, size_t minimalMemoryForValidDevice, 
-                std::vector< std::pair<unsigned int, std::vector<std::vector<unsigned int> > > >& jobSchedule)
+bool gtPlusISMRMRDReconUtil<T>::setMetaAttributesFromImageHeaderISMRMRD(const ISMRMRD::ImageHeader& imgHeader, ISMRMRD::MetaContainer& attrib)
 {
     try
     {
-        unsigned int numOfJobs = jobIDs.size();
-        if ( numOfJobs == 0 )
+        unsigned int ii;
+
+        attrib.set(ISMRMRD_IMAGE_version,                 (long)imgHeader.version);
+        attrib.set(ISMRMRD_IMAGE_flags,                   (long)imgHeader.flags);
+        attrib.set(ISMRMRD_IMAGE_measurement_uid,         (long)imgHeader.measurement_uid);
+
+        // ----------------------------------------
+
+        attrib.set(ISMRMRD_IMAGE_matrix_size, (long)imgHeader.matrix_size[0]);
+        attrib.append(ISMRMRD_IMAGE_matrix_size, (long)imgHeader.matrix_size[1]);
+        attrib.append(ISMRMRD_IMAGE_matrix_size, (long)imgHeader.matrix_size[2]);
+
+        // ----------------------------------------
+
+        attrib.set(ISMRMRD_IMAGE_field_of_view, (double)imgHeader.field_of_view[0]);
+        attrib.append(ISMRMRD_IMAGE_field_of_view, (double)imgHeader.field_of_view[1]);
+        attrib.append(ISMRMRD_IMAGE_field_of_view, (double)imgHeader.field_of_view[2]);
+
+        // ----------------------------------------
+
+        attrib.set(ISMRMRD_IMAGE_channels, (long)imgHeader.channels);
+
+        // ----------------------------------------
+
+        attrib.set(ISMRMRD_IMAGE_position, (double)imgHeader.position[0]);
+        for ( ii=1; ii<ISMRMRD::ISMRMRD_POSITION_LENGTH; ii++ )
         {
-            GADGET_WARN_MSG("numOfJobs == 0");
-            return true;
+            attrib.append(ISMRMRD_IMAGE_position, (double)imgHeader.position[ii]);
         }
 
-        // find valid device
-        int numOfDevices(0);
-        GADGET_CHECK_RETURN_FALSE(cudaGetDeviceCount( &numOfDevices )==cudaSuccess);
+        // ----------------------------------------
 
-        if ( numOfDevices == 0 )
+        attrib.set(ISMRMRD_IMAGE_read_dir, (double)imgHeader.read_dir[0]);
+        for ( ii=1; ii<ISMRMRD::ISMRMRD_DIRECTION_LENGTH; ii++ )
         {
-            GADGET_WARN_MSG("numOfDevices == 0");
-            return true;
+            attrib.append(ISMRMRD_IMAGE_read_dir, (double)imgHeader.read_dir[ii]);
         }
 
-        std::vector<unsigned int> validDevices;
-        int d;
-        for ( d=0; d<numOfDevices; d++ )
+        // ----------------------------------------
+
+        attrib.set(ISMRMRD_IMAGE_phase_dir, (double)imgHeader.phase_dir[0]);
+        for ( ii=1; ii<ISMRMRD::ISMRMRD_DIRECTION_LENGTH; ii++ )
         {
-            size_t totalMem = cudaDeviceManager::Instance()->total_global_mem(d);
-            if ( totalMem >= minimalMemoryForValidDevice )
-            {
-                validDevices.push_back(d);
-            }
+            attrib.append(ISMRMRD_IMAGE_phase_dir, (double)imgHeader.phase_dir[ii]);
         }
 
-        if ( validDevices.empty() )
+        // ----------------------------------------
+
+        attrib.set(ISMRMRD_IMAGE_slice_dir, (double)imgHeader.slice_dir[0]);
+        for ( ii=1; ii<ISMRMRD::ISMRMRD_DIRECTION_LENGTH; ii++ )
         {
-            GADGET_ERROR_MSG("No valid device can be found : " << minimalMemoryForValidDevice);
-            return false;
+            attrib.append(ISMRMRD_IMAGE_slice_dir, (double)imgHeader.slice_dir[ii]);
         }
 
-        std::vector<unsigned int> maxJobN(validDevices.size());
-        for ( d=0; d<validDevices.size(); d++ )
+        // ----------------------------------------
+
+        attrib.set(ISMRMRD_IMAGE_patient_table_position, (double)imgHeader.patient_table_position[0]);
+        for ( ii=1; ii<ISMRMRD::ISMRMRD_POSITION_LENGTH; ii++ )
         {
-            size_t totalMem = cudaDeviceManager::Instance()->total_global_mem(validDevices[d]);
-            maxJobN[d] = totalMem/jobSize;
+            attrib.append(ISMRMRD_IMAGE_patient_table_position, (double)imgHeader.patient_table_position[ii]);
         }
 
-        jobSchedule.clear();
+        // ----------------------------------------
 
-        size_t job = 0;
-        unsigned int validDevice = 0;
-        while ( job < numOfJobs )
-        {
-            size_t start = job;
-            size_t end = job + maxJobN[validDevice] - 1;
+        attrib.set(ISMRMRD_IMAGE_average,       (long)imgHeader.average);
+        attrib.set(ISMRMRD_IMAGE_slice,         (long)imgHeader.slice);
+        attrib.set(ISMRMRD_IMAGE_contrast,      (long)imgHeader.contrast);
+        attrib.set(ISMRMRD_IMAGE_phase,         (long)imgHeader.phase);
+        attrib.set(ISMRMRD_IMAGE_repetition,    (long)imgHeader.repetition);
+        attrib.set(ISMRMRD_IMAGE_set,           (long)imgHeader.set);
 
-            if ( end >= numOfJobs ) end = numOfJobs - 1;
+        // ----------------------------------------
 
-            unsigned int deviceID = validDevices[validDevice];
+        attrib.set(ISMRMRD_IMAGE_acquisition_time_stamp, (long)imgHeader.acquisition_time_stamp);
 
-            unsigned int loc;
-            for ( loc=0; loc<jobSchedule.size(); loc++ )
-            {
-                if ( jobSchedule[loc].first == deviceID ) break;
-            }
+        // ----------------------------------------
 
-            if ( loc < jobSchedule.size() )
-            {
-                // insert a new job package
-                std::vector<unsigned int> jobPackage;
-                for ( unsigned int jj=start; jj<=end; jj++ )
-                {
-                    jobPackage.push_back(jobIDs[jj]);
-                }
+        attrib.set(ISMRMRD_IMAGE_physiology_time_stamp, (long)imgHeader.physiology_time_stamp[0]);
+        for ( ii=1; ii<ISMRMRD::ISMRMRD_PHYS_STAMPS; ii++ )
+        {
+            attrib.append(ISMRMRD_IMAGE_physiology_time_stamp, (long)imgHeader.physiology_time_stamp[ii]);
+        }
 
-                jobSchedule[loc].second.push_back(jobPackage);
-            }
-            else
-            {
-                // create a new entry
-                std::pair<unsigned int, std::vector<std::vector<unsigned int> > > jobItem;
-                jobItem.first = deviceID;
+        // ----------------------------------------
 
-                std::vector<unsigned int> jobPackage;
-                for ( unsigned int jj=start; jj<=end; jj++ )
-                {
-                    jobPackage.push_back(jobIDs[jj]);
-                }
-                jobItem.second.push_back(jobPackage);
+        attrib.set(ISMRMRD_IMAGE_image_data_type,       (long)imgHeader.data_type);
+        attrib.set(ISMRMRD_IMAGE_image_type,            (long)imgHeader.image_type);
+        attrib.set(ISMRMRD_IMAGE_image_series_index,    (long)imgHeader.image_series_index);
 
-                jobSchedule.push_back(jobItem);
-            }
+        // ----------------------------------------
+
+        attrib.set(ISMRMRD_IMAGE_user_int, (long)imgHeader.user_int[0]);
+        for ( ii=1; ii<ISMRMRD::ISMRMRD_USER_INTS; ii++ )
+        {
+            attrib.append(ISMRMRD_IMAGE_user_int, (long)imgHeader.user_int[ii]);
+        }
 
-            job = end+1;
-            validDevice++;
+        // ----------------------------------------
 
-            if ( validDevice >= validDevices.size() )
-            {
-                validDevice = 0;
-            }
+        attrib.set(ISMRMRD_IMAGE_user_float, (double)imgHeader.user_float[0]);
+        for ( ii=1; ii<ISMRMRD::ISMRMRD_USER_FLOATS; ii++ )
+        {
+            attrib.append(ISMRMRD_IMAGE_user_float, (double)imgHeader.user_float[ii]);
         }
     }
     catch(...)
     {
-        GADGET_ERROR_MSG("Errors in gtPlusISMRMRDReconUtil<T>::cudaJobSplitter(...) ... ");
+        GADGET_ERROR_MSG("Errors in gtPlusISMRMRDReconUtil<T>::setMetaAttributesFromImageHeaderISMRMRD(const ISMRMRD::ImageHeader& imgHeader, ISMRMRD::MetaContainer& attrib) ... ");
         return false;
     }
 
@@ -3625,23 +3704,277 @@ cudaJobSplitter(const std::vector<unsigned int>& jobIDs, size_t jobSize, size_t
 }
 
 template <typename T> 
-bool gtPlusISMRMRDReconUtil<T>::
-cudaJobSplitter(unsigned int numOfJobs, size_t jobSize, size_t minimalMemoryForValidDevice, 
-            std::vector< std::pair<unsigned int, std::vector<std::vector<unsigned int> > > >& jobSchedule)
+bool gtPlusISMRMRDReconUtil<T>::setImageHeaderISMRMRDFromMetaAttributes(const ISMRMRD::MetaContainer& attrib, ISMRMRD::ImageHeader& imgHeader)
 {
-    if ( numOfJobs == 0 )
+    try
     {
-        GADGET_WARN_MSG("numOfJobs == 0");
-        return true;
+        unsigned int ii;
+        // Fill imgHeader field by field from attrib (as_long/as_double with an element index), casting to each field's type; exceptions are caught below.
+        imgHeader.version = (uint16_t)attrib.as_long(ISMRMRD_IMAGE_version, 0);
+        imgHeader.flags = (uint64_t)attrib.as_long(ISMRMRD_IMAGE_flags, 0);
+        imgHeader.measurement_uid = (uint32_t)attrib.as_long(ISMRMRD_IMAGE_measurement_uid, 0);
+
+        // ---- matrix_size: entries 0..2 of ISMRMRD_IMAGE_matrix_size ----
+
+        imgHeader.matrix_size[0] = (uint16_t)attrib.as_long(ISMRMRD_IMAGE_matrix_size, 0);
+        imgHeader.matrix_size[1] = (uint16_t)attrib.as_long(ISMRMRD_IMAGE_matrix_size, 1);
+        imgHeader.matrix_size[2] = (uint16_t)attrib.as_long(ISMRMRD_IMAGE_matrix_size, 2);
+
+        // ---- field_of_view: entries 0..2 of ISMRMRD_IMAGE_field_of_view ----
+
+        imgHeader.field_of_view[0] = (float)attrib.as_double(ISMRMRD_IMAGE_field_of_view, 0);
+        imgHeader.field_of_view[1] = (float)attrib.as_double(ISMRMRD_IMAGE_field_of_view, 1);
+        imgHeader.field_of_view[2] = (float)attrib.as_double(ISMRMRD_IMAGE_field_of_view, 2);
+
+        // ---- channels: entry 0 of ISMRMRD_IMAGE_channels ----
+
+        imgHeader.channels = (uint16_t)attrib.as_long(ISMRMRD_IMAGE_channels, 0);;
+
+        // ---- position: ISMRMRD_POSITION_LENGTH entries of ISMRMRD_IMAGE_position ----
+
+        for ( ii=0; ii<ISMRMRD::ISMRMRD_POSITION_LENGTH; ii++ )
+        {
+            imgHeader.position[ii] = (float)attrib.as_double(ISMRMRD_IMAGE_position, ii);
+        }
+
+        // ---- read_dir: ISMRMRD_DIRECTION_LENGTH entries of ISMRMRD_IMAGE_read_dir ----
+
+        for ( ii=0; ii<ISMRMRD::ISMRMRD_DIRECTION_LENGTH; ii++ )
+        {
+            imgHeader.read_dir[ii] = (float)attrib.as_double(ISMRMRD_IMAGE_read_dir, ii);
+        }
+
+        // ---- phase_dir: ISMRMRD_DIRECTION_LENGTH entries of ISMRMRD_IMAGE_phase_dir ----
+
+        for ( ii=0; ii<ISMRMRD::ISMRMRD_DIRECTION_LENGTH; ii++ )
+        {
+            imgHeader.phase_dir[ii] = (float)attrib.as_double(ISMRMRD_IMAGE_phase_dir, ii);
+        }
+
+        // ---- slice_dir: ISMRMRD_DIRECTION_LENGTH entries of ISMRMRD_IMAGE_slice_dir ----
+
+        for ( ii=0; ii<ISMRMRD::ISMRMRD_DIRECTION_LENGTH; ii++ )
+        {
+            imgHeader.slice_dir[ii] = (float)attrib.as_double(ISMRMRD_IMAGE_slice_dir, ii);
+        }
+
+        // ---- patient_table_position: ISMRMRD_POSITION_LENGTH entries ----
+
+        for ( ii=0; ii<ISMRMRD::ISMRMRD_POSITION_LENGTH; ii++ )
+        {
+            imgHeader.patient_table_position[ii] = (float)attrib.as_double(ISMRMRD_IMAGE_patient_table_position, ii);
+        }
+
+        // ---- scalar fields: average, slice, contrast, phase, repetition, set (entry 0 of each key) ----
+
+        imgHeader.average = (uint16_t)attrib.as_long(ISMRMRD_IMAGE_average, 0);
+        imgHeader.slice = (uint16_t)attrib.as_long(ISMRMRD_IMAGE_slice, 0);
+        imgHeader.contrast = (uint16_t)attrib.as_long(ISMRMRD_IMAGE_contrast, 0);
+        imgHeader.phase = (uint16_t)attrib.as_long(ISMRMRD_IMAGE_phase, 0);
+        imgHeader.repetition = (uint16_t)attrib.as_long(ISMRMRD_IMAGE_repetition, 0);
+        imgHeader.set = (uint16_t)attrib.as_long(ISMRMRD_IMAGE_set, 0);
+
+        // ---- acquisition_time_stamp: entry 0 of ISMRMRD_IMAGE_acquisition_time_stamp ----
+
+        imgHeader.acquisition_time_stamp = (uint32_t)attrib.as_long(ISMRMRD_IMAGE_acquisition_time_stamp, 0);
+
+        // ---- physiology_time_stamp: ISMRMRD_PHYS_STAMPS entries ----
+
+        for ( ii=0; ii<ISMRMRD::ISMRMRD_PHYS_STAMPS; ii++ )
+        {
+            imgHeader.physiology_time_stamp[ii] = (uint32_t)attrib.as_long(ISMRMRD_IMAGE_physiology_time_stamp, ii);
+        }
+
+        // ---- data_type (key ISMRMRD_IMAGE_image_data_type), image_type, image_series_index ----
+
+        imgHeader.data_type = (uint16_t)attrib.as_long(ISMRMRD_IMAGE_image_data_type, 0);
+        imgHeader.image_type = (uint16_t)attrib.as_long(ISMRMRD_IMAGE_image_type, 0);
+        imgHeader.image_series_index = (uint16_t)attrib.as_long(ISMRMRD_IMAGE_image_series_index, 0);
+
+        // ---- user_int: ISMRMRD_USER_INTS entries of ISMRMRD_IMAGE_user_int ----
+
+        for ( ii=0; ii<ISMRMRD::ISMRMRD_USER_INTS; ii++ )
+        {
+            imgHeader.user_int[ii] = (int32_t)attrib.as_long(ISMRMRD_IMAGE_user_int, ii);
+        }
+
+        // ---- user_float: ISMRMRD_USER_FLOATS entries of ISMRMRD_IMAGE_user_float ----
+
+        for ( ii=0; ii<ISMRMRD::ISMRMRD_USER_FLOATS; ii++ )
+        {
+            imgHeader.user_float[ii] = (float)attrib.as_double(ISMRMRD_IMAGE_user_float, ii);
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors in gtPlusISMRMRDReconUtil<T>::setImageHeaderISMRMRDFromMetaAttributes(const ISMRMRD::MetaContainer& attrib, ISMRMRD::ImageHeader& imgHeader) ... ");
+        return false;
     }
 
-    std::vector<unsigned int> jobIDs(numOfJobs, 0);
-    unsigned int ii;
-    for ( ii=0; ii<numOfJobs; ii++ ) jobIDs[ii] = ii;
-    return cudaJobSplitter(jobIDs, jobSize, minimalMemoryForValidDevice, jobSchedule);
+    return true;
+}
+
+//#ifdef USE_CUDA
+//
+//template <typename T> 
+//bool gtPlusISMRMRDReconUtil<T>::
+//cudaJobSplitter(const std::vector<unsigned int>& jobIDs, size_t jobSize, size_t minimalMemoryForValidDevice, 
+//                std::vector< std::pair<unsigned int, std::vector<std::vector<unsigned int> > > >& jobSchedule)
+//{
+//    try
+//    {
+//        unsigned int numOfJobs = jobIDs.size();
+//        if ( numOfJobs == 0 )
+//        {
+//            GADGET_WARN_MSG("numOfJobs == 0");
+//            return true;
+//        }
+//
+//        // find valid device
+//        int numOfDevices(0);
+//        GADGET_CHECK_RETURN_FALSE(cudaGetDeviceCount( &numOfDevices )==cudaSuccess);
+//
+//        if ( numOfDevices == 0 )
+//        {
+//            GADGET_WARN_MSG("numOfDevices == 0");
+//            return true;
+//        }
+//
+//        std::vector<unsigned int> validDevices;
+//        int d;
+//        for ( d=0; d<numOfDevices; d++ )
+//        {
+//            size_t totalMem = cudaDeviceManager::Instance()->total_global_mem(d);
+//            if ( totalMem >= minimalMemoryForValidDevice )
+//            {
+//                validDevices.push_back(d);
+//            }
+//        }
+//
+//        if ( validDevices.empty() )
+//        {
+//            GADGET_ERROR_MSG("No valid device can be found : " << minimalMemoryForValidDevice);
+//            return false;
+//        }
+//
+//        std::vector<unsigned int> maxJobN(validDevices.size());
+//        for ( d=0; d<validDevices.size(); d++ )
+//        {
+//            size_t totalMem = cudaDeviceManager::Instance()->total_global_mem(validDevices[d]);
+//            maxJobN[d] = totalMem/jobSize;
+//        }
+//
+//        jobSchedule.clear();
+//
+//        size_t job = 0;
+//        unsigned int validDevice = 0;
+//        while ( job < numOfJobs )
+//        {
+//            size_t start = job;
+//            size_t end = job + maxJobN[validDevice] - 1;
+//
+//            if ( end >= numOfJobs ) end = numOfJobs - 1;
+//
+//            unsigned int deviceID = validDevices[validDevice];
+//
+//            unsigned int loc;
+//            for ( loc=0; loc<jobSchedule.size(); loc++ )
+//            {
+//                if ( jobSchedule[loc].first == deviceID ) break;
+//            }
+//
+//            if ( loc < jobSchedule.size() )
+//            {
+//                // insert a new job package
+//                std::vector<unsigned int> jobPackage;
+//                for ( unsigned int jj=start; jj<=end; jj++ )
+//                {
+//                    jobPackage.push_back(jobIDs[jj]);
+//                }
+//
+//                jobSchedule[loc].second.push_back(jobPackage);
+//            }
+//            else
+//            {
+//                // create a new entry
+//                std::pair<unsigned int, std::vector<std::vector<unsigned int> > > jobItem;
+//                jobItem.first = deviceID;
+//
+//                std::vector<unsigned int> jobPackage;
+//                for ( unsigned int jj=start; jj<=end; jj++ )
+//                {
+//                    jobPackage.push_back(jobIDs[jj]);
+//                }
+//                jobItem.second.push_back(jobPackage);
+//
+//                jobSchedule.push_back(jobItem);
+//            }
+//
+//            job = end+1;
+//            validDevice++;
+//
+//            if ( validDevice >= validDevices.size() )
+//            {
+//                validDevice = 0;
+//            }
+//        }
+//    }
+//    catch(...)
+//    {
+//        GADGET_ERROR_MSG("Errors in gtPlusISMRMRDReconUtil<T>::cudaJobSplitter(...) ... ");
+//        return false;
+//    }
+//
+//    return true;
+//}
+//
+//template <typename T> 
+//bool gtPlusISMRMRDReconUtil<T>::
+//cudaJobSplitter(unsigned int numOfJobs, size_t jobSize, size_t minimalMemoryForValidDevice, 
+//            std::vector< std::pair<unsigned int, std::vector<std::vector<unsigned int> > > >& jobSchedule)
+//{
+//    if ( numOfJobs == 0 )
+//    {
+//        GADGET_WARN_MSG("numOfJobs == 0");
+//        return true;
+//    }
+//
+//    std::vector<unsigned int> jobIDs(numOfJobs, 0);
+//    unsigned int ii;
+//    for ( ii=0; ii<numOfJobs; ii++ ) jobIDs[ii] = ii;
+//    return cudaJobSplitter(jobIDs, jobSize, minimalMemoryForValidDevice, jobSchedule);
+//}
+//
+//#endif // USE_CUDA
+
+template <typename T> 
+void gtPlusISMRMRDReconUtil<T>::
+compareAgainstGroundTruthArray(const std::string& gt_filename, const hoNDArray<T>& x, typename realType<T>::Type& normDiff, typename realType<T>::Type& maxNormDiff)
+{
+    hoNDArray<T> gt;
+    // Load the reference array named by gt_filename via gtPlusIOAnalyze::importArray. NOTE(review): any status it returns is not checked here.
+    gtPlusIOAnalyze gt_io;
+    gt_io.importArray(gt, gt_filename);
+    // Delegate to the array overload, which writes normDiff and maxNormDiff.
+    compareAgainstGroundTruthArray(gt, x, normDiff, maxNormDiff);
 }
 
-#endif // USE_CUDA
+template <typename T> 
+void gtPlusISMRMRDReconUtil<T>::
+compareAgainstGroundTruthArray(const hoNDArray<T>& gt, const hoNDArray<T>& x, typename realType<T>::Type& normDiff, typename realType<T>::Type& maxNormDiff)
+{
+    hoNDArray<T> diff(x);
+    Gadgetron::subtract(gt, x, diff);
+    // diff is constructed from x and then passed as the third argument of subtract (presumably receiving gt - x; TODO confirm). normDiff takes the norm2 result.
+    typename realType<T>::Type v;
+    Gadgetron::norm2(diff, v);
+    normDiff = v;
+    // maxNormDiff is std::abs of the value maxAbsolute reports for diff; the index it reports (ind) is not used afterwards.
+    T maxV;
+    size_t ind;
+    Gadgetron::maxAbsolute(diff, maxV, ind);
+    maxNormDiff = std::abs(maxV);
+}
 
 // ========================================================================================== //
 
@@ -3674,6 +4007,7 @@ computeNoisePrewhiteningMatrix(const hoNDArray<T>& noise, double noiseBandWidth,
         size_t CHA = noise.get_size(2);
 
         GADGET_CHECK_RETURN_FALSE(prewhiteningMatrix.createMatrix(CHA, CHA));
+        Gadgetron::clear(prewhiteningMatrix);
 
         typedef typename realType<T>::Type ValueType;
 
@@ -3688,18 +4022,18 @@ computeNoisePrewhiteningMatrix(const hoNDArray<T>& noise, double noiseBandWidth,
         hoMatrix<T> R(RO*E1, CHA, const_cast<T*>(noise.begin()));
 
         // R'*R --> CHA by CHA covariance matrix
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::GeneralMatrixProduct_gemm(prewhiteningMatrix, R, true, R, false));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal(scaling, prewhiteningMatrix));
+        Gadgetron::gemm(prewhiteningMatrix, R, true, R, false);
+        Gadgetron::scal(scaling, prewhiteningMatrix);
 
         // 0.5*(R+R')
         hoMatrix<T> RH(prewhiteningMatrix);
-        GADGET_CHECK_RETURN_FALSE(conjugatetrans(prewhiteningMatrix, RH));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::add(prewhiteningMatrix, RH, prewhiteningMatrix));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal(0.5, prewhiteningMatrix));
+        conjugatetrans(prewhiteningMatrix, RH);
+        Gadgetron::add(prewhiteningMatrix, RH, prewhiteningMatrix);
+        Gadgetron::scal( (ValueType)0.5, prewhiteningMatrix);
 
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::CholeskyHermitianPositiveDefinite_potrf(prewhiteningMatrix, 'L'));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::TriangularInverse_trtri(prewhiteningMatrix, 'L'));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal(std::sqrt((double)2.0), prewhiteningMatrix));
+        Gadgetron::potrf(prewhiteningMatrix, 'U');
+        Gadgetron::trtri(prewhiteningMatrix, 'U');
+        Gadgetron::scal( (value_type)(std::sqrt((double)2.0)), prewhiteningMatrix);
     }
     catch(...)
     {
@@ -3733,12 +4067,13 @@ performNoisePrewhitening(hoNDArray<T>& data, const hoMatrix<T>& prewhiteningMatr
         #endif // GCC_OLD_FLAG
         {
             hoMatrix<T> tmp(RO*E1, CHA);
+            Gadgetron::clear(tmp);
 
             #pragma omp for
             for ( n=0; n<(long long)N; n++ )
             {
                 hoMatrix<T> D(RO*E1, CHA, data.begin()+n*RO*E1*CHA);
-                Gadgetron::GeneralMatrixProduct_gemm(tmp, D, false, prewhiteningMatrix, false);
+                Gadgetron::gemm(tmp, D, false, prewhiteningMatrix, false);
                 memcpy(data.begin()+n*RO*E1*CHA, tmp.begin(), sizeof(T)*RO*E1*CHA);
             }
         }
@@ -3820,7 +4155,7 @@ zpadResize2DOnKSpace(const hoNDArray<T>& kspace, size_t sizeX, size_t sizeY, hoN
         GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft2c(dataResized));
 
         typename realType<T>::Type scaling = (typename realType<T>::Type)(std::sqrt((double)sizeX*sizeY)/std::sqrt((double)RO*E1));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal(scaling, dataResized));
+        Gadgetron::scal(scaling, dataResized);
     }
     catch(...)
     {
@@ -3903,7 +4238,7 @@ zpadResize3DOnKSpace(const hoNDArray<T>& kspace, size_t sizeX, size_t sizeY, siz
         GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft3c(dataResized));
 
         typename realType<T>::Type scaling = (typename realType<T>::Type)(std::sqrt((double)sizeX*sizeY*sizeZ)/std::sqrt((double)RO*E1*E2));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal(scaling, dataResized));
+        Gadgetron::scal(scaling, dataResized);
     }
     catch(...)
     {
@@ -3949,7 +4284,7 @@ zpadResize2DFilter(const hoNDArray<T>& data, size_t sizeX, size_t sizeY, const h
         GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft2c(dataResized));
 
         typename realType<T>::Type scaling = (typename realType<T>::Type)(std::sqrt((double)sizeX*sizeY)/std::sqrt((double)RO*E1));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal(scaling, dataResized));
+        Gadgetron::scal(scaling, dataResized);
     }
     catch(...)
     {
@@ -3998,7 +4333,7 @@ zpadResize3DFilter(const hoNDArray<T>& data, size_t sizeX, size_t sizeY, size_t
         GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft3c(dataResized));
 
         typename realType<T>::Type scaling = (typename realType<T>::Type)(std::sqrt((double)sizeX*sizeY*sizeZ)/std::sqrt((double)RO*E1*E2));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal(scaling, dataResized));
+        Gadgetron::scal(scaling, dataResized);
     }
     catch(...)
     {
@@ -4169,20 +4504,32 @@ coilMap2DNIHInner(const hoNDArray<T>& data, hoNDArray<T>& coilMap, size_t ks, si
 
         int e1;
 
-        #pragma omp parallel default(none) private(e1) shared(ks, RO, E1, CHA, pSen, pData, halfKs, power, kss)
+        // #pragma omp parallel default(none) private(e1) shared(ks, RO, E1, CHA, pSen, pData, halfKs, power, kss)
+        #pragma omp parallel private(e1) shared(ks, RO, E1, CHA, pSen, pData, halfKs, power, kss)
         {
-            hoMatrix<T> D(ks*ks, CHA);
+            hoNDArray<T> D(ks*ks, CHA);
             T* pD = D.begin();
 
-            hoMatrix<T> DH_D(CHA, CHA);
+            hoNDArray<T> DC(ks*ks, CHA);
+            T* pDC = DC.begin();
 
-            hoMatrix<T> U1(ks*ks, 1);
+            hoNDArray<T> DH_D(CHA, CHA);
+            Gadgetron::clear(DH_D);
+
+            hoNDArray<T> U1(ks*ks, 1);
             T* pU1 = U1.begin();
 
-            hoMatrix<T> V1(CHA, 1);
+            hoNDArray<T> V1(CHA, 1);
             T* pV1 = V1.begin();
 
-            hoMatrix<T> V(CHA, 1);
+            hoNDArray<T> V(CHA, 1);
+
+            Gadgetron::clear(D);
+            Gadgetron::clear(DC);
+            Gadgetron::clear(DH_D);
+            Gadgetron::clear(U1);
+            Gadgetron::clear(V1);
+            Gadgetron::clear(V);
 
             T phaseU1;
 
@@ -4208,9 +4555,9 @@ coilMap2DNIHInner(const hoNDArray<T>& data, hoNDArray<T>& coilMap, size_t ks, si
                                 de1 = e1 + ke1;
                                 for ( kro=-halfKs; kro<=halfKs; kro++ )
                                 {
-                                    D(ind++, cha) = pDataCurr[de1*RO+ro+kro];
-                                    //pD[ind+cha*kss] = pDataCurr[de1*RO+ro+kro];
-                                    //ind++;
+                                    // D(ind++, cha) = pDataCurr[de1*RO+ro+kro];
+                                    pD[ind+cha*kss] = pDataCurr[de1*RO+ro+kro];
+                                    ind++;
                                 }
                             }
                         }
@@ -4233,43 +4580,104 @@ coilMap2DNIHInner(const hoNDArray<T>& data, hoNDArray<T>& coilMap, size_t ks, si
                                     if ( dro < 0 ) dro += RO;
                                     if ( dro >= RO ) dro -= RO;
 
-                                    D(ind++, cha) = pDataCurr[de1*RO+dro];
-                                    //pD[ind+cha*kss] = pDataCurr[de1*RO+ro+kro];
-                                    //ind++;
+                                    // D(ind++, cha) = pDataCurr[de1*RO+dro];
+                                    pD[ind+cha*kss] = pDataCurr[de1*RO+dro];
+                                    ind++;
                                 }
                             }
                         }
                     }
 
                     // compute V1
-                    D.sumOverCol(V1);
-                    norm2(V1, v1Norm);
-                    scal(1.0/v1Norm, V1);
+                    // D.sumOverCol(V1);
+                    T* pTmp;
+                    for ( cha=0; cha<CHA; cha++ )
+                    {
+                        pTmp = pD + cha*kss;
+                        pV1[cha] = pTmp[0];
+                        for ( po=1; po<kss; po++ )
+                        {
+                            pV1[cha] += pTmp[po];
+                        }
+                    }
 
-                    GeneralMatrixProduct_gemm(DH_D, D, true, D, false);
+                    // norm2(V1, v1Norm);
+                    // Gadgetron::math::norm2(CHA, V1.begin(), v1Norm);
+                    value_type sum(0);
+                    for ( cha=0; cha<CHA; cha++ )
+                    {
+                        const T& c = pV1[cha];
+                        const value_type re = c.real();
+                        const value_type im = c.imag();
+                        sum += ( (re*re) + (im * im) );
+                    }
+                    v1Norm = std::sqrt(sum);
+
+                    // scal( (value_type)1.0/v1Norm, V1);
+                    value_type v1NormInv = (value_type)1.0/v1Norm;
+                    for ( cha=0; cha<CHA; cha++ )
+                    {
+                        pV1[cha] *= v1NormInv;
+                    }
+
+                    memcpy(pDC, pD, sizeof(T)*ks*ks*CHA);
+                    gemm(DH_D, DC, true, D, false);
 
                     for ( po=0; po<power; po++ )
                     {
-                        GeneralMatrixProduct_gemm(V, DH_D, false, V1, false);
+                        gemm(V, DH_D, false, V1, false);
+                        // V1 = V;
                         memcpy(V1.begin(), V.begin(), V.get_number_of_bytes());
-                        norm2(V1, v1Norm);
-                        scal(1.0/v1Norm, V1);
+
+                        // norm2(V1, v1Norm);
+
+                        sum = 0;
+                        for ( cha=0; cha<CHA; cha++ )
+                        {
+                            const T& c = pV1[cha];
+                            const value_type re = c.real();
+                            const value_type im = c.imag();
+                            sum += ( (re*re) + (im * im) );
+                        }
+                        v1Norm = std::sqrt(sum);
+
+                        // scal( (value_type)1.0/v1Norm, V1);
+
+                        value_type v1NormInv = (value_type)1.0/v1Norm;
+                        for ( cha=0; cha<CHA; cha++ )
+                        {
+                            pV1[cha] *= v1NormInv;
+                        }
                     }
 
                     // compute U1
-                    GeneralMatrixProduct_gemm(U1, D, false, V1, false);
+                    gemm(U1, D, false, V1, false);
 
+                    //phaseU1 = U1(0, 0);
                     phaseU1 = pU1[0];
                     for ( po=1; po<kss; po++ )
                     {
-                        // phaseU1 += U1(po, 0);
+                        //phaseU1 += U1(po, 0);
                         phaseU1 += pU1[po];
                     }
                     phaseU1 /= std::abs(phaseU1);
 
                     // put the mean object phase to coil map
-                    conjugate(V1, V1);
-                    scal(phaseU1, V1);
+                    // conjugate(V1, V1);
+                    // scal(phaseU1, V1);
+
+                    const value_type c = phaseU1.real();
+                    const value_type d = phaseU1.imag();
+
+                    for ( cha=0; cha<CHA; cha++ )
+                    {
+                        const T& v = pV1[cha];
+                        const value_type a = v.real();
+                        const value_type b = v.imag();
+
+                        reinterpret_cast< value_type(&)[2] >(pV1[cha])[0] = a*c+b*d;
+                        reinterpret_cast< value_type(&)[2] >(pV1[cha])[1] = a*d-b*c;
+                    }
 
                     for ( cha=0; cha<CHA; cha++ )
                     {
@@ -4291,7 +4699,7 @@ coilMap2DNIHInner(const hoNDArray<T>& data, hoNDArray<T>& coilMap, size_t ks, si
 
 template <typename T> 
 bool gtPlusISMRMRDReconUtilComplex<T>::
-coilMap2DNIHInner_2(const hoNDArray<T>& data, hoNDArray<T>& coilMap, size_t ks, size_t power)
+coilMap3DNIHInner(const hoNDArray<T>& data, hoNDArray<T>& coilMap, size_t ks, size_t power)
 {
     try
     {
@@ -4299,9 +4707,10 @@ coilMap2DNIHInner_2(const hoNDArray<T>& data, hoNDArray<T>& coilMap, size_t ks,
 
         long long RO = data.get_size(0);
         long long E1 = data.get_size(1);
-        long long CHA = data.get_size(2);
+        long long E2 = data.get_size(2);
+        long long CHA = data.get_size(3);
 
-        long long N = data.get_number_of_elements()/(RO*E1*CHA);
+        long long N = data.get_number_of_elements()/(RO*E1*E2*CHA);
         GADGET_CHECK_RETURN_FALSE(N==1);
 
         const T* pData = data.begin();
@@ -4317,275 +4726,137 @@ coilMap2DNIHInner_2(const hoNDArray<T>& data, hoNDArray<T>& coilMap, size_t ks,
             ks++;
         }
 
-        size_t kss = ks*ks;
+        size_t kss = ks*ks*ks;
         long long halfKs = (long long)ks/2;
 
-        long long e1, ro, cha;
-        long long kro, ke1, de1, dro;
-
-        // compute the D matrix
-        hoNDArray<T> D(kss, CHA, RO*E1);
-        T* pD = D.begin();
+        long long e2;
 
-        for ( e1=0; e1<(long long)E1; e1++ )
+        #pragma omp parallel default(none) private(e2) shared(ks, RO, E1, E2, CHA, pSen, pData, halfKs, power, kss)
         {
-            for ( ro=0; ro<(long long)RO; ro++ )
-            {
-                long long idx2D = ro + e1*RO;
+            hoMatrix<T> D(kss, CHA);
+            hoMatrix<T> DC(kss, CHA);
+            hoMatrix<T> DH_D(CHA, CHA);
+
+            hoMatrix<T> U1(kss, 1);
+            hoMatrix<T> V1(CHA, 1);
+            hoMatrix<T> V(CHA, 1);
+
+            Gadgetron::clear(D);
+            Gadgetron::clear(DC);
+            Gadgetron::clear(DH_D);
+            Gadgetron::clear(U1);
+            Gadgetron::clear(V1);
+            Gadgetron::clear(V);
+
+            T phaseU1;
+
+            value_type v1Norm(1);
 
-                // fill the data matrix D
-                if ( e1>=halfKs && e1<E1-halfKs && ro>=halfKs && ro<RO-halfKs )
+            long long cha, ro, e1, kro, dro, ke1, de1, ke2, de2;
+            size_t po;
+
+            #pragma omp for
+            for ( e2=0; e2<(long long)E2; e2++ )
+            {
+                for ( e1=0; e1<(long long)E1; e1++ )
                 {
-                    for ( cha=0; cha<CHA; cha++ )
+                    for ( ro=0; ro<(long long)RO; ro++ )
                     {
-                        const T* pDataCurr = pData + cha*RO*E1;
-                        long long ind=0;
-                        for ( ke1=-halfKs; ke1<=halfKs; ke1++ )
+                        // fill the data matrix D
+                        if ( e2>=halfKs && e2<E2-halfKs && e1>=halfKs && e1<E1-halfKs && ro>=halfKs && ro<RO-halfKs )
                         {
-                            de1 = e1 + ke1;
-                            for ( kro=-halfKs; kro<=halfKs; kro++ )
+                            for ( cha=0; cha<CHA; cha++ )
                             {
-                                long long idxD = idx2D*CHA*kss + cha*kss + ind;
-                                D(idxD) = pDataCurr[de1*RO+ro+kro];
-                                ind++;
+                                const T* pDataCurr = pData + cha*RO*E1*E2;
+                                long long ind=0;
+                                for ( ke2=-halfKs; ke2<=halfKs; ke2++ )
+                                {
+                                    de2 = e2 + ke2;
+                                    for ( ke1=-halfKs; ke1<=halfKs; ke1++ )
+                                    {
+                                        de1 = e1 + ke1;
+                                        for ( kro=-halfKs; kro<=halfKs; kro++ )
+                                        {
+                                            D(ind++, cha) = pDataCurr[de2*RO*E1+de1*RO+ro+kro];
+                                        }
+                                    }
+                                }
                             }
                         }
-                    }
-                }
-                else
-                {
-                    for ( cha=0; cha<CHA; cha++ )
-                    {
-                        const T* pDataCurr = pData + cha*RO*E1;
-                        long long ind=0;
-                        for ( ke1=-halfKs; ke1<=halfKs; ke1++ )
+                        else
                         {
-                            de1 = e1 + ke1;
-                            if ( de1 < 0 ) de1 += E1;
-                            if ( de1 >= E1 ) de1 -= E1;
-
-                            for ( kro=-halfKs; kro<=halfKs; kro++ )
+                            for ( cha=0; cha<CHA; cha++ )
                             {
-                                dro = ro + kro;
-                                if ( dro < 0 ) dro += RO;
-                                if ( dro >= RO ) dro -= RO;
+                                const T* pDataCurr = pData + cha*RO*E1*E2;
+                                long long ind=0;
+                                for ( ke2=-halfKs; ke2<=halfKs; ke2++ )
+                                {
+                                    de2 = e2 + ke2;
+                                    if ( de2 < 0 ) de2 += E2;
+                                    if ( de2 >= E2 ) de2 -= E2;
 
-                                long long idxD = idx2D*CHA*kss + cha*kss + ind;
-                                D(idxD) = pDataCurr[de1*RO+dro];
-                                ind++;
-                            }
-                        }
-                    }
-                }
-            }
-        }
+                                    for ( ke1=-halfKs; ke1<=halfKs; ke1++ )
+                                    {
+                                        de1 = e1 + ke1;
+                                        if ( de1 < 0 ) de1 += E1;
+                                        if ( de1 >= E1 ) de1 -= E1;
 
-        // compute DH_D and V1
-        hoNDArray<T> DH_D(CHA, CHA, RO*E1);
-        T* pDH_D = DH_D.begin();
+                                        for ( kro=-halfKs; kro<=halfKs; kro++ )
+                                        {
+                                            dro = ro + kro;
+                                            if ( dro < 0 ) dro += RO;
+                                            if ( dro >= RO ) dro -= RO;
 
-        hoNDArray<T> V1(CHA, RO*E1);
-        T* pV1 = V1.begin();
+                                            D(ind++, cha) = pDataCurr[de2*RO*E1+de1*RO+dro];
+                                        }
+                                    }
+                                }
+                            }
+                        }
 
-        hoNDArray<T> V(CHA, RO*E1);
-        T* pV = V.begin();
-
-        hoNDArray<T> U1(kss, RO*E1);
-        T* pU1 = U1.begin();
-
-        for ( e1=0; e1<(long long)E1; e1++ )
-        {
-            for ( ro=0; ro<(long long)RO; ro++ )
-            {
-                long long idx2D = ro + e1*RO;
-
-                hoNDArray<T> currD(kss, CHA, pD+idx2D*CHA*kss);
-                T* pCurrD = currD.begin();
-
-                hoNDArray<T> currDH_D(CHA, CHA, pDH_D+idx2D*CHA*CHA);
-
-                GeneralMatrixProduct(currDH_D, currD, true, currD, false);
-
-                hoNDArray<T> currV1(CHA, 1, pV1+idx2D*CHA);
-
-                for ( cha=0; cha<CHA; cha++ )
-                {
-                    currV1(cha) = 0;
-                    for ( size_t ii=0; ii<kss; ii++ )
-                    {
-                        currV1(cha) += pCurrD[ii+cha*kss];
-                    }
-                }
-
-                value_type v1Norm(1);
-
-                //norm2(currV1, v1Norm);
-                //scal(1.0/v1Norm, currV1);
+                        // compute V1
+                        D.sumOverCol(V1);
+                        norm2(V1, v1Norm);
+                        scal( (value_type)1.0/v1Norm, V1);
 
-                for ( cha=0; cha<CHA; cha++ )
-                {
-                    T v = currV1(cha) * std::conj(currV1(cha));
-                    v1Norm += v.real();
-                }
-                v1Norm = std::sqrt(v1Norm);
+                        memcpy(DC.begin(), D.begin(), sizeof(T)*kss*CHA);
+                        gemm(DH_D, DC, true, D, false);
+                        // gemm(DH_D, D, true, D, false);
 
-                for ( cha=0; cha<CHA; cha++ )
-                {
-                    currV1(cha) /= v1Norm;
-                }
+                        for ( po=0; po<power; po++ )
+                        {
+                            gemm(V, DH_D, false, V1, false);
+                            V1 = V;
+                            norm2(V1, v1Norm);
+                            scal( (value_type)1.0/v1Norm, V1);
+                        }
 
-                size_t po;
+                        // compute U1
+                        gemm(U1, D, false, V1, false);
 
-                hoNDArray<T> currV(CHA, 1, pV+idx2D*CHA);
-                for ( po=0; po<power; po++ )
-                {
-                    GeneralMatrixProduct(currV, currDH_D, false, currV1, false);
-                    currV1 = currV;
-                    /*norm2(currV1, v1Norm);
-                    scal(1.0/v1Norm, currV1);*/
+                        phaseU1 = U1(0, 0);
+                        for ( po=1; po<kss; po++ )
+                        {
+                            phaseU1 += U1(po, 0);
+                        }
+                        phaseU1 /= std::abs(phaseU1);
 
-                    for ( cha=0; cha<CHA; cha++ )
-                    {
-                        T v = currV1(cha) * std::conj(currV1(cha));
-                        v1Norm += v.real();
-                    }
-                    v1Norm = std::sqrt(v1Norm);
+                        // put the mean object phase to coil map
+                        conjugate(V1, V1);
+                        scal(phaseU1, V1);
 
-                    for ( cha=0; cha<CHA; cha++ )
-                    {
-                        currV1(cha) /= v1Norm;
+                        for ( cha=0; cha<CHA; cha++ )
+                        {
+                            pSen[cha*RO*E1*E2+e2*RO*E1+e1*RO+ro] = V1(cha, 0);
+                        }
                     }
                 }
-
-                // compute U1
-                hoNDArray<T> currU1(kss, 1, pU1+idx2D*kss);
-                GeneralMatrixProduct(currU1, currD, false, currV1, false);
-
-                T phaseU1 = currU1(0);
-                for ( po=1; po<kss; po++ )
-                {
-                    phaseU1 += currU1(po);
-                }
-                phaseU1 /= std::abs(phaseU1);
-
-                // put the mean object phase to coil map
-                for ( cha=0; cha<CHA; cha++ )
-                {
-                    currV1(cha) = phaseU1 * std::conj(currV1(cha));
-                }
-
-                /*conjugate(currV1, currV1);
-                scal(phaseU1, currV1);*/
-
-                for ( cha=0; cha<CHA; cha++ )
-                {
-                    pSen[cha*RO*E1+idx2D] = currV1(cha);
-                }
             }
         }
-
-        //#pragma omp parallel default(none) private(e1) shared(ks, RO, E1, CHA, pSen, pData, halfKs, power, kss)
-        //{
-        //    hoMatrix<T> D(ks*ks, CHA);
-        //    hoMatrix<T> DH_D(CHA, CHA);
-
-        //    hoMatrix<T> U1(ks*ks, 1);
-        //    hoMatrix<T> V1(CHA, 1);
-        //    hoMatrix<T> V(CHA, 1);
-
-        //    T phaseU1;
-
-        //    value_type v1Norm(1), u1Norm(1);
-
-        //    long long cha, ro, kro, ke1, de1, dro;
-        //    size_t po;
-
-        //    #pragma omp for
-        //    for ( e1=0; e1<(long long)E1; e1++ )
-        //    {
-        //        for ( ro=0; ro<(long long)RO; ro++ )
-        //        {
-        //            // fill the data matrix D
-        //            if ( e1>=halfKs && e1<E1-halfKs && ro>=halfKs && ro<RO-halfKs )
-        //            {
-        //                for ( cha=0; cha<CHA; cha++ )
-        //                {
-        //                    const T* pDataCurr = pData + cha*RO*E1;
-        //                    long long ind=0;
-        //                    for ( ke1=-halfKs; ke1<=halfKs; ke1++ )
-        //                    {
-        //                        de1 = e1 + ke1;
-        //                        for ( kro=-halfKs; kro<=halfKs; kro++ )
-        //                        {
-        //                            D(ind++, cha) = pDataCurr[de1*RO+ro+kro];
-        //                        }
-        //                    }
-        //                }
-        //            }
-        //            else
-        //            {
-        //                for ( cha=0; cha<CHA; cha++ )
-        //                {
-        //                    const T* pDataCurr = pData + cha*RO*E1;
-        //                    long long ind=0;
-        //                    for ( ke1=-halfKs; ke1<=halfKs; ke1++ )
-        //                    {
-        //                        de1 = e1 + ke1;
-        //                        if ( de1 < 0 ) de1 += E1;
-        //                        if ( de1 >= E1 ) de1 -= E1;
-
-        //                        for ( kro=-halfKs; kro<=halfKs; kro++ )
-        //                        {
-        //                            dro = ro + kro;
-        //                            if ( dro < 0 ) dro += RO;
-        //                            if ( dro >= RO ) dro -= RO;
-
-        //                            D(ind++, cha) = pDataCurr[de1*RO+dro];
-        //                        }
-        //                    }
-        //                }
-        //            }
-
-        //            // compute V1
-        //            D.sumOverCol(V1);
-        //            norm2(V1, v1Norm);
-        //            scal(1.0/v1Norm, V1);
-
-        //            GeneralMatrixProduct_gemm(DH_D, D, true, D, false);
-
-        //            for ( po=0; po<power; po++ )
-        //            {
-        //                GeneralMatrixProduct_gemm(V, DH_D, false, V1, false);
-        //                V1 = V;
-        //                norm2(V1, v1Norm);
-        //                scal(1.0/v1Norm, V1);
-        //            }
-
-        //            // compute U1
-        //            GeneralMatrixProduct_gemm(U1, D, false, V1, false);
-
-        //            phaseU1 = U1(0, 0);
-        //            for ( po=1; po<kss; po++ )
-        //            {
-        //                phaseU1 += U1(po, 0);
-        //            }
-        //            phaseU1 /= std::abs(phaseU1);
-
-        //            // put the mean object phase to coil map
-        //            conjugate(V1, V1);
-        //            scal(phaseU1, V1);
-
-        //            for ( cha=0; cha<CHA; cha++ )
-        //            {
-        //                pSen[cha*RO*E1+e1*RO+ro] = V1(cha, 0);
-        //            }
-        //        }
-        //    }
-        //}
     }
     catch(...)
     {
-        GADGET_ERROR_MSG("Errors in gtPlusISMRMRDReconUtilComplex<T>::coilMap2DNIHInner_2(...) ... ");
+        GADGET_ERROR_MSG("Errors in gtPlusISMRMRDReconUtilComplex<T>::coilMap3DNIHInner(...) ... ");
         return false;
     }
 
@@ -4594,18 +4865,22 @@ coilMap2DNIHInner_2(const hoNDArray<T>& data, hoNDArray<T>& coilMap, size_t ks,
 
 template <typename T> 
 bool gtPlusISMRMRDReconUtilComplex<T>::
-coilMap3DNIHInner(const hoNDArray<T>& data, hoNDArray<T>& coilMap, size_t ks, size_t power)
+coilMap2DNIH2Inner(const hoNDArray<T>& data, hoNDArray<T>& coilMap, size_t ks, size_t iterNum, typename realType<T>::Type thres)
 {
     try
     {
+        //std::string debugFolder = "D:/software/Gadgetron/20130114/install_debug/gadgetron/DebugOutput/";
+        //gtPlusIOAnalyze gt_io;
+
+        //GADGET_EXPORT_ARRAY_COMPLEX(debugFolder, gt_io, data, "data");
+
         typedef typename realType<T>::Type value_type;
 
         long long RO = data.get_size(0);
         long long E1 = data.get_size(1);
-        long long E2 = data.get_size(2);
-        long long CHA = data.get_size(3);
+        long long CHA = data.get_size(2);
 
-        long long N = data.get_number_of_elements()/(RO*E1*E2*CHA);
+        long long N = data.get_number_of_elements()/(RO*E1*CHA);
         GADGET_CHECK_RETURN_FALSE(N==1);
 
         const T* pData = data.begin();
@@ -4614,134 +4889,116 @@ coilMap3DNIHInner(const hoNDArray<T>& data, hoNDArray<T>& coilMap, size_t ks, si
         {
             coilMap = data;
         }
-        T* pSen = coilMap.begin();
 
-        if ( ks%2 != 1 )
-        {
-            ks++;
-        }
+        // create convolution kernel
+        hoNDArray<T> ker(ks, ks);
+        Gadgetron::fill( ker, T( (value_type)1.0/(ks*ks)) );
 
-        size_t kss = ks*ks*ks;
-        long long halfKs = (long long)ks/2;
+        hoNDArray<T> prevR(RO, E1, 1), R(RO, E1, 1), imT(RO, E1, 1), magT(RO, E1, 1), diffR(RO, E1, 1);
+        hoNDArray<T> coilMapConv(RO, E1, CHA);
+        hoNDArray<T> D(RO, E1, CHA);
+        hoNDArray<T> D_sum(1, E1, CHA);
+        hoNDArray<T> D_sum_1st_2nd(1, 1, CHA);
+        typename realType<T>::Type v, vR, vDiffR;
+        T vCha;
+        size_t iter;
+        long long cha;
 
-        long long e2;
+        GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOver1stDimension(data, D_sum));
+        GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOver2ndDimension(D_sum, D_sum_1st_2nd));
+        Gadgetron::norm2(D_sum_1st_2nd, v);
+        Gadgetron::scal( (value_type)1.0/v, D_sum_1st_2nd);
 
-        #pragma omp parallel default(none) private(e2) shared(ks, RO, E1, E2, CHA, pSen, pData, halfKs, power, kss)
+        Gadgetron::clear(R);
+        for ( cha=0; cha<CHA; cha++ )
         {
-            hoMatrix<T> D(kss, CHA);
-            hoMatrix<T> DH_D(CHA, CHA);
+            hoNDArray<T> dataCHA(RO, E1, const_cast<T*>(data.begin())+cha*RO*E1);
+            vCha = D_sum_1st_2nd(cha);
+            Gadgetron::axpy( std::conj(vCha), dataCHA, R, R);
+        }
 
-            hoMatrix<T> U1(kss, 1);
-            hoMatrix<T> V1(CHA, 1);
-            hoMatrix<T> V(CHA, 1);
+        for ( iter=0; iter<iterNum; iter++ )
+        {
+            prevR = R;
 
-            T phaseU1;
+            Gadgetron::conjugate(R, R);
+            //GADGET_EXPORT_ARRAY_COMPLEX(debugFolder, gt_io, R, "R");
 
-            value_type v1Norm(1);
+            GADGET_CHECK_RETURN_FALSE(Gadgetron::multipleMultiply(R, data, coilMap));
+            //GADGET_EXPORT_ARRAY_COMPLEX(debugFolder, gt_io, coilMap, "coilMap");
 
-            long long cha, ro, e1, kro, dro, ke1, de1, ke2, de2;
-            size_t po;
+            //Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fft2c(coilMap, coilMapConv);
+            //GADGET_CHECK_RETURN_FALSE(Gadgetron::multipleMultiply(kerKSpace, coilMapConv, D));
+            //Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft2c(D, coilMapConv);
 
-            #pragma omp for
-            for ( e2=0; e2<(long long)E2; e2++ )
-            {
-                for ( e1=0; e1<(long long)E1; e1++ )
-                {
-                    for ( ro=0; ro<(long long)RO; ro++ )
-                    {
-                        // fill the data matrix D
-                        if ( e2>=halfKs && e2<E2-halfKs && e1>=halfKs && e1<E1-halfKs && ro>=halfKs && ro<RO-halfKs )
-                        {
-                            for ( cha=0; cha<CHA; cha++ )
-                            {
-                                const T* pDataCurr = pData + cha*RO*E1*E2;
-                                long long ind=0;
-                                for ( ke2=-halfKs; ke2<=halfKs; ke2++ )
-                                {
-                                    de2 = e2 + ke2;
-                                    for ( ke1=-halfKs; ke1<=halfKs; ke1++ )
-                                    {
-                                        de1 = e1 + ke1;
-                                        for ( kro=-halfKs; kro<=halfKs; kro++ )
-                                        {
-                                            D(ind++, cha) = pDataCurr[de2*RO*E1+de1*RO+ro+kro];
-                                        }
-                                    }
-                                }
-                            }
-                        }
-                        else
-                        {
-                            for ( cha=0; cha<CHA; cha++ )
-                            {
-                                const T* pDataCurr = pData + cha*RO*E1*E2;
-                                long long ind=0;
-                                for ( ke2=-halfKs; ke2<=halfKs; ke2++ )
-                                {
-                                    de2 = e2 + ke2;
-                                    if ( de2 < 0 ) de2 += E2;
-                                    if ( de2 >= E2 ) de2 -= E2;
+            Gadgetron::conv2(coilMap, ker, coilMapConv);
+            //GADGET_EXPORT_ARRAY_COMPLEX(debugFolder, gt_io, coilMapConv, "coilMapConv");
 
-                                    for ( ke1=-halfKs; ke1<=halfKs; ke1++ )
-                                    {
-                                        de1 = e1 + ke1;
-                                        if ( de1 < 0 ) de1 += E1;
-                                        if ( de1 >= E1 ) de1 -= E1;
+            Gadgetron::multiplyConj(coilMapConv, coilMapConv, D);
+            //GADGET_EXPORT_ARRAY_COMPLEX(debugFolder, gt_io, D, "D");
 
-                                        for ( kro=-halfKs; kro<=halfKs; kro++ )
-                                        {
-                                            dro = ro + kro;
-                                            if ( dro < 0 ) dro += RO;
-                                            if ( dro >= RO ) dro -= RO;
+            GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOver3rdDimension(D, R));
+            //GADGET_EXPORT_ARRAY_COMPLEX(debugFolder, gt_io, R, "D_R");
 
-                                            D(ind++, cha) = pDataCurr[de2*RO*E1+de1*RO+dro];
-                                        }
-                                    }
-                                }
-                            }
-                        }
+            Gadgetron::sqrt(R, R);
+            //GADGET_EXPORT_ARRAY_COMPLEX(debugFolder, gt_io, R, "D_R2");
 
-                        // compute V1
-                        D.sumOverCol(V1);
-                        norm2(V1, v1Norm);
-                        scal(1.0/v1Norm, V1);
+            Gadgetron::addEpsilon(R);
+            Gadgetron::inv(R, R);
+            //GADGET_EXPORT_ARRAY_COMPLEX(debugFolder, gt_io, R, "D_R_inv");
 
-                        GeneralMatrixProduct_gemm(DH_D, D, true, D, false);
+            GADGET_CHECK_RETURN_FALSE(Gadgetron::multipleMultiply(R, coilMapConv, coilMap));
 
-                        for ( po=0; po<power; po++ )
-                        {
-                            GeneralMatrixProduct_gemm(V, DH_D, false, V1, false);
-                            V1 = V;
-                            norm2(V1, v1Norm);
-                            scal(1.0/v1Norm, V1);
-                        }
+            Gadgetron::multiplyConj(data, coilMap, D);
+            GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOver3rdDimension(D, R));
+            //GADGET_EXPORT_ARRAY_COMPLEX(debugFolder, gt_io, R, "R2");
 
-                        // compute U1
-                        GeneralMatrixProduct_gemm(U1, D, false, V1, false);
+            //if ( iter < iterNum - 1 )
+            //{
+                GADGET_CHECK_RETURN_FALSE(Gadgetron::multipleMultiply(R, coilMap, D));
+            //}
+            //else
+            //{
+            //    D = coilMap;
+            //}
 
-                        phaseU1 = U1(0, 0);
-                        for ( po=1; po<kss; po++ )
-                        {
-                            phaseU1 += U1(po, 0);
-                        }
-                        phaseU1 /= std::abs(phaseU1);
+            GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOver1stDimension(D, D_sum));
+            GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOver2ndDimension(D_sum, D_sum_1st_2nd));
 
-                        // put the mean object phase to coil map
-                        conjugate(V1, V1);
-                        scal(phaseU1, V1);
+            Gadgetron::norm2(D_sum_1st_2nd, v);
+            Gadgetron::scal( (value_type)1.0/v, D_sum_1st_2nd);
 
-                        for ( cha=0; cha<CHA; cha++ )
-                        {
-                            pSen[cha*RO*E1*E2+e2*RO*E1+e1*RO+ro] = V1(cha, 0);
-                        }
-                    }
-                }
+            Gadgetron::clear(imT);
+            for ( cha=0; cha<CHA; cha++ )
+            {
+                hoNDArray<T> coilMapCHA(RO, E1, coilMap.begin()+cha*RO*E1);
+                vCha = D_sum_1st_2nd(cha);
+                Gadgetron::axpy( std::conj(vCha), coilMapCHA, imT, imT);
             }
+
+            //GADGET_EXPORT_ARRAY_COMPLEX(debugFolder, gt_io, imT, "imT");
+
+            Gadgetron::abs(imT, magT);
+            Gadgetron::divide(imT, magT, imT);
+
+            //GADGET_EXPORT_ARRAY_COMPLEX(debugFolder, gt_io, imT, "imT2");
+
+            Gadgetron::multiply(R, imT, R);
+            Gadgetron::conjugate(imT, imT);
+            GADGET_CHECK_RETURN_FALSE(Gadgetron::multipleMultiply(imT, coilMap, coilMap));
+
+            Gadgetron::subtract(prevR, R, diffR);
+            Gadgetron::norm2(diffR, vDiffR);
+            Gadgetron::norm2(R, vR);
+
+            // GADGET_MSG("coilMap2DNIH2Inner - iter : " << iter << " - norm(prevR-R)/norm(R) : " << vDiffR/vR);
+
+            if ( vDiffR/vR < thres ) break;
         }
     }
     catch(...)
     {
-        GADGET_ERROR_MSG("Errors in gtPlusISMRMRDReconUtilComplex<T>::coilMap3DNIHInner(...) ... ");
+        GADGET_ERROR_MSG("Errors in gtPlusISMRMRDReconUtilComplex<T>::coilMap2DNIH2Inner(...) ... ");
         return false;
     }
 
@@ -4760,96 +5017,70 @@ coilMap2DNIH(const hoNDArray<T>& data, hoNDArray<T>& coilMap, ISMRMRDCOILMAPALGO
         long long E1 = data.get_size(1);
         long long CHA = data.get_size(2);
 
-        #ifdef USE_CUDA
-            int cur_device = cudaDeviceManager::Instance()->getCurrentDevice();
-            int warp_size = cudaDeviceManager::Instance()->warp_size(cur_device);
-            int max_blockdim = cudaDeviceManager::Instance()->max_blockdim(cur_device);
-
-            int numOfDevices = cudaDeviceManager::Instance()->getTotalNumberOfDevice();
-
-            if ( (numOfDevices==0) || (CHA>32) )
-            {
-                useGPU = false;
-            }
-
-        #else
-            useGPU = false;
-        #endif // USE_CUDA
+        size_t N = data.get_number_of_elements()/(RO*E1*CHA);
+        size_t num = RO*E1*CHA;
 
-        if ( useGPU )
+        if ( !data.dimensions_equal(&coilMap) )
         {
-            return coilMap2DNIHGPU(data, coilMap, algo, ks, power, iterNum, thres);
+            coilMap = data;
         }
-        else
-        {
-            size_t N = data.get_number_of_elements()/(RO*E1*CHA);
-            size_t num = RO*E1*CHA;
-
-            if ( !data.dimensions_equal(&coilMap) )
-            {
-                coilMap = data;
-            }
 
-            if ( ks%2 != 1 )
-            {
-                ks++;
-            }
+        if ( ks%2 != 1 )
+        {
+            ks++;
+        }
 
-            long long n;
+        long long n;
 
-            if ( N >= 8 )
+        if ( N >= 16 )
+        {
+            #ifdef GCC_OLD_FLAG
+                #pragma omp parallel default(none) private(n) shared(ks, RO, E1, CHA, num, algo, N, power, iterNum, thres)
+            #else
+                #pragma omp parallel default(none) private(n) shared(ks, RO, E1, CHA, num, algo, N, data, coilMap, power, iterNum, thres)
+            #endif 
             {
-                #ifdef GCC_OLD_FLAG
-                    #pragma omp parallel default(none) private(n) shared(ks, RO, E1, CHA, num, algo, N, power, iterNum, thres)
-                #else
-                    #pragma omp parallel default(none) private(n) shared(ks, RO, E1, CHA, num, algo, N, data, coilMap, power, iterNum, thres)
-                #endif 
+                #pragma omp for
+                for ( n=0; n<(long long)N; n++ )
                 {
-                    #pragma omp for
-                    for ( n=0; n<(long long)N; n++ )
-                    {
-                        hoNDArray<T> dataCurr(RO, E1, CHA, const_cast<T*>(data.begin()+n*num));
-                        hoNDArray<T> coilMapCurr(RO, E1, CHA, coilMap.begin()+n*num);
+                    hoNDArray<T> dataCurr(RO, E1, CHA, const_cast<T*>(data.begin()+n*num));
+                    hoNDArray<T> coilMapCurr(RO, E1, CHA, coilMap.begin()+n*num);
 
-                        if ( algo == ISMRMRD_SOUHEIL_ITER )
-                        {
-                            coilMap2DNIHInner(dataCurr, coilMapCurr, ks, power);
-                        }
-                        else
-                        {
-                            coilMap2DNIHInner(dataCurr, coilMapCurr, ks, power);
-                            //coilMap2DNIHInner_2(dataCurr, coilMapCurr, ks, power);
-                        }
+                    if ( algo == ISMRMRD_SOUHEIL_ITER )
+                    {
+                        coilMap2DNIH2Inner(dataCurr, coilMapCurr, ks, iterNum, thres);
+                    }
+                    else
+                    {
+                        coilMap2DNIHInner(dataCurr, coilMapCurr, ks, power);
                     }
                 }
             }
-            else if ( N == 1 )
+        }
+        else if ( N == 1 )
+        {
+            if ( algo == ISMRMRD_SOUHEIL_ITER )
             {
+                GADGET_CHECK_RETURN_FALSE(coilMap2DNIH2Inner(data, coilMap, ks, iterNum, thres));
+            }
+            else
+            {
+                GADGET_CHECK_RETURN_FALSE(coilMap2DNIHInner(data, coilMap, ks, power));
+            }
+        }
+        else
+        {
+            for ( n=0; n<(long long)N; n++ )
+            {
+                hoNDArray<T> dataCurr(RO, E1, CHA, const_cast<T*>(data.begin()+n*num));
+                hoNDArray<T> coilMapCurr(RO, E1, CHA, coilMap.begin()+n*num);
                 if ( algo == ISMRMRD_SOUHEIL_ITER )
                 {
-                    GADGET_CHECK_RETURN_FALSE(coilMap2DNIHInner(data, coilMap, ks, power));
+                    GADGET_CHECK_RETURN_FALSE(coilMap2DNIH2Inner(dataCurr, coilMapCurr, ks, iterNum, thres));
                 }
                 else
                 {
-                    GADGET_CHECK_RETURN_FALSE(coilMap2DNIHInner(data, coilMap, ks, power));
-                    //GADGET_CHECK_RETURN_FALSE(coilMap2DNIHInner_2(data, coilMap, ks, power));
-                }
-            }
-            else
-            {
-                for ( n=0; n<(long long)N; n++ )
-                {
-                    hoNDArray<T> dataCurr(RO, E1, CHA, const_cast<T*>(data.begin()+n*num));
-                    hoNDArray<T> coilMapCurr(RO, E1, CHA, coilMap.begin()+n*num);
-                    if ( algo == ISMRMRD_SOUHEIL_ITER )
-                    {
-                        GADGET_CHECK_RETURN_FALSE(coilMap2DNIHInner(dataCurr, coilMapCurr, ks, power));
-                    }
-                    else
-                    {
-                        GADGET_CHECK_RETURN_FALSE(coilMap2DNIHInner(dataCurr, coilMapCurr, ks, power));
-                        //GADGET_CHECK_RETURN_FALSE(coilMap2DNIHInner_2(dataCurr, coilMapCurr, ks, power));
-                    }
+                    GADGET_CHECK_RETURN_FALSE(coilMap2DNIHInner(dataCurr, coilMapCurr, ks, power));
                 }
             }
         }
@@ -4865,208 +5096,108 @@ coilMap2DNIH(const hoNDArray<T>& data, hoNDArray<T>& coilMap, ISMRMRDCOILMAPALGO
 
 template <typename T> 
 bool gtPlusISMRMRDReconUtilComplex<T>::
-coilMap2DNIHGPU(const hoNDArray<T>& data, hoNDArray<T>& coilMap, ISMRMRDCOILMAPALGO algo, size_t ks, size_t power, size_t iterNum, typename realType<T>::Type thres)
+coilMap3DNIH2Inner(const hoNDArray<T>& data, hoNDArray<T>& coilMap, size_t ks, size_t kz, size_t iterNum, typename realType<T>::Type thres)
 {
     try
     {
-        #ifdef USE_CUDA
-            typedef typename realType<T>::Type value_type;
-
-            GADGET_MSG("call gpu version of coilMap2DNIH ... ");
-
-            long long RO = data.get_size(0);
-            long long E1 = data.get_size(1);
-            long long CHA = data.get_size(2);
-            long long N = data.get_number_of_elements()/(RO*E1*CHA);
-
-            if ( !data.dimensions_equal(&coilMap) )
-            {
-                coilMap = data;
-            }
-
-            if ( ks%2 != 1 )
-            {
-                ks++;
-            }
-
-            Gadgetron::GadgetronTimer gt_timer1_(false), gt_timer3_(false);
-
-            size_t kss = ks * ks;
-
-            bool gt3_timing = false;
-
-            if ( N == 1 )
-            {
-                Gadgetron::GadgetronTimer gt_timer1_(false), gt_timer3_(false);
-
-                cuNDArray<float_complext> device_data;
-                cuNDArray<float_complext> csm(data.get_dimensions());
-                Gadgetron::clear(&csm);
-                cuNDArray<float_complext > D(RO, E1, kss, CHA);
-                cuNDArray<float_complext > DH_D(RO, E1, CHA, CHA);
-                cuNDArray<float_complext > V1(RO, E1, CHA);
-                cuNDArray<float_complext > U1(RO, E1, kss);
-
-                // calling the b1_map estimation
-                const float_complext* pData = reinterpret_cast<const float_complext*>(data.begin());
-                hoNDArray<float_complext> data_tmp(RO, E1, CHA, const_cast<float_complext*>(pData));
-
-                GADGET_CHECK_PERFORM(gt3_timing, gt_timer3_.start("copy data to device ... "));
-                {
-                    device_data = data_tmp;
-                }
-                GADGET_CHECK_PERFORM(gt3_timing, gt_timer3_.stop());
-
-                GADGET_CHECK_PERFORM(gt3_timing, gt_timer3_.start("estimate_b1_map_2D_NIH_Souheil ... "));
-                {
-                    Gadgetron::estimate_b1_map_2D_NIH_Souheil( &device_data, &csm, ks, power,
-                                                                D, DH_D, V1, U1 );
-                }
-                GADGET_CHECK_PERFORM(gt3_timing, gt_timer3_.stop());
-
-                GADGET_CHECK_PERFORM(gt3_timing, gt_timer3_.start("coil map to host ... "));
-                {
-                    // boost::shared_ptr< hoNDArray<float_complext> > csm_host = csm.to_host();
-                    csm.to_host(reinterpret_cast<hoNDArray<float_complext>* >(&coilMap));
-                    //memcpy(coilMap.begin(), csm_host->begin(), csm_host->get_number_of_bytes());
-                }
-                GADGET_CHECK_PERFORM(gt3_timing, gt_timer3_.stop());
-            }
-            else
-            {
-                size_t jobSize = sizeof(T)*RO*E1*(kss+CHA)*CHA*4.0;
-                size_t minimalMemoryForValidDevice = (size_t)(2.0*1024.0*1024*1024); // 2GB
-
-                std::vector< std::pair<unsigned int, std::vector<std::vector<unsigned int> > > > jobSchedule;
-                if ( !this->cudaJobSplitter(N, jobSize, minimalMemoryForValidDevice, jobSchedule) )
-                {
-                    GADGET_ERROR_MSG("cudaJobSplitter failed, call the gpu coil map estimatoin ... ");
-                    return this->coilMap2DNIH(data, coilMap, algo, ks, power, iterNum, thres);
-                }
-
-                int device;
-                int numOfValidDevices = (int)jobSchedule.size();
-
-                for ( device=0; device<(int)numOfValidDevices; device++ )
-                {
-                    GADGET_MSG("GPU device " << jobSchedule[device].first << " has " << jobSchedule[device].second.size() << " jobs ... ");
-                    GADGET_MSG("Every job has " << jobSchedule[device].second[0].size() << " slics ... ");
-                }
+        typedef typename realType<T>::Type value_type;
 
-                #pragma omp parallel default(none) private(device) shared(numOfValidDevices, jobSchedule, RO, E1, CHA, kss, ks, power, data, coilMap, gt3_timing) num_threads(numOfValidDevices) if ( numOfValidDevices > 1 )
-                {
-                    int tid = 0;
-                    #ifdef USE_OMP
-                        tid = omp_get_thread_num();
-                    #endif // USE_OMP
-                    cudaSetDevice(jobSchedule[tid].first);
+        size_t RO = data.get_size(0);
+        size_t E1 = data.get_size(1);
+        size_t E2 = data.get_size(2);
+        size_t CHA = data.get_size(3);
 
-                    Gadgetron::GadgetronTimer gt_timer1_(false), gt_timer3_(false);
+        size_t N = data.get_number_of_elements()/(RO*E1*E2*CHA);
+        GADGET_CHECK_RETURN_FALSE(N==1);
 
-                    #pragma omp for
-                    for ( device=0; device<numOfValidDevices; device++ )
-                    {
-                        unsigned int totalJobPackage = jobSchedule[device].second.size();
-                        unsigned int usedN = jobSchedule[device].second[0].size();
+        const T* pData = data.begin();
 
-                        cuNDArray<float_complext> device_data;
+        if ( !data.dimensions_equal(&coilMap) )
+        {
+            coilMap = data;
+        }
 
-                        cuNDArray<float_complext> csm(RO, E1, usedN, CHA);
-                        Gadgetron::clear(&csm);
+        // create convolution kernel
+        hoNDArray<T> ker(ks, ks, kz);
+        Gadgetron::fill( &ker, T( (value_type)1.0/(ks*ks*kz)) );
 
-                        cuNDArray<float_complext > D(RO*E1*usedN, kss, CHA);
-                        cuNDArray<float_complext > DH_D(RO*E1*usedN, CHA, CHA);
-                        cuNDArray<float_complext > V1(RO*E1*usedN, CHA);
-                        cuNDArray<float_complext > U1(RO*E1*usedN, kss);
+        hoNDArray<T> R(RO, E1, E2, 1), imT(RO, E1, E2, 1), magT(RO, E1, E2, 1);
+        hoNDArray<T> coilMapConv(RO, E1, E2, CHA);
+        hoNDArray<T> D(RO, E1, E2, CHA);
+        hoNDArray<T> D_sum(1, CHA);
+        typename realType<T>::Type v;
+        T vCha;
+        size_t iter, cha;
 
-                        hoNDArray<T> dataCurr;
-                        hoNDArray<T> coilMapCurr;
+        hoNDArray<T> dataByCha(RO*E1*E2, CHA, const_cast<T*>(data.begin()));
+        GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOver1stDimension(data, D_sum));
+        Gadgetron::norm2(D_sum, v);
+        Gadgetron::scal( (value_type)1.0/v, D_sum);
 
-                        hoNDArray<T> dataTmp, coilMapTmp;
+        Gadgetron::clear(R);
+        for ( cha=0; cha<CHA; cha++ )
+        {
+            hoNDArray<T> dataCHA(RO, E1, E2, const_cast<T*>(data.begin())+cha*RO*E1*E2);
+            vCha = D_sum(cha);
+            Gadgetron::axpy( std::conj(vCha), dataCHA, R, R);
+        }
 
-                        unsigned int package;
-                        for ( package=0; package<totalJobPackage; package++ )
-                        {
-                            unsigned int packageSize = jobSchedule[device].second[package].size();
-                            size_t start = jobSchedule[device].second[package][0];
-                            size_t end = jobSchedule[device].second[package][packageSize-1];
+        for ( iter=0; iter<iterNum; iter++ )
+        {
+            Gadgetron::conjugate(R, R);
 
-                            size_t usedNPackage = end-start+1;
+            GADGET_CHECK_RETURN_FALSE(Gadgetron::multipleMultiply(R, data, coilMap));
 
-                            if ( usedNPackage != usedN )
-                            {
-                                usedN = usedNPackage;
-
-                                device_data.create(RO*E1*usedN, CHA);
-                                csm.create(RO*E1*usedN, CHA);
-                                D.create(RO*E1*usedN, kss, CHA);
-                                DH_D.create(RO*E1*usedN, CHA, CHA);
-                                V1.create(RO*E1*usedN, CHA);
-                                U1.create(RO*E1*usedN, kss);
-                            }
+            Gadgetron::conv2(coilMap, ker, coilMapConv);
 
-                            dataTmp.create(RO, E1, CHA, usedN);
-                            memcpy(dataTmp.begin(), data.begin()+start*RO*E1*CHA, sizeof(T)*RO*E1*CHA*usedN);
+            Gadgetron::multiplyConj(coilMapConv, coilMapConv, D);
 
-                            dataCurr.create(RO, E1, usedN, CHA);
-                            coilMapCurr.create(RO, E1, usedN, CHA);
+            GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOver4thDimension(D, R));
 
-                            GADGET_CHECK_PERFORM(gt3_timing, gt_timer3_.start("permute the data ... "));
-                            Gadgetron::permuteLastTwoDimensions(dataTmp, dataCurr);
-                            GADGET_CHECK_PERFORM(gt3_timing, gt_timer3_.stop());
+            Gadgetron::sqrt(R, R);
 
-                            // calling the b1_map estimation
-                            hoNDArray<float_complext> data_tmp(dataCurr.get_dimensions(), reinterpret_cast<float_complext*>(dataCurr.begin()));
+            Gadgetron::addEpsilon(R);
+            Gadgetron::inv(R, R);
 
-                            GADGET_CHECK_PERFORM(gt3_timing, gt_timer3_.start("copy data to device ... "));
-                            {
-                                device_data = data_tmp;
+            GADGET_CHECK_RETURN_FALSE(Gadgetron::multipleMultiply(R, coilMapConv, coilMap));
 
-                                //{
-                                //boost::shared_ptr< hoNDArray<float_complext> > tmp = device_data.to_host();
-                                //hoNDArray<T> tmp_host(tmp->get_dimensions());
+            Gadgetron::multiplyConj(data, coilMap, D);
+            GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOver4thDimension(D, R));
 
-                                //memcpy(tmp_host.begin(), tmp->begin(), tmp->get_number_of_bytes());
-                                //gtPlusIOAnalyze gt_io;
-                                //std::string dstDir = "D:/software/Gadgetron/20130114/gadgetron/toolboxes/gtplus/ut/result/";
-                                //gt_io.exportArrayComplex(tmp_host, dstDir+"tmp");
-                                //}
-                            }
-                            GADGET_CHECK_PERFORM(gt3_timing, gt_timer3_.stop());
+            //if ( iter < iterNum - 1 )
+            //{
+                GADGET_CHECK_RETURN_FALSE(Gadgetron::multipleMultiply(R, coilMap, D));
+            //}
+            //else
+            //{
+            //    D = coilMap;
+            //}
 
-                            GADGET_CHECK_PERFORM(gt3_timing, gt_timer3_.start("estimate_b1_map_2D_NIH_Souheil ... "));
-                            {
-                                Gadgetron::estimate_b1_map_2D_NIH_Souheil( &device_data, &csm, ks, power,
-                                                                            D, DH_D, V1, U1 );
-                            }
-                            GADGET_CHECK_PERFORM(gt3_timing, gt_timer3_.stop());
+            hoNDArray<T> DByCha(RO*E1*E2, CHA, D.begin());
+            GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOver1stDimension(DByCha, D_sum));
 
-                            GADGET_CHECK_PERFORM(gt3_timing, gt_timer3_.start("coil map to host ... "));
-                            {
-                                // csm.to_host(reinterpret_cast<hoNDArray<float_complext>* >(&coilMapCurr));
-                                boost::shared_ptr< hoNDArray<float_complext> > csm_host = csm.to_host();
-                                memcpy(coilMapCurr.begin(), csm_host->begin(), csm_host->get_number_of_bytes());
-                            }
-                            GADGET_CHECK_PERFORM(gt3_timing, gt_timer3_.stop());
+            Gadgetron::norm2(D_sum, v);
+            Gadgetron::scal( (value_type)1.0/v, D_sum);
 
-                            coilMapTmp.create(RO, E1, CHA, usedN);
+            Gadgetron::clear(imT);
+            for ( cha=0; cha<CHA; cha++ )
+            {
+                hoNDArray<T> coilMapCHA(RO, E1, E2, 1, coilMap.begin()+cha*RO*E1*E2);
+                vCha = D_sum(cha);
+                Gadgetron::axpy( std::conj(vCha), coilMapCHA, imT, imT);
+            }
 
-                            GADGET_CHECK_PERFORM(gt3_timing, gt_timer3_.start("permute the coil map ... "));
-                            Gadgetron::permuteLastTwoDimensions(coilMapCurr, coilMapTmp);
-                            GADGET_CHECK_PERFORM(gt3_timing, gt_timer3_.stop());
+            Gadgetron::abs(imT, magT);
+            Gadgetron::divide(imT, magT, imT);
 
-                            memcpy(coilMap.begin()+start*RO*E1*CHA, coilMapTmp.begin(), sizeof(T)*RO*E1*CHA*usedN);
-                        }
-                    }
-                }
-            }
-        #else
-            return this->coilMap2DNIH(data, coilMap, algo, ks, power, iterNum, thres);
-        #endif // USE_CUDA
+            Gadgetron::multiply(R, imT, R);
+            Gadgetron::conjugate(imT, imT);
+            GADGET_CHECK_RETURN_FALSE(Gadgetron::multipleMultiply(imT, coilMap, coilMap));
+        }
     }
     catch(...)
     {
-        GADGET_ERROR_MSG("Errors in gtPlusISMRMRDReconUtilComplex<T>::coilMap2DNIHGPU(...) ... ");
+        GADGET_ERROR_MSG("Errors in gtPlusISMRMRDReconUtilComplex<T>::coilMap3DNIH2Inner(...) ... ");
         return false;
     }
 
@@ -5098,6 +5229,9 @@ coilMap3DNIH(const hoNDArray<T>& data, hoNDArray<T>& coilMap, ISMRMRDCOILMAPALGO
             ks++;
         }
 
+        //std::string debugFolder = "D:/software/Gadgetron/20130114/install/gadgetron/DebugOutput/";
+        //gtPlusIOAnalyze gt_io;
+
         hoNDArray<T> data2D, coilMap2D;
 
         if ( algo == ISMRMRD_SOUHEIL )
@@ -5109,7 +5243,12 @@ coilMap3DNIH(const hoNDArray<T>& data, hoNDArray<T>& coilMap, ISMRMRDCOILMAPALGO
         int n, e2;
         for ( n=0; n<(long long)N; n++ )
         {
-            if ( algo==ISMRMRD_SOUHEIL && E2>5*ks && true3D )
+            if ( algo == ISMRMRD_SOUHEIL_ITER )
+            {
+                GADGET_MSG("calling 3D version of Souhiel iterative coil map estimation ... ");
+                GADGET_CHECK_RETURN_FALSE(this->coilMap3DNIH2Inner(data, coilMap, ks, ks, iterNum, thres));
+            }
+            else if ( algo==ISMRMRD_SOUHEIL && E2>5*ks && true3D )
             {
                 GADGET_MSG("calling 3D version of Souhiel coil map estimation ... ");
                 GADGET_CHECK_RETURN_FALSE(this->coilMap3DNIHInner(data, coilMap, ks, power));
@@ -5144,7 +5283,7 @@ coilMap3DNIH(const hoNDArray<T>& data, hoNDArray<T>& coilMap, ISMRMRDCOILMAPALGO
                         GADGET_CHECK_PERFORM(timing, gt_timer3_.start("coilMap2DNIHInner"));
                         if ( algo == ISMRMRD_SOUHEIL_ITER )
                         {
-                            coilMap2DNIHInner(data2D, coilMap2D, ks, power);
+                            coilMap2DNIH2Inner(data2D, coilMap2D, ks, iterNum, thres);
                         }
                         else
                         {
@@ -5178,165 +5317,6 @@ coilMap3DNIH(const hoNDArray<T>& data, hoNDArray<T>& coilMap, ISMRMRDCOILMAPALGO
 
 template <typename T> 
 bool gtPlusISMRMRDReconUtilComplex<T>::
-coilMap3DNIHGPU_FullResMap(const hoNDArray<T>& data, hoNDArray<T>& coilMap, ISMRMRDCOILMAPALGO algo, size_t ks, size_t power, size_t iterNum, typename realType<T>::Type thres, bool true3D)
-{
-    try
-    {
-        #ifdef USE_CUDA
-            typedef typename realType<T>::Type value_type;
-
-            GADGET_MSG("compute full resolution coil map using gpu ... ");
-
-            size_t RO = data.get_size(0);
-            size_t E1 = data.get_size(1);
-            size_t E2 = data.get_size(2);
-            size_t CHA = data.get_size(3);
-
-            if ( !data.dimensions_equal(&coilMap) )
-            {
-                coilMap = data;
-            }
-
-            if ( ks%2 != 1 )
-            {
-                ks++;
-            }
-
-            size_t kss = ks*ks;
-
-            int numOfDevices = cudaDeviceManager::Instance()->getTotalNumberOfDevice();
-            if ( (numOfDevices==0) || (CHA>32) )
-            {
-                return this->coilMap3DNIH(data, coilMap, algo, ks, power, iterNum, thres);
-            }
-
-            size_t jobSize = sizeof(T)*RO*E1*(kss+CHA)*CHA*2.0;
-            size_t minimalMemoryForValidDevice = (size_t)(2.0*1024.0*1024*1024); // 4GB
-
-            std::vector< std::pair<unsigned int, std::vector<std::vector<unsigned int> > > > jobSchedule;
-            if ( !this->cudaJobSplitter(E2, jobSize, minimalMemoryForValidDevice, jobSchedule) )
-            {
-                GADGET_ERROR_MSG("cudaJobSplitter failed, call the gpu coil map estimatoin ... ");
-                return this->coilMap3DNIH(data, coilMap, algo, ks, power, iterNum, thres);
-            }
-
-            unsigned int numOfValidDevices = jobSchedule.size();
-
-            int device;
-            for ( device=0; device<(int)numOfValidDevices; device++ )
-            {
-                GADGET_MSG("GPU device " << jobSchedule[device].first << " has " << jobSchedule[device].second.size() << " jobs ... ");
-                GADGET_MSG("Every job has " << jobSchedule[device].second[0].size() << " slics ... ");
-            }
-
-            size_t N = data.get_number_of_elements()/(RO*E1*E2*CHA);
-
-            bool gt3_timing = false;
-
-            long long n;
-            for ( n=0; n<(long long)N; n++ )
-            {
-                hoNDArray<T> dataCurr(RO, E1, E2, CHA, const_cast<T*>(data.begin()+n*RO*E1*E2*CHA));
-                hoNDArray<T> coilMapCurr(RO, E1, E2, CHA, coilMap.begin()+n*RO*E1*E2*CHA);
-
-                #pragma omp parallel default(none) private(device) shared(jobSchedule, dataCurr, coilMapCurr, RO, E1, E2, CHA, algo, ks, kss, power, iterNum, thres, numOfValidDevices, gt3_timing) num_threads(numOfValidDevices) if ( numOfValidDevices > 1 )
-                {
-                    int tid = 0;
-                    #ifdef USE_OMP
-                        tid = omp_get_thread_num();
-                    #endif // USE_OMP
-                    cudaSetDevice(jobSchedule[tid].first);
-
-                    Gadgetron::GadgetronTimer gt_timer1_(false), gt_timer3_(false);
-
-                    #pragma omp for
-                    for ( device=0; device<(int)numOfValidDevices; device++ )
-                    {
-                        int totalJobPackage = jobSchedule[tid].second.size();
-
-                        unsigned int usedN = jobSchedule[tid].second[0].size();
-
-                        cuNDArray<float_complext> device_data(RO, E1, usedN, CHA);
-                        cuNDArray<float_complext> csm(RO, E1, usedN, CHA);
-                        cuNDArray<float_complext > D(RO, E1, usedN, kss, CHA);
-                        cuNDArray<float_complext > DH_D(RO, E1, usedN, CHA, CHA);
-                        cuNDArray<float_complext > V1(RO, E1, usedN, CHA);
-                        cuNDArray<float_complext > U1(RO, E1, usedN, kss);
-
-                        hoNDArray<T> dataCurrN;
-                        hoNDArray<T> coilMapCurrN;
-
-                        int ii;
-                        for ( ii=0; ii<totalJobPackage; ii++ )
-                        {
-                            hoNDArray<T> dataTmp, coilMapTmp;
-
-                            unsigned int packageSize = jobSchedule[tid].second[ii].size();
-
-                            size_t start = jobSchedule[tid].second[ii][0];
-                            size_t end = jobSchedule[tid].second[ii][packageSize-1];
-
-                            size_t usedNCurr = end-start+1;
-
-                            if ( usedNCurr != usedN )
-                            {
-                                usedN = usedNCurr;
-
-                                device_data.create(RO, E1, usedN, CHA);
-                                csm.create(RO, E1, usedN, CHA);
-                                D.create(RO, E1, usedN, kss, CHA);
-                                DH_D.create(RO, E1, usedN, CHA, CHA);
-                                V1.create(RO, E1, usedN, CHA);
-                                U1.create(RO, E1, usedN, kss);
-                            }
-
-                            dataCurrN.create(RO, E1, usedN, CHA);
-                            coilMapCurrN.create(RO, E1, usedN, CHA);
-
-                            Gadgetron::cropOver3rdDimension(dataCurr, dataCurrN, start, end);
-
-                            // calling the b1_map estimation
-                            hoNDArray<float_complext> data_tmp(dataCurrN.get_dimensions(), reinterpret_cast<float_complext*>(dataCurrN.begin()));
-
-                            GADGET_CHECK_PERFORM(gt3_timing, gt_timer3_.start("copy data to device ... "));
-                            {
-                                device_data = data_tmp;
-                            }
-                            GADGET_CHECK_PERFORM(gt3_timing, gt_timer3_.stop());
-
-                            GADGET_CHECK_PERFORM(gt3_timing, gt_timer3_.start("estimate_b1_map_2D_NIH_Souheil ... "));
-                            {
-                                Gadgetron::estimate_b1_map_2D_NIH_Souheil( &device_data, &csm, ks, power,
-                                                                            D, DH_D, V1, U1 );
-                            }
-                            GADGET_CHECK_PERFORM(gt3_timing, gt_timer3_.stop());
-
-                            GADGET_CHECK_PERFORM(gt3_timing, gt_timer3_.start("coil map to host ... "));
-                            {
-                                csm.to_host(reinterpret_cast<hoNDArray<float_complext>* >(&coilMapCurrN));
-                            }
-                            GADGET_CHECK_PERFORM(gt3_timing, gt_timer3_.stop());
-
-                            Gadgetron::setSubArrayOver3rdDimension(coilMapCurrN, coilMapCurr, start, end);
-                        }
-                    }
-                }
-            }
-        #else
-            return this->coilMap3DNIH(data, coilMap, algo, ks, power, iterNum, thres);
-        #endif // USE_CUDA
-    }
-    catch(...)
-    {
-        GADGET_ERROR_MSG("Errors in gtPlusISMRMRDReconUtilComplex<T>::coilMap3DNIHGPU_FullResMap(...) ... ");
-        return false;
-    }
-
-    return true;
-}
-
-template <typename T> 
-bool gtPlusISMRMRDReconUtilComplex<T>::
 sumOfSquare(const hoNDArray<T>& data, hoNDArray<T>& sos)
 {
     try
@@ -5345,7 +5325,7 @@ sumOfSquare(const hoNDArray<T>& data, hoNDArray<T>& sos)
         GADGET_CHECK_RETURN_FALSE(NDim>=3);
 
         hoNDArray<T> tmp(data);
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::multiplyConj(data, data, tmp));
+        Gadgetron::multiplyConj(data, data, tmp);
 
         if ( NDim == 3 )
         {
@@ -5360,7 +5340,7 @@ sumOfSquare(const hoNDArray<T>& data, hoNDArray<T>& sos)
             GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOver3rdDimension(tmp, sos));
         }
 
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::sqrt(sos, sos));
+        Gadgetron::sqrt(sos, sos);
     }
     catch(...)
     {
@@ -5395,39 +5375,133 @@ coilCombine(const hoNDArray<T>& data, const hoNDArray<T>& coilMap, hoNDArray<T>&
         boost::shared_ptr< std::vector<size_t> > dim = data.get_dimensions();
         boost::shared_ptr< std::vector<size_t> > dimCoil = coilMap.get_dimensions();
 
-        size_t N = coilMap.get_number_of_elements();
-        size_t num = data.get_number_of_elements()/coilMap.get_number_of_elements();
-
         std::vector<size_t> dimCombined(*dim);
         dimCombined.erase(dimCombined.begin()+2);
         combined.create(&dimCombined);
 
-        std::vector<size_t> dimCombinedCurr(*dimCoil);
-        dimCombinedCurr[2] = 1;
+        size_t RO = data.get_size(0);
+        size_t E1 = data.get_size(1);
+        size_t CHA = data.get_size(2);
+        size_t N = data.get_size(3);
 
-        size_t NCombined = combined.get_number_of_elements()/num;
+        size_t coilN = coilMap.get_size(3);
 
-        long long nn;
-        //#ifdef GCC_OLD_FLAG
-        //    #pragma omp parallel default(none) private(nn) shared(num, dimCoil, dimCombinedCurr, N, NCombined)
-        //#else
-        //    #pragma omp parallel default(none) private(nn) shared(data, coilMap, num, dimCoil, dimCombinedCurr, combined, N, NCombined)
-        //#endif
+        if ( coilN < N )
         {
-            hoNDArray<T> dataTmp(coilMap);
-            hoNDArray<T> dataCurr;
-            hoNDArray<T> dataCombinedCurr;
+            size_t NCombined = data.get_number_of_elements()/(RO*E1*CHA);
 
-            //#pragma omp for
-            for ( nn=0; nn<(long long)num; nn++ )
+            std::vector<size_t> dataInd, coilMapInd(NDimCoil, 0), coimbinedInd(dimCombined.size(), 0);
+
+            size_t nn;
+            size_t d;
+            hoNDArray<T> dataTmp(RO, E1, CHA);
+            hoNDArray<T> combinedCurr(RO, E1, 1);
+
+            for ( nn=0; nn<NCombined; nn++ )
             {
-                dataCurr.create(dimCoil.get(), const_cast<T*>(data.begin()+nn*N));
-                Gadgetron::multiplyConj(dataCurr, coilMap, dataTmp);
+                size_t offsetData = nn*RO*E1*CHA;
+                dataInd = data.calculate_index(offsetData);
 
-                dataCombinedCurr.create(&dimCombinedCurr, const_cast<T*>(combined.begin()+nn*NCombined));
-                Gadgetron::sumOver3rdDimension(dataTmp, dataCombinedCurr);
+                for ( d=0; d<NDimCoil; d++ )
+                {
+                    if ( dataInd[d]<coilMap.get_size(d) )
+                    {
+                        coilMapInd[d] = dataInd[d];
+                    }
+                    else
+                    {
+                        coilMapInd[d] = 0;
+                    }
+                }
+
+                for ( d=3; d<NDim; d++ )
+                {
+                    coimbinedInd[d-1] = dataInd[d];
+                }
+
+                size_t offsetCoilMap = coilMap.calculate_offset(coilMapInd);
+                size_t offsetCombined = combined.calculate_offset(coimbinedInd);
+
+                hoNDArray<T> dataCurr(RO, E1, CHA, const_cast<T*>(data.begin())+offsetData);
+                hoNDArray<T> coilMapCurr(RO, E1, CHA, const_cast<T*>(coilMap.begin())+offsetCoilMap);
+
+                Gadgetron::multiplyConj(dataCurr, coilMapCurr, dataTmp);
+                Gadgetron::sumOver3rdDimension(dataTmp, combinedCurr);
+
+                memcpy(combined.begin()+offsetCombined, combinedCurr.begin(), sizeof(T)*RO*E1);
             }
         }
+        else
+        {
+            size_t NCombined = data.get_number_of_elements()/(RO*E1*CHA*N);
+
+            std::vector<size_t> dataInd, coilMapInd(NDimCoil, 0), coimbinedInd(dimCombined.size(), 0);
+
+            size_t nn;
+            size_t d;
+            hoNDArray<T> dataTmp(RO, E1, CHA, N);
+            hoNDArray<T> combinedCurr(RO, E1, 1, N);
+
+            for ( nn=0; nn<NCombined; nn++ )
+            {
+                size_t offsetData = nn*RO*E1*CHA*N;
+                dataInd = data.calculate_index(offsetData);
+
+                for ( d=0; d<NDimCoil; d++ )
+                {
+                    if ( dataInd[d]<coilMap.get_size(d) )
+                    {
+                        coilMapInd[d] = dataInd[d];
+                    }
+                    else
+                    {
+                        coilMapInd[d] = 0;
+                    }
+                }
+
+                for ( d=3; d<NDim; d++ )
+                {
+                    coimbinedInd[d-1] = dataInd[d];
+                }
+
+                size_t offsetCoilMap = coilMap.calculate_offset(coilMapInd);
+                size_t offsetCombined = combined.calculate_offset(coimbinedInd);
+
+                hoNDArray<T> dataCurr(RO, E1, CHA, N, const_cast<T*>(data.begin())+offsetData);
+                hoNDArray<T> coilMapCurr(RO, E1, CHA, N, const_cast<T*>(coilMap.begin())+offsetCoilMap);
+
+                Gadgetron::multiplyConj(dataCurr, coilMapCurr, dataTmp);
+                Gadgetron::sumOver3rdDimension(dataTmp, combinedCurr);
+
+                memcpy(combined.begin()+offsetCombined, combinedCurr.begin(), sizeof(T)*RO*E1*N);
+            }
+        }
+
+        //size_t N = coilMap.get_number_of_elements();
+        //size_t num = data.get_number_of_elements()/coilMap.get_number_of_elements();
+        //size_t NCombined = combined.get_number_of_elements()/num;
+
+        //long long nn;
+        ////#ifdef GCC_OLD_FLAG
+        ////    #pragma omp parallel default(none) private(nn) shared(num, dimCoil, dimCombinedCurr, N, NCombined)
+        ////#else
+        ////    #pragma omp parallel default(none) private(nn) shared(data, coilMap, num, dimCoil, dimCombinedCurr, combined, N, NCombined)
+        ////#endif
+        //{
+        //    hoNDArray<T> dataTmp(coilMap);
+        //    hoNDArray<T> dataCurr;
+        //    hoNDArray<T> dataCombinedCurr;
+
+        //    //#pragma omp for
+        //    for ( nn=0; nn<(long long)num; nn++ )
+        //    {
+        //        dataCurr.create(dimCoil.get(), const_cast<T*>(data.begin()+nn*N));
+        //        Gadgetron::multiplyConj(dataCurr, coilMap, dataTmp);
+
+        //        dataCombinedCurr.create(&dimCombinedCurr, const_cast<T*>(combined.begin()+nn*NCombined));
+        //        Gadgetron::sumOver3rdDimension(dataTmp, dataCombinedCurr);
+        //    }
+        //}
     }
     catch(...)
     {
diff --git a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkFlow.h b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkFlow.h
index 9564866..098ba17 100644
--- a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkFlow.h
+++ b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkFlow.h
@@ -5,9 +5,9 @@
 
 #pragma once
 
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
 
-#include "util/gtPlusIOAnalyze.h"
+#include "gtPlusIOAnalyze.h"
 #include "gtPlusISMRMRDReconUtil.h"
 #include "gtPlusISMRMRDReconWorker.h"
 
@@ -24,6 +24,11 @@ struct DimensionRecordCompare
     }
 };
 
+// [RO E1 CHA SLC E2 CON PHS REP SET SEG AVE]
+#define GTPLUS_RECON_KSPACE_DIM_NUM 11
+// [RO E1 CHA SLC E2 CON PHS REP SET AVE]
+#define GTPLUS_RECON_IMAGE_DIM_NUM 10
+
 template <typename T> 
 class gtPlusISMRMRDReconWorkFlow
 {
@@ -31,6 +36,8 @@ public:
 
     typedef std::pair<ISMRMRDDIM, size_t> DimensionRecordType;
 
+    typedef typename realType<T>::Type real_value_type;
+
     gtPlusISMRMRDReconWorkFlow();
     gtPlusISMRMRDReconWorkFlow(gtPlusReconWorker<T>& worker, gtPlusReconWorkOrder<T>& workOrder);
     virtual ~gtPlusISMRMRDReconWorkFlow();
@@ -44,8 +51,8 @@ public:
     virtual bool postProcessing() = 0;
 
     // assemble the ISMRMRD dimension index
-    // ind must have 9 elements
-    bool ismrmrdDimIndex9D(std::vector<size_t>& ind, const ISMRMRDDIM& dim, size_t value);
+    // ind must have 10 elements
+    bool ismrmrdDimIndex10D(std::vector<size_t>& ind, const ISMRMRDDIM& dim, size_t value);
 
     // find the permute order for ISMRMRD
     bool findISMRMRDPermuteOrder(const std::vector<ISMRMRDDIM>& dimsSrc, const std::vector<ISMRMRDDIM>& dimsDst, std::vector<size_t>& order);
@@ -57,6 +64,7 @@ public:
     std::string printISMRMRDDimensionSize(const std::vector<size_t>& sizes);
 
     bool setDataArray(hoNDArray<T>& data);
+    bool setDataArray(hoNDArray<T>& data, hoNDArray<real_value_type>& time_stamp, hoNDArray<real_value_type>& physio_time_stamp);
     bool setRefArray(hoNDArray<T>& ref);
 
     // -------- these member variables are made as public ------------- //
@@ -107,9 +115,25 @@ public:
     std::vector< DimensionRecordType > dataDimStartingIndexes_;
 
     // ----------------------------------
-    // reconstruction results, complex images, 8D array [RO E1 SLC E2 CON PHS REP SET]
+    // reconstruction results, complex images, 10D array [RO E1 CHA SLC E2 CON PHS REP SET AVE]
     // ----------------------------------
     hoNDArray<T> res_;
+    // optional time stamps for the recon results, in the unit of seconds, 10D array [1 1 1 SLC E2 CON PHS REP SET AVE]
+    // if not set, the stored image header will be used for time stamps
+    hoNDArray<real_value_type> res_time_stamp_;
+    hoNDArray<real_value_type> res_physio_time_stamp_;
+
+    hoNDArray<T> res_second_;
+    hoNDArray<real_value_type> res_time_stamp_second_;
+    hoNDArray<real_value_type> res_physio_time_stamp_second_;
+
+    // gfactor, not all reconstructions fill the gfactor
+    // 10D array [RO E1 CHA SLC E2 CON PHS REP SET AVE]
+    hoNDArray<T> gfactor_;
+
+    // wrap-around map, not all reconstructions fill the wrap-around map
+    // 10D array [RO E1 2 SLC E2 CON PHS REP SET AVE]
+    hoNDArray<T> wrap_around_map_;
 
     // ----------------------------------
     // debug and timing
@@ -127,20 +151,26 @@ public:
     // debug folder
     std::string debugFolder_;
 
-protected:
-
     // ----------------------------------
     // input data array
     // ----------------------------------
-    // image data, [RO E1 CHA SLC E2 CON PHS REP SET SEG]
+    // image data, 11D [RO E1 CHA SLC E2 CON PHS REP SET SEG AVE]
     hoNDArray<T>* data_;
+    // time stamp, 11D [1 E1 1 SLC E2 CON PHS REP SET SEG AVE]
+    // these are set with data array
+    hoNDArray<real_value_type>* time_stamp_;
+    hoNDArray<real_value_type>* physio_time_stamp_;
 
-    // reference calibration, [RO E1 CHA SLC E2 CON PHS REP SET SEG]
+    // reference calibration, 11D [RO E1 CHA SLC E2 CON PHS REP SET SEG AVE]
     hoNDArray<T>* ref_;
 
+protected:
+
     // internal helper memory allocated for computation
     hoNDArray<T> dataCurr_;
     hoNDArray<T> refCurr_;
+    hoNDArray<T> gfactorCurr_;
+    hoNDArray<T> wrap_around_mapCurr_;
 
     // size of dimensions for image data
     DimensionRecordType RO_;
@@ -153,6 +183,7 @@ protected:
     DimensionRecordType REP_;
     DimensionRecordType SET_;
     DimensionRecordType SEG_;
+    DimensionRecordType AVE_;
 
     // size of dimensions for ref data
     DimensionRecordType RO_ref_;
@@ -165,6 +196,7 @@ protected:
     DimensionRecordType REP_ref_;
     DimensionRecordType SET_ref_;
     DimensionRecordType SEG_ref_;
+    DimensionRecordType AVE_ref_;
 
     // expected dimensions for results
     std::vector<ISMRMRDDIM> dimsRes_;
@@ -175,7 +207,7 @@ protected:
 
 template <typename T> 
 gtPlusISMRMRDReconWorkFlow<T>::gtPlusISMRMRDReconWorkFlow() 
-: data_(NULL), ref_(NULL), worker_(NULL), workOrder_(NULL), noise_(NULL), noiseBW_(1.0), receriverBWRatio_(1.0), overSamplingRatioRO_(1.0), ADCSamplingTimeinSecond_(1.0) , performTiming_(false)
+: data_(NULL), time_stamp_(NULL), physio_time_stamp_(NULL), ref_(NULL), worker_(NULL), workOrder_(NULL), noise_(NULL), noiseBW_(1.0), receriverBWRatio_(1.0), overSamplingRatioRO_(1.0), ADCSamplingTimeinSecond_(1.0) , performTiming_(false)
 {
     RO_.first = DIM_ReadOut;
     RO_.second = 1;
@@ -207,6 +239,9 @@ gtPlusISMRMRDReconWorkFlow<T>::gtPlusISMRMRDReconWorkFlow()
     SEG_.first = DIM_Segment;
     SEG_.second = 1;
 
+    AVE_.first = DIM_Average;
+    AVE_.second = 1;
+
     RO_ref_.first = DIM_ReadOut;
     RO_ref_.second = 1;
 
@@ -237,7 +272,10 @@ gtPlusISMRMRDReconWorkFlow<T>::gtPlusISMRMRDReconWorkFlow()
     SEG_ref_.first = DIM_Segment;
     SEG_ref_.second = 1;
 
-    dimsRes_.resize(9);
+    AVE_ref_.first = DIM_Average;
+    AVE_ref_.second = 1;
+
+    dimsRes_.resize(GTPLUS_RECON_IMAGE_DIM_NUM);
     dimsRes_[0] = DIM_ReadOut;
     dimsRes_[1] = DIM_Encoding1;
     dimsRes_[2] = DIM_Channel;
@@ -247,8 +285,9 @@ gtPlusISMRMRDReconWorkFlow<T>::gtPlusISMRMRDReconWorkFlow()
     dimsRes_[6] = DIM_Phase;
     dimsRes_[7] = DIM_Repetition;
     dimsRes_[8] = DIM_Set;
+    dimsRes_[9] = DIM_Average;
 
-    dataDimStartingIndexes_.resize(10);
+    dataDimStartingIndexes_.resize(GTPLUS_RECON_KSPACE_DIM_NUM);
     dataDimStartingIndexes_[0] = DimensionRecordType(DIM_ReadOut, 0);
     dataDimStartingIndexes_[1] = DimensionRecordType(DIM_Encoding1, 0);
     dataDimStartingIndexes_[2] = DimensionRecordType(DIM_Channel, 0);
@@ -259,6 +298,7 @@ gtPlusISMRMRDReconWorkFlow<T>::gtPlusISMRMRDReconWorkFlow()
     dataDimStartingIndexes_[7] = DimensionRecordType(DIM_Repetition, 0);
     dataDimStartingIndexes_[8] = DimensionRecordType(DIM_Set, 0);
     dataDimStartingIndexes_[9] = DimensionRecordType(DIM_Segment, 0);
+    dataDimStartingIndexes_[10] = DimensionRecordType(DIM_Average, 0);
 
     gt_timer1_.set_timing_in_destruction(false);
     gt_timer2_.set_timing_in_destruction(false);
@@ -280,6 +320,7 @@ gtPlusISMRMRDReconWorkFlow<T>::gtPlusISMRMRDReconWorkFlow(gtPlusReconWorker<T>&
     REP_.second = 1;
     SET_.second = 1;
     SEG_.second = 1;
+    AVE_.second = 1;
 
     RO_ref_.second = 1;
     E1_ref_.second = 1;
@@ -291,8 +332,9 @@ gtPlusISMRMRDReconWorkFlow<T>::gtPlusISMRMRDReconWorkFlow(gtPlusReconWorker<T>&
     REP_ref_.second = 1;
     SET_ref_.second = 1;
     SEG_ref_.second = 1;
+    AVE_ref_.second = 1;
 
-    dimsRes_.resize(9);
+    dimsRes_.resize(GTPLUS_RECON_IMAGE_DIM_NUM);
     dimsRes_[0] = DIM_ReadOut;
     dimsRes_[1] = DIM_Encoding1;
     dimsRes_[2] = DIM_Channel;
@@ -302,8 +344,9 @@ gtPlusISMRMRDReconWorkFlow<T>::gtPlusISMRMRDReconWorkFlow(gtPlusReconWorker<T>&
     dimsRes_[6] = DIM_Phase;
     dimsRes_[7] = DIM_Repetition;
     dimsRes_[8] = DIM_Set;
+    dimsRes_[9] = DIM_Average;
 
-    dataDimStartingIndexes_.resize(10);
+    dataDimStartingIndexes_.resize(GTPLUS_RECON_KSPACE_DIM_NUM);
     dataDimStartingIndexes_[0] = DimensionRecordType(DIM_ReadOut, 0);
     dataDimStartingIndexes_[1] = DimensionRecordType(DIM_Encoding1, 0);
     dataDimStartingIndexes_[2] = DimensionRecordType(DIM_Channel, 0);
@@ -314,6 +357,7 @@ gtPlusISMRMRDReconWorkFlow<T>::gtPlusISMRMRDReconWorkFlow(gtPlusReconWorker<T>&
     dataDimStartingIndexes_[7] = DimensionRecordType(DIM_Repetition, 0);
     dataDimStartingIndexes_[8] = DimensionRecordType(DIM_Set, 0);
     dataDimStartingIndexes_[9] = DimensionRecordType(DIM_Segment, 0);
+    dataDimStartingIndexes_[10] = DimensionRecordType(DIM_Average, 0);
 
     gt_timer1_.set_timing_in_destruction(false);
     gt_timer2_.set_timing_in_destruction(false);
@@ -344,7 +388,7 @@ void gtPlusISMRMRDReconWorkFlow<T>::printInfo(std::ostream& os)
 
 template <typename T> 
 inline bool gtPlusISMRMRDReconWorkFlow<T>::
-ismrmrdDimIndex9D(std::vector<size_t>& ind, const ISMRMRDDIM& dim, size_t value)
+ismrmrdDimIndex10D(std::vector<size_t>& ind, const ISMRMRDDIM& dim, size_t value)
 {
     GADGET_CHECK_RETURN_FALSE(ind.size()>(size_t)(dim-DIM_ReadOut));
     ind[dim-DIM_ReadOut] = value;
@@ -435,6 +479,10 @@ printISMRMRDDimensions(const std::vector<ISMRMRDDIM>& dims)
                 os << "Segment ";
             break;
 
+            case DIM_Average:
+                os << "Average ";
+            break;
+
             default:
                 os << " Other";
         }
@@ -487,6 +535,36 @@ bool gtPlusISMRMRDReconWorkFlow<T>::setDataArray(hoNDArray<T>& data)
         REP_.second = data.get_size(7);
         SET_.second = data.get_size(8);
         SEG_.second = data.get_size(9);
+        AVE_.second = data.get_size(10);
+    }
+    catch(...)
+    {
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T> 
+bool gtPlusISMRMRDReconWorkFlow<T>::setDataArray(hoNDArray<T>& data, hoNDArray<real_value_type>& time_stamp, hoNDArray<real_value_type>& physio_time_stamp)
+{
+    try
+    {
+        data_ = &data;
+        time_stamp_ = &time_stamp;
+        physio_time_stamp_ = &physio_time_stamp;
+
+        RO_.second = data.get_size(0);
+        E1_.second = data.get_size(1);
+        CHA_.second = data.get_size(2);
+        SLC_.second = data.get_size(3);
+        E2_.second = data.get_size(4);
+        CON_.second = data.get_size(5);
+        PHS_.second = data.get_size(6);
+        REP_.second = data.get_size(7);
+        SET_.second = data.get_size(8);
+        SEG_.second = data.get_size(9);
+        AVE_.second = data.get_size(10);
     }
     catch(...)
     {
@@ -513,6 +591,7 @@ bool gtPlusISMRMRDReconWorkFlow<T>::setRefArray(hoNDArray<T>& ref)
         REP_ref_.second    = ref.get_size(7);
         SET_ref_.second    = ref.get_size(8);
         SEG_ref_.second    = ref.get_size(9);
+        AVE_ref_.second    = ref.get_size(10);
     }
     catch(...)
     {
diff --git a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkFlowCartesian.h b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkFlowCartesian.h
index 0265a5f..98cd61a 100644
--- a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkFlowCartesian.h
+++ b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkFlowCartesian.h
@@ -16,6 +16,7 @@ public:
 
     typedef gtPlusISMRMRDReconWorkFlow<T> BaseClass;
     typedef typename BaseClass::DimensionRecordType DimensionRecordType;
+    typedef typename BaseClass::real_value_type real_value_type;
 
     gtPlusISMRMRDReconWorkFlowCartesian();
     virtual ~gtPlusISMRMRDReconWorkFlowCartesian();
@@ -25,6 +26,7 @@ public:
     virtual bool preProcessing();
 
     virtual bool postProcessing();
+    virtual bool postProcessing(hoNDArray<T>& res, bool process_gfactor=true, bool process_wrap_around_map=true);
 
     virtual bool configureWorkOrder(const std::vector<ISMRMRDDIM>& dims);
 
@@ -38,7 +40,11 @@ public:
     virtual bool predictDimensions() = 0;
 
     using BaseClass::data_;
+    using BaseClass::time_stamp_;
+    using BaseClass::physio_time_stamp_;
     using BaseClass::ref_;
+    using BaseClass::gfactor_;
+    using BaseClass::wrap_around_map_;
     using BaseClass::noise_;
     using BaseClass::noiseBW_;
     using BaseClass::receriverBWRatio_;
@@ -55,6 +61,11 @@ public:
     using BaseClass::reconFOV_E2_;
 
     using BaseClass::res_;
+    using BaseClass::res_second_;
+    using BaseClass::res_time_stamp_;
+    using BaseClass::res_physio_time_stamp_;
+    using BaseClass::res_time_stamp_second_;
+    using BaseClass::res_physio_time_stamp_second_;
 
     using BaseClass::worker_;
     using BaseClass::workOrder_;
@@ -94,6 +105,8 @@ protected:
 
     using BaseClass::dataCurr_;
     using BaseClass::refCurr_;
+    using BaseClass::gfactorCurr_;
+    using BaseClass::wrap_around_mapCurr_;
 
     using BaseClass::RO_;
     using BaseClass::E1_;
@@ -105,6 +118,7 @@ protected:
     using BaseClass::REP_;
     using BaseClass::SET_;
     using BaseClass::SEG_;
+    using BaseClass::AVE_;
 
     using BaseClass::RO_ref_;
     using BaseClass::E1_ref_;
@@ -116,8 +130,33 @@ protected:
     using BaseClass::REP_ref_;
     using BaseClass::SET_ref_;
     using BaseClass::SEG_ref_;
+    using BaseClass::AVE_ref_;
 
     using BaseClass::gtPlus_util_;
+
+    /// Permute `data` in place into the dimension order given by `order`; logs and returns false if any step throws, true otherwise.
+    template <typename T2> 
+    bool permuteArrayOrder(hoNDArray<T2>& data, std::vector<size_t>& order)
+    {
+        try
+        {
+            boost::shared_ptr< hoNDArray<T2> > data_permuted = Gadgetron::permute(&data, &order); // permuted result arrives in a separate array
+            data.reshape(data_permuted->get_dimensions()); // give data the permuted dimensions ...
+            memcpy(data.begin(), data_permuted->begin(), data_permuted->get_number_of_bytes()); // ... then copy the permuted elements back into it
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors in gtPlusISMRMRDReconWorkFlowCartesian<T>::permuteArrayOrder(hoNDArray<T2>& data, std::vector<size_t>& order) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// copy workOrder results to workflow
+    bool copyReconResultsSecond(size_t dim5, size_t dim6, size_t dim7, size_t dim8, size_t dim9);
+    bool copyGFactor(size_t dim5, size_t dim6, size_t dim7, size_t dim8, size_t dim9, bool gfactor_needed);
+    bool copyWrapAroundMap(size_t dim5, size_t dim6, size_t dim7, size_t dim8, size_t dim9, bool wrap_around_map_needed);
 };
 
 template <typename T> 
@@ -149,8 +188,8 @@ void gtPlusISMRMRDReconWorkFlowCartesian<T>::printInfo(std::ostream& os)
     os << "b) Perform the zero-padding resize if required" << endl;
     os << endl;
     os << "Data buffers are named to reflect the typical nature of MR acquisition" << endl;
-    os << "data: image kspace data, 10D array [RO E1 CHA SLC E2 CON PHS REP SET SEG]" << endl;
-    os << "ref: calibration data, 10D array [RO E1 CHA SLC E2 CON PHS REP SET SEG]" << endl;
+    os << "data: image kspace data, 11D array [RO E1 CHA SLC E2 CON PHS REP SET SEG AVE]" << endl; // AVE is the 11th dimension (index 10)
+    os << "ref: calibration data, 11D array [RO E1 CHA SLC E2 CON PHS REP SET SEG AVE]" << endl;
     os << "----------------------------------------------------------" << endl;
 }
 
@@ -188,7 +227,7 @@ preProcessing()
         if ( overSamplingRatioRO_ > 1.0 )
         {
             GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft1c(*data_));
-            GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().cutpad2D(*data_, data_->get_size(0)/overSamplingRatioRO_, data_->get_size(1), dataCurr_));
+            GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().cutpad2D(*data_, (size_t)(data_->get_size(0)/overSamplingRatioRO_), data_->get_size(1), dataCurr_));
             GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fft1c(dataCurr_));
             *data_ = dataCurr_;
             RO_.second = data_->get_size(0);
@@ -198,7 +237,7 @@ preProcessing()
             if ( ref_ != NULL && ref_remove_oversampling_RO_ )
             {
                 GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft1c(*ref_));
-                GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().cutpad2D(*ref_, ref_->get_size(0)/overSamplingRatioRO_, ref_->get_size(1), refCurr_));
+                GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().cutpad2D(*ref_, (size_t)(ref_->get_size(0)/overSamplingRatioRO_), ref_->get_size(1), refCurr_));
                 GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fft1c(refCurr_));
                 *ref_ = refCurr_;
                 RO_ref_.second = ref_->get_size(0);
@@ -208,8 +247,8 @@ preProcessing()
 
             if ( workOrder_->start_RO_>=0 && workOrder_->end_RO_>=0 )
             {
-                workOrder_->start_RO_ /= overSamplingRatioRO_;
-                workOrder_->end_RO_ /= overSamplingRatioRO_;
+                workOrder_->start_RO_ = (int)(workOrder_->start_RO_/overSamplingRatioRO_);
+                workOrder_->end_RO_ = (int)(workOrder_->end_RO_/overSamplingRatioRO_);
             }
         }
 
@@ -230,6 +269,33 @@ preProcessing()
                 GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, *ref_, "ref_noiseprewhitenned");
             }
         }
+
+        // if asymmetric echo is used (the sampled range [start_RO_, end_RO_] does not span all of RO), set the unsampled RO region to 0
+        size_t RO = data_->get_size(0);
+        if ( !( workOrder_->start_RO_<0 || workOrder_->end_RO_<0 || (workOrder_->end_RO_-workOrder_->start_RO_+1==RO) ) )
+        {
+            size_t num = data_->get_number_of_elements() / RO; // number of readout lines of length RO
+            long long n;
+
+            long long startRO = workOrder_->start_RO_;
+            long long endRO = workOrder_->end_RO_;
+            T* pData = data_->begin();
+            // per readout line: clear the leading samples [0, startRO) when startRO>0, otherwise the trailing samples (endRO, RO)
+            #pragma omp parallel for default(none) private(n) shared(num, RO, startRO, endRO, pData)
+            for ( n=0; n<(long long)num; n++ )
+            {
+                if ( startRO > 0 )
+                {
+                    memset(pData+n*RO, 0, startRO*sizeof(T) );
+                }
+                else if ( endRO+1 < (long long)RO ) // skip when nothing lies after endRO, so the count below cannot underflow
+                {
+                    memset(pData+n*RO+endRO+1, 0, (RO-endRO-1)*sizeof(T) ); // endRO+1 .. RO-1 is RO-endRO-1 samples; RO-endRO would spill into the next line
+                }
+            }
+
+            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, *data_, "incomingKSpace_RO_setzeros");
+        }
     }
     catch(...)
     {
@@ -246,8 +312,11 @@ convertToReconSpace2D(hoNDArray<T>& input_, hoNDArray<T>& output_, bool isKSpace
 {
     try
     {
-        size_t RO = res_.get_size(0);
-        size_t E1 = res_.get_size(1);
+        size_t RO = data_->get_size(0);
+        size_t E1 = data_->get_size(1);
+
+        size_t inputRO = input_.get_size(0);
+        size_t inputE1 = input_.get_size(1);
 
         output_ = input_;
 
@@ -269,7 +338,7 @@ convertToReconSpace2D(hoNDArray<T>& input_, hoNDArray<T>& output_, bool isKSpace
             if ( encodingFOV_E1_ > reconFOV_E1_ )
             {
                 float spacingE1 = reconFOV_E1_/reconSizeE1_;
-                encodingE1 = encodingFOV_E1_/spacingE1;
+                encodingE1 = (size_t)std::floor(encodingFOV_E1_/spacingE1+0.5);
             }
 
             hoNDArray<T>* pSrc = &input_;
@@ -279,7 +348,7 @@ convertToReconSpace2D(hoNDArray<T>& input_, hoNDArray<T>& output_, bool isKSpace
             hoNDArray<T> buffer2D;
 
             // adjust E1
-            if ( encodingE1 > E1 )
+            if ( encodingE1>E1 && encodingE1>inputE1 )
             {
                 if ( isKSpace )
                 {
@@ -290,6 +359,8 @@ convertToReconSpace2D(hoNDArray<T>& input_, hoNDArray<T>& output_, bool isKSpace
                     GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().zpadResize2D(*pSrc, RO, encodingE1, *pDst));
                 }
 
+                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, *pDst, "complexIm_zpadResize2D_enlarged");
+
                 isKSpace = false;
                 pTmp = pSrc; pSrc = pDst; pDst = pTmp;
             }
@@ -307,6 +378,8 @@ convertToReconSpace2D(hoNDArray<T>& input_, hoNDArray<T>& output_, bool isKSpace
 
                 GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft2c(*pDst));
 
+                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, *pDst, "complexIm_zpadResize2D_cut");
+
                 isKSpace = false;
                 pTmp = pSrc; pSrc = pDst; pDst = pTmp;
             }
@@ -345,9 +418,15 @@ convertToReconSpace2D(hoNDArray<T>& input_, hoNDArray<T>& output_, bool isKSpace
             }
 
             // final cut
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fft2c(*pSrc, buffer2D));
-            GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().cutpad2D(buffer2D, reconSizeRO_, reconSizeE1_, *pDst));
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft2c(*pDst));
+            if ( isKSpace )
+            {
+                GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft2c(*pSrc, buffer2D));
+                GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().cutpad2D(buffer2D, reconSizeRO_, reconSizeE1_, *pDst));
+            }
+            else
+            {
+                GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().cutpad2D(*pSrc, reconSizeRO_, reconSizeE1_, *pDst));
+            }
 
             if ( pDst != &output_ )
             {
@@ -394,14 +473,14 @@ convertToReconSpace3D(hoNDArray<T>& input_, hoNDArray<T>& output_, bool isKSpace
             if ( encodingFOV_E1_ > reconFOV_E1_ )
             {
                 float spacingE1 = reconFOV_E1_/reconSizeE1_;
-                encodingE1 = std::floor(encodingFOV_E1_/spacingE1+0.5);
+                encodingE1 = (size_t)std::floor(encodingFOV_E1_/spacingE1+0.5);
             }
 
             size_t encodingE2 = reconSizeE2_;
             if ( encodingFOV_E2_ > reconFOV_E2_ )
             {
                 float spacingE2 = reconFOV_E2_/reconSizeE2_;
-                encodingE2 = std::floor(encodingFOV_E2_/spacingE2+0.5);
+                encodingE2 = (size_t)std::floor(encodingFOV_E2_/spacingE2+0.5);
             }
 
             hoNDArray<T>* pSrc = &input_;
@@ -542,46 +621,61 @@ convertToReconSpace3D(hoNDArray<T>& input_, hoNDArray<T>& output_, bool isKSpace
 
 template <typename T> 
 bool gtPlusISMRMRDReconWorkFlowCartesian<T>::
-postProcessing()
+postProcessing(hoNDArray<T>& res, bool process_gfactor, bool process_wrap_around_map)
 {
     try
     {
-        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, res_, "complexIm_afterRecon");
-
-        size_t RO = res_.get_size(0);
-        size_t E1 = res_.get_size(1);
-        size_t E2 = res_.get_size(4);
+        size_t RO = res.get_size(0);
+        size_t E1 = res.get_size(1);
+        size_t E2 = res.get_size(4);
 
-        if ( E2_.second > 1 )
+        // whether to process gfactor
+        bool has_gfactor = false;
+        if ( (gfactor_.get_size(0)==RO) && (gfactor_.get_size(1)==E1) )
         {
-            // dataCurr_ = res_;
+            has_gfactor = true;
+        }
 
-            // need to permute the matrix order
-            //size_t NDim = dataCurr_.get_number_of_dimensions();
-            //std::vector<size_t> order(NDim, 1);
+        if ( !process_gfactor )
+        {
+            has_gfactor = false;
+        }
 
-            //size_t ii;
-            //for ( ii=0; ii<NDim; ii++ )
-            //{
-            //    order[ii] = ii;
-            //}
+        // whether to process wrap_around map
+        bool has_wrap_around = false;
+        if ( (wrap_around_map_.get_size(0)==RO) && (wrap_around_map_.get_size(1)==E1) )
+        {
+            has_wrap_around = true;
+        }
 
-            //order[0] = 0;
-            //order[1] = 1;
-            //order[2] = 4;
-            //order[3] = 2;
-            //order[4] = 3;
+        if ( !process_wrap_around_map )
+        {
+            has_wrap_around = false;
+        }
 
+        if ( E2_.second > 1 )
+        {
             GADGET_CHECK_PERFORM(performTiming_, gt_timer1_.start("postProcessing - permute res array ... "));
             // boost::shared_ptr< hoNDArray<T> > data_permuted = Gadgetron::permute(const_cast<hoNDArray<T>*>(&dataCurr_), &order);
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::permuteE2To3rdDimension(res_, dataCurr_));
+            GADGET_CHECK_RETURN_FALSE(Gadgetron::permuteE2To3rdDimension(res, dataCurr_));
+
+            if ( has_gfactor )
+            {
+                GADGET_CHECK_RETURN_FALSE(Gadgetron::permuteE2To3rdDimension(gfactor_, gfactorCurr_));
+            }
+
+            if ( has_wrap_around )
+            {
+                GADGET_CHECK_RETURN_FALSE(Gadgetron::permuteE2To3rdDimension(wrap_around_map_, wrap_around_mapCurr_));
+            }
+
             GADGET_CHECK_PERFORM(performTiming_, gt_timer1_.stop());
 
             GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, dataCurr_, "data_permuted");
 
             // dataCurr_ = *data_permuted;
 
-            res_.reshape(dataCurr_.get_dimensions());
+            res.reshape(dataCurr_.get_dimensions());
 
             bool inKSpace = false;
 
@@ -589,8 +683,8 @@ postProcessing()
                     && workOrder_->filterROE1E2_.get_size(1)==E1 
                     && workOrder_->filterROE1E2_.get_size(2)==E2 )
             {
-                GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fft3c(dataCurr_, res_));
-                GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().kspace3DfilterROE1E2(res_, workOrder_->filterROE1E2_, dataCurr_));
+                GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fft3c(dataCurr_, res));
+                GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().kspace3DfilterROE1E2(res, workOrder_->filterROE1E2_, dataCurr_));
                 inKSpace = true;
             }
             else if ( (workOrder_->filterRO_.get_number_of_elements() == RO) 
@@ -598,13 +692,13 @@ postProcessing()
                         && (workOrder_->filterE2_.get_number_of_elements() == E2) )
             {
                 GADGET_CHECK_PERFORM(performTiming_, gt_timer1_.start("postProcessing - fft3c ... "));
-                GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fft3c(dataCurr_, res_));
+                GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fft3c(dataCurr_, res));
                 GADGET_CHECK_PERFORM(performTiming_, gt_timer1_.stop());
 
-                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, res_, "kspace_beforefiltered");
+                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, res, "kspace_beforefiltered");
 
                 GADGET_CHECK_PERFORM(performTiming_, gt_timer1_.start("postProcessing - 3D kspace filter ... "));
-                GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().kspace3DfilterROE1E2(res_, workOrder_->filterRO_, workOrder_->filterE1_, workOrder_->filterE2_, dataCurr_));
+                GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().kspace3DfilterROE1E2(res, workOrder_->filterRO_, workOrder_->filterE1_, workOrder_->filterE2_, dataCurr_));
                 GADGET_CHECK_PERFORM(performTiming_, gt_timer1_.stop());
 
                 GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, dataCurr_, "kspace_afterfiltered");
@@ -612,7 +706,7 @@ postProcessing()
             }
             else
             {
-                hoNDArray<T>* pSrc = &res_;
+                hoNDArray<T>* pSrc = &res;
                 hoNDArray<T>* pDst = &dataCurr_;
 
                 GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fft3c(*pDst, *pSrc));
@@ -649,7 +743,7 @@ postProcessing()
                 }
                 else
                 {
-                    dataCurr_ = res_;
+                    dataCurr_ = res;
                 }
 
                 inKSpace = true;
@@ -666,12 +760,12 @@ postProcessing()
             }
             else
             {
-                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, res_, "complexIm_filtered");
+                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, res, "complexIm_filtered");
             }
 
-            GADGET_CHECK_RETURN_FALSE(convertToReconSpace3D(dataCurr_, res_, inKSpace));
+            GADGET_CHECK_RETURN_FALSE(convertToReconSpace3D(dataCurr_, res, inKSpace));
 
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::permuteE2To5thDimension(res_, dataCurr_));
+            GADGET_CHECK_RETURN_FALSE(Gadgetron::permuteE2To5thDimension(res, dataCurr_));
 
             //order[0] = 0;
             //order[1] = 1;
@@ -679,44 +773,68 @@ postProcessing()
             //order[3] = 4;
             //order[4] = 2;
 
-            //data_permuted = Gadgetron::permute(const_cast<hoNDArray<T>*>(&res_), &order);
-            //res_ = *data_permuted;
+            //data_permuted = Gadgetron::permute(const_cast<hoNDArray<T>*>(&res), &order);
+            //res = *data_permuted;
+
+            res.reshape(dataCurr_.get_dimensions());
+            memcpy(res.begin(), dataCurr_.begin(), res.get_number_of_bytes());
+
+            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, res, "complexIm_zpadResize3D");
+
+            if ( has_gfactor )
+            {
+                GADGET_CHECK_RETURN_FALSE(convertToReconSpace3D(gfactorCurr_, gfactor_, false));
+                GADGET_CHECK_RETURN_FALSE(Gadgetron::permuteE2To5thDimension(gfactor_, gfactorCurr_));
+
+                gfactor_.reshape(gfactorCurr_.get_dimensions());
+                memcpy(gfactor_.begin(), gfactorCurr_.begin(), gfactor_.get_number_of_bytes());
+
+                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, gfactor_, "gfactor_zpadResize3D");
+            }
 
-            res_.reshape(dataCurr_.get_dimensions());
-            memcpy(res_.begin(), dataCurr_.begin(), res_.get_number_of_bytes());
+            if ( has_wrap_around )
+            {
+                GADGET_CHECK_RETURN_FALSE(convertToReconSpace3D(wrap_around_mapCurr_, wrap_around_map_, false));
+                GADGET_CHECK_RETURN_FALSE(Gadgetron::permuteE2To5thDimension(wrap_around_map_, wrap_around_mapCurr_));
 
-            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, res_, "complexIm_zpadResize3D");
+                wrap_around_map_.reshape(wrap_around_mapCurr_.get_dimensions());
+                memcpy(wrap_around_map_.begin(), wrap_around_mapCurr_.begin(), wrap_around_map_.get_number_of_bytes());
+
+                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, wrap_around_map_, "wrap_around_map_zpadResize3D");
+            }
         }
         else
         {
-            dataCurr_ = res_;
+            dataCurr_ = res;
             bool inKSpace = false;
 
+            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, dataCurr_, "complexIm_before_filtered");
+
             if ( workOrder_->filterROE1_.get_size(0)==RO && workOrder_->filterROE1_.get_size(1)==E1 )
             {
-                GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fft2c(dataCurr_, res_));
-                GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().kspacefilterROE1(res_, workOrder_->filterROE1_, dataCurr_));
+                GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fft2c(dataCurr_, res));
+                GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().kspacefilterROE1(res, workOrder_->filterROE1_, dataCurr_));
                 inKSpace = true;
             }
             else if ( (workOrder_->filterRO_.get_number_of_elements() == RO) && (workOrder_->filterE1_.get_number_of_elements() == E1) )
             {
-                GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fft2c(dataCurr_, res_));
-                GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().kspacefilterROE1(res_, workOrder_->filterRO_, workOrder_->filterE1_, dataCurr_));
+                GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fft2c(dataCurr_, res));
+                GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().kspacefilterROE1(res, workOrder_->filterRO_, workOrder_->filterE1_, dataCurr_));
                 inKSpace = true;
             }
             else
             {
                 if ( (workOrder_->filterRO_.get_number_of_elements() == RO) && (workOrder_->filterE1_.get_number_of_elements() != E1) )
                 {
-                    GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fft2c(dataCurr_, res_));
-                    GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().kspacefilterRO(res_, workOrder_->filterRO_, dataCurr_));
+                    GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fft2c(dataCurr_, res));
+                    GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().kspacefilterRO(res, workOrder_->filterRO_, dataCurr_));
                     inKSpace = true;
                 }
 
                 if ( (workOrder_->filterRO_.get_number_of_elements() != RO) && (workOrder_->filterE1_.get_number_of_elements() == E1) )
                 {
-                    GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fft2c(dataCurr_, res_));
-                    GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().kspacefilterE1(res_, workOrder_->filterE1_, dataCurr_));
+                    GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fft2c(dataCurr_, res));
+                    GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().kspacefilterE1(res, workOrder_->filterE1_, dataCurr_));
                     inKSpace = true;
                 }
             }
@@ -727,17 +845,57 @@ postProcessing()
                 {
                     hoNDArray<T> Im(dataCurr_);
                     GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft2c(Im));
-                    GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, Im, "complexIm_filtered");
+                    GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, Im, "complexIm_after_filtered");
                 }
             }
             else
             {
-                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, res_, "complexIm_filtered");
+                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, res, "complexIm_after_filtered");
+            }
+
+            GADGET_CHECK_RETURN_FALSE(convertToReconSpace2D(dataCurr_, res, inKSpace));
+
+            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, res, "complexIm_zpadResize2D");
+
+            if ( has_gfactor )
+            {
+                gfactorCurr_ = gfactor_;
+                GADGET_CHECK_RETURN_FALSE(convertToReconSpace2D(gfactorCurr_, gfactor_, false));
+
+                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, gfactor_, "gfactor_zpadResize2D");
             }
 
-            GADGET_CHECK_RETURN_FALSE(convertToReconSpace2D(dataCurr_, res_, inKSpace));
+            if ( has_wrap_around )
+            {
+                wrap_around_mapCurr_ = wrap_around_map_;
+                GADGET_CHECK_RETURN_FALSE(convertToReconSpace2D(wrap_around_mapCurr_, wrap_around_map_, false));
+
+                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, wrap_around_map_, "wrap_around_map_zpadResize2D");
+            }
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors in gtPlusISMRMRDReconWorkFlowCartesian<T>::postProcessing(res) ... ");
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T> 
+bool gtPlusISMRMRDReconWorkFlowCartesian<T>::
+postProcessing()
+{
+    try
+    {
+        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, res_, "complexIm_afterRecon");
+        GADGET_CHECK_RETURN_FALSE(this->postProcessing(res_, true, true));
 
-            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, res_, "complexIm_zpadResize2D");
+        if ( this->res_second_.get_number_of_elements() > 0 )
+        {
+            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, res_second_, "complexImSecond_afterRecon");
+            GADGET_CHECK_RETURN_FALSE(this->postProcessing(res_second_, false, false));
         }
     }
     catch(...)
@@ -788,6 +946,9 @@ configureWorkOrder(const std::vector<ISMRMRDDIM>& dims)
         GADGET_CONDITION_MSG(!debugFolder_.empty(), "Recon size       : " << this->printISMRMRDDimensionSize(dimSize));
         GADGET_CONDITION_MSG(!debugFolder_.empty(), "Recon ref size   : " << this->printISMRMRDDimensionSize(dimSizeRef));
 
+        bool gfactor_needed = workOrder_->gfactor_needed_;
+        bool wrap_around_map_needed = workOrder_->wrap_around_map_needed_;
+
         // recon workOrder size
         std::vector<size_t> dimReconSize(5);
         dimReconSize[0] = dimSize[0];
@@ -796,6 +957,10 @@ configureWorkOrder(const std::vector<ISMRMRDDIM>& dims)
         dimReconSize[3] = dimSize[3];
         dimReconSize[4] = dimSize[4];
 
+        std::vector<size_t> dimReconTimeStampSize(dimReconSize);
+        dimReconTimeStampSize[0] = 1; // RO = 1
+        dimReconTimeStampSize[2] = 1; // CHA = 1
+
         std::vector<size_t> dimReconSizeRef(5);
         dimReconSizeRef[0] = dimSizeRef[0];
         dimReconSizeRef[1] = dimSizeRef[1];
@@ -814,8 +979,50 @@ configureWorkOrder(const std::vector<ISMRMRDDIM>& dims)
         size_t num_channels_res = workOrder_->num_channels_res_;
 
         std::vector<size_t> dimResSize(dimSize);
+
+        if ( gfactor_needed )
+        {
+            dimResSize[indChannelDim] = 1;
+            gfactor_.create(&dimResSize);
+        }
+
+        if ( wrap_around_map_needed )
+        {
+            if ( workOrder_->acceFactorE2_ > 1 ) // 3D acquisition
+            {
+                dimResSize[indChannelDim] = 3;
+            }
+            else
+            {
+                dimResSize[indChannelDim] = 2;
+            }
+
+            wrap_around_map_.create(&dimResSize);
+        }
+
         dimResSize[indChannelDim] = num_channels_res;
         res_.create(&dimResSize);
+        Gadgetron::clear(res_);
+
+        res_second_.create(&dimResSize);
+        Gadgetron::clear(res_second_);
+
+        std::vector<size_t> dimReconResTimeStampSize(dimResSize);
+        dimReconResTimeStampSize[0] = 1;
+        dimReconResTimeStampSize[1] = 1;
+        dimReconResTimeStampSize[2] = 1;
+
+        res_time_stamp_.create(dimReconResTimeStampSize);
+        Gadgetron::fill(res_time_stamp_, (real_value_type)(-1) );
+
+        res_physio_time_stamp_.create(dimReconResTimeStampSize);
+        Gadgetron::fill(res_physio_time_stamp_, (real_value_type)(-1) );
+
+        res_time_stamp_second_.create(dimReconResTimeStampSize);
+        Gadgetron::fill(res_time_stamp_second_, (real_value_type)(-1) );
+
+        res_physio_time_stamp_second_.create(dimReconResTimeStampSize);
+        Gadgetron::fill(res_physio_time_stamp_second_, (real_value_type)(-1) );
 
         std::vector<ISMRMRDDIM> dimsRes(dims);
 
@@ -828,234 +1035,386 @@ configureWorkOrder(const std::vector<ISMRMRDDIM>& dims)
         {
             gt_exporter_.exportArrayComplex(*data_, debugFolder_ + "data_");
             gt_exporter_.exportArrayComplex(*ref_, debugFolder_ + "ref_");
+
+            if ( time_stamp_ != NULL )
+            {
+                gt_exporter_.exportArray(*time_stamp_, debugFolder_ + "time_stamp_");
+            }
+
+            if ( physio_time_stamp_ != NULL )
+            {
+                gt_exporter_.exportArray(*physio_time_stamp_, debugFolder_ + "physio_time_stamp_");
+            }
         }
 
         bool workFlow_use_BufferedKernel_ = workOrder_->workFlow_use_BufferedKernel_;
 
+        bool has_second_res = false;
+        bool has_recon_time_stamp = false;
+        bool has_recon_physio_time_stamp = false;
+        bool has_recon_time_stamp_second = false;
+        bool has_recon_physio_time_stamp_second = false;
+
         // call up the recon
-        size_t dim8, dim7, dim6, dim5, dim4, dim3, dim2;
-        for ( dim8=0; dim8<dimSize[8]; dim8++ )
+        size_t dim9, dim8, dim7, dim6, dim5, dim4, dim3, dim2;
+        for ( dim9=0; dim9<dimSize[9]; dim9++ )
         {
-            for ( dim7=0; dim7<dimSize[7]; dim7++ )
+            for ( dim8=0; dim8<dimSize[8]; dim8++ )
             {
-                for ( dim6=0; dim6<dimSize[6]; dim6++ )
+                for ( dim7=0; dim7<dimSize[7]; dim7++ )
                 {
-                    for ( dim5=0; dim5<dimSize[5]; dim5++ )
+                    for ( dim6=0; dim6<dimSize[6]; dim6++ )
                     {
-                        std::vector<size_t> ind(10, 0);
-                        this->ismrmrdDimIndex9D(ind, dims[8], dim8);
-                        this->ismrmrdDimIndex9D(ind, dims[7], dim7);
-                        this->ismrmrdDimIndex9D(ind, dims[6], dim6);
-                        this->ismrmrdDimIndex9D(ind, dims[5], dim5);
-
-                        if ( !workOrder_->data_.dimensions_equal(&dimReconSize) )
+                        for ( dim5=0; dim5<dimSize[5]; dim5++ )
                         {
-                            workOrder_->data_.create(&dimReconSize);
-                        }
+                            std::vector<size_t> ind(11, 0);
+                            this->ismrmrdDimIndex10D(ind, dims[9], dim9);
+                            this->ismrmrdDimIndex10D(ind, dims[8], dim8);
+                            this->ismrmrdDimIndex10D(ind, dims[7], dim7);
+                            this->ismrmrdDimIndex10D(ind, dims[6], dim6);
+                            this->ismrmrdDimIndex10D(ind, dims[5], dim5);
+
+                            // ---------------------------
+                            // prepare the data in workOrder
+                            // ---------------------------
+                            if ( !workOrder_->data_.dimensions_equal(&dimReconSize) )
+                            {
+                                workOrder_->data_.create(&dimReconSize);
+                                workOrder_->time_stamp_.create(&dimReconTimeStampSize);
+                                Gadgetron::clear(workOrder_->time_stamp_);
 
-                        std::vector<size_t> indWorkOrder(5, 0);
-                        for ( dim4=0; dim4<dimSize[4]; dim4++ )
-                        {
-                            this->ismrmrdDimIndex9D(ind, dims[4], dim4);
-                            indWorkOrder[4] = dim4;
+                                workOrder_->physio_time_stamp_.create(&dimReconTimeStampSize);
+                                Gadgetron::clear(workOrder_->physio_time_stamp_);
+                            }
 
-                            for ( dim3=0; dim3<dimSize[3]; dim3++ )
+                            std::vector<size_t> indWorkOrder(5, 0);
+                            for ( dim4=0; dim4<dimSize[4]; dim4++ )
                             {
-                                this->ismrmrdDimIndex9D(ind, dims[3], dim3);
-                                indWorkOrder[3] = dim3;
+                                this->ismrmrdDimIndex10D(ind, dims[4], dim4);
+                                indWorkOrder[4] = dim4;
 
-                                if ( dims[2] == DIM_Channel )
+                                for ( dim3=0; dim3<dimSize[3]; dim3++ )
                                 {
-                                    long long offset = data_->calculate_offset(ind);
-
-                                    long long offsetWorkOrder = workOrder_->data_.calculate_offset(indWorkOrder);
+                                    this->ismrmrdDimIndex10D(ind, dims[3], dim3);
+                                    indWorkOrder[3] = dim3;
 
-                                    memcpy(workOrder_->data_.begin()+offsetWorkOrder, data_->begin()+offset, sizeof(T)*N3D);
-                                }
-                                else
-                                {
-                                    for ( dim2=0; dim2<dimSize[2]; dim2++ )
+                                    if ( dims[2] == DIM_Channel )
                                     {
-                                        this->ismrmrdDimIndex9D(ind, dims[2], dim2);
-                                        indWorkOrder[2] = dim2;
-
                                         long long offset = data_->calculate_offset(ind);
-
                                         long long offsetWorkOrder = workOrder_->data_.calculate_offset(indWorkOrder);
+                                        memcpy(workOrder_->data_.begin()+offsetWorkOrder, data_->begin()+offset, sizeof(T)*N3D);
 
-                                        memcpy(workOrder_->data_.begin()+offsetWorkOrder, data_->begin()+offset, sizeof(T)*N2D);
+                                        if ( time_stamp_ != NULL )
+                                        {
+                                            offset = time_stamp_->calculate_offset(ind);
+                                            offsetWorkOrder = workOrder_->time_stamp_.calculate_offset(indWorkOrder);
+                                            memcpy(workOrder_->time_stamp_.begin()+offsetWorkOrder, time_stamp_->begin()+offset, sizeof(real_value_type)*dimReconSize[1]);
+                                            if ( physio_time_stamp_ != NULL )
+                                            {
+                                                memcpy(workOrder_->physio_time_stamp_.begin()+offsetWorkOrder, physio_time_stamp_->begin()+offset, sizeof(real_value_type)*dimReconSize[1]);
+                                            }
+                                        }
+                                    }
+                                    else
+                                    {
+                                        GADGET_WARN_MSG("dims[2] != DIM_Channel, the time stamps will not be copied ... ");
+
+                                        for ( dim2=0; dim2<dimSize[2]; dim2++ )
+                                        {
+                                            this->ismrmrdDimIndex10D(ind, dims[2], dim2);
+                                            indWorkOrder[2] = dim2;
+
+                                            long long offset = data_->calculate_offset(ind);
+                                            long long offsetWorkOrder = workOrder_->data_.calculate_offset(indWorkOrder);
+                                            memcpy(workOrder_->data_.begin()+offsetWorkOrder, data_->begin()+offset, sizeof(T)*N2D);
+                                        }
                                     }
                                 }
                             }
-                        }
 
-                        if ( (ref_ != NULL) && (ref_->get_number_of_elements()>0) )
-                        {
-                            std::vector<size_t> indRef(10, 0);
-                            if ( dim8 < dimSizeRef[8] )
+                            // ---------------------------
+                            // prepare the ref in workOrder
+                            // ---------------------------
+                            if ( (ref_ != NULL) && (ref_->get_number_of_elements()>0) )
                             {
-                                this->ismrmrdDimIndex9D(indRef, dims[8], dim8);
-                            }
-                            else
-                            {
-                                this->ismrmrdDimIndex9D(indRef, dims[8], dimSizeRef[8]-1);
-                            }
+                                std::vector<size_t> indRef(11, 0);
 
-                            if ( dim7 < dimSizeRef[7] )
-                            {
-                                this->ismrmrdDimIndex9D(indRef, dims[7], dim7);
-                            }
-                            else
-                            {
-                                this->ismrmrdDimIndex9D(indRef, dims[7], dimSizeRef[7]-1);
-                            }
+                                if ( dim9 < dimSizeRef[9] )
+                                {
+                                    this->ismrmrdDimIndex10D(indRef, dims[9], dim9);
+                                }
+                                else
+                                {
+                                    this->ismrmrdDimIndex10D(indRef, dims[9], dimSizeRef[9]-1);
+                                }
 
-                            if ( dim6 < dimSizeRef[6] )
-                            {
-                                this->ismrmrdDimIndex9D(indRef, dims[6], dim6);
-                            }
-                            else
-                            {
-                                this->ismrmrdDimIndex9D(indRef, dims[6], dimSizeRef[6]-1);
-                            }
+                                if ( dim8 < dimSizeRef[8] )
+                                {
+                                    this->ismrmrdDimIndex10D(indRef, dims[8], dim8);
+                                }
+                                else
+                                {
+                                    this->ismrmrdDimIndex10D(indRef, dims[8], dimSizeRef[8]-1);
+                                }
 
-                            if ( dim5 < dimSizeRef[5] )
-                            {
-                                this->ismrmrdDimIndex9D(indRef, dims[5], dim5);
-                            }
-                            else
-                            {
-                                this->ismrmrdDimIndex9D(indRef, dims[5], dimSizeRef[5]-1);
-                            }
+                                if ( dim7 < dimSizeRef[7] )
+                                {
+                                    this->ismrmrdDimIndex10D(indRef, dims[7], dim7);
+                                }
+                                else
+                                {
+                                    this->ismrmrdDimIndex10D(indRef, dims[7], dimSizeRef[7]-1);
+                                }
 
-                            if ( !workOrder_->ref_.dimensions_equal(&dimReconSizeRef) )
-                            {
-                                workOrder_->ref_.create(&dimReconSizeRef);
-                            }
+                                if ( dim6 < dimSizeRef[6] )
+                                {
+                                    this->ismrmrdDimIndex10D(indRef, dims[6], dim6);
+                                }
+                                else
+                                {
+                                    this->ismrmrdDimIndex10D(indRef, dims[6], dimSizeRef[6]-1);
+                                }
 
-                            std::vector<size_t> indRefWorkOrder(10, 0);
-                            for ( dim4=0; dim4<dimSize[4]; dim4++ )
-                            {
-                                size_t dim4_ref = dim4;
-                                if ( dim4 < dimSizeRef[4] )
+                                if ( dim5 < dimSizeRef[5] )
                                 {
-                                    this->ismrmrdDimIndex9D(indRef, dims[4], dim4);
+                                    this->ismrmrdDimIndex10D(indRef, dims[5], dim5);
                                 }
                                 else
                                 {
-                                    this->ismrmrdDimIndex9D(indRef, dims[4], dimSizeRef[4]-1);
-                                    dim4_ref = dimSizeRef[4]-1;
+                                    this->ismrmrdDimIndex10D(indRef, dims[5], dimSizeRef[5]-1);
                                 }
-                                indRefWorkOrder[4] = dim4_ref;
 
-                                for ( dim3=0; dim3<dimSize[3]; dim3++ )
+                                if ( !workOrder_->ref_.dimensions_equal(&dimReconSizeRef) )
                                 {
-                                    size_t dim3_ref = dim3;
-                                    if ( dim3 < dimSizeRef[3] )
+                                    workOrder_->ref_.create(&dimReconSizeRef);
+                                }
+
+                                std::vector<size_t> indRefWorkOrder(11, 0);
+                                for ( dim4=0; dim4<dimSize[4]; dim4++ )
+                                {
+                                    size_t dim4_ref = dim4;
+                                    if ( dim4 < dimSizeRef[4] )
                                     {
-                                        this->ismrmrdDimIndex9D(indRef, dims[3], dim3);
+                                        this->ismrmrdDimIndex10D(indRef, dims[4], dim4);
                                     }
                                     else
                                     {
-                                        this->ismrmrdDimIndex9D(indRef, dims[3], dimSizeRef[3]-1);
-                                        dim3_ref = dimSizeRef[3]-1;
+                                        this->ismrmrdDimIndex10D(indRef, dims[4], dimSizeRef[4]-1);
+                                        dim4_ref = dimSizeRef[4]-1;
                                     }
-                                    indRefWorkOrder[3] = dim3_ref;
+                                    indRefWorkOrder[4] = dim4_ref;
 
-                                    if ( dims[2] == DIM_Channel )
+                                    for ( dim3=0; dim3<dimSize[3]; dim3++ )
                                     {
-                                        long long offset = ref_->calculate_offset(indRef);
-                                        long long offsetWorkOrder = workOrder_->ref_.calculate_offset(indRefWorkOrder);
-                                        memcpy(workOrder_->ref_.begin()+offsetWorkOrder, ref_->begin()+offset, sizeof(T)*N3DRef);
-                                    }
-                                    else
-                                    {
-                                        for ( dim2=0; dim2<dimSize[2]; dim2++ )
+                                        size_t dim3_ref = dim3;
+                                        if ( dim3 < dimSizeRef[3] )
                                         {
-                                            size_t dim2_ref = dim2;
-                                            if ( dim2 < dimSizeRef[2] )
-                                            {
-                                                this->ismrmrdDimIndex9D(indRef, dims[2], dim2);
-                                            }
-                                            else
-                                            {
-                                                this->ismrmrdDimIndex9D(indRef, dims[2], dimSizeRef[2]-1);
-                                                dim2_ref = dimSizeRef[2]-1;
-                                            }
-                                            indRefWorkOrder[2] = dim2_ref;
+                                            this->ismrmrdDimIndex10D(indRef, dims[3], dim3);
+                                        }
+                                        else
+                                        {
+                                            this->ismrmrdDimIndex10D(indRef, dims[3], dimSizeRef[3]-1);
+                                            dim3_ref = dimSizeRef[3]-1;
+                                        }
+                                        indRefWorkOrder[3] = dim3_ref;
 
+                                        if ( dims[2] == DIM_Channel )
+                                        {
                                             long long offset = ref_->calculate_offset(indRef);
                                             long long offsetWorkOrder = workOrder_->ref_.calculate_offset(indRefWorkOrder);
-                                            memcpy(workOrder_->ref_.begin()+offsetWorkOrder, ref_->begin()+offset, sizeof(T)*N2DRef);
+                                            memcpy(workOrder_->ref_.begin()+offsetWorkOrder, ref_->begin()+offset, sizeof(T)*N3DRef);
+                                        }
+                                        else
+                                        {
+                                            for ( dim2=0; dim2<dimSize[2]; dim2++ )
+                                            {
+                                                size_t dim2_ref = dim2;
+                                                if ( dim2 < dimSizeRef[2] )
+                                                {
+                                                    this->ismrmrdDimIndex10D(indRef, dims[2], dim2);
+                                                }
+                                                else
+                                                {
+                                                    this->ismrmrdDimIndex10D(indRef, dims[2], dimSizeRef[2]-1);
+                                                    dim2_ref = dimSizeRef[2]-1;
+                                                }
+                                                indRefWorkOrder[2] = dim2_ref;
+
+                                                long long offset = ref_->calculate_offset(indRef);
+                                                long long offsetWorkOrder = workOrder_->ref_.calculate_offset(indRefWorkOrder);
+                                                memcpy(workOrder_->ref_.begin()+offsetWorkOrder, ref_->begin()+offset, sizeof(T)*N2DRef);
+                                            }
                                         }
                                     }
                                 }
                             }
-                        }
 
-                        if ( !shareAcrossWorkOrders && workOrder_->workFlow_BufferKernel_ && !workOrder_->workFlow_use_BufferedKernel_ )
-                        {
-                            GADGET_CHECK_RETURN_FALSE(workOrder_->reset());
-                        }
+                            // ---------------------------
+                            // handle shared work order
+                            // ---------------------------
+                            if ( !shareAcrossWorkOrders && workOrder_->workFlow_BufferKernel_ && !workOrder_->workFlow_use_BufferedKernel_ )
+                            {
+                                GADGET_CHECK_RETURN_FALSE(workOrder_->reset());
+                            }
 
-                        if ( shareAcrossWorkOrders && !workOrder_->workFlow_use_BufferedKernel_ )
-                        {
-                            if ( dim5==0 )
+                            if ( shareAcrossWorkOrders && !workOrder_->workFlow_use_BufferedKernel_ )
                             {
-                                workOrder_->workFlow_use_BufferedKernel_ = false;
+                                if ( dim5==0 )
+                                {
+                                    workOrder_->workFlow_use_BufferedKernel_ = false;
+                                }
+                                else
+                                {
+                                    workOrder_->workFlow_use_BufferedKernel_ = true;
+                                }
                             }
-                            else
+
+                            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, workOrder_->data_, "workOrder_data");
+                            GADGET_EXPORT_ARRAY(debugFolder_, gt_exporter_, workOrder_->time_stamp_, "workOrder_time_stamp");
+                            GADGET_EXPORT_ARRAY(debugFolder_, gt_exporter_, workOrder_->physio_time_stamp_, "workOrder_physio_time_stamp");
+                            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, workOrder_->ref_, "workOrder_ref");
+
+                            // ---------------------------
+                            // perform the recon
+                            // ---------------------------
+                            GADGET_CHECK_RETURN_FALSE(worker_->performRecon(workOrder_));
+
+                            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, workOrder_->complexIm_, "workOrder_complexIm");
+
+                            if ( workOrder_->complexIm_second_.get_number_of_elements()>0 )
                             {
-                                workOrder_->workFlow_use_BufferedKernel_ = true;
+                                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, workOrder_->complexIm_second_, "workOrder_complexImSecond");
                             }
-                        }
 
-                        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, workOrder_->data_, "workOrder_data");
-                        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, workOrder_->ref_, "workOrder_ref");
+                            if ( shareAcrossWorkOrders )
+                            {
+                                workOrder_->workFlow_use_BufferedKernel_ = workFlow_use_BufferedKernel_;
+                            }
 
-                        // trigger the recon
-                        GADGET_CHECK_RETURN_FALSE(worker_->performRecon(workOrder_));
+                            // ---------------------------
+                            // copy the recon complexIm
+                            // ---------------------------
+                            std::vector<size_t> indRes(ind);
+                            indRes[0] = 0;
+                            indRes[1] = 0;
+                            indRes[2] = 0;
+                            indRes[3] = 0;
+                            indRes[4] = 0;
+                            indRes[5] = dim5;
+                            indRes[6] = dim6;
+                            indRes[7] = dim7;
+                            indRes[8] = dim8;
+                            indRes[9] = dim9;
+
+                            long long offset = res_.calculate_offset(indRes);
+                            memcpy(res_.begin()+offset, workOrder_->complexIm_.begin(), workOrder_->complexIm_.get_number_of_bytes());
+
+                            // ---------------------------
+                            // copy the recon time stamp
+                            // ---------------------------
+                            if ( workOrder_->recon_time_stamp_.get_number_of_elements()>0 )
+                            {
+                                has_recon_time_stamp = true;
 
-                        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, workOrder_->complexIm_, "workOrder_complexIm");
+                                offset = res_time_stamp_.calculate_offset(indRes);
+                                memcpy(res_time_stamp_.begin()+offset, workOrder_->recon_time_stamp_.begin(), workOrder_->recon_time_stamp_.get_number_of_bytes());
+                            }
 
-                        if ( shareAcrossWorkOrders )
-                        {
-                            workOrder_->workFlow_use_BufferedKernel_ = workFlow_use_BufferedKernel_;
+                            // ---------------------------
+                            // copy the recon physio time stamp
+                            // ---------------------------
+                            if ( workOrder_->recon_physio_time_stamp_.get_number_of_elements()>0 )
+                            {
+                                has_recon_physio_time_stamp = true;
+
+                                offset = res_physio_time_stamp_.calculate_offset(indRes);
+                                memcpy(res_physio_time_stamp_.begin()+offset, workOrder_->recon_physio_time_stamp_.begin(), workOrder_->recon_physio_time_stamp_.get_number_of_bytes());
+                            }
+
+                            // ---------------------------
+                            // copy the second set of recon complexIm
+                            // ---------------------------
+                            GADGET_CHECK_RETURN_FALSE(this->copyReconResultsSecond(dim5, dim6, dim7, dim8, dim9));
+
+                            if ( workOrder_->complexIm_second_.get_number_of_elements()>0 )
+                            {
+                                has_second_res = true;
+                            }
+
+                            if ( workOrder_->recon_time_stamp_second_.get_number_of_elements()>0 )
+                            {
+                                has_recon_time_stamp_second = true;
+                            }
+
+                            if ( workOrder_->recon_physio_time_stamp_second_.get_number_of_elements()>0 )
+                            {
+                                has_recon_physio_time_stamp_second = true;
+                            }
+
+                            // ---------------------------
+                            // copy the gfactor
+                            // ---------------------------
+                            GADGET_CHECK_RETURN_FALSE(this->copyGFactor(dim5, dim6, dim7, dim8, dim9, gfactor_needed));
+
+                            // ---------------------------
+                            // copy the wrap-around map
+                            // ---------------------------
+                            GADGET_CHECK_RETURN_FALSE(this->copyWrapAroundMap(dim5, dim6, dim7, dim8, dim9, wrap_around_map_needed));
+
+                            // if not sharing across work order
+                            if ( !shareAcrossWorkOrders && !workOrder_->workFlow_use_BufferedKernel_ && !workOrder_->workFlow_BufferKernel_ )
+                            {
+                                GADGET_CHECK_RETURN_FALSE(workOrder_->reset());
+                            }
                         }
 
-                        // copy the results
-                        std::vector<size_t> indRes(ind);
-                        indRes[0] = 0;
-                        indRes[1] = 0;
-                        indRes[2] = 0;
-                        indRes[3] = 0;
-                        indRes[4] = 0;
-                        indRes[5] = dim5;
-                        indRes[6] = dim6;
-                        indRes[7] = dim7;
-                        indRes[8] = dim8;
-
-                        long long offset = res_.calculate_offset(indRes);
-                        memcpy(res_.begin()+offset, workOrder_->complexIm_.begin(), workOrder_->complexIm_.get_number_of_bytes());
-
-                        // if not sharing across work order
-                        if ( !shareAcrossWorkOrders && !workOrder_->workFlow_use_BufferedKernel_ && !workOrder_->workFlow_BufferKernel_ )
+                        // in the outer dimensions, the work order is always reset
+                        if ( !workOrder_->workFlow_use_BufferedKernel_ && !workOrder_->workFlow_BufferKernel_ )
                         {
                             GADGET_CHECK_RETURN_FALSE(workOrder_->reset());
                         }
                     }
-
-                    // in the outter dimensions, the work order is always reset
-                    if ( !workOrder_->workFlow_use_BufferedKernel_ && !workOrder_->workFlow_BufferKernel_ )
-                    {
-                        GADGET_CHECK_RETURN_FALSE(workOrder_->reset());
-                    }
                 }
             }
         }
 
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, res_, "res_afterunwrapping");
 
+        if ( has_second_res )
+        {
+            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, res_second_, "res_second_afterunwrapping");
+        }
+
+        if ( has_recon_time_stamp )
+        {
+            GADGET_EXPORT_ARRAY(debugFolder_, gt_exporter_, res_time_stamp_, "res_time_stamp");
+        }
+
+        if ( has_recon_physio_time_stamp )
+        {
+            GADGET_EXPORT_ARRAY(debugFolder_, gt_exporter_, res_physio_time_stamp_, "res_physio_time_stamp");
+        }
+
+        if ( has_recon_time_stamp_second )
+        {
+            GADGET_EXPORT_ARRAY(debugFolder_, gt_exporter_, res_time_stamp_second_, "res_time_stamp_second");
+        }
+
+        if ( has_recon_physio_time_stamp_second )
+        {
+            GADGET_EXPORT_ARRAY(debugFolder_, gt_exporter_, res_physio_time_stamp_second_, "res_physio_time_stamp_second");
+        }
+
+        if ( gfactor_needed )
+        {
+            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, gfactor_, "gfactor_afterunwrapping");
+        }
+
+        if ( wrap_around_map_needed )
+        {
+            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, wrap_around_map_, "wrap_around_map_afterunwrapping");
+        }
+
         // permute the res_ to the correct dimension order
         if (   ( (res_.get_number_of_elements()>dimResSize[0]*dimResSize[1]) && (dims[2]!=DIM_Channel) ) 
             || ( (res_.get_number_of_elements()>dimResSize[0]*dimResSize[1]*dimResSize[2])             ) )
@@ -1063,11 +1422,81 @@ configureWorkOrder(const std::vector<ISMRMRDDIM>& dims)
             std::vector<size_t> order;
             GADGET_CHECK_RETURN_FALSE(this->findISMRMRDPermuteOrder(dimsRes, dimsRes_, order));
 
-            boost::shared_ptr< hoNDArray<T> > res_permuted = Gadgetron::permute(&res_, &order);
-            res_.reshape(res_permuted->get_dimensions());
-            memcpy(res_.begin(), res_permuted->begin(), res_permuted->get_number_of_bytes());
-
+            GADGET_CHECK_RETURN_FALSE(this->permuteArrayOrder(res_, order));
             GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, res_, "res_afterPermute");
+
+            if ( has_recon_time_stamp )
+            {
+                GADGET_CHECK_RETURN_FALSE(this->permuteArrayOrder(res_time_stamp_, order));
+                GADGET_EXPORT_ARRAY(debugFolder_, gt_exporter_, res_time_stamp_, "res_time_stamp_afterPermute");
+            }
+
+            if ( has_recon_physio_time_stamp )
+            {
+                GADGET_CHECK_RETURN_FALSE(this->permuteArrayOrder(res_physio_time_stamp_, order));
+                GADGET_EXPORT_ARRAY(debugFolder_, gt_exporter_, res_physio_time_stamp_, "res_physio_time_stamp_afterPermute");
+            }
+
+            if ( gfactor_needed )
+            {
+                GADGET_CHECK_RETURN_FALSE(this->permuteArrayOrder(gfactor_, order));
+                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, gfactor_, "gfactor_afterPermute");
+            }
+
+            if ( wrap_around_map_needed )
+            {
+                GADGET_CHECK_RETURN_FALSE(this->permuteArrayOrder(wrap_around_map_, order));
+                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, wrap_around_map_, "wrap_around_map_afterPermute");
+            }
+        }
+
+        if ( has_second_res )
+        {
+            if (   ( (res_second_.get_number_of_elements()>dimResSize[0]*dimResSize[1]) && (dims[2]!=DIM_Channel) ) 
+                || ( (res_second_.get_number_of_elements()>dimResSize[0]*dimResSize[1]*dimResSize[2])             ) )
+            {
+                std::vector<size_t> order;
+                GADGET_CHECK_RETURN_FALSE(this->findISMRMRDPermuteOrder(dimsRes, dimsRes_, order));
+
+                GADGET_CHECK_RETURN_FALSE(this->permuteArrayOrder(res_second_, order));
+                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, res_second_, "res_second_afterPermute");
+
+                if ( has_recon_time_stamp_second )
+                {
+                    GADGET_CHECK_RETURN_FALSE(this->permuteArrayOrder(res_time_stamp_second_, order));
+                    GADGET_EXPORT_ARRAY(debugFolder_, gt_exporter_, res_time_stamp_, "res_time_stamp_second_afterPermute");
+                }
+
+                if ( has_recon_physio_time_stamp_second )
+                {
+                    GADGET_CHECK_RETURN_FALSE(this->permuteArrayOrder(res_physio_time_stamp_second_, order));
+                    GADGET_EXPORT_ARRAY(debugFolder_, gt_exporter_, res_physio_time_stamp_second_, "res_physio_time_stamp_second_afterPermute");
+                }
+            }
+        }
+        else
+        {
+            res_second_.clear();
+        }
+
+        if ( !has_recon_time_stamp )
+        {
+            res_time_stamp_.clear();
+        }
+
+        if ( !has_recon_physio_time_stamp )
+        {
+            res_physio_time_stamp_.clear();
+        }
+
+        if ( !has_recon_time_stamp_second )
+        {
+            res_time_stamp_second_.clear();
+        }
+
+        if ( !has_recon_physio_time_stamp_second )
+        {
+            res_physio_time_stamp_second_.clear();
         }
     }
     catch(...)
@@ -1079,4 +1508,282 @@ configureWorkOrder(const std::vector<ISMRMRDDIM>& dims)
     return true;
 }
 
+/// Copies the work order's second reconstruction result (complexIm_second_) and,
+/// when present, its time stamps into res_second_ / res_*_time_stamp_second_ at
+/// outer indexes [dim5 dim6 dim7 dim8 dim9]. Returns false if anything throws.
+template <typename T> 
+bool gtPlusISMRMRDReconWorkFlowCartesian<T>::
+copyReconResultsSecond(size_t dim5, size_t dim6, size_t dim7, size_t dim8, size_t dim9)
+{
+    try
+    {
+        // no second result in the work order: nothing to do
+        if ( workOrder_->complexIm_second_.get_number_of_elements() == 0 )
+        {
+            return true;
+        }
+
+        // complexIm_second_ is read here as [RO E1 N S]
+        const size_t RO = workOrder_->complexIm_second_.get_size(0);
+        const size_t E1 = workOrder_->complexIm_second_.get_size(1);
+        const size_t N = workOrder_->complexIm_second_.get_size(2);
+        const size_t S = workOrder_->complexIm_second_.get_size(3);
+        const size_t imageSize = RO*E1;
+
+        std::vector<size_t> dimsOut;
+
+        // every call re-creates and clears each destination, with N and S taken from the work order
+        const bool copyTimeStamp = ( workOrder_->recon_time_stamp_second_.get_number_of_elements() > 0 );
+        if ( copyTimeStamp )
+        {
+            res_time_stamp_second_.get_dimensions(dimsOut);
+            dimsOut[3] = N;
+            dimsOut[4] = S;
+            res_time_stamp_second_.create(dimsOut);
+            Gadgetron::clear(res_time_stamp_second_);
+        }
+
+        const bool copyPhysioTimeStamp = ( workOrder_->recon_physio_time_stamp_second_.get_number_of_elements() > 0 );
+        if ( copyPhysioTimeStamp )
+        {
+            res_physio_time_stamp_second_.get_dimensions(dimsOut);
+            dimsOut[3] = N;
+            dimsOut[4] = S;
+            res_physio_time_stamp_second_.create(dimsOut);
+            Gadgetron::clear(res_physio_time_stamp_second_);
+        }
+
+        res_second_.get_dimensions(dimsOut);
+        dimsOut[3] = N;
+        dimsOut[4] = S;
+        res_second_.create(dimsOut);
+        Gadgetron::clear(res_second_);
+
+        // destination index; only entries 3 (n) and 4 (s) change inside the loops
+        std::vector<size_t> ind(10, 0);
+        ind[5] = dim5;
+        ind[6] = dim6;
+        ind[7] = dim7;
+        ind[8] = dim8;
+        ind[9] = dim9;
+
+        for ( size_t s=0; s<S; s++ )
+        {
+            ind[4] = s;
+
+            for ( size_t n=0; n<N; n++ )
+            {
+                ind[3] = n;
+
+                // one block of RO*E1 elements per (n, s)
+                size_t pos = res_second_.calculate_offset(ind);
+                memcpy(res_second_.begin()+pos, workOrder_->complexIm_second_.begin()+(n+s*N)*imageSize, sizeof(T)*imageSize);
+
+                if ( copyTimeStamp )
+                {
+                    pos = res_time_stamp_second_.calculate_offset(ind);
+                    res_time_stamp_second_(pos) = workOrder_->recon_time_stamp_second_(0, 0, 0, n, s);
+                }
+
+                if ( copyPhysioTimeStamp )
+                {
+                    pos = res_physio_time_stamp_second_.calculate_offset(ind);
+                    res_physio_time_stamp_second_(pos) = workOrder_->recon_physio_time_stamp_second_(0, 0, 0, n, s);
+                }
+            }
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors in gtPlusISMRMRDReconWorkFlowCartesian<T>::copyReconResultsSecond() ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Copies the gfactor held by the work order into gfactor_ at outer indexes
+/// [dim5 dim6 dim7 dim8 dim9].
+///
+/// Nothing is copied unless gfactor_needed is true and the first two sizes of the
+/// work order's gfactor equal those of res_. Returns false only when an exception
+/// is caught, true otherwise.
+template <typename T> 
+bool gtPlusISMRMRDReconWorkFlowCartesian<T>::
+copyGFactor(size_t dim5, size_t dim6, size_t dim7, size_t dim8, size_t dim9, bool gfactor_needed)
+{
+    try
+    {
+        if ( !gfactor_needed )
+        {
+            return true;
+        }
+
+        // skip the copy when the first two sizes differ from those of res_
+        if ( workOrder_->gfactor_.get_size(0) != res_.get_size(0) )
+        {
+            return true;
+        }
+
+        if ( workOrder_->gfactor_.get_size(1) != res_.get_size(1) )
+        {
+            return true;
+        }
+
+        // N and S are read from dimensions 3 and 4 of the destination but
+        // from dimensions 2 and 3 of the work order's array
+        const size_t RO = gfactor_.get_size(0);
+        const size_t E1 = gfactor_.get_size(1);
+        const size_t N = gfactor_.get_size(3);
+        const size_t S = gfactor_.get_size(4);
+
+        const size_t srcN = workOrder_->gfactor_.get_size(2);
+        const size_t srcS = workOrder_->gfactor_.get_size(3);
+
+        // destination index; entries 3 (n) and 4 (s) stay 0 unless set in the loops below
+        std::vector<size_t> indDst(10, 0);
+        indDst[5] = dim5;
+        indDst[6] = dim6;
+        indDst[7] = dim7;
+        indDst[8] = dim8;
+        indDst[9] = dim9;
+
+        if ( (srcN == N) && (srcS == S) )
+        {
+            // N and S agree on both sides: copy the whole source array in one go
+            const size_t posDst = gfactor_.calculate_offset(indDst);
+            memcpy(gfactor_.begin()+posDst, workOrder_->gfactor_.begin(), workOrder_->gfactor_.get_number_of_bytes());
+            return true;
+        }
+
+        // otherwise copy one block of RO*E1 elements per (n, s); when n or s is
+        // not below the source size, the last source index (size-1) is used
+        // the source index carries 9 entries, one fewer than the destination
+        std::vector<size_t> indSrc(9, 0);
+        indSrc[4] = dim5;
+        indSrc[5] = dim6;
+        indSrc[6] = dim7;
+        indSrc[7] = dim8;
+        indSrc[8] = dim9;
+
+        for ( size_t s=0; s<S; s++ )
+        {
+            indDst[4] = s;
+            indSrc[3] = ( s < srcS ) ? s : srcS-1;
+
+            for ( size_t n=0; n<N; n++ )
+            {
+                indDst[3] = n;
+                indSrc[2] = ( n < srcN ) ? n : srcN-1;
+
+                const size_t posDst = gfactor_.calculate_offset(indDst);
+                const size_t posSrc = workOrder_->gfactor_.calculate_offset(indSrc);
+
+                memcpy(gfactor_.begin()+posDst, workOrder_->gfactor_.begin()+posSrc, sizeof(T)*RO*E1);
+            }
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors in gtPlusISMRMRDReconWorkFlowCartesian<T>::copyGFactor() ... ");
+        return false;
+    }
+
+    return true;
+}
+
+/// Copies the wrap-around map held by the work order into wrap_around_map_ at
+/// outer indexes [dim5 dim6 dim7 dim8 dim9].
+///
+/// Nothing is copied unless wrap_around_map_needed is true and the first two sizes
+/// of the work order's map equal those of res_. Returns false only when an
+/// exception is caught, true otherwise.
+template <typename T> 
+bool gtPlusISMRMRDReconWorkFlowCartesian<T>::
+copyWrapAroundMap(size_t dim5, size_t dim6, size_t dim7, size_t dim8, size_t dim9, bool wrap_around_map_needed)
+{
+    try
+    {
+        if ( !wrap_around_map_needed )
+        {
+            return true;
+        }
+
+        // skip the copy when the first two sizes differ from those of res_
+        if ( workOrder_->wrap_around_map_.get_size(0) != res_.get_size(0) )
+        {
+            return true;
+        }
+
+        if ( workOrder_->wrap_around_map_.get_size(1) != res_.get_size(1) )
+        {
+            return true;
+        }
+
+        // N and S are read from dimensions 3 and 4 on both sides;
+        // dimension 2 of the source is taken as CHA
+        const size_t RO = wrap_around_map_.get_size(0);
+        const size_t E1 = wrap_around_map_.get_size(1);
+        const size_t N = wrap_around_map_.get_size(3);
+        const size_t S = wrap_around_map_.get_size(4);
+
+        const size_t srcCHA = workOrder_->wrap_around_map_.get_size(2);
+        const size_t srcN = workOrder_->wrap_around_map_.get_size(3);
+        const size_t srcS = workOrder_->wrap_around_map_.get_size(4);
+
+        // destination index; entries 3 (n) and 4 (s) stay 0 unless set in the loops below
+        std::vector<size_t> indDst(10, 0);
+        indDst[5] = dim5;
+        indDst[6] = dim6;
+        indDst[7] = dim7;
+        indDst[8] = dim8;
+        indDst[9] = dim9;
+
+        if ( (srcN == N) && (srcS == S) )
+        {
+            // N and S agree on both sides: copy the whole source array in one go
+            const size_t posDst = wrap_around_map_.calculate_offset(indDst);
+            memcpy(wrap_around_map_.begin()+posDst, workOrder_->wrap_around_map_.begin(), workOrder_->wrap_around_map_.get_number_of_bytes());
+            return true;
+        }
+
+        // otherwise copy block by block for every (n, s); when n or s is not
+        // below the source size, the last source index (size-1) is used
+        // the source index has the same 10-entry layout as the destination
+        std::vector<size_t> indSrc(10, 0);
+        indSrc[5] = dim5;
+        indSrc[6] = dim6;
+        indSrc[7] = dim7;
+        indSrc[8] = dim8;
+        indSrc[9] = dim9;
+
+        for ( size_t s=0; s<S; s++ )
+        {
+            indDst[4] = s;
+            indSrc[4] = s;
+            if ( s >= srcS ) indSrc[4] = srcS-1;
+
+            for ( size_t n=0; n<N; n++ )
+            {
+                indDst[3] = n;
+                indSrc[3] = n;
+                if ( n >= srcN ) indSrc[3] = srcN-1;
+
+                const size_t posDst = wrap_around_map_.calculate_offset(indDst);
+                const size_t posSrc = workOrder_->wrap_around_map_.calculate_offset(indSrc);
+
+                // each block spans RO*E1*CHA elements, with CHA taken from the source
+                memcpy(wrap_around_map_.begin()+posDst, workOrder_->wrap_around_map_.begin()+posSrc, sizeof(T)*RO*E1*srcCHA);
+            }
+        }
+    }
+    catch(...)
+    {
+        GADGET_ERROR_MSG("Errors in gtPlusISMRMRDReconWorkFlowCartesian<T>::copyWrapAroundMap() ... ");
+        return false;
+    }
+
+    return true;
+}
+
 }}
diff --git a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkFlowCartesian2DT.h b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkFlowCartesian2DT.h
index 9e6bc25..93851ed 100644
--- a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkFlowCartesian2DT.h
+++ b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkFlowCartesian2DT.h
@@ -16,6 +16,7 @@ public:
 
     typedef gtPlusISMRMRDReconWorkFlowCartesian<T> BaseClass;
     typedef typename BaseClass::DimensionRecordType DimensionRecordType;
+    typedef typename BaseClass::real_value_type real_value_type;
 
     gtPlusISMRMRDReconWorkFlowCartesian2DT();
     virtual ~gtPlusISMRMRDReconWorkFlowCartesian2DT();
@@ -27,6 +28,8 @@ public:
     virtual bool predictDimensions();
 
     using BaseClass::data_;
+    using BaseClass::time_stamp_;
+    using BaseClass::physio_time_stamp_;
     using BaseClass::ref_;
     using BaseClass::noise_;
     using BaseClass::noiseBW_;
@@ -42,6 +45,11 @@ public:
     using BaseClass::reconFOV_E1_;
     using BaseClass::reconFOV_E2_;
     using BaseClass::res_;
+    using BaseClass::res_second_;
+    using BaseClass::res_time_stamp_;
+    using BaseClass::res_physio_time_stamp_;
+    using BaseClass::res_time_stamp_second_;
+    using BaseClass::res_physio_time_stamp_second_;
 
     using BaseClass::worker_;
     using BaseClass::workOrder_;
@@ -81,6 +89,7 @@ protected:
     using BaseClass::REP_;
     using BaseClass::SET_;
     using BaseClass::SEG_;
+    using BaseClass::AVE_;
 
     using BaseClass::RO_ref_;
     using BaseClass::E1_ref_;
@@ -92,6 +101,7 @@ protected:
     using BaseClass::REP_ref_;
     using BaseClass::SET_ref_;
     using BaseClass::SEG_ref_;
+    using BaseClass::AVE_ref_;
 
     using BaseClass::gtPlus_util_;
 };
@@ -115,7 +125,7 @@ void gtPlusISMRMRDReconWorkFlowCartesian2DT<T>::printInfo(std::ostream& os)
     os << "-------------- GTPlus ISMRMRD Recon workflow Cartesian 2D/2DT -------------" << endl;
     os << "Implementation of general reconstruction workflow for cartesian sampling of 2D and 2D+T use cases" << endl;
     os << "The workOrder needs 5 dimensions [RO E1 CHA N S]" << endl;
-    os << "----------------------------------------------------------" << endl;
+    os << "---------------------------------------------------------------------------" << endl;
 }
 
 template <typename T> 
@@ -233,8 +243,13 @@ bool gtPlusISMRMRDReconWorkFlowCartesian2DT<T>::recon()
         size_t dd;
 
         int indWorkOrderSharingDim = -1;
-        for ( dim=DIM_Slice; dim<=DIM_Set; dim++ )
+        for ( dim=DIM_Slice; dim<=DIM_Average; dim++ )
         {
+            if ( dim == DIM_Segment )
+            {
+                continue;
+            }
+
             bool exist = false;
             for ( dd=0; dd<dims.size(); dd++ )
             {
@@ -251,7 +266,7 @@ bool gtPlusISMRMRDReconWorkFlowCartesian2DT<T>::recon()
 
                 if ( dim == WorkOrderShareDim_ )
                 {
-                    indWorkOrderSharingDim = dims.size()-1;
+                    indWorkOrderSharingDim = (int)(dims.size()-1);
                 }
             }
         }
diff --git a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkFlowCartesian3DT.h b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkFlowCartesian3DT.h
index 6f46492..6bfbaed 100644
--- a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkFlowCartesian3DT.h
+++ b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkFlowCartesian3DT.h
@@ -16,6 +16,7 @@ public:
 
     typedef gtPlusISMRMRDReconWorkFlowCartesian<T> BaseClass;
     typedef typename BaseClass::DimensionRecordType DimensionRecordType;
+    typedef typename BaseClass::real_value_type real_value_type;
 
     gtPlusISMRMRDReconWorkFlowCartesian3DT();
     virtual ~gtPlusISMRMRDReconWorkFlowCartesian3DT();
@@ -27,6 +28,8 @@ public:
     virtual bool predictDimensions();
 
     using BaseClass::data_;
+    using BaseClass::time_stamp_;
+    using BaseClass::physio_time_stamp_;
     using BaseClass::ref_;
     using BaseClass::noise_;
     using BaseClass::noiseBW_;
@@ -42,6 +45,11 @@ public:
     using BaseClass::reconFOV_E1_;
     using BaseClass::reconFOV_E2_;
     using BaseClass::res_;
+    using BaseClass::res_second_;
+    using BaseClass::res_time_stamp_;
+    using BaseClass::res_physio_time_stamp_;
+    using BaseClass::res_time_stamp_second_;
+    using BaseClass::res_physio_time_stamp_second_;
 
     using BaseClass::worker_;
     using BaseClass::workOrder_;
@@ -78,6 +86,7 @@ protected:
     using BaseClass::REP_;
     using BaseClass::SET_;
     using BaseClass::SEG_;
+    using BaseClass::AVE_;
 
     using BaseClass::RO_ref_;
     using BaseClass::E1_ref_;
@@ -89,6 +98,7 @@ protected:
     using BaseClass::REP_ref_;
     using BaseClass::SET_ref_;
     using BaseClass::SEG_ref_;
+    using BaseClass::AVE_ref_;
 
     using BaseClass::gtPlus_util_;
 };
@@ -203,8 +213,13 @@ bool gtPlusISMRMRDReconWorkFlowCartesian3DT<T>::recon()
         size_t dd;
 
         int indWorkOrderSharingDim = -1;
-        for ( dim=DIM_Slice; dim<=DIM_Set; dim++ )
+        for ( dim=DIM_Slice; dim<=DIM_Average; dim++ )
         {
+            if ( dim == DIM_Segment )
+            {
+                continue;
+            }
+
             bool exist = false;
             for ( dd=0; dd<dims.size(); dd++ )
             {
@@ -221,7 +236,7 @@ bool gtPlusISMRMRDReconWorkFlowCartesian3DT<T>::recon()
 
                 if ( dim == WorkOrderShareDim_ )
                 {
-                    indWorkOrderSharingDim = dims.size()-1;
+                    indWorkOrderSharingDim = (int)(dims.size()-1);
                 }
             }
         }
diff --git a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkOrder.h b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkOrder.h
index 42c42e5..faa12de 100644
--- a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkOrder.h
+++ b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkOrder.h
@@ -5,11 +5,13 @@
 
 #pragma once
 
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
 #include "gtPlusISMRMRDReconUtil.h"
 
 namespace Gadgetron { namespace gtPlus {
 
+#define MAX_MOCO_LEVEL 16
+
 struct gtPlusReconWorkOrderPara
 {
     ISMRMRDCALIBMODE CalibMode_;
@@ -92,7 +94,14 @@ struct gtPlusReconWorkOrderPara
     Gadgetron::gtPlus::ISMRMRDALGO recon_algorithm_;
     bool recon_auto_parameters_;
 
+    bool gfactor_needed_;
+
+    bool wrap_around_map_needed_;
+
+    /// --------------
     // grappa
+    /// --------------
+
     size_t grappa_kSize_RO_;
     size_t grappa_kSize_E1_;
     size_t grappa_kSize_E2_;
@@ -100,15 +109,17 @@ struct gtPlusReconWorkOrderPara
     double grappa_calib_over_determine_ratio_;
     bool grappa_use_gpu_;
 
-    // sense
-
-    // soft sense
-
+    /// --------------
     // SPIRiT
+    /// --------------
     size_t spirit_kSize_RO_;
     size_t spirit_kSize_E1_;
     size_t spirit_kSize_E2_;
 
+    size_t spirit_oSize_RO_;
+    size_t spirit_oSize_E1_;
+    size_t spirit_oSize_E2_;
+
     double spirit_reg_lamda_;
     double spirit_calib_over_determine_ratio_;
 
@@ -120,7 +131,9 @@ struct gtPlusReconWorkOrderPara
 
     bool spirit_use_gpu_;
 
+    /// --------------
     // L1 SPIRiT
+    /// --------------
     bool spirit_perform_linear_;
     bool spirit_perform_nonlinear_;
 
@@ -133,6 +146,13 @@ struct gtPlusReconWorkOrderPara
     bool spirit_ncg_print_iter_;
     double spirit_ncg_scale_factor_;
 
+    size_t spirit_slep_iter_max_;
+    double spirit_slep_iter_thres_;
+    bool spirit_slep_print_iter_;
+    bool spirit_slep_keep_third_dimension_coeff_;
+    bool spirit_slep_keep_approx_coeff_;
+    double spirit_slep_scale_factor_;
+
     bool spirit_use_coil_sen_map_;
     bool spirit_use_moco_enhancement_;
     bool spirit_recon_moco_images_;
@@ -145,7 +165,103 @@ struct gtPlusReconWorkOrderPara
     double spirit_E2_enhancement_ratio_;
     double spirit_temporal_enhancement_ratio_;
 
-    // L1 soft sense
+    /// --------------
+    /// parameters for retro-gating
+    /// --------------
+    // number of retro-gated phases
+    // if 0, retro-gating is not prescribed
+    size_t retro_gated_images_;
+
+    // how many readout lines in each segment for retro-gating
+    size_t retro_gated_segment_size_;
+
+    // which method used for retro-gating
+    ISMRMRDINTERPRETROGATING retro_gated_interp_method_;
+
+    /// --------------
+    /// parameters for binning
+    /// --------------
+
+    // number of target cardiac phases
+    size_t kspace_binning_number_of_cardiac_phases_;
+
+    // minimal allowed cardiac phase width used for binning, in ms
+    // if the binned temporal window is smaller than this threshold,
+    // the binned window will be increased
+    // if <=0, then this value will not take effect
+    double kspace_binning_minimal_cardiac_phase_width_;
+
+    // whether to perform binning recon with multiple channel complex data
+    bool kspace_binning_multiple_channel_recon_;
+
+    // whether to perform non-linear recon
+    bool kspace_binning_iterative_non_linear_recon_;
+
+    // non-linear recon using slep optimizer
+    bool kspace_binning_iterative_non_linear_recon_slep_;
+
+    // whether to use coil map when warping multiple channel images
+    bool kspace_binning_multiple_channel_recon_with_coil_map_;
+
+    // whether to compute navigator signal
+    bool kspace_binning_compute_navigator_signal_;
+
+    // for navigator detection
+    size_t kspace_binning_navigator_moco_level_;
+    size_t kspace_binning_navigator_moco_iter_[MAX_MOCO_LEVEL];
+    double kspace_binning_navigator_hilbert_strength_;
+    double kspace_binning_navigator_dissimilarity_sigma_;
+    bool  kspace_binning_navigator_bidirectional_moco_;
+
+    // parameters for the moco in kspace binning
+    size_t kspace_binning_moco_level_;
+    size_t kspace_binning_moco_iter_[MAX_MOCO_LEVEL];
+    double kspace_binning_moco_hilbert_strength_;
+    double kspace_binning_moco_dissimilarity_sigma_;
+    bool  kspace_binning_bidirectional_moco_;
+
+    // whether to perform soft combination
+    bool kspace_binning_soft_combination_;
+
+    // navigator signal acceptance window
+    double kspace_binning_navigator_window_wide_;
+    double kspace_binning_navigator_window_narrow_;
+
+    // method for warping the complex images ("BSpline", "Linear")
+    ISMRMRDINTERP kspace_binning_method_warpping_;
+
+    // whether to exclude the last cardiac cycle for binning
+    bool kspace_binning_exclude_last_cardiac_cycle_;
+
+    // some blocks around central kspace must be filled
+    size_t kspace_binning_number_of_central_kspace_blocks_;
+
+    // maximal allowed temporal ratio window
+    double kspace_binning_max_temporal_window_;
+
+    // temporal ratio window used for binning
+    double kspace_binning_temporal_window_;
+
+    // interpolation method to generate best cardiac cycle ('Linear', 'Spline')
+    ISMRMRDINTERP kspace_binning_best_cardiac_cycle_interpolator_;
+
+    // recon using certain length of data (if <=0, use the whole data), in the unit of seconds
+    double kspace_binning_data_length_used_for_recon_;
+
+    // fill hole with nearest neighbor
+    bool kspace_binning_fill_kspace_with_neighbors_;
+
+    // for the flow binning, whether the flow encoding is performed inside every e1
+    bool kspace_binning_flow_in_e1_;
+
+    // whether to jointly recon all flow encoding directions
+    // if false, every flow encoding direction will be reconstructed separately
+    bool kspace_binning_flow_recon_jointly_;
+
+    /// --------------
+    /// parameters for motion compensated recon
+    /// --------------
+    size_t motion_comp_num_of_PD_images_;
 
     // -------------------------------
     // job split
@@ -225,6 +341,8 @@ struct gtPlusReconWorkOrderPara
 
         num_channels_res_ = 1;
 
+        // ----------------------------------------------
+
         upstream_coil_compression_ = false;
         upstream_coil_compression_thres_ = 1e-3;
         upstream_coil_compression_num_modesKept_ = -1;
@@ -239,26 +357,38 @@ struct gtPlusReconWorkOrderPara
         csm_true_3D_ = false;
         csm_iter_num_ = 5;
         csm_iter_thres_ = 1e-3;
-        csm_use_gpu_ = true;
+        csm_use_gpu_ = false;
+
+        // ----------------------------------------------
 
         recon_algorithm_ = ISMRMRD_GRAPPA;
         recon_auto_parameters_ = true;
+        gfactor_needed_ = false;
+        wrap_around_map_needed_ = false;
+
+        // ----------------------------------------------
 
         grappa_kSize_RO_ = 5;
         grappa_kSize_E1_ = 4;
         grappa_kSize_E2_ = 4;
         grappa_reg_lamda_ = 0.0005;
         grappa_calib_over_determine_ratio_ = 0;
-        grappa_use_gpu_ = true;
+        grappa_use_gpu_ = false;
+
+        // ----------------------------------------------
 
         spirit_kSize_RO_ = 7;
         spirit_kSize_E1_ = 7;
         spirit_kSize_E2_ = 7;
 
+        spirit_oSize_RO_ = 1;
+        spirit_oSize_E1_ = 1;
+        spirit_oSize_E2_ = 1;
+
         spirit_reg_lamda_ = 0.005;
         spirit_calib_over_determine_ratio_ = 0;
 
-        spirit_use_gpu_ = true;
+        spirit_use_gpu_ = false;
 
         spirit_solve_symmetric_ = false;
 
@@ -266,6 +396,8 @@ struct gtPlusReconWorkOrderPara
         spirit_iter_thres_ = 1e-5;
         spirit_print_iter_ = false;
 
+        // ----------------------------------------------
+
         spirit_perform_linear_ = true;
         spirit_perform_nonlinear_ = true;
 
@@ -276,7 +408,14 @@ struct gtPlusReconWorkOrderPara
         spirit_ncg_iter_max_ = 10;
         spirit_ncg_iter_thres_ = 1e-3;
         spirit_ncg_print_iter_ = false;
-        spirit_ncg_scale_factor_ = 1.0;
+        spirit_ncg_scale_factor_ = -1.0;
+
+        spirit_slep_iter_max_ = 5;
+        spirit_slep_iter_thres_ = 1e-5;
+        spirit_slep_print_iter_ = false;
+        spirit_slep_keep_third_dimension_coeff_ = false;
+        spirit_slep_keep_approx_coeff_ = true;
+        spirit_slep_scale_factor_ = -1.0;
 
         spirit_use_coil_sen_map_ = true;
         spirit_use_moco_enhancement_ = false;
@@ -290,12 +429,75 @@ struct gtPlusReconWorkOrderPara
         spirit_2D_scale_per_chunk_ = false;
         spirit_3D_scale_per_chunk_ = true;
 
+        // ----------------------------------------------
+
+        retro_gated_images_ = 0;
+        retro_gated_segment_size_ = 0;
+        retro_gated_interp_method_ = ISMRMRD_INTERP_RETRO_GATING_BSPLINE;
+
+        // ----------------------------------------------
+
+        kspace_binning_number_of_cardiac_phases_ = 30;
+        kspace_binning_minimal_cardiac_phase_width_ = 33; // 33ms, 30 phases for the heart rate of 60
+
+        kspace_binning_multiple_channel_recon_ = true;
+        kspace_binning_iterative_non_linear_recon_ = true;
+        kspace_binning_iterative_non_linear_recon_slep_ = true;
+        kspace_binning_multiple_channel_recon_with_coil_map_ = false;
+        kspace_binning_compute_navigator_signal_ = true;
+
+        kspace_binning_navigator_moco_level_ = 4;
+
+        size_t ii;
+        for ( ii=0; ii<MAX_MOCO_LEVEL; ii++ ) kspace_binning_navigator_moco_iter_[ii] = 0;
+        kspace_binning_navigator_moco_iter_[0] = 1;
+        kspace_binning_navigator_moco_iter_[1] = 100;
+        kspace_binning_navigator_moco_iter_[2] = 100;
+        kspace_binning_navigator_moco_iter_[3] = 100;
+
+        kspace_binning_navigator_hilbert_strength_ = 6.0;
+        kspace_binning_navigator_dissimilarity_sigma_ = 2.0;
+        kspace_binning_navigator_bidirectional_moco_ = false;
+
+        kspace_binning_moco_level_ = 5;
+        for ( ii=0; ii<MAX_MOCO_LEVEL; ii++ ) kspace_binning_moco_iter_[ii] = 0;
+        kspace_binning_moco_iter_[0] = 100;
+        kspace_binning_moco_iter_[1] = 100;
+        kspace_binning_moco_iter_[2] = 100;
+        kspace_binning_moco_iter_[3] = 100;
+        kspace_binning_moco_iter_[4] = 100;
+
+        kspace_binning_moco_hilbert_strength_ = 12.0;
+        kspace_binning_moco_dissimilarity_sigma_ = 2.0;
+        kspace_binning_bidirectional_moco_ = false;
+        kspace_binning_soft_combination_ = true;
+        kspace_binning_navigator_window_wide_ = 0.75;
+        kspace_binning_navigator_window_narrow_ = 0.5;
+        kspace_binning_method_warpping_ = ISMRMRD_INTERP_BSPLINE;
+        kspace_binning_exclude_last_cardiac_cycle_ = false;
+        kspace_binning_number_of_central_kspace_blocks_ = 0;
+        kspace_binning_max_temporal_window_ = 1.0;
+        kspace_binning_temporal_window_ = 4.0;
+        kspace_binning_best_cardiac_cycle_interpolator_= ISMRMRD_INTERP_SPLINE;
+        kspace_binning_data_length_used_for_recon_ = 0;
+        kspace_binning_fill_kspace_with_neighbors_ = false;
+        kspace_binning_flow_in_e1_ = true;
+        kspace_binning_flow_recon_jointly_ = true;
+
+        // ----------------------------------------------
+
+        motion_comp_num_of_PD_images_ = 0;
+
+        // ----------------------------------------------
+
         job_split_by_S_ = false;
         job_num_of_N_ = 0;
         job_max_Megabytes_ = 20*1024;
         job_overlap_ = 2;
         job_perform_on_control_node_ = true;
 
+        // ----------------------------------------------
+
         partialFourier_algo_ = ISMRMRD_PF_ZEROFILLING_FILTER;
 
         partialFourier_homodyne_iters_ = 6;
@@ -319,13 +521,13 @@ struct gtPlusReconWorkOrderPara
     ~gtPlusReconWorkOrderPara() {}
 };
 
-
-
 template <typename T> 
 class gtPlusReconWorkOrder : public gtPlusReconWorkOrderPara
 {
 public:
 
+    typedef typename realType<T>::Type real_value_type;
+
     gtPlusReconWorkOrder();
     virtual ~gtPlusReconWorkOrder();
 
@@ -365,6 +567,17 @@ public:
     // other data
     hoNDArray<T> other_;
 
+    // sometimes, the initial kspace can be provided
+    hoNDArray<T> kspace_initial_;
+
+    // acquisition time stamp in the unit of second for kspace data lines
+    // for the embedded mode, the time stamps of ref lines are also stored
+    hoNDArray<real_value_type> time_stamp_;
+
+    // physio time stamp in the unit of second for kspace data lines
+    // for the embedded mode, the physio time stamps of ref lines are also stored
+    hoNDArray<real_value_type> physio_time_stamp_;
+
     // dimension starting indexes for the data_
     std::vector< DimensionRecordType > dataDimStartingIndexes_;
 
@@ -384,9 +597,24 @@ public:
     // reconstructed images
     hoNDArray<T> complexIm_;
 
+    // time stamp and physio time stamp for reconstructed images, in seconds
+    // if these fields are not set, the buffered image header will be used
+    hoNDArray<real_value_type> recon_time_stamp_;
+    hoNDArray<real_value_type> recon_physio_time_stamp_;
+
+    // extra reconstructed results
+    // some methods can generate more than one set of reconstruction results
+    hoNDArray<T> fullkspace_second_;
+    hoNDArray<T> complexIm_second_;
+    hoNDArray<real_value_type> recon_time_stamp_second_;
+    hoNDArray<real_value_type> recon_physio_time_stamp_second_;
+
     // gfactor
     hoNDArray<T> gfactor_;
 
+    // wrap-around eig map
+    hoNDArray<T> wrap_around_map_;
+
     // -------------------------------
     // buffers for computation
     // -------------------------------
@@ -490,114 +718,162 @@ bool gtPlusReconWorkOrder<T>::enforceConsistency(ISMRMRDDIM& /*lastDim*/)
 template <typename T> 
 void gtPlusReconWorkOrder<T>::duplicatePara(gtPlusReconWorkOrderPara& worder) const
 {
-    worder.CalibMode_ = CalibMode_;
-    worder.InterleaveDim_ = InterleaveDim_;
-
-    worder.acceFactorE1_ = acceFactorE1_;
-    worder.acceFactorE2_ = acceFactorE2_;
-
-    worder.kSpaceCenterRO_ = kSpaceCenterRO_;
-    worder.kSpaceCenterEncode1_ = kSpaceCenterEncode1_;
-    worder.kSpaceCenterEncode2_ = kSpaceCenterEncode2_;
-
-    worder.kSpaceMaxRO_ = kSpaceMaxRO_;
-    worder.kSpaceMaxEncode1_ = kSpaceMaxEncode1_;
-    worder.kSpaceMaxEncode2_ = kSpaceMaxEncode2_;
-
-    worder.workFlow_BufferKernel_ = workFlow_BufferKernel_;
-    worder.workFlow_use_BufferedKernel_ = workFlow_use_BufferedKernel_;
-    worder.num_channels_res_ = num_channels_res_;
-
-    worder.upstream_coil_compression_ = upstream_coil_compression_;
-    worder.upstream_coil_compression_thres_ = upstream_coil_compression_thres_;
-    worder.upstream_coil_compression_num_modesKept_ = upstream_coil_compression_num_modesKept_;
-
-    worder.downstream_coil_compression_ = downstream_coil_compression_;
-    worder.coil_compression_thres_ = coil_compression_thres_;
-    worder.coil_compression_num_modesKept_ = coil_compression_num_modesKept_;
-
-    worder.coil_map_algorithm_ = coil_map_algorithm_;
-    worder.csm_kSize_ = csm_kSize_;
-    worder.csm_powermethod_num_ = csm_powermethod_num_;
-    worder.csm_true_3D_ = csm_true_3D_;
-    worder.csm_iter_num_ = csm_iter_num_;
-    worder.csm_iter_thres_ = csm_iter_thres_;
-    worder.csm_use_gpu_ = csm_use_gpu_;
-
-    worder.start_RO_ = start_RO_;
-    worder.end_RO_ = end_RO_;
-
-    worder.start_E1_ = start_E1_;
-    worder.end_E1_ = end_E1_;
-
-    worder.start_E2_ = start_E2_;
-    worder.end_E2_ = end_E2_;
-
-    worder.recon_algorithm_ = recon_algorithm_;
-    worder.recon_auto_parameters_ = recon_auto_parameters_;
-
-    worder.grappa_kSize_RO_ = grappa_kSize_RO_;
-    worder.grappa_kSize_RO_ = grappa_kSize_RO_;
-    worder.grappa_kSize_E1_ = grappa_kSize_E1_;
-    worder.grappa_kSize_E2_ = grappa_kSize_E2_;
-    worder.grappa_reg_lamda_ = grappa_reg_lamda_;
-    worder.grappa_calib_over_determine_ratio_ = grappa_calib_over_determine_ratio_;
-    worder.grappa_use_gpu_ = grappa_use_gpu_;
-
-    worder.spirit_kSize_RO_ = spirit_kSize_RO_;
-    worder.spirit_kSize_E1_ = spirit_kSize_E1_;
-    worder.spirit_kSize_E2_ = spirit_kSize_E2_;
-    worder.spirit_reg_lamda_ = spirit_reg_lamda_;
-    worder.spirit_use_gpu_ = spirit_use_gpu_;
-    worder.spirit_calib_over_determine_ratio_ = spirit_calib_over_determine_ratio_;
-    worder.spirit_solve_symmetric_ = spirit_solve_symmetric_;
-    worder.spirit_iter_max_ = spirit_iter_max_;
-    worder.spirit_iter_thres_ = spirit_iter_thres_;
-    worder.spirit_print_iter_ = spirit_print_iter_;
-
-    worder.spirit_perform_linear_ = spirit_perform_linear_;
-    worder.spirit_perform_nonlinear_ = spirit_perform_nonlinear_;
-    worder.spirit_parallel_imaging_lamda_ = spirit_parallel_imaging_lamda_;
-    worder.spirit_image_reg_lamda_ = spirit_image_reg_lamda_;
-    worder.spirit_data_fidelity_lamda_ = spirit_data_fidelity_lamda_;
-    worder.spirit_ncg_iter_max_ = spirit_ncg_iter_max_;
-    worder.spirit_ncg_iter_thres_ = spirit_ncg_iter_thres_;
-    worder.spirit_ncg_scale_factor_ = spirit_ncg_scale_factor_;
-    worder.spirit_ncg_print_iter_ = spirit_ncg_print_iter_;
-    worder.spirit_use_coil_sen_map_ = spirit_use_coil_sen_map_;
-    worder.spirit_use_moco_enhancement_ = spirit_use_moco_enhancement_;
-    worder.spirit_recon_moco_images_ = spirit_recon_moco_images_;
-    worder.spirit_RO_enhancement_ratio_ = spirit_RO_enhancement_ratio_;
-    worder.spirit_E1_enhancement_ratio_ = spirit_E1_enhancement_ratio_;
-    worder.spirit_E2_enhancement_ratio_ = spirit_E2_enhancement_ratio_;
-    worder.spirit_temporal_enhancement_ratio_ = spirit_temporal_enhancement_ratio_;
-    worder.spirit_2D_scale_per_chunk_ = spirit_2D_scale_per_chunk_;
-    worder.spirit_3D_scale_per_chunk_ = spirit_3D_scale_per_chunk_;
-
-    worder.job_split_by_S_ = job_split_by_S_;
-    worder.job_num_of_N_ = job_num_of_N_;
-    worder.job_max_Megabytes_ = job_max_Megabytes_;
-    worder.job_overlap_ = job_overlap_;
-    worder.job_perform_on_control_node_ = job_perform_on_control_node_;
-
-    worder.partialFourier_algo_ = partialFourier_algo_;
-
-    worder.partialFourier_homodyne_iters_ = partialFourier_homodyne_iters_;
-    worder.partialFourier_homodyne_thres_ = partialFourier_homodyne_thres_;
-    worder.partialFourier_homodyne_densityComp_ = partialFourier_homodyne_densityComp_;
-
-    worder.partialFourier_POCS_iters_ = partialFourier_POCS_iters_;
-    worder.partialFourier_POCS_thres_ = partialFourier_POCS_thres_;
-    worder.partialFourier_POCS_transitBand_ = partialFourier_POCS_transitBand_;
-    worder.partialFourier_POCS_transitBand_E2_ = partialFourier_POCS_transitBand_E2_;
-
-    worder.partialFourier_FengHuang_kSize_RO_ = partialFourier_FengHuang_kSize_RO_;
-    worder.partialFourier_FengHuang_kSize_E1_ = partialFourier_FengHuang_kSize_E1_;
-    worder.partialFourier_FengHuang_kSize_E2_ = partialFourier_FengHuang_kSize_E2_;
-    worder.partialFourier_FengHuang_thresReg_ = partialFourier_FengHuang_thresReg_;
-    worder.partialFourier_FengHuang_sameKernel_allN_ = partialFourier_FengHuang_sameKernel_allN_;
-    worder.partialFourier_FengHuang_transitBand_ = partialFourier_FengHuang_transitBand_;
-    worder.partialFourier_FengHuang_transitBand_E2_ = partialFourier_FengHuang_transitBand_E2_;
+    worder.CalibMode_                                  = CalibMode_;
+    worder.InterleaveDim_                              = InterleaveDim_;
+
+    worder.acceFactorE1_                               = acceFactorE1_;
+    worder.acceFactorE2_                               = acceFactorE2_;
+
+    worder.kSpaceCenterRO_                             = kSpaceCenterRO_;
+    worder.kSpaceCenterEncode1_                        = kSpaceCenterEncode1_;
+    worder.kSpaceCenterEncode2_                        = kSpaceCenterEncode2_;
+
+    worder.kSpaceMaxRO_                                = kSpaceMaxRO_;
+    worder.kSpaceMaxEncode1_                           = kSpaceMaxEncode1_;
+    worder.kSpaceMaxEncode2_                           = kSpaceMaxEncode2_;
+
+    worder.workFlow_BufferKernel_                      = workFlow_BufferKernel_;
+    worder.workFlow_use_BufferedKernel_                = workFlow_use_BufferedKernel_;
+    worder.num_channels_res_                           = num_channels_res_;
+
+    worder.upstream_coil_compression_                  = upstream_coil_compression_;
+    worder.upstream_coil_compression_thres_            = upstream_coil_compression_thres_;
+    worder.upstream_coil_compression_num_modesKept_    = upstream_coil_compression_num_modesKept_;
+
+    worder.downstream_coil_compression_                = downstream_coil_compression_;
+    worder.coil_compression_thres_                     = coil_compression_thres_;
+    worder.coil_compression_num_modesKept_             = coil_compression_num_modesKept_;
+
+    worder.coil_map_algorithm_                         = coil_map_algorithm_;
+    worder.csm_kSize_                                  = csm_kSize_;
+    worder.csm_powermethod_num_                        = csm_powermethod_num_;
+    worder.csm_true_3D_                                = csm_true_3D_;
+    worder.csm_iter_num_                               = csm_iter_num_;
+    worder.csm_iter_thres_                             = csm_iter_thres_;
+    worder.csm_use_gpu_                                = csm_use_gpu_;
+
+    worder.start_RO_                                   = start_RO_;
+    worder.end_RO_                                     = end_RO_;
+
+    worder.start_E1_                                   = start_E1_;
+    worder.end_E1_                                     = end_E1_;
+
+    worder.start_E2_                                   = start_E2_;
+    worder.end_E2_                                     = end_E2_;
+
+    worder.recon_algorithm_                            = recon_algorithm_;
+    worder.recon_auto_parameters_                      = recon_auto_parameters_;
+    worder.gfactor_needed_                             = gfactor_needed_;
+    worder.wrap_around_map_needed_                     = wrap_around_map_needed_;
+
+    worder.grappa_kSize_RO_                            = grappa_kSize_RO_;
+    worder.grappa_kSize_RO_                            = grappa_kSize_RO_;
+    worder.grappa_kSize_E1_                            = grappa_kSize_E1_;
+    worder.grappa_kSize_E2_                            = grappa_kSize_E2_;
+    worder.grappa_reg_lamda_                           = grappa_reg_lamda_;
+    worder.grappa_calib_over_determine_ratio_          = grappa_calib_over_determine_ratio_;
+    worder.grappa_use_gpu_                             = grappa_use_gpu_;
+
+    worder.spirit_kSize_RO_                            = spirit_kSize_RO_;
+    worder.spirit_kSize_E1_                            = spirit_kSize_E1_;
+    worder.spirit_kSize_E2_                            = spirit_kSize_E2_;
+    worder.spirit_oSize_RO_                            = spirit_oSize_RO_;
+    worder.spirit_oSize_E1_                            = spirit_oSize_E1_;
+    worder.spirit_oSize_E2_                            = spirit_oSize_E2_;
+    worder.spirit_reg_lamda_                           = spirit_reg_lamda_;
+    worder.spirit_use_gpu_                             = spirit_use_gpu_;
+    worder.spirit_calib_over_determine_ratio_          = spirit_calib_over_determine_ratio_;
+    worder.spirit_solve_symmetric_                     = spirit_solve_symmetric_;
+    worder.spirit_iter_max_                            = spirit_iter_max_;
+    worder.spirit_iter_thres_                          = spirit_iter_thres_;
+    worder.spirit_print_iter_                          = spirit_print_iter_;
+
+    worder.spirit_perform_linear_                      = spirit_perform_linear_;
+    worder.spirit_perform_nonlinear_                   = spirit_perform_nonlinear_;
+    worder.spirit_parallel_imaging_lamda_              = spirit_parallel_imaging_lamda_;
+    worder.spirit_image_reg_lamda_                     = spirit_image_reg_lamda_;
+    worder.spirit_data_fidelity_lamda_                 = spirit_data_fidelity_lamda_;
+    worder.spirit_ncg_iter_max_                        = spirit_ncg_iter_max_;
+    worder.spirit_ncg_iter_thres_                      = spirit_ncg_iter_thres_;
+    worder.spirit_ncg_scale_factor_                    = spirit_ncg_scale_factor_;
+    worder.spirit_ncg_print_iter_                      = spirit_ncg_print_iter_;
+    worder.spirit_slep_iter_max_                       = spirit_slep_iter_max_;
+    worder.spirit_slep_iter_thres_                     = spirit_slep_iter_thres_;
+    worder.spirit_slep_print_iter_                     = spirit_slep_print_iter_;
+    worder.spirit_slep_keep_third_dimension_coeff_     = spirit_slep_keep_third_dimension_coeff_;
+    worder.spirit_slep_keep_approx_coeff_              = spirit_slep_keep_approx_coeff_;
+    worder.spirit_slep_scale_factor_                   = spirit_slep_scale_factor_;
+    worder.spirit_use_coil_sen_map_                    = spirit_use_coil_sen_map_;
+    worder.spirit_use_moco_enhancement_                = spirit_use_moco_enhancement_;
+    worder.spirit_recon_moco_images_                   = spirit_recon_moco_images_;
+    worder.spirit_RO_enhancement_ratio_                = spirit_RO_enhancement_ratio_;
+    worder.spirit_E1_enhancement_ratio_                = spirit_E1_enhancement_ratio_;
+    worder.spirit_E2_enhancement_ratio_                = spirit_E2_enhancement_ratio_;
+    worder.spirit_temporal_enhancement_ratio_          = spirit_temporal_enhancement_ratio_;
+    worder.spirit_2D_scale_per_chunk_                  = spirit_2D_scale_per_chunk_;
+    worder.spirit_3D_scale_per_chunk_                  = spirit_3D_scale_per_chunk_;
+
+    worder.retro_gated_images_                         = retro_gated_images_;
+    worder.retro_gated_segment_size_                   = retro_gated_segment_size_;
+    worder.retro_gated_interp_method_                  = retro_gated_interp_method_;
+
+    worder.kspace_binning_number_of_cardiac_phases_                 = kspace_binning_number_of_cardiac_phases_;
+    worder.kspace_binning_minimal_cardiac_phase_width_              = kspace_binning_minimal_cardiac_phase_width_;
+    worder.kspace_binning_multiple_channel_recon_                   = kspace_binning_multiple_channel_recon_;
+    worder.kspace_binning_iterative_non_linear_recon_               = kspace_binning_iterative_non_linear_recon_;
+    worder.kspace_binning_iterative_non_linear_recon_slep_          = kspace_binning_iterative_non_linear_recon_slep_;
+    worder.kspace_binning_multiple_channel_recon_with_coil_map_     = kspace_binning_multiple_channel_recon_with_coil_map_;
+    worder.kspace_binning_compute_navigator_signal_                 = kspace_binning_compute_navigator_signal_;
+    worder.kspace_binning_navigator_moco_level_                     = kspace_binning_navigator_moco_level_;
+    memcpy(worder.kspace_binning_navigator_moco_iter_, kspace_binning_navigator_moco_iter_, sizeof(size_t)*MAX_MOCO_LEVEL);
+    worder.kspace_binning_navigator_hilbert_strength_               = kspace_binning_navigator_hilbert_strength_;
+    worder.kspace_binning_navigator_dissimilarity_sigma_            = kspace_binning_navigator_dissimilarity_sigma_;
+    worder.kspace_binning_navigator_bidirectional_moco_             = kspace_binning_navigator_bidirectional_moco_;
+    worder.kspace_binning_moco_level_                               = kspace_binning_moco_level_;
+    memcpy(worder.kspace_binning_moco_iter_, kspace_binning_moco_iter_, sizeof(size_t)*MAX_MOCO_LEVEL);
+    worder.kspace_binning_moco_hilbert_strength_                    = kspace_binning_moco_hilbert_strength_;
+    worder.kspace_binning_moco_dissimilarity_sigma_                 = kspace_binning_moco_dissimilarity_sigma_;
+    worder.kspace_binning_bidirectional_moco_                       = kspace_binning_bidirectional_moco_;
+    worder.kspace_binning_soft_combination_                         = kspace_binning_soft_combination_;
+    worder.kspace_binning_navigator_window_wide_                    = kspace_binning_navigator_window_wide_;
+    worder.kspace_binning_navigator_window_narrow_                  = kspace_binning_navigator_window_narrow_;
+    worder.kspace_binning_method_warpping_                          = kspace_binning_method_warpping_;
+    worder.kspace_binning_exclude_last_cardiac_cycle_               = kspace_binning_exclude_last_cardiac_cycle_;
+    worder.kspace_binning_number_of_central_kspace_blocks_          = kspace_binning_number_of_central_kspace_blocks_;
+    worder.kspace_binning_max_temporal_window_                      = kspace_binning_max_temporal_window_;
+    worder.kspace_binning_temporal_window_                          = kspace_binning_temporal_window_;
+    worder.kspace_binning_best_cardiac_cycle_interpolator_          = kspace_binning_best_cardiac_cycle_interpolator_;
+    worder.kspace_binning_data_length_used_for_recon_               = kspace_binning_data_length_used_for_recon_;
+    worder.kspace_binning_fill_kspace_with_neighbors_               = kspace_binning_fill_kspace_with_neighbors_;
+    worder.kspace_binning_flow_in_e1_                               = kspace_binning_flow_in_e1_;
+    worder.kspace_binning_flow_recon_jointly_                       = kspace_binning_flow_recon_jointly_;
+
+    worder.motion_comp_num_of_PD_images_                            = motion_comp_num_of_PD_images_;
+
+    worder.job_split_by_S_                             = job_split_by_S_;
+    worder.job_num_of_N_                               = job_num_of_N_;
+    worder.job_max_Megabytes_                          = job_max_Megabytes_;
+    worder.job_overlap_                                = job_overlap_;
+    worder.job_perform_on_control_node_                = job_perform_on_control_node_;
+
+    worder.partialFourier_algo_                        = partialFourier_algo_;
+
+    worder.partialFourier_homodyne_iters_              = partialFourier_homodyne_iters_;
+    worder.partialFourier_homodyne_thres_              = partialFourier_homodyne_thres_;
+    worder.partialFourier_homodyne_densityComp_        = partialFourier_homodyne_densityComp_;
+
+    worder.partialFourier_POCS_iters_                  = partialFourier_POCS_iters_;
+    worder.partialFourier_POCS_thres_                  = partialFourier_POCS_thres_;
+    worder.partialFourier_POCS_transitBand_            = partialFourier_POCS_transitBand_;
+    worder.partialFourier_POCS_transitBand_E2_         = partialFourier_POCS_transitBand_E2_;
+
+    worder.partialFourier_FengHuang_kSize_RO_          = partialFourier_FengHuang_kSize_RO_;
+    worder.partialFourier_FengHuang_kSize_E1_          = partialFourier_FengHuang_kSize_E1_;
+    worder.partialFourier_FengHuang_kSize_E2_          = partialFourier_FengHuang_kSize_E2_;
+    worder.partialFourier_FengHuang_thresReg_          = partialFourier_FengHuang_thresReg_;
+    worder.partialFourier_FengHuang_sameKernel_allN_   = partialFourier_FengHuang_sameKernel_allN_;
+    worder.partialFourier_FengHuang_transitBand_       = partialFourier_FengHuang_transitBand_;
+    worder.partialFourier_FengHuang_transitBand_E2_    = partialFourier_FengHuang_transitBand_E2_;
 }
 
 template <typename T> 
@@ -605,142 +881,190 @@ void gtPlusReconWorkOrder<T>::duplicate(gtPlusReconWorkOrder<T>& worder) const
 {
     this->duplicatePara(worder);
 
-    worder.dataDimStartingIndexes_ = dataDimStartingIndexes_;
+    worder.dataDimStartingIndexes_      = dataDimStartingIndexes_;
 
-    worder.filterRO_ = filterRO_;
-    worder.filterE1_ = filterE1_;
-    worder.filterE2_ = filterE2_;
-    worder.filterROE1_ = filterROE1_;
-    worder.filterROE1E2_ = filterROE1E2_;
+    worder.filterRO_                    = filterRO_;
+    worder.filterE1_                    = filterE1_;
+    worder.filterE2_                    = filterE2_;
+    worder.filterROE1_                  = filterROE1_;
+    worder.filterROE1E2_                = filterROE1E2_;
 
-    worder.filterRO_ref_ = filterRO_ref_;
-    worder.filterE1_ref_ = filterE1_ref_;
-    worder.filterE2_ref_ = filterE2_ref_;
-    worder.filterROE1_ref_ = filterROE1_ref_;
-    worder.filterROE1E2_ref_ = filterROE1E2_ref_;
+    worder.filterRO_ref_                = filterRO_ref_;
+    worder.filterE1_ref_                = filterE1_ref_;
+    worder.filterE2_ref_                = filterE2_ref_;
+    worder.filterROE1_ref_              = filterROE1_ref_;
+    worder.filterROE1E2_ref_            = filterROE1E2_ref_;
 
-    worder.filterRO_partialfourier_ = filterRO_partialfourier_;
-    worder.filterE1_partialfourier_ = filterE1_partialfourier_;
-    worder.filterE2_partialfourier_ = filterE2_partialfourier_;
-    worder.filterROE1_partialfourier_ = filterROE1_partialfourier_;
+    worder.filterRO_partialfourier_     = filterRO_partialfourier_;
+    worder.filterE1_partialfourier_     = filterE1_partialfourier_;
+    worder.filterE2_partialfourier_     = filterE2_partialfourier_;
+    worder.filterROE1_partialfourier_   = filterROE1_partialfourier_;
     worder.filterROE1E2_partialfourier_ = filterROE1E2_partialfourier_;
 
-    worder.CloudComputing_ = CloudComputing_;
-    worder.CloudSize_ = CloudSize_;
-    worder.gt_cloud_ = gt_cloud_;
+    worder.CloudComputing_              = CloudComputing_;
+    worder.CloudSize_                   = CloudSize_;
+    worder.gt_cloud_                    = gt_cloud_;
 }
 
 template <typename T> 
 void gtPlusReconWorkOrder<T>::copyFromPara(const gtPlusReconWorkOrderPara& worder)
 {
-    CalibMode_ = worder.CalibMode_;
-    InterleaveDim_ = worder.InterleaveDim_;
-
-    acceFactorE1_ = worder.acceFactorE1_;
-    acceFactorE2_ = worder.acceFactorE2_;
-
-    kSpaceCenterRO_ = worder.kSpaceCenterRO_;
-    kSpaceCenterEncode1_ = worder.kSpaceCenterEncode1_;
-    kSpaceCenterEncode2_ = worder.kSpaceCenterEncode2_;
-
-    kSpaceMaxRO_ = worder.kSpaceMaxRO_;
-    kSpaceMaxEncode1_ = worder.kSpaceMaxEncode1_;
-    kSpaceMaxEncode2_ = worder.kSpaceMaxEncode2_;
-
-    workFlow_BufferKernel_ = worder.workFlow_BufferKernel_;
-    workFlow_use_BufferedKernel_ = worder.workFlow_use_BufferedKernel_;
-    num_channels_res_ = worder.num_channels_res_;
-
-    upstream_coil_compression_ = worder.upstream_coil_compression_;
-    upstream_coil_compression_thres_ = worder.upstream_coil_compression_thres_;
-    upstream_coil_compression_num_modesKept_ = worder.upstream_coil_compression_num_modesKept_;
-
-    downstream_coil_compression_ = worder.downstream_coil_compression_;
-    coil_compression_thres_ = worder.coil_compression_thres_;
-    coil_compression_num_modesKept_ = worder.coil_compression_num_modesKept_;
-
-    coil_map_algorithm_ = worder.coil_map_algorithm_;
-    csm_kSize_ = worder.csm_kSize_;
-    csm_powermethod_num_ = worder.csm_powermethod_num_;
-    csm_true_3D_ = worder.csm_true_3D_;
-    csm_iter_num_ = worder.csm_iter_num_;
-    csm_iter_thres_ = worder.csm_iter_thres_;
-    csm_use_gpu_ = worder.csm_use_gpu_;
-
-    start_RO_ = worder.start_RO_;
-    end_RO_ = worder.end_RO_;
-
-    start_E1_ = worder.start_E1_;
-    end_E1_ = worder.end_E1_;
-
-    start_E2_ = worder.start_E2_;
-    end_E2_ = worder.end_E2_;
-
-    recon_algorithm_ = worder.recon_algorithm_;
-    recon_auto_parameters_ = worder.recon_auto_parameters_;
-
-    grappa_kSize_RO_ = worder.grappa_kSize_RO_;
-    grappa_kSize_RO_ = worder.grappa_kSize_RO_;
-    grappa_kSize_E1_ = worder.grappa_kSize_E1_;
-    grappa_kSize_E2_ = worder.grappa_kSize_E2_;
-    grappa_reg_lamda_ = worder.grappa_reg_lamda_;
-    grappa_calib_over_determine_ratio_ = worder.grappa_calib_over_determine_ratio_;
-    grappa_use_gpu_ = worder.grappa_use_gpu_;
-
-    spirit_kSize_RO_ = worder.spirit_kSize_RO_;
-    spirit_kSize_E1_ = worder.spirit_kSize_E1_;
-    spirit_kSize_E2_ = worder.spirit_kSize_E2_;
-    spirit_reg_lamda_ = worder.spirit_reg_lamda_;
-    spirit_use_gpu_ = worder.spirit_use_gpu_;
-    spirit_calib_over_determine_ratio_ = worder.spirit_calib_over_determine_ratio_;
-    spirit_solve_symmetric_ = worder.spirit_solve_symmetric_;
-    spirit_iter_max_ = worder.spirit_iter_max_;
-    spirit_iter_thres_ = worder.spirit_iter_thres_;
-    spirit_print_iter_ = worder.spirit_print_iter_;
-
-    spirit_perform_linear_ = worder.spirit_perform_linear_;
-    spirit_perform_nonlinear_ = worder.spirit_perform_nonlinear_;
-    spirit_parallel_imaging_lamda_ = worder.spirit_parallel_imaging_lamda_;
-    spirit_image_reg_lamda_ = worder.spirit_image_reg_lamda_;
-    spirit_data_fidelity_lamda_ = worder.spirit_data_fidelity_lamda_;
-    spirit_ncg_iter_max_ = worder.spirit_ncg_iter_max_;
-    spirit_ncg_iter_thres_ = worder.spirit_ncg_iter_thres_;
-    spirit_ncg_scale_factor_ = worder.spirit_ncg_scale_factor_;
-    spirit_ncg_print_iter_ = worder.spirit_ncg_print_iter_;
-    spirit_use_coil_sen_map_ = worder.spirit_use_coil_sen_map_;
-    spirit_use_moco_enhancement_ = worder.spirit_use_moco_enhancement_;
-    spirit_recon_moco_images_ = worder.spirit_recon_moco_images_;
-    spirit_RO_enhancement_ratio_ = worder.spirit_RO_enhancement_ratio_;
-    spirit_E1_enhancement_ratio_ = worder.spirit_E1_enhancement_ratio_;
-    spirit_E2_enhancement_ratio_ = worder.spirit_E2_enhancement_ratio_;
-    spirit_temporal_enhancement_ratio_ = worder.spirit_temporal_enhancement_ratio_;
-    spirit_2D_scale_per_chunk_ = worder.spirit_2D_scale_per_chunk_;
-    spirit_3D_scale_per_chunk_ = worder.spirit_3D_scale_per_chunk_;
-
-    job_split_by_S_ = worder.job_split_by_S_;
-    job_num_of_N_ = worder.job_num_of_N_;
-    job_max_Megabytes_ = worder.job_max_Megabytes_;
-    job_overlap_ = worder.job_overlap_;
-    job_perform_on_control_node_ = worder.job_perform_on_control_node_;
-
-    partialFourier_algo_ = worder.partialFourier_algo_;
-
-    partialFourier_homodyne_iters_ = worder.partialFourier_homodyne_iters_;
-    partialFourier_homodyne_thres_ = worder.partialFourier_homodyne_thres_;
-    partialFourier_homodyne_densityComp_ = worder.partialFourier_homodyne_densityComp_;
-
-    partialFourier_POCS_iters_ = worder.partialFourier_POCS_iters_;
-    partialFourier_POCS_thres_ = worder.partialFourier_POCS_thres_;
-    partialFourier_POCS_transitBand_ = worder.partialFourier_POCS_transitBand_;
-    partialFourier_POCS_transitBand_E2_ = worder.partialFourier_POCS_transitBand_E2_;
-
-    partialFourier_FengHuang_kSize_RO_ = worder.partialFourier_FengHuang_kSize_RO_;
-    partialFourier_FengHuang_kSize_E1_ = worder.partialFourier_FengHuang_kSize_E1_;
-    partialFourier_FengHuang_kSize_E2_ = worder.partialFourier_FengHuang_kSize_E2_;
-    partialFourier_FengHuang_thresReg_ = worder.partialFourier_FengHuang_thresReg_;
-    partialFourier_FengHuang_sameKernel_allN_ = worder.partialFourier_FengHuang_sameKernel_allN_;
-    partialFourier_FengHuang_transitBand_ = worder.partialFourier_FengHuang_transitBand_;
-    partialFourier_FengHuang_transitBand_E2_ = worder.partialFourier_FengHuang_transitBand_E2_;
+    CalibMode_                                  = worder.CalibMode_;
+    InterleaveDim_                              = worder.InterleaveDim_;
+
+    acceFactorE1_                               = worder.acceFactorE1_;
+    acceFactorE2_                               = worder.acceFactorE2_;
+
+    kSpaceCenterRO_                             = worder.kSpaceCenterRO_;
+    kSpaceCenterEncode1_                        = worder.kSpaceCenterEncode1_;
+    kSpaceCenterEncode2_                        = worder.kSpaceCenterEncode2_;
+
+    kSpaceMaxRO_                                = worder.kSpaceMaxRO_;
+    kSpaceMaxEncode1_                           = worder.kSpaceMaxEncode1_;
+    kSpaceMaxEncode2_                           = worder.kSpaceMaxEncode2_;
+
+    workFlow_BufferKernel_                      = worder.workFlow_BufferKernel_;
+    workFlow_use_BufferedKernel_                = worder.workFlow_use_BufferedKernel_;
+    num_channels_res_                           = worder.num_channels_res_;
+
+    upstream_coil_compression_                  = worder.upstream_coil_compression_;
+    upstream_coil_compression_thres_            = worder.upstream_coil_compression_thres_;
+    upstream_coil_compression_num_modesKept_    = worder.upstream_coil_compression_num_modesKept_;
+
+    downstream_coil_compression_                = worder.downstream_coil_compression_;
+    coil_compression_thres_                     = worder.coil_compression_thres_;
+    coil_compression_num_modesKept_             = worder.coil_compression_num_modesKept_;
+
+    coil_map_algorithm_                         = worder.coil_map_algorithm_;
+    csm_kSize_                                  = worder.csm_kSize_;
+    csm_powermethod_num_                        = worder.csm_powermethod_num_;
+    csm_true_3D_                                = worder.csm_true_3D_;
+    csm_iter_num_                               = worder.csm_iter_num_;
+    csm_iter_thres_                             = worder.csm_iter_thres_;
+    csm_use_gpu_                                = worder.csm_use_gpu_;
+
+    start_RO_                                   = worder.start_RO_;
+    end_RO_                                     = worder.end_RO_;
+
+    start_E1_                                   = worder.start_E1_;
+    end_E1_                                     = worder.end_E1_;
+
+    start_E2_                                   = worder.start_E2_;
+    end_E2_                                     = worder.end_E2_;
+
+    recon_algorithm_                            = worder.recon_algorithm_;
+    recon_auto_parameters_                      = worder.recon_auto_parameters_;
+    gfactor_needed_                             = worder.gfactor_needed_;
+    wrap_around_map_needed_                     = worder.wrap_around_map_needed_;
+
+    grappa_kSize_RO_                            = worder.grappa_kSize_RO_;
+    grappa_kSize_RO_                            = worder.grappa_kSize_RO_;
+    grappa_kSize_E1_                            = worder.grappa_kSize_E1_;
+    grappa_kSize_E2_                            = worder.grappa_kSize_E2_;
+    grappa_reg_lamda_                           = worder.grappa_reg_lamda_;
+    grappa_calib_over_determine_ratio_          = worder.grappa_calib_over_determine_ratio_;
+    grappa_use_gpu_                             = worder.grappa_use_gpu_;
+
+    spirit_kSize_RO_                            = worder.spirit_kSize_RO_;
+    spirit_kSize_E1_                            = worder.spirit_kSize_E1_;
+    spirit_kSize_E2_                            = worder.spirit_kSize_E2_;
+    spirit_oSize_RO_                            = worder.spirit_oSize_RO_;
+    spirit_oSize_E1_                            = worder.spirit_oSize_E1_;
+    spirit_oSize_E2_                            = worder.spirit_oSize_E2_;
+    spirit_reg_lamda_                           = worder.spirit_reg_lamda_;
+    spirit_use_gpu_                             = worder.spirit_use_gpu_;
+    spirit_calib_over_determine_ratio_          = worder.spirit_calib_over_determine_ratio_;
+    spirit_solve_symmetric_                     = worder.spirit_solve_symmetric_;
+    spirit_iter_max_                            = worder.spirit_iter_max_;
+    spirit_iter_thres_                          = worder.spirit_iter_thres_;
+    spirit_print_iter_                          = worder.spirit_print_iter_;
+
+    spirit_perform_linear_                      = worder.spirit_perform_linear_;
+    spirit_perform_nonlinear_                   = worder.spirit_perform_nonlinear_;
+    spirit_parallel_imaging_lamda_              = worder.spirit_parallel_imaging_lamda_;
+    spirit_image_reg_lamda_                     = worder.spirit_image_reg_lamda_;
+    spirit_data_fidelity_lamda_                 = worder.spirit_data_fidelity_lamda_;
+    spirit_ncg_iter_max_                        = worder.spirit_ncg_iter_max_;
+    spirit_ncg_iter_thres_                      = worder.spirit_ncg_iter_thres_;
+    spirit_ncg_scale_factor_                    = worder.spirit_ncg_scale_factor_;
+    spirit_ncg_print_iter_                      = worder.spirit_ncg_print_iter_;
+    spirit_slep_iter_max_                       = worder.spirit_slep_iter_max_;
+    spirit_slep_iter_thres_                     = worder.spirit_slep_iter_thres_;
+    spirit_slep_print_iter_                     = worder.spirit_slep_print_iter_;
+    spirit_slep_keep_third_dimension_coeff_     = worder.spirit_slep_keep_third_dimension_coeff_;
+    spirit_slep_keep_approx_coeff_              = worder.spirit_slep_keep_approx_coeff_;
+    spirit_slep_scale_factor_                   = worder.spirit_slep_scale_factor_;
+    spirit_use_coil_sen_map_                    = worder.spirit_use_coil_sen_map_;
+    spirit_use_moco_enhancement_                = worder.spirit_use_moco_enhancement_;
+    spirit_recon_moco_images_                   = worder.spirit_recon_moco_images_;
+    spirit_RO_enhancement_ratio_                = worder.spirit_RO_enhancement_ratio_;
+    spirit_E1_enhancement_ratio_                = worder.spirit_E1_enhancement_ratio_;
+    spirit_E2_enhancement_ratio_                = worder.spirit_E2_enhancement_ratio_;
+    spirit_temporal_enhancement_ratio_          = worder.spirit_temporal_enhancement_ratio_;
+    spirit_2D_scale_per_chunk_                  = worder.spirit_2D_scale_per_chunk_;
+    spirit_3D_scale_per_chunk_                  = worder.spirit_3D_scale_per_chunk_;
+
+    retro_gated_images_                         = worder.retro_gated_images_;
+    retro_gated_segment_size_                   = worder.retro_gated_segment_size_;
+    retro_gated_interp_method_                  = worder.retro_gated_interp_method_;
+
+    kspace_binning_number_of_cardiac_phases_          = worder.kspace_binning_number_of_cardiac_phases_;
+    kspace_binning_minimal_cardiac_phase_width_          = worder.kspace_binning_minimal_cardiac_phase_width_;
+    kspace_binning_multiple_channel_recon_         = worder.kspace_binning_multiple_channel_recon_;
+    kspace_binning_iterative_non_linear_recon_              = worder.kspace_binning_iterative_non_linear_recon_;
+    kspace_binning_iterative_non_linear_recon_slep_              = worder.kspace_binning_iterative_non_linear_recon_slep_;
+    kspace_binning_multiple_channel_recon_with_coil_map_ = worder.kspace_binning_multiple_channel_recon_with_coil_map_;
+    kspace_binning_compute_navigator_signal_       = worder.kspace_binning_compute_navigator_signal_;
+    kspace_binning_navigator_moco_level_                = worder.kspace_binning_navigator_moco_level_;
+    memcpy(kspace_binning_navigator_moco_iter_, worder.kspace_binning_navigator_moco_iter_, sizeof(size_t)*MAX_MOCO_LEVEL);
+    kspace_binning_navigator_hilbert_strength_                    = worder.kspace_binning_navigator_hilbert_strength_;
+    kspace_binning_navigator_dissimilarity_sigma_                 = worder.kspace_binning_navigator_dissimilarity_sigma_;
+    kspace_binning_navigator_bidirectional_moco_         = worder.kspace_binning_navigator_bidirectional_moco_;
+    kspace_binning_moco_level_                   = worder.kspace_binning_moco_level_;
+    memcpy(kspace_binning_moco_iter_, worder.kspace_binning_moco_iter_, sizeof(size_t)*MAX_MOCO_LEVEL);
+    kspace_binning_moco_hilbert_strength_                       = worder.kspace_binning_moco_hilbert_strength_;
+    kspace_binning_moco_dissimilarity_sigma_                    = worder.kspace_binning_moco_dissimilarity_sigma_;
+    kspace_binning_bidirectional_moco_            = worder.kspace_binning_bidirectional_moco_;
+    kspace_binning_soft_combination_            = worder.kspace_binning_soft_combination_;
+    kspace_binning_navigator_window_wide_                  = worder.kspace_binning_navigator_window_wide_;
+    kspace_binning_navigator_window_narrow_                = worder.kspace_binning_navigator_window_narrow_;
+    kspace_binning_method_warpping_              = worder.kspace_binning_method_warpping_;
+    kspace_binning_exclude_last_cardiac_cycle_            = worder.kspace_binning_exclude_last_cardiac_cycle_;
+    kspace_binning_number_of_central_kspace_blocks_         = worder.kspace_binning_number_of_central_kspace_blocks_;
+    kspace_binning_max_temporal_window_    = worder.kspace_binning_max_temporal_window_;
+    kspace_binning_temporal_window_    = worder.kspace_binning_temporal_window_;
+    kspace_binning_best_cardiac_cycle_interpolator_        = worder.kspace_binning_best_cardiac_cycle_interpolator_;
+    kspace_binning_data_length_used_for_recon_            = worder.kspace_binning_data_length_used_for_recon_;
+    kspace_binning_fill_kspace_with_neighbors_ = worder.kspace_binning_fill_kspace_with_neighbors_;
+    kspace_binning_flow_in_e1_ = worder.kspace_binning_flow_in_e1_;
+    kspace_binning_flow_recon_jointly_ = worder.kspace_binning_flow_recon_jointly_;
+
+    motion_comp_num_of_PD_images_ = worder.motion_comp_num_of_PD_images_;
+
+    job_split_by_S_                             = worder.job_split_by_S_;
+    job_num_of_N_                               = worder.job_num_of_N_;
+    job_max_Megabytes_                          = worder.job_max_Megabytes_;
+    job_overlap_                                = worder.job_overlap_;
+    job_perform_on_control_node_                = worder.job_perform_on_control_node_;
+
+    partialFourier_algo_                        = worder.partialFourier_algo_;
+
+    partialFourier_homodyne_iters_              = worder.partialFourier_homodyne_iters_;
+    partialFourier_homodyne_thres_              = worder.partialFourier_homodyne_thres_;
+    partialFourier_homodyne_densityComp_        = worder.partialFourier_homodyne_densityComp_;
+
+    partialFourier_POCS_iters_                  = worder.partialFourier_POCS_iters_;
+    partialFourier_POCS_thres_                  = worder.partialFourier_POCS_thres_;
+    partialFourier_POCS_transitBand_            = worder.partialFourier_POCS_transitBand_;
+    partialFourier_POCS_transitBand_E2_         = worder.partialFourier_POCS_transitBand_E2_;
+
+    partialFourier_FengHuang_kSize_RO_          = worder.partialFourier_FengHuang_kSize_RO_;
+    partialFourier_FengHuang_kSize_E1_          = worder.partialFourier_FengHuang_kSize_E1_;
+    partialFourier_FengHuang_kSize_E2_          = worder.partialFourier_FengHuang_kSize_E2_;
+    partialFourier_FengHuang_thresReg_          = worder.partialFourier_FengHuang_thresReg_;
+    partialFourier_FengHuang_sameKernel_allN_   = worder.partialFourier_FengHuang_sameKernel_allN_;
+    partialFourier_FengHuang_transitBand_       = worder.partialFourier_FengHuang_transitBand_;
+    partialFourier_FengHuang_transitBand_E2_    = worder.partialFourier_FengHuang_transitBand_E2_;
 }
 
 template <typename T> 
@@ -788,6 +1112,8 @@ void gtPlusReconWorkOrder<T>::printInfo(std::ostream& os) const
     os << std::endl;
     GADGET_OSTREAM_PRINT(os, recon_algorithm_);
     GADGET_OSTREAM_PRINT(os, recon_auto_parameters_);
+    GADGET_OSTREAM_PRINT(os, gfactor_needed_);
+    GADGET_OSTREAM_PRINT(os, wrap_around_map_needed_);
     os << std::endl;
     GADGET_OSTREAM_PRINT(os, grappa_kSize_RO_);
     GADGET_OSTREAM_PRINT(os, grappa_kSize_E1_);
@@ -799,6 +1125,9 @@ void gtPlusReconWorkOrder<T>::printInfo(std::ostream& os) const
     GADGET_OSTREAM_PRINT(os, spirit_kSize_RO_);
     GADGET_OSTREAM_PRINT(os, spirit_kSize_E1_);
     GADGET_OSTREAM_PRINT(os, spirit_kSize_E2_);
+    GADGET_OSTREAM_PRINT(os, spirit_oSize_RO_);
+    GADGET_OSTREAM_PRINT(os, spirit_oSize_E1_);
+    GADGET_OSTREAM_PRINT(os, spirit_oSize_E2_);
     GADGET_OSTREAM_PRINT(os, spirit_reg_lamda_);
     GADGET_OSTREAM_PRINT(os, spirit_use_gpu_);
     GADGET_OSTREAM_PRINT(os, spirit_calib_over_determine_ratio_);
@@ -816,6 +1145,12 @@ void gtPlusReconWorkOrder<T>::printInfo(std::ostream& os) const
     GADGET_OSTREAM_PRINT(os, spirit_ncg_iter_thres_);
     GADGET_OSTREAM_PRINT(os, spirit_ncg_scale_factor_);
     GADGET_OSTREAM_PRINT(os, spirit_ncg_print_iter_);
+    GADGET_OSTREAM_PRINT(os, spirit_slep_iter_max_);
+    GADGET_OSTREAM_PRINT(os, spirit_slep_iter_thres_);
+    GADGET_OSTREAM_PRINT(os, spirit_slep_print_iter_);
+    GADGET_OSTREAM_PRINT(os, spirit_slep_keep_third_dimension_coeff_);
+    GADGET_OSTREAM_PRINT(os, spirit_slep_keep_approx_coeff_);
+    GADGET_OSTREAM_PRINT(os, spirit_slep_scale_factor_);
     GADGET_OSTREAM_PRINT(os, spirit_use_coil_sen_map_);
     GADGET_OSTREAM_PRINT(os, spirit_use_moco_enhancement_);
     GADGET_OSTREAM_PRINT(os, spirit_recon_moco_images_);
@@ -826,6 +1161,59 @@ void gtPlusReconWorkOrder<T>::printInfo(std::ostream& os) const
     GADGET_OSTREAM_PRINT(os, spirit_2D_scale_per_chunk_);
     GADGET_OSTREAM_PRINT(os, spirit_3D_scale_per_chunk_);
     os << std::endl;
+    GADGET_OSTREAM_PRINT(os, retro_gated_images_);
+    GADGET_OSTREAM_PRINT(os, retro_gated_segment_size_);
+    GADGET_OSTREAM_PRINT(os, retro_gated_interp_method_);
+    os << std::endl;
+    GADGET_OSTREAM_PRINT(os, kspace_binning_number_of_cardiac_phases_);
+    GADGET_OSTREAM_PRINT(os, kspace_binning_minimal_cardiac_phase_width_);
+    GADGET_OSTREAM_PRINT(os, kspace_binning_multiple_channel_recon_);
+    GADGET_OSTREAM_PRINT(os, kspace_binning_iterative_non_linear_recon_);
+    GADGET_OSTREAM_PRINT(os, kspace_binning_iterative_non_linear_recon_slep_);
+    GADGET_OSTREAM_PRINT(os, kspace_binning_multiple_channel_recon_with_coil_map_);
+    os << std::endl;
+    GADGET_OSTREAM_PRINT(os, kspace_binning_compute_navigator_signal_);
+    GADGET_OSTREAM_PRINT(os, kspace_binning_navigator_moco_level_);
+    os << " [ ";
+    size_t ii; // shared loop index for both bracketed lists printed below
+    for ( ii=0; ii<kspace_binning_navigator_moco_level_; ii++ ) // NOTE(review): indexes the array by the level value; assumes level <= MAX_MOCO_LEVEL (the element count used by the memcpy of this array in the copy code) - confirm
+    {
+        os << kspace_binning_navigator_moco_iter_[ii] << " ";
+    }
+    os << " ] " << std::endl;
+
+    GADGET_OSTREAM_PRINT(os, kspace_binning_navigator_hilbert_strength_);
+    GADGET_OSTREAM_PRINT(os, kspace_binning_navigator_dissimilarity_sigma_);
+    GADGET_OSTREAM_PRINT(os, kspace_binning_navigator_bidirectional_moco_);
+    os << std::endl;
+    GADGET_OSTREAM_PRINT(os, kspace_binning_moco_level_);
+    os << " [ ";
+    for ( ii=0; ii<kspace_binning_moco_level_; ii++ ) // NOTE(review): same bound assumption as the navigator loop above (level <= MAX_MOCO_LEVEL) - confirm
+    {
+        os << kspace_binning_moco_iter_[ii] << " ";
+    }
+    os << " ] " << std::endl;
+
+    GADGET_OSTREAM_PRINT(os, kspace_binning_moco_hilbert_strength_);
+    GADGET_OSTREAM_PRINT(os, kspace_binning_moco_dissimilarity_sigma_);
+    GADGET_OSTREAM_PRINT(os, kspace_binning_bidirectional_moco_);
+    os << std::endl;
+    GADGET_OSTREAM_PRINT(os, kspace_binning_soft_combination_);
+    GADGET_OSTREAM_PRINT(os, kspace_binning_navigator_window_wide_);
+    GADGET_OSTREAM_PRINT(os, kspace_binning_navigator_window_narrow_);
+    GADGET_OSTREAM_PRINT(os, kspace_binning_method_warpping_);
+    GADGET_OSTREAM_PRINT(os, kspace_binning_exclude_last_cardiac_cycle_);
+    GADGET_OSTREAM_PRINT(os, kspace_binning_number_of_central_kspace_blocks_);
+    GADGET_OSTREAM_PRINT(os, kspace_binning_max_temporal_window_);
+    GADGET_OSTREAM_PRINT(os, kspace_binning_temporal_window_);
+    GADGET_OSTREAM_PRINT(os, kspace_binning_best_cardiac_cycle_interpolator_);
+    GADGET_OSTREAM_PRINT(os, kspace_binning_data_length_used_for_recon_);
+    GADGET_OSTREAM_PRINT(os, kspace_binning_fill_kspace_with_neighbors_);
+    GADGET_OSTREAM_PRINT(os, kspace_binning_flow_in_e1_);
+    GADGET_OSTREAM_PRINT(os, kspace_binning_flow_recon_jointly_);
+    os << std::endl;
+    GADGET_OSTREAM_PRINT(os, motion_comp_num_of_PD_images_);
+    os << std::endl;
     GADGET_OSTREAM_PRINT(os, job_split_by_S_);
     GADGET_OSTREAM_PRINT(os, job_num_of_N_);
     GADGET_OSTREAM_PRINT(os, job_max_Megabytes_);
diff --git a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkOrder2DT.h b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkOrder2DT.h
index 89e0cea..f19dd0c 100644
--- a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkOrder2DT.h
+++ b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkOrder2DT.h
@@ -58,15 +58,26 @@ public:
     using BaseClass::ref_;
     using BaseClass::ref_recon_;
     using BaseClass::ref_coil_map_;
+    using BaseClass::kspace_initial_;
     using BaseClass::CalibMode_;
     using BaseClass::InterleaveDim_;
     using BaseClass::acceFactorE1_;
     using BaseClass::acceFactorE2_;
     using BaseClass::num_channels_res_;
     using BaseClass::coilMap_; // [RO E1 dstCHA 1 or N S]
+
     using BaseClass::fullkspace_; // [RO E1 dstCHA N S]
     using BaseClass::complexIm_; // [RO E1 N S]
+    using BaseClass::recon_time_stamp_; // [1 1 1 N S]
+    using BaseClass::recon_physio_time_stamp_; // [1 1 1 N S]
+
+    using BaseClass::fullkspace_second_; // [RO E1 dstCHA N S]
+    using BaseClass::complexIm_second_; // [RO E1 N S]
+    using BaseClass::recon_time_stamp_second_; // [1 1 1 N S]
+    using BaseClass::recon_physio_time_stamp_second_; // [1 1 1 N S]
+
     using BaseClass::gfactor_; // [RO E1 1 or N S]
+    using BaseClass::wrap_around_map_; // [RO E1 2 1 or N S]
 
     using BaseClass::downstream_coil_compression_;
     using BaseClass::coil_compression_thres_;
@@ -114,6 +125,9 @@ public:
     using BaseClass::spirit_kSize_RO_;
     using BaseClass::spirit_kSize_E1_;
     using BaseClass::spirit_kSize_E2_;
+    using BaseClass::spirit_oSize_RO_;
+    using BaseClass::spirit_oSize_E1_;
+    using BaseClass::spirit_oSize_E2_;
     using BaseClass::spirit_reg_lamda_;
     using BaseClass::spirit_use_gpu_;
     using BaseClass::spirit_iter_max_;
@@ -129,6 +143,11 @@ public:
     using BaseClass::spirit_ncg_iter_thres_;
     using BaseClass::spirit_ncg_scale_factor_;
     using BaseClass::spirit_ncg_print_iter_;
+    using BaseClass::spirit_slep_iter_max_;
+    using BaseClass::spirit_slep_iter_thres_;
+    using BaseClass::spirit_slep_print_iter_;
+    using BaseClass::spirit_slep_keep_third_dimension_coeff_;
+    using BaseClass::spirit_slep_scale_factor_;
     using BaseClass::spirit_use_coil_sen_map_;
     using BaseClass::spirit_use_moco_enhancement_;
     using BaseClass::spirit_recon_moco_images_;
@@ -275,6 +294,14 @@ bool gtPlusReconWorkOrder2DT<T>::reset()
 
         fullkspace_.clear();
         complexIm_.clear();
+        recon_time_stamp_.clear();
+        recon_physio_time_stamp_.clear();
+
+        fullkspace_second_.clear();
+        complexIm_second_.clear();
+        recon_time_stamp_second_.clear();
+        recon_physio_time_stamp_second_.clear();
+
         gfactor_.clear();
     }
     catch(...)
diff --git a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkOrder3DT.h b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkOrder3DT.h
index 9398cd5..ef13e2b 100644
--- a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkOrder3DT.h
+++ b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorkOrder3DT.h
@@ -70,7 +70,16 @@ public:
     using BaseClass::coilMap_;
     using BaseClass::fullkspace_;
     using BaseClass::complexIm_;
+    using BaseClass::recon_time_stamp_;
+    using BaseClass::recon_physio_time_stamp_;
+
+    using BaseClass::fullkspace_second_;
+    using BaseClass::complexIm_second_;
+    using BaseClass::recon_time_stamp_second_;
+    using BaseClass::recon_physio_time_stamp_second_;
+
     using BaseClass::gfactor_;
+    using BaseClass::wrap_around_map_;
 
     using BaseClass::upstream_coil_compression_;
     using BaseClass::upstream_coil_compression_thres_;
@@ -122,6 +131,9 @@ public:
     using BaseClass::spirit_kSize_RO_;
     using BaseClass::spirit_kSize_E1_;
     using BaseClass::spirit_kSize_E2_;
+    using BaseClass::spirit_oSize_RO_;
+    using BaseClass::spirit_oSize_E1_;
+    using BaseClass::spirit_oSize_E2_;
     using BaseClass::spirit_reg_lamda_;
     using BaseClass::spirit_use_gpu_;
     using BaseClass::spirit_iter_max_;
@@ -137,6 +149,11 @@ public:
     using BaseClass::spirit_ncg_iter_thres_;
     using BaseClass::spirit_ncg_scale_factor_;
     using BaseClass::spirit_ncg_print_iter_;
+    using BaseClass::spirit_slep_iter_max_;
+    using BaseClass::spirit_slep_iter_thres_;
+    using BaseClass::spirit_slep_print_iter_;
+    using BaseClass::spirit_slep_keep_third_dimension_coeff_;
+    using BaseClass::spirit_slep_scale_factor_;
     using BaseClass::spirit_use_coil_sen_map_;
     using BaseClass::spirit_use_moco_enhancement_;
     using BaseClass::spirit_recon_moco_images_;
@@ -262,6 +279,14 @@ bool gtPlusReconWorkOrder3DT<T>::reset()
 
         fullkspace_.clear();
         complexIm_.clear();
+        recon_time_stamp_.clear();
+        recon_physio_time_stamp_.clear();
+
+        fullkspace_second_.clear();
+        complexIm_second_.clear();
+        recon_time_stamp_second_.clear();
+        recon_physio_time_stamp_second_.clear();
+
         gfactor_.clear();
     }
     catch(...)
diff --git a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker.h b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker.h
index cffd489..f6261fd 100644
--- a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker.h
+++ b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker.h
@@ -5,15 +5,14 @@
 
 #pragma once
 
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
 
 #include <string>
-#include "util/gtPlusIOAnalyze.h"
+#include "gtPlusIOAnalyze.h"
 #include "gtPlusISMRMRDReconUtil.h"
 #include "gtPlusISMRMRDReconWorkOrder.h"
 #include "gtPlusMemoryManager.h"
 #include "hoNDArrayMemoryManaged.h"
-#include "SerializableObject.h"
 #include "gtPlusCloudScheduler.h"
 
 #ifdef USE_OMP
@@ -23,7 +22,7 @@
 namespace Gadgetron { namespace gtPlus {
 
 template <typename T> 
-struct gtPlusReconJob2DT : public SerializableObject
+struct gtPlusReconJob2DT
 {
     gtPlusReconWorkOrder<T> workOrder2DT;
     hoNDArray<T> kspace;
@@ -42,7 +41,7 @@ struct gtPlusReconJob2DT : public SerializableObject
 
     ~gtPlusReconJob2DT();
 
-    virtual bool serialize(char*& buf, size_t& len) const;
+    virtual bool serialize(char*& buf, size_t& len) const ;
     virtual bool deserialize(char* buf, size_t& len);
 };
 
@@ -74,7 +73,7 @@ gtPlusReconJob2DT<T>::gtPlusReconJob2DT(const gtPlusReconJob2DT& job)
 }
 
 template <typename T> 
-bool gtPlusReconJob2DT<T>::serialize(char*& buf, size_t& len) const
+bool gtPlusReconJob2DT<T>::serialize(char*& buf, size_t& len) const 
 {
     char *bufKSpace(NULL), *bufKernel(NULL), *bufCoilMap(NULL), *bufComplexIm(NULL), *bufRes(NULL);
     try
@@ -244,7 +243,7 @@ public:
 
     typedef typename realType<T>::Type value_type;
 
-    gtPlusReconWorker() : performTiming_(false)
+    gtPlusReconWorker() : partial_fourier_handling_(true), performTiming_(false), verbose_(false)
     {
         gt_timer1_.set_timing_in_destruction(false);
         gt_timer2_.set_timing_in_destruction(false);
@@ -263,6 +262,9 @@ public:
         return true;
     }
 
+    // whether to apply partial Fourier processing; the default constructor initializes this to true
+    bool partial_fourier_handling_;
+
     // clock for timing
     Gadgetron::GadgetronTimer gt_timer1_;
     Gadgetron::GadgetronTimer gt_timer2_;
@@ -276,8 +278,12 @@ public:
     // debug folder
     std::string debugFolder_;
 
+    // verbose mode flag; the default constructor initializes this to false
+    bool verbose_;
+
     // util
     gtPlusISMRMRDReconUtil<T> gtPlus_util_;
+    gtPlusISMRMRDReconUtilComplex<T> gtPlus_util_cplx_;
 
     // memory manager
     boost::shared_ptr<gtPlusMemoryManager> gtPlus_mem_manager_;
@@ -385,14 +391,14 @@ bool gtPlusReconWorker<T>::splitReconJob(gtPlusReconWorkOrder<T>* workOrder2DT,
         size_t dstCHA = ker.get_size(3);
         size_t refN = ker.get_size(4);
 
-        size_t n, s;
+        size_t s;
         int startN, endN;
 
         if ( splitByS )
         {
             jobList.resize(S);
             startN = 0;
-            endN = N-1;
+            endN = (int)N-1;
             for ( s=0; s<S; s++ )
             {
                 GADGET_CHECK_RETURN_FALSE(createAReconJob(workOrder2DT, kspace, ker, startN, endN, s, jobList[s]));
@@ -418,16 +424,16 @@ bool gtPlusReconWorker<T>::splitReconJob(gtPlusReconWorkOrder<T>* workOrder2DT,
         startN = 0;
         while ( startN < N )
         {
-            endN = startN+jobN+overlapN-1;
+            endN = (int)(startN+jobN+overlapN-1);
             numPerN++;
 
             if ( endN >= N )
             {
-                endN = N-1;
+                endN = (int)N-1;
                 break;
             }
 
-            startN = endN-overlapN+1;
+            startN = endN-(int)overlapN+1;
         }
 
         jobList.resize(S*numPerN);
@@ -439,16 +445,16 @@ bool gtPlusReconWorker<T>::splitReconJob(gtPlusReconWorkOrder<T>* workOrder2DT,
             startN = 0;
             while ( startN < N )
             {
-                endN = startN+jobN+overlapN-1;
+                endN = (int)(startN+jobN+(int)overlapN-1);
                 num++;
 
                 if ( endN >= N )
                 {
-                    endN = N-1;
+                    endN = (int)N-1;
 
                     if ( endN-startN+1 < jobN )
                     {
-                        startN = endN-jobN+1;
+                        startN = endN-(int)jobN+1;
                         if ( startN < 0 ) startN = 0;
                     }
 
@@ -458,7 +464,7 @@ bool gtPlusReconWorker<T>::splitReconJob(gtPlusReconWorkOrder<T>* workOrder2DT,
 
                 GADGET_CHECK_RETURN_FALSE(createAReconJob(workOrder2DT, kspace, ker, startN, endN, s, jobList[s*numPerN+num-1]));
 
-                startN = endN-overlapN+1;
+                startN = endN-(int)overlapN+1;
             }
         }
     }
@@ -528,10 +534,10 @@ combineReconJob(gtPlusReconWorkOrder<T>* workOrder2DT, std::vector<gtPlusReconJo
                 if ( fillingTimes(n, s).real() > 1 )
                 {
                     hoNDArray<T> complexIm(RO, E1, workOrder2DT->complexIm_.begin()+s*RO*E1*N+n*RO*E1);
-                    Gadgetron::scal(1.0/fillingTimes(n, s).real(), complexIm);
+                    Gadgetron::scal( (value_type)(1.0)/fillingTimes(n, s).real(), complexIm);
 
                     hoNDArray<T> fullkspace(RO, E1, dstCHA, workOrder2DT->fullkspace_.begin()+s*RO*E1*dstCHA*N+n*RO*E1*dstCHA);
-                    Gadgetron::scal(1.0/fillingTimes(n, s).real(), fullkspace);
+                    Gadgetron::scal( (value_type)(1.0)/fillingTimes(n, s).real(), fullkspace);
                 }
             }
         }
@@ -560,14 +566,14 @@ computeEffectiveNodeNumberBasedOnComputingPowerIndex(gtPlusReconWorkOrder<T>* wo
             return true;
         }
 
-        double minPowerIndex = workOrder->gt_cloud_[0].get<3>();
+        double minPowerIndex = workOrder->gt_cloud_[0].template get<3>();
         double totalPowerIndex = minPowerIndex;
 
         size_t ii;
         for ( ii=1; ii<numOfNodes; ii++ )
         {
-            totalPowerIndex += workOrder->gt_cloud_[ii].get<3>();
-            if ( workOrder->gt_cloud_[ii].get<3>() < minPowerIndex ) minPowerIndex = workOrder->gt_cloud_[ii].get<3>();
+            totalPowerIndex += workOrder->gt_cloud_[ii].template get<3>();
+            if ( workOrder->gt_cloud_[ii].template get<3>() < minPowerIndex ) minPowerIndex = workOrder->gt_cloud_[ii].template get<3>();
         }
 
         numOfEffectiveNodes = (size_t)(std::floor(totalPowerIndex/minPowerIndex));
@@ -595,7 +601,7 @@ scheduleJobForNodes(gtPlusReconWorkOrder<T>* workOrder, size_t numOfJobs, std::v
         std::vector<double> powerIndexes(numOfNodes);
         for ( size_t ii=0; ii<numOfNodes; ii++ )
         {
-            powerIndexes[ii] = workOrder->gt_cloud_[ii].get<3>();
+            powerIndexes[ii] = workOrder->gt_cloud_[ii].template get<3>();
         }
 
         scheduler.setUpNodes(powerIndexes);
diff --git a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker2DT.h b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker2DT.h
index 5a80d0d..17edd93 100644
--- a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker2DT.h
+++ b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker2DT.h
@@ -48,7 +48,7 @@ public:
     {
         // check whether we have all-zeros input
         value_type v(1);
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::norm2(workOrder->data_, v));
+        Gadgetron::norm2(workOrder->data_, v);
         if ( v <= 0 )
         {
             GADGET_WARN_MSG("gtPlusReconWorker2DT, performRecon(workOrder) : incoming data contains all-zeros ... ");
@@ -68,7 +68,7 @@ public:
         {
             this->autoReconParameter(workOrder2DT);
             GADGET_MSG("Gt Plus 2DT -- automatic paramter selection ---");
-            workOrder2DT->print(std::cout);
+            if ( !this->debugFolder_.empty() ) { workOrder2DT->print(std::cout); }
         }
 
         return this->performRecon(workOrder2DT);
@@ -161,13 +161,17 @@ public:
     // estimate the job size, given the maximal memory usage for every job
     virtual bool estimateJobSize(gtPlusReconWorkOrder<T>* workOrder, size_t maxNumOfBytesPerJob, size_t overlapBetweenJobs, size_t numOfNodes, size_t& jobSize);
 
+    using BaseClass::partial_fourier_handling_;
+
     using BaseClass::gt_timer1_;
     using BaseClass::gt_timer2_;
     using BaseClass::gt_timer3_;
     using BaseClass::performTiming_;
+    using BaseClass::verbose_;
     using BaseClass::gt_exporter_;
     using BaseClass::debugFolder_;
     using BaseClass::gtPlus_util_;
+    using BaseClass::gtPlus_util_cplx_;
     using BaseClass::gtPlus_mem_manager_;
 
 protected:
@@ -360,7 +364,7 @@ bool gtPlusReconWorker2DT<T>::prepRef(gtPlusReconWorkOrder2DT<T>* workOrder2DT,
                 }
 
                 hoNDArray<T> croppedRef;
-                GADGET_CHECK_RETURN_FALSE(cropUpTo10DArray(refRecon, croppedRef, crop_offset, crop_size));
+                GADGET_CHECK_RETURN_FALSE(cropUpTo11DArray(refRecon, croppedRef, crop_offset, crop_size));
                 refRecon = croppedRef;
             }
         }
@@ -379,7 +383,7 @@ bool gtPlusReconWorker2DT<T>::prepRef(gtPlusReconWorkOrder2DT<T>* workOrder2DT,
             }
 
             hoNDArray<typename realType<T>::Type> refMag(refRecon.get_dimensions()), refMagSum;
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::absolute(refRecon, refMag));
+            GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::abs(refRecon, refMag));
             GADGET_CHECK_RETURN_FALSE(sumOverLastDimension(refMag, refMagSum));
             GADGET_CHECK_RETURN_FALSE(sumOverLastDimension(refMagSum, refMag));
             GADGET_CHECK_RETURN_FALSE(sumOverLastDimension(refMag, refMagSum));
@@ -405,10 +409,13 @@ bool gtPlusReconWorker2DT<T>::prepRef(gtPlusReconWorkOrder2DT<T>* workOrder2DT,
             if ( workOrder2DT->CalibMode_ == ISMRMRD_embedded )
             {
                 hoNDArray<T> croppedRef;
-                GADGET_CHECK_RETURN_FALSE(cropUpTo10DArray(refRecon, croppedRef, crop_offset, crop_size));
+                GADGET_CHECK_RETURN_FALSE(cropUpTo11DArray(refRecon, croppedRef, crop_offset, crop_size));
                 GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, croppedRef, "refRecon_afterCrop");
 
-                if ( workOrder2DT->recon_algorithm_ == ISMRMRD_SPIRIT || workOrder2DT->recon_algorithm_ == ISMRMRD_L1SPIRIT )
+                if ( workOrder2DT->recon_algorithm_ == ISMRMRD_SPIRIT 
+                    || workOrder2DT->recon_algorithm_ == ISMRMRD_L1SPIRIT 
+                    || workOrder2DT->recon_algorithm_ == ISMRMRD_L1SPIRIT_SLEP 
+                    || workOrder2DT->recon_algorithm_ == ISMRMRD_L1SPIRIT_SLEP_MOTION_COMP )
                 {
                     // copy the ref into the data
                     GADGET_CHECK_RETURN_FALSE(gtPlus_util_.copyAlongE1(refRecon, workOrder2DT->data_, startE1_, endE1_));
@@ -422,7 +429,7 @@ bool gtPlusReconWorker2DT<T>::prepRef(gtPlusReconWorkOrder2DT<T>* workOrder2DT,
                 crop_size[3] = refRecon.get_size(3);
 
                 refCoilMap.create(RO, E1, srcCHA, refRecon.get_size(3), S);
-                GADGET_CHECK_RETURN_FALSE(setSubArrayUpTo10DArray(refRecon, refCoilMap, crop_offset, crop_size));
+                GADGET_CHECK_RETURN_FALSE(setSubArrayUpTo11DArray(refRecon, refCoilMap, crop_offset, crop_size));
                 GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, refCoilMap, "refCoilMap");
 
                 hoNDArray<T> refCoilMapTmp(refCoilMap);
@@ -441,13 +448,13 @@ bool gtPlusReconWorker2DT<T>::prepRef(gtPlusReconWorkOrder2DT<T>* workOrder2DT,
                     }
                 }
 
-                GADGET_CHECK_RETURN_FALSE(cropUpTo10DArray(refRecon, croppedRef, crop_offset, crop_size));
+                GADGET_CHECK_RETURN_FALSE(cropUpTo11DArray(refRecon, croppedRef, crop_offset, crop_size));
                 refRecon = croppedRef;
             }
             else
             {
                 hoNDArray<T> croppedRef;
-                GADGET_CHECK_RETURN_FALSE(cropUpTo10DArray(refRecon, croppedRef, crop_offset, crop_size));
+                GADGET_CHECK_RETURN_FALSE(cropUpTo11DArray(refRecon, croppedRef, crop_offset, crop_size));
                 GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, croppedRef, "croppedRef");
 
                 GADGET_CHECK_RETURN_FALSE(performRefFilter(workOrder2DT, croppedRef, refCoilMap, startRO, endRO, startE1, endE1));
@@ -469,7 +476,7 @@ bool gtPlusReconWorker2DT<T>::prepRef(gtPlusReconWorkOrder2DT<T>* workOrder2DT,
                         crop_offset[1] = 0;
                         crop_size[1] = refRecon.get_size(1);
 
-                        GADGET_CHECK_RETURN_FALSE(cropUpTo10DArray(refRecon, croppedRef, crop_offset, crop_size));
+                        GADGET_CHECK_RETURN_FALSE(cropUpTo11DArray(refRecon, croppedRef, crop_offset, crop_size));
                         refRecon = croppedRef;
                     }
                 }
@@ -503,6 +510,173 @@ bool gtPlusReconWorker2DT<T>::prepRef(gtPlusReconWorkOrder2DT<T>* workOrder2DT,
             return false;
         }
 
+        // if the upstream coil compression is needed
+        if ( workOrder2DT->upstream_coil_compression_ )
+        {
+            GADGET_CHECK_PERFORM(!debugFolder_.empty(), "Upstream coil compression ... ");
+
+            std::vector<hoMatrix<T> > upstreamCoilCoeffRef(workOrder2DT->ref_.get_size(4)), upstreamCoilCoeffRefRecon(refRecon.get_size(4)), upstreamCoilCoeffData(workOrder2DT->data_.get_size(4));
+
+            if ( workOrder2DT->same_coil_compression_coeff_allS_ )
+            {
+                hoNDArray<T> aveAllS;
+
+                std::vector<size_t> allSDim(4);
+                allSDim[0] = refRecon.get_size(0);
+                allSDim[1] = refRecon.get_size(1);
+                allSDim[2] = refRecon.get_size(2);
+                allSDim[3] = refRecon.get_size(3)*refRecon.get_size(4);
+
+                hoNDArray<T> dataAllS(&allSDim, refRecon.begin(), false);
+                GADGET_CHECK_RETURN_FALSE(gtPlus_util_.averageKSpace4D(dataAllS, aveAllS));
+
+                hoMatrix<T> coeff, eigenValues;
+                if ( workOrder2DT->coil_compression_num_modesKept_ > 0 )
+                {
+                    GADGET_CHECK_RETURN_FALSE(gtPlus_util_.computeKLCoilCompressionCoeff(aveAllS, 
+                                workOrder2DT->upstream_coil_compression_num_modesKept_, coeff, eigenValues));
+                }
+                else
+                {
+                    GADGET_CHECK_RETURN_FALSE(gtPlus_util_.computeKLCoilCompressionCoeff(aveAllS, 
+                                workOrder2DT->upstream_coil_compression_thres_, coeff, eigenValues));
+                }
+
+                eigenValues.print(std::cout);
+                GADGET_MSG("Upstream coil compression, number of channel kept is " << coeff.cols());
+
+                size_t n;
+                for ( n=0; n<upstreamCoilCoeffRef.size(); n++ )
+                {
+                    upstreamCoilCoeffRef[n] = coeff;
+                }
+
+                for ( n=0; n<upstreamCoilCoeffRefRecon.size(); n++ )
+                {
+                    upstreamCoilCoeffRefRecon[n] = coeff;
+                }
+
+                for ( n=0; n<upstreamCoilCoeffData.size(); n++ )
+                {
+                    upstreamCoilCoeffData[n] = coeff;
+                }
+            }
+            else
+            {
+                std::vector<size_t> allSDim(4);
+                allSDim[0] = refRecon.get_size(0);
+                allSDim[1] = refRecon.get_size(1);
+                allSDim[2] = refRecon.get_size(2);
+                allSDim[3] = refRecon.get_size(3);
+
+                size_t N_refRecon = allSDim[0]*allSDim[1]*allSDim[2]*allSDim[3];
+
+                size_t num_modesKept = srcCHA;
+
+                size_t s;
+                for ( s=0; s<refRecon.get_size(4); s++ )
+                {
+                    hoNDArray<T> dataCurrS(&allSDim, refRecon.begin()+s*N_refRecon, false);
+
+                    hoMatrix<T> coeff, eigenValues;
+
+                    if ( s == 0 )
+                    {
+                        if ( workOrder2DT->coil_compression_num_modesKept_ > 0 )
+                        {
+                            GADGET_CHECK_RETURN_FALSE(gtPlus_util_.computeKLCoilCompressionCoeff(dataCurrS, 
+                                        workOrder2DT->upstream_coil_compression_num_modesKept_, coeff, eigenValues));
+                        }
+                        else
+                        {
+                            GADGET_CHECK_RETURN_FALSE(gtPlus_util_.computeKLCoilCompressionCoeff(dataCurrS, 
+                                        workOrder2DT->upstream_coil_compression_thres_, coeff, eigenValues));
+                        }
+
+                        num_modesKept = coeff.get_size(1);
+                    }
+                    else
+                    {
+                        GADGET_CHECK_RETURN_FALSE(gtPlus_util_.computeKLCoilCompressionCoeff(dataCurrS, 
+                                        (int)num_modesKept, coeff, eigenValues));
+                    }
+
+                    GADGET_CHECK_PERFORM(!debugFolder_.empty(), eigenValues.print(std::cout));
+                    GADGET_MSG("Upstream coil compression, number of channel kept is " << coeff.cols());
+
+                    if ( s < upstreamCoilCoeffRef.size() )
+                    {
+                        upstreamCoilCoeffRef[s] = coeff;
+                    }
+
+                    upstreamCoilCoeffRefRecon[s] = coeff;
+                    upstreamCoilCoeffData[s] = coeff;
+                }
+            }
+
+            // apply the coil compression
+            #ifdef USE_OMP
+                omp_set_nested(1);
+            #endif // USE_OMP
+
+            GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.start("apply upstream coil compression ... "));
+            #pragma omp parallel sections default(shared)
+            {
+
+                #pragma omp section
+                {
+                    //GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.start("apply the coil compression on data ... "));
+                    // GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().applyKLCoilCompressionCoeff(workOrder3DT->data_, upstreamCoilCoeffData, data_dst_, true));
+                    GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("applyKLCoilCompressionCoeff ... "));
+                    gtPlusISMRMRDReconUtil<T>().applyKLCoilCompressionCoeff(workOrder2DT->data_, upstreamCoilCoeffData, data_dst_);
+                    GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
+
+                    GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("copy data ... "));
+                    workOrder2DT->data_ = data_dst_;
+                    GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
+
+                    //GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.stop());
+                }
+
+                #pragma omp section
+                {
+                    //GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.start("apply the coil compression on ref ... "));
+                    //GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().applyKLCoilCompressionCoeff(workOrder3DT->ref_, upstreamCoilCoeff, ref_dst_, true));
+                    gtPlusISMRMRDReconUtil<T>().applyKLCoilCompressionCoeff(workOrder2DT->ref_, upstreamCoilCoeffRef, ref_dst_);
+                    workOrder2DT->ref_ = ref_dst_;
+                    //GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.stop());
+                }
+
+                #pragma omp section
+                {
+                    //GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.start("apply the coil compression on refRecon ... "));
+                    hoNDArray<T> refRecon_upstream;
+                    //GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().applyKLCoilCompressionCoeff(refRecon, upstreamCoilCoeff, refRecon_upstream, true));
+                    gtPlusISMRMRDReconUtil<T>().applyKLCoilCompressionCoeff(refRecon, upstreamCoilCoeffRefRecon, refRecon_upstream);
+                    refRecon = refRecon_upstream;
+                    refRecon_upstream.clear();
+                    //GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.stop());
+                }
+
+                #pragma omp section
+                {
+                    //GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.start("apply the coil compression on ref for coil map ... "));
+                    hoNDArray<T> refCoilMap_upstream;
+                    //GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().applyKLCoilCompressionCoeff(refCoilMap, upstreamCoilCoeff, refCoilMap_upstream, true));
+                    gtPlusISMRMRDReconUtil<T>().applyKLCoilCompressionCoeff(refCoilMap, upstreamCoilCoeffRefRecon, refCoilMap_upstream);
+                    refCoilMap = refCoilMap_upstream;
+                    refCoilMap_upstream.clear();
+                    //GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.stop());
+                }
+            }
+
+            GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.stop());
+
+            #ifdef USE_OMP
+                omp_set_nested(0);
+            #endif // USE_OMP
+        }
+
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, refRecon, "refRecon");
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, refCoilMap, "refCoilMap");
     }
@@ -698,7 +872,11 @@ bool gtPlusReconWorker2DT<T>::performRecon(gtPlusReconWorkOrder2DT<T>* workOrder
                 GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().applyKLCoilCompressionCoeff(workOrder2DT->ref_coil_map_, *workOrder2DT->coilCompressionCoef_, ref_coil_map_dst_));
                 GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, ref_coil_map_dst_, "ref_coil_map_dst_");
 
-                if ( !workOrder2DT->downstream_coil_compression_ || workOrder2DT->recon_algorithm_==ISMRMRD_SPIRIT || workOrder2DT->recon_algorithm_==ISMRMRD_L1SPIRIT )
+                if ( !workOrder2DT->downstream_coil_compression_ 
+                    || workOrder2DT->recon_algorithm_==ISMRMRD_SPIRIT 
+                    || workOrder2DT->recon_algorithm_==ISMRMRD_L1SPIRIT 
+                    || workOrder2DT->recon_algorithm_==ISMRMRD_L1SPIRIT_SLEP 
+                    || workOrder2DT->recon_algorithm_==ISMRMRD_L1SPIRIT_SLEP_MOTION_COMP )
                 {
                     ref_src_ = ref_dst_;
                 }
@@ -798,7 +976,7 @@ estimateCoilMap(gtPlusReconWorkOrder2DT<T>* workOrder2DT, const hoNDArray<T>& re
                 GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft2c(refCoilMapS, buffer2DT_));
                 GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap2DNIH(buffer2DT_, 
                         coilMapS, workOrder2DT->coil_map_algorithm_, workOrder2DT->csm_kSize_, 
-                        workOrder2DT->csm_powermethod_num_, workOrder2DT->csm_iter_num_, workOrder2DT->csm_iter_thres_, workOrder2DT->csm_use_gpu_));
+                        workOrder2DT->csm_powermethod_num_, workOrder2DT->csm_iter_num_, (value_type)workOrder2DT->csm_iter_thres_, workOrder2DT->csm_use_gpu_));
 
                 GADGET_CHECK_RETURN_FALSE(repmatLastDimension(*workOrder2DT->coilMap_, usedS));
             }
@@ -807,7 +985,7 @@ estimateCoilMap(gtPlusReconWorkOrder2DT<T>* workOrder2DT, const hoNDArray<T>& re
                 GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft2c(ref_coil_map_dst, buffer2DT_));
                 GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap2DNIH(buffer2DT_, 
                         *workOrder2DT->coilMap_, workOrder2DT->coil_map_algorithm_, workOrder2DT->csm_kSize_, 
-                        workOrder2DT->csm_powermethod_num_, workOrder2DT->csm_iter_num_, workOrder2DT->csm_iter_thres_, workOrder2DT->csm_use_gpu_));
+                        workOrder2DT->csm_powermethod_num_, workOrder2DT->csm_iter_num_, (value_type)workOrder2DT->csm_iter_thres_, workOrder2DT->csm_use_gpu_));
             }
         }
 
@@ -883,16 +1061,24 @@ performCalib(gtPlusReconWorkOrder2DT<T>* workOrder2DT, const hoNDArray<T>& ref_s
                     GADGET_CHECK_RETURN_FALSE(this->performCalibImpl(ref_src, ref_dst, workOrder2DT, n, usedS));
                 }
 
-                GADGET_CHECK_RETURN_FALSE(repmatLastDimension(*workOrder2DT->kernel_, usedS));
-                GADGET_CHECK_RETURN_FALSE(repmatLastDimension(*workOrder2DT->kernelIm_, usedS));
-                GADGET_CHECK_RETURN_FALSE(repmatLastDimension(*workOrder2DT->unmixingCoeffIm_, usedS));
-                GADGET_CHECK_RETURN_FALSE(repmatLastDimension(workOrder2DT->gfactor_, usedS));
+                if ( S > 1 )
+                {
+                    GADGET_CHECK_RETURN_FALSE(repmatLastDimension(*workOrder2DT->kernel_, usedS));
+                    GADGET_CHECK_RETURN_FALSE(repmatLastDimension(*workOrder2DT->kernelIm_, usedS));
+                    GADGET_CHECK_RETURN_FALSE(repmatLastDimension(*workOrder2DT->unmixingCoeffIm_, usedS));
+                    if ( workOrder2DT->gfactor_needed_ ) { GADGET_CHECK_RETURN_FALSE(repmatLastDimension(workOrder2DT->gfactor_, usedS)); }
+                    if ( workOrder2DT->wrap_around_map_needed_ ) { GADGET_CHECK_RETURN_FALSE(repmatLastDimension(workOrder2DT->wrap_around_map_, usedS)); }
+                }
             }
             else
             {
                 int usedS;
                 #ifdef USE_OMP
-                    omp_set_nested(1);
+                    if ( S < omp_get_num_procs()/2 )
+                    {
+                        omp_set_nested(1);
+                        GADGET_MSG("performCalib, nested omp is on ... ");
+                    }
                 #endif // USE_OMP
 
                 #ifdef GCC_OLD_FLAG
@@ -995,9 +1181,9 @@ bool gtPlusReconWorker2DT<T>::unmixCoeff(const hoNDArray<T>& kerIm, const hoNDAr
         }
 
         hoNDArray<T> conjUnmixCoeff(unmixCoeff);
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::multiplyConj(unmixCoeff, conjUnmixCoeff, conjUnmixCoeff));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::multiplyConj(unmixCoeff, conjUnmixCoeff, conjUnmixCoeff));
         GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOverLastDimension(conjUnmixCoeff, gFactor));
-        Gadgetron::sqrt_inplace(&gFactor);
+        Gadgetron::sqrt(gFactor, gFactor);
     }
     catch(...)
     {
@@ -1083,8 +1269,9 @@ bool gtPlusReconWorker2DT<T>::applyImageDomainKernelImage(const hoNDArray<T>& al
 
         int n;
 
-        if ( num <= 16 )
+        if ( num <= 8 )
         {
+            GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("applyImageDomainKernelImage - multipleMultiply - sumOverSecondLastDimension ... "));
             for ( n=0; n<(int)num; n++ )
             {
                 hoNDArray<T> buf3D(&dim3D, const_cast<T*>(aliasedIm.begin()+n*RO*E1*srcCHA));
@@ -1093,6 +1280,7 @@ bool gtPlusReconWorker2DT<T>::applyImageDomainKernelImage(const hoNDArray<T>& al
                 Gadgetron::multipleMultiply(buf3D, kerIm, kerImBuffer);
                 Gadgetron::sumOverSecondLastDimension(kerImBuffer, bufIm3D);
             }
+            GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
         }
         else
         {
@@ -1205,6 +1393,12 @@ bool gtPlusReconWorker2DT<T>::afterUnwrapping(gtPlusReconWorkOrder2DT<T>* workOr
         size_t N = workOrder2DT->data_.get_size(3);
         size_t S = workOrder2DT->data_.get_size(4);
 
+        if ( workOrder2DT->CalibMode_ == ISMRMRD_noacceleration )
+        {
+            fullres_coilmap = false;
+            ref_fillback = false;
+        }
+
         if ( workOrder2DT->CalibMode_ == ISMRMRD_embedded )
         {
             if ( workOrder2DT->embedded_fullres_coilmap_ )
@@ -1215,7 +1409,9 @@ bool gtPlusReconWorker2DT<T>::afterUnwrapping(gtPlusReconWorkOrder2DT<T>* workOr
 
             if ( workOrder2DT->embedded_ref_fillback_ 
                 && (workOrder2DT->recon_algorithm_!=ISMRMRD_SPIRIT) 
-                && (workOrder2DT->recon_algorithm_!=ISMRMRD_L1SPIRIT) )
+                && (workOrder2DT->recon_algorithm_!=ISMRMRD_L1SPIRIT)
+                && (workOrder2DT->recon_algorithm_!=ISMRMRD_L1SPIRIT_SLEP)
+                && (workOrder2DT->recon_algorithm_!=ISMRMRD_L1SPIRIT_SLEP_MOTION_COMP) )
             {
                 ref_fillback = true;
             }
@@ -1259,6 +1455,8 @@ bool gtPlusReconWorker2DT<T>::afterUnwrapping(gtPlusReconWorkOrder2DT<T>* workOr
 
         if ( ref_fillback )
         {
+            GADGET_MSG("Fill back the reference kspace lines to the reconstruction ");
+
             hoNDArray<T> ref_dst;
             if ( workOrder2DT->coil_compression_ )
             {
@@ -1282,12 +1480,16 @@ bool gtPlusReconWorker2DT<T>::afterUnwrapping(gtPlusReconWorkOrder2DT<T>* workOr
         }
 
         // partial fourier handling
-        GADGET_CHECK_RETURN_FALSE(this->performPartialFourierHandling(workOrder2DT));
+        if ( partial_fourier_handling_ )
+        {
+            GADGET_CHECK_RETURN_FALSE(this->performPartialFourierHandling(workOrder2DT));
+        }
 
         if ( fullres_coilmap )
         {
             GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.start("full res coil map : allocate buffer 2DT ...  "));
-            hoNDArrayMemoryManaged<T> buffer2DT_Two(workOrder2DT->fullkspace_.get_dimensions(), gtPlus_mem_manager_);
+            //hoNDArrayMemoryManaged<T> buffer2DT_Two(workOrder2DT->fullkspace_.get_dimensions(), gtPlus_mem_manager_);
+            hoNDArray<T> buffer2DT_Two(workOrder2DT->fullkspace_.get_dimensions());
             GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.stop());
 
             GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft2c(workOrder2DT->fullkspace_, buffer2DT_, buffer2DT_Two));
@@ -1311,14 +1513,17 @@ bool gtPlusReconWorker2DT<T>::afterUnwrapping(gtPlusReconWorkOrder2DT<T>* workOr
                 else
                 {
                     workOrder2DT->coilMap_->create(RO, E1, dstCHA, 1, S);
+                    //Gadgetron::clear( *(workOrder2DT->coilMap_) );
 
                     size_t s;
 
                     if ( same_coilmap_allS )
                     {
                         hoNDArray<T> aveComplexImS(RO, E1, dstCHA, 1);
+                        //Gadgetron::clear(aveComplexImS);
 
                         buffer2DT_unwrapping_.create(RO, E1, dstCHA, N);
+                        //Gadgetron::clear(aveComplexImS);
 
                         hoMatrix<T> A(RO*E1*dstCHA, N, buffer2DT_.begin()+whichS_coilmap*RO*E1*dstCHA*N);
                         hoMatrix<T> A_KLF(RO*E1*dstCHA, N, buffer2DT_unwrapping_.begin());
@@ -1346,7 +1551,7 @@ bool gtPlusReconWorker2DT<T>::afterUnwrapping(gtPlusReconWorkOrder2DT<T>* workOr
                         hoNDArray<T> coilMapS(RO, E1, dstCHA, 1, workOrder2DT->coilMap_->begin()+whichS_coilmap*RO*E1*dstCHA);
 
                         GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.start("coilMap2DNIH ...  "));
-                        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap2DNIH(aveComplexImS, coilMapS, workOrder2DT->coil_map_algorithm_, workOrder2DT->csm_kSize_, workOrder2DT->csm_powermethod_num_, workOrder2DT->csm_iter_num_, workOrder2DT->csm_iter_thres_, workOrder2DT->csm_use_gpu_));
+                        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap2DNIH(aveComplexImS, coilMapS, workOrder2DT->coil_map_algorithm_, workOrder2DT->csm_kSize_, workOrder2DT->csm_powermethod_num_, workOrder2DT->csm_iter_num_, (value_type)workOrder2DT->csm_iter_thres_, workOrder2DT->csm_use_gpu_));
                         GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.stop());
 
                         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, coilMapS, "coilMapS");
@@ -1366,7 +1571,10 @@ bool gtPlusReconWorker2DT<T>::afterUnwrapping(gtPlusReconWorkOrder2DT<T>* workOr
                     else
                     {
                         hoNDArray<T> aveComplexIm(RO, E1, dstCHA, 1, S);
+                        //Gadgetron::clear(aveComplexIm);
+
                         buffer2DT_unwrapping_ = buffer2DT_;
+                        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, buffer2DT_unwrapping_, "buffer2DT_unwrapping");
 
                         if ( numOfModesKept>0 && numOfModesKept<dstCHA )
                         {
@@ -1394,7 +1602,7 @@ bool gtPlusReconWorker2DT<T>::afterUnwrapping(gtPlusReconWorkOrder2DT<T>* workOr
 
                         GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.start("coilMap2DNIH ...  "));
 
-                        gtPlusISMRMRDReconUtilComplex<T>().coilMap2DNIH(aveComplexIm, *workOrder2DT->coilMap_, workOrder2DT->coil_map_algorithm_, workOrder2DT->csm_kSize_, workOrder2DT->csm_powermethod_num_, workOrder2DT->csm_iter_num_, workOrder2DT->csm_iter_thres_, workOrder2DT->csm_use_gpu_);
+                        gtPlusISMRMRDReconUtilComplex<T>().coilMap2DNIH(aveComplexIm, *workOrder2DT->coilMap_, workOrder2DT->coil_map_algorithm_, workOrder2DT->csm_kSize_, workOrder2DT->csm_powermethod_num_, workOrder2DT->csm_iter_num_, (value_type)workOrder2DT->csm_iter_thres_, workOrder2DT->csm_use_gpu_);
 
                         gtPlusISMRMRDReconUtilComplex<T>().coilCombine(buffer2DT_, *workOrder2DT->coilMap_, workOrder2DT->complexIm_);
 
@@ -1438,13 +1646,13 @@ bool gtPlusReconWorker2DT<T>::afterUnwrapping(gtPlusReconWorkOrder2DT<T>* workOr
                         hoNDArray<T> complexImS(RO, E1, dstCHA, N, buffer2DT_.begin()+whichS_coilmap*RO*E1*dstCHA*N);
                         hoNDArray<T> coilMapS(RO, E1, dstCHA, N, workOrder2DT->coilMap_->begin()+whichS_coilmap*RO*E1*dstCHA*N);
 
-                        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap2DNIH(complexImS, coilMapS, workOrder2DT->coil_map_algorithm_, workOrder2DT->csm_kSize_, workOrder2DT->csm_powermethod_num_, workOrder2DT->csm_iter_num_, workOrder2DT->csm_iter_thres_, workOrder2DT->csm_use_gpu_));
+                        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap2DNIH(complexImS, coilMapS, workOrder2DT->coil_map_algorithm_, workOrder2DT->csm_kSize_, workOrder2DT->csm_powermethod_num_, workOrder2DT->csm_iter_num_, (value_type)workOrder2DT->csm_iter_thres_, workOrder2DT->csm_use_gpu_));
                         GADGET_CHECK_RETURN_FALSE(repmatLastDimension(*workOrder2DT->coilMap_, whichS_coilmap));
                         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, *workOrder2DT->coilMap_, "coilMap_fullres");
                     }
                     else
                     {
-                        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap2DNIH(buffer2DT_, *workOrder2DT->coilMap_, workOrder2DT->coil_map_algorithm_, workOrder2DT->csm_kSize_, workOrder2DT->csm_powermethod_num_, workOrder2DT->csm_iter_num_, workOrder2DT->csm_iter_thres_, workOrder2DT->csm_use_gpu_));
+                        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap2DNIH(buffer2DT_, *workOrder2DT->coilMap_, workOrder2DT->coil_map_algorithm_, workOrder2DT->csm_kSize_, workOrder2DT->csm_powermethod_num_, workOrder2DT->csm_iter_num_, (value_type)workOrder2DT->csm_iter_thres_, workOrder2DT->csm_use_gpu_));
                         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, *workOrder2DT->coilMap_, "coilMap_fullres");
                     }
                     GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.stop());
@@ -1454,6 +1662,43 @@ bool gtPlusReconWorker2DT<T>::afterUnwrapping(gtPlusReconWorkOrder2DT<T>* workOr
                 }
             }
         }
+        else
+        {
+            if ( partial_fourier_handling_ )
+            {
+                bool partialFourierHandling = true;
+                if ( (workOrder2DT->start_RO_<0 || workOrder2DT->end_RO_<0 || (workOrder2DT->end_RO_-workOrder2DT->start_RO_+1==RO) ) 
+                        && (workOrder2DT->start_E1_<0 || workOrder2DT->end_E1_<0 || (workOrder2DT->end_E1_-workOrder2DT->start_E1_+1==E1) ) )
+                {
+                    partialFourierHandling = false;
+                }
+
+                // if the partial fourier handling is used to compute updated full kspace, the coil combination needs to be repeated
+                if ( partialFourierHandling )
+                {
+                    GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, workOrder2DT->complexIm_, "complexIm_origin_noFullResCoilMap_");
+
+                    // if the partial fourier handling is performed on the fullkspace, an extra coil combination is needed
+                    if ( workOrder2DT->acceFactorE1_==1 && workOrder2DT->acceFactorE2_==1 )
+                    {
+                        hoNDArray<T> buffer2DT_Two(workOrder2DT->data_.get_dimensions());
+                        GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft2c(workOrder2DT->data_, buffer2DT_, buffer2DT_Two));
+                        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilCombine(buffer2DT_, *workOrder2DT->coilMap_, workOrder2DT->complexIm_));
+                        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, workOrder2DT->complexIm_, "complexIm_noFullResCoilMap_");
+                    }
+                    else if ( workOrder2DT->fullkspace_.get_number_of_elements() > 0 )
+                    {
+                        if ( workOrder2DT->fullkspace_.get_size(2) == workOrder2DT->coilMap_->get_size(2) )
+                        {
+                            hoNDArray<T> buffer2DT_Two(workOrder2DT->fullkspace_.get_dimensions());
+                            GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft2c(workOrder2DT->fullkspace_, buffer2DT_, buffer2DT_Two));
+                            GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilCombine(buffer2DT_, *workOrder2DT->coilMap_, workOrder2DT->complexIm_));
+                            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, workOrder2DT->complexIm_, "complexIm_noFullResCoilMap_");
+                        }
+                    }
+                }
+            }
+        }
     }
     catch(...)
     {
@@ -1518,10 +1763,37 @@ bool gtPlusReconWorker2DT<T>::performPartialFourierHandling(gtPlusReconWorkOrder
 {
     try
     {
-        if ( workOrder2DT->partialFourier_algo_ == ISMRMRD_PF_ZEROFILLING ) return true;
+        // compensate for the partial fourier to preserve the SNR unit
+        value_type partialFourierCompensationFactor = 1;
+
+        size_t RO = workOrder2DT->data_.get_size(0);
+        size_t E1 = workOrder2DT->data_.get_size(1);
+
+        if ( !( workOrder2DT->start_RO_<0 || workOrder2DT->end_RO_<0 || (workOrder2DT->end_RO_-workOrder2DT->start_RO_+1==RO) ) )
+        {
+            partialFourierCompensationFactor *= (value_type)(RO)/(value_type)(workOrder2DT->end_RO_-workOrder2DT->start_RO_+1);
+        }
+
+        if ( !( workOrder2DT->start_E1_<0 || workOrder2DT->end_E1_<0 || (workOrder2DT->end_E1_-workOrder2DT->start_E1_+1==E1) ) )
+        {
+            if ( workOrder2DT->end_E1_-workOrder2DT->start_E1_+1 <= E1 )
+            {
+                partialFourierCompensationFactor *= (value_type)(E1)/(value_type)(workOrder2DT->end_E1_-workOrder2DT->start_E1_+1);
+            }
+        }
+
+        partialFourierCompensationFactor = std::sqrt(partialFourierCompensationFactor);
+        GADGET_CHECK_PERFORM(performTiming_, GADGET_MSG("Partial fourier scaling factor : " << partialFourierCompensationFactor));
+
+        GADGET_CHECK_PERFORM(performTiming_, GADGET_MSG("Partial fourier algorithm : " << gtPlus_util_.getNameFromISMRMRDPartialFourierReconAlgo(workOrder2DT->partialFourier_algo_) ));
 
         if ( workOrder2DT->acceFactorE1_==1 && workOrder2DT->acceFactorE2_==1 )
         {
+            if ( (workOrder2DT->partialFourier_algo_ == ISMRMRD_PF_ZEROFILLING || workOrder2DT->partialFourier_algo_ == ISMRMRD_PF_ZEROFILLING_FILTER) && (GT_ABS(partialFourierCompensationFactor-1)>FLT_EPSILON) )
+            {
+                GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::scal(partialFourierCompensationFactor, workOrder2DT->data_));
+            }
+
             if ( workOrder2DT->partialFourier_algo_ == ISMRMRD_PF_ZEROFILLING_FILTER )
             {
                 GADGET_CHECK_RETURN_FALSE(performPartialFourierFilter(*workOrder2DT, workOrder2DT->data_));
@@ -1544,6 +1816,11 @@ bool gtPlusReconWorker2DT<T>::performPartialFourierHandling(gtPlusReconWorkOrder
         }
         else if ( workOrder2DT->fullkspace_.get_number_of_elements() > 0 )
         {
+            if ( (workOrder2DT->partialFourier_algo_ == ISMRMRD_PF_ZEROFILLING || workOrder2DT->partialFourier_algo_ == ISMRMRD_PF_ZEROFILLING_FILTER) && (GT_ABS(partialFourierCompensationFactor-1)>FLT_EPSILON) )
+            {
+                GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::scal(partialFourierCompensationFactor, workOrder2DT->fullkspace_));
+            }
+
             if ( workOrder2DT->partialFourier_algo_ == ISMRMRD_PF_ZEROFILLING_FILTER )
             {
                 GADGET_CHECK_RETURN_FALSE(performPartialFourierFilter(*workOrder2DT, workOrder2DT->fullkspace_));
@@ -1570,6 +1847,11 @@ bool gtPlusReconWorker2DT<T>::performPartialFourierHandling(gtPlusReconWorkOrder
             hoNDArray<T> kspace(workOrder2DT->complexIm_);
             GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fft2c(workOrder2DT->complexIm_, kspace));
 
+            if ( (workOrder2DT->partialFourier_algo_ == ISMRMRD_PF_ZEROFILLING || workOrder2DT->partialFourier_algo_ == ISMRMRD_PF_ZEROFILLING_FILTER) && (GT_ABS(partialFourierCompensationFactor-1)>FLT_EPSILON) )
+            {
+                GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::scal(partialFourierCompensationFactor, kspace));
+            }
+
             if ( workOrder2DT->partialFourier_algo_ == ISMRMRD_PF_ZEROFILLING_FILTER )
             {
                 GADGET_CHECK_RETURN_FALSE(performPartialFourierFilter(*workOrder2DT, kspace));
@@ -1655,9 +1937,9 @@ bool gtPlusReconWorker2DT<T>::performPartialFourierFilter(gtPlusReconWorkOrder2D
             {
                 kspace = buffer2DT_partial_fourier_;
             }
-
-            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, kspace, "kspace_after_PF_Filter");
         }
+
+        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, kspace, "kspace_after_PF_Filter");
     }
     catch(...)
     {
@@ -1693,17 +1975,17 @@ bool gtPlusReconWorker2DT<T>::performPartialFourierHomodyneRecon(gtPlusReconWork
         double filter_ref_sigma_ = 1.5;
         double filter_ref_width_ = 0.15;
 
-        int startRO(0), endRO(RO-1);
+        size_t startRO(0), endRO(RO-1);
         hoNDArray<T> filterRO(RO);
         if ( (workOrder2DT.start_RO_<0 || workOrder2DT.end_RO_<0) )
         {
             GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(RO, 0, RO-1, 
-                filterRO, filter_ref_type_, filter_ref_sigma_, std::ceil(filter_ref_width_*RO)));
+                filterRO, filter_ref_type_, filter_ref_sigma_, (size_t)std::ceil(filter_ref_width_*RO)));
         }
         else
         {
             GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(RO, workOrder2DT.start_RO_, workOrder2DT.end_RO_, 
-                filterRO, filter_ref_type_, filter_ref_sigma_, std::ceil(filter_ref_width_*RO)));
+                filterRO, filter_ref_type_, filter_ref_sigma_, (size_t)std::ceil(filter_ref_width_*RO)));
 
             startRO = workOrder2DT.start_RO_;
             endRO = workOrder2DT.end_RO_;
@@ -1711,17 +1993,17 @@ bool gtPlusReconWorker2DT<T>::performPartialFourierHomodyneRecon(gtPlusReconWork
 
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, filterRO, "filterRO_homodyne");
 
-        int startE1(0), endE1(E1-1);
+        size_t startE1(0), endE1(E1-1);
         hoNDArray<T> filterE1(E1);
         if ( (workOrder2DT.start_E1_<0 || workOrder2DT.end_E1_<0) )
         {
             GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(E1, 0, E1-1, 
-                filterE1, filter_ref_type_, filter_ref_sigma_, std::ceil(filter_ref_width_*E1)));
+                filterE1, filter_ref_type_, filter_ref_sigma_, (size_t)std::ceil(filter_ref_width_*E1)));
         }
         else
         {
             GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(E1, workOrder2DT.start_E1_, workOrder2DT.end_E1_, 
-                filterE1, filter_ref_type_, filter_ref_sigma_, std::ceil(filter_ref_width_*E1)));
+                filterE1, filter_ref_type_, filter_ref_sigma_, (size_t)std::ceil(filter_ref_width_*E1)));
 
             startE1 = workOrder2DT.start_E1_;
             endE1 = workOrder2DT.end_E1_;
@@ -1759,15 +2041,15 @@ bool gtPlusReconWorker2DT<T>::performPartialFourierHomodyneRecon(gtPlusReconWork
             GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, buffer2DT_partial_fourier_, "homodyne_complexIm");
 
             // get the phase
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::absolute(buffer2DT_partial_fourier_, mag));
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::addEpsilon(mag));
+            GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::abs(buffer2DT_partial_fourier_, mag));
+            GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::addEpsilon(mag));
             GADGET_CHECK_RETURN_FALSE(magComplex.copyFrom(mag));
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::divide(buffer2DT_partial_fourier_, magComplex, buffer2DT_));
+            GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::divide(buffer2DT_partial_fourier_, magComplex, buffer2DT_));
             GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, buffer2DT_, "homodyne_phase");
 
             // remove the phase from complex images
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::conjugate(buffer2DT_, buffer2DT_));
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::multiply(complexIm, buffer2DT_, complexIm));
+            GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::conjugate(buffer2DT_, buffer2DT_));
+            GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::multiply(complexIm, buffer2DT_, complexIm));
             GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, complexIm, "homodyne_complexIm_removePhase");
 
             // go back to kspace
@@ -1775,7 +2057,7 @@ bool gtPlusReconWorker2DT<T>::performPartialFourierHomodyneRecon(gtPlusReconWork
             GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, kspaceIter, "homodyne_complexIm_removePhase_kspace");
 
             // compute threshold to stop the iteration
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::subtract(complexImPrev, complexIm, buffer2DT_));
+            GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::subtract(complexImPrev, complexIm, buffer2DT_));
             GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, buffer2DT_, "homodyne_diff_complexIm");
 
             typename realType<T>::Type diff, prev;
@@ -1803,8 +2085,8 @@ bool gtPlusReconWorker2DT<T>::performPartialFourierHomodyneRecon(gtPlusReconWork
 
         if ( workOrder2DT.partialFourier_homodyne_densityComp_ )
         {
-            size_t width_RO = std::floor(0.1*RO);
-            size_t width_E1 = std::floor(0.1*E1);
+            size_t width_RO = (size_t)std::floor(0.1*RO);
+            size_t width_E1 = (size_t)std::floor(0.1*E1);
 
             // compute PF filter for RO and E1
             hoNDArray<T> filterPF_RO, filterPF_E1;
@@ -1877,7 +2159,7 @@ bool gtPlusReconWorker2DT<T>::performPartialFourierHomodyneRecon(gtPlusReconWork
                 GADGET_CHECK_RETURN_FALSE(gtPlus_util_.kspacefilterE1(kspaceIter, filterPF_homodyne_E1, kspaceIter));
                 GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, kspaceIter, "kspaceIter_after_homodyne_PF_Filter");
 
-                GADGET_CHECK_RETURN_FALSE(Gadgetron::add(filterPF_E1, filterPF_homodyne_E1, filterPF));
+                GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::add(filterPF_E1, filterPF_homodyne_E1, filterPF));
                 GADGET_CHECK_RETURN_FALSE(gtPlus_util_.computeFilterSNRUnitScaleFactor(filterPF, scaleFactor));
             }
             else if ( workOrder2DT.start_E1_<0 || workOrder2DT.end_E1_<0 || (workOrder2DT.start_E1_==0 && workOrder2DT.end_E1_==E1-1) )
@@ -1888,7 +2170,7 @@ bool gtPlusReconWorker2DT<T>::performPartialFourierHomodyneRecon(gtPlusReconWork
                 GADGET_CHECK_RETURN_FALSE(gtPlus_util_.kspacefilterRO(kspaceIter, filterPF_homodyne_RO, kspaceIter));
                 GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, kspaceIter, "kspaceIter_after_homodyne_PF_Filter");
 
-                GADGET_CHECK_RETURN_FALSE(Gadgetron::add(filterPF_RO, filterPF_homodyne_RO, filterPF));
+                GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::add(filterPF_RO, filterPF_homodyne_RO, filterPF));
                 GADGET_CHECK_RETURN_FALSE(gtPlus_util_.computeFilterSNRUnitScaleFactor(filterPF, scaleFactor));
             }
             else
@@ -1899,18 +2181,18 @@ bool gtPlusReconWorker2DT<T>::performPartialFourierHomodyneRecon(gtPlusReconWork
                 GADGET_CHECK_RETURN_FALSE(gtPlus_util_.kspacefilterROE1(kspaceIter, filterPF_homodyne_RO, filterPF_homodyne_E1, kspaceIter));
                 GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, kspaceIter, "kspaceIter_after_homodyne_PF_Filter");
 
-                GADGET_CHECK_RETURN_FALSE(Gadgetron::add(filterPF_RO, filterPF_homodyne_RO, filterPF));
+                GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::add(filterPF_RO, filterPF_homodyne_RO, filterPF));
                 GADGET_CHECK_RETURN_FALSE(gtPlus_util_.computeFilterSNRUnitScaleFactor(filterPF, scaleFactor));
 
                 T scaleFactorE1(1.0);
-                GADGET_CHECK_RETURN_FALSE(Gadgetron::add(filterPF_E1, filterPF_homodyne_E1, filterPF));
+                GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::add(filterPF_E1, filterPF_homodyne_E1, filterPF));
                 GADGET_CHECK_RETURN_FALSE(gtPlus_util_.computeFilterSNRUnitScaleFactor(filterPF, scaleFactorE1));
 
                 scaleFactor *= scaleFactorE1;
             }
 
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::add(kspace, kspaceIter, kspace));
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::scal(scaleFactor, kspace));
+            GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::add(kspace, kspaceIter, kspace));
+            GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::scal(scaleFactor, kspace));
         }
         else
         {
@@ -1955,17 +2237,17 @@ bool gtPlusReconWorker2DT<T>::performPartialFourierPOCSRecon(gtPlusReconWorkOrde
         double filter_ref_sigma_ = 1.5;
         double filter_ref_width_ = 0.15;
 
-        int startRO(0), endRO(RO-1);
+        size_t startRO(0), endRO(RO-1);
         hoNDArray<T> filterRO(RO);
         if ( (workOrder2DT.start_RO_<0 || workOrder2DT.end_RO_<0) )
         {
             GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(RO, 0, RO-1, 
-                filterRO, filter_ref_type_, filter_ref_sigma_, std::ceil(filter_ref_width_*RO)));
+                filterRO, filter_ref_type_, filter_ref_sigma_, (size_t)std::ceil(filter_ref_width_*RO)));
         }
         else
         {
             GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(RO, workOrder2DT.start_RO_, workOrder2DT.end_RO_, 
-                filterRO, filter_ref_type_, filter_ref_sigma_, std::ceil(filter_ref_width_*RO)));
+                filterRO, filter_ref_type_, filter_ref_sigma_, (size_t)std::ceil(filter_ref_width_*RO)));
 
             startRO = workOrder2DT.start_RO_;
             endRO = workOrder2DT.end_RO_;
@@ -1973,17 +2255,17 @@ bool gtPlusReconWorker2DT<T>::performPartialFourierPOCSRecon(gtPlusReconWorkOrde
 
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, filterRO, "filterRO_POCS");
 
-        int startE1(0), endE1(E1-1);
+        size_t startE1(0), endE1(E1-1);
         hoNDArray<T> filterE1(E1);
         if ( (workOrder2DT.start_E1_<0 || workOrder2DT.end_E1_<0) )
         {
             GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(E1, 0, E1-1, 
-                filterE1, filter_ref_type_, filter_ref_sigma_, std::ceil(filter_ref_width_*E1)));
+                filterE1, filter_ref_type_, filter_ref_sigma_, (size_t)std::ceil(filter_ref_width_*E1)));
         }
         else
         {
             GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(E1, workOrder2DT.start_E1_, workOrder2DT.end_E1_, 
-                filterE1, filter_ref_type_, filter_ref_sigma_, std::ceil(filter_ref_width_*E1)));
+                filterE1, filter_ref_type_, filter_ref_sigma_, (size_t)std::ceil(filter_ref_width_*E1)));
 
             startE1 = workOrder2DT.start_E1_;
             endE1 = workOrder2DT.end_E1_;
@@ -2005,10 +2287,10 @@ bool gtPlusReconWorker2DT<T>::performPartialFourierPOCSRecon(gtPlusReconWorkOrde
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, buffer2DT_partial_fourier_, "POCS_afterFiltered_complexIm");
 
         // get the complex image phase for the filtered kspace
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::absolute(buffer2DT_partial_fourier_, mag));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::addEpsilon(mag));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::abs(buffer2DT_partial_fourier_, mag));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::addEpsilon(mag));
         GADGET_CHECK_RETURN_FALSE(magComplex.copyFrom(mag));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::divide(buffer2DT_partial_fourier_, magComplex, buffer2DT_));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::divide(buffer2DT_partial_fourier_, magComplex, buffer2DT_));
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, buffer2DT_, "POCS_afterFiltered_complexIm_phase");
 
         // complex images, initialized as not filtered complex image
@@ -2022,9 +2304,9 @@ bool gtPlusReconWorker2DT<T>::performPartialFourierPOCSRecon(gtPlusReconWorkOrde
         size_t ii;
         for ( ii=0; ii<workOrder2DT.partialFourier_POCS_iters_; ii++ )
         {
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::absolute(complexImPOCS, mag));
+            GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::abs(complexImPOCS, mag));
             GADGET_CHECK_RETURN_FALSE(magComplex.copyFrom(mag));
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::multiply(magComplex, buffer2DT_, complexImPOCS));
+            GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::multiply(magComplex, buffer2DT_, complexImPOCS));
             GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, complexImPOCS, "POCS_complexImPOCS");
 
             // go back to kspace
@@ -2043,7 +2325,7 @@ bool gtPlusReconWorker2DT<T>::performPartialFourierPOCSRecon(gtPlusReconWorkOrde
             GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, complexImPOCS, "POCS_kspaceIter_copyOri_complexImPOCS");
 
             // compute threshold to stop the iteration
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::subtract(complexImPOCS, complexIm, buffer2DT_partial_fourier_));
+            GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::subtract(complexImPOCS, complexIm, buffer2DT_partial_fourier_));
             typename realType<T>::Type diff, prev;
             Gadgetron::norm2(complexIm, prev);
             Gadgetron::norm2(buffer2DT_partial_fourier_, diff);
@@ -2106,14 +2388,14 @@ bool gtPlusReconWorker2DT<T>::performPartialFourierFengHuangRecon(gtPlusReconWor
 
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, kspace, "kspace_before_FengHuang");
 
-        int startRO(0), endRO(RO-1);
+        size_t startRO(0), endRO(RO-1);
         if ( workOrder2DT.start_RO_>=0 && workOrder2DT.end_RO_<RO )
         {
             startRO = workOrder2DT.start_RO_;
             endRO = workOrder2DT.end_RO_;
         }
 
-        int startE1(0), endE1(E1-1);
+        size_t startE1(0), endE1(E1-1);
         if ( workOrder2DT.start_E1_>=0 && workOrder2DT.end_E1_<E1 )
         {
             startE1 = workOrder2DT.start_E1_;
@@ -2150,8 +2432,8 @@ bool gtPlusReconWorker2DT<T>::performPartialFourierFengHuangRecon(gtPlusReconWor
         size[3] = N;
         size[4] = S;
 
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::cropUpTo10DArray(buffer2DT_, src, start, size));
-        GADGET_CHECK_RETURN_FALSE(cropUpTo10DArray(kspace, dst, start, size));
+        GADGET_CHECK_RETURN_FALSE(Gadgetron::cropUpTo11DArray(buffer2DT_, src, start, size));
+        GADGET_CHECK_RETURN_FALSE(cropUpTo11DArray(kspace, dst, start, size));
 
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, src, "src_FengHuang");
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, dst, "dst_FengHuang");
@@ -2179,16 +2461,16 @@ bool gtPlusReconWorker2DT<T>::performPartialFourierFengHuangRecon(gtPlusReconWor
         if ( workOrder2DT.partialFourier_FengHuang_transitBand_==0 )
         {
             GADGET_CHECK_PERFORM(performTiming_, gt_timer1_.start("performReconFangHuang"));
-            GADGET_CHECK_RETURN_FALSE(this->performReconFangHuang(workOrder2DT, buffer2DT_, kspace, startRO, endRO, startE1, endE1, kernel));
+            GADGET_CHECK_RETURN_FALSE(this->performReconFangHuang(workOrder2DT, buffer2DT_, kspace, (int)startRO, (int)endRO, (int)startE1, (int)endE1, kernel));
             GADGET_CHECK_PERFORM(performTiming_, gt_timer1_.stop());
         }
         else
         {
             GADGET_CHECK_PERFORM(performTiming_, gt_timer1_.start("performReconFangHuang with transition band"));
 
-            int tb =  (int)workOrder2DT.partialFourier_FengHuang_transitBand_;
+            size_t tb =  (int)workOrder2DT.partialFourier_FengHuang_transitBand_;
 
-            int sRO(startRO), eRO(endRO), sE1(startE1), eE1(endE1);
+            size_t sRO(startRO), eRO(endRO), sE1(startE1), eE1(endE1);
 
             if ( startRO > 0 )
             {
@@ -2228,7 +2510,7 @@ bool gtPlusReconWorker2DT<T>::performPartialFourierFengHuangRecon(gtPlusReconWor
 
             buffer2DT_partial_fourier_kspaceIter_ = kspace;
             GADGET_CHECK_RETURN_FALSE(this->performReconFangHuang(workOrder2DT, buffer2DT_, 
-                    buffer2DT_partial_fourier_kspaceIter_, startRO, endRO, startE1, endE1, kernel));
+                    buffer2DT_partial_fourier_kspaceIter_, (int)startRO, (int)endRO, (int)startE1, (int)endE1, kernel));
 
             GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, buffer2DT_partial_fourier_kspaceIter_, "kspace_FengHuang_recon");
 
@@ -2260,26 +2542,26 @@ bool gtPlusReconWorker2DT<T>::calibFengHuang(gtPlusReconWorkOrder2DT<T>& workOrd
     {
         GADGET_CHECK_RETURN_FALSE(src.dimensions_equal(&dst));
 
-        size_t RO = src.get_size(0);
-        size_t E1 = src.get_size(1);
-        size_t srcCHA = src.get_size(2);
-        size_t N = src.get_size(3);
-        size_t S = src.get_size(4);
+        long long RO = (long long)src.get_size(0);
+        long long E1 = (long long)src.get_size(1);
+        long long srcCHA = (long long)src.get_size(2);
+        long long N = (long long)src.get_size(3);
+        long long S = (long long)src.get_size(4);
 
-        size_t kx = workOrder2DT.partialFourier_FengHuang_kSize_RO_;
-        size_t ky = workOrder2DT.partialFourier_FengHuang_kSize_E1_;
+        long long kx = (long long)workOrder2DT.partialFourier_FengHuang_kSize_RO_;
+        long long ky = (long long)workOrder2DT.partialFourier_FengHuang_kSize_E1_;
 
         if ( kx%2 == 0 ) kx++;
         if ( ky%2 == 0 ) ky++;
 
-        int halfKx = (int)kx/2;
-        int halfKy = (int)ky/2;
+        long long halfKx = (long long)kx/2;
+        long long halfKy = (long long)ky/2;
 
         // the cross-channel kernel is not estimated
         kernel.createArray(kx, ky, srcCHA, 1, N, S);
 
-        int ii=0;
-        int num = N*S*srcCHA;
+        long long ii=0;
+        long long num = N*S*srcCHA;
 
         size_t startRO = halfKx;
         size_t endRO = RO - halfKx - 1;
@@ -2287,7 +2569,7 @@ bool gtPlusReconWorker2DT<T>::calibFengHuang(gtPlusReconWorkOrder2DT<T>& workOrd
         size_t startE1 = halfKy;
         size_t endE1 = E1 - halfKy - 1;
 
-        int rowA, colA, rowB, colB;
+        long long rowA, colA, rowB, colB;
         rowA = (endE1-startE1+1)*(endRO-startRO+1); 
         colA = kx*ky;
 
@@ -2319,7 +2601,7 @@ bool gtPlusReconWorker2DT<T>::calibFengHuang(gtPlusReconWorkOrder2DT<T>& workOrd
                 //ho2DArray<T> dst2D(RO, E1, const_cast<T*>(dst.begin())+ii*RO*E1);
 
                 size_t ro, e1, row(0);
-                int x, y;
+                long long x, y;
 
                 for ( e1=startE1; e1<=endE1; e1++ )
                 {
@@ -2368,30 +2650,30 @@ bool gtPlusReconWorker2DT<T>::performReconFangHuang(gtPlusReconWorkOrder2DT<T>&
     {
         GADGET_CHECK_RETURN_FALSE(kspaceConj.dimensions_equal(&kspace));
 
-        size_t RO = kspace.get_size(0);
-        size_t E1 = kspace.get_size(1);
-        size_t CHA = kspace.get_size(2);
-        size_t N = kspace.get_size(3);
-        size_t S = kspace.get_size(4);
+        long long RO = (long long)kspace.get_size(0);
+        long long E1 = (long long)kspace.get_size(1);
+        long long CHA = (long long)kspace.get_size(2);
+        long long N = (long long)kspace.get_size(3);
+        long long S = (long long)kspace.get_size(4);
 
-        size_t kx = kernel.get_size(0);
-        size_t ky = kernel.get_size(1);
+        long long kx = (long long)kernel.get_size(0);
+        long long ky = (long long)kernel.get_size(1);
 
-        int halfKx = kx/2;
-        int halfKy = ky/2;
-        size_t kerN = kernel.get_size(4);
+        long long halfKx = kx/2;
+        long long halfKy = ky/2;
+        long long kerN = (long long)kernel.get_size(4);
         GADGET_CHECK_RETURN_FALSE( (kerN==1) || (kerN==N) );
 
-        int num = CHA*N*S;
+        long long num = CHA*N*S;
 
-        int rowD = RO*E1 - ( (endE1-startE1+1) * (endRO-startRO+1) );
-        int colD = kx*ky;
+        long long rowD = RO*E1 - ( (endE1-startE1+1) * (endRO-startRO+1) );
+        long long colD = kx*ky;
 
         ho2DArray<size_t> coeffX(rowD, colD);
         ho2DArray<size_t> coeffY(rowD, colD);
 
-        size_t ro, e1, row(0);
-        int x, y, dx, dy;
+        long long ro, e1, row(0);
+        long long x, y, dx, dy;
 
         for ( e1=0; e1<E1; e1++ )
         {
@@ -2425,7 +2707,7 @@ bool gtPlusReconWorker2DT<T>::performReconFangHuang(gtPlusReconWorkOrder2DT<T>&
             }
         }
 
-        int ii;
+        long long ii;
         #ifdef GCC_OLD_FLAG
             #pragma omp parallel default(none) private(ii) shared(num, RO, E1, CHA, N, S, kerN, rowD, colD, coeffX, coeffY)
         #else
@@ -2436,13 +2718,17 @@ bool gtPlusReconWorker2DT<T>::performReconFangHuang(gtPlusReconWorkOrder2DT<T>&
             hoMatrix<T> K(colD, 1);
             hoMatrix<T> R(rowD, 1);
 
+            Gadgetron::clear(D);
+            Gadgetron::clear(K);
+            Gadgetron::clear(R);
+
             #pragma omp for
             for ( ii=0; ii<num; ii ++ )
             {
                 ho2DArray<T> src2D(RO, E1, const_cast<T*>(kspaceConj.begin())+ii*RO*E1);
                 ho2DArray<T> dst2D(RO, E1, kspace.begin()+ii*RO*E1);
 
-                size_t ro, e1, row, col;
+                long long row, col;
                 for ( col=0; col<colD; col++ )
                 {
                     for ( row=0; row<rowD; row++ )
@@ -2453,10 +2739,10 @@ bool gtPlusReconWorker2DT<T>::performReconFangHuang(gtPlusReconWorkOrder2DT<T>&
 
                 if ( kerN == 1 )
                 {
-                    int ind = ii;
-                    int currS = ind/(CHA*N);
+                    long long ind = ii;
+                    long long currS = ind/(CHA*N);
                     ind %= CHA*N;
-                    int currN = ind/CHA;
+                    long long currN = ind/CHA;
                     ind %= CHA;
                     memcpy(K.begin(), kernel.begin()+(ind+currS*CHA)*colD, sizeof(T)*colD);
                 }
@@ -2466,7 +2752,7 @@ bool gtPlusReconWorker2DT<T>::performReconFangHuang(gtPlusReconWorkOrder2DT<T>&
                 }
 
                 // R = D*K
-                Gadgetron::GeneralMatrixProduct_gemm(R, D, false, K, false);
+                Gadgetron::gemm(R, D, false, K, false);
 
                 for ( row=0; row<rowD; row++ )
                 {
@@ -2506,7 +2792,7 @@ estimateJobSize(gtPlusReconWorkOrder<T>* workOrder2DT, size_t maxNumOfBytesPerJo
         size_t dstCHA = workOrder2DT->kernelIm_->get_size(3);
 
         size_t totalJobNum = N;
-        jobSize = std::ceil( (double)(totalJobNum+overlapBetweenJobs*(nodeN-1))/(double)nodeN );
+        jobSize = (size_t)std::ceil( (double)(totalJobNum+overlapBetweenJobs*(nodeN-1))/(double)nodeN );
 
         size_t numOfBytesPerJob = sizeof(T)*( RO*E1*srcCHA*dstCHA*jobSize + 2*RO*E1*srcCHA*jobSize );
 
@@ -2514,7 +2800,7 @@ estimateJobSize(gtPlusReconWorkOrder<T>* workOrder2DT, size_t maxNumOfBytesPerJo
         while ( numOfBytesPerJob > maxNumOfBytesPerJob*1024*1024*1024-64.0*1024*1024 )
         {
             nodeN *= 2;
-            jobSize = std::ceil( (double)(totalJobNum+overlapBetweenJobs*(nodeN-1))/(double)nodeN );
+            jobSize = (size_t)std::ceil( (double)(totalJobNum+overlapBetweenJobs*(nodeN-1))/(double)nodeN );
             numOfBytesPerJob = sizeof(T)*( RO*E1*srcCHA*dstCHA*jobSize + 2*RO*E1*srcCHA*jobSize );
         }
 
diff --git a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker2DTGRAPPA.h b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker2DTGRAPPA.h
index aa36501..48ab6ea 100644
--- a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker2DTGRAPPA.h
+++ b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker2DTGRAPPA.h
@@ -5,9 +5,10 @@
 
 #pragma once
 
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
 #include "GadgetronTimer.h"
 #include "gtPlusISMRMRDReconUtil.h"
+#include "gtPlusISMRMRDReconCoilMapEstimation.h"
 #include "gtPlusISMRMRDReconWorker2DT.h"
 #include "gtPlusGRAPPA.h"
 
@@ -19,6 +20,7 @@ class gtPlusReconWorker2DTGRAPPA : public gtPlusReconWorker2DT<T>
 public:
 
     typedef gtPlusReconWorker2DT<T> BaseClass;
+    typedef typename BaseClass::value_type value_type;
 
     gtPlusReconWorker2DTGRAPPA() : BaseClass() {}
     virtual ~gtPlusReconWorker2DTGRAPPA() {}
@@ -32,6 +34,7 @@ public:
     using BaseClass::gt_timer2_;
     using BaseClass::gt_timer3_;
     using BaseClass::performTiming_;
+    using BaseClass::verbose_;
     using BaseClass::gt_exporter_;
     using BaseClass::debugFolder_;
     using BaseClass::gtPlus_util_;
@@ -71,7 +74,7 @@ performCalibPrep(const hoNDArray<T>& ref_src, const hoNDArray<T>& ref_dst, gtPlu
 
         std::vector<int> kE1, oE1;
         bool fitItself = true;
-        GADGET_CHECK_RETURN_FALSE(grappa_.kerPattern(kE1, oE1, workOrder2DT->acceFactorE1_, workOrder2DT->grappa_kSize_E1_, fitItself));
+        GADGET_CHECK_RETURN_FALSE(grappa_.kerPattern(kE1, oE1, (int)workOrder2DT->acceFactorE1_, workOrder2DT->grappa_kSize_E1_, fitItself));
 
         size_t kRO = workOrder2DT->grappa_kSize_RO_;
         size_t kNE1 = workOrder2DT->grappa_kSize_E1_;
@@ -81,6 +84,11 @@ performCalibPrep(const hoNDArray<T>& ref_src, const hoNDArray<T>& ref_dst, gtPlu
         workOrder2DT->kernelIm_->create(RO, E1, srcCHA, dstCHA, refN, S);
         workOrder2DT->unmixingCoeffIm_->create(RO, E1, srcCHA, refN, S);
         workOrder2DT->gfactor_.create(RO, E1, refN, S);
+
+        if ( workOrder2DT->wrap_around_map_needed_ )
+        {
+            workOrder2DT->wrap_around_map_.create(RO, E1, 2, refN, S);
+        }
     }
     catch(...)
     {
@@ -109,9 +117,11 @@ performCalibImpl(const hoNDArray<T>& ref_src, const hoNDArray<T>& ref_dst, gtPlu
         size_t refN = ref_dst.get_size(3);
         size_t dstCHA = ref_dst.get_size(2);
 
+        gtPlusGRAPPA<T> grappa_local;
+
         std::vector<int> kE1, oE1;
         bool fitItself = true;
-        GADGET_CHECK_RETURN_FALSE(grappa_.kerPattern(kE1, oE1, workOrder2DT->acceFactorE1_, workOrder2DT->grappa_kSize_E1_, fitItself));
+        GADGET_CHECK_RETURN_FALSE(grappa_local.kerPattern(kE1, oE1, (size_t)workOrder2DT->acceFactorE1_, workOrder2DT->grappa_kSize_E1_, fitItself));
 
         size_t kRO = workOrder2DT->grappa_kSize_RO_;
         size_t kNE1 = workOrder2DT->grappa_kSize_E1_;
@@ -120,30 +130,73 @@ performCalibImpl(const hoNDArray<T>& ref_src, const hoNDArray<T>& ref_dst, gtPlu
         ho3DArray<T> acsSrc(refRO, refE1, srcCHA, const_cast<T*>(ref_src.begin()+n*refRO*refE1*srcCHA+usedS*refRO*refE1*srcCHA*refN));
         ho3DArray<T> acsDst(refRO, refE1, dstCHA, const_cast<T*>(ref_dst.begin()+n*refRO*refE1*dstCHA+usedS*refRO*refE1*dstCHA*refN));
 
-        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, acsSrc, "acsSrc");
-        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, acsDst, "acsDst");
+        std::ostringstream ostr;
+        ostr << "_n_" << n << "s_" << usedS;
+        std::string suffix = ostr.str();
 
-        grappa_.calib_use_gpu_  = workOrder2DT->grappa_use_gpu_;
+        std::string filename = "acsSrc";
+        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, acsSrc, filename+suffix);
+
+        filename = "acsDst";
+        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, acsDst, filename+suffix);
+
+        grappa_local.calib_use_gpu_  = workOrder2DT->grappa_use_gpu_;
 
         ho5DArray<T> ker(kRO, kNE1, srcCHA, dstCHA, oNE1, workOrder2DT->kernel_->begin()+n*kRO*kNE1*srcCHA*dstCHA*oNE1+usedS*kRO*kNE1*srcCHA*dstCHA*oNE1*refN);
-        grappa_.calib(acsSrc, acsDst, workOrder2DT->grappa_reg_lamda_, kRO, kE1, oE1, ker);
 
-        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, ker, "ker");
+         Gadgetron::GadgetronTimer gt_timer_local;
+         gt_timer_local.set_timing_in_destruction(false);
+
+        GADGET_CHECK_PERFORM(performTiming_, gt_timer_local.start("grappa_local.calib ... "));
+        GADGET_CHECK_RETURN_FALSE(grappa_local.calib(acsSrc, acsDst, workOrder2DT->grappa_reg_lamda_, kRO, kE1, oE1, ker));
+        GADGET_CHECK_PERFORM(performTiming_, gt_timer_local.stop());
+
+        filename = "ker";
+        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, ker, filename+suffix);
 
         hoNDArray<T> kIm(RO, E1, srcCHA, dstCHA, workOrder2DT->kernelIm_->begin()+n*RO*E1*srcCHA*dstCHA+usedS*RO*E1*srcCHA*dstCHA*refN);
-        grappa_.imageDomainKernel(ker, kRO, kE1, oE1, RO, E1, kIm);
+        GADGET_CHECK_PERFORM(performTiming_, gt_timer_local.start("grappa_local.imageDomainKernel ... "));
+        GADGET_CHECK_RETURN_FALSE(grappa_local.imageDomainKernel(ker, kRO, kE1, oE1, RO, E1, kIm));
+        GADGET_CHECK_PERFORM(performTiming_, gt_timer_local.stop());
 
-        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, kIm, "kIm");
+        filename = "kIm";
+        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, kIm, filename+suffix);
 
         hoNDArray<T> coilMap(RO, E1, dstCHA, workOrder2DT->coilMap_->begin()+n*RO*E1*dstCHA+usedS*RO*E1*dstCHA*refN);
         hoNDArray<T> unmixC(RO, E1, srcCHA, workOrder2DT->unmixingCoeffIm_->begin()+n*RO*E1*srcCHA+usedS*RO*E1*srcCHA*refN);
         hoNDArray<T> gFactor(RO, E1, workOrder2DT->gfactor_.begin()+n*RO*E1+usedS*RO*E1*refN);
 
-        this->unmixCoeff(kIm, coilMap, unmixC, gFactor);
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::scal(1.0/workOrder2DT->acceFactorE1_, gFactor));
+        GADGET_CHECK_PERFORM(performTiming_, gt_timer_local.start("unmixCoeff ... "));
+        GADGET_CHECK_RETURN_FALSE(this->unmixCoeff(kIm, coilMap, unmixC, gFactor));
+        GADGET_CHECK_PERFORM(performTiming_, gt_timer_local.stop());
+
+        GADGET_CHECK_PERFORM(performTiming_, gt_timer_local.start("scale gfactor ... "));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::scal( (value_type)(1.0/workOrder2DT->acceFactorE1_), gFactor));
+        GADGET_CHECK_PERFORM(performTiming_, gt_timer_local.stop());
+
+        filename = "unmixC";
+        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, unmixC, filename+suffix);
+
+        filename = "gFactor";
+        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, gFactor, filename+suffix);
+
+        if ( workOrder2DT->wrap_around_map_needed_ )
+        {
+            hoNDArray<T> wrapAroundMap(RO, E1, 2, workOrder2DT->wrap_around_map_.begin()+n*RO*E1*2+usedS*RO*E1*2*refN);
 
-        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, unmixC, "unmixC");
-        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, gFactor, "gFactor");
+            gtPlusISMRMRDReconCoilMapEstimation<T> coil_map_util;
+
+            hoNDArray<T> coilMap(RO, E1, acsDst.get_size(2));
+            hoNDArray<value_type> eigD(RO, E1, 2);
+
+            value_type thres = workOrder2DT->spirit_reg_lamda_;
+
+            GADGET_CHECK_RETURN_FALSE(coil_map_util.coilMap2DSPIRIT(acsDst, coilMap, eigD, workOrder2DT->spirit_kSize_RO_, workOrder2DT->spirit_kSize_E1_, thres));
+            GADGET_CHECK_RETURN_FALSE(wrapAroundMap.copyFrom(eigD));
+
+            filename = "wrapAroundMap";
+            GADGET_EXPORT_ARRAY(debugFolder_, gt_exporter_, eigD, filename+suffix);
+        }
     }
     catch(...)
     {
@@ -183,6 +236,36 @@ performUnwrapping(gtPlusReconWorkOrder2DT<T>* workOrder2DT, const hoNDArray<T>&
             Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft2c(data_dst, buffer2DT_);
         }
 
+        double effectiveAcceFactor = workOrder2DT->acceFactorE1_;
+        if ( workOrder2DT->start_E1_>0 && workOrder2DT->end_E1_>0 )
+        {
+            size_t num = workOrder2DT->end_E1_ - workOrder2DT->start_E1_ + 1;
+            size_t res = (size_t)( num % (size_t)(std::ceil(workOrder2DT->acceFactorE1_)) );
+            double N = std::floor( (double)(num-res)/(double)workOrder2DT->acceFactorE1_);
+            effectiveAcceFactor = (double)num/N;
+        }
+        else
+        {
+            size_t num = E1;
+            size_t res = (size_t)( num % (size_t)(std::ceil(workOrder2DT->acceFactorE1_)) );
+            double N = std::floor( (double)(num-res)/(double)workOrder2DT->acceFactorE1_);
+            effectiveAcceFactor = (double)num/N;
+        }
+
+        typename realType<T>::Type fftCompensationRatio = (typename realType<T>::Type)(1.0/std::sqrt(effectiveAcceFactor));
+
+        Gadgetron::scal( fftCompensationRatio, buffer2DT_);
+
+        // if the image data is scaled and ref lines are going to be filled back to the data, 
+        // the reference lines should be scaled too
+        if ( workOrder2DT->CalibMode_ == ISMRMRD_embedded )
+        {
+            if ( workOrder2DT->embedded_ref_fillback_ )
+            {
+                Gadgetron::scal( fftCompensationRatio, workOrder2DT->ref_);
+            }
+        }
+
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, buffer2DT_, "buffer2DT_");
 
         bool recon_kspace = false;
@@ -225,7 +308,25 @@ performUnwrapping(gtPlusReconWorkOrder2DT<T>* workOrder2DT, const hoNDArray<T>&
                     hoNDArray<T> unwarppedIm(RO, E1, dstCHA, N, workOrder2DT->fullkspace_.begin()+usedS*RO*E1*dstCHA*N);
 
                     this->applyImageDomainKernelImage(aliasedIm, kIm, buffer2DT_unwrapping_, unwarppedIm);
-                    GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, unwarppedIm, "unwarppedIm");
+
+                    if ( !debugFolder_.empty() )
+                    {
+                        {
+                            std::ostringstream ostr;
+                            ostr << "kIm_" << usedS;
+                            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, kIm, ostr.str());
+                        }
+
+                        {
+                            std::ostringstream ostr;
+                            ostr << "aliasedIm_" << usedS;
+                            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, aliasedIm, ostr.str());
+                        }
+
+                        std::ostringstream ostr;
+                        ostr << "unwarppedIm_" << usedS;
+                        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, unwarppedIm, ostr.str());
+                    }
                 }
                 else
                 {
@@ -267,10 +368,22 @@ performUnwrapping(gtPlusReconWorkOrder2DT<T>* workOrder2DT, const hoNDArray<T>&
                     gtPlusISMRMRDReconUtilComplex<T>().coilCombine(unwarppedIm, coilMap, combined);
                 }
 
-                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, combined, "combined");
+                if ( !debugFolder_.empty() )
+                {
+                    std::ostringstream ostr;
+                    ostr << "combined_" << usedS;
+                    GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, combined, ostr.str());
+                }
             }
 
             Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fft2c(workOrder2DT->fullkspace_);
+
+            if ( !debugFolder_.empty() )
+            {
+                std::ostringstream ostr;
+                ostr << "fullkspace_" << usedS;
+                GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, workOrder2DT->fullkspace_, ostr.str());
+            }
         }
         else
         {
@@ -302,6 +415,10 @@ performUnwrapping(gtPlusReconWorkOrder2DT<T>* workOrder2DT, const hoNDArray<T>&
                     }
                 }
             }
+
+            workOrder2DT->fullkspace_.create(RO, E1, 1, N, S);
+            memcpy(workOrder2DT->fullkspace_.begin(), workOrder2DT->complexIm_.begin(), workOrder2DT->complexIm_.get_number_of_bytes());
+            Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fft2c(workOrder2DT->fullkspace_);
         }
     }
     catch(...)
diff --git a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker2DTL1SPIRITNCG.h b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker2DTL1SPIRITNCG.h
index dc62a69..83958e0 100644
--- a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker2DTL1SPIRITNCG.h
+++ b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker2DTL1SPIRITNCG.h
@@ -39,6 +39,7 @@ public:
     using BaseClass::gt_timer2_;
     using BaseClass::gt_timer3_;
     using BaseClass::performTiming_;
+    using BaseClass::verbose_;
     using BaseClass::gt_exporter_;
     using BaseClass::debugFolder_;
     using BaseClass::gtPlus_util_;
@@ -103,6 +104,18 @@ performUnwarppingImpl(gtPlusReconWorkOrder<T>* workOrder2DT, hoNDArray<T>& kspac
             GADGET_CHECK_RETURN_FALSE(BaseClass::performUnwarppingImpl(workOrder2DT, kspace, adj_forward_G_I, kspaceLinear, s));
             GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
         }
+        else
+        {
+            if ( workOrder2DT->kspace_initial_.get_number_of_elements() == kspace.get_number_of_elements() )
+            {
+                GADGET_MSG("Start the iteration with the input initial kspace ... ");
+                memcpy(kspaceLinear.begin(), workOrder2DT->kspace_initial_.begin(), kspace.get_number_of_bytes());
+            }
+            else
+            {
+                GADGET_MSG("Start the iteration with the input kspace ... ");
+            }
+        }
 
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, kspaceLinear, "kspaceLinear");
 
@@ -121,14 +134,17 @@ performUnwarppingImpl(gtPlusReconWorkOrder<T>* workOrder2DT, hoNDArray<T>& kspac
             {
                 typename realType<T>::Type scaleFactor = 1.0;
                 Gadgetron::norm2(kspace, scaleFactor);
-                scaleFactor /= (RO*std::sqrt(double(srcCHA)));
+                scaleFactor /= (typename realType<T>::Type)( (RO*std::sqrt(double(srcCHA))) );
 
                 workOrder2DT->spirit_ncg_scale_factor_ = scaleFactor;
             }
 
             // apply the scale
-            Gadgetron::scal( static_cast<value_type>(1.0/workOrder2DT->spirit_ncg_scale_factor_), kspaceLinear);
-            Gadgetron::scal( static_cast<value_type>(1.0/workOrder2DT->spirit_ncg_scale_factor_), kspace);
+            if ( workOrder2DT->spirit_ncg_scale_factor_ > 0 )
+            {
+                Gadgetron::scal( static_cast<value_type>(1.0/workOrder2DT->spirit_ncg_scale_factor_), kspaceLinear);
+                Gadgetron::scal( static_cast<value_type>(1.0/workOrder2DT->spirit_ncg_scale_factor_), kspace);
+            }
 
             boost::shared_ptr< hoNDArray<T> > coilMapS;
             
@@ -171,9 +187,9 @@ performUnwarppingImpl(gtPlusReconWorkOrder<T>* workOrder2DT, hoNDArray<T>& kspac
                     gtPlusWavelet3DOperator<T> wavNullSpace3DOperator;
                     wavNullSpace3DOperator.setMemoryManager(gtPlus_mem_manager_);
                     wavNullSpace3DOperator.setAcquiredPoints(acq);
-                    wavNullSpace3DOperator.scale_factor_first_dimension_ = workOrder2DT->spirit_RO_enhancement_ratio_;
-                    wavNullSpace3DOperator.scale_factor_second_dimension_ = workOrder2DT->spirit_E1_enhancement_ratio_;
-                    wavNullSpace3DOperator.scale_factor_third_dimension_ = workOrder2DT->spirit_temporal_enhancement_ratio_;
+                    wavNullSpace3DOperator.scale_factor_first_dimension_ = (value_type)workOrder2DT->spirit_RO_enhancement_ratio_;
+                    wavNullSpace3DOperator.scale_factor_second_dimension_ = (value_type)workOrder2DT->spirit_E1_enhancement_ratio_;
+                    wavNullSpace3DOperator.scale_factor_third_dimension_ = (value_type)workOrder2DT->spirit_temporal_enhancement_ratio_;
 
                     if ( workOrder2DT->spirit_use_coil_sen_map_ && workOrder2DT->coilMap_ )
                     {
@@ -181,8 +197,8 @@ performUnwarppingImpl(gtPlusReconWorkOrder<T>* workOrder2DT, hoNDArray<T>& kspac
                     }
 
                     // set operators
-                    ncgsolver.add(spirit, T(workOrder2DT->spirit_parallel_imaging_lamda_) );
-                    ncgsolver.add(wavNullSpace3DOperator, T(workOrder2DT->spirit_image_reg_lamda_) );
+                    ncgsolver.add(spirit, T( (value_type)workOrder2DT->spirit_parallel_imaging_lamda_ ) );
+                    ncgsolver.add(wavNullSpace3DOperator, T( (value_type)workOrder2DT->spirit_image_reg_lamda_ ) );
 
                     GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("NCG spirit solver for 2DT ... "));
                     ncgsolver.solve(b, res);
@@ -208,18 +224,18 @@ performUnwarppingImpl(gtPlusReconWorkOrder<T>* workOrder2DT, hoNDArray<T>& kspac
                     gtPlusWaveletNoNullSpace3DOperator<T> wavNoNullSpace3DOperator;
                     wavNoNullSpace3DOperator.setMemoryManager(gtPlus_mem_manager_);
                     wavNoNullSpace3DOperator.setAcquiredPoints(acq);
-                    wavNoNullSpace3DOperator.scale_factor_first_dimension_ = workOrder2DT->spirit_RO_enhancement_ratio_;
-                    wavNoNullSpace3DOperator.scale_factor_second_dimension_ = workOrder2DT->spirit_E1_enhancement_ratio_;
-                    wavNoNullSpace3DOperator.scale_factor_third_dimension_ = workOrder2DT->spirit_temporal_enhancement_ratio_;
+                    wavNoNullSpace3DOperator.scale_factor_first_dimension_ = (value_type)workOrder2DT->spirit_RO_enhancement_ratio_;
+                    wavNoNullSpace3DOperator.scale_factor_second_dimension_ = (value_type)workOrder2DT->spirit_E1_enhancement_ratio_;
+                    wavNoNullSpace3DOperator.scale_factor_third_dimension_ = (value_type)workOrder2DT->spirit_temporal_enhancement_ratio_;
 
                     if ( workOrder2DT->spirit_use_coil_sen_map_ && workOrder2DT->coilMap_ )
                     {
                         wavNoNullSpace3DOperator.setCoilSenMap(coilMapS);
                     }
 
-                    ncgsolver.add(spirit_noNullSpace, T(workOrder2DT->spirit_parallel_imaging_lamda_) );
-                    ncgsolver.add(wavNoNullSpace3DOperator, T(workOrder2DT->spirit_image_reg_lamda_) );
-                    ncgsolver.add(dataOper, T(workOrder2DT->spirit_data_fidelity_lamda_) );
+                    ncgsolver.add(spirit_noNullSpace, T( (value_type)workOrder2DT->spirit_parallel_imaging_lamda_ ) );
+                    ncgsolver.add(wavNoNullSpace3DOperator, T( (value_type)workOrder2DT->spirit_image_reg_lamda_ ) );
+                    ncgsolver.add(dataOper, T( (value_type)workOrder2DT->spirit_data_fidelity_lamda_ ) );
 
                     GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("NCG spirit solver for 2DT without null space ... "));
                     ncgsolver.solve(b, res);
@@ -262,8 +278,8 @@ performUnwarppingImpl(gtPlusReconWorkOrder<T>* workOrder2DT, hoNDArray<T>& kspac
                     }
 
                     // set operators
-                    ncgsolver.add(spirit, T(workOrder2DT->spirit_parallel_imaging_lamda_) );
-                    ncgsolver.add(wavNullSpace2DOperator, T(workOrder2DT->spirit_image_reg_lamda_) );
+                    ncgsolver.add(spirit, T( (value_type)workOrder2DT->spirit_parallel_imaging_lamda_ ) );
+                    ncgsolver.add(wavNullSpace2DOperator, T( (value_type)workOrder2DT->spirit_image_reg_lamda_ ) );
 
                     GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("NCG spirit solver for 2D ... "));
                     ncgsolver.solve(b, res);
@@ -295,9 +311,9 @@ performUnwarppingImpl(gtPlusReconWorkOrder<T>* workOrder2DT, hoNDArray<T>& kspac
                         wavNoNullSpace2DOperator.setCoilSenMap(coilMapS);
                     }
 
-                    ncgsolver.add(spirit_noNullSpace, T(workOrder2DT->spirit_parallel_imaging_lamda_) );
-                    ncgsolver.add(wavNoNullSpace2DOperator, T(workOrder2DT->spirit_image_reg_lamda_) );
-                    ncgsolver.add(dataOper, T(workOrder2DT->spirit_data_fidelity_lamda_) );
+                    ncgsolver.add(spirit_noNullSpace, T( (value_type)workOrder2DT->spirit_parallel_imaging_lamda_ ) );
+                    ncgsolver.add(wavNoNullSpace2DOperator, T( (value_type)workOrder2DT->spirit_image_reg_lamda_ ) );
+                    ncgsolver.add(dataOper, T( (value_type)workOrder2DT->spirit_data_fidelity_lamda_ ) );
 
                     GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("NCG spirit solver for 2D without null space ... "));
                     ncgsolver.solve(b, res);
@@ -307,7 +323,7 @@ performUnwarppingImpl(gtPlusReconWorkOrder<T>* workOrder2DT, hoNDArray<T>& kspac
                 }
             }
 
-            Gadgetron::scal(T(workOrder2DT->spirit_ncg_scale_factor_), res);
+            Gadgetron::scal(T( (value_type)workOrder2DT->spirit_ncg_scale_factor_ ), res);
         }
         else
         {
diff --git a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker2DTNoAcceleration.h b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker2DTNoAcceleration.h
index 68e815f..86881a0 100644
--- a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker2DTNoAcceleration.h
+++ b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker2DTNoAcceleration.h
@@ -5,7 +5,7 @@
 
 #pragma once
 
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
 
 #include "GadgetronTimer.h"
 
@@ -20,6 +20,7 @@ class gtPlusReconWorker2DTNoAcceleration : public gtPlusReconWorker2DT<T>
 public:
 
     typedef gtPlusReconWorker2DT<T> BaseClass;
+    typedef typename BaseClass::value_type value_type;
 
     gtPlusReconWorker2DTNoAcceleration() : BaseClass() {}
     virtual ~gtPlusReconWorker2DTNoAcceleration() {}
@@ -30,6 +31,7 @@ public:
     using BaseClass::gt_timer2_;
     using BaseClass::gt_timer3_;
     using BaseClass::performTiming_;
+    using BaseClass::verbose_;
     using BaseClass::gt_exporter_;
     using BaseClass::debugFolder_;
     using BaseClass::gtPlus_util_;
@@ -57,8 +59,8 @@ bool gtPlusReconWorker2DTNoAcceleration<T>::performRecon(gtPlusReconWorkOrder2DT
         if ( !workOrder2DT->workFlow_use_BufferedKernel_ )
         {
             GADGET_CHECK_PERFORM(performTiming_, gt_timer1_.start("prepRef"));
-            GADGET_CHECK_RETURN_FALSE(prepRef(workOrder2DT, workOrder2DT->ref_, workOrder2DT->ref_recon_, workOrder2DT->ref_coil_map_, 
-                        workOrder2DT->start_RO_, workOrder2DT->end_RO_, workOrder2DT->start_E1_, workOrder2DT->end_E1_, workOrder2DT->data_.get_size(1)));
+            GADGET_CHECK_RETURN_FALSE(this->prepRef(workOrder2DT, workOrder2DT->ref_, workOrder2DT->ref_recon_, workOrder2DT->ref_coil_map_, 
+                                                workOrder2DT->start_RO_, workOrder2DT->end_RO_, workOrder2DT->start_E1_, workOrder2DT->end_E1_, workOrder2DT->data_.get_size(1)));
             GADGET_CHECK_PERFORM(performTiming_, gt_timer1_.stop());
         }
 
@@ -85,13 +87,13 @@ bool gtPlusReconWorker2DTNoAcceleration<T>::performRecon(gtPlusReconWorkOrder2DT
                 usedS = workOrder2DT->no_acceleration_whichS_combinationcoeff_;
                 if ( usedS >= S ) usedS = S-1;
 
-                hoNDArray<T> refCoilMapS(RO, E1, refN, workOrder2DT->ref_coil_map_.begin()+usedS*RO*E1*refN);
+                hoNDArray<T> refCoilMapS(RO, E1, CHA, refN, workOrder2DT->ref_coil_map_.begin()+usedS*RO*E1*CHA*refN);
                 GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft2c(refCoilMapS, buffer2DT_));
 
                 hoNDArray<T> coilMapS(RO, E1, CHA, refN, workOrder2DT->coilMap_->begin()+usedS*RO*E1*CHA*refN);
 
                 GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap2DNIH(buffer2DT_, 
-                        coilMapS, workOrder2DT->coil_map_algorithm_, workOrder2DT->csm_kSize_, workOrder2DT->csm_powermethod_num_, workOrder2DT->csm_iter_num_, workOrder2DT->csm_iter_thres_, workOrder2DT->csm_use_gpu_));
+                        coilMapS, workOrder2DT->coil_map_algorithm_, workOrder2DT->csm_kSize_, workOrder2DT->csm_powermethod_num_, workOrder2DT->csm_iter_num_, (value_type)workOrder2DT->csm_iter_thres_, workOrder2DT->csm_use_gpu_));
 
                 GADGET_CHECK_RETURN_FALSE(repmatLastDimension(*workOrder2DT->coilMap_, usedS));
             }
@@ -100,7 +102,7 @@ bool gtPlusReconWorker2DTNoAcceleration<T>::performRecon(gtPlusReconWorkOrder2DT
                 GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft2c(workOrder2DT->ref_coil_map_, buffer2DT_));
 
                 GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap2DNIH(buffer2DT_, 
-                        *workOrder2DT->coilMap_, workOrder2DT->coil_map_algorithm_, workOrder2DT->csm_kSize_, workOrder2DT->csm_powermethod_num_, workOrder2DT->csm_iter_num_, workOrder2DT->csm_iter_thres_, workOrder2DT->csm_use_gpu_));
+                        *workOrder2DT->coilMap_, workOrder2DT->coil_map_algorithm_, workOrder2DT->csm_kSize_, workOrder2DT->csm_powermethod_num_, workOrder2DT->csm_iter_num_, (value_type)workOrder2DT->csm_iter_thres_, workOrder2DT->csm_use_gpu_));
             }
 
             GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, *workOrder2DT->coilMap_, "coilMap_");
@@ -113,23 +115,31 @@ bool gtPlusReconWorker2DTNoAcceleration<T>::performRecon(gtPlusReconWorkOrder2DT
 
         GADGET_CHECK_PERFORM(performTiming_, gt_timer1_.start("perform coil combination"));
 
-        Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft2c(workOrder2DT->data_, buffer2DT_);
-        for ( usedS=0; usedS<S; usedS++ )
-        {
-            hoNDArray<T> unwarppedIm(RO, E1, CHA, N, buffer2DT_.begin()+usedS*RO*E1*CHA*N);
-            hoNDArray<T> combined(RO, E1, N, workOrder2DT->complexIm_.begin()+usedS*RO*E1*N);
+        Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft2c(workOrder2DT->data_, buffer2DT_unwrapping_);
 
-            if ( refN == N )
-            {
-                hoNDArray<T> coilMap(RO, E1, CHA, refN, workOrder2DT->coilMap_->begin()+usedS*RO*E1*CHA*refN);
-                gtPlusISMRMRDReconUtilComplex<T>().coilCombine(unwarppedIm, coilMap, combined);
-            }
-            else
+        /*if ( refN == N )
+        {*/
+            gtPlusISMRMRDReconUtilComplex<T>().coilCombine(buffer2DT_unwrapping_, *(workOrder2DT->coilMap_), workOrder2DT->complexIm_ );
+        /*}
+        else
+        {
+            for ( usedS=0; usedS<S; usedS++ )
             {
-                hoNDArray<T> coilMap(RO, E1, CHA, workOrder2DT->coilMap_->begin()+usedS*RO*E1*CHA*refN);
-                gtPlusISMRMRDReconUtilComplex<T>().coilCombine(unwarppedIm, coilMap, combined);
+                hoNDArray<T> unwarppedIm(RO, E1, CHA, N, buffer2DT_unwrapping_.begin()+usedS*RO*E1*CHA*N);
+                hoNDArray<T> combined(RO, E1, N, workOrder2DT->complexIm_.begin()+usedS*RO*E1*N);
+
+                if ( refN == N )
+                {
+                    hoNDArray<T> coilMap(RO, E1, CHA, refN, workOrder2DT->coilMap_->begin()+usedS*RO*E1*CHA*refN);
+                    gtPlusISMRMRDReconUtilComplex<T>().coilCombine(unwarppedIm, coilMap, combined);
+                }
+                else
+                {
+                    hoNDArray<T> coilMap(RO, E1, CHA, workOrder2DT->coilMap_->begin()+usedS*RO*E1*CHA*N);
+                    gtPlusISMRMRDReconUtilComplex<T>().coilCombine(unwarppedIm, coilMap, combined);
+                }
             }
-        }
+        }*/
         GADGET_CHECK_PERFORM(performTiming_, gt_timer1_.stop());
 
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, workOrder2DT->complexIm_, "combined");
diff --git a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker2DTSPIRIT.h b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker2DTSPIRIT.h
index 05e2052..540996c 100644
--- a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker2DTSPIRIT.h
+++ b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker2DTSPIRIT.h
@@ -5,7 +5,7 @@
 
 #pragma once
 
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
 #include "GadgetronTimer.h"
 #include "gtPlusISMRMRDReconUtil.h"
 #include "gtPlusISMRMRDReconWorker2DT.h"
@@ -42,9 +42,11 @@ public:
     using BaseClass::gt_timer2_;
     using BaseClass::gt_timer3_;
     using BaseClass::performTiming_;
+    using BaseClass::verbose_;
     using BaseClass::gt_exporter_;
     using BaseClass::debugFolder_;
     using BaseClass::gtPlus_util_;
+    using BaseClass::gtPlus_util_cplx_;
     using BaseClass::gtPlus_mem_manager_;
 
     using BaseClass::buffer2DT_;
@@ -81,8 +83,10 @@ performCalibPrep(const hoNDArray<T>& ref_src, const hoNDArray<T>& ref_dst, gtPlu
 
         size_t kRO = workOrder2DT->spirit_kSize_RO_;
         size_t kE1 = workOrder2DT->spirit_kSize_E1_;
+        size_t oRO = workOrder2DT->spirit_oSize_RO_;
+        size_t oE1 = workOrder2DT->spirit_oSize_E1_;
 
-        workOrder2DT->kernel_->create(kRO, kE1, srcCHA, dstCHA, 1, 1, refN, S);
+        workOrder2DT->kernel_->create(kRO, kE1, srcCHA, dstCHA, oRO, oE1, refN, S);
         workOrder2DT->kernelIm_->create(RO, E1, srcCHA, dstCHA, refN, S);
     }
     catch(...)
@@ -156,6 +160,8 @@ performCalibImpl(const hoNDArray<T>& ref_src, const hoNDArray<T>& ref_dst, gtPlu
 
         size_t kRO = workOrder2DT->spirit_kSize_RO_;
         size_t kE1 = workOrder2DT->spirit_kSize_E1_;
+        size_t oRO = workOrder2DT->spirit_oSize_RO_;
+        size_t oE1 = workOrder2DT->spirit_oSize_E1_;
 
         ho3DArray<T> acsSrc(refRO, refE1, srcCHA, const_cast<T*>(ref_src.begin()+n*refRO*refE1*srcCHA+usedS*refRO*refE1*srcCHA*refN));
         ho3DArray<T> acsDst(refRO, refE1, dstCHA, const_cast<T*>(ref_dst.begin()+n*refRO*refE1*dstCHA+usedS*refRO*refE1*dstCHA*refN));
@@ -163,25 +169,24 @@ performCalibImpl(const hoNDArray<T>& ref_src, const hoNDArray<T>& ref_dst, gtPlu
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, acsSrc, "acsSrc");
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, acsDst, "acsDst");
 
-        ho6DArray<T> ker(kRO, kE1, srcCHA, dstCHA, 1, 1, 
+        ho6DArray<T> ker(kRO, kE1, srcCHA, dstCHA, oRO, oE1, 
                             workOrder2DT->kernel_->begin()
-                            +n*kRO*kE1*srcCHA*dstCHA
-                            +usedS*kRO*kE1*srcCHA*dstCHA*refN);
+                            +n*kRO*kE1*srcCHA*dstCHA*oRO*oE1
+                            +usedS*kRO*kE1*srcCHA*dstCHA*oRO*oE1*refN);
 
         gtPlusSPIRIT2DOperator<T> spirit;
         spirit.setMemoryManager(gtPlus_mem_manager_);
 
         spirit.calib_use_gpu_ = workOrder2DT->spirit_use_gpu_;
 
-        spirit.calib(acsSrc, acsDst, workOrder2DT->spirit_reg_lamda_, kRO, kE1, 1, 1, ker);
+        spirit.calib(acsSrc, acsDst, workOrder2DT->spirit_reg_lamda_, kRO, kE1, oRO, oE1, ker);
 
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, ker, "ker");
 
         bool minusI = true;
 
         hoNDArray<T> kIm(RO, E1, srcCHA, dstCHA, workOrder2DT->kernelIm_->begin()+n*RO*E1*srcCHA*dstCHA+usedS*RO*E1*srcCHA*dstCHA*refN);
-        GADGET_CHECK_RETURN_FALSE(spirit.imageDomainKernel(ker, kRO, kE1, 1, 1, RO, E1, kIm, minusI));
-
+        GADGET_CHECK_RETURN_FALSE(spirit.imageDomainKernel(ker, kRO, kE1, oRO, oE1, RO, E1, kIm, minusI));
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, kIm, "kIm");
     }
     catch(...)
@@ -210,14 +215,16 @@ performUnwarppingImpl(gtPlusReconWorkOrder<T>* workOrder2DT, hoNDArray<T>& kspac
 
         res.create(kspace.get_dimensions());
 
-        int n;
+        long long n;
 
         #ifdef USE_OMP
-            int numThreads = (N<64) ? N : 64;
+            int numThreads = (int)( (N<64) ? N : 64 );
 
             int numOpenMPProcs = omp_get_num_procs();
             GADGET_MSG("gtPlusReconWorker2DTSPIRIT, numOpenMPProcs : " << numOpenMPProcs);
 
+            if ( numThreads > numOpenMPProcs ) numThreads = numOpenMPProcs;
+
             int maxOpenMPThreads = omp_get_max_threads();
             GADGET_MSG("gtPlusReconWorker2DTSPIRIT, maxOpenMPThreads : " << maxOpenMPThreads);
 
@@ -244,14 +251,25 @@ performUnwarppingImpl(gtPlusReconWorkOrder<T>* workOrder2DT, hoNDArray<T>& kspac
         Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifftshift2D(adj_forward_G_I, ker_Shifted);
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, ker_Shifted, "ker_Shifted");
 
-        hoNDArray<T> kspace_Shifted(kspace);
+        hoNDArray<T> kspace_Shifted;
+        kspace_Shifted = kspace;
         Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifftshift2D(kspace, kspace_Shifted);
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, kspace_Shifted, "kspace_Shifted");
 
+        hoNDArray<T> kspace_initial_Shifted;
+        bool hasInitial = false;
+        if ( workOrder2DT->kspace_initial_.dimensions_equal(&kspace) )
+        {
+            kspace_initial_Shifted = workOrder2DT->kspace_initial_;
+            Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifftshift2D(workOrder2DT->kspace_initial_, kspace_initial_Shifted);
+            hasInitial = true;
+        }
+        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, kspace_initial_Shifted, "kspace_initial_Shifted");
+
         #ifdef GCC_OLD_FLAG
-            #pragma omp parallel default(none) private(n) shared(RO, E1, srcCHA, dstCHA, kspace_Shifted, ker_Shifted, workOrder2DT, refN, N) num_threads(numThreads)
+            #pragma omp parallel default(none) private(n) shared(RO, E1, srcCHA, dstCHA, kspace, kspace_Shifted, kspace_initial_Shifted, ker_Shifted, workOrder2DT, refN, N, hasInitial) num_threads(numThreads)
         #else
-            #pragma omp parallel default(none) private(n) shared(RO, E1, srcCHA, dstCHA, kspace_Shifted, ker_Shifted, workOrder2DT, res, refN, N) num_threads(numThreads)
+            #pragma omp parallel default(none) private(n) shared(RO, E1, srcCHA, dstCHA, kspace, kspace_Shifted, kspace_initial_Shifted, ker_Shifted, workOrder2DT, res, refN, N, hasInitial) num_threads(numThreads)
         #endif
         {
             gtPlusSPIRIT2DOperator<T> spirit;
@@ -268,25 +286,51 @@ performUnwarppingImpl(gtPlusReconWorkOrder<T>* workOrder2DT, hoNDArray<T>& kspac
             gtPlusLSQRSolver<hoNDArray<T>, hoNDArray<T>, gtPlusSPIRIT2DOperator<T> > cgSolver;
 
             cgSolver.iterMax_ = workOrder2DT->spirit_iter_max_;
-            cgSolver.thres_ = workOrder2DT->spirit_iter_thres_;
+            cgSolver.thres_ = (value_type)workOrder2DT->spirit_iter_thres_;
             cgSolver.printIter_ = workOrder2DT->spirit_print_iter_;
 
             cgSolver.set(spirit);
 
             hoNDArray<T> b(RO, E1, srcCHA);
+            hoNDArray<T> unwarppedKSpace(RO, E1, dstCHA);
 
             #pragma omp for
-            for ( n=0; n<(int)N; n++ )
+            for ( n=0; n<(long long)N; n++ )
             {
-                hoNDArray<T> unwarppedKSpace(RO, E1, dstCHA, res.begin()+n*RO*E1*dstCHA);
+                // check whether the kspace is undersampled
+                bool undersampled = false;
+                for ( size_t e1=0; e1<E1; e1++ )
+                {
+                    if ( (std::abs( kspace(RO/2, e1, srcCHA-1, n) ) == 0)
+                        && (std::abs( kspace(RO/2, e1, 0, n) ) == 0) )
+                    {
+                        undersampled = true;
+                        break;
+                    }
+                }
+
+                if ( !undersampled )
+                {
+                    memcpy(res.begin()+n*RO*E1*dstCHA, kspace_Shifted.begin()+n*RO*E1*srcCHA, sizeof(T)*RO*E1*dstCHA);
+                    continue;
+                }
 
-                int kernelN = n;
-                if ( kernelN >= refN ) kernelN = refN-1;
+                long long kernelN = n;
+                if ( kernelN >= (long long)refN ) kernelN = (long long)refN-1;
 
-                boost::shared_ptr<hoNDArray<T> > acq(new hoNDArray<T>(RO, E1, srcCHA, kspace_Shifted.begin()+n*RO*E1*srcCHA));
+                boost::shared_ptr< hoNDArray<T> > acq(new hoNDArray<T>(RO, E1, srcCHA, kspace_Shifted.begin()+n*RO*E1*srcCHA));
                 spirit.setAcquiredPoints(acq);
 
-                cgSolver.x0_ = acq.get();
+                boost::shared_ptr< hoNDArray<T> > initialAcq;
+                if ( hasInitial )
+                {
+                    initialAcq = boost::shared_ptr< hoNDArray<T> >(new hoNDArray<T>(RO, E1, srcCHA, kspace_initial_Shifted.begin()+n*RO*E1*srcCHA));
+                    cgSolver.x0_ = initialAcq.get();
+                }
+                else
+                {
+                    cgSolver.x0_ = acq.get();
+                }
 
                 if ( refN > 1 )
                 {
@@ -313,10 +357,16 @@ performUnwarppingImpl(gtPlusReconWorkOrder<T>* workOrder2DT, hoNDArray<T>& kspac
                 // restore the acquired points
                 spirit.restoreAcquiredKSpace(*acq, unwarppedKSpace);
 
+                memcpy(res.begin()+n*RO*E1*dstCHA, unwarppedKSpace.begin(), unwarppedKSpace.get_number_of_bytes());
+
                 GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, unwarppedKSpace, "unwarppedKSpace_n_setAcq");
             }
         }
 
+        #ifdef USE_OMP
+            omp_set_nested(0);
+        #endif
+
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, res, "res_Shifted");
 
         Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fftshift2D(res, kspace_Shifted);
@@ -357,15 +407,22 @@ performUnwrapping(gtPlusReconWorkOrder2DT<T>* workOrder2DT, const hoNDArray<T>&
         {
             hoNDArray<T> kspaceForScaleFactor(RO, E1, srcCHA, numOfNForScaling, const_cast<T*>(data_dst.begin()));
             Gadgetron::norm2(kspaceForScaleFactor, scaleFactor);
-            scaleFactor /= (numOfNForScaling*std::sqrt(double(srcCHA)));
+            scaleFactor /= (value_type)(numOfNForScaling*std::sqrt(double(srcCHA)));
         }
         else
         {
             Gadgetron::norm2(data_dst, scaleFactor);
-            scaleFactor /= (N*std::sqrt(double(srcCHA)));
+            scaleFactor /= (value_type)(N*std::sqrt(double(srcCHA)));
         }
 
-        workOrder2DT->spirit_ncg_scale_factor_ = scaleFactor;
+        if ( workOrder2DT->spirit_ncg_scale_factor_ < 0 )
+        {
+            workOrder2DT->spirit_ncg_scale_factor_ = scaleFactor;
+        }
+        else
+        {
+            GADGET_MSG("SPIRIT - 2DT - spirit_ncg_scale_factor_ is preset : " << workOrder2DT->spirit_ncg_scale_factor_ << " ... ");
+        }
 
         // split the jobs
         bool splitByS = workOrder2DT->job_split_by_S_;
@@ -453,8 +510,8 @@ performUnwrapping(gtPlusReconWorkOrder2DT<T>* workOrder2DT, const hoNDArray<T>&
 
                 GADGET_MSG("SPIRIT - 2DT - total job : " << jobList.size() << " - job N : " << jobN << " - cloud size : " << cloudSize);
 
-                unsigned int numOfJobRunOnCloud = jobList.size() - jobList.size()/(cloudSize+1);
-                if ( !runJobsOnLocalNode ) numOfJobRunOnCloud = jobList.size();
+                unsigned int numOfJobRunOnCloud = (unsigned int)(jobList.size() - jobList.size()/(cloudSize+1));
+                if ( !runJobsOnLocalNode ) numOfJobRunOnCloud = (unsigned int)jobList.size();
                 GADGET_MSG("SPIRIT - 2DT - numOfJobRunOnCloud : " << numOfJobRunOnCloud << " ... ");
 
                 typedef Gadgetron::GadgetCloudController< gtPlusReconJob2DT<T> > GTCloudControllerType;
diff --git a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker3DT.h b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker3DT.h
index b999e21..121aff4 100644
--- a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker3DT.h
+++ b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker3DT.h
@@ -43,7 +43,7 @@ public:
     {
         // check whether we have all-zeros input
         value_type v(1);
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::norm2(workOrder->data_, v));
+        Gadgetron::norm2(workOrder->data_, v);
         if ( v <= 0 )
         {
             GADGET_WARN_MSG("gtPlusReconWorker2DT, performRecon(workOrder) : incoming data contains all-zeros ... ");
@@ -63,7 +63,7 @@ public:
         {
             this->autoReconParameter(workOrder3DT);
             GADGET_MSG("Gt Plus 3DT -- automatic paramter selection ---");
-            workOrder3DT->print(std::cout);
+            if ( !this->debugFolder_.empty() ) { workOrder3DT->print(std::cout); }
         }
 
         return this->performRecon(workOrder3DT);
@@ -159,10 +159,13 @@ public:
     // estimate job size for 3DT recon
     virtual bool estimateJobSize(gtPlusReconWorkOrder<T>* workOrder3DT, size_t maxNumOfBytesPerJob, size_t overlapBetweenJobs, size_t numOfNodes, size_t& jobSize);
 
+    using BaseClass::partial_fourier_handling_;
+
     using BaseClass::gt_timer1_;
     using BaseClass::gt_timer2_;
     using BaseClass::gt_timer3_;
     using BaseClass::performTiming_;
+    using BaseClass::verbose_;
     using BaseClass::gt_exporter_;
     using BaseClass::debugFolder_;
     using BaseClass::gtPlus_util_;
@@ -212,7 +215,11 @@ bool gtPlusReconWorker3DT<T>::performRecon(WorkOrderType* workOrder3DT)
         // apply coil compression coefficients
         if ( workOrder3DT->workFlow_use_BufferedKernel_ )
         {
-            if ( workOrder3DT->coil_compression_ && workOrder3DT->recon_algorithm_!=ISMRMRD_SPIRIT && workOrder3DT->recon_algorithm_!=ISMRMRD_L1SPIRIT )
+            if ( workOrder3DT->coil_compression_ 
+                && workOrder3DT->recon_algorithm_!=ISMRMRD_SPIRIT 
+                && workOrder3DT->recon_algorithm_!=ISMRMRD_L1SPIRIT 
+                && workOrder3DT->recon_algorithm_!=ISMRMRD_L1SPIRIT_SLEP 
+                && workOrder3DT->recon_algorithm_!=ISMRMRD_L1SPIRIT_SLEP_MOTION_COMP )
             {
                 GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, workOrder3DT->data_, "data_");
                 GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtil<T>().applyKLCoilCompressionCoeff(workOrder3DT->data_, *workOrder3DT->coilCompressionCoef_, data_dst_, true));
@@ -228,6 +235,8 @@ bool gtPlusReconWorker3DT<T>::performRecon(WorkOrderType* workOrder3DT)
             if ( workOrder3DT->coil_compression_ 
                 && workOrder3DT->recon_algorithm_!=ISMRMRD_SPIRIT 
                 && workOrder3DT->recon_algorithm_!=ISMRMRD_L1SPIRIT 
+                && workOrder3DT->recon_algorithm_!=ISMRMRD_L1SPIRIT_SLEP 
+                && workOrder3DT->recon_algorithm_!=ISMRMRD_L1SPIRIT_SLEP_MOTION_COMP 
                 && (workOrder3DT->acceFactorE1_>1 || workOrder3DT->acceFactorE2_>1) )
             {
                 ref_src_ = workOrder3DT->ref_recon_;
@@ -263,7 +272,11 @@ bool gtPlusReconWorker3DT<T>::performRecon(WorkOrderType* workOrder3DT)
 
                 GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.stop());
 
-                if ( !workOrder3DT->downstream_coil_compression_ || workOrder3DT->recon_algorithm_==ISMRMRD_SPIRIT || workOrder3DT->recon_algorithm_==ISMRMRD_L1SPIRIT )
+                if ( !workOrder3DT->downstream_coil_compression_ 
+                    || workOrder3DT->recon_algorithm_==ISMRMRD_SPIRIT 
+                    || workOrder3DT->recon_algorithm_==ISMRMRD_L1SPIRIT 
+                    || workOrder3DT->recon_algorithm_==ISMRMRD_L1SPIRIT_SLEP 
+                    || workOrder3DT->recon_algorithm_==ISMRMRD_L1SPIRIT_SLEP_MOTION_COMP )
                 {
                     ref_src_ = ref_dst_;
                 }
@@ -372,18 +385,9 @@ estimateCoilMap(gtPlusReconWorkOrder3DT<T>* workOrder3DT, const hoNDArray<T>& re
                     GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft3c(refCoilMapN, buffer3DT));
 
                     GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("coil map estimation ... "));
-                    if ( workOrder3DT->csm_use_gpu_ )
-                    {
-                        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIHGPU_FullResMap(buffer3DT, 
-                                coilMapN, workOrder3DT->coil_map_algorithm_, workOrder3DT->csm_kSize_, 
-                                workOrder3DT->csm_powermethod_num_, workOrder3DT->csm_iter_num_, workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
-                    }
-                    else
-                    {
-                        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIH(buffer3DT, 
-                                coilMapN, workOrder3DT->coil_map_algorithm_, workOrder3DT->csm_kSize_, 
-                                workOrder3DT->csm_powermethod_num_, workOrder3DT->csm_iter_num_, workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
-                    }
+                    GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIH(buffer3DT, 
+                            coilMapN, workOrder3DT->coil_map_algorithm_, workOrder3DT->csm_kSize_, 
+                            workOrder3DT->csm_powermethod_num_, workOrder3DT->csm_iter_num_, (value_type)workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
                     GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
                     memcpy(workOrder3DT->coilMap_->begin()+usedN*RO*E1*E2*dstCHA, coilMapN.begin(), coilMapN.get_number_of_bytes());
@@ -396,20 +400,10 @@ estimateCoilMap(gtPlusReconWorkOrder3DT<T>* workOrder3DT, const hoNDArray<T>& re
                     GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft3c(ref_coil_map_dst, buffer3DT));
 
                     GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("coil map estimation ... "));
-                    if ( workOrder3DT->csm_use_gpu_ )
-                    {
-                        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIHGPU_FullResMap(buffer3DT, 
-                                *workOrder3DT->coilMap_, workOrder3DT->coil_map_algorithm_, 
-                                workOrder3DT->csm_kSize_, workOrder3DT->csm_powermethod_num_, 
-                                workOrder3DT->csm_iter_num_, workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
-                    }
-                    else
-                    {
-                        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIH(buffer3DT, 
-                                *workOrder3DT->coilMap_, workOrder3DT->coil_map_algorithm_, 
-                                workOrder3DT->csm_kSize_, workOrder3DT->csm_powermethod_num_, 
-                                workOrder3DT->csm_iter_num_, workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
-                    }
+                    GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIH(buffer3DT, 
+                            *workOrder3DT->coilMap_, workOrder3DT->coil_map_algorithm_, 
+                            workOrder3DT->csm_kSize_, workOrder3DT->csm_powermethod_num_, 
+                            workOrder3DT->csm_iter_num_, (value_type)workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
                     GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
                 }
             }
@@ -473,8 +467,6 @@ performCalib(gtPlusReconWorkOrder3DT<T>* workOrder3DT, const hoNDArray<T>& ref_s
         {
            GADGET_CHECK_RETURN_FALSE(this->performCalibPrep(ref_src, ref_dst, workOrder3DT));
 
-            size_t n;
-
             // perform calibration
             if ( same_combinationcoeff_allN )
             {
@@ -531,7 +523,7 @@ performCalib(gtPlusReconWorkOrder3DT<T>* workOrder3DT, const hoNDArray<T>& ref_s
 
 template <typename T> 
 bool gtPlusReconWorker3DT<T>::
-performCalibPrep(const hoNDArray<T>& , const hoNDArray<T>& , WorkOrderType* /*workOrder2DT*/)
+performCalibPrep(const hoNDArray<T>& , const hoNDArray<T>& , WorkOrderType* /*workOrder3DT*/)
 {
     return true;
 }
@@ -756,7 +748,7 @@ bool gtPlusReconWorker3DT<T>::prepRef(WorkOrderType* workOrder3DT, const hoNDArr
                 }
 
                 hoNDArray<T> croppedRef;
-                GADGET_CHECK_RETURN_FALSE(cropUpTo10DArray(refRecon, croppedRef, crop_offset, crop_size));
+                GADGET_CHECK_RETURN_FALSE(cropUpTo11DArray(refRecon, croppedRef, crop_offset, crop_size));
                 refRecon = croppedRef;
             }
         }
@@ -798,11 +790,14 @@ bool gtPlusReconWorker3DT<T>::prepRef(WorkOrderType* workOrder3DT, const hoNDArr
             {
                 GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.start("crop sampled region ... "));
                 hoNDArray<T> croppedRef;
-                GADGET_CHECK_RETURN_FALSE(cropUpTo10DArray(refRecon, croppedRef, crop_offset, crop_size));
+                GADGET_CHECK_RETURN_FALSE(cropUpTo11DArray(refRecon, croppedRef, crop_offset, crop_size));
                 GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.stop());
                 GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, croppedRef, "refRecon_afterCrop");
 
-                if ( workOrder3DT->recon_algorithm_ == ISMRMRD_SPIRIT || workOrder3DT->recon_algorithm_ == ISMRMRD_L1SPIRIT )
+                if ( workOrder3DT->recon_algorithm_ == ISMRMRD_SPIRIT 
+                    || workOrder3DT->recon_algorithm_ == ISMRMRD_L1SPIRIT 
+                    || workOrder3DT->recon_algorithm_ == ISMRMRD_L1SPIRIT_SLEP 
+                    || workOrder3DT->recon_algorithm_ == ISMRMRD_L1SPIRIT_SLEP_MOTION_COMP )
                 {
                     // copy the ref into the data
                     GADGET_CHECK_RETURN_FALSE(gtPlus_util_.copyAlongROE1E2(refRecon, workOrder3DT->data_, 0, refRecon.get_size(0)-1, startE1_, endE1_, startE2_, endE2_));
@@ -816,7 +811,7 @@ bool gtPlusReconWorker3DT<T>::prepRef(WorkOrderType* workOrder3DT, const hoNDArr
 
                 GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.start("set up ref for coil map ... "));
                 refCoilMap.create(RO, E1, E2, srcCHA, refRecon.get_size(4));
-                GADGET_CHECK_RETURN_FALSE(setSubArrayUpTo10DArray(refRecon, refCoilMap, crop_offset, crop_size));
+                GADGET_CHECK_RETURN_FALSE(setSubArrayUpTo11DArray(refRecon, refCoilMap, crop_offset, crop_size));
                 GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.stop());
 
                 GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, refCoilMap, "refCoilMap");
@@ -844,13 +839,13 @@ bool gtPlusReconWorker3DT<T>::prepRef(WorkOrderType* workOrder3DT, const hoNDArr
                     }
                 }
 
-                GADGET_CHECK_RETURN_FALSE(cropUpTo10DArray(refRecon, croppedRef, crop_offset, crop_size));
+                GADGET_CHECK_RETURN_FALSE(cropUpTo11DArray(refRecon, croppedRef, crop_offset, crop_size));
                 refRecon = croppedRef;
             }
             else
             {
                 hoNDArray<T> croppedRef;
-                GADGET_CHECK_RETURN_FALSE(cropUpTo10DArray(refRecon, croppedRef, crop_offset, crop_size));
+                GADGET_CHECK_RETURN_FALSE(cropUpTo11DArray(refRecon, croppedRef, crop_offset, crop_size));
                 GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, croppedRef, "croppedRef");
 
                 GADGET_CHECK_RETURN_FALSE(performRefFilter(workOrder3DT, croppedRef, refCoilMap, startRO, endRO, startE1, endE1, startE2, endE2));
@@ -875,7 +870,7 @@ bool gtPlusReconWorker3DT<T>::prepRef(WorkOrderType* workOrder3DT, const hoNDArr
                         crop_offset[2] = 0;
                         crop_size[2] = refRecon.get_size(2);
 
-                        GADGET_CHECK_RETURN_FALSE(cropUpTo10DArray(refRecon, croppedRef, crop_offset, crop_size));
+                        GADGET_CHECK_RETURN_FALSE(cropUpTo11DArray(refRecon, croppedRef, crop_offset, crop_size));
                         refRecon = croppedRef;
                     }
                 }
@@ -1051,7 +1046,11 @@ bool gtPlusReconWorker3DT<T>::coilCompression(WorkOrderType* workOrder3DT)
         if ( workOrder3DT->acceFactorE1_==1 && workOrder3DT->acceFactorE2_==1 ) return true;
 
         // compute coil compression coeff
-        if ( workOrder3DT->coil_compression_ && workOrder3DT->recon_algorithm_!=ISMRMRD_SPIRIT && workOrder3DT->recon_algorithm_!=ISMRMRD_L1SPIRIT )
+        if ( workOrder3DT->coil_compression_ 
+            && workOrder3DT->recon_algorithm_!=ISMRMRD_SPIRIT 
+            && workOrder3DT->recon_algorithm_!=ISMRMRD_L1SPIRIT 
+            && workOrder3DT->recon_algorithm_!=ISMRMRD_L1SPIRIT_SLEP 
+            && workOrder3DT->recon_algorithm_!=ISMRMRD_L1SPIRIT_SLEP_MOTION_COMP )
         {
             // check whether coil compression coeff has been preset
             if ( workOrder3DT->coilCompressionCoef_->size()!=dataN )
@@ -1211,9 +1210,9 @@ bool gtPlusReconWorker3DT<T>::unmixCoeff(const hoNDArray<T>& kerIm, const hoNDAr
         }
 
         hoNDArray<T> conjUnmixCoeff(unmixCoeff);
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::multiplyConj(unmixCoeff, conjUnmixCoeff, conjUnmixCoeff));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::multiplyConj(unmixCoeff, conjUnmixCoeff, conjUnmixCoeff));
         GADGET_CHECK_RETURN_FALSE(Gadgetron::sumOverLastDimension(conjUnmixCoeff, gFactor));
-        Gadgetron::sqrt_inplace(&gFactor);
+        Gadgetron::sqrt(gFactor, gFactor);
     }
     catch(...)
     {
@@ -1360,9 +1359,9 @@ bool gtPlusReconWorker3DT<T>::applyImageDomainKernelImage(const hoNDArray<T>& al
             //}
 
             #ifdef GCC_OLD_FLAG
-                #pragma omp parallel default(none) private(n) shared(num, dim4D, RO, E1, E2, srcCHA, dstCHA) num_threads( ((num<16) ? num : 16) )
+                #pragma omp parallel default(none) private(n) shared(num, dim4D, RO, E1, E2, srcCHA, dstCHA) num_threads( (int)((num<16) ? num : 16) )
             #else
-                #pragma omp parallel default(none) private(n) shared(num, dim4D, aliasedIm, RO, E1, E2, srcCHA, dstCHA, kerIm, complexIm) num_threads( ((num<16) ? num : 16) )
+                #pragma omp parallel default(none) private(n) shared(num, dim4D, aliasedIm, RO, E1, E2, srcCHA, dstCHA, kerIm, complexIm) num_threads( (int)((num<16) ? num : 16) )
             #endif
             {
                 hoNDArrayMemoryManaged<T> unwrapped4D(RO, E1, E2, srcCHA, gtPlus_mem_manager_);
@@ -1387,6 +1386,11 @@ bool gtPlusReconWorker3DT<T>::applyImageDomainKernelImage(const hoNDArray<T>& al
                     }
                 }
             }
+
+        #ifdef USE_OMP
+            omp_set_nested(0);
+        #endif
+
         //}
         //else
         //{
@@ -1547,7 +1551,11 @@ bool gtPlusReconWorker3DT<T>::afterUnwrapping(WorkOrderType* workOrder3DT)
             GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.start("ref fill back ... "));
 
             hoNDArray<T> ref_dst;
-            if ( workOrder3DT->coil_compression_ && workOrder3DT->recon_algorithm_!=ISMRMRD_SPIRIT && workOrder3DT->recon_algorithm_!=ISMRMRD_L1SPIRIT )
+            if ( workOrder3DT->coil_compression_ 
+                && workOrder3DT->recon_algorithm_!=ISMRMRD_SPIRIT 
+                && workOrder3DT->recon_algorithm_!=ISMRMRD_L1SPIRIT 
+                && workOrder3DT->recon_algorithm_!=ISMRMRD_L1SPIRIT_SLEP 
+                && workOrder3DT->recon_algorithm_!=ISMRMRD_L1SPIRIT_SLEP_MOTION_COMP )
             {
                 GADGET_CHECK_RETURN_FALSE(gtPlus_util_.applyKLCoilCompressionCoeff(workOrder3DT->ref_, *workOrder3DT->coilCompressionCoef_, ref_dst, true));
             }
@@ -1571,7 +1579,10 @@ bool gtPlusReconWorker3DT<T>::afterUnwrapping(WorkOrderType* workOrder3DT)
         }
 
         // partial fourier handling
-        GADGET_CHECK_RETURN_FALSE(this->performPartialFourierHandling(workOrder3DT));
+        if ( partial_fourier_handling_ )
+        {
+            GADGET_CHECK_RETURN_FALSE(this->performPartialFourierHandling(workOrder3DT));
+        }
 
         if ( this->computeKSpace(workOrder3DT) || fullres_coilmap )
         {
@@ -1606,28 +1617,14 @@ bool gtPlusReconWorker3DT<T>::afterUnwrapping(WorkOrderType* workOrder3DT)
                         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, aveComplexIm, "aveComplexIm");
 
                         GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.start("full res coil map : compute 3D coil map ...  "));
-                        if ( workOrder3DT->csm_use_gpu_ )
-                        {
-                            GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIHGPU_FullResMap(aveComplexIm, *workOrder3DT->coilMap_, workOrder3DT->coil_map_algorithm_, workOrder3DT->csm_kSize_, workOrder3DT->csm_powermethod_num_, workOrder3DT->csm_iter_num_, workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
-                        }
-                        else
-                        {
-                            GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIH(aveComplexIm, *workOrder3DT->coilMap_, workOrder3DT->coil_map_algorithm_, workOrder3DT->csm_kSize_, workOrder3DT->csm_powermethod_num_, workOrder3DT->csm_iter_num_, workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
-                        }
+                        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIH(aveComplexIm, *workOrder3DT->coilMap_, workOrder3DT->coil_map_algorithm_, workOrder3DT->csm_kSize_, workOrder3DT->csm_powermethod_num_, workOrder3DT->csm_iter_num_, (value_type)workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
                         GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.stop());
                         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, *workOrder3DT->coilMap_, "coilMap_fullres");
                     }
                     else
                     {
                         GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.start("full res coil map : compute 3D coil map ...  "));
-                        if ( workOrder3DT->csm_use_gpu_ )
-                        {
-                            GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIHGPU_FullResMap(buffer3DT, *workOrder3DT->coilMap_, workOrder3DT->coil_map_algorithm_, workOrder3DT->csm_kSize_, workOrder3DT->csm_powermethod_num_, workOrder3DT->csm_iter_num_, workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
-                        }
-                        else
-                        {
-                            GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIH(buffer3DT, *workOrder3DT->coilMap_, workOrder3DT->coil_map_algorithm_, workOrder3DT->csm_kSize_, workOrder3DT->csm_powermethod_num_, workOrder3DT->csm_iter_num_, workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
-                        }
+                        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIH(buffer3DT, *workOrder3DT->coilMap_, workOrder3DT->coil_map_algorithm_, workOrder3DT->csm_kSize_, workOrder3DT->csm_powermethod_num_, workOrder3DT->csm_iter_num_, (value_type)workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
                         GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.stop());
                         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, *workOrder3DT->coilMap_, "coilMap_fullres");
                     }
@@ -1654,27 +1651,13 @@ bool gtPlusReconWorker3DT<T>::afterUnwrapping(WorkOrderType* workOrder3DT)
                         hoNDArray<T> complexImN(RO, E1, E2, dstCHA, buffer3DT.begin()+whichN_coilmap*RO*E1*E2*dstCHA);
                         hoNDArray<T> coilMapN(RO, E1, E2, dstCHA, workOrder3DT->coilMap_->begin()+whichN_coilmap*RO*E1*E2*dstCHA);
 
-                        if ( workOrder3DT->csm_use_gpu_ )
-                        {
-                            GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIHGPU_FullResMap(complexImN, coilMapN, workOrder3DT->coil_map_algorithm_, workOrder3DT->csm_kSize_, workOrder3DT->csm_powermethod_num_, workOrder3DT->csm_iter_num_, workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
-                        }
-                        else
-                        {
-                            GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIH(complexImN, coilMapN, workOrder3DT->coil_map_algorithm_, workOrder3DT->csm_kSize_, workOrder3DT->csm_powermethod_num_, workOrder3DT->csm_iter_num_, workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
-                        }
+                        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIH(complexImN, coilMapN, workOrder3DT->coil_map_algorithm_, workOrder3DT->csm_kSize_, workOrder3DT->csm_powermethod_num_, workOrder3DT->csm_iter_num_, (value_type)workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
                         GADGET_CHECK_RETURN_FALSE(repmatLastDimension(*workOrder3DT->coilMap_, whichN_coilmap));
                         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, *workOrder3DT->coilMap_, "coilMap_fullres");
                     }
                     else
                     {
-                        if ( workOrder3DT->csm_use_gpu_ )
-                        {
-                            GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIHGPU_FullResMap(buffer3DT, *workOrder3DT->coilMap_, workOrder3DT->coil_map_algorithm_, workOrder3DT->csm_kSize_, workOrder3DT->csm_powermethod_num_, workOrder3DT->csm_iter_num_, workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
-                        }
-                        else
-                        {
-                            GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIH(buffer3DT, *workOrder3DT->coilMap_, workOrder3DT->coil_map_algorithm_, workOrder3DT->csm_kSize_, workOrder3DT->csm_powermethod_num_, workOrder3DT->csm_iter_num_, workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
-                        }
+                        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIH(buffer3DT, *workOrder3DT->coilMap_, workOrder3DT->coil_map_algorithm_, workOrder3DT->csm_kSize_, workOrder3DT->csm_powermethod_num_, workOrder3DT->csm_iter_num_, (value_type)workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
                         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, *workOrder3DT->coilMap_, "coilMap_fullres");
                     }
 
@@ -1688,6 +1671,44 @@ bool gtPlusReconWorker3DT<T>::afterUnwrapping(WorkOrderType* workOrder3DT)
             GADGET_CHECK_RETURN_FALSE(workOrder3DT->complexIm_.get_size(0)==RO);
             GADGET_CHECK_RETURN_FALSE(workOrder3DT->complexIm_.get_size(1)==E1);
             GADGET_CHECK_RETURN_FALSE(workOrder3DT->complexIm_.get_size(2)==E2);
+
+            if ( partial_fourier_handling_ )
+            {
+                bool partialFourierHandling = true;
+                if ( (workOrder3DT->start_RO_<0 || workOrder3DT->end_RO_<0 || (workOrder3DT->end_RO_-workOrder3DT->start_RO_+1==RO) ) 
+                        && (workOrder3DT->start_E1_<0 || workOrder3DT->end_E1_<0 || (workOrder3DT->end_E1_-workOrder3DT->start_E1_+1==E1) ) 
+                        && (workOrder3DT->start_E2_<0 || workOrder3DT->end_E2_<0 || (workOrder3DT->end_E2_-workOrder3DT->start_E2_+1==E2) ) )
+                {
+                    partialFourierHandling = false;
+                }
+
+                // if the partial fourier handling is used to compute updated full kspace, the coil combination needs to be repeated
+                if ( partialFourierHandling )
+                {
+                    GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, workOrder3DT->complexIm_, "complexIm_origin_noFullResCoilMap_");
+
+                    GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.start("after partial fourier handling, allocate buffer 3DT ...  "));
+                    hoNDArrayMemoryManaged<T> buffer3DT(workOrder3DT->fullkspace_.get_dimensions(), gtPlus_mem_manager_);
+                    hoNDArrayMemoryManaged<T> buffer3DT_Two(workOrder3DT->fullkspace_.get_dimensions(), gtPlus_mem_manager_);
+                    GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.stop());
+
+                    // if the partial fourier handling is performed on the fullkspace, an extra coil combination is needed
+                    if ( workOrder3DT->acceFactorE1_==1 && workOrder3DT->acceFactorE2_==1 )
+                    {
+                        hoNDArray<T> buffer3DT_Two(workOrder3DT->data_.get_dimensions());
+                        GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft3c(workOrder3DT->data_, buffer3DT, buffer3DT_Two));
+                        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilCombine(buffer3DT, *workOrder3DT->coilMap_, workOrder3DT->complexIm_));
+                        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, workOrder3DT->complexIm_, "complexIm_noFullResCoilMap_");
+                    }
+                    else if ( workOrder3DT->fullkspace_.get_number_of_elements() > 0 )
+                    {
+                        hoNDArray<T> buffer3DT_Two(workOrder3DT->fullkspace_.get_dimensions());
+                        GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft3c(workOrder3DT->fullkspace_, buffer3DT, buffer3DT_Two));
+                        GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilCombine(buffer3DT, *workOrder3DT->coilMap_, workOrder3DT->complexIm_));
+                        GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, workOrder3DT->complexIm_, "complexIm_noFullResCoilMap_");
+                    }
+                }
+            }
         }
 
         // flip along E2
@@ -1712,28 +1733,14 @@ bool gtPlusReconWorker3DT<T>::afterUnwrapping(WorkOrderType* workOrder3DT)
         {
             for ( cha=0; cha<imCHA; cha++ )
             {
-                int offset = n*imRO*imE1*imE2*imCHA+cha*imRO*imE1*imE2;
+                size_t offset = n*imRO*imE1*imE2*imCHA+cha*imRO*imE1*imE2;
 
-                for ( int e2=0; e2<(int)imE2; e2++ )
+                for ( size_t e2=0; e2<imE2; e2++ )
                 {
-                    int e2_from = 2*mid_E2-e2;
+                    size_t e2_from = 2*mid_E2-e2;
                     if ( e2_from >= imE2 ) e2_from -= imE2;
 
                     memcpy(pDst+offset+e2*imRO*imE1, pSrc+offset+e2_from*imRO*imE1, sizeof(T)*imRO*imE1);
-
-                    //for ( int e1=0; e1<(int)imE1; e1++ )
-                    //{
-                    //    int e1_from = 2*mid_E1-e1;
-                    //    if ( e1_from >= imE1 ) e1_from -= imE1;
-
-                    //    for ( int ro=0; ro<(int)imRO; ro++ )
-                    //    {
-                    //        int ro_from = 2*mid_RO-ro;
-                    //        if ( ro_from >= imRO ) ro_from -= imRO;
-
-                    //        pDst[offset+e2*imRO*imE1+e1*imRO+ro] = pSrc[offset+e2_from*imRO*imE1+e1_from*imRO+ro_from];
-                    //    }
-                    //}
                 }
             }
         }
@@ -1755,10 +1762,45 @@ bool gtPlusReconWorker3DT<T>::performPartialFourierHandling(WorkOrderType* workO
 {
     try
     {
-        if ( workOrder3DT->partialFourier_algo_ == ISMRMRD_PF_ZEROFILLING ) return true;
+        value_type partialFourierCompensationFactor = 1;
+
+        size_t RO = workOrder3DT->data_.get_size(0);
+        size_t E1 = workOrder3DT->data_.get_size(1);
+        size_t E2 = workOrder3DT->data_.get_size(2);
+
+        if ( !( workOrder3DT->start_RO_<0 || workOrder3DT->end_RO_<0 || (workOrder3DT->end_RO_-workOrder3DT->start_RO_+1==RO) ) )
+        {
+            partialFourierCompensationFactor *= (value_type)(RO)/(value_type)(workOrder3DT->end_RO_-workOrder3DT->start_RO_+1);
+        }
+
+        if ( !( workOrder3DT->start_E1_<0 || workOrder3DT->end_E1_<0 || (workOrder3DT->end_E1_-workOrder3DT->start_E1_+1==E1) ) )
+        {
+            if ( workOrder3DT->end_E1_-workOrder3DT->start_E1_+1 <= E1 )
+            {
+                partialFourierCompensationFactor *= (value_type)(E1)/(value_type)(workOrder3DT->end_E1_-workOrder3DT->start_E1_+1);
+            }
+        }
+
+        if ( !( workOrder3DT->start_E2_<0 || workOrder3DT->end_E2_<0 || (workOrder3DT->end_E2_-workOrder3DT->start_E2_+1==E2) ) )
+        {
+            if ( workOrder3DT->end_E2_-workOrder3DT->start_E2_+1 <= E2 )
+            {
+                partialFourierCompensationFactor *= (value_type)(E2)/(value_type)(workOrder3DT->end_E2_-workOrder3DT->start_E2_+1);
+            }
+        }
+
+        partialFourierCompensationFactor = std::sqrt(partialFourierCompensationFactor);
+        GADGET_CHECK_PERFORM(performTiming_, GADGET_MSG("Partial fourier scaling factor : " << partialFourierCompensationFactor));
+
+        // if ( workOrder3DT->partialFourier_algo_ == ISMRMRD_PF_ZEROFILLING ) return true;
 
         if ( workOrder3DT->acceFactorE1_==1 && workOrder3DT->acceFactorE2_==1 )
         {
+            if ( (workOrder3DT->partialFourier_algo_ == ISMRMRD_PF_ZEROFILLING || workOrder3DT->partialFourier_algo_ == ISMRMRD_PF_ZEROFILLING_FILTER) && (GT_ABS(partialFourierCompensationFactor-1)>FLT_EPSILON) )
+            {
+                GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::scal(partialFourierCompensationFactor, workOrder3DT->data_));
+            }
+
             if ( workOrder3DT->partialFourier_algo_ == ISMRMRD_PF_ZEROFILLING_FILTER )
             {
                 GADGET_CHECK_RETURN_FALSE(performPartialFourierFilter(*workOrder3DT, workOrder3DT->data_));
@@ -1776,6 +1818,11 @@ bool gtPlusReconWorker3DT<T>::performPartialFourierHandling(WorkOrderType* workO
         }
         else if ( workOrder3DT->fullkspace_.get_number_of_elements() > 0 )
         {
+            if ( (workOrder3DT->partialFourier_algo_ == ISMRMRD_PF_ZEROFILLING || workOrder3DT->partialFourier_algo_ == ISMRMRD_PF_ZEROFILLING_FILTER) && (GT_ABS(partialFourierCompensationFactor-1)>FLT_EPSILON) )
+            {
+                GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::scal(partialFourierCompensationFactor, workOrder3DT->fullkspace_));
+            }
+
             if ( workOrder3DT->partialFourier_algo_ == ISMRMRD_PF_ZEROFILLING_FILTER )
             {
                 GADGET_CHECK_RETURN_FALSE(performPartialFourierFilter(*workOrder3DT, workOrder3DT->fullkspace_));
@@ -1797,6 +1844,11 @@ bool gtPlusReconWorker3DT<T>::performPartialFourierHandling(WorkOrderType* workO
             hoNDArrayMemoryManaged<T> kspace(workOrder3DT->complexIm_.get_dimensions(), gtPlus_mem_manager_);
             GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fft3c(workOrder3DT->complexIm_, kspace));
 
+            if ( (workOrder3DT->partialFourier_algo_ == ISMRMRD_PF_ZEROFILLING || workOrder3DT->partialFourier_algo_ == ISMRMRD_PF_ZEROFILLING_FILTER) && (GT_ABS(partialFourierCompensationFactor-1)>FLT_EPSILON) )
+            {
+                GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::scal(partialFourierCompensationFactor, kspace));
+            }
+
             if ( workOrder3DT->partialFourier_algo_ == ISMRMRD_PF_ZEROFILLING_FILTER )
             {
                 GADGET_CHECK_RETURN_FALSE(performPartialFourierFilter(*workOrder3DT, kspace));
@@ -1938,17 +1990,17 @@ bool gtPlusReconWorker3DT<T>::performPartialFourierPOCSRecon(WorkOrderType& work
         double filter_ref_sigma_ = 1.5;
         double filter_ref_width_ = 0.15;
 
-        int startRO(0), endRO(RO-1);
+        size_t startRO(0), endRO(RO-1);
         hoNDArray<T> filterRO(RO);
         if ( (workOrder3DT.start_RO_<0 || workOrder3DT.end_RO_<0) )
         {
             GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(RO, 0, RO-1, 
-                filterRO, filter_ref_type_, filter_ref_sigma_, std::ceil(filter_ref_width_*RO)));
+                filterRO, filter_ref_type_, filter_ref_sigma_, (size_t)std::ceil(filter_ref_width_*RO)));
         }
         else
         {
             GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(RO, workOrder3DT.start_RO_, workOrder3DT.end_RO_, 
-                filterRO, filter_ref_type_, filter_ref_sigma_, std::ceil(filter_ref_width_*RO)));
+                filterRO, filter_ref_type_, filter_ref_sigma_, (size_t)std::ceil(filter_ref_width_*RO)));
 
             startRO = workOrder3DT.start_RO_;
             endRO = workOrder3DT.end_RO_;
@@ -1956,17 +2008,17 @@ bool gtPlusReconWorker3DT<T>::performPartialFourierPOCSRecon(WorkOrderType& work
 
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, filterRO, "filterRO_POCS");
 
-        int startE1(0), endE1(E1-1);
+        size_t startE1(0), endE1(E1-1);
         hoNDArray<T> filterE1(E1);
         if ( (workOrder3DT.start_E1_<0 || workOrder3DT.end_E1_<0) )
         {
             GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(E1, 0, E1-1, 
-                filterE1, filter_ref_type_, filter_ref_sigma_, std::ceil(filter_ref_width_*E1)));
+                filterE1, filter_ref_type_, filter_ref_sigma_, (size_t)std::ceil(filter_ref_width_*E1)));
         }
         else
         {
             GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(E1, workOrder3DT.start_E1_, workOrder3DT.end_E1_, 
-                filterE1, filter_ref_type_, filter_ref_sigma_, std::ceil(filter_ref_width_*E1)));
+                filterE1, filter_ref_type_, filter_ref_sigma_, (size_t)std::ceil(filter_ref_width_*E1)));
 
             startE1 = workOrder3DT.start_E1_;
             endE1 = workOrder3DT.end_E1_;
@@ -1974,17 +2026,17 @@ bool gtPlusReconWorker3DT<T>::performPartialFourierPOCSRecon(WorkOrderType& work
 
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, filterE1, "filterE1_POCS");
 
-        int startE2(0), endE2(E2-1);
+        size_t startE2(0), endE2(E2-1);
         hoNDArray<T> filterE2(E1);
         if ( (workOrder3DT.start_E2_<0 || workOrder3DT.end_E2_<0) )
         {
             GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(E2, 0, E2-1, 
-                filterE2, filter_ref_type_, filter_ref_sigma_, std::ceil(filter_ref_width_*E2)));
+                filterE2, filter_ref_type_, filter_ref_sigma_, (size_t)std::ceil(filter_ref_width_*E2)));
         }
         else
         {
             GADGET_CHECK_RETURN_FALSE(gtPlus_util_.generateSymmetricFilterForRef(E2, workOrder3DT.start_E2_, workOrder3DT.end_E2_, 
-                filterE2, filter_ref_type_, filter_ref_sigma_, std::ceil(filter_ref_width_*E2)));
+                filterE2, filter_ref_type_, filter_ref_sigma_, (size_t)std::ceil(filter_ref_width_*E2)));
 
             startE2 = workOrder3DT.start_E2_;
             endE2 = workOrder3DT.end_E2_;
@@ -2011,10 +2063,10 @@ bool gtPlusReconWorker3DT<T>::performPartialFourierPOCSRecon(WorkOrderType& work
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, buffer3DT_partial_fourier, "POCS_afterFiltered_complexIm");
 
         // get the complex image phase for the filtered kspace
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::absolute(buffer3DT_partial_fourier, mag));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::addEpsilon(mag));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::abs(buffer3DT_partial_fourier, mag));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::addEpsilon(mag));
         GADGET_CHECK_RETURN_FALSE(magComplex.copyFrom(mag));
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::divide(buffer3DT_partial_fourier, magComplex, buffer3DT));
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::divide(buffer3DT_partial_fourier, magComplex, buffer3DT));
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, buffer3DT, "POCS_afterFiltered_complexIm_phase");
 
         // complex images, initialized as not filtered complex image
@@ -2028,9 +2080,9 @@ bool gtPlusReconWorker3DT<T>::performPartialFourierPOCSRecon(WorkOrderType& work
         size_t ii;
         for ( ii=0; ii<workOrder3DT.partialFourier_POCS_iters_; ii++ )
         {
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::absolute(complexImPOCS, mag));
+            GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::abs(complexImPOCS, mag));
             GADGET_CHECK_RETURN_FALSE(magComplex.copyFrom(mag));
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::multiply(magComplex, buffer3DT, complexImPOCS));
+            GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::multiply(magComplex, buffer3DT, complexImPOCS));
             GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, complexImPOCS, "POCS_complexImPOCS");
 
             // go back to kspace
@@ -2049,7 +2101,7 @@ bool gtPlusReconWorker3DT<T>::performPartialFourierPOCSRecon(WorkOrderType& work
             GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, complexImPOCS, "POCS_kspaceIter_copyOri_complexImPOCS");
 
             // compute threshold to stop the iteration
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::subtract(complexImPOCS, complexIm, buffer3DT_partial_fourier));
+            GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::subtract(complexImPOCS, complexIm, buffer3DT_partial_fourier));
             typename realType<T>::Type diff, prev;
             Gadgetron::norm2(complexIm, prev);
             Gadgetron::norm2(buffer3DT_partial_fourier, diff);
@@ -2117,21 +2169,21 @@ bool gtPlusReconWorker3DT<T>::performPartialFourierFengHuangRecon(WorkOrderType&
 
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, kspace, "kspace_before_FengHuang");
 
-        int startRO(0), endRO(RO-1);
+        size_t startRO(0), endRO(RO-1);
         if ( workOrder3DT.start_RO_>=0 && workOrder3DT.end_RO_<RO )
         {
             startRO = workOrder3DT.start_RO_;
             endRO = workOrder3DT.end_RO_;
         }
 
-        int startE1(0), endE1(E1-1);
+        size_t startE1(0), endE1(E1-1);
         if ( workOrder3DT.start_E1_>=0 && workOrder3DT.end_E1_<E1 )
         {
             startE1 = workOrder3DT.start_E1_;
             endE1 = workOrder3DT.end_E1_;
         }
 
-        int startE2(0), endE2(E2-1);
+        size_t startE2(0), endE2(E2-1);
         if ( workOrder3DT.start_E2_>=0 && workOrder3DT.end_E2_<E2 )
         {
             startE2 = workOrder3DT.start_E2_;
@@ -2173,8 +2225,8 @@ bool gtPlusReconWorker3DT<T>::performPartialFourierFengHuangRecon(WorkOrderType&
         size[3] = CHA;
         size[4] = N;
 
-        GADGET_CHECK_RETURN_FALSE(Gadgetron::cropUpTo10DArray(buffer3DT, src, start, size));
-        GADGET_CHECK_RETURN_FALSE(cropUpTo10DArray(kspace, dst, start, size));
+        GADGET_CHECK_RETURN_FALSE(Gadgetron::cropUpTo11DArray(buffer3DT, src, start, size));
+        GADGET_CHECK_RETURN_FALSE(cropUpTo11DArray(kspace, dst, start, size));
 
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, src, "src_FengHuang");
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, dst, "dst_FengHuang");
@@ -2202,16 +2254,16 @@ bool gtPlusReconWorker3DT<T>::performPartialFourierFengHuangRecon(WorkOrderType&
         if ( workOrder3DT.partialFourier_FengHuang_transitBand_==0 )
         {
             GADGET_CHECK_PERFORM(performTiming_, gt_timer1_.start("performReconFangHuang"));
-            GADGET_CHECK_RETURN_FALSE(this->performReconFangHuang(workOrder3DT, buffer3DT, kspace, startRO, endRO, startE1, endE1, startE2, endE2, kernel));
+            GADGET_CHECK_RETURN_FALSE(this->performReconFangHuang(workOrder3DT, buffer3DT, kspace, (int)startRO, (int)endRO, (int)startE1, (int)endE1, (int)startE2, (int)endE2, kernel));
             GADGET_CHECK_PERFORM(performTiming_, gt_timer1_.stop());
         }
         else
         {
             GADGET_CHECK_PERFORM(performTiming_, gt_timer1_.start("performReconFangHuang with transition band"));
 
-            int tb =  (int)workOrder3DT.partialFourier_FengHuang_transitBand_;
+            long long tb =  (long long)workOrder3DT.partialFourier_FengHuang_transitBand_;
 
-            int sRO(startRO), eRO(endRO), sE1(startE1), eE1(endE1), sE2(startE2), eE2(endE2);
+            long long sRO(startRO), eRO(endRO), sE1(startE1), eE1(endE1), sE2(startE2), eE2(endE2);
 
             if ( startRO > 0 )
             {
@@ -2269,7 +2321,7 @@ bool gtPlusReconWorker3DT<T>::performPartialFourierFengHuangRecon(WorkOrderType&
 
             hoNDArrayMemoryManaged<T> buffer3DT_partial_fourier_kspaceIter(kspace.get_dimensions(), gtPlus_mem_manager_);
             GADGET_CHECK_RETURN_FALSE(this->performReconFangHuang(workOrder3DT, buffer3DT, 
-                    buffer3DT_partial_fourier_kspaceIter, startRO, endRO, startE1, endE1, startE2, endE2, kernel));
+                    buffer3DT_partial_fourier_kspaceIter, (int)startRO, (int)endRO, (int)startE1, (int)endE1, (int)startE2, (int)endE2, kernel));
 
             GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, buffer3DT_partial_fourier_kspaceIter, "kspace_FengHuang_recon");
 
@@ -2302,40 +2354,40 @@ bool gtPlusReconWorker3DT<T>::calibFengHuang(WorkOrderType& workOrder3DT, const
     {
         GADGET_CHECK_RETURN_FALSE(src.dimensions_equal(&dst));
 
-        size_t RO = src.get_size(0);
-        size_t E1 = src.get_size(1);
-        size_t E2 = src.get_size(2);
-        size_t srcCHA = src.get_size(3);
-        size_t N = src.get_size(4);
+        long long RO = (long long)src.get_size(0);
+        long long E1 = (long long)src.get_size(1);
+        long long E2 = (long long)src.get_size(2);
+        long long srcCHA = (long long)src.get_size(3);
+        long long N = (long long)src.get_size(4);
 
-        size_t kx = workOrder3DT.partialFourier_FengHuang_kSize_RO_;
-        size_t ky = workOrder3DT.partialFourier_FengHuang_kSize_E1_;
-        size_t kz = workOrder3DT.partialFourier_FengHuang_kSize_E2_;
+        long long kx = (long long)workOrder3DT.partialFourier_FengHuang_kSize_RO_;
+        long long ky = (long long)workOrder3DT.partialFourier_FengHuang_kSize_E1_;
+        long long kz = (long long)workOrder3DT.partialFourier_FengHuang_kSize_E2_;
 
         if ( kx%2 == 0 ) kx++;
         if ( ky%2 == 0 ) ky++;
         if ( kz%2 == 0 ) kz++;
 
-        int halfKx = (int)kx/2;
-        int halfKy = (int)ky/2;
-        int halfKz = (int)kz/2;
+        long long halfKx = (long long)kx/2;
+        long long halfKy = (long long)ky/2;
+        long long halfKz = (long long)kz/2;
 
         // the cross-channel kernel is not estimated
         kernel.createArray(kx, ky, kz, srcCHA, 1, N);
 
-        int ii=0;
-        int num = N*srcCHA;
+        long long ii=0;
+        long long num = N*srcCHA;
 
-        int startRO = halfKx;
-        int endRO = RO - halfKx - 1;
+        long long startRO = halfKx;
+        long long endRO = RO - halfKx - 1;
 
-        int startE1 = halfKy;
-        int endE1 = E1 - halfKy - 1;
+        long long startE1 = halfKy;
+        long long endE1 = E1 - halfKy - 1;
 
-        int startE2 = halfKz;
-        int endE2 = E2 - halfKz - 1;
+        long long startE2 = halfKz;
+        long long endE2 = E2 - halfKz - 1;
 
-        int rowA, colA, rowB, colB;
+        long long rowA, colA, rowB, colB;
         rowA = (endE2-startE2+1)*(endE1-startE1+1)*(endRO-startRO+1); 
         colA = kx*ky*kz;
 
@@ -2349,9 +2401,9 @@ bool gtPlusReconWorker3DT<T>::calibFengHuang(WorkOrderType& workOrder3DT, const
         #endif // USE_OMP
 
         #ifdef GCC_OLD_FLAG
-            #pragma omp parallel default(none) private(ii) shared(num, RO, E1, E2, srcCHA, N, kx, ky, kz, rowA, colA, rowB, colB, startRO, endRO, startE1, endE1, startE2, endE2, halfKx, halfKy, halfKz, thresReg) if ( num > 1 ) num_threads( (num<16 ? num : 16) )
+            #pragma omp parallel default(none) private(ii) shared(num, RO, E1, E2, srcCHA, N, kx, ky, kz, rowA, colA, rowB, colB, startRO, endRO, startE1, endE1, startE2, endE2, halfKx, halfKy, halfKz, thresReg) if ( num > 1 ) num_threads( (int)(num<16 ? num : 16) )
         #else
-            #pragma omp parallel default(none) private(ii) shared(num, RO, E1, E2, srcCHA, N, kx, ky, kz, src, dst, kernel, rowA, colA, rowB, colB, startRO, endRO, startE1, endE1, startE2, endE2, halfKx, halfKy, halfKz, thresReg) if ( num > 1 ) num_threads( (num<16 ? num : 16) )
+            #pragma omp parallel default(none) private(ii) shared(num, RO, E1, E2, srcCHA, N, kx, ky, kz, src, dst, kernel, rowA, colA, rowB, colB, startRO, endRO, startE1, endE1, startE2, endE2, halfKx, halfKy, halfKz, thresReg) if ( num > 1 ) num_threads( (int)(num<16 ? num : 16) )
         #endif
         {
            /* hoNDArrayMemoryManaged<T> A_mem(colA, rowA, gtPlus_mem_manager_);
@@ -2372,8 +2424,8 @@ bool gtPlusReconWorker3DT<T>::calibFengHuang(WorkOrderType& workOrder3DT, const
                 ho3DArray<T> src3D(RO, E1, E2, const_cast<T*>(src.begin())+ii*RO*E1*E2);
                 ho3DArray<T> dst3D(RO, E1, E2, const_cast<T*>(dst.begin())+ii*RO*E1*E2);
 
-                size_t ro, e1, e2, row(0);
-                int x, y, z;
+                long long ro, e1, e2, row(0);
+                long long x, y, z;
 
                 for ( e2=startE2; e2<=endE2; e2++ )
                 {
@@ -2430,54 +2482,54 @@ bool gtPlusReconWorker3DT<T>::performReconFangHuang(WorkOrderType& workOrder3DT,
     {
         GADGET_CHECK_RETURN_FALSE(kspaceConj.dimensions_equal(&kspace));
 
-        short RO = (short)kspace.get_size(0);
-        short E1 = (short)kspace.get_size(1);
-        short E2 = (short)kspace.get_size(2);
-        size_t CHA = kspace.get_size(3);
-        size_t N = kspace.get_size(4);
+        long long RO = (long long)kspace.get_size(0);
+        long long E1 = (long long)kspace.get_size(1);
+        long long E2 = (long long)kspace.get_size(2);
+        long long CHA = (long long)kspace.get_size(3);
+        long long N = (long long)kspace.get_size(4);
 
-        size_t kx = kernel.get_size(0);
-        size_t ky = kernel.get_size(1);
-        size_t kz = kernel.get_size(2);
+        long long kx = (long long)kernel.get_size(0);
+        long long ky = (long long)kernel.get_size(1);
+        long long kz = (long long)kernel.get_size(2);
 
-        int halfKx = kx/2;
-        int halfKy = ky/2;
-        int halfKz = kz/2;
+        long long halfKx = kx/2;
+        long long halfKy = ky/2;
+        long long halfKz = kz/2;
 
-        size_t kerN = kernel.get_size(5);
+        long long kerN = kernel.get_size(5);
         GADGET_CHECK_RETURN_FALSE( (kerN==1) || (kerN==N) );
 
-        int num = CHA*N;
+        long long num = CHA*N;
 
         long long rowD = RO*E1*E2 - ( (endE2-startE2+1) * (endE1-startE1+1) * (endRO-startRO+1) );
-        int colD = kx*ky*kz;
+        long long colD = kx*ky*kz;
 
-        ho2DArray<short> coeffX(colD, rowD);
-        short* pCx = coeffX.begin();
+        ho2DArray<long long> coeffX(colD, rowD);
+        long long* pCx = coeffX.begin();
 
-        ho2DArray<short> coeffY(colD, rowD);
-        short* pCy = coeffY.begin();
+        ho2DArray<long long> coeffY(colD, rowD);
+        long long* pCy = coeffY.begin();
 
-        ho2DArray<short> coeffZ(colD, rowD);
-        short* pCz = coeffZ.begin();
+        ho2DArray<long long> coeffZ(colD, rowD);
+        long long* pCz = coeffZ.begin();
 
-        short ro, e1, e2;
+        long long ro, e1, e2;
         long long row(0);
-        int x, y, z, dx, dy, dz;
+        long long x, y, z;
 
-        ho2DArray<short> rowInd(3, rowD);
-        short* pRowInd = rowInd.begin();
+        ho2DArray<long long> rowInd(3, rowD);
+        long long* pRowInd = rowInd.begin();
 
-        hoNDArray<short> offsetX(colD);
-        short* pOffsetX = offsetX.begin();
+        hoNDArray<long long> offsetX(colD);
+        long long* pOffsetX = offsetX.begin();
 
-        hoNDArray<short> offsetY(colD);
-        short* pOffsetY = offsetY.begin();
+        hoNDArray<long long> offsetY(colD);
+        long long* pOffsetY = offsetY.begin();
 
-        hoNDArray<short> offsetZ(colD);
-        short* pOffsetZ = offsetZ.begin();
+        hoNDArray<long long> offsetZ(colD);
+        long long* pOffsetZ = offsetZ.begin();
 
-        int colInd(0);
+        long long colInd(0);
         for ( z=-halfKz; z<=halfKz; z++ )
         {
             for ( y=-halfKy; y<=halfKy; y++ )
@@ -2496,7 +2548,7 @@ bool gtPlusReconWorker3DT<T>::performReconFangHuang(WorkOrderType& workOrder3DT,
 
         GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.start("performReconFangHuang - compute coeff array - internal"));
 
-        short* pRowIndCurr;
+        long long* pRowIndCurr;
         for ( e2=0; e2<E2; e2++ )
         {
             for ( e1=0; e1<E1; e1++ )
@@ -2624,8 +2676,8 @@ bool gtPlusReconWorker3DT<T>::performReconFangHuang(WorkOrderType& workOrder3DT,
         }*/
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
-        int ii;
-        int numOfThreads = ((num>4) ? 4 : num);
+        long long ii;
+        int numOfThreads = (int)((num>4) ? 4 : num);
         #ifdef GCC_OLD_FLAG
             #pragma omp parallel default(none) private(ii) shared(num, RO, E1, E2, CHA, N, kerN, rowD, colD, coeffX, coeffY, coeffZ, pCx, pCy, pCz) if ( num > 1 ) num_threads( numOfThreads ) 
         #else
@@ -2640,20 +2692,23 @@ bool gtPlusReconWorker3DT<T>::performReconFangHuang(WorkOrderType& workOrder3DT,
             hoMatrix<T> K(colD, 1);
             hoMatrix<T> R(rowD, 1);
 
+            Gadgetron::clear(D);
+            Gadgetron::clear(K);
+            Gadgetron::clear(R);
+
             #pragma omp for
             for ( ii=0; ii<num; ii ++ )
             {
                 ho3DArray<T> src3D(RO, E1, E2, const_cast<T*>(kspaceConj.begin())+ii*RO*E1*E2);
                 ho3DArray<T> dst3D(RO, E1, E2, kspace.begin()+ii*RO*E1*E2);
 
-                size_t ro, e1, e2;
-                long long row, col;
+                long long row;
 
                 GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.start("fill data matrix ... "));
                 #pragma omp parallel for private(row) shared(colD, rowD, D, src3D, pD)
                 for ( row=0; row<rowD; row++ )
                 {
-                    for ( int col=0; col<colD; col++ )
+                    for ( long long col=0; col<colD; col++ )
                     {
                         long long offset = col + row*colD;
                         pD[offset] = src3D(pCx[offset], pCy[offset], pCz[offset]);
@@ -2663,10 +2718,10 @@ bool gtPlusReconWorker3DT<T>::performReconFangHuang(WorkOrderType& workOrder3DT,
 
                 if ( kerN == 1 )
                 {
-                    int ind = ii;
-                    int currS = ind/(CHA*N);
+                    long long ind = ii;
+                    long long currS = ind/(CHA*N);
                     ind %= CHA*N;
-                    int currN = ind/CHA;
+                    long long currN = ind/CHA;
                     ind %= CHA;
                     memcpy(K.begin(), kernel.begin()+(ind+currS*CHA)*colD, sizeof(T)*colD);
                 }
@@ -2677,7 +2732,7 @@ bool gtPlusReconWorker3DT<T>::performReconFangHuang(WorkOrderType& workOrder3DT,
 
                 // R = D*K
                 GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.start("matrix multiplication ... "));
-                Gadgetron::GeneralMatrixProduct_gemm(R, D, false, K, false);
+                Gadgetron::gemm(R, D, false, K, false);
                 GADGET_CHECK_PERFORM(performTiming_, gt_timer2_.stop());
 
                 size_t colCenter = colD/2;
@@ -2723,7 +2778,7 @@ estimateJobSize(gtPlusReconWorkOrder<T>* workOrder3DT, size_t maxNumOfBytesPerJo
         size_t dstCHA = workOrder3DT->kernelIm_->get_size(4);
 
         size_t totalJobNum = RO;
-        jobSize = std::ceil( (double)(totalJobNum+overlapBetweenJobs*(nodeN-1))/(double)nodeN );
+        jobSize = (size_t)std::ceil( (double)(totalJobNum+overlapBetweenJobs*(nodeN-1))/(double)nodeN );
 
         size_t numOfBytesPerJob = sizeof(T)*( E1*E2*srcCHA*dstCHA*jobSize + 2*E1*E2*srcCHA*jobSize );
 
@@ -2731,7 +2786,7 @@ estimateJobSize(gtPlusReconWorkOrder<T>* workOrder3DT, size_t maxNumOfBytesPerJo
         while ( numOfBytesPerJob > maxNumOfBytesPerJob-64.0*1024*1024 )
         {
             nodeN *= 2;
-            jobSize = std::ceil( (double)(totalJobNum+overlapBetweenJobs*(nodeN-1))/(double)nodeN );
+            jobSize = (size_t)std::ceil( (double)(totalJobNum+overlapBetweenJobs*(nodeN-1))/(double)nodeN );
             numOfBytesPerJob = sizeof(T)*( E1*E2*srcCHA*dstCHA*jobSize + 2*E1*E2*srcCHA*jobSize );
         }
 
diff --git a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker3DTGRAPPA.h b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker3DTGRAPPA.h
index 00a38bd..2ac828f 100644
--- a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker3DTGRAPPA.h
+++ b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker3DTGRAPPA.h
@@ -5,7 +5,7 @@
 
 #pragma once
 
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
 #include "GadgetronTimer.h"
 #include "gtPlusISMRMRDReconUtil.h"
 #include "gtPlusISMRMRDReconWorker3DT.h"
@@ -19,6 +19,8 @@ class gtPlusReconWorker3DTGRAPPA : public gtPlusReconWorker3DT<T>
 public:
 
     typedef gtPlusReconWorker3DT<T> BaseClass;
+    typedef typename BaseClass::value_type value_type;
+
     typedef gtPlusReconWorkOrder3DT<T> WorkOrderType;
 
     gtPlusReconWorker3DTGRAPPA() : BaseClass() {}
@@ -39,9 +41,11 @@ public:
     using BaseClass::gt_timer2_;
     using BaseClass::gt_timer3_;
     using BaseClass::performTiming_;
+    using BaseClass::verbose_;
     using BaseClass::gt_exporter_;
     using BaseClass::debugFolder_;
     using BaseClass::gtPlus_util_;
+    using BaseClass::gtPlus_util_cplx_;
     using BaseClass::gtPlus_mem_manager_;
 
     using BaseClass::ref_src_;
@@ -139,10 +143,10 @@ performCalibPrep(const hoNDArray<T>& ref_src, const hoNDArray<T>& ref_dst, WorkO
 
     std::vector<int> kE1, oE1;
     bool fitItself = true;
-    GADGET_CHECK_RETURN_FALSE(grappa_.kerPattern(kE1, oE1, workOrder3DT->acceFactorE1_, workOrder3DT->grappa_kSize_E1_, fitItself));
+    GADGET_CHECK_RETURN_FALSE(grappa_.kerPattern(kE1, oE1, (int)workOrder3DT->acceFactorE1_, workOrder3DT->grappa_kSize_E1_, fitItself));
 
     std::vector<int> kE2, oE2;
-    GADGET_CHECK_RETURN_FALSE(grappa_.kerPattern(kE2, oE2, workOrder3DT->acceFactorE2_, workOrder3DT->grappa_kSize_E2_, fitItself));
+    GADGET_CHECK_RETURN_FALSE(grappa_.kerPattern(kE2, oE2, (int)workOrder3DT->acceFactorE2_, workOrder3DT->grappa_kSize_E2_, fitItself));
 
     size_t kRO = workOrder3DT->grappa_kSize_RO_;
     size_t kNE1 = workOrder3DT->grappa_kSize_E1_;
@@ -250,10 +254,10 @@ performCalibImpl(const hoNDArray<T>& ref_src, const hoNDArray<T>& ref_dst, WorkO
 
     std::vector<int> kE1, oE1;
     bool fitItself = true;
-    GADGET_CHECK_RETURN_FALSE(grappa_.kerPattern(kE1, oE1, workOrder3DT->acceFactorE1_, workOrder3DT->grappa_kSize_E1_, fitItself));
+    GADGET_CHECK_RETURN_FALSE(grappa_.kerPattern(kE1, oE1, (size_t)workOrder3DT->acceFactorE1_, workOrder3DT->grappa_kSize_E1_, fitItself));
 
     std::vector<int> kE2, oE2;
-    GADGET_CHECK_RETURN_FALSE(grappa_.kerPattern(kE2, oE2, workOrder3DT->acceFactorE2_, workOrder3DT->grappa_kSize_E2_, fitItself));
+    GADGET_CHECK_RETURN_FALSE(grappa_.kerPattern(kE2, oE2, (size_t)workOrder3DT->acceFactorE2_, workOrder3DT->grappa_kSize_E2_, fitItself));
 
     size_t kRO = workOrder3DT->grappa_kSize_RO_;
     size_t kNE1 = workOrder3DT->grappa_kSize_E1_;
@@ -265,8 +269,12 @@ performCalibImpl(const hoNDArray<T>& ref_src, const hoNDArray<T>& ref_dst, WorkO
     ho4DArray<T> acsSrc(refRO, refE1, refE2, srcCHA, const_cast<T*>(ref_src.begin()+usedN*refRO*refE1*refE2*srcCHA));
     ho4DArray<T> acsDst(refRO, refE1, refE2, dstCHA, const_cast<T*>(ref_dst.begin()+usedN*refRO*refE1*refE2*dstCHA));
 
-    GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, acsSrc, "acsSrc");
-    GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, acsDst, "acsDst");
+    std::ostringstream ostr;
+    ostr << "_n_" << usedN;
+    std::string suffix = ostr.str();
+
+    GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, acsSrc, "acsSrc"+suffix);
+    GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, acsDst, "acsDst"+suffix);
 
     grappa_.calib_use_gpu_  = workOrder3DT->grappa_use_gpu_;
 
@@ -275,7 +283,7 @@ performCalibImpl(const hoNDArray<T>& ref_src, const hoNDArray<T>& ref_dst, WorkO
     grappa_.calib3D(acsSrc, acsDst, workOrder3DT->grappa_reg_lamda_, workOrder3DT->grappa_calib_over_determine_ratio_, kRO, kE1, kE2, oE1, oE2, ker);
     GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
-    GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, ker, "ker");
+    GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, ker, "ker"+suffix);
 
     size_t jobN;
     bool splitJobs = this->splitJob(workOrder3DT, jobN);
@@ -294,12 +302,12 @@ performCalibImpl(const hoNDArray<T>& ref_src, const hoNDArray<T>& ref_dst, WorkO
             hoNDArray<T> gFactor(RO, E1, E2, workOrder3DT->gfactor_.begin()+usedN*RO*E1*E2);
 
             this->unmixCoeff(kIm, coilMap, unmixC, gFactor);
-            GADGET_CHECK_RETURN_FALSE(Gadgetron::scal(1.0/workOrder3DT->acceFactorE1_/workOrder3DT->acceFactorE2_, gFactor));
+            GADGET_CHECK_EXCEPTION_RETURN_FALSE(Gadgetron::scal( (value_type)(1.0/workOrder3DT->acceFactorE1_/workOrder3DT->acceFactorE2_), gFactor));
 
             memcpy(workOrder3DT->unmixingCoeffIm_->begin()+usedN*RO*E1*E2*srcCHA, unmixC.begin(), unmixC.get_number_of_bytes());
 
-            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, unmixC, "unmixC");
-            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, gFactor, "gFactor");
+            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, unmixC, "unmixC"+suffix);
+            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, gFactor, "gFactor"+suffix);
         }
     }
     else
@@ -327,7 +335,7 @@ performCalibImpl(const hoNDArray<T>& ref_src, const hoNDArray<T>& ref_dst, WorkO
         if ( !debugFolder_.empty() )
         {
             hoNDArray<T> kImROACha(convKE1, convKE2, RO, srcCHA, kIm.begin());
-            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, kImROACha, "kImROACha");
+            GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, kImROACha, "kImROACha"+suffix);
         }
     }
 
@@ -367,6 +375,20 @@ performUnwrapping(gtPlusReconWorkOrder3DT<T>* workOrder3DT, const hoNDArray<T>&
             aliasedIm.create(data_dst.get_dimensions());
             Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft3c(data_dst, aliasedIm);
         }
+
+        typename realType<T>::Type fftCompensationRatio = (typename realType<T>::Type)(1.0/std::sqrt( (double)workOrder3DT->acceFactorE1_ * (double)workOrder3DT->acceFactorE2_ ));
+        Gadgetron::scal( fftCompensationRatio, aliasedIm);
+
+        // if the image data is scaled and ref lines are going to be filled back to the data, 
+        // the reference lines should be scaled too
+        if ( workOrder3DT->CalibMode_ == ISMRMRD_embedded )
+        {
+            if ( workOrder3DT->embedded_ref_fillback_ )
+            {
+                Gadgetron::scal( fftCompensationRatio, workOrder3DT->ref_);
+            }
+        }
+
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, aliasedIm, "aliasedIm");
@@ -387,7 +409,6 @@ performUnwrapping(gtPlusReconWorkOrder3DT<T>* workOrder3DT, const hoNDArray<T>&
                 size_t kE2 = workOrder3DT->kernelIm_->get_size(1);
                 size_t kRO = workOrder3DT->kernelIm_->get_size(2);
 
-                size_t usedN;
                 if ( (refN<N) || (refN==1) )
                 {
                     hoNDArray<T> kImPermuted(kE1, kE2, RO, srcCHA, dstCHA, workOrder3DT->kernelIm_->begin());
@@ -443,7 +464,7 @@ performUnwrapping(gtPlusReconWorkOrder3DT<T>* workOrder3DT, const hoNDArray<T>&
                             GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
                             GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("imageDomainKernelE1E2RO ... "));
-                            GADGET_CHECK_RETURN_FALSE(grappa_.imageDomainKernelE1E2RO(kImPermutedJob, E1, E2, kImPermutedZeroFilledJob));
+                            GADGET_CHECK_RETURN_FALSE(grappa_.imageDomainKernelE1E2RO(kImPermutedJob, (int)E1, (int)E2, kImPermutedZeroFilledJob));
                             GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
                             GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("cropOver3rdDimension aliased images ... "));
@@ -469,7 +490,7 @@ performUnwrapping(gtPlusReconWorkOrder3DT<T>* workOrder3DT, const hoNDArray<T>&
                                 GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
                                 GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("imageDomainKernelE1E2RO ... "));
-                                GADGET_CHECK_RETURN_FALSE(grappa_.imageDomainKernelE1E2RO(kImPermutedJob, E1, E2, kImPermutedZeroFilledJob));
+                                GADGET_CHECK_RETURN_FALSE(grappa_.imageDomainKernelE1E2RO(kImPermutedJob, (int)E1, (int)E2, kImPermutedZeroFilledJob));
                                 GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
                                 hoNDArray<T> aliasedImPermutedN(E1, E2, RO, srcCHA, aliasedImPermuted.begin()+n*E1*E2*RO*srcCHA);
@@ -505,7 +526,6 @@ performUnwrapping(gtPlusReconWorkOrder3DT<T>* workOrder3DT, const hoNDArray<T>&
             }
             else
             {
-                size_t usedN;
                 if ( (refN<N) || (refN==1) )
                 {
                     hoNDArray<T> kIm(RO, E1, E2, srcCHA, dstCHA, workOrder3DT->kernelIm_->begin());
diff --git a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker3DTL1SPIRITNCG.h b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker3DTL1SPIRITNCG.h
index 49b3320..9c7bc2e 100644
--- a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker3DTL1SPIRITNCG.h
+++ b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker3DTL1SPIRITNCG.h
@@ -25,6 +25,7 @@ public:
 
     typedef gtPlusReconWorker3DTSPIRIT<T> BaseClass;
     typedef gtPlusReconWorkOrder3DT<T> WorkOrderType;
+    typedef typename BaseClass::value_type value_type;
 
     gtPlusReconWorker3DTL1SPIRITNCG() : BaseClass() {}
     virtual ~gtPlusReconWorker3DTL1SPIRITNCG() {}
@@ -42,9 +43,11 @@ public:
     using BaseClass::gt_timer2_;
     using BaseClass::gt_timer3_;
     using BaseClass::performTiming_;
+    using BaseClass::verbose_;
     using BaseClass::gt_exporter_;
     using BaseClass::debugFolder_;
     using BaseClass::gtPlus_util_;
+    using BaseClass::gtPlus_util_cplx_;
     using BaseClass::gtPlus_mem_manager_;
 
 //protected::
@@ -272,7 +275,7 @@ performUnwarppingImplROPermuted(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray
                 Gadgetron::clear(kerImE1E2RO);
             }
 
-            GADGET_CHECK_RETURN_FALSE(spirit_.imageDomainKernelE1E2RO(kernel, E1, E2, kerImE1E2RO));
+            GADGET_CHECK_RETURN_FALSE(spirit_.imageDomainKernelE1E2RO(kernel, (int)E1, (int)E2, kerImE1E2RO));
             kerIm = &kerImE1E2RO;
         }
 
@@ -299,14 +302,14 @@ performUnwarppingImplROPermuted(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray
             {
                 typename realType<T>::Type scaleFactor = 1.0;
                 Gadgetron::norm2(kspace, scaleFactor);
-                scaleFactor /= (RO*std::sqrt(double(srcCHA)));
+                scaleFactor /= (value_type)(RO*std::sqrt(double(srcCHA)));
 
                 workOrder3DT->spirit_ncg_scale_factor_ = scaleFactor;
             }
 
             // apply the scale
-            Gadgetron::scal(T(1.0/workOrder3DT->spirit_ncg_scale_factor_), kspaceLinear);
-            Gadgetron::scal(T(1.0/workOrder3DT->spirit_ncg_scale_factor_), kspace);
+            Gadgetron::scal( (value_type)(1.0/workOrder3DT->spirit_ncg_scale_factor_), kspaceLinear);
+            Gadgetron::scal( (value_type)(1.0/workOrder3DT->spirit_ncg_scale_factor_), kspace);
 
             boost::shared_ptr< hoNDArray<T> > coilMapN;
             if ( workOrder3DT->coilMap_ 
@@ -346,9 +349,9 @@ performUnwarppingImplROPermuted(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray
                     wavNullSpace3DOperator.setMemoryManager(gtPlus_mem_manager_);
                     wavNullSpace3DOperator.setAcquiredPoints(acq);
 
-                    wavNullSpace3DOperator.scale_factor_first_dimension_ = workOrder3DT->spirit_E1_enhancement_ratio_;
-                    wavNullSpace3DOperator.scale_factor_second_dimension_ = workOrder3DT->spirit_E2_enhancement_ratio_;
-                    wavNullSpace3DOperator.scale_factor_third_dimension_ = workOrder3DT->spirit_RO_enhancement_ratio_;
+                    wavNullSpace3DOperator.scale_factor_first_dimension_ = (value_type)workOrder3DT->spirit_E1_enhancement_ratio_;
+                    wavNullSpace3DOperator.scale_factor_second_dimension_ = (value_type)workOrder3DT->spirit_E2_enhancement_ratio_;
+                    wavNullSpace3DOperator.scale_factor_third_dimension_ = (value_type)workOrder3DT->spirit_RO_enhancement_ratio_;
 
                     if ( workOrder3DT->spirit_use_coil_sen_map_ && coilMapN )
                     {
@@ -356,8 +359,8 @@ performUnwarppingImplROPermuted(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray
                     }
 
                     // set operators
-                    ncgsolver.add(spirit, T(workOrder3DT->spirit_parallel_imaging_lamda_) );
-                    ncgsolver.add(wavNullSpace3DOperator, T(workOrder3DT->spirit_image_reg_lamda_) );
+                    ncgsolver.add(spirit, (value_type)(workOrder3DT->spirit_parallel_imaging_lamda_) );
+                    ncgsolver.add(wavNullSpace3DOperator, (value_type)(workOrder3DT->spirit_image_reg_lamda_) );
 
                     GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("NCG spirit solver for 3DT ... "));
                     ncgsolver.solve(b, res);
@@ -385,18 +388,18 @@ performUnwarppingImplROPermuted(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray
                     wavNoNullSpace3DOperator.setMemoryManager(gtPlus_mem_manager_);
                     wavNoNullSpace3DOperator.setAcquiredPoints(acq);
 
-                    wavNoNullSpace3DOperator.scale_factor_first_dimension_ = workOrder3DT->spirit_E1_enhancement_ratio_;
-                    wavNoNullSpace3DOperator.scale_factor_second_dimension_ = workOrder3DT->spirit_E2_enhancement_ratio_;
-                    wavNoNullSpace3DOperator.scale_factor_third_dimension_ = workOrder3DT->spirit_RO_enhancement_ratio_;
+                    wavNoNullSpace3DOperator.scale_factor_first_dimension_ = (value_type)workOrder3DT->spirit_E1_enhancement_ratio_;
+                    wavNoNullSpace3DOperator.scale_factor_second_dimension_ = (value_type)workOrder3DT->spirit_E2_enhancement_ratio_;
+                    wavNoNullSpace3DOperator.scale_factor_third_dimension_ = (value_type)workOrder3DT->spirit_RO_enhancement_ratio_;
 
                     if ( workOrder3DT->spirit_use_coil_sen_map_ && coilMapN )
                     {
                         wavNoNullSpace3DOperator.setCoilSenMap(coilMapN);
                     }
 
-                    ncgsolver.add(spirit_noNullSpace, T(workOrder3DT->spirit_parallel_imaging_lamda_) );
-                    ncgsolver.add(wavNoNullSpace3DOperator, T(workOrder3DT->spirit_image_reg_lamda_) );
-                    ncgsolver.add(dataOper, T(workOrder3DT->spirit_data_fidelity_lamda_) );
+                    ncgsolver.add(spirit_noNullSpace, (value_type)(workOrder3DT->spirit_parallel_imaging_lamda_) );
+                    ncgsolver.add(wavNoNullSpace3DOperator, (value_type)(workOrder3DT->spirit_image_reg_lamda_) );
+                    ncgsolver.add(dataOper, (value_type)(workOrder3DT->spirit_data_fidelity_lamda_) );
 
                     GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("NCG spirit solver for 3DT without null space ... "));
                     ncgsolver.solve(b, res);
@@ -438,8 +441,8 @@ performUnwarppingImplROPermuted(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray
                     }
 
                     // set operators
-                    ncgsolver.add(spirit, T(workOrder3DT->spirit_parallel_imaging_lamda_) );
-                    ncgsolver.add(wavNullSpace2DOperator, T(workOrder3DT->spirit_image_reg_lamda_) );
+                    ncgsolver.add(spirit, (value_type)(workOrder3DT->spirit_parallel_imaging_lamda_) );
+                    ncgsolver.add(wavNullSpace2DOperator, (value_type)(workOrder3DT->spirit_image_reg_lamda_) );
 
                     GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("NCG spirit solver for 3D ... "));
                     ncgsolver.solve(b, res);
@@ -471,9 +474,9 @@ performUnwarppingImplROPermuted(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray
                         wavNoNullSpace2DOperator.setCoilSenMap(coilMapN);
                     }
 
-                    ncgsolver.add(spirit_noNullSpace, T(workOrder3DT->spirit_parallel_imaging_lamda_) );
-                    ncgsolver.add(wavNoNullSpace2DOperator, T(workOrder3DT->spirit_image_reg_lamda_) );
-                    ncgsolver.add(dataOper, T(workOrder3DT->spirit_data_fidelity_lamda_) );
+                    ncgsolver.add(spirit_noNullSpace, (value_type)(workOrder3DT->spirit_parallel_imaging_lamda_) );
+                    ncgsolver.add(wavNoNullSpace2DOperator, (value_type)(workOrder3DT->spirit_image_reg_lamda_) );
+                    ncgsolver.add(dataOper, (value_type)(workOrder3DT->spirit_data_fidelity_lamda_) );
 
                     GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("NCG spirit solver for 3D without null space ... "));
                     ncgsolver.solve(b, res);
@@ -483,7 +486,7 @@ performUnwarppingImplROPermuted(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray
                 }
             }
 
-            Gadgetron::scal(T(workOrder3DT->spirit_ncg_scale_factor_), res);
+            Gadgetron::scal( (value_type)(workOrder3DT->spirit_ncg_scale_factor_), res);
         }
         else
         {
@@ -532,7 +535,7 @@ performUnwarppingImplROPermuted(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray
                 Gadgetron::clear(kerImE1E2RO);
             }
 
-            GADGET_CHECK_RETURN_FALSE(spirit_.imageDomainKernelE1E2RO(kernel, E1, E2, kerImE1E2RO));
+            GADGET_CHECK_RETURN_FALSE(spirit_.imageDomainKernelE1E2RO(kernel, (int)E1, (int)E2, kerImE1E2RO));
             kerIm = &kerImE1E2RO;
         }
 
@@ -551,14 +554,14 @@ performUnwarppingImplROPermuted(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray
             {
                 typename realType<T>::Type scaleFactor = 1.0;
                 Gadgetron::norm2(kspace, scaleFactor);
-                scaleFactor /= (RO*std::sqrt(double(srcCHA)));
+                scaleFactor /= (value_type)(RO*std::sqrt(double(srcCHA)));
 
                 workOrder3DT->spirit_ncg_scale_factor_ = scaleFactor;
             }
 
             // apply the scale
-            Gadgetron::scal(T(1.0/workOrder3DT->spirit_ncg_scale_factor_), kspaceLinear);
-            Gadgetron::scal(T(1.0/workOrder3DT->spirit_ncg_scale_factor_), kspace);
+            Gadgetron::scal((value_type)(1.0/workOrder3DT->spirit_ncg_scale_factor_), kspaceLinear);
+            Gadgetron::scal((value_type)(1.0/workOrder3DT->spirit_ncg_scale_factor_), kspace);
 
             boost::shared_ptr< hoNDArray<T> > coilMapN;
             if ( workOrder3DT->coilMap_ 
@@ -598,9 +601,9 @@ performUnwarppingImplROPermuted(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray
                     wavNullSpace3DOperator.setMemoryManager(gtPlus_mem_manager_);
                     wavNullSpace3DOperator.setAcquiredPoints(acq);
 
-                    wavNullSpace3DOperator.scale_factor_first_dimension_ = workOrder3DT->spirit_E1_enhancement_ratio_;
-                    wavNullSpace3DOperator.scale_factor_second_dimension_ = workOrder3DT->spirit_E2_enhancement_ratio_;
-                    wavNullSpace3DOperator.scale_factor_third_dimension_ = workOrder3DT->spirit_RO_enhancement_ratio_;
+                    wavNullSpace3DOperator.scale_factor_first_dimension_ = (value_type)workOrder3DT->spirit_E1_enhancement_ratio_;
+                    wavNullSpace3DOperator.scale_factor_second_dimension_ = (value_type)workOrder3DT->spirit_E2_enhancement_ratio_;
+                    wavNullSpace3DOperator.scale_factor_third_dimension_ = (value_type)workOrder3DT->spirit_RO_enhancement_ratio_;
 
                     if ( workOrder3DT->spirit_use_coil_sen_map_ && coilMapN )
                     {
@@ -608,8 +611,8 @@ performUnwarppingImplROPermuted(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray
                     }
 
                     // set operators
-                    ncgsolver.add(spirit, T(workOrder3DT->spirit_parallel_imaging_lamda_) );
-                    ncgsolver.add(wavNullSpace3DOperator, T(workOrder3DT->spirit_image_reg_lamda_) );
+                    ncgsolver.add(spirit, (value_type)(workOrder3DT->spirit_parallel_imaging_lamda_) );
+                    ncgsolver.add(wavNullSpace3DOperator, (value_type)(workOrder3DT->spirit_image_reg_lamda_) );
 
                     GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("NCG spirit solver for 3DT ... "));
                     ncgsolver.solve(b, res);
@@ -637,18 +640,18 @@ performUnwarppingImplROPermuted(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray
                     wavNoNullSpace3DOperator.setMemoryManager(gtPlus_mem_manager_);
                     wavNoNullSpace3DOperator.setAcquiredPoints(acq);
 
-                    wavNoNullSpace3DOperator.scale_factor_first_dimension_ = workOrder3DT->spirit_E1_enhancement_ratio_;
-                    wavNoNullSpace3DOperator.scale_factor_second_dimension_ = workOrder3DT->spirit_E2_enhancement_ratio_;
-                    wavNoNullSpace3DOperator.scale_factor_third_dimension_ = workOrder3DT->spirit_RO_enhancement_ratio_;
+                    wavNoNullSpace3DOperator.scale_factor_first_dimension_ = (value_type)workOrder3DT->spirit_E1_enhancement_ratio_;
+                    wavNoNullSpace3DOperator.scale_factor_second_dimension_ = (value_type)workOrder3DT->spirit_E2_enhancement_ratio_;
+                    wavNoNullSpace3DOperator.scale_factor_third_dimension_ = (value_type)workOrder3DT->spirit_RO_enhancement_ratio_;
 
                     if ( workOrder3DT->spirit_use_coil_sen_map_ && coilMapN )
                     {
                         wavNoNullSpace3DOperator.setCoilSenMap(coilMapN);
                     }
 
-                    ncgsolver.add(spirit_noNullSpace, T(workOrder3DT->spirit_parallel_imaging_lamda_) );
-                    ncgsolver.add(wavNoNullSpace3DOperator, T(workOrder3DT->spirit_image_reg_lamda_) );
-                    ncgsolver.add(dataOper, T(workOrder3DT->spirit_data_fidelity_lamda_) );
+                    ncgsolver.add(spirit_noNullSpace, (value_type)(workOrder3DT->spirit_parallel_imaging_lamda_) );
+                    ncgsolver.add(wavNoNullSpace3DOperator, (value_type)(workOrder3DT->spirit_image_reg_lamda_) );
+                    ncgsolver.add(dataOper, (value_type)(workOrder3DT->spirit_data_fidelity_lamda_) );
 
                     GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("NCG spirit solver for 3DT without null space ... "));
                     ncgsolver.solve(b, res);
@@ -690,8 +693,8 @@ performUnwarppingImplROPermuted(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray
                     }
 
                     // set operators
-                    ncgsolver.add(spirit, T(workOrder3DT->spirit_parallel_imaging_lamda_) );
-                    ncgsolver.add(wavNullSpace2DOperator, T(workOrder3DT->spirit_image_reg_lamda_) );
+                    ncgsolver.add(spirit, (value_type)(workOrder3DT->spirit_parallel_imaging_lamda_) );
+                    ncgsolver.add(wavNullSpace2DOperator, (value_type)(workOrder3DT->spirit_image_reg_lamda_) );
 
                     GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("NCG spirit solver for 3D ... "));
                     ncgsolver.solve(b, res);
@@ -723,9 +726,9 @@ performUnwarppingImplROPermuted(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray
                         wavNoNullSpace2DOperator.setCoilSenMap(coilMapN);
                     }
 
-                    ncgsolver.add(spirit_noNullSpace, T(workOrder3DT->spirit_parallel_imaging_lamda_) );
-                    ncgsolver.add(wavNoNullSpace2DOperator, T(workOrder3DT->spirit_image_reg_lamda_) );
-                    ncgsolver.add(dataOper, T(workOrder3DT->spirit_data_fidelity_lamda_) );
+                    ncgsolver.add(spirit_noNullSpace, (value_type)(workOrder3DT->spirit_parallel_imaging_lamda_) );
+                    ncgsolver.add(wavNoNullSpace2DOperator, (value_type)(workOrder3DT->spirit_image_reg_lamda_) );
+                    ncgsolver.add(dataOper, (value_type)(workOrder3DT->spirit_data_fidelity_lamda_) );
 
                     GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("NCG spirit solver for 3D without null space ... "));
                     ncgsolver.solve(b, res);
@@ -735,7 +738,7 @@ performUnwarppingImplROPermuted(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray
                 }
             }
 
-            Gadgetron::scal(T(workOrder3DT->spirit_ncg_scale_factor_), res);
+            Gadgetron::scal( (value_type)(workOrder3DT->spirit_ncg_scale_factor_), res);
         }
         else
         {
@@ -753,7 +756,7 @@ performUnwarppingImplROPermuted(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray
 
 template <typename T> 
 bool gtPlusReconWorker3DTL1SPIRITNCG<T>::
-    performUnwarppingImpl(gtPlusReconJob2DT<T>& job)
+performUnwarppingImpl(gtPlusReconJob2DT<T>& job)
 {
     try
     {
diff --git a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker3DTNoAcceleration.h b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker3DTNoAcceleration.h
index 64dc4dc..622d298 100644
--- a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker3DTNoAcceleration.h
+++ b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker3DTNoAcceleration.h
@@ -5,7 +5,7 @@
 
 #pragma once
 
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
 
 #include "GadgetronTimer.h"
 
@@ -20,6 +20,7 @@ class gtPlusReconWorker3DTNoAcceleration : public gtPlusReconWorker3DT<T>
 public:
 
     typedef gtPlusReconWorker3DT<T> BaseClass;
+    typedef typename BaseClass::value_type value_type;
 
     gtPlusReconWorker3DTNoAcceleration() : BaseClass() {}
     virtual ~gtPlusReconWorker3DTNoAcceleration() {}
@@ -32,9 +33,11 @@ public:
     using BaseClass::gt_timer2_;
     using BaseClass::gt_timer3_;
     using BaseClass::performTiming_;
+    using BaseClass::verbose_;
     using BaseClass::gt_exporter_;
     using BaseClass::debugFolder_;
     using BaseClass::gtPlus_util_;
+    using BaseClass::gtPlus_util_cplx_;
     using BaseClass::gtPlus_mem_manager_;
 
     using BaseClass::ref_src_;
@@ -57,13 +60,13 @@ bool gtPlusReconWorker3DTNoAcceleration<T>::performRecon(gtPlusReconWorkOrder3DT
         if ( !workOrder3DT->workFlow_use_BufferedKernel_ )
         {
             GADGET_CHECK_PERFORM(performTiming_, gt_timer1_.start("prepRef"));
-            GADGET_CHECK_RETURN_FALSE(prepRef(workOrder3DT, workOrder3DT->ref_, 
-                                            workOrder3DT->ref_recon_, 
-                                            workOrder3DT->ref_coil_map_, 
-                                            workOrder3DT->start_RO_, workOrder3DT->end_RO_, 
-                                            workOrder3DT->start_E1_, workOrder3DT->end_E1_, 
-                                            workOrder3DT->start_E2_, workOrder3DT->end_E2_, 
-                                            workOrder3DT->data_.get_size(1), workOrder3DT->data_.get_size(2)));
+            GADGET_CHECK_RETURN_FALSE(this->prepRef(workOrder3DT, workOrder3DT->ref_, 
+                                                workOrder3DT->ref_recon_, 
+                                                workOrder3DT->ref_coil_map_, 
+                                                workOrder3DT->start_RO_, workOrder3DT->end_RO_, 
+                                                workOrder3DT->start_E1_, workOrder3DT->end_E1_, 
+                                                workOrder3DT->start_E2_, workOrder3DT->end_E2_, 
+                                                workOrder3DT->data_.get_size(1), workOrder3DT->data_.get_size(2)));
             GADGET_CHECK_PERFORM(performTiming_, gt_timer1_.stop());
         }
 
@@ -97,16 +100,9 @@ bool gtPlusReconWorker3DTNoAcceleration<T>::performRecon(gtPlusReconWorkOrder3DT
 
                 hoNDArray<T> coilMapN(RO, E1, E2, CHA, workOrder3DT->coilMap_->begin()+usedN*RO*E1*E2*CHA);
 
-                if ( workOrder3DT->csm_use_gpu_ )
-                {
-                    GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIHGPU_FullResMap(buffer3DT, 
-                            coilMapN, workOrder3DT->coil_map_algorithm_, workOrder3DT->csm_kSize_, workOrder3DT->csm_powermethod_num_, workOrder3DT->csm_iter_num_, workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
-                }
-                else
-                {
-                    GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIH(buffer3DT, 
-                            coilMapN, workOrder3DT->coil_map_algorithm_, workOrder3DT->csm_kSize_, workOrder3DT->csm_powermethod_num_, workOrder3DT->csm_iter_num_, workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
-                }
+                GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIH(buffer3DT, 
+                        coilMapN, workOrder3DT->coil_map_algorithm_, workOrder3DT->csm_kSize_, workOrder3DT->csm_powermethod_num_, workOrder3DT->csm_iter_num_, (value_type)workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
+
                 GADGET_CHECK_RETURN_FALSE(repmatLastDimension(*workOrder3DT->coilMap_, usedN));
             }
             else
@@ -115,16 +111,8 @@ bool gtPlusReconWorker3DTNoAcceleration<T>::performRecon(gtPlusReconWorkOrder3DT
 
                 GADGET_CHECK_RETURN_FALSE(Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->ifft3c(workOrder3DT->ref_coil_map_, buffer3DT));
 
-                if ( workOrder3DT->csm_use_gpu_ )
-                {
-                    GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIHGPU_FullResMap(buffer3DT, 
-                            *workOrder3DT->coilMap_, workOrder3DT->coil_map_algorithm_, workOrder3DT->csm_kSize_, workOrder3DT->csm_powermethod_num_, workOrder3DT->csm_iter_num_, workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
-                }
-                else
-                {
-                    GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIH(buffer3DT, 
-                            *workOrder3DT->coilMap_, workOrder3DT->coil_map_algorithm_, workOrder3DT->csm_kSize_, workOrder3DT->csm_powermethod_num_, workOrder3DT->csm_iter_num_, workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
-                }
+                GADGET_CHECK_RETURN_FALSE(gtPlusISMRMRDReconUtilComplex<T>().coilMap3DNIH(buffer3DT, 
+                        *workOrder3DT->coilMap_, workOrder3DT->coil_map_algorithm_, workOrder3DT->csm_kSize_, workOrder3DT->csm_powermethod_num_, workOrder3DT->csm_iter_num_, (value_type)workOrder3DT->csm_iter_thres_, workOrder3DT->csm_true_3D_));
             }
 
             GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, *workOrder3DT->coilMap_, "coilMap_");
diff --git a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker3DTSPIRIT.h b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker3DTSPIRIT.h
index 3f77bfa..4985312 100644
--- a/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker3DTSPIRIT.h
+++ b/toolboxes/gtplus/workflow/gtPlusISMRMRDReconWorker3DTSPIRIT.h
@@ -5,7 +5,7 @@
 
 #pragma once
 
-#include "ismrmrd.h"
+#include "ismrmrd/ismrmrd.h"
 #include "GadgetronTimer.h"
 #include "gtPlusISMRMRDReconUtil.h"
 #include "gtPlusISMRMRDReconWorker3DT.h"
@@ -25,6 +25,7 @@ public:
 
     typedef gtPlusReconWorker3DT<T> BaseClass;
     typedef gtPlusReconWorkOrder3DT<T> WorkOrderType;
+    typedef typename BaseClass::value_type value_type;
 
     gtPlusReconWorker3DTSPIRIT() : spirit_kernelIm_permuted_(false), BaseClass() {}
     virtual ~gtPlusReconWorker3DTSPIRIT() {}
@@ -49,6 +50,7 @@ public:
     using BaseClass::gt_timer2_;
     using BaseClass::gt_timer3_;
     using BaseClass::performTiming_;
+    using BaseClass::verbose_;
     using BaseClass::gt_exporter_;
     using BaseClass::debugFolder_;
     using BaseClass::gtPlus_util_;
@@ -119,7 +121,7 @@ bool gtPlusReconWorker3DTSPIRIT<T>::autoReconParameter(gtPlusReconWorkOrder<T>*
         workOrder3DT->spirit_iter_thres_ = 0.005;
         workOrder3DT->spirit_reg_lamda_ = 0.01;
 
-        if ( workOrder3DT->recon_algorithm_ == ISMRMRD_embedded )
+        if ( workOrder3DT->recon_algorithm_ == ISMRMRD_SPIRIT )
         {
             workOrder3DT->spirit_iter_thres_ = 0.005;
         }
@@ -153,7 +155,11 @@ performCalibPrep(const hoNDArray<T>& ref_src, const hoNDArray<T>& ref_dst, WorkO
     size_t kE1 = workOrder3DT->spirit_kSize_E1_;
     size_t kE2 = workOrder3DT->spirit_kSize_E2_;
 
-    workOrder3DT->kernel_->create(kRO, kE1, kE2, srcCHA, dstCHA, 1, 1, 1, refN);
+    size_t oRO = workOrder3DT->spirit_oSize_RO_;
+    size_t oE1 = workOrder3DT->spirit_oSize_E1_;
+    size_t oE2 = workOrder3DT->spirit_oSize_E2_;
+
+    workOrder3DT->kernel_->create(kRO, kE1, kE2, srcCHA, dstCHA, oRO, oE1, oE2, refN);
 
     size_t jobN;
     bool splitJobs = this->splitJob(workOrder3DT, jobN);
@@ -223,18 +229,22 @@ performCalibImpl(const hoNDArray<T>& ref_src, const hoNDArray<T>& ref_dst, WorkO
     size_t kE1 = workOrder3DT->spirit_kSize_E1_;
     size_t kE2 = workOrder3DT->spirit_kSize_E2_;
 
+    size_t oRO = workOrder3DT->spirit_oSize_RO_;
+    size_t oE1 = workOrder3DT->spirit_oSize_E1_;
+    size_t oE2 = workOrder3DT->spirit_oSize_E2_;
+
     ho4DArray<T> acsSrc(refRO, refE1, refE2, srcCHA, const_cast<T*>(ref_src.begin()+usedN*refRO*refE1*refE2*srcCHA));
     ho4DArray<T> acsDst(refRO, refE1, refE2, dstCHA, const_cast<T*>(ref_dst.begin()+usedN*refRO*refE1*refE2*dstCHA));
 
     GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, acsSrc, "acsSrc");
     GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, acsDst, "acsDst");
 
-    hoNDArray<T> ker(kRO, kE1, kE2, srcCHA, dstCHA, 1, 1, 1, workOrder3DT->kernel_->begin()+usedN*kRO*kE1*kE2*srcCHA*dstCHA);
+    hoNDArray<T> ker(kRO, kE1, kE2, srcCHA, dstCHA, oRO, oE1, oE2, workOrder3DT->kernel_->begin()+usedN*kRO*kE1*kE2*srcCHA*dstCHA*oRO*oE1*oE2);
 
     spirit_.calib_use_gpu_ = workOrder3DT->spirit_use_gpu_;
 
     GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("SPIRIT 3D calibration ... "));
-    GADGET_CHECK_RETURN_FALSE(spirit_.calib3D(acsSrc, acsDst, workOrder3DT->spirit_reg_lamda_, workOrder3DT->spirit_calib_over_determine_ratio_, kRO, kE1, kE2, 1, 1, 1, ker));
+    GADGET_CHECK_RETURN_FALSE(spirit_.calib3D(acsSrc, acsDst, workOrder3DT->spirit_reg_lamda_, workOrder3DT->spirit_calib_over_determine_ratio_, kRO, kE1, kE2, oRO, oE1, oE2, ker));
     GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
     GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, ker, "ker");
@@ -249,7 +259,7 @@ performCalibImpl(const hoNDArray<T>& ref_src, const hoNDArray<T>& ref_dst, WorkO
         hoNDArray<T> kIm(E1, E2, RO, srcCHA, dstCHA, workOrder3DT->kernelIm_->begin()+usedN*E1*E2*RO*srcCHA*dstCHA);
 
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("SPIRIT 3D image domain kernel ... "));
-        GADGET_CHECK_RETURN_FALSE(spirit_.imageDomainKernel3D(ker, kRO, kE1, kE2, 1, 1, 1, RO, E1, E2, kIm, minusI));
+        GADGET_CHECK_RETURN_FALSE(spirit_.imageDomainKernel3D(ker, kRO, kE1, kE2, oRO, oE1, oE2, RO, E1, E2, kIm, minusI));
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
         if ( !debugFolder_.empty() )
@@ -266,7 +276,7 @@ performCalibImpl(const hoNDArray<T>& ref_src, const hoNDArray<T>& ref_dst, WorkO
         hoNDArray<T> kIm(convKE1, convKE2, RO, srcCHA, dstCHA, workOrder3DT->kernelIm_->begin()+usedN*convKE1*convKE2*RO*srcCHA*dstCHA);
 
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.start("SPIRIT 3D image domain kernel only along RO ... "));
-        GADGET_CHECK_RETURN_FALSE(spirit_.imageDomainKernelRO3D(ker, kRO, kE1, kE2, 1, 1, 1, RO, kIm, minusI));
+        GADGET_CHECK_RETURN_FALSE(spirit_.imageDomainKernelRO3D(ker, kRO, kE1, kE2, oRO, oE1, oE2, RO, kIm, minusI));
         GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
 
         if ( !debugFolder_.empty() )
@@ -340,10 +350,17 @@ performUnwrapping(gtPlusReconWorkOrder3DT<T>* workOrder3DT, const hoNDArray<T>&
         typename realType<T>::Type scaleFactor = 1.0;
         hoNDArray<T> kspaceForScaleFactor(RO, E1, E2, srcCHA, const_cast<T*>(data_dst.begin()));
         Gadgetron::norm2(kspaceForScaleFactor, scaleFactor);
-        scaleFactor /= (RO*std::sqrt(double(srcCHA)));
+        scaleFactor /= (value_type)(RO*std::sqrt(double(srcCHA)));
 
         workOrder3DT->spirit_ncg_scale_factor_ = scaleFactor;
 
+        size_t indMax;
+        hoNDArray<value_type> mag;
+        Gadgetron::abs(kspaceForScaleFactor, mag);
+        value_type maxMag;
+        Gadgetron::maxAbsolute(mag, maxMag, indMax);
+        workOrder3DT->spirit_slep_scale_factor_ = maxMag;
+
         // split the jobs
         size_t jobMegaBytes = workOrder3DT->job_max_Megabytes_;
         size_t jobN = workOrder3DT->job_num_of_N_;
@@ -478,8 +495,8 @@ performUnwrapping(gtPlusReconWorkOrder3DT<T>* workOrder3DT, const hoNDArray<T>&
 
                 GADGET_MSG("SPIRIT - 3DT - total job : " << jobList.size() << " - job N : " << jobN << " - cloud size : " << cloudSize);
 
-                unsigned int numOfJobRunOnCloud = jobList.size() - jobList.size()/(cloudSize+1);
-                if ( !runJobsOnLocalNode ) numOfJobRunOnCloud = jobList.size();
+                unsigned int numOfJobRunOnCloud = (unsigned int)(jobList.size() - jobList.size()/(cloudSize+1));
+                if ( !runJobsOnLocalNode ) numOfJobRunOnCloud = (unsigned int)jobList.size();
 
                 typedef Gadgetron::GadgetCloudController< gtPlusReconJob2DT<T> > GTCloudControllerType;
                 GTCloudControllerType controller;
@@ -716,7 +733,7 @@ performUnwarppingImplROPermuted(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray
                 Gadgetron::clear(kerImE1E2RO);
             }
 
-            GADGET_CHECK_RETURN_FALSE(spirit_.imageDomainKernelE1E2RO(ker, E1, E2, kerImE1E2RO));
+            GADGET_CHECK_RETURN_FALSE(spirit_.imageDomainKernelE1E2RO(ker, (int)E1, (int)E2, kerImE1E2RO));
             kerIm = &kerImE1E2RO;
 
             GADGET_CHECK_PERFORM(performTiming_, gt_timer3_.stop());
@@ -727,7 +744,7 @@ performUnwarppingImplROPermuted(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray
         long long NUM = (long long)RO;
 
         #ifdef USE_OMP
-            int numThreads = (NUM<16) ? NUM : 16;
+            int numThreads = (int)( (NUM<16) ? NUM : 16 );
 
             int numOpenMPProcs = omp_get_num_procs();
             GADGET_MSG("gtPlusReconWorker3DTSPIRIT, numOpenMPProcs : " << numOpenMPProcs);
@@ -783,7 +800,7 @@ performUnwarppingImplROPermuted(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray
             gtPlusLinearSolver<hoNDArray<T>, hoNDArray<T>, gtPlusSPIRIT2DOperator<T> >& cgSolver = *pCGSolver;
 
             cgSolver.iterMax_ = workOrder3DT->spirit_iter_max_;
-            cgSolver.thres_ = workOrder3DT->spirit_iter_thres_;
+            cgSolver.thres_ = (value_type)workOrder3DT->spirit_iter_thres_;
             cgSolver.printIter_ = workOrder3DT->spirit_print_iter_;
 
             cgSolver.set(spirit);
@@ -830,6 +847,10 @@ performUnwarppingImplROPermuted(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray
             delete pCGSolver;
         }
 
+        #ifdef USE_OMP
+            omp_set_nested(0);
+        #endif
+
         GADGET_EXPORT_ARRAY_COMPLEX(debugFolder_, gt_exporter_, res, "res_Shifted");
 
         Gadgetron::hoNDFFT<typename realType<T>::Type>::instance()->fftshift2D(res, kspace_Shifted);
@@ -882,7 +903,7 @@ performUnwarppingImpl(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray<T>& kspac
 
         long long t;
 
-        #pragma omp parallel default(none) private(t) shared(RO, E1, E2, srcCHA, dstCHA, workOrder3DT, NUM, resDecoupled, pKspaceIfftROPermuted, pG_I) if ( NUM > 6 ) num_threads( (NUM<16) ? NUM : 16 )
+        #pragma omp parallel default(none) private(t) shared(RO, E1, E2, srcCHA, dstCHA, workOrder3DT, NUM, resDecoupled, pKspaceIfftROPermuted, pG_I) if ( NUM > 6 ) num_threads( (int)((NUM<16) ? NUM : 16) )
         {
             hoNDArrayMemoryManaged<T> adjForG_I_Decoupled(E1, E2, srcCHA, dstCHA, gtPlus_mem_manager_);
             T* pDecoupledG_I = adjForG_I_Decoupled.begin();
@@ -899,7 +920,7 @@ performUnwarppingImpl(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray<T>& kspac
             gtPlusLinearSolver<hoNDArray<T>, hoNDArray<T>, gtPlusSPIRIT2DOperator<T> >& cgSolver = *pCGSolver;
 
             cgSolver.iterMax_ = workOrder3DT->spirit_iter_max_;
-            cgSolver.thres_ = workOrder3DT->spirit_iter_thres_;
+            cgSolver.thres_ = (value_type)workOrder3DT->spirit_iter_thres_;
             cgSolver.printIter_ = workOrder3DT->spirit_print_iter_;
 
             cgSolver.set(spirit);
@@ -915,7 +936,7 @@ performUnwarppingImpl(gtPlusReconWorkOrder<T>* workOrder3DT, hoNDArray<T>& kspac
                 hoNDArray<T> resCurr(E1, E2, dstCHA, resDecoupled.begin()+ro*E1*E2*dstCHA);
 
                 // fill in kernel and kspace
-                size_t e1, e2, scha, dcha;
+                size_t scha, dcha;
 
                 for ( dcha=0; dcha<dstCHA; dcha++)
                 {
diff --git a/toolboxes/linalg/CMakeLists.txt b/toolboxes/linalg/CMakeLists.txt
new file mode 100644
index 0000000..0aa4445
--- /dev/null
+++ b/toolboxes/linalg/CMakeLists.txt
@@ -0,0 +1,37 @@
+# Configures the "linalg" toolbox: builds the linalg library and the linalg_test executable, then installs both and the public headers.
+find_package(BLAS REQUIRED)
+find_package(LAPACK REQUIRED)
+find_package(FFTW3 COMPONENTS single double threads)
+# Print the FFTW3 library list at configure time.
+message("FFTW3_LIBRARIES: ${FFTW3_LIBRARIES}") 
+# Windows only: define __BUILD_GADGETRON_LINALG__, which linalg_export.h tests when selecting __declspec(dllexport).
+if (WIN32)
+	ADD_DEFINITIONS(-D__BUILD_GADGETRON_LINALG__)
+endif (WIN32)
+
+# Header search paths: in-tree toolbox directories plus the Boost, ISMRMRD, FFTW3 and Armadillo include dirs.
+include_directories(${CMAKE_SOURCE_DIR}/toolboxes/core/cpu)
+include_directories(${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math)
+include_directories(${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image)
+include_directories(${CMAKE_SOURCE_DIR}/toolboxes/core)
+include_directories(${CMAKE_SOURCE_DIR}/toolboxes/linalg)
+include_directories(${CMAKE_SOURCE_DIR}/toolboxes/fft/cpu)
+include_directories(${Boost_INCLUDE_DIR})
+include_directories(${ISMRMRD_INCLUDE_DIR})
+include_directories(${FFTW3_INCLUDE_DIR})
+include_directories(${ARMADILLO_INCLUDE_DIRS})
+
+add_library(linalg ${LIBTYPE} linalg_export.h 
+                          matrix_vector_op.h 
+                          matrix_vector_op.cpp 
+                          matrix_decomposition.h 
+                          matrix_decomposition.cpp)
+
+add_executable(linalg_test linalg_test.cpp)
+target_link_libraries(linalg ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES} ${FFTW3_LIBRARIES})
+target_link_libraries(linalg_test ${FFTW3_LIBRARIES} linalg)
+target_link_libraries(linalg_test gadgetron_toolbox_cpucore_math gadgetron_toolbox_cpufft)
+
+install(TARGETS linalg DESTINATION lib COMPONENT main)
+install(TARGETS linalg_test DESTINATION bin COMPONENT main)
+install(FILES matrix_vector_op.h matrix_decomposition.h linalg_export.h DESTINATION include COMPONENT main)
diff --git a/toolboxes/linalg/generate_test_data.m b/toolboxes/linalg/generate_test_data.m
new file mode 100644
index 0000000..32cab8a
--- /dev/null
+++ b/toolboxes/linalg/generate_test_data.m
@@ -0,0 +1,32 @@
+%Generate Test Data for linalg toolbox
+
+%Test data for matrix-matrix mult
+M = 30000;
+N = 30000;
+K = 500;
+avgs = 100;
+
+A = single(complex(randn(M,K),randn(M,K)));
+B = single(complex(randn(K,N),randn(K,N)));
+C1 = single(zeros(M,N));
+C2 = single(A*B+C1);
+write_mr_raw(A.','A.cplx'); %.' is the non-conjugate transpose; every matrix below is written this way
+write_mr_raw(B.','B.cplx');
+write_mr_raw(C1.','C1.cplx');
+write_mr_raw(C2.','C2.cplx');
+
+%Test data for Cholesky: S is the average of avgs outer products of random complex K-vectors
+S = zeros(K);
+for a=1:avgs,
+    tmp_noise = complex(randn(K,1),randn(K,1));
+    S = S + tmp_noise * tmp_noise';
+end
+clear tmp_noise;
+S = S/avgs;
+S = S + diag(10000*diag(S)); %adds 10000x each diagonal entry back onto the diagonal
+%Write S, its lower-triangular Cholesky factor, and the inverse of that factor
+write_mr_raw(S.', 'S.cplx');
+S_chol = chol(S,'lower');
+write_mr_raw(S_chol.', 'S_chol.cplx');
+S_chol_inv = inv(S_chol);
+write_mr_raw(S_chol_inv.', 'S_chol_inv.cplx');
diff --git a/toolboxes/linalg/linalg_export.h b/toolboxes/linalg/linalg_export.h
new file mode 100644
index 0000000..16042a1
--- /dev/null
+++ b/toolboxes/linalg/linalg_export.h
@@ -0,0 +1,25 @@
+/*
+ * linalg_export.h
+ *
+ *  Created on: Dec 9, 2011
+ *      Author: Michael S. Hansen
+ */
+
+#ifndef LINALG_EXPORT_H_
+#define LINALG_EXPORT_H_
+/* EXPORTLINALG: expands to nothing unless WIN32 is defined and BUILD_TOOLBOX_STATIC is not. */
+#if defined (WIN32)
+    #ifdef BUILD_TOOLBOX_STATIC
+        #define EXPORTLINALG 
+    #else /* BUILD_TOOLBOX_STATIC not defined */
+        #if defined (__BUILD_GADGETRON_LINALG__) || defined (linalg_EXPORTS)
+            #define EXPORTLINALG __declspec(dllexport)
+        #else
+            #define EXPORTLINALG __declspec(dllimport)
+        #endif
+    #endif
+#else /* WIN32 not defined */
+    #define EXPORTLINALG
+#endif
+
+#endif /* LINALG_EXPORT_H_ */
diff --git a/toolboxes/linalg/linalg_ground_truth_comparison.cpp b/toolboxes/linalg/linalg_ground_truth_comparison.cpp
new file mode 100644
index 0000000..870ce57
--- /dev/null
+++ b/toolboxes/linalg/linalg_ground_truth_comparison.cpp
@@ -0,0 +1,418 @@
+/*
+* linalg_ground_truth_comparison.cpp
+*
+*  Created on: Dec 9, 2011
+*      Author: Michael S. Hansen
+*/
+
+#include <iostream>
+#include <hoNDArray.h>
+#include <hoNDArray_fileio.h>
+#include <hoNDArray_utils.h>
+#include <matrix_vector_op.h>
+#include <matrix_decomposition.h>
+#include "GadgetronTimer.h"
+#include "hoNDArray_math_util.h"
+#include "hoNDArray_elemwise.h"
+#include "hoMatrix_util.h"
+#include "hoNDMath_util.h"
+#include "hoNDFFT.h"
+#include <fftw3.h>
+#include <valarray>
+#include <omp.h>
+
+#define DIFF_LIMIT 1e-5
+
+using namespace Gadgetron;
+
+// Relative difference between two complex arrays: the value Gadgetron::norm1
+// reports for (A - B), divided by |Gadgetron::dotc(A, B)| whenever that
+// magnitude is non-zero. Prints a message and returns 9e30 on a size mismatch.
+double mcompare(hoNDArray< std::complex<float> >* A, hoNDArray< std::complex<float> >* B)
+{
+    if (A->get_number_of_elements() != B->get_number_of_elements()) {
+        std::cout << "Wrong number of elements in comparison" << std::endl;
+        return 9e30;
+    }
+
+    hoNDArray< std::complex<float> > diff;
+    Gadgetron::subtract(*A, *B, diff);
+
+    // Numerator: library reduction of the element-wise difference.
+    float comp = 0.0;
+    Gadgetron::norm1(diff, comp);
+
+    // Denominator: magnitude of the library dot product of the two inputs.
+    std::complex<float> r;
+    Gadgetron::dotc(*A, *B, r);
+
+    // Skip the normalisation when the dot product is exactly zero so the
+    // result stays finite instead of becoming inf/NaN.
+    const float scale = std::abs(r);
+    if ( scale > 0 ) comp /= scale;
+    return comp;
+}
+
+// Relative difference between two real arrays: the value Gadgetron::norm1
+// reports for (A - B), divided by |Gadgetron::math::dotu(A, B)| whenever that
+// magnitude is non-zero. Prints a message and returns 9e30 on a size mismatch.
+double mcompare(hoNDArray< float >* A, hoNDArray< float >* B)
+{
+    if (A->get_number_of_elements() != B->get_number_of_elements()) {
+        std::cout << "Wrong number of elements in comparison" << std::endl;
+        return 9e30;
+    }
+
+    hoNDArray< float > diff;
+    Gadgetron::subtract(*A, *B, diff);
+
+    // Numerator: library reduction of the element-wise difference.
+    float comp = 0.0;
+    Gadgetron::norm1(diff, comp);
+
+    // Denominator: magnitude of the library dot product of the two inputs.
+    float r;
+    Gadgetron::math::dotu(A->get_number_of_elements(), A->begin(), B->begin(), r);
+
+    // Skip the normalisation when the dot product is exactly zero so the
+    // result stays finite instead of becoming inf/NaN.
+    const float scale = std::abs(r);
+    if ( scale > 0 )  comp /= scale;
+    return comp;
+}
+
+// Report whether res matches res_math: prints "<msg> - SUCCESS with diff: <d>"
+// when the relative difference d from mcompare() does not exceed DIFF_LIMIT,
+// and "<msg> - FAILED with diff: <d>" when it does.
+void compare_result(hoNDArray< std::complex<float> >& res, hoNDArray< std::complex<float> >& res_math, const std::string& msg)
+{
+    const double rel_diff = mcompare(&res, &res_math);
+    const bool exceeded = rel_diff > DIFF_LIMIT;
+
+    // Only the verdict text differs between the two outcomes.
+    const char* verdict = exceeded ? " - FAILED with diff: " : " - SUCCESS with diff: ";
+    std::cout << msg << verdict << rel_diff << std::endl;
+}
+
+// Report whether res matches res_math: prints "<msg> - SUCCESS with diff: <d>"
+// when the relative difference d from mcompare() does not exceed DIFF_LIMIT,
+// and "<msg> - FAILED with diff: <d>" when it does.
+void compare_result(hoNDArray< float >& res, hoNDArray< float >& res_math, const std::string& msg)
+{
+    const double rel_diff = mcompare(&res, &res_math);
+    const bool exceeded = rel_diff > DIFF_LIMIT;
+
+    // Only the verdict text differs between the two outcomes.
+    const char* verdict = exceeded ? " - FAILED with diff: " : " - SUCCESS with diff: ";
+    std::cout << msg << verdict << rel_diff << std::endl;
+}
+
+/*
+*   Timing/comparison driver for linalg routines; argv[1] is the folder holding the .cplx test files.
+*/
+int main(int argc, char** argv)
+{
+    std::cout << "Simple test of linear algebra routines" << std::endl;
+    if (argc != 2) {
+        std::cout << "Usage: linalg_test <folder_with_test_data>" << std::endl;
+        return -1;
+    }
+
+    std::string filenameA = std::string(argv[1]) + std::string("/A.cplx");
+    std::string filenameB = std::string(argv[1]) + std::string("/B.cplx");
+    std::string filenameC1 = std::string(argv[1]) + std::string("/C1.cplx");
+    std::string filenameC2 = std::string(argv[1]) + std::string("/C2.cplx");
+    std::string filenameS = std::string(argv[1]) + std::string("/S.cplx");
+    std::string filenameS_chol = std::string(argv[1]) + std::string("/S_chol.cplx");
+    std::string filenameS_chol_inv = std::string(argv[1]) + std::string("/S_chol_inv.cplx");
+
+    boost::shared_ptr< hoNDArray<std::complex<float> > > A = read_nd_array< std::complex<float> >(filenameA.c_str()); // NOTE(review): results of read_nd_array are dereferenced below without a null check
+    boost::shared_ptr< hoNDArray<std::complex<float> > > B = read_nd_array< std::complex<float> >(filenameB.c_str());
+    boost::shared_ptr< hoNDArray<std::complex<float> > > C1 = read_nd_array< std::complex<float> >(filenameC1.c_str());
+    boost::shared_ptr< hoNDArray<std::complex<float> > > C2 = read_nd_array< std::complex<float> >(filenameC2.c_str());
+
+    boost::shared_ptr< hoNDArray<std::complex<float> > > S = read_nd_array< std::complex<float> >(filenameS.c_str());
+    boost::shared_ptr< hoNDArray<std::complex<float> > > S_chol = read_nd_array< std::complex<float> >(filenameS_chol.c_str());
+    boost::shared_ptr< hoNDArray<std::complex<float> > > S_chol_inv = read_nd_array< std::complex<float> >(filenameS_chol_inv.c_str());
+
+    GADGET_MSG("------------------------------------------------------------------");
+    GADGET_MSG("matrix multiplication");
+    GADGET_MSG("------------------------------------------------------------------");
+
+    std::complex<float> alpha(1.0,0);
+    std::complex<float> beta(1.0,0);
+    double diff; // only referenced by the commented-out block below
+
+    /*{
+    GadgetronTimer t("GEMM Time (system)", true);
+    hoNDArray_gemm( A.get(), B.get(), alpha,  C1.get(), beta);
+    std::cout << C1->get_size(0) << ", " << C1->get_size(1) << ", " << C1->get_number_of_elements() << std::endl;
+    }
+
+    {
+    GadgetronTimer t("GEMM Time (MKL)", true);
+    gemm( *C1.get(), *B.get(), *A.get());
+    std::cout << C1->get_size(0) << ", " << C1->get_size(1) << ", " << C1->get_number_of_elements() << std::endl;
+    }
+
+    {
+    GadgetronTimer t("Write time", true);
+    write_nd_array< std::complex<float> >(C1.get(), "C2_calc.cplx");
+    }
+
+    double diff;
+    {
+    GadgetronTimer compare("CompareTime", true);
+    diff = mcompare(C1.get(),C2.get());
+    }
+
+    if (diff > DIFF_LIMIT) {
+    std::cout << "Complex GEMM FAILED with diff: " << diff << std::endl;
+    return -1;
+    } else {
+    std::cout << "Complex GEMM SUCCESS with diff: " << diff << std::endl;
+    }
+
+    hoNDArray_choldc(S.get());
+    zero_tril(S.get());
+
+    write_nd_array< std::complex<float> >(S.get(), "S_chol_calc.cplx");
+
+    diff = mcompare(S.get(),S_chol.get());
+    if (diff > DIFF_LIMIT) {
+    std::cout << "Complex Cholesky decomposition FAILED with diff: " << diff << std::endl;
+    return -1;
+    } else {
+    std::cout << "Complex Cholesky decomposition SUCCESS with diff: " << diff << std::endl;
+    }
+
+    hoNDArray_inv_lower_triangular(S.get());
+
+    write_nd_array< std::complex<float> >(S.get(), "S_chol_inv_calc.cplx");
+
+    diff = mcompare(S.get(),S_chol_inv.get());
+    if (diff > DIFF_LIMIT) {
+    std::cout << "Complex Triangular inversion FAILED with diff: " << diff << std::endl;
+    return -1;
+    } else {
+    std::cout << "Complex Triangular inversion SUCCESS with diff: " << diff << std::endl;
+    }*/
+
+    hoNDArray<std::complex<float> > a(*A);
+    hoNDArray<std::complex<float> > b(*A); // second copy of A (not of B), passed to scal below
+
+    Gadgetron::scal( std::complex<float>(2), b);
+
+    float r; // NOTE(review): never used in this function
+
+    hoNDArray<std::complex<float> > res, res_math;
+    hoNDArray<float > res_f, res_f_math;
+
+    {
+        GadgetronTimer t("allocate res", true);
+        res = a;
+        res_math = a;
+
+        res_f.create(a.get_dimensions());
+        res_f_math.create(a.get_dimensions());
+    }
+
+    GADGET_MSG("------------------------------------------------------------------");
+    GADGET_MSG("axpy");
+    GADGET_MSG("------------------------------------------------------------------");
+
+    {
+        GadgetronTimer t("axpy Time (MKL)", true);
+        Gadgetron::axpy( alpha, a, b, res);
+    }
+
+    GADGET_MSG("------------------------------------------------------------------");
+    GADGET_MSG("vector add");
+    GADGET_MSG("------------------------------------------------------------------");
+
+    {
+        GadgetronTimer t("vzAdd Time (MKL)", true);
+        Gadgetron::add( *A.get(), *A.get(), res);
+    }
+
+    GADGET_MSG("------------------------------------------------------------------");
+    GADGET_MSG("vector subtract");
+    GADGET_MSG("------------------------------------------------------------------");
+
+    {
+        GadgetronTimer t("vzSub Time (MKL)", true);
+        Gadgetron::subtract( a, b, res);
+    }
+
+
+    GADGET_MSG("------------------------------------------------------------------");
+    GADGET_MSG("vector multiplication");
+    GADGET_MSG("------------------------------------------------------------------");
+
+    {
+        GadgetronTimer t("vzMul Time (MKL)", true);
+        Gadgetron::multiply( a, b, res);
+    }
+
+    compare_result(res, res_math, "multiply"); // NOTE(review): res_math is still the copy of a made in "allocate res"; no reference product is computed here - confirm intent
+
+    GADGET_MSG("------------------------------------------------------------------");
+    GADGET_MSG("vector addEpsilon");
+    GADGET_MSG("------------------------------------------------------------------");
+
+    res = a;
+    res_math = a;
+
+    {
+        GadgetronTimer t("addEpsilon Time (MKL)", true);
+        Gadgetron::addEpsilon( res );
+    }
+
+    compare_result(res, res_math, "addEpsilon");
+
+    GADGET_MSG("------------------------------------------------------------------");
+    GADGET_MSG("vector divide");
+    GADGET_MSG("------------------------------------------------------------------");
+
+    {
+        GadgetronTimer t("divide Time (MKL)", true);
+        Gadgetron::divide( a, res, res);
+    }
+
+    compare_result(res, res_math, "divide"); // NOTE(review): from here on res_math keeps the copy of a assigned before addEpsilon
+
+    GADGET_MSG("------------------------------------------------------------------");
+    GADGET_MSG("vector sqrt");
+    GADGET_MSG("------------------------------------------------------------------");
+
+    {
+        GadgetronTimer t("sqrt Time (MKL)", true);
+        Gadgetron::sqrt( a, res);
+    }
+
+    compare_result(res, res_math, "sqrt");
+
+    GADGET_MSG("------------------------------------------------------------------");
+    GADGET_MSG("vector conjugate");
+    GADGET_MSG("------------------------------------------------------------------");
+
+    {
+        GadgetronTimer t("conjugate Time (MKL)", true);
+        Gadgetron::conjugate( a, res);
+    }
+
+    compare_result(res, res_math, "conjugate");
+
+    GADGET_MSG("------------------------------------------------------------------");
+    GADGET_MSG("vector conjugate multiplication");
+    GADGET_MSG("------------------------------------------------------------------");
+
+    {
+        GadgetronTimer t("vcMulByConj Time (MKL)", true);
+        Gadgetron::multiplyConj( a, b, res);
+    }
+
+    compare_result(res, res_math, "multiplyConj");
+
+    GADGET_MSG("------------------------------------------------------------------");
+    GADGET_MSG("vector scal");
+    GADGET_MSG("------------------------------------------------------------------");
+
+    res = a;
+    res_math = a;
+
+    {
+        GadgetronTimer t("scal Time (MKL)", true);
+        Gadgetron::scal( alpha, a);
+    }
+
+    compare_result(res, res_math, "scal"); // NOTE(review): scal was applied to a, while res and res_math are both untouched copies of a - confirm intent
+
+    GADGET_MSG("------------------------------------------------------------------");
+    GADGET_MSG("vector dotc");
+    GADGET_MSG("------------------------------------------------------------------");
+
+    std::complex<float> rdotc(0);
+
+    {
+        GadgetronTimer t("dotc Time (MKL)", true);
+        rdotc = Gadgetron::dotc( a, b);
+    }
+    std::cout << "dotc = " << rdotc << std::endl;
+
+    GADGET_MSG("------------------------------------------------------------------");
+    GADGET_MSG("vector dotu");
+    GADGET_MSG("------------------------------------------------------------------");
+
+    std::complex<float> rdotu;
+
+    {
+        GadgetronTimer t("dotu Time (MKL)", true);
+        rdotu = Gadgetron::dotu( a, b );
+    }
+    std::cout << "dotu = " << rdotu << std::endl;
+
+    GADGET_MSG("------------------------------------------------------------------");
+    GADGET_MSG("vector absolute");
+    GADGET_MSG("------------------------------------------------------------------");
+
+    {
+        GadgetronTimer t("absolute Time (MKL)", true);
+        Gadgetron::absolute( a, res);
+    }
+
+    compare_result(res, res_math, "absolute");
+
+    GADGET_MSG("------------------------------------------------------------------");
+    GADGET_MSG("vector argument");
+    GADGET_MSG("------------------------------------------------------------------");
+
+    {
+        GadgetronTimer t("argument Time (MKL)", true);
+        Gadgetron::argument( a, res_f);
+    }
+
+    GADGET_MSG("------------------------------------------------------------------");
+    GADGET_MSG("vector inv");
+    GADGET_MSG("------------------------------------------------------------------");
+
+    {
+        GadgetronTimer t("inv Time (MKL)", true);
+        Gadgetron::inv( a, res);
+    }
+
+    GADGET_MSG("------------------------------------------------------------------");
+    GADGET_MSG("norm2");
+    GADGET_MSG("------------------------------------------------------------------");
+
+    float rn;
+
+    {
+        GadgetronTimer t("Time (MKL)", true);
+        Gadgetron::norm2( a, rn);
+    }
+    std::cout << "nrm2 = " << rn << std::endl;
+
+    GADGET_MSG("------------------------------------------------------------------");
+    GADGET_MSG("norm1");
+    GADGET_MSG("------------------------------------------------------------------");
+
+    {
+        GadgetronTimer t("Time (MKL)", true);
+        Gadgetron::norm1( a, rn);
+    }
+    std::cout << "nrm1 = " << rn << std::endl;
+
+    GADGET_MSG("------------------------------------------------------------------");
+    GADGET_MSG("conv2");
+    GADGET_MSG("------------------------------------------------------------------");
+
+    hoNDArray<std::complex<float> > ker;
+    ker.create(3, 3);
+    Gadgetron::fill(ker, std::complex<float>(1) ); // 3x3 kernel with every entry set to 1
+
+    {
+        GadgetronTimer t("conv2 Time (MKL)", true);
+        Gadgetron::conv2( a, ker, res);
+    }
+}
diff --git a/toolboxes/linalg/linalg_test.cpp b/toolboxes/linalg/linalg_test.cpp
new file mode 100644
index 0000000..a9acd49
--- /dev/null
+++ b/toolboxes/linalg/linalg_test.cpp
@@ -0,0 +1,580 @@
+/*
+ * linalg_test.cpp
+ *
+ *  Created on: Dec 9, 2011
+ *      Author: Michael S. Hansen
+ */
+
+
+#include <iostream>
+#include <hoNDArray.h>
+#include <hoNDArray_fileio.h>
+#include <hoNDArray_utils.h>
+#include <matrix_vector_op.h>
+#include <matrix_decomposition.h>
+#include "GadgetronTimer.h"
+#include "hoNDArray_elemwise.h"
+#include "hoNDArray_elemwise.h"
+#include "hoNDArray_reductions.h"
+#include "hoNDArray_linalg.h"
+#include "hoNDFFT.h"
+#include <fftw3.h>
+#include <valarray>
+#include <omp.h>
+#include "hoArmadillo.h"
+
+#define DIFF_LIMIT 1e-5
+
+using namespace Gadgetron;
+
+// Relative difference between two complex arrays: the value Gadgetron::norm1
+// reports for (A - B), divided by |Gadgetron::dotc(A, B)| whenever that
+// magnitude is non-zero. Prints a message and returns 9e30 on a size mismatch.
+double mcompare(hoNDArray< std::complex<float> >* A, hoNDArray< std::complex<float> >* B)
+{
+  if (A->get_number_of_elements() != B->get_number_of_elements()) {
+    std::cout << "Wrong number of elements in comparison" << std::endl;
+    return 9e30;
+  }
+
+  hoNDArray< std::complex<float> > diff;
+  Gadgetron::subtract(*A, *B, diff);
+
+  // Numerator: library reduction of the element-wise difference.
+  float comp = 0.0;
+  Gadgetron::norm1(diff, comp);
+
+  // Denominator: magnitude of the library dot product of the two inputs.
+  std::complex<float> r;
+  Gadgetron::dotc(*A, *B, r);
+
+  // Guard the division: with a zero dot product an unguarded 0/0 yields NaN,
+  // and NaN > DIFF_LIMIT is false, so callers would report a bogus SUCCESS.
+  const float scale = std::abs(r);
+  if ( scale > 0 ) comp /= scale;
+  return comp;
+}
+
+/**
+   Zero the elements at linear offsets r*cols + c with r < c and r < rows (cols = get_size(1), rows = get_size(0)).
+*/
+void zero_tril(hoNDArray< std::complex<float> >* A)
+{
+  const size_t rows = A->get_size(0);
+  const size_t cols = A->get_size(1);
+  std::complex<float>* d = A->get_data_ptr();
+  for (size_t c = 0; c < cols; c++) {
+    for (size_t r = 0; r < c && r < rows; r++) { // r < rows keeps r*cols+c below rows*cols when rows < cols
+      d[r*cols+c] = std::complex<float>(0.0,0.0);
+    }
+  }
+}
+
+// Element-wise product r[n] = x[n] * y[n] over all elements, with OpenMP loops.
+// r is replaced by a copy of x first when its element count differs from x's.
+// Returns true on success and false, after logging, if any exception is caught.
+template <typename T>
+bool multiplyOwn(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r)
+{
+  try
+    {
+      // Project macro; presumably bails out with false on a size mismatch (TODO confirm).
+      GADGET_DEBUG_CHECK_RETURN_FALSE(x.get_number_of_elements()==y.get_number_of_elements());
+      if ( r.get_number_of_elements()!=x.get_number_of_elements())
+        {
+          r = x;
+        }
+
+      // Loop bound and index are both signed long long, so no cast is needed in the loops.
+      long long N = (long long)x.get_number_of_elements();
+      long long n;
+
+      const T* pX = x.begin();
+      const T* pY = y.begin();
+      T* pR = r.begin();
+
+      if ( pR == pX )
+        {
+          // Output aliases x: r already holds x, so multiply it by y in place.
+#pragma omp parallel for default(none) private(n) shared(N, pX, pY, pR)
+          for ( n=0; n<N; n++ ) pR[n] *= pY[n];
+        }
+      else if ( pR == pY )
+        {
+          // Output aliases y: r already holds y, so multiply it by x in place.
+#pragma omp parallel for default(none) private(n) shared(N, pX, pY, pR)
+          for ( n=0; n<N; n++ ) pR[n] *= pX[n];
+        }
+      else
+        {
+          // Output buffer is distinct from both inputs.
+#pragma omp parallel for default(none) private(n) shared(N, pX, pY, pR)
+          for ( n=0; n<N; n++ ) pR[n] = pX[n] * pY[n];
+        }
+    }
+  catch(...)
+    {
+      // The log now names this function; the message used to say "multiply".
+      GADGET_ERROR_MSG("Error happened in multiplyOwn(const hoNDArray<T>& x, const hoNDArray<T>& y, hoNDArray<T>& r) ... ");
+      return false;
+    }
+
+  return true;
+}
+
+// Element-wise complex product c[i] = a[i] * b[i] for i in [0, N), written out
+// on the real and imaginary parts. Each input pair is read into locals before
+// c[i] is stored, so c may be the same array as a or b.
+//
+// The result is stored by assigning a freshly constructed complex value: the
+// former non-Windows branch assigned through c[i].real() / c[i].imag(), which
+// return by value since C++11 and therefore do not compile there.
+void vecMult(size_t N, std::complex<float>* a, 
+             std::complex<float>*b, std::complex<float>*c)
+{
+  long long i;
+#pragma omp parallel for private(i)
+  for (i = 0; i < (long long)N; i++) {
+    const float re1 = a[i].real();
+    const float im1 = a[i].imag();
+    const float re2 = b[i].real();
+    const float im2 = b[i].imag();
+
+    c[i] = std::complex<float>(re1*re2-im1*im2,
+                               re1*im2+im1*re2);
+  }
+}
+
+// Serial sum of element magnitudes, each taken as sqrt(re*re + im*im), over
+// every element of *a; accumulated and returned in precision T.
+template <typename T> T hoNDArray_norm1(hoNDArray< std::complex<T> > * a)
+{
+  const size_t count = a->get_number_of_elements();
+  const std::complex<T>* cur = a->get_data_ptr();
+  const std::complex<T>* const last = cur + count;
+  T total = 0.0;
+  while (cur != last) {
+    const T re = cur->real();
+    const T im = cur->imag();
+    total += std::sqrt( (re*re) + (im * im) );
+    ++cur;
+  }
+  return total;
+}
+
+// OpenMP variant of the 1-norm: a parallel "+" reduction of sqrt(re*re + im*im)
+// over all elements of *a. The sum is kept in T and widened to double on return.
+template <typename T> double hoNDArray_norm1_2(hoNDArray< std::complex<T> > * a)
+{
+  const size_t count = a->get_number_of_elements();
+  const std::complex<T>* values = a->get_data_ptr();
+
+  T total = 0.0;
+  long long idx;
+#pragma omp parallel for reduction(+:total)
+  for (idx = 0; idx < count; idx++) {
+    const T re = values[idx].real();
+    const T im = values[idx].imag();
+    total += std::sqrt( (re*re) + (im * im) );
+  }
+  return total;
+}
+
+// OpenMP variant of the 2-norm: a parallel "+" reduction of re*re + im*im over
+// all elements of *a in precision T, followed by the square root of that sum.
+template <typename T> double hoNDArray_norm2_2(hoNDArray< std::complex<T> > * a)
+{
+  const size_t count = a->get_number_of_elements();
+  const std::complex<T>* values = a->get_data_ptr();
+  T sum_sq(0);
+  long long idx;
+#pragma omp parallel for reduction(+:sum_sq)
+  for (idx = 0; idx < count; idx++) {
+    const T re = values[idx].real();
+    const T im = values[idx].imag();
+    sum_sq += ( (re*re) + (im * im) );
+  }
+  return std::sqrt(sum_sq);
+}
+
+// Applies FFTW's unnormalised 2-D DFT to each consecutive block of n0*n1
+// elements of a (n0 = a.get_size(1), n1 = a.get_size(0)), writing into r, and
+// then scales r by 1/sqrt(n0*n1).
+//   a        input; its buffer is passed to FFTW as the transform source
+//   r        output; first overwritten with a copy of a
+//   forward  true selects FFTW_FORWARD, false FFTW_BACKWARD
+// Only T = float and T = double are dispatched (checked with typeid at run
+// time); any other T leaves r as the scaled copy of a. Always returns true.
+// Plan creation and destruction are not guarded by any lock here.
+template<typename T> 
+bool fftw_fft2(hoNDArray< std::complex<T> >& a, hoNDArray< std::complex<T> >& r, bool forward)
+{
+  r = a;
+
+  int n0 = a.get_size(1);
+  int n1 = a.get_size(0);
+
+  T fftRatio = 1.0/std::sqrt( T(n0*n1) );
+
+  size_t num = a.get_number_of_elements()/(n0*n1);
+  std::cout << "Number of FFTs: " << num << std::endl;
+  long long n;
+
+  // Single-precision path (fftwf_* API).
+  if ( typeid(T) == typeid(float) )
+    {
+      fftwf_init_threads();
+      fftwf_plan_with_nthreads(omp_get_max_threads());
+
+      fftwf_plan p;
+
+      {
+        GadgetronTimer tp("FFTW Planning", true);
+        if ( forward )
+          {
+            p = fftwf_plan_dft_2d(n0, n1,
+                                  reinterpret_cast<fftwf_complex*>(a.begin()),
+                                  reinterpret_cast<fftwf_complex*>(r.begin()),
+                                  FFTW_FORWARD, FFTW_ESTIMATE);
+          }
+        else
+          {
+            p = fftwf_plan_dft_2d(n0, n1,
+                                  reinterpret_cast<fftwf_complex*>(a.begin()),
+                                  reinterpret_cast<fftwf_complex*>(r.begin()),
+                                  FFTW_BACKWARD, FFTW_ESTIMATE);
+          }
+      }
+
+      {
+        GadgetronTimer t("FFT loop time", true);
+        //#pragma omp parallel for private(n) shared(num, p, a, n0, n1, r)
+        for ( n=0; n<num; n++ )
+          {
+            fftwf_execute_dft(p, reinterpret_cast<fftwf_complex*>(a.begin()+n*n0*n1),
+                              reinterpret_cast<fftwf_complex*>(r.begin()+n*n0*n1));
+          }
+      }
+
+      fftwf_destroy_plan(p);
+      fftwf_cleanup_threads();
+    }
+  else if ( typeid(T) == typeid(double) )
+    {
+      // Double-precision path (fftw_* API).
+      fftw_init_threads();
+      fftw_plan_with_nthreads(omp_get_max_threads());
+      fftw_plan p;
+
+      {
+        if ( forward )
+          {
+            p = fftw_plan_dft_2d(n0, n1,
+                                 reinterpret_cast<fftw_complex*>(a.begin()),
+                                 reinterpret_cast<fftw_complex*>(r.begin()),
+                                 FFTW_FORWARD, FFTW_ESTIMATE);
+          }
+        else
+          {
+            p = fftw_plan_dft_2d(n0, n1,
+                                 reinterpret_cast<fftw_complex*>(a.begin()),
+                                 reinterpret_cast<fftw_complex*>(r.begin()),
+                                 FFTW_BACKWARD, FFTW_ESTIMATE);
+          }
+      }
+
+      //#pragma omp parallel for private(n) shared(num, p, a, n0, n1, r)
+      for ( n=0; n<num; n++ )
+        {
+          fftw_execute_dft(p, reinterpret_cast<fftw_complex*>(a.begin()+n*n0*n1),
+                           reinterpret_cast<fftw_complex*>(r.begin()+n*n0*n1));
+        }
+
+      fftw_destroy_plan(p);
+      fftw_cleanup_threads();
+    }
+
+  {
+    GadgetronTimer tt("FFT Scaling", true);
+    // Build the factor as std::complex<T> so its type follows the array's
+    // element type; it used to be hard-coded to std::complex<float>.
+    Gadgetron::hoNDArray_scal(std::complex<T>(fftRatio,0.0), &r);
+  }
+  return true;
+}
+
+/*
+ *   Correctness checks (GEMM, Cholesky, triangular inverse) and timings for linalg routines; argv[1] is the test-data folder.
+ */
+int main(int argc, char** argv)
+{
+  std::cout << "Simple test of linear algebra routines" << std::endl;
+  if (argc != 2) {
+    std::cout << "Usage: linalg_test <folder_with_test_data>" << std::endl;
+    return -1;
+  }
+
+  std::string filenameA = std::string(argv[1]) + std::string("/A.cplx");
+  std::string filenameB = std::string(argv[1]) + std::string("/B.cplx");
+  std::string filenameC1 = std::string(argv[1]) + std::string("/C1.cplx");
+  std::string filenameC2 = std::string(argv[1]) + std::string("/C2.cplx");
+  std::string filenameS = std::string(argv[1]) + std::string("/S.cplx");
+  std::string filenameS_chol = std::string(argv[1]) + std::string("/S_chol.cplx");
+  std::string filenameS_chol_inv = std::string(argv[1]) + std::string("/S_chol_inv.cplx");
+
+  boost::shared_ptr< hoNDArray<std::complex<float> > > A = read_nd_array< std::complex<float> >(filenameA.c_str()); // NOTE(review): results of read_nd_array are dereferenced below without a null check
+  boost::shared_ptr< hoNDArray<std::complex<float> > > B = read_nd_array< std::complex<float> >(filenameB.c_str());
+  boost::shared_ptr< hoNDArray<std::complex<float> > > C1 = read_nd_array< std::complex<float> >(filenameC1.c_str());
+  boost::shared_ptr< hoNDArray<std::complex<float> > > C2 = read_nd_array< std::complex<float> >(filenameC2.c_str());
+
+  boost::shared_ptr< hoNDArray<std::complex<float> > > S = read_nd_array< std::complex<float> >(filenameS.c_str());
+  boost::shared_ptr< hoNDArray<std::complex<float> > > S_chol = read_nd_array< std::complex<float> >(filenameS_chol.c_str());
+  boost::shared_ptr< hoNDArray<std::complex<float> > > S_chol_inv = read_nd_array< std::complex<float> >(filenameS_chol_inv.c_str());
+
+
+  hoNDArray<std::complex<float> > a(*A);
+  hoNDArray<std::complex<float> > b(*A); // second copy of A, not of B
+  hoNDArray<std::complex<float> > res;
+  std::complex<float> alpha(1.0,0);
+  std::complex<float> beta(1.0,0);
+  float rn;
+
+
+  GADGET_MSG("------------------------------------------------------------------");
+  GADGET_MSG("matrix multiplication");
+  GADGET_MSG("------------------------------------------------------------------");
+
+  {
+    GadgetronTimer t("GEMM Time (system)", true);
+    hoNDArray_gemm( A.get(), B.get(), alpha,  C1.get(), beta);
+    std::cout << C1->get_size(0) << ", " << C1->get_size(1) << ", " << C1->get_number_of_elements() << std::endl;
+  }
+
+  {
+    GadgetronTimer t("GEMM Time (MKL)", true);
+    gemm( *C1.get(), *B.get(), *A.get());
+    std::cout << C1->get_size(0) << ", " << C1->get_size(1) << ", " << C1->get_number_of_elements() << std::endl;
+  }
+
+  double diff;
+  {
+    GadgetronTimer compare("CompareTime", true);
+    diff = mcompare(C1.get(),C2.get());
+  }
+
+  if (diff > DIFF_LIMIT) { // any failed comparison ends the program with -1
+    std::cout << "Complex GEMM FAILED with diff: " << diff << std::endl;
+    return -1;
+  } else {
+    std::cout << "Complex GEMM SUCCESS with diff: " << diff << std::endl;
+  }
+
+  std::vector<size_t> dims;
+  dims.push_back(A->get_size(1));
+  dims.push_back(B->get_size(0));
+  hoNDArray<std::complex<float> > Cres(dims);
+  //std::cout << Cres.get_size(0) << ", " << Cres.get_size(1) << ", " << Cres.get_number_of_elements() << std::endl;
+  {
+    GadgetronTimer t("GEMM Time (Armadillo)", true);
+    arma::Mat<arma::cx_float> armaA = as_arma_matrix(A.get());
+    arma::Mat<arma::cx_float> armaB = as_arma_matrix(B.get());
+    arma::Mat<arma::cx_float> armaCres = as_arma_matrix(&Cres); // NOTE(review): armaCres is a by-value arma::Mat; confirm the product assigned below actually reaches Cres
+    //std::cout << "A: Nrows: " << armaA.n_rows << ", Ncols: " << armaA.n_cols << ", Nelem: " << armaA.n_elem << std::endl;
+    //std::cout << "B: Nrows: " << armaB.n_rows << ", Ncols: " << armaB.n_cols << ", Nelem: " << armaB.n_elem << std::endl;
+    //std::cout << "C: Nrows: " << armaCres.n_rows << ", Ncols: " << armaCres.n_cols << ", Nelem: " << armaCres.n_elem << std::endl;
+    armaCres = armaB * armaA;
+  }
+
+  {
+    GadgetronTimer compare("CompareTime Armadillo", true);
+    diff = mcompare(&Cres,C2.get());
+  }
+
+  if (diff > DIFF_LIMIT) {
+    std::cout << "Armadillo Complex GEMM FAILED with diff: " << diff << std::endl;
+    return -1;
+  } else {
+    std::cout << "Armadillo Complex GEMM SUCCESS with diff: " << diff << std::endl;
+  }
+
+  {
+    GadgetronTimer t("Write time", true);
+    write_nd_array< std::complex<float> >(C1.get(), "C2_calc.cplx");
+  }
+
+
+  hoNDArray_choldc(S.get()); // S is modified in place by the next three library/helper calls
+  zero_tril(S.get());
+
+  write_nd_array< std::complex<float> >(S.get(), "S_chol_calc.cplx");
+
+  diff = mcompare(S.get(),S_chol.get());
+  if (diff > DIFF_LIMIT) {
+    std::cout << "Complex Cholesky decomposition FAILED with diff: " << diff << std::endl;
+    return -1;
+  } else {
+    std::cout << "Complex Cholesky decomposition SUCCESS with diff: " << diff << std::endl;
+  }
+
+  hoNDArray_inv_lower_triangular(S.get());
+
+  write_nd_array< std::complex<float> >(S.get(), "S_chol_inv_calc.cplx");
+
+  diff = mcompare(S.get(),S_chol_inv.get());
+  if (diff > DIFF_LIMIT) {
+    std::cout << "Complex Triangular inversion FAILED with diff: " << diff << std::endl;
+    return -1;
+  } else {
+    std::cout << "Complex Triangular inversion SUCCESS with diff: " << diff << std::endl;
+  }
+
+  GADGET_MSG("------------------------------------------------------------------"); // from here on only timings and printed values, no pass/fail checks
+  GADGET_MSG("vector add");
+  GADGET_MSG("------------------------------------------------------------------");
+  {
+    GadgetronTimer t("allocate res", true);
+    res = a;
+  }
+
+  {
+    GadgetronTimer t("axpy Time (system)", true);
+    Gadgetron::axpy( alpha, &a, &res);
+  }
+
+  {
+    GadgetronTimer t("linalg (direct BLAS) axpy Time (system)", true);
+    Gadgetron::hoNDArray_axpy( &alpha, &a, &res);
+  }
+
+  {
+    GadgetronTimer t("operator +", true);
+    res += a;
+  }
+
+  {
+    GadgetronTimer t("vzAdd Time (MKL)", true);
+    Gadgetron::add( *A.get(), *A.get(), res);
+  }
+
+  GADGET_MSG("------------------------------------------------------------------");
+  GADGET_MSG("vector multiplication");
+  GADGET_MSG("------------------------------------------------------------------");
+
+  {
+    GadgetronTimer t("operator *", true);
+    res *= a;
+  }
+
+  {
+    GadgetronTimer t("vzMul Time (MKL)", true);
+    Gadgetron::multiply( a, b, res);
+  }
+
+  {
+    GadgetronTimer t("multiplyOwn Time (openMP)", true);
+    multiplyOwn( a, b, res);
+  }
+
+  {
+    GadgetronTimer t("vecMult (vectorized)", true);
+    vecMult( a.get_number_of_elements(), a.get_data_ptr(), b.get_data_ptr(), res.get_data_ptr());
+  }
+
+  GADGET_MSG("------------------------------------------------------------------");
+  GADGET_MSG("norm2");
+  GADGET_MSG("------------------------------------------------------------------");
+
+  {
+    GadgetronTimer t("nrm2", true);
+    rn = Gadgetron::nrm2(&a);
+  }
+  std::cout << "nrm2 = " << rn << std::endl;
+	
+  {
+    GadgetronTimer t("Time (MKL)", true);
+    Gadgetron::norm2( a, rn);
+  }
+  std::cout << "nrm2 = " << rn << std::endl;
+
+  {
+    GadgetronTimer t("Time (DIRECT BLAS)", true);
+    rn = Gadgetron::hoNDArray_norm2(&a);
+  }
+  std::cout << "nrm2 = " << rn << std::endl;
+
+  {
+    GadgetronTimer t("Time (vectorized OMP reduction)", true);
+    rn = hoNDArray_norm2_2(&a);
+  }
+  std::cout << "nrm2 = " << rn << std::endl;
+
+  GADGET_MSG("------------------------------------------------------------------");
+  GADGET_MSG("norm1");
+  GADGET_MSG("------------------------------------------------------------------");
+
+  {
+    GadgetronTimer t("nrm1", true);
+    rn = Gadgetron::nrm1(&a);
+  }
+  std::cout << "nrm1 = " << rn << std::endl;
+
+  {
+    GadgetronTimer t("Time (MKL)", true);
+    Gadgetron::norm1( a, rn);
+  }
+  std::cout << "nrm1 = " << rn << std::endl;
+
+  {
+    GadgetronTimer t("Time (local unthreaded)", true);
+    rn = hoNDArray_norm1(&a);
+  }
+  std::cout << "nrm1 = " << rn << std::endl;
+
+  {
+    GadgetronTimer t("Time (OMP reduction)", true);
+    rn = hoNDArray_norm1_2(&a);
+  }
+
+  std::cout << "nrm1 = " << rn << std::endl;
+
+  GADGET_MSG("------------------------------------------------------------------");
+  GADGET_MSG("axpy");
+  GADGET_MSG("------------------------------------------------------------------");
+
+  {
+    GadgetronTimer t("axpy Time (system)", true);
+    Gadgetron::axpy( alpha, &a, &res);
+  }
+
+  {
+    GadgetronTimer t("linalg (direct BLAS) axpy Time (system)", true);
+    Gadgetron::hoNDArray_axpy( &alpha, &a, &res);
+  }
+
+  {
+    GadgetronTimer t("axpy Time (MKL)", true);
+    Gadgetron::axpy( alpha, a, b, res);
+  }
+
+  GADGET_MSG("------------------------------------------------------------------");
+  GADGET_MSG("fft 2D");
+  GADGET_MSG("------------------------------------------------------------------");
+
+  {
+    GadgetronTimer t("fft2 (MKL)", true);
+    hoNDFFT<float>::instance()->fft2(a, res);
+  }
+  Gadgetron::norm2(res, rn); GADGET_MSG("rn = " << rn); // the two printed rn values allow a manual comparison of both FFT paths
+
+  {
+    GadgetronTimer t("fftw_fft2", true);
+    fftw_fft2(a, res, true);
+  }
+  Gadgetron::norm2(res, rn); GADGET_MSG("rn = " << rn);
+
+  return 0;
+}
+
+
+
+
diff --git a/toolboxes/linalg/matrix_decomposition.cpp b/toolboxes/linalg/matrix_decomposition.cpp
new file mode 100644
index 0000000..4110355
--- /dev/null
+++ b/toolboxes/linalg/matrix_decomposition.cpp
@@ -0,0 +1,364 @@
+/*
+ * matrix_decomposition.cpp
+ *
+ *  Created on: Dec 10, 2011
+ *      Author: Michael S. Hansen
+ */
+
+#include "matrix_decomposition.h"
+#include <complex>
+#include "complext.h"
+#include "hoNDArray_utils.h"
+
+//Declaration of lapack routines
+extern "C" {
+
+//Cholesky decomposition of symmetric/hermitian positive definite matrix
+void spotrf_(char* UPLO, int* N, void* A, int* LDA, int* info);
+void dpotrf_(char* UPLO, int* N, void* A, int* LDA, int* info);
+void cpotrf_(char* UPLO, int* N, void* A, int* LDA, int* info);
+void zpotrf_(char* UPLO, int* N, void* A, int* LDA, int* info);
+
+
+//Inverse of triangular matrix
+void strtri_( char* UPLO, char* DIAG, int* N, void* A, int* LDA, int* INFO );
+void dtrtri_( char* UPLO, char* DIAG, int* N, void* A, int* LDA, int* INFO );
+void ctrtri_( char* UPLO, char* DIAG, int* N, void* A, int* LDA, int* INFO );
+void ztrtri_( char* UPLO, char* DIAG, int* N, void* A, int* LDA, int* INFO );
+
+//SVD
+void sgesvd_(char* JOBU, char* JOBVT, int* M, int* N, void* A,
+		int* LDA, void* S, void* U, int* LDU, void* VT,
+		int* LDVT, void* WORK, int* LWORK, void* RWORK, int* INFO);
+
+void dgesvd_(char* JOBU, char* JOBVT, int* M, int* N, void* A,
+		int* LDA, void* S, void* U, int* LDU, void* VT,
+		int* LDVT, void* WORK, int* LWORK, void* RWORK, int* INFO);
+
+void cgesvd_(char* JOBU, char* JOBVT, int* M, int* N, void* A,
+		int* LDA, void* S, void* U, int* LDU, void* VT,
+		int* LDVT, void* WORK, int* LWORK, void* RWORK, int* INFO);
+
+void zgesvd_(char* JOBU, char* JOBVT, int* M, int* N, void* A,
+		int* LDA, void* S, void* U, int* LDU, void* VT,
+		int* LDVT, void* WORK, int* LWORK, void* RWORK, int* INFO);
+}
+
+namespace Gadgetron
+{
+
+void potrf_wrapper(char* UPLO, int* N, float* A, int* LDA, int* info)
+{
+	spotrf_(UPLO, N, A, LDA, info);
+}
+
+void potrf_wrapper(char* UPLO, int* N, double* A, int* LDA, int* info)
+{
+	dpotrf_(UPLO, N, A, LDA, info);
+}
+
+void potrf_wrapper(char* UPLO, int* N, std::complex<float>* A, int* LDA, int* info)
+{
+	cpotrf_(UPLO, N, A, LDA, info);
+}
+
+void potrf_wrapper(char* UPLO, int* N, std::complex<double>* A, int* LDA, int* info)
+{
+	zpotrf_(UPLO, N, A, LDA, info);
+}
+
+/**
+ *  In-place Cholesky factorization of a square matrix via LAPACK _potrf.
+ *
+ *  We are specifying Upper Triangular, but the matrix comes in transposed
+ *  (row-major) compared to Fortran column-major order. As a result, we get
+ *  the lower triangular matrix.
+ *
+ *  @note The opposite triangle is left as _potrf leaves it; it is not zeroed here.
+ *
+ *  @param A  2D array; should be symmetric/hermitian positive definite. Overwritten.
+ *  @throws std::runtime_error if A is not 2D, not square, or _potrf sets info != 0.
+ */
+template <typename T> void hoNDArray_choldc(hoNDArray<T>* A)
+{
+	if (A->get_number_of_dimensions() != 2) {
+		throw std::runtime_error("This is not a matrix, only two dimensions allowed");
+	}
+
+	// Only the shape can be verified here; symmetry and positive definiteness
+	// are the caller's responsibility (a non-positive-definite input surfaces
+	// as a non-zero info below).
+	const size_t rows = A->get_size(0);
+	const size_t cols = A->get_size(1);
+	if (rows != cols) {
+		throw std::runtime_error("Matrix is not square.");
+	}
+
+	char UPLO = 'U';
+	int N = static_cast<int>(rows);
+	int LDA = N;
+	int info = 0;
+
+	// info is set by _potrf and reports a failed factorization.
+	potrf_wrapper(&UPLO, &N, A->get_data_ptr(), &LDA, &info);
+	if (info != 0) {
+		throw std::runtime_error("Error calling _potrf wrapper routine.");
+	}
+}
+
+
+//Template instanciations
+template EXPORTLINALG void hoNDArray_choldc(hoNDArray< std::complex<float> >* A);
+template EXPORTLINALG void hoNDArray_choldc(hoNDArray< std::complex<double> >* A);
+template EXPORTLINALG void hoNDArray_choldc(hoNDArray< float >* A);
+template EXPORTLINALG void hoNDArray_choldc(hoNDArray< double >* A);
+
+
+void trtri_wrapper(char* UPLO, char* DIAG, int* N, float* A, int* LDA, int* info) // float overload
+{
+	strtri_(UPLO, DIAG, N, A, LDA, info); // forwards every argument unchanged to LAPACK strtri_
+}
+
+void trtri_wrapper(char* UPLO, char* DIAG, int* N, double* A, int* LDA, int* info) // double overload
+{
+	dtrtri_(UPLO, DIAG, N, A, LDA, info); // forwards every argument unchanged to LAPACK dtrtri_
+}
+
+void trtri_wrapper(char* UPLO, char* DIAG, int* N, std::complex<float>* A, int* LDA, int* info) // complex<float> overload
+{
+	ctrtri_(UPLO, DIAG, N, A, LDA, info); // forwards every argument unchanged to LAPACK ctrtri_
+}
+
+void trtri_wrapper(char* UPLO, char* DIAG, int* N, std::complex<double>* A, int* LDA, int* info) // complex<double> overload
+{
+	ztrtri_(UPLO, DIAG, N, A, LDA, info); // forwards every argument unchanged to LAPACK ztrtri_
+}
+
+/**
+ *  In-place inversion of a triangular matrix via LAPACK _trtri (DIAG = 'N', i.e. non-unit diagonal).
+ *  @param A  square 2D array holding the lower triangular matrix; overwritten by the result.
+ *  @throws std::runtime_error if A is not 2D, not square, or _trtri sets info != 0.
+ */
+template <typename T> void hoNDArray_inv_lower_triangular(hoNDArray<T>* A)
+{
+	if (A->get_number_of_dimensions() != 2) {
+		throw std::runtime_error("Error array is not 2 dimensional.");
+	}
+
+	if (A->get_size(0) != A->get_size(1)) {
+		throw std::runtime_error("Error array is not square.");
+	}
+
+	int N = static_cast<int>(A->get_size(0));
+	int LDA = N;
+	char UPLO = 'U'; //We are passing upper, but matrix is really lower. This is to deal with row and column major order differences
+	char DIAG = 'N';
+	int info = 0;
+	trtri_wrapper(&UPLO, &DIAG, &N, A->get_data_ptr(), &LDA, &info);
+
+	if (info != 0) {
+		throw std::runtime_error("Error inverting triangular matrix.");
+	}
+}
+
+template EXPORTLINALG void hoNDArray_inv_lower_triangular(hoNDArray<float>* A);
+template EXPORTLINALG void hoNDArray_inv_lower_triangular(hoNDArray<double>* A);
+template EXPORTLINALG void hoNDArray_inv_lower_triangular(hoNDArray< std::complex<float> >* A);
+template EXPORTLINALG void hoNDArray_inv_lower_triangular(hoNDArray< std::complex<double> >* A);
+
+
+/**
+ *  Returns a newly allocated 2D array whose two dimensions are those of A, swapped.
+ *  When copy_data is false the result is only allocated; nothing is copied from A.
+ *  @throws std::runtime_error if A is not 2 dimensional.
+ */
+template<typename T>
+boost::shared_ptr<hoNDArray<T> > hoNDArray_transpose(hoNDArray<T> *A, bool copy_data = true)
+{
+	if (A->get_number_of_dimensions() != 2) {
+		throw std::runtime_error("Error array is not 2 dimensional.");
+	}
+
+	std::vector<size_t> perm_dims(2);
+	perm_dims[0] = A->get_size(1);
+	perm_dims[1] = A->get_size(0);
+
+	boost::shared_ptr<hoNDArray<T> > ret_val(new hoNDArray<T>);
+	ret_val->create(&perm_dims);
+
+	if (copy_data) {
+		std::vector<size_t> permute_order(2);
+		permute_order[0] = 1;
+		permute_order[1] = 0;
+		permute(A, ret_val.get(), &permute_order);
+	}
+	return ret_val;
+}
+
+void gesvd_wrapper(char* JOBU, char* JOBVT, int* M, int* N, float* A,
+		int* LDA, float* S, float* U, int* LDU, float* VT,
+		int* LDVT, float* WORK, int* LWORK, float* /*RWORK*/, int* INFO)
+{	// Real SGESVD has no RWORK: INFO is its 14th argument, so INFO is passed in that slot of the 15-slot prototype.
+	sgesvd_(JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO, INFO);
+}
+
+void gesvd_wrapper(char* JOBU, char* JOBVT, int* M, int* N, double* A,
+		int* LDA, double* S, double* U, int* LDU, double* VT,
+		int* LDVT, double* WORK, int* LWORK, double* /*RWORK*/, int* INFO)
+{	// Real DGESVD has no RWORK either; see the float overload for why INFO is passed in the 14th slot.
+	dgesvd_(JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO, INFO);
+}
+
+void gesvd_wrapper(char* JOBU, char* JOBVT, int* M, int* N, std::complex<float>* A,
+		int* LDA, float* S, std::complex<float>* U, int* LDU, std::complex<float>* VT,
+		int* LDVT, std::complex<float>* WORK, int* LWORK, float* RWORK, int* INFO)
+{	// Complex CGESVD does take RWORK; all 15 arguments are forwarded unchanged.
+	cgesvd_(JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, RWORK, INFO);
+}
+
+void gesvd_wrapper(char* JOBU, char* JOBVT, int* M, int* N, std::complex<double>* A,
+		int* LDA, double* S, std::complex<double>* U, int* LDU, std::complex<double>* VT,
+		int* LDVT, std::complex<double>* WORK, int* LWORK, double* RWORK, int* INFO)
+{	// Complex ZGESVD does take RWORK; all 15 arguments are forwarded unchanged.
+	zgesvd_(JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, RWORK, INFO);
+}
+
+
+/**
+ *  Singular value decomposition of a 2D array through LAPACK _gesvd.
+ *  A is copied into a transposed scratch array before the call, so A itself is
+ *  left untouched. U and VT are optional (pass 0 to skip them); their shapes
+ *  select the LAPACK job: all vectors ('A'), the first min(M,N) ('S') or none ('N').
+ *  @param A   2D input matrix.
+ *  @param U   optional 2D output for the left singular vectors.
+ *  @param S   required output with at least min(M,N) elements (singular values).
+ *  @param VT  optional 2D output for VT as returned by _gesvd.
+ *  @throws std::runtime_error on invalid arguments or if _gesvd reports INFO != 0.
+ */
+template<typename T> void hoNDArray_svd(hoNDArray<T> *A,
+		hoNDArray< T > *U, hoNDArray< typename realType<T>::Type > *S, hoNDArray< T > *VT)
+{
+	if (A->get_number_of_dimensions() != 2) {
+		throw std::runtime_error(" Error array A is not 2 dimensional.");
+	}
+
+	boost::shared_ptr< hoNDArray< T > > A_t = hoNDArray_transpose(A);
+	if (!A_t.get()) {
+		throw std::runtime_error("Transpose of input failed.");
+	}
+
+	int M = A_t->get_size(0);
+	int N = A_t->get_size(1);
+	int min_M_N = M > N ? N : M;
+	int max_M_N = M < N ? N : M;
+
+	T* A_ptr = A_t->get_data_ptr();
+	if (!A_ptr) {
+		throw std::runtime_error( "Data array pointer is undefined.");
+	}
+
+	if (!S) {
+		throw std::runtime_error("Null pointer detected for S.");
+	}
+	if (S->get_number_of_elements() < static_cast<size_t>(min_M_N)) {
+		throw std::runtime_error("S is too small.");
+	}
+	typename realType<T>::Type * S_ptr = S->get_data_ptr();
+
+	// Scratch arrays handed to LAPACK; permuted back into U / VT at the end.
+	boost::shared_ptr< hoNDArray< T > > U_t;
+	boost::shared_ptr< hoNDArray< T > > VT_t;
+
+	char JOBU = 'N';
+	T* U_ptr = 0;
+	int LDU = 1;
+	if (U) {
+		if (U->get_number_of_dimensions() != 2) {
+			throw std::runtime_error("Error array U is not 2 dimensional.");
+		}
+
+		U_t = hoNDArray_transpose(U, false);
+
+		if (U_t->get_size(0) != static_cast<size_t>(M)) {
+			throw std::runtime_error("Number of rows in U is not equal to number of rows in A");
+		}
+
+		if (U_t->get_size(1) == static_cast<size_t>(M)) {
+			JOBU = 'A';
+		} else if (U_t->get_size(1) == static_cast<size_t>(min_M_N)) {
+			JOBU = 'S';
+		} else {
+			throw std::runtime_error("Invalid number of columns of U");
+		}
+
+		U_ptr = U_t->get_data_ptr();
+		LDU = U_t->get_size(0);
+	}
+
+	char JOBVT = 'N';
+	T* VT_ptr = 0;
+	int LDVT = 1;
+	if (VT) {
+		if (VT->get_number_of_dimensions() != 2) {
+			throw std::runtime_error("Error array VT is not 2 dimensional.");
+		}
+
+		VT_t = hoNDArray_transpose(VT, false);
+
+		// _gesvd fills N columns of VT for both 'A' and 'S'; a buffer of another width does not match what LAPACK writes.
+		if (VT_t->get_size(1) != static_cast<size_t>(N)) {
+			throw std::runtime_error("Invalid number of columns of VT");
+		}
+
+		if (VT_t->get_size(0) == static_cast<size_t>(N)) {
+			JOBVT = 'A';
+		} else if (VT_t->get_size(0) == static_cast<size_t>(min_M_N)) {
+			JOBVT = 'S';
+		} else {
+			throw std::runtime_error("Invalid number of rows of VT");
+		}
+
+		VT_ptr = VT_t->get_data_ptr();
+		LDVT = VT_t->get_size(0);
+	}
+
+	//Lets allocate some work storage
+	std::vector<size_t> work_dim(1);
+	int LWORK = 5*2*min_M_N+max_M_N;
+	work_dim[0] = LWORK;
+	hoNDArray< T > WORK(&work_dim);
+
+	work_dim[0] = 5*min_M_N;
+	hoNDArray< typename realType<T>::Type > RWORK(&work_dim);
+
+	//Now we are finally ready to call the SVD. INFO starts at 0 so it is never read uninitialized afterwards.
+	int INFO = 0;
+
+	gesvd_wrapper(&JOBU, &JOBVT, &M, &N, A_ptr,
+			&M, S_ptr, U_ptr, &LDU, VT_ptr,
+			&LDVT, WORK.get_data_ptr(),
+			&LWORK, RWORK.get_data_ptr(), &INFO);
+
+	if (INFO != 0) {
+		std::stringstream ss;
+		ss << "Call to gesvd failed, INFO = " << INFO << "";
+		throw std::runtime_error(ss.str());
+	}
+
+	std::vector<size_t> permute_order(2);
+	permute_order[0] = 1;permute_order[1] = 0;
+	if (U) {
+	  permute(U_t.get(), U, &permute_order);
+	}
+	if (VT) {
+	  permute(VT_t.get(), VT, &permute_order);
+	}
+}
+
+//Template instanciations
+template EXPORTLINALG void hoNDArray_svd(hoNDArray< float > *A, hoNDArray< float > *U, hoNDArray< float > *S, hoNDArray< float > *VT);
+template EXPORTLINALG void hoNDArray_svd(hoNDArray< double > *A, hoNDArray< double > *U, hoNDArray< double > *S, hoNDArray< double > *VT);
+template EXPORTLINALG void hoNDArray_svd(hoNDArray< std::complex<float> > *A, hoNDArray< std::complex<float> > *U, hoNDArray< float > *S, hoNDArray< std::complex<float> > *VT);
+template EXPORTLINALG void hoNDArray_svd(hoNDArray< std::complex<double> > *A, hoNDArray< std::complex<double> > *U, hoNDArray< double > *S, hoNDArray< std::complex<double> > *VT);
+
+} //Namespace Gadgetron
diff --git a/toolboxes/linalg/matrix_decomposition.h b/toolboxes/linalg/matrix_decomposition.h
new file mode 100644
index 0000000..dff47a2
--- /dev/null
+++ b/toolboxes/linalg/matrix_decomposition.h
@@ -0,0 +1,42 @@
+/*
+ * matrix_decomposition.h
+ *
+ *  Created on: Dec 10, 2011
+ *      Author: Michael S. Hansen
+ */
+
+#ifndef MATRIX_DECOMPOSITION_H_
+#define MATRIX_DECOMPOSITION_H_
+
+#include "hoNDArray.h"
+#include <complex>
+#include "complext.h"
+
+#include "linalg_export.h"
+
+namespace Gadgetron 
+{
+
+/**
+ *   Perform Cholesky decomposition of matrix.
+ *   hoNDArray should be symmetric/hermitian positive definite.
+ *   Matrix will be replaced with lower triangular matrix.
+ *   Calls LAPACK subroutine _POTRF
+ *
+ */
+template <typename T> EXPORTLINALG void hoNDArray_choldc(hoNDArray<T>* A);
+
+/**
+ * Invert matrix assuming it is lower triangular
+ */
+template <typename T> EXPORTLINALG void hoNDArray_inv_lower_triangular(hoNDArray<T>* A);
+
+/**
+ *  SVD
+ */
+template <typename T> EXPORTLINALG void hoNDArray_svd(hoNDArray< T >* A, hoNDArray< T >* U, hoNDArray<typename realType<T>::Type>* S, hoNDArray< T >* VT);
+
+} //Namespace Gadgetron
+
+
+#endif /* MATRIX_DECOMPOSITION_H_ */
diff --git a/toolboxes/linalg/matrix_vector_op.cpp b/toolboxes/linalg/matrix_vector_op.cpp
new file mode 100644
index 0000000..0d34190
--- /dev/null
+++ b/toolboxes/linalg/matrix_vector_op.cpp
@@ -0,0 +1,313 @@
+/*
+ * matrix_vector_op.cpp
+ *
+ *  Created on: Dec 9, 2011
+ *      Author: Michael S. Hansen
+ */
+
+#include "matrix_vector_op.h"
+#include <cstdlib>
+
+//Declaration of BLAS routines
+/*
+ * We will opt to not use the easier CBLAS interface to give us the best chance of being compatible on all platforms.
+ * We will declare the BLAS (and LAPACK) routines ourselves.
+ *
+ */
+extern "C" {
+  //GEMM - Generalized matrix-matrix multiplication
+  void sgemm_(char* TRANSA,char* TRANSB,int* M, int *N, int *K, void* ALPHA,
+	      void *A, int* LDA, void* B, int* LDB, void* BETA, void* C, int *LDC);
+  void dgemm_(char* TRANSA,char* TRANSB,int* M, int *N, int *K, void* ALPHA,
+	      void *A, int* LDA, void* B, int* LDB, void* BETA, void* C, int *LDC);
+  void cgemm_(char* TRANSA,char* TRANSB,int* M, int *N, int *K, void* ALPHA,
+	      void *A, int* LDA, void* B, int* LDB, void* BETA, void* C, int *LDC);
+  void zgemm_(char* TRANSA,char* TRANSB,int* M, int *N, int *K, void* ALPHA,
+	      void *A, int* LDA, void* B, int* LDB, void* BETA, void* C, int *LDC);
+  
+  //TRMM - Multiplication with a triangular matrix
+  void strmm_(char* SIDE, char* UPLO, char* TRANSA, char* DIAG, int* M,int* N,
+	      void* ALPHA,void* A,int* LDA,void* B, int* LDB);
+  void dtrmm_(char* SIDE, char* UPLO, char* TRANSA, char* DIAG, int* M,int* N,
+	      void* ALPHA,void* A,int* LDA,void* B, int* LDB);
+  void ctrmm_(char* SIDE, char* UPLO, char* TRANSA, char* DIAG, int* M,int* N,
+	      void* ALPHA,void* A,int* LDA,void* B, int* LDB);
+  void ztrmm_(char* SIDE, char* UPLO, char* TRANSA, char* DIAG, int* M,int* N,
+	      void* ALPHA,void* A,int* LDA,void* B, int* LDB);
+  
+  //AXPY - Scaled vector addition (Y = A*X + Y)
+  void saxpy_(int* N, void* A, void* X, int* INCX, void* Y, int* INCY);
+  void daxpy_(int* N, void* A, void* X, int* INCX, void* Y, int* INCY);
+  void caxpy_(int* N, void* A, void* X, int* INCX, void* Y, int* INCY);
+  void zaxpy_(int* N, void* A, void* X, int* INCX, void* Y, int* INCY);
+  
+  //NORM2
+  float snrm2_(int*N, void* X, int* INCX);
+  float scnrm2_(int*N, void* X, int* INCX);
+  double dnrm2_(int*N, void* X, int* INCX);
+  double dznrm2_(int*N, void* X, int* INCX);
+
+  //ASUM
+  float sasum_(int* N, void* SX, int* INCX);
+  float scasum_(int* N, void* SX, int* INCX);
+  double dasum_(int* N, void* SX, int* INCX);
+  double dzasum_(int* N, void* SX, int* INCX);
+
+  //SCAL - Scaling of vector
+  void sscal_(int* N, void* SA, void* SX, int* INCX); 
+  void dscal_(int* N, void* SA, void* SX, int* INCX); 
+  void cscal_(int* N, void* SA, void* SX, int* INCX); 
+  void zscal_(int* N, void* SA, void* SX, int* INCX); 
+}
+
+namespace Gadgetron
+{
+  void cblas_scal_wrapper(int N, float SA, float* SX, int INCX) // float overload
+  {
+    sscal_(&N, &SA, SX, &INCX); // the Fortran routine takes every argument by address
+  }
+
+  void cblas_scal_wrapper(int N, double SA, double* SX, int INCX) // double overload
+  {
+    dscal_(&N, &SA, SX, &INCX);
+  }
+
+  void cblas_scal_wrapper(int N, std::complex<float> SA, std::complex<float>* SX, int INCX) // complex<float> overload
+  {
+    cscal_(&N, &SA, SX, &INCX);
+  }
+
+  void cblas_scal_wrapper(int N, std::complex<double> SA, std::complex<double>* SX, int INCX) // complex<double> overload
+  {
+    zscal_(&N, &SA, SX, &INCX);
+  }
+
+  template <typename T> void hoNDArray_scal(T SA, hoNDArray<T>* X) // passes X's buffer and SA to the matching _scal overload
+  {
+    return cblas_scal_wrapper(X->get_number_of_elements(),SA,X->get_data_ptr(),1); // unit stride; the element count is converted to the wrapper's int parameter
+  }
+
+  template EXPORTLINALG void hoNDArray_scal(float SA, hoNDArray<float>* X);
+  template EXPORTLINALG void hoNDArray_scal(double SA, hoNDArray<double>* X);
+  template EXPORTLINALG void hoNDArray_scal(std::complex<float> SA, hoNDArray< std::complex<float> >* X);
+  template EXPORTLINALG void hoNDArray_scal(std::complex<double> SA, hoNDArray< std::complex<double> >* X);
+
+
+  double cblas_norm2_wrapper(int N, float* X, int INCX) { // float overload
+    return static_cast<double>(snrm2_(&N,X,&INCX)); // snrm2_ is declared as returning float; widened to double
+  }
+
+  double cblas_norm2_wrapper(int N, double* X, int INCX) { // double overload
+    return dnrm2_(&N,X,&INCX);
+  }
+
+  double  cblas_norm2_wrapper(int N, std::complex<float>* X, int INCX) { // complex<float> overload
+    return static_cast<double>(scnrm2_(&N,X,&INCX)); // scnrm2_ is declared as returning float; widened to double
+  }
+
+  double  cblas_norm2_wrapper(int N, std::complex<double>* X, int INCX) { // complex<double> overload
+    return dznrm2_(&N,X,&INCX);
+  }
+
+  template <typename T> double hoNDArray_norm2(hoNDArray<T>* X) // result of the matching _nrm2 routine over all elements of X
+  {
+    return cblas_norm2_wrapper(X->get_number_of_elements(),X->get_data_ptr(),1); // unit stride; the element count is converted to the wrapper's int parameter
+  }
+
+  //Template instanciations
+  template EXPORTLINALG double hoNDArray_norm2( hoNDArray<float>* X);
+  template EXPORTLINALG double hoNDArray_norm2( hoNDArray<double>* X);
+  template EXPORTLINALG double hoNDArray_norm2( hoNDArray< std::complex<float> >* X);
+  template EXPORTLINALG double hoNDArray_norm2( hoNDArray< std::complex<double> >* X);
+
+  
+  double cblas_asum_wrapper(int N, float* X, int INCX) { // float overload
+    return static_cast<double>(sasum_(&N,X,&INCX)); // sasum_ is declared as returning float; widened to double
+  }
+
+  double cblas_asum_wrapper(int N, double* X, int INCX) { // double overload
+    return dasum_(&N,X,&INCX);
+  }
+
+  double  cblas_asum_wrapper(int N, std::complex<float>* X, int INCX) { // complex<float> overload
+    return static_cast<double>(scasum_(&N,X,&INCX)); // scasum_ is declared as returning float; widened to double
+  }
+
+  double  cblas_asum_wrapper(int N, std::complex<double>* X, int INCX) { // complex<double> overload
+    return dzasum_(&N,X,&INCX);
+  }
+
+ template <typename T> double hoNDArray_asum(hoNDArray<T>* X) // result of the matching _asum routine over all elements of X
+  {
+    return cblas_asum_wrapper(X->get_number_of_elements(),X->get_data_ptr(),1); // unit stride; the element count is converted to the wrapper's int parameter
+  }
+
+  //Template instanciations
+  template EXPORTLINALG double hoNDArray_asum( hoNDArray<float>* X);
+  template EXPORTLINALG double hoNDArray_asum( hoNDArray<double>* X);
+  template EXPORTLINALG double hoNDArray_asum( hoNDArray< std::complex<float> >* X);
+  template EXPORTLINALG double hoNDArray_asum( hoNDArray< std::complex<double> >* X);
+
+  void cblas_axpy_wrapper(int N, float* A, float* X, int INCX, float* Y, int INCY) // float overload; A points to the scalar factor
+  {
+    saxpy_(&N, A, X, &INCX, Y, &INCY); // the Fortran routine takes every argument by address
+  }
+
+  void cblas_axpy_wrapper(int N, double* A, double* X, int INCX, double* Y, int INCY) // double overload
+  {
+    daxpy_(&N, A, X, &INCX, Y, &INCY);
+  }
+
+  void cblas_axpy_wrapper(int N, std::complex<float>* A, std::complex<float>* X, int INCX, std::complex<float>* Y, int INCY) // complex<float> overload
+  {
+    caxpy_(&N, A, X, &INCX, Y, &INCY);
+  }
+
+  void cblas_axpy_wrapper(int N, std::complex<double>* A, std::complex<double>* X, int INCX, std::complex<double>* Y, int INCY) // complex<double> overload
+  {
+    zaxpy_(&N, A, X, &INCX, Y, &INCY);
+  }
+
+  template <typename T> void hoNDArray_axpy( T* A, hoNDArray<T>* X, hoNDArray<T>* Y)
+  { // Y = (*A)*X + Y over X's elements; a shorter Y is rejected because the BLAS call would write past its end.
+    if (Y->get_number_of_elements() < X->get_number_of_elements()) throw std::runtime_error("hoNDArray_axpy: Y has fewer elements than X");
+    cblas_axpy_wrapper((int)X->get_number_of_elements(),A,X->get_data_ptr(),1,Y->get_data_ptr(),1);
+  }
+  //Template instanciations
+  template EXPORTLINALG void hoNDArray_axpy( float* A, hoNDArray< float >* X, hoNDArray< float>* Y);
+  template EXPORTLINALG void hoNDArray_axpy( double* A, hoNDArray< double >* X, hoNDArray< double>* Y);
+
+  template EXPORTLINALG void hoNDArray_axpy( std::complex<float>* A, hoNDArray< std::complex<float> >* X, 
+					     hoNDArray< std::complex<float> >* Y);
+
+  template EXPORTLINALG void hoNDArray_axpy( std::complex<double>* A, hoNDArray< std::complex<double> >* X, 
+					     hoNDArray< std::complex<double> >* Y);
+
+  void cblas_gemm_wrapper(char TRANSA, char TRANSB, float* A, float* B, float* C,
+			  int M, int N, int K, float* alpha, float* beta) // float overload
+  {
+    //We have to flip the arguments here to make it fit with FORTRAN column-major order so that we don't have to transpose
+    sgemm_(&TRANSB, &TRANSA,&N, &M, &K, alpha, B, &N, A, &K, beta, C, &N); // B goes first with leading dimension N, then A with K; C uses N
+  }
+  
+  void cblas_gemm_wrapper(char TRANSA, char TRANSB, double* A, double* B, double* C,
+			  int M, int N, int K, double* alpha, double* beta) // double overload
+  {
+    //We have to flip the arguments here to make it fit with FORTRAN column-major order so that we don't have to transpose
+    dgemm_(&TRANSB, &TRANSA,&N, &M, &K, alpha, B, &N, A, &K, beta, C, &N); // same operand order as the float overload
+  }
+  
+  void cblas_gemm_wrapper(char TRANSA, char TRANSB, std::complex<float>* A, std::complex<float>* B, std::complex<float>* C,
+			  int M, int N, int K, std::complex<float>* alpha, std::complex<float>* beta) // complex<float> overload
+  {
+    //We have to flip the arguments here to make it fit with FORTRAN column-major order so that we don't have to transpose
+    cgemm_(&TRANSB, &TRANSA,&N, &M, &K, alpha, B, &N, A, &K, beta, C, &N); // same operand order as the float overload
+  }
+  
+  void cblas_gemm_wrapper(char TRANSA, char TRANSB, std::complex<double>* A, std::complex<double>* B, std::complex<double>* C,
+			  int M, int N, int K, std::complex<double>* alpha, std::complex<double>* beta) // complex<double> overload
+  {
+    //We have to flip the arguments here to make it fit with FORTRAN column-major order so that we don't have to transpose
+    zgemm_(&TRANSB, &TRANSA,&N, &M, &K, alpha, B, &N, A, &K, beta, C, &N); // same operand order as the float overload
+  }
+  
+  /**
+   *  Computes C = alpha*(A*B) + beta*C through BLAS _gemm.
+   *  Convention used here: get_size(0) is the column count, get_size(1) the row count.
+   *  @throws std::runtime_error if A, B or C is not 2D or the sizes are incompatible.
+   */
+  template <typename T> void hoNDArray_gemm( hoNDArray<T>* A, hoNDArray<T>* B, T alpha,  hoNDArray<T>* C, T beta)
+  {
+	if (A->get_number_of_dimensions() != 2) {
+		throw std::runtime_error("Invalid number of dimensions in matrix A: ");
+	}
+
+	if (B->get_number_of_dimensions() != 2) {
+		throw std::runtime_error("Invalid number of dimensions in matrix B: ");
+	}
+
+	if (C->get_number_of_dimensions() != 2) {
+		throw std::runtime_error("Invalid number of dimensions in matrix C: ");
+	}
+
+	const size_t rows_A = A->get_size(1);
+	const size_t cols_A = A->get_size(0);
+	const size_t rows_B = B->get_size(1);
+	const size_t cols_B = B->get_size(0);
+
+	if (cols_A != rows_B) {
+		std::stringstream ss;
+		ss << "Number of columns of A (" << cols_A << ") does not match rows of B(" << rows_B << ")";
+		throw std::runtime_error(ss.str());
+	}
+
+	//Is the output matrix the right size?
+	if ((C->get_size(0) != cols_B) || (C->get_size(1) != rows_A)) {
+		std::stringstream ss;
+		ss << "Size of output matrix C (" << C->get_size(0) << " (cols), "
+		   << C->get_size(1) << " (rows))" << " does not match the expected "
+		   << cols_B << " (cols), " << rows_A << " (rows)";
+		throw std::runtime_error(ss.str());
+	}
+
+	//Now call the appropriate BLAS routine
+	int M = static_cast<int>(rows_A);
+	int N = static_cast<int>(cols_B);
+	int K = static_cast<int>(cols_A);
+	char TRANSA = 'N';
+	char TRANSB = 'N';
+	cblas_gemm_wrapper(TRANSA, TRANSB, A->get_data_ptr(), B->get_data_ptr(), C->get_data_ptr(), M, N, K, &alpha, &beta);
+  }
+
+//Template instanciations
+template EXPORTLINALG void hoNDArray_gemm( hoNDArray< float>* A, hoNDArray< float >* B, float alpha,  hoNDArray< float >* C, float beta);
+template EXPORTLINALG void hoNDArray_gemm( hoNDArray< double >* A, hoNDArray< double >* B, double alpha,  hoNDArray< double >* C, double beta);
+template EXPORTLINALG void hoNDArray_gemm( hoNDArray< std::complex<float> >* A, hoNDArray< std::complex<float> >* B, std::complex<float> alpha,  hoNDArray< std::complex<float> >* C, std::complex<float> beta);
+template EXPORTLINALG void hoNDArray_gemm( hoNDArray< std::complex<double> >* A, hoNDArray< std::complex<double> >* B, std::complex<double> alpha,  hoNDArray< std::complex<double> >* C, std::complex<double> beta);
+
+void trmm_wrapper(int* M,int* N, float* ALPHA,float* A, void* B) { // B = alpha*A*B; SIDE='R'/UPLO='U' account for the row-major layout
+	char SIDE = 'R'; char UPLO = 'U'; char TRANSA = 'N'; char DIAG = 'N';
+	strmm_(&SIDE, &UPLO, &TRANSA, &DIAG, N, M, ALPHA, A, M, B, N); // triangular A (ld M) in the BLAS A slot, overwritten B (ld N) in the B slot
+}
+
+void trmm_wrapper(int* M,int* N, double* ALPHA, double* A, void* B) { // double overload, same argument layout
+	char SIDE = 'R'; char UPLO = 'U'; char TRANSA = 'N'; char DIAG = 'N';
+	dtrmm_(&SIDE, &UPLO, &TRANSA, &DIAG, N, M, ALPHA, A, M, B, N); // triangular A (ld M) in the BLAS A slot, overwritten B (ld N) in the B slot
+}
+
+void trmm_wrapper(int* M,int* N, std::complex<float>* ALPHA,std::complex<float>* A, void* B) { // complex<float> overload
+	char SIDE = 'R'; char UPLO = 'U'; char TRANSA = 'N'; char DIAG = 'N';
+	ctrmm_(&SIDE, &UPLO, &TRANSA, &DIAG, N, M, ALPHA, A, M, B, N); // triangular A (ld M) in the BLAS A slot, overwritten B (ld N) in the B slot
+}
+
+void trmm_wrapper(int* M,int* N, std::complex<double>* ALPHA,std::complex<double>* A, void* B) { // complex<double> overload
+	char SIDE = 'R'; char UPLO = 'U'; char TRANSA = 'N'; char DIAG = 'N';
+	ztrmm_(&SIDE, &UPLO, &TRANSA, &DIAG, N, M, ALPHA, A, M, B, N); // triangular A (ld M) in the BLAS A slot, overwritten B (ld N) in the B slot
+}
+
+/** Performs B = alpha*A*B in place through BLAS _trmm; A should be lower triangular.
+ *  A must be square with as many rows as B, since _trmm would otherwise access memory out of bounds.
+ *  @throws std::runtime_error if A or B is not 2D or their sizes are incompatible. */
+template <typename T> void hoNDArray_trmm( hoNDArray<T>* A, hoNDArray<T>* B, T alpha)
+{
+	if (A->get_number_of_dimensions() != 2) {
+		throw std::runtime_error("Invalid number of dimensions in matrix A: ");
+	}
+	if (B->get_number_of_dimensions() != 2) {
+		throw std::runtime_error("Invalid number of dimensions in matrix B: ");
+	}
+	if ((A->get_size(0) != A->get_size(1)) || (B->get_size(1) != A->get_size(1))) {
+		throw std::runtime_error("Matrix A must be square with as many rows as B");
+	}
+
+	int M = A->get_size(1); //Number of rows of A
+	int N = B->get_size(0); //Number of columns of B
+	trmm_wrapper(&M, &N, &alpha, A->get_data_ptr(), B->get_data_ptr());
+}
+
+template EXPORTLINALG void hoNDArray_trmm( hoNDArray<float>* A, hoNDArray<float>* B, float alpha);
+template EXPORTLINALG void hoNDArray_trmm( hoNDArray<double>* A, hoNDArray<double>* B, double alpha);
+template EXPORTLINALG void hoNDArray_trmm( hoNDArray< std::complex<float> >* A, hoNDArray< std::complex<float> >* B, std::complex<float> alpha);
+template EXPORTLINALG void hoNDArray_trmm( hoNDArray< std::complex<double> >* A, hoNDArray< std::complex<double> >* B, std::complex<double> alpha);
+
+} //Namespace Gadgetron
diff --git a/toolboxes/linalg/matrix_vector_op.h b/toolboxes/linalg/matrix_vector_op.h
new file mode 100644
index 0000000..a209190
--- /dev/null
+++ b/toolboxes/linalg/matrix_vector_op.h
@@ -0,0 +1,52 @@
+/*
+ * matrix_vector_op.h
+ *
+ *  Created on: Dec 9, 2011
+ *      Author: hansenms
+ */
+
+#ifndef MATRIX_VECTOR_OP_H_
+#define MATRIX_VECTOR_OP_H_
+
+#include <hoNDArray.h>
+#include "complext.h"
+
+#include "linalg_export.h"
+
+namespace Gadgetron
+{
+
+  template <typename T> EXPORTLINALG double hoNDArray_norm2(hoNDArray<T>* X);
+  template <typename T> EXPORTLINALG double hoNDArray_asum(hoNDArray<T>* X);
+  template <typename T> EXPORTLINALG void hoNDArray_scal(T SA, hoNDArray<T>* X);
+
+/**
+ *
+ *  Performs C = alpha*(A*B) + beta*C
+ *
+ */
+template <typename T> EXPORTLINALG void hoNDArray_gemm( hoNDArray<T>* A, hoNDArray<T>* B, T alpha,  hoNDArray<T>* C, T beta);
+
+
+/**
+ *
+ *  Performs Y = A*X+Y
+ *
+ */
+ template <typename T> EXPORTLINALG void hoNDArray_axpy( T* A, hoNDArray<T>* X, hoNDArray<T>* Y); 
+
+/**
+ *  Performs B = alpha*A*B
+ *
+ *  A should be lower triangular.
+ *
+ */
+template <typename T> EXPORTLINALG void hoNDArray_trmm( hoNDArray<T>* A, hoNDArray<T>* B, T alpha);
+
+
+ void elementWiseMultiply(int n,  std::complex<float> *a, std::complex<float> *x, std::complex<float> *y);
+
+
+} //namespace gadgetron
+
+#endif /* MATRIX_VECTOR_OP_H_ */
diff --git a/toolboxes/linalg/write_mr_raw.m b/toolboxes/linalg/write_mr_raw.m
new file mode 100755
index 0000000..211f88f
--- /dev/null
+++ b/toolboxes/linalg/write_mr_raw.m
@@ -0,0 +1,18 @@
+function write_mr_raw(input,filename)
+% WRITE_MR_RAW  Write an array to a raw binary file.
+%   Layout: int32 number of dimensions, int32 size per dimension, then the
+%   data as float32 with real and imaginary parts interleaved.
+
+msize = size(input);
+
+fid = fopen(filename,'wb');
+if fid < 0
+    error('write_mr_raw:fopen', 'Unable to open %s for writing.', filename);
+end
+fwrite(fid,length(msize),'int32');
+fwrite(fid,msize,'int32');
+out = zeros(prod(msize)*2,1);
+out(1:2:end) = real(input);
+out(2:2:end) = imag(input);
+fwrite(fid,out,'float32');
+fclose(fid);
\ No newline at end of file
diff --git a/toolboxes/mri/CMakeLists.txt b/toolboxes/mri/CMakeLists.txt
index 88128fd..00478ff 100644
--- a/toolboxes/mri/CMakeLists.txt
+++ b/toolboxes/mri/CMakeLists.txt
@@ -1 +1,6 @@
 add_subdirectory(pmri)
+
+if (ARMADILLO_FOUND)
+   add_subdirectory(epi)
+endif (ARMADILLO_FOUND)
+
diff --git a/toolboxes/mri/epi/CMakeLists.txt b/toolboxes/mri/epi/CMakeLists.txt
new file mode 100644
index 0000000..3888be4
--- /dev/null
+++ b/toolboxes/mri/epi/CMakeLists.txt
@@ -0,0 +1,63 @@
+    if (WIN32)
+        ADD_DEFINITIONS(-D__BUILD_GADGETRON_EPI__)
+        link_directories(${Boost_LIBRARY_DIRS})
+    endif (WIN32)
+
+    include_directories(
+        ${ACE_INCLUDE_DIR} 
+        ${Boost_INCLUDE_DIR}
+        ${FFTW3_INCLUDE_DIR}
+        ${ARMADILLO_INCLUDE_DIRS}
+        ${MKL_INCLUDE_DIR}
+        ${CMAKE_SOURCE_DIR}/toolboxes/mri/pmri/gpu
+        ${CMAKE_SOURCE_DIR}/toolboxes/nfft/gpu
+        ${CMAKE_SOURCE_DIR}/toolboxes/core
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/hostutils
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
+        ${CMAKE_SOURCE_DIR}/toolboxes/operators
+        ${CMAKE_SOURCE_DIR}/toolboxes/operators/cpu
+        ${CMAKE_SOURCE_DIR}/toolboxes/solvers
+        ${CMAKE_SOURCE_DIR}/toolboxes/solvers/cpu
+        ${CMAKE_SOURCE_DIR}/gadgets/core
+        ${HDF5_INCLUDE_DIR}
+        ${HDF5_INCLUDE_DIR}/cpp
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
+        ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow
+        ${CMAKE_SOURCE_DIR}/toolboxes/gtplus
+        ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/util
+        ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/workflow
+        ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/algorithm
+        ${CMAKE_SOURCE_DIR}/toolboxes/gtplus/solver
+        ${CMAKE_SOURCE_DIR}/toolboxes/gadgettools
+        ${CMAKE_SOURCE_DIR}/apps/gadgetron
+        ${CMAKE_SOURCE_DIR}/apps/matlab
+        ${CMAKE_SOURCE_DIR}/gadgets/mri_core 
+        ${CMAKE_SOURCE_DIR}/gadgets/gtPlus 
+    )
+
+    add_library(gadgetron_toolbox_epi ${LIBTYPE}
+     EPIExport.h
+     EPIReconXObject.h
+     EPIReconXObjectFlat.h
+     EPIReconXObjectTrapezoid.h)
+
+    set_target_properties(gadgetron_toolbox_epi PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+    set_target_properties(gadgetron_toolbox_epi PROPERTIES LINKER_LANGUAGE CXX)
+
+    target_link_libraries(gadgetron_toolbox_epi gadgetron_toolbox_cpucore gadgetron_toolbox_cpucore_math ${ARMADILLO_LIBRARIES})
+
+    install (FILES 
+        EPIExport.h 
+        EPIReconXObject.h
+        EPIReconXObjectFlat.h
+        EPIReconXObjectTrapezoid.h
+        DESTINATION include COMPONENT main)
+
+    # install(TARGETS epi DESTINATION lib)
+
+    #if (GTEST_FOUND)
+    #    add_subdirectory(ut)
+    #endif (GTEST_FOUND)
+
diff --git a/toolboxes/mri/epi/EPIExport.h b/toolboxes/mri/epi/EPIExport.h
new file mode 100644
index 0000000..8b827b8
--- /dev/null
+++ b/toolboxes/mri/epi/EPIExport.h
@@ -0,0 +1,20 @@
+/** \file       EPIExport.h
+    \brief      Implement windows export/import for EPI toolbox
+    \author     Souheil Inati
+*/
+
+#pragma once
+
+#if defined (WIN32)
+    #ifdef BUILD_TOOLBOX_STATIC
+        #define EXPORTEPI
+    #else
+        #if defined (__BUILD_GADGETRON_EPI__)
+            #define EXPORTEPI __declspec(dllexport)
+        #else
+            #define EXPORTEPI __declspec(dllimport)
+        #endif
+    #endif
+#else
+    #define EXPORTEPI
+#endif
diff --git a/toolboxes/mri/epi/EPIReconXObject.h b/toolboxes/mri/epi/EPIReconXObject.h
new file mode 100644
index 0000000..c2b4d8e
--- /dev/null
+++ b/toolboxes/mri/epi/EPIReconXObject.h
@@ -0,0 +1,73 @@
+/** \file   EPIReconXObject.h
+    \brief  Define the symbols and implement functionality for EPI X reconstruction operator
+    \author Souheil Inati
+*/
+
+#pragma once
+
+#include "EPIExport.h"
+
+#include "ismrmrd/ismrmrd.h"
+#include "hoNDArray.h"
+
+namespace Gadgetron { namespace EPI {
+
+enum EPIType
+{
+  FLAT,
+  TRAPEZOID,
+  SINUSOID,
+  ARBITRARY
+};
+
+enum EPIReceiverPhaseType
+{
+  NONE,
+  EVEN,
+  FULL
+};
+
+template <typename T> class EPIReconXObject 
+{
+ public:
+  EPIReconXObject();
+  virtual ~EPIReconXObject();
+
+  hoNDArray <float> getTrajectoryPos();
+  hoNDArray <float> getTrajectoryNeg();
+
+  hoNDArray <float> filterPos_;
+  hoNDArray <float> filterNeg_;
+  float slicePosition[3];
+
+  virtual int computeTrajectory()=0;
+
+  virtual int apply(ISMRMRD::AcquisitionHeader &hdr_in,  hoNDArray <T> &data_in, 
+		    ISMRMRD::AcquisitionHeader &hdr_out, hoNDArray <T> &data_out)=0;
+  EPIReceiverPhaseType rcvType_;
+
+ protected:
+  hoNDArray <float> trajectoryPos_;
+  hoNDArray <float> trajectoryNeg_;
+
+};
+
+template <typename T> EPIReconXObject<T>::EPIReconXObject()
+{
+}
+
+template <typename T> EPIReconXObject<T>::~EPIReconXObject()
+{
+}
+
+template <typename T> hoNDArray<float> EPIReconXObject<T>::getTrajectoryPos()
+{
+  return trajectoryPos_;
+}
+
+template <typename T> hoNDArray<float> EPIReconXObject<T>::getTrajectoryNeg()
+{
+  return trajectoryNeg_;
+}
+
+}}
diff --git a/toolboxes/mri/epi/EPIReconXObjectFlat.h b/toolboxes/mri/epi/EPIReconXObjectFlat.h
new file mode 100644
index 0000000..d3899a6
--- /dev/null
+++ b/toolboxes/mri/epi/EPIReconXObjectFlat.h
@@ -0,0 +1,189 @@
+/** \file   EPIReconXObjectFlat.h
+    \brief  Implement functionality for EPI X reconstruction operator for Flat type (non-rampsampled)
+    \author Souheil Inati
+*/
+
+#pragma once
+
+#include "EPIExport.h"
+#include "EPIReconXObject.h"
+#include "hoArmadillo.h"
+#include "hoNDArray_elemwise.h"
+#include "gadgetronmath.h"
+#include <complex>
+
+namespace Gadgetron { namespace EPI {
+
+template <typename T> class EPIReconXObjectFlat : public EPIReconXObject<T>  // X-reconstruction operator for the flat (non-ramp-sampled) readout.
+{
+ public:
+  EPIReconXObjectFlat();
+  virtual ~EPIReconXObjectFlat();
+  // Rebuilds trajectoryPos_/trajectoryNeg_ from numSamples_, dwellTime_ and encodeNx_, and invalidates the cached operator.
+  virtual int computeTrajectory();
+  // Applies the cached operator (built on first use) to data_in; writes data_out and hdr_out.
+  virtual int apply(ISMRMRD::AcquisitionHeader &hdr_in, hoNDArray <T> &data_in, 
+		    ISMRMRD::AcquisitionHeader &hdr_out, hoNDArray <T> &data_out);
+  // Re-export the dependent base-class members so they can be named without this-> inside the template.
+  using EPIReconXObject<T>::filterPos_;
+  using EPIReconXObject<T>::filterNeg_;
+  using EPIReconXObject<T>::slicePosition;
+  using EPIReconXObject<T>::rcvType_;
+  // Readout and matrix-size parameters (public; presumably set by the caller before computeTrajectory()).
+  int   numSamples_;  // number of readout samples: trajectory length and column count of the operator
+  float dwellTime_;   // duration of one sample; the unit is not fixed by this file
+  int   encodeNx_;    // number of encoded points the trajectory is scaled to
+  float encodeFOV_;   // NOTE(review): initialised but not otherwise used in the code visible here
+  int   reconNx_;     // number of output points per readout: row count of the operator
+  float reconFOV_;    // NOTE(review): initialised but not otherwise used in the code visible here
+  // Everything below is accessible to this class and its subclasses only.
+ protected:
+  using EPIReconXObject<T>::trajectoryPos_;
+  using EPIReconXObject<T>::trajectoryNeg_;
+  // Cached reconstruction operators (reconNx_ x numSamples_) for forward and reverse readouts, and their validity flag.
+  hoNDArray <T> Mpos_;
+  hoNDArray <T> Mneg_;
+  bool operatorComputed_;
+
+};
+
+// Default state: every size and timing parameter is zero, no operator is cached,
+// and the receiver-phase mode is EVEN.
+template <typename T> EPIReconXObjectFlat<T>::EPIReconXObjectFlat()
+  : numSamples_(0), dwellTime_(0.0f),
+    encodeNx_(0), encodeFOV_(0.0f),
+    reconNx_(0), reconFOV_(0.0f),
+    operatorComputed_(false)
+{
+  // rcvType_ lives in the base class, so it is assigned rather than listed above.
+  rcvType_ = EVEN;
+}
+
+template <typename T> EPIReconXObjectFlat<T>::~EPIReconXObjectFlat()  // declared virtual in the class; the body has nothing to release by hand
+{
+}
+
+// Computes the k-space position of every readout sample for a flat-top readout
+// (constant gradient while sampling) and stores it in trajectoryPos_ and
+// trajectoryNeg_, both of length numSamples_.
+//
+// Reads numSamples_, dwellTime_ and encodeNx_. Marks the operator cached by
+// apply() as stale. Always returns 0.
+template <typename T> int EPIReconXObjectFlat<T>::computeTrajectory()
+{
+
+  // (Re)allocate and clear the two trajectory arrays
+  trajectoryPos_.create(numSamples_);
+  Gadgetron::clear(trajectoryPos_);
+  trajectoryNeg_.create(numSamples_);
+  Gadgetron::clear(trajectoryNeg_);
+
+  // With the gradient amplitude taken as G = 1, the area covered by the whole
+  // readout is simply its duration
+  const float readArea = dwellTime_ * numSamples_;
+
+  // Half of that area is prephased away, which puts k = 0 halfway through
+  // the readout time
+  const float prePhaseArea = 0.5 * readArea;
+
+  // Scale so that the readout area corresponds to the number of encoded points
+  const float scale = encodeNx_ /readArea;
+
+  for (int n = 0; n < numSamples_; ++n)
+  {
+    // area accumulated at the end of the n-th dwell period
+    const float kn = (n+1.0)*dwellTime_;
+
+    // the positive readout walks up through k-space ...
+    trajectoryPos_[n] = scale * (kn - prePhaseArea);
+
+    // ... and the negative readout walks the same span in the other direction
+    trajectoryNeg_[n] = scale * (-1.0*kn + readArea - prePhaseArea);
+  }
+
+  // the operator cached by apply() was built from the old trajectories, so
+  // force it to be rebuilt on the next call
+  operatorComputed_ = false;
+
+  return 0;
+}
+
+
+template <typename T> int EPIReconXObjectFlat<T>::apply(ISMRMRD::AcquisitionHeader &hdr_in, hoNDArray <T> &data_in, 
+		    ISMRMRD::AcquisitionHeader &hdr_out, hoNDArray <T> &data_out)
+{  // Resamples one readout (data_in) to reconNx_ points (data_out) via a cached matrix operator; always returns 0.
+  if (!operatorComputed_) {
+    // Build the reconstruction operator on first use (computeTrajectory() resets the flag, forcing a rebuild)
+    int Km = floor(encodeNx_ / 2.0);
+    int Ne = 2*Km + 1;  // odd count: the grid points -Km..Km
+    int p,q; // counters
+    // NOTE(review): trajectoryPos_/trajectoryNeg_ are read below, so computeTrajectory() presumably has to run first
+    // resize the reconstruction operator
+    Mpos_.create(reconNx_,numSamples_);
+    Mneg_.create(reconNx_,numSamples_);
+
+    // evenly spaced k-space locations
+    arma::vec keven = arma::linspace<arma::vec>(-Km, Km, Ne);
+
+    // image domain locations [-0.5,...,0.5)
+    arma::vec x = arma::linspace<arma::vec>(-0.5,(reconNx_-1.)/(2.*reconNx_),reconNx_);
+
+    // DFT operator
+    // Going from k space to image space, we use the IFFT sign convention
+    arma::cx_mat F(reconNx_, Ne);
+    double fftscale = 1.0 / std::sqrt((double)Ne);
+    for (p=0; p<reconNx_; p++) {
+      for (q=0; q<Ne; q++) {
+	F(p,q) = fftscale * std::exp(std::complex<double>(0.0,1.0*2*M_PI*keven(q)*x(p)));
+      }
+    }
+    // Qp/Qn(p,q) = sinc(sample position p - grid position q): each row expresses one measured sample in terms of the evenly spaced points
+    // forward operators
+    arma::mat Qp(numSamples_, Ne);
+    arma::mat Qn(numSamples_, Ne);
+    for (p=0; p<numSamples_; p++) {
+      //std::cout << trajectoryPos_(p) << "    " << trajectoryNeg_(p) << std::endl;
+      for (q=0; q<Ne; q++) {
+	Qp(p,q) = sinc(trajectoryPos_(p)-keven(q));
+	Qn(p,q) = sinc(trajectoryNeg_(p)-keven(q));
+      }
+    }
+    // Pseudo-inverting Q maps measured samples back to the even grid; F then takes the grid to image space
+    // recon operators
+    arma::cx_mat Mp(reconNx_,numSamples_);
+    arma::cx_mat Mn(reconNx_,numSamples_);
+    Mp = F * arma::pinv(Qp);
+    Mn = F * arma::pinv(Qn);
+    for (p=0; p<reconNx_; p++) {
+      for (q=0; q<numSamples_; q++) {
+        Mpos_(p,q) = Mp(p,q);
+        Mneg_(p,q) = Mn(p,q);
+      }
+    }
+    // Mpos_/Mneg_ now hold element-wise copies of Mp/Mn
+    // set the operator computed flag
+    operatorComputed_ = true;
+  }
+  // NOTE(review): the product below is assigned to adata_out; it only reaches data_out if as_arma_matrix() wraps the array's memory and this copy-initialization is elided -- confirm
+  // convert to armadillo representation of matrices and vectors
+  arma::Mat<typename stdType<T>::Type> adata_in = as_arma_matrix(&data_in);
+  arma::Mat<typename stdType<T>::Type> adata_out = as_arma_matrix(&data_out);
+  // The ISMRMRD_ACQ_IS_REVERSE flag on the incoming header selects the operator built from trajectoryNeg_
+  // Apply it
+  if (hdr_in.isFlagSet(ISMRMRD::ISMRMRD_ACQ_IS_REVERSE)) {
+    // Negative readout
+    adata_out = as_arma_matrix(&Mneg_) * adata_in;
+  } else {
+    // Forward readout
+    adata_out = as_arma_matrix(&Mpos_) * adata_in;
+  }
+
+  // Copy the input header to the output header and set the size and the center sample
+  hdr_out = hdr_in;
+  hdr_out.number_of_samples = reconNx_;
+  hdr_out.center_sample = reconNx_/2;
+  
+  return 0;
+}
+
+}}
diff --git a/toolboxes/mri/epi/EPIReconXObjectTrapezoid.h b/toolboxes/mri/epi/EPIReconXObjectTrapezoid.h
new file mode 100644
index 0000000..105afa3
--- /dev/null
+++ b/toolboxes/mri/epi/EPIReconXObjectTrapezoid.h
@@ -0,0 +1,236 @@
+/** \file   EPIReconXObjectTrapezoid.h
+    \brief  Implement functionality for EPI X reconstruction operator for Trapezoidal type
+    \author Souheil Inati
+*/
+
+#pragma once
+
+#include "EPIExport.h"
+#include "EPIReconXObject.h"
+#include "hoArmadillo.h"
+#include "hoNDArray_elemwise.h"
+#include "gadgetronmath.h"
+#include <complex>
+
+namespace Gadgetron { namespace EPI {
+
+template <typename T> class EPIReconXObjectTrapezoid : public EPIReconXObject<T>  // X-reconstruction operator for a trapezoidal readout gradient.
+{
+ public:
+  EPIReconXObjectTrapezoid();
+  virtual ~EPIReconXObjectTrapezoid();
+  // Rebuilds trajectoryPos_/trajectoryNeg_ from the timing members below and invalidates the cached operator.
+  virtual int computeTrajectory();
+  // Applies the cached operator (built on first use) to data_in; writes data_out and hdr_out.
+  virtual int apply(ISMRMRD::AcquisitionHeader &hdr_in, hoNDArray <T> &data_in, 
+		    ISMRMRD::AcquisitionHeader &hdr_out, hoNDArray <T> &data_out);
+  // Re-export the dependent base-class members so they can be named without this-> inside the template.
+  using EPIReconXObject<T>::filterPos_;
+  using EPIReconXObject<T>::filterNeg_;
+  using EPIReconXObject<T>::slicePosition;
+  using EPIReconXObject<T>::rcvType_;
+  // Gradient timing and matrix-size parameters (public; presumably set by the caller before computeTrajectory()).
+  bool  balanced_;      // when true, computeTrajectory() overwrites acqDelayTime_ with 0.5*(total gradient time - read time)
+  float rampUpTime_;    // duration of the ramp-up segment
+  float rampDownTime_;  // duration of the ramp-down segment
+  float flatTopTime_;   // duration of the flat-top segment
+  float acqDelayTime_;  // offset added to every sample time, measured from the start of the ramp up
+  int   numSamples_;    // number of readout samples: trajectory length and column count of the operator
+  float dwellTime_;     // duration of one sample; same (unspecified) unit as the gradient times
+  int   encodeNx_;      // number of encoded points the trajectory is scaled to
+  float encodeFOV_;     // NOTE(review): initialised but not otherwise used in the code visible here
+  int   reconNx_;       // number of output points per readout: row count of the operator
+  float reconFOV_;      // NOTE(review): initialised but not otherwise used in the code visible here
+  // Everything below is accessible to this class and its subclasses only.
+ protected:
+  using EPIReconXObject<T>::trajectoryPos_;
+  using EPIReconXObject<T>::trajectoryNeg_;
+  // Cached reconstruction operators (reconNx_ x numSamples_) for forward and reverse readouts, and their validity flag.
+  hoNDArray <T> Mpos_;
+  hoNDArray <T> Mneg_;
+  bool operatorComputed_;
+
+};
+
+// Default state: balanced acquisition, all timing and size parameters zero,
+// no cached operator, and the receiver-phase mode set to EVEN.
+template <typename T> EPIReconXObjectTrapezoid<T>::EPIReconXObjectTrapezoid()
+  : balanced_(true),
+    rampUpTime_(0.0f), rampDownTime_(0.0f), flatTopTime_(0.0f),
+    acqDelayTime_(0.0f),
+    numSamples_(0), dwellTime_(0.0f),
+    encodeNx_(0), encodeFOV_(0.0f),
+    reconNx_(0), reconFOV_(0.0f),
+    operatorComputed_(false)
+{
+  // The initializer list above follows the declaration order of the members.
+  // rcvType_ is a base-class member: it can only be assigned here, not
+  // named in that list.
+  rcvType_ = EVEN;
+}
+
+template <typename T> EPIReconXObjectTrapezoid<T>::~EPIReconXObjectTrapezoid()  // declared virtual in the class; the body has nothing to release by hand
+{
+}
+
+template <typename T> int EPIReconXObjectTrapezoid<T>::computeTrajectory()
+{  // Computes trajectoryPos_/trajectoryNeg_ (numSamples_ entries each) for a trapezoidal readout gradient; always returns 0.
+  // Side effects: overwrites acqDelayTime_ when balanced_ is set, and marks the operator cached by apply() as stale.
+  // Initialize the k-space trajectory arrays
+  trajectoryPos_.create(numSamples_);
+  Gadgetron::clear(trajectoryPos_);
+  trajectoryNeg_.create(numSamples_);
+  Gadgetron::clear(trajectoryNeg_);
+
+  // Temporary trajectory for a symmetric readout
+  // first calculate the integral with G = 1;
+  int nK = numSamples_;
+  hoNDArray <float> k(nK);
+  float t;
+  int n;
+
+  //std::cout << "Dwell = " << dwellTime_ << "    acqDelayTime = " << acqDelayTime_ << std::endl;
+  //std::cout << "rampUpTime = " << rampUpTime_ << "    flatTopTime = " << flatTopTime_ << "    rampDownTime = " << rampDownTime_ << std::endl;
+  
+  // Some timings
+  float totTime = rampUpTime_ + flatTopTime_ + rampDownTime_;
+  float readTime = dwellTime_ * numSamples_;
+
+  // Fix the acqDelayTime for balanced acquisitions: the leftover gradient time is split evenly before and after sampling (overwrites the member)
+  if (balanced_) {
+    acqDelayTime_ = 0.5 * (totTime - readTime);
+  }
+  // Areas are for a unit-amplitude gradient: each ramp contributes half its duration, the flat top its full duration
+  // Some Areas
+  float totArea = 0.5*rampUpTime_ + flatTopTime_ + 0.5*rampDownTime_;
+  float readArea =  0.5*rampUpTime_ + flatTopTime_ + 0.5*rampDownTime_
+                  - 0.5*(acqDelayTime_)*(acqDelayTime_)/rampUpTime_
+                  - 0.5*(totTime - (acqDelayTime_+readTime))*(totTime - (acqDelayTime_+readTime))/rampDownTime_;
+  // NOTE(review): readArea divides by both ramp times (a zero ramp gives NaN/inf) and presumably assumes sampling starts on the ramp up and ends on the ramp down -- confirm
+  // Prephase is set to half the total gradient area (with equal ramps and a balanced readout this puts k=0 halfway through the readout time)
+  float prePhaseArea = 0.5 * totArea;
+
+  // The scale is set so that the read out area corresponds to the number of encoded points
+  float scale = encodeNx_ /readArea;
+  // k[n] = gradient area accumulated from the start of the ramp up to the end of sample n (piecewise: ramp up, flat top, ramp down)
+  for (n=0; n<nK; n++)
+  {
+    t = (n+1.0)*dwellTime_ + acqDelayTime_;  // end of the dwell time
+    if (t <= rampUpTime_) {
+      // on the ramp up
+      k[n] = 0.5 / rampUpTime_ * t*t;
+    }
+    else if ((t > rampUpTime_) && (t <= (rampUpTime_+flatTopTime_))) {
+      // on the flat top
+      k[n] = 0.5*rampUpTime_ + (t - rampUpTime_);
+    }
+    else {
+      // on the ramp down
+      float v = (rampUpTime_+flatTopTime_+rampDownTime_-t);
+      k[n] = 0.5*rampUpTime_ + flatTopTime_ + 0.5*rampDownTime_ - 0.5/rampDownTime_*v*v;
+    }
+    //std::cout << n << ":  " << t << "  " << k[n] << " " << std::endl;
+  }
+
+  // Fill the positive and negative trajectories
+  for (n=0; n<numSamples_; n++)
+  {
+    trajectoryPos_[n] = scale * (k[n] - prePhaseArea);
+    trajectoryNeg_[n] = scale * (-1.0*k[n] + totArea - prePhaseArea);
+    //std::cout << n << ":  " << trajectoryPos_[n] << "  " << trajectoryNeg_[n] << std::endl;
+  }
+
+  // reset the operatorComputed_ flag
+  operatorComputed_ = false;
+
+  return(0);
+}
+
+
+template <typename T> int EPIReconXObjectTrapezoid<T>::apply(ISMRMRD::AcquisitionHeader &hdr_in, hoNDArray <T> &data_in, 
+		    ISMRMRD::AcquisitionHeader &hdr_out, hoNDArray <T> &data_out)
+{  // Resamples one readout (data_in) to reconNx_ points (data_out) via a cached matrix operator; always returns 0.
+  if (!operatorComputed_) {
+    // Build the reconstruction operator on first use (computeTrajectory() resets the flag, forcing a rebuild)
+    int Km = floor(encodeNx_ / 2.0);
+    int Ne = 2*Km + 1;  // odd count: the grid points -Km..Km
+    int p,q; // counters
+    // NOTE(review): trajectoryPos_/trajectoryNeg_ are read below, so computeTrajectory() presumably has to run first
+    // resize the reconstruction operator
+    Mpos_.create(reconNx_,numSamples_);
+    Mneg_.create(reconNx_,numSamples_);
+
+    // evenly spaced k-space locations
+    arma::vec keven = arma::linspace<arma::vec>(-Km, Km, Ne);
+    //keven.print("keven =");
+
+    // image domain locations [-0.5,...,0.5)
+    arma::vec x = arma::linspace<arma::vec>(-0.5,(reconNx_-1.)/(2.*reconNx_),reconNx_);
+    //x.print("x =");
+
+    // DFT operator
+    // Going from k space to image space, we use the IFFT sign convention
+    arma::cx_mat F(reconNx_, Ne);
+    double fftscale = 1.0 / std::sqrt((double)Ne);
+    for (p=0; p<reconNx_; p++) {
+      for (q=0; q<Ne; q++) {
+	F(p,q) = fftscale * std::exp(std::complex<double>(0.0,1.0*2*M_PI*keven(q)*x(p)));
+      }
+    }
+    //F.print("F =");
+    // Qp/Qn(p,q) = sinc(sample position p - grid position q): each row expresses one measured sample in terms of the evenly spaced points
+    // forward operators
+    arma::mat Qp(numSamples_, Ne);
+    arma::mat Qn(numSamples_, Ne);
+    for (p=0; p<numSamples_; p++) {
+      //std::cout << trajectoryPos_(p) << "    " << trajectoryNeg_(p) << std::endl;
+      for (q=0; q<Ne; q++) {
+	Qp(p,q) = sinc(trajectoryPos_(p)-keven(q));
+	Qn(p,q) = sinc(trajectoryNeg_(p)-keven(q));
+      }
+    }
+
+    //Qp.print("Qp =");
+    //Qn.print("Qn =");
+    // Pseudo-inverting Q maps measured samples back to the even grid; F then takes the grid to image space
+    // recon operators
+    arma::cx_mat Mp(reconNx_,numSamples_);
+    arma::cx_mat Mn(reconNx_,numSamples_);
+    Mp = F * arma::pinv(Qp);
+    Mn = F * arma::pinv(Qn);
+    for (p=0; p<reconNx_; p++) {
+      for (q=0; q<numSamples_; q++) {
+        Mpos_(p,q) = Mp(p,q);
+        Mneg_(p,q) = Mn(p,q);
+      }
+    }
+    // Mpos_/Mneg_ now hold element-wise copies of Mp/Mn
+    //Mp.print("Mp =");
+    //Mn.print("Mn =");
+
+    // set the operator computed flag
+    operatorComputed_ = true;
+  }
+  // NOTE(review): the product below is assigned to adata_out; it only reaches data_out if as_arma_matrix() wraps the array's memory and this copy-initialization is elided -- confirm
+  // convert to armadillo representation of matrices and vectors
+  arma::Mat<typename stdType<T>::Type> adata_in = as_arma_matrix(&data_in);
+  arma::Mat<typename stdType<T>::Type> adata_out = as_arma_matrix(&data_out);
+  // The ISMRMRD_ACQ_IS_REVERSE flag on the incoming header selects the operator built from trajectoryNeg_
+  // Apply it
+  if (hdr_in.isFlagSet(ISMRMRD::ISMRMRD_ACQ_IS_REVERSE)) {
+    // Negative readout
+    adata_out = as_arma_matrix(&Mneg_) * adata_in;
+  } else {
+    // Forward readout
+    adata_out = as_arma_matrix(&Mpos_) * adata_in;
+  }
+
+  // Copy the input header to the output header and set the size and the center sample
+  hdr_out = hdr_in;
+  hdr_out.number_of_samples = reconNx_;
+  hdr_out.center_sample = reconNx_/2;
+  
+  return 0;
+}
+
+}}
diff --git a/toolboxes/mri/pmri/CMakeLists.txt b/toolboxes/mri/pmri/CMakeLists.txt
index 2244056..863c5fb 100644
--- a/toolboxes/mri/pmri/CMakeLists.txt
+++ b/toolboxes/mri/pmri/CMakeLists.txt
@@ -1,3 +1,3 @@
-IF (CUDA_FOUND)
+if(CUDA_FOUND)
   add_subdirectory(gpu)
-ENDIF (CUDA_FOUND)
+endif()
diff --git a/toolboxes/mri/pmri/gpu/CMakeLists.txt b/toolboxes/mri/pmri/gpu/CMakeLists.txt
index 7d932bc..2104d63 100644
--- a/toolboxes/mri/pmri/gpu/CMakeLists.txt
+++ b/toolboxes/mri/pmri/gpu/CMakeLists.txt
@@ -1,32 +1,35 @@
 if (WIN32)
-ADD_DEFINITIONS(-D__BUILD_GADGETRON_GPUPMRI__)
-ADD_DEFINITIONS(-DWIN32_LEAN_AND_MEAN)
+  ADD_DEFINITIONS(-D__BUILD_GADGETRON_GPUPMRI__)
+  ADD_DEFINITIONS(-DWIN32_LEAN_AND_MEAN)
 endif (WIN32)
 
 if(WIN32)
-link_directories(${Boost_LIBRARY_DIRS})
+  link_directories(${Boost_LIBRARY_DIRS})
 endif(WIN32)
 
-find_package(CULA REQUIRED)
-
-include_directories(
-  ${CUDA_INCLUDE_DIRS}
-  ${CULA_INCLUDE_DIR} 
-  ${Boost_INCLUDE_DIR} 
+include_directories(  
   ${CMAKE_SOURCE_DIR}/toolboxes/core
   ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
   ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
   ${CMAKE_SOURCE_DIR}/toolboxes/operators
+  ${CMAKE_SOURCE_DIR}/toolboxes/fft/gpu
   ${CMAKE_SOURCE_DIR}/toolboxes/nfft/gpu
   ${CMAKE_SOURCE_DIR}/toolboxes/solvers
   ${CMAKE_SOURCE_DIR}/toolboxes/solvers/gpu
+  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
+  ${CUDA_INCLUDE_DIRS}
+  ${Boost_INCLUDE_DIR}
+  ${ARMADILLO_INCLUDE_DIRS} 
   )
 
-cuda_add_library(gpuparallelmri SHARED 
+cuda_add_library(gadgetron_toolbox_gpuparallelmri SHARED 
     b1_map.h
+    spirit_calibration.h
     cuCartesianSenseOperator.h
     cuNonCartesianKtSenseOperator.h
     cuNonCartesianSenseOperator.h
+    cuSpiritOperator.h
+    cuBuffer.h
     cuSenseBuffer.h
     cuSenseBufferCg.h
     cuSenseOperator.h
@@ -36,28 +39,35 @@ cuda_add_library(gpuparallelmri SHARED
     sense_utilities.h
     b1_map.cu
     b1_map_NIH_Souheil.cu
+    spirit_calibration.cu
     sense_utilities.cu
     cuSenseOperator.cu
     cuCartesianSenseOperator.cu
     cuNonCartesianSenseOperator.cu
     cuNonCartesianKtSenseOperator.cu
+    cuBuffer.cpp
     cuSenseBuffer.cpp
     cuSenseBufferCg.cpp
+    cuSpiritBuffer.cpp
+    htgrappa.cpp
     htgrappa.cu
   )
 
-target_link_libraries(gpuparallelmri 
-  gpucore 
-  gpunfft 
+set_target_properties(gadgetron_toolbox_gpuparallelmri PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+
+target_link_libraries(gadgetron_toolbox_gpuparallelmri 
+  gadgetron_toolbox_gpucore 
+  gadgetron_toolbox_gpunfft 
+  gadgetron_toolbox_cpucore
+  gadgetron_toolbox_cpucore_math
   ${Boost_LIBRARIES}
   ${FFTW3_LIBRARIES} 
   ${CUDA_LIBRARIES} 
   ${CUDA_CUFFT_LIBRARIES} 
   ${CUDA_CUBLAS_LIBRARIES} 
-  ${CULA_LIBRARIES}
   )
 
-install(TARGETS gpuparallelmri DESTINATION lib)
+install(TARGETS gadgetron_toolbox_gpuparallelmri DESTINATION lib COMPONENT main)
 
 install(FILES 
 	b1_map.h
@@ -68,6 +78,10 @@ install(FILES
 	cuCartesianSenseOperator.h
 	cuNonCartesianSenseOperator.h
 	cuNonCartesianKtSenseOperator.h
+        cuSpiritOperator.h
+        cuBuffer.h
 	cuSenseBuffer.h
+	cuSenseBufferCg.h
+	cuSpiritBuffer.h
 	gpupmri_export.h
-DESTINATION include)
+DESTINATION include COMPONENT main)
diff --git a/toolboxes/mri/pmri/gpu/b1_map.cu b/toolboxes/mri/pmri/gpu/b1_map.cu
index b060a55..5a87e0b 100644
--- a/toolboxes/mri/pmri/gpu/b1_map.cu
+++ b/toolboxes/mri/pmri/gpu/b1_map.cu
@@ -18,13 +18,13 @@ namespace Gadgetron{
 
   const int kernel_width = 7;
 
-  template<class REAL, unsigned int D> void smooth_correlation_matrices( cuNDArray<complext<REAL> >*, cuNDArray<complext<REAL> >*);
-  template<class REAL> boost::shared_ptr< cuNDArray<complext<REAL> > > extract_csm( cuNDArray<complext<REAL> >*, unsigned int, unsigned int);
-  template<class REAL> void set_phase_reference( cuNDArray<complext<REAL> >*, unsigned int, unsigned int);
-  template<class T> void find_stride( cuNDArray<T> *in, unsigned int dim, unsigned int *stride, std::vector<size_t> *dims );
-  template<class T> boost::shared_ptr< cuNDArray<T> > correlation( cuNDArray<T> *in );
-  template<class T> void rss_normalize( cuNDArray<T> *in_out, unsigned int dim );
-  
+  template<class REAL, unsigned int D> static void smooth_correlation_matrices( cuNDArray<complext<REAL> >*, cuNDArray<complext<REAL> >*);
+  template<class REAL> static boost::shared_ptr< cuNDArray<complext<REAL> > > extract_csm( cuNDArray<complext<REAL> >*, unsigned int, unsigned int);
+  template<class REAL> static void set_phase_reference( cuNDArray<complext<REAL> >*, unsigned int, unsigned int);
+  template<class T> static void find_stride( cuNDArray<T> *in, unsigned int dim, unsigned int *stride, std::vector<size_t> *dims );
+  template<class T> static boost::shared_ptr< cuNDArray<T> > correlation( cuNDArray<T> *in );
+  template<class T> static void rss_normalize( cuNDArray<T> *in_out, unsigned int dim );
+
   //
   // Main method
   //
@@ -120,7 +120,7 @@ namespace Gadgetron{
     }
   }
   
-  template<class REAL, class T> __inline__  __device__ REAL
+  template<class REAL, class T> __inline__  __device__ static REAL
   _rss( unsigned int idx, T *in, unsigned int stride, unsigned int number_of_batches )
   {
     unsigned int in_idx = (idx/stride)*stride*number_of_batches+(idx%stride);
@@ -134,7 +134,7 @@ namespace Gadgetron{
     return rss;
   }
   
-  template<class T> __global__ void
+  template<class T> __global__ static void
   rss_normalize_kernel( T *in_out, unsigned int stride, unsigned int number_of_batches, unsigned int number_of_elements )
   {
     typedef typename realType<T>::Type REAL;
@@ -156,7 +156,7 @@ namespace Gadgetron{
   }
   
   // Normalized RSS
-  template<class T>
+  template<class T> static
   void rss_normalize( cuNDArray<T> *in_out, unsigned int dim )
   {
     unsigned int number_of_batches = in_out->get_size(dim);
@@ -176,8 +176,8 @@ namespace Gadgetron{
     CHECK_FOR_CUDA_ERROR();    
   }
 
-  template<class REAL, class T> __global__ void
-  correlation_kernel( T *in, T *corrm, unsigned int num_batches, unsigned int num_elements )
+  template<class REAL, class T> __global__ static void
+  correlation_kernel( const T * __restrict__ in, T * __restrict__ corrm, unsigned int num_batches, unsigned int num_elements )
   {
     const unsigned int p = blockIdx.x*blockDim.x + threadIdx.x;
     const unsigned int i = threadIdx.y;
@@ -194,7 +194,7 @@ namespace Gadgetron{
   }
   
   // Build correlation matrix
-  template<class T> boost::shared_ptr< cuNDArray<T> > correlation( cuNDArray<T> *in )
+  template<class T> static boost::shared_ptr< cuNDArray<T> > correlation( cuNDArray<T> *in )
   {
     typedef typename realType<T>::Type REAL;
     // Prepare internal array
@@ -226,8 +226,8 @@ namespace Gadgetron{
   }
 
   // Smooth correlation matrices by box filter (1D)
-  template<class REAL> __global__ void
-  smooth_correlation_matrices_kernel( complext<REAL> *corrm, complext<REAL> *corrm_smooth, intd<1>::Type image_dims )
+  template<class REAL> __global__ static void
+  smooth_correlation_matrices_kernel( const complext<REAL> * __restrict__ corrm, complext<REAL> * __restrict__ corrm_smooth, intd<1>::Type image_dims )
   {
     const int idx = blockIdx.x*blockDim.x + threadIdx.x;
     const int batch = blockIdx.y;
@@ -262,8 +262,8 @@ namespace Gadgetron{
   }
 
   // Smooth correlation matrices by box filter (2D)
-  template<class REAL> __global__ void
-  smooth_correlation_matrices_kernel( complext<REAL> *corrm, complext<REAL> *corrm_smooth, intd<2>::Type image_dims )
+  template<class REAL> __global__ static  void
+  smooth_correlation_matrices_kernel( const complext<REAL> * __restrict__ corrm, complext<REAL> * __restrict__ corrm_smooth, intd<2>::Type image_dims )
   {
     const int idx = blockIdx.x*blockDim.x + threadIdx.x;
     const int batch = blockIdx.y;
@@ -317,8 +317,8 @@ namespace Gadgetron{
   }
 
   // Smooth correlation matrices by box filter (3D)
-  template<class REAL> __global__ void
-  smooth_correlation_matrices_kernel( complext<REAL> *corrm, complext<REAL> *corrm_smooth, intd<3>::Type image_dims )
+  template<class REAL> __global__ static  void
+  smooth_correlation_matrices_kernel( const  complext<REAL> * __restrict__ corrm, complext<REAL> * __restrict__ corrm_smooth, intd<3>::Type image_dims )
   {
     const int idx = blockIdx.x*blockDim.x + threadIdx.x;
     const int batch = blockIdx.y;
@@ -368,8 +368,8 @@ namespace Gadgetron{
   }
 
   // Smooth correlation matrices by box filter (3D)
-  template<class REAL> __global__ void
-  smooth_correlation_matrices_kernel( complext<REAL> *corrm, complext<REAL> *corrm_smooth, intd<4>::Type image_dims )
+  template<class REAL> __global__ static void
+  smooth_correlation_matrices_kernel( const complext<REAL> * __restrict__ corrm, complext<REAL> * __restrict__ corrm_smooth, intd<4>::Type image_dims )
   {
     const int idx = blockIdx.x*blockDim.x + threadIdx.x;
     const int batch = blockIdx.y;
@@ -430,8 +430,8 @@ namespace Gadgetron{
   }
 
   // Smooth correlation matrices border by box filter (2D)
-  template<class REAL> __global__ void
-  smooth_correlation_matrices_border_kernel( complext<REAL> *corrm, complext<REAL> *corrm_smooth, intd<2>::Type image_dims, unsigned int number_of_border_threads )
+  template<class REAL> __global__ static void
+  smooth_correlation_matrices_border_kernel( const complext<REAL> * __restrict__ corrm, complext<REAL> * __restrict__ corrm_smooth, intd<2>::Type image_dims, unsigned int number_of_border_threads )
   {
     const int idx = blockIdx.x*blockDim.x + threadIdx.x;
     const int batch = blockIdx.y;
@@ -504,8 +504,8 @@ namespace Gadgetron{
     }
   }
 
-  template<class REAL, unsigned int D> void
-  smooth_correlation_matrices( cuNDArray<complext<REAL> > *corrm, cuNDArray<complext<REAL> > *corrm_smooth )
+  template<class REAL, unsigned int D> static void
+  smooth_correlation_matrices( cuNDArray<complext<REAL> > * corrm, cuNDArray<complext<REAL> > * corrm_smooth )
   {
     typename intd<D>::Type image_dims;
 
@@ -543,8 +543,8 @@ namespace Gadgetron{
   extern __shared__ char shared_mem[];
 
   // Extract CSM
-  template<class REAL> __global__ void
-  extract_csm_kernel( complext<REAL> *corrm, complext<REAL> *csm, unsigned int num_batches, unsigned int num_elements )
+  template<class REAL> __global__ static void
+  extract_csm_kernel( const complext<REAL> * __restrict__ corrm, complext<REAL> * __restrict__ csm, unsigned int num_batches, unsigned int num_elements )
   {
     const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
     const unsigned int i = threadIdx.x;
@@ -598,8 +598,8 @@ namespace Gadgetron{
   }
 
   // Extract CSM
-  template<class REAL> __global__ void
-  extract_csm_kernel( complext<REAL> *corrm, complext<REAL> *csm, unsigned int num_batches, unsigned int num_elements, complext<REAL> *tmp_v )
+  template<class REAL> __global__ static void
+  extract_csm_kernel( const complext<REAL> * __restrict__ corrm, complext<REAL> * __restrict__ csm, unsigned int num_batches, unsigned int num_elements, complext<REAL> * __restrict__ tmp_v )
   {
     const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
 
@@ -646,7 +646,7 @@ namespace Gadgetron{
   }
 
   // Extract CSM
-  template<class REAL> __host__ 
+  template<class REAL> __host__ static
   boost::shared_ptr<cuNDArray<complext<REAL> > > extract_csm(cuNDArray<complext<REAL> > *corrm_in, unsigned int number_of_batches, unsigned int number_of_elements )
   {
     vector<size_t> image_dims;
@@ -681,7 +681,7 @@ namespace Gadgetron{
   }
 
   // Set refence phase
-  template<class REAL> __global__ void
+  template<class REAL> __global__ static void
   set_phase_reference_kernel( complext<REAL> *csm, unsigned int num_batches, unsigned int num_elements )
   {
     const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
@@ -704,7 +704,7 @@ namespace Gadgetron{
   }
   
   // Set reference phase
-  template<class REAL> __host__ 
+  template<class REAL> __host__ static
   void set_phase_reference(cuNDArray<complext<REAL> > *csm, unsigned int number_of_batches, unsigned int number_of_elements )
   {
     dim3 blockDim(128);
diff --git a/toolboxes/mri/pmri/gpu/b1_map_NIH_Souheil.cu b/toolboxes/mri/pmri/gpu/b1_map_NIH_Souheil.cu
index d649b4b..fffc3f2 100644
--- a/toolboxes/mri/pmri/gpu/b1_map_NIH_Souheil.cu
+++ b/toolboxes/mri/pmri/gpu/b1_map_NIH_Souheil.cu
@@ -107,7 +107,7 @@ namespace Gadgetron{
 
     // assemble_D
     template<class T> __global__ void
-    assemble_D_kernel( T* pData, T* pD, int RO, int E1, int N, int CHA, int kss, int halfKs )
+    assemble_D_kernel( const T* __restrict__ pData, T* __restrict__ pD, int RO, int E1, int N, int CHA, int kss, int halfKs )
     {
         typedef typename realType<T>::Type REAL;
 
@@ -227,7 +227,7 @@ namespace Gadgetron{
 
     // compute DH_D
     template<class T> __global__ void
-    computeDH_D_kernel( T* pD, T* pDH_D, int RO, int E1, int N, int CHA, int kss )
+    computeDH_D_kernel( const T* __restrict__ pD, T* __restrict__ pDH_D, int RO, int E1, int N, int CHA, int kss )
     {
         typedef typename realType<T>::Type REAL;
 
@@ -258,7 +258,7 @@ namespace Gadgetron{
 
     // use the shared memory
     template<class T> __global__ void
-    computeDH_D_kernel3( T* pD, T* pDH_D, int RO, int E1, int N, int CHA, int kss, int ks, int num )
+    computeDH_D_kernel3( const T*  __restrict__ pD, T* __restrict__ pDH_D, int RO, int E1, int N, int CHA, int kss, int ks, int num )
     {
         typedef typename realType<T>::Type REAL;
 
@@ -374,7 +374,7 @@ namespace Gadgetron{
 
     // compute V1
     template<class T> __global__ void
-    computeV1_kernel( T* pD, T* pV1, int RO, int E1, int N, int CHA, int kss )
+    computeV1_kernel( const T* __restrict__ pD, T* __restrict__ pV1, int RO, int E1, int N, int CHA, int kss )
     {
         typedef typename realType<T>::Type REAL;
 
@@ -398,7 +398,7 @@ namespace Gadgetron{
     }
 
     template<class T> __global__ void
-    power_method_kernel( T* pDH_D, T* pV1, T* pV, unsigned int RO, unsigned int E1, unsigned int N, unsigned int CHA, unsigned int kss, unsigned int power )
+    power_method_kernel( const T* __restrict__ pDH_D, T* __restrict__ pV1,  T* __restrict__ pV, unsigned int RO, unsigned int E1, unsigned int N, unsigned int CHA, unsigned int kss, unsigned int power )
     {
         typedef typename realType<T>::Type REAL;
 
@@ -507,7 +507,7 @@ namespace Gadgetron{
 
     // compute U1
     template<class T> __global__ void
-    computeU1_kernel( T* pD, T* pV1, T* pU1, int RO, int E1, int N, int CHA, int kss )
+    computeU1_kernel(const  T* __restrict__ pD, const T* __restrict__ pV1, T* __restrict__ pU1, int RO, int E1, int N, int CHA, int kss )
     {
         typedef typename realType<T>::Type REAL;
 
@@ -581,7 +581,7 @@ namespace Gadgetron{
 
     // extract the csm
     template<class T> __global__ void
-    extract_csm_kernel( T* pV1, T* pU1, T* pCSM, unsigned int RO, unsigned int E1, unsigned int N, unsigned int CHA, unsigned int kss )
+    extract_csm_kernel( const T* __restrict__ pV1, const T* __restrict__ pU1, T* __restrict__ pCSM, unsigned int RO, unsigned int E1, unsigned int N, unsigned int CHA, unsigned int kss )
     {
         typedef typename realType<T>::Type REAL;
 
diff --git a/toolboxes/mri/pmri/gpu/cuBuffer.cpp b/toolboxes/mri/pmri/gpu/cuBuffer.cpp
new file mode 100644
index 0000000..260af2a
--- /dev/null
+++ b/toolboxes/mri/pmri/gpu/cuBuffer.cpp
@@ -0,0 +1,197 @@
+#include "cuBuffer.h"
+#include "vector_td_utilities.h"
+#include "cuNDArray_operators.h"
+#include "cuNDArray_elemwise.h"
+#include "cuNDArray_utils.h"
+
+namespace Gadgetron{
+
+  // Starts with default-constructed buffers and NFFT plan; every counter, size and the kernel width is zero.
+  template<class REAL, unsigned int D, bool ATOMICS>
+  cuBuffer<REAL,D,ATOMICS>::cuBuffer()
+  {
+    acc_buffer_.reset(new cuNDArray<_complext>);
+    cyc_buffer_.reset(new cuNDArray<_complext>);
+    nfft_plan_.reset(new cuNFFT_plan<REAL,D,ATOMICS>);
+    num_coils_ = 0;
+    cur_idx_ = 0; cur_sub_idx_ = 0;
+    cycle_length_ = sub_cycle_length_ = 0;
+    acc_buffer_empty_ = true;
+    Gadgetron::clear(matrix_size_); Gadgetron::clear(matrix_size_os_);
+    W_ = REAL(0);
+  }
+  
+  // Clears both buffers and rewinds the cycle counters; buffer sizes and the NFFT plan are left untouched.
+  template<class REAL, unsigned int D, bool ATOMICS>
+  void cuBuffer<REAL,D,ATOMICS>::clear()
+  {
+    Gadgetron::clear(acc_buffer_.get()); Gadgetron::clear(cyc_buffer_.get());
+    cur_sub_idx_ = 0; cur_idx_ = 0;
+    acc_buffer_empty_ = true;
+  }
+
+  // (Re)configures the buffer. The NFFT plan is set up again only when it was never set up or when
+  // the matrix sizes or the kernel width differ from the previous call. The accumulation and cyclic
+  // buffers are reallocated (and all accumulated state is reset) only when they are still empty or
+  // when the oversampled matrix size, the coil count or the cycle count differ from the previous call.
+  template<class REAL, unsigned int D, bool ATOMICS>
+  void cuBuffer<REAL,D,ATOMICS>
+  ::setup( _uint64d matrix_size, _uint64d matrix_size_os, REAL W, 
+           unsigned int num_coils, unsigned int num_cycles, unsigned int num_sub_cycles )
+  {      
+    // Compare against the previous configuration before overwriting it. These flags used to be
+    // computed with '==', i.e. inverted, so unchanged settings wiped the buffers and real changes were ignored.
+    const bool matrix_size_changed = !(matrix_size_ == matrix_size);
+    const bool matrix_size_os_changed = !(matrix_size_os_ == matrix_size_os);
+    const bool kernel_changed = (W_ != W);
+    const bool num_coils_changed = (num_coils_ != num_coils);
+    const bool num_cycles_changed = (cycle_length_ != num_cycles+1);
+
+    matrix_size_ = matrix_size;
+    matrix_size_os_ = matrix_size_os;
+    W_ = W;
+    num_coils_ = num_coils;
+    cycle_length_ = num_cycles+1; // +1 as we need a "working buffer" in addition to 'cycle_length' full ones
+    sub_cycle_length_ = num_sub_cycles;
+
+    if( !nfft_plan_->is_setup() || matrix_size_changed || matrix_size_os_changed || kernel_changed ){
+      nfft_plan_->setup( matrix_size_, matrix_size_os_, W );
+    }
+
+    // The cyclic buffer holds cycle_length_ slabs of accumulation-buffer size, so a changed cycle count needs a new
+    // allocation too. Both buffers and the counters are reset together so the running sum stays consistent with the slabs.
+    if( acc_buffer_->get_number_of_elements() == 0 || cyc_buffer_->get_number_of_elements() == 0 ||
+        matrix_size_os_changed || num_coils_changed || num_cycles_changed ){
+      std::vector<size_t> dims = to_std_vector(matrix_size_os_);
+      dims.push_back(num_coils_);
+      acc_buffer_->create(&dims);
+      dims.push_back(cycle_length_);
+      cyc_buffer_->create(&dims);
+      cuBuffer<REAL,D,ATOMICS>::clear(); // non-virtual on purpose: reset only the state owned by this class
+    }
+  }
+  
+  template<class REAL, unsigned int D, bool ATOMICS> 
+  bool cuBuffer<REAL,D,ATOMICS>::add_frame_data( cuNDArray<_complext> *samples, cuNDArray<_reald> *trajectory )
+  {  // Grids one frame into the current sub-buffer; on every sub_cycle_length_-th call folds it into the accumulation buffer and returns true.
+    if( !samples || !trajectory ){
+      throw std::runtime_error("cuBuffer::add_frame_data: illegal input pointer");
+    }
+    // The size of the last dimension of 'samples' must equal the coil count given to setup()
+    if( num_coils_ != samples->get_size(samples->get_number_of_dimensions()-1) ){
+      throw std::runtime_error("cuBuffer::add_frame_data: unexpected number of coils according to setup");
+    }
+    // NOTE(review): with the check below disabled, dcw_ may be unset and a null pointer is then passed to convolve() -- confirm that is supported
+    //if( dcw_.get() == 0x0 ){
+    //throw std::runtime_error("cuBuffer::density compensation weights not set");
+    //}
+    
+    // Make array containing the "current" buffer from the cyclic buffer
+    // (the slab at index cur_idx_, built from the accumulation buffer's dimensions and a pointer into cyc_buffer_)
+
+    cuNDArray<_complext> cur_buffer(acc_buffer_->get_dimensions().get(),
+				    cyc_buffer_->get_data_ptr()+cur_idx_*acc_buffer_->get_number_of_elements());
+
+    // Preprocess frame
+    //
+
+    nfft_plan_->preprocess( trajectory, cuNFFT_plan<REAL,D,ATOMICS>::NFFT_PREP_NC2C );
+    
+    // Convolve to form k-space frame (accumulation mode)
+    // (presumably the trailing 'true' is the accumulate flag; dcw_ supplies the density compensation weights)
+    
+    nfft_plan_->convolve( samples, &cur_buffer, dcw_.get(), cuNFFT_plan<REAL,D,ATOMICS>::NFFT_CONV_NC2C, true );
+
+    // Update the accumulation buffer (if it is time...)
+    //
+
+    bool cycle_completed = false;
+
+    if( cur_sub_idx_ == sub_cycle_length_-1 ){
+
+      cycle_completed = true;
+      
+      // Buffer complete, add to accumulation buffer
+      // (acc_buffer_ is thereby kept as the sum of the completed slabs of the cycle)
+
+      *acc_buffer_ += cur_buffer;
+      acc_buffer_empty_ = false;
+
+      // Start filling the next buffer in the cycle ...
+      //
+
+      cur_idx_++; 
+      if( cur_idx_ == cycle_length_ ) cur_idx_ = 0;
+
+      // ... but first subtract this next buffer from the accumulation buffer
+      // (so the slab about to be overwritten drops out of the running sum)
+
+      cur_buffer.create( acc_buffer_->get_dimensions().get(), cyc_buffer_->get_data_ptr()+cur_idx_*acc_buffer_->get_number_of_elements() );
+      *acc_buffer_ -= cur_buffer;
+
+      // Clear new buffer before refilling
+      //
+
+      Gadgetron::clear(&cur_buffer);
+    }
+    // Count this frame towards the current sub-cycle, wrapping at sub_cycle_length_
+    cur_sub_idx_++;
+    if( cur_sub_idx_ == sub_cycle_length_ ) cur_sub_idx_ = 0;
+
+    return cycle_completed;
+  }
+
+  // Reconstructs coil images (matrix_size_ x num_coils_) from the accumulation buffer. The result is
+  // also kept in acc_image_. While acc_buffer_empty_ is still set, an image of ones is returned instead.
+  template<class REAL, unsigned int D, bool ATOMICS>
+  boost::shared_ptr< cuNDArray<complext<REAL> > > cuBuffer<REAL,D,ATOMICS>::get_accumulated_coil_images()
+  {
+    // Allocate a fresh output array on every call
+    std::vector<size_t> image_dims = to_std_vector(matrix_size_);
+    image_dims.push_back(num_coils_);
+    acc_image_.reset( new cuNDArray<_complext>(&image_dims) );
+
+    if( !acc_buffer_empty_ ){
+
+      // Finalize gridding of k-space CSM image (convolution has been done already).
+      // The FFT works in place, so operate on a copy of the accumulation buffer.
+      cuNDArray<_complext> gridded(*acc_buffer_);
+
+      // FFT, then deapodize
+      nfft_plan_->fft( &gridded, cuNFFT_plan<REAL,D,ATOMICS>::NFFT_BACKWARDS );
+      nfft_plan_->deapodize( &gridded );
+
+      // Remove oversampling; the crop offset is half the size difference in each dimension
+      crop<_complext,D>( (matrix_size_os_-matrix_size_)>>1, &gridded, acc_image_.get() );
+
+      //if( normalize ){
+      //REAL scale = REAL(1)/(((REAL)cycle_length_-REAL(1))*(REAL)sub_cycle_length_);
+      //*acc_image_ *= scale;
+      //}
+    }
+    else{
+
+      // Not ready to reconstruct yet: return an image of ones
+      fill(acc_image_.get(),_complext(1));
+    }
+
+    return acc_image_;
+  }
+
+  //
+  // Instantiations: float for D = 2, 3, 4 with ATOMICS both true and false;
+  // double for D = 2, 3, 4 with ATOMICS = false only.
+  
+  template class EXPORTGPUPMRI cuBuffer<float,2,true>;
+  template class EXPORTGPUPMRI cuBuffer<float,2,false>;
+  
+  template class EXPORTGPUPMRI cuBuffer<float,3,true>;
+  template class EXPORTGPUPMRI cuBuffer<float,3,false>;
+  
+  template class EXPORTGPUPMRI cuBuffer<float,4,true>;
+  template class EXPORTGPUPMRI cuBuffer<float,4,false>;
+  
+  template class EXPORTGPUPMRI cuBuffer<double,2,false>;
+  template class EXPORTGPUPMRI cuBuffer<double,3,false>;
+  template class EXPORTGPUPMRI cuBuffer<double,4,false>;
+}
diff --git a/toolboxes/mri/pmri/gpu/cuBuffer.h b/toolboxes/mri/pmri/gpu/cuBuffer.h
new file mode 100644
index 0000000..9fdd316
--- /dev/null
+++ b/toolboxes/mri/pmri/gpu/cuBuffer.h
@@ -0,0 +1,61 @@
+#pragma once
+
+#include "vector_td_utilities.h"
+#include "complext.h"
+#include "cuNDArray.h"
+#include "cuNFFT.h"
+#include "gpupmri_export.h"
+
+#include <boost/shared_ptr.hpp>
+
+namespace Gadgetron{
+  
+  template<class REAL, unsigned int D, bool ATOMICS = false> class EXPORTGPUPMRI cuBuffer
+  { // Abstract base class (get_combined_coil_image() is pure virtual) of cuSenseBuffer and cuSpiritBuffer.
+  public:
+    // Shorthands for the complex element type and the D-dimensional size/coordinate vector types.
+    typedef complext<REAL> _complext;
+    typedef typename uint64d<D>::Type _uint64d;
+    typedef typename reald<REAL,D>::Type _reald;
+
+    cuBuffer();
+    virtual ~cuBuffer() {}
+    // Stores the density compensation weights; the shared pointer is kept, the array is not copied.
+    virtual void set_dcw( boost::shared_ptr< cuNDArray<REAL> > dcw ){
+      dcw_ = dcw;
+    }
+    // Returns 1/((cycle_length_-1)*sub_cycle_length_). NOTE(review): the divisor is zero if cycle_length_==1 or sub_cycle_length_==0.
+    inline REAL get_normalization_factor(){
+      return REAL(1)/(((REAL)cycle_length_-REAL(1))*(REAL)sub_cycle_length_);
+    }
+    
+    virtual void clear();
+
+    virtual void setup( _uint64d matrix_size, _uint64d matrix_size_os, REAL W, 
+                        unsigned int num_coils, unsigned int num_cycles, unsigned int num_sub_cycles );
+
+    // Boolean return value indicates whether the accumulation buffer has changed (i.e. a cycle has been completed)
+    virtual bool add_frame_data( cuNDArray<_complext> *samples, cuNDArray<_reald> *trajectory ); 
+    // Returns the per-coil images (matrix_size_ x num_coils_); an image of ones while acc_buffer_empty_ is set.
+    virtual boost::shared_ptr< cuNDArray< complext<REAL> > > get_accumulated_coil_images();
+
+    // Workaround for weird boost/g++ error
+    virtual boost::shared_ptr< cuNDArray< complext<REAL> > > get_combined_coil_image() = 0;
+    
+  protected:
+    _uint64d matrix_size_, matrix_size_os_;        // image matrix size and oversampled matrix size
+    REAL W_;                                       // W as passed to setup() -- presumably the gridding kernel width, TODO confirm
+    unsigned int num_coils_;                       // size of the trailing (coil) dimension of the coil images
+    unsigned int cycle_length_, sub_cycle_length_; // enter get_normalization_factor(); derived from num_cycles/num_sub_cycles -- TODO confirm in cuBuffer.cpp
+    unsigned int cur_idx_, cur_sub_idx_;           // presumably the current positions in the cycle / sub-cycle -- TODO confirm
+    bool acc_buffer_empty_;                        // while true, get_accumulated_coil_images() returns an image of ones
+    boost::shared_ptr< cuNDArray<_complext> > acc_buffer_; // accumulation buffer read by get_accumulated_coil_images()
+    boost::shared_ptr< cuNDArray<_complext> > cyc_buffer_; // presumably the cyclic frame storage -- TODO confirm
+    boost::shared_ptr< cuNDArray<_complext> > acc_image_;  // last result of get_accumulated_coil_images()
+    boost::shared_ptr< cuNDArray<REAL> > dcw_;             // density compensation weights set through set_dcw()
+    boost::shared_ptr< cuNFFT_plan<REAL,D,ATOMICS> > nfft_plan_; // plan used for the FFT and deapodization
+  };
+
+  // To prevent the use of atomics with doubles.
+  template<unsigned int D> class EXPORTGPUPMRI cuBuffer<double,D,true>{};  
+}
diff --git a/toolboxes/mri/pmri/gpu/cuCartesianSenseOperator.cu b/toolboxes/mri/pmri/gpu/cuCartesianSenseOperator.cu
index af98ead..8b18fc2 100644
--- a/toolboxes/mri/pmri/gpu/cuCartesianSenseOperator.cu
+++ b/toolboxes/mri/pmri/gpu/cuCartesianSenseOperator.cu
@@ -6,7 +6,7 @@
 using namespace Gadgetron;
 
 template<class REAL> __global__ void 
-sample_array_kernel( complext<REAL> *in, complext<REAL> *out,
+sample_array_kernel( const complext<REAL> * __restrict__ in, complext<REAL> * __restrict__ out,
 		     unsigned int *idx, 
 		     unsigned int image_elements,
 		     unsigned int samples,
@@ -22,7 +22,7 @@ sample_array_kernel( complext<REAL> *in, complext<REAL> *out,
 }
 
 template<class REAL> __global__ void 
-insert_samples_kernel( complext<REAL> *in, complext<REAL> *out,
+insert_samples_kernel( const complext<REAL> * __restrict__ in, complext<REAL> * __restrict__ out,
 		       unsigned int *idx, 
 		       unsigned int image_elements,
 		       unsigned int samples,
diff --git a/toolboxes/mri/pmri/gpu/cuNonCartesianSenseOperator.cu b/toolboxes/mri/pmri/gpu/cuNonCartesianSenseOperator.cu
index ee7b209..76dfb74 100644
--- a/toolboxes/mri/pmri/gpu/cuNonCartesianSenseOperator.cu
+++ b/toolboxes/mri/pmri/gpu/cuNonCartesianSenseOperator.cu
@@ -3,27 +3,12 @@
 
 using namespace Gadgetron;
 
-/*
-static unsigned int prodv( std::vector<unsigned int> &vec )
-{
-  unsigned int result = 1;
-  for( unsigned int i=0; i<vec.size(); i++ ){
-    result *= vec[i];
-  }
-  return result;
-  }*/
-
 template<class REAL, unsigned int D, bool ATOMICS> void
 cuNonCartesianSenseOperator<REAL,D,ATOMICS>::mult_M( cuNDArray< complext<REAL> >* in, cuNDArray< complext<REAL> >* out, bool accumulate )
 {
   if( !in || !out ){
     throw std::runtime_error("cuNonCartesianSenseOperator::mult_M : 0x0 input/output not accepted");
   }
-  /*  
-  if( (in->get_number_of_elements() != prodv(*this->get_domain_dimensions())) ||
-      (out->get_number_of_elements() != prodv(*this->get_codomain_dimensions())) ) {
-    throw std::runtime_error("cuNonCartesianSenseOperator::mult_M: dimensions mismatch");
-    }*/
 
   std::vector<size_t> full_dimensions = *this->get_domain_dimensions();
   full_dimensions.push_back(this->ncoils_);
@@ -47,11 +32,6 @@ cuNonCartesianSenseOperator<REAL,D,ATOMICS>::mult_MH( cuNDArray< complext<REAL>
   if( !in || !out ){
     throw std::runtime_error("cuNonCartesianSenseOperator::mult_MH : 0x0 input/output not accepted");
   }
-  /*  
-  if( (out->get_number_of_elements() != prodv(*this->get_domain_dimensions())) ||
-      (in->get_number_of_elements() != prodv(*this->get_codomain_dimensions())) ) {
-    throw std::runtime_error("cuNonCartesianSenseOperator::mult_MH: dimensions mismatch");
-    }*/
 
   std::vector<size_t> tmp_dimensions = *this->get_domain_dimensions();
   tmp_dimensions.push_back(this->ncoils_);
diff --git a/toolboxes/mri/pmri/gpu/cuSenseBuffer.cpp b/toolboxes/mri/pmri/gpu/cuSenseBuffer.cpp
index b8a49cb..2b085d6 100644
--- a/toolboxes/mri/pmri/gpu/cuSenseBuffer.cpp
+++ b/toolboxes/mri/pmri/gpu/cuSenseBuffer.cpp
@@ -1,210 +1,44 @@
 #include "cuSenseBuffer.h"
-#include "vector_td_utilities.h"
-#include "cuNDArray_utils.h"
 
-namespace Gadgetron{
+namespace Gadgetron {
 
   template<class REAL, unsigned int D, bool ATOMICS>
-  cuSenseBuffer<REAL,D,ATOMICS>::cuSenseBuffer() 
-  {
-    num_coils_ = 0;
-    cur_idx_ = cur_sub_idx_ = 0;
-    cycle_length_ = 0; sub_cycle_length_ = 0;
-    acc_buffer_empty_ = true;
-    Gadgetron::clear(matrix_size_);
-    Gadgetron::clear(matrix_size_os_);
-    W_ = REAL(0);
-  }
-  
-  template<class REAL, unsigned int D, bool ATOMICS>
-  void cuSenseBuffer<REAL,D,ATOMICS>::clear()
-  {
-    Gadgetron::clear(&acc_buffer_);
-    Gadgetron::clear(&cyc_buffer_);
-
-    cur_idx_ = cur_sub_idx_ = 0;
-    acc_buffer_empty_ = true;
-  }
-
-  template<class REAL, unsigned int D, bool ATOMICS>
-  void cuSenseBuffer<REAL,D,ATOMICS>::
-  setup( _uint64d matrix_size, _uint64d matrix_size_os, REAL W, 
-	 unsigned int num_coils, unsigned int num_cycles, unsigned int num_sub_cycles )
+  void cuSenseBuffer<REAL,D,ATOMICS>
+  ::setup( _uint64d matrix_size, _uint64d matrix_size_os, REAL W, 
+           unsigned int num_coils, unsigned int num_cycles, unsigned int num_sub_cycles )
   {      
-    bool matrix_size_changed = (matrix_size_ == matrix_size);
-    bool matrix_size_os_changed = (matrix_size_os_ == matrix_size_os);
-    bool kernel_changed = (W_ == W);
-    bool num_coils_changed = (num_coils_ == num_coils );
-    bool num_cycles_changed = (cycle_length_ == num_cycles+1);
-    bool is_virgin = (E_.get() == 0x0);
-    //bool num_sub_cycles_changed = (sub_cycle_length_ == num_sub_cycles);
-
-    matrix_size_ = matrix_size;
-    matrix_size_os_ = matrix_size_os;
-    W_ = W;
-    num_coils_ = num_coils;
-    cycle_length_ = num_cycles+1; // +1 as we need a "working buffer" in a addition to 'cycle_length' full ones
-    sub_cycle_length_ = num_sub_cycles;
-
-    std::vector<size_t> dims = to_std_vector(matrix_size_);
-    
-    if( is_virgin )
-      E_ = boost::shared_ptr< cuNonCartesianSenseOperator<REAL,D,ATOMICS> >(new cuNonCartesianSenseOperator<REAL,D,ATOMICS>);
+    cuBuffer<REAL,D,ATOMICS>::setup(matrix_size, matrix_size_os, W, num_coils, num_cycles, num_sub_cycles );
     
-    if( is_virgin || matrix_size_changed || matrix_size_os_changed || kernel_changed ){
+    if( E_.get() == 0x0 ){   
+      std::vector<size_t> dims = to_std_vector(this->matrix_size_);    
+      E_ = boost::shared_ptr< cuNonCartesianSenseOperator<REAL,D,ATOMICS> >(new cuNonCartesianSenseOperator<REAL,D,ATOMICS>);      
       E_->set_domain_dimensions(&dims);
-      E_->setup( matrix_size_, matrix_size_os_, W );
-      nfft_plan_.setup( matrix_size_, matrix_size_os_, W );
-    }
-    
-    dims = to_std_vector(matrix_size_os_);    
-    dims.push_back(num_coils_);
-
-    if( acc_buffer_.get_number_of_elements() == 0 || matrix_size_os_changed || num_coils_changed ){
-      acc_buffer_.create(&dims);
-      Gadgetron::clear( &acc_buffer_ );
-    }
-
-    dims.push_back(cycle_length_);
-    if( cyc_buffer_.get_number_of_elements() == 0 || matrix_size_os_changed || num_coils_changed ){
-      cyc_buffer_.create(&dims);      
-      Gadgetron::clear( &cyc_buffer_);
-    }
-    else if( num_cycles_changed ){
-      // Reuse the old buffer content in this case...
-      // This happens automatically (in all cases?) with the current design?
-    }
+      E_->setup( this->matrix_size_, this->matrix_size_os_, W );
+    }    
   }
   
-  template<class REAL, unsigned int D, bool ATOMICS> 
-  bool cuSenseBuffer<REAL,D,ATOMICS>::add_frame_data( cuNDArray<_complext> *samples, cuNDArray<_reald> *trajectory )
-  {
-    if( !samples || !trajectory ){
-      throw std::runtime_error("cuSenseBuffer::add_frame_data: illegal input pointer");
-    }
-
-    if( num_coils_ != samples->get_size(samples->get_number_of_dimensions()-1) ){
-      throw std::runtime_error("cuSenseBuffer::add_frame_data: unexpected number of coils according to setup");
-    }
-
-    if( dcw_.get() == 0x0 ){
-      throw std::runtime_error("cuSenseBuffer::density compensation weights not set");
-    }
-    
-    // Make array containing the "current" buffer from the cyclic buffer
-    //
-
-    cuNDArray<_complext> cur_buffer(acc_buffer_.get_dimensions().get(),
-				    cyc_buffer_.get_data_ptr()+cur_idx_*acc_buffer_.get_number_of_elements());
-
-    // Preprocess frame
-    //
-
-    nfft_plan_.preprocess( trajectory, cuNFFT_plan<REAL,D,ATOMICS>::NFFT_PREP_NC2C );
-    
-    // Convolve to form k-space frame (accumulation mode)
-    //
-    
-    nfft_plan_.convolve( samples, &cur_buffer, dcw_.get(), cuNFFT_plan<REAL,D,ATOMICS>::NFFT_CONV_NC2C, true );
-
-    // Update the accumulation buffer (if it is time...)
-    //
-
-    bool cycle_completed = false;
-
-    if( cur_sub_idx_ == sub_cycle_length_-1 ){
-
-      cycle_completed = true;
-      
-      // Buffer complete, add to accumulation buffer
-      //
-
-      acc_buffer_ += cur_buffer;
-      acc_buffer_empty_ = false;
-
-      // Start filling the next buffer in the cycle ...
-      //
-
-      cur_idx_++; 
-      if( cur_idx_ == cycle_length_ ) cur_idx_ = 0;
-
-      // ... but first subtract this next buffer from the accumulation buffer
-      //
-
-      cur_buffer.create( acc_buffer_.get_dimensions().get(), cyc_buffer_.get_data_ptr()+cur_idx_*acc_buffer_.get_number_of_elements() );
-      acc_buffer_ -= cur_buffer;
-
-      // Clear new buffer before refilling
-      //
-
-      Gadgetron::clear(&cur_buffer);
-    }
-
-    cur_sub_idx_++;
-    if( cur_sub_idx_ == sub_cycle_length_ ) cur_sub_idx_ = 0;
-
-    return cycle_completed;
-  }
-
-  template<class REAL, unsigned int D, bool ATOMICS>
-  boost::shared_ptr< cuNDArray<complext<REAL> > > cuSenseBuffer<REAL,D,ATOMICS>::get_accumulated_coil_images()
-  {
-    std::vector<size_t> dims = to_std_vector(matrix_size_);
-    dims.push_back(num_coils_);
-
-    acc_image_ = boost::shared_ptr< cuNDArray<_complext> >( new cuNDArray<_complext>(&dims) );
-				    
-    // Check if we are ready to reconstruct. If not return an image of ones...
-    if( acc_buffer_empty_ ){
-      fill(acc_image_.get(),_complext(1));
-      return acc_image_;
-    }
-
-    // Finalize gridding of k-space CSM image (convolution has been done already)
-    //
-
-    // Copy accumulation buffer before in-place FFT
-    cuNDArray<_complext> acc_copy = acc_buffer_;
-
-    // FFT
-    nfft_plan_.fft( &acc_copy, cuNFFT_plan<REAL,D,ATOMICS>::NFFT_BACKWARDS );
-    
-    // Deapodize
-    nfft_plan_.deapodize( &acc_copy );
-    
-    // Remove oversampling
-    crop<_complext,D>( (matrix_size_os_-matrix_size_)>>1, &acc_copy, acc_image_.get() );
-    
-    //if( normalize ){
-    //REAL scale = REAL(1)/(((REAL)cycle_length_-REAL(1))*(REAL)sub_cycle_length_);
-    //*acc_image_ *= scale;
-    //}
-    
-    return acc_image_;
-  }
-
   template<class REAL, unsigned int D, bool ATOMICS>
   boost::shared_ptr< cuNDArray<complext<REAL> > > cuSenseBuffer<REAL,D,ATOMICS>::get_combined_coil_image()
   {
-    if( csm_.get() == 0x0 ){
+    if( this->csm_.get() == 0x0 ){
       throw std::runtime_error("cuSenseBuffer::get_combined_coil_image: csm not set");
     }
-
-    if( acc_image_.get() == 0x0 ){
-      if( get_accumulated_coil_images().get() == 0x0 ){ // This updates acc_image_
-	throw std::runtime_error("cuSenseBuffer::get_combined_coil_image: unable to acquire accumulated coil images");
+    
+    if( this->acc_image_.get() == 0x0 ){
+      if( this->get_accumulated_coil_images().get() == 0x0 ){ // This updates acc_image_
+        throw std::runtime_error("cuSenseBuffer::get_combined_coil_image: unable to acquire accumulated coil images");
       }
     }
     
-    std::vector<size_t> dims = to_std_vector(matrix_size_);
+    std::vector<size_t> dims = to_std_vector(this->matrix_size_);
     boost::shared_ptr< cuNDArray<_complext> > image( new cuNDArray<_complext>(&dims) );
 
-    E_->set_csm(csm_);
-    E_->mult_csm_conj_sum( acc_image_.get(), image.get() );
+    E_->set_csm(this->csm_);
+    E_->mult_csm_conj_sum( this->acc_image_.get(), image.get() );
 
     return image;
   }
-
+  
   //
   // Instantiations
   //
diff --git a/toolboxes/mri/pmri/gpu/cuSenseBuffer.h b/toolboxes/mri/pmri/gpu/cuSenseBuffer.h
index aeffb99..755e849 100644
--- a/toolboxes/mri/pmri/gpu/cuSenseBuffer.h
+++ b/toolboxes/mri/pmri/gpu/cuSenseBuffer.h
@@ -1,61 +1,38 @@
 #pragma once
 
+#include "cuBuffer.h"
 #include "cuNonCartesianSenseOperator.h"
-#include "vector_td_utilities.h"
-#include "complext.h"
-#include "gpupmri_export.h"
+
+#include <stdio.h>
 
 namespace Gadgetron{
-  
-  template<class REAL, unsigned int D, bool ATOMICS = false> class EXPORTGPUPMRI cuSenseBuffer
+
+  template<class REAL, unsigned int D, bool ATOMICS = false> 
+  class EXPORTGPUPMRI cuSenseBuffer : public cuBuffer<REAL,D,ATOMICS>
   {
   public:
     
-    typedef complext<REAL> _complext;
-    typedef typename uint64d<D>::Type _uint64d;
-    typedef typename reald<REAL,D>::Type _reald;
+    typedef typename cuBuffer<REAL,D,ATOMICS>::_complext _complext;
+    typedef typename cuBuffer<REAL,D,ATOMICS>::_uint64d  _uint64d;
+    typedef typename cuBuffer<REAL,D,ATOMICS>::_reald    _reald;
 
-    cuSenseBuffer();
+    cuSenseBuffer() : cuBuffer<REAL,D,ATOMICS>() {}
     virtual ~cuSenseBuffer() {}
-    
+
+    virtual void setup( _uint64d matrix_size, _uint64d matrix_size_os, REAL W, 
+                        unsigned int num_coils, unsigned int num_cycles, unsigned int num_sub_cycles );
+
     virtual void set_csm( boost::shared_ptr< cuNDArray<_complext> > csm ){
       csm_ = csm;
     }
-
-    virtual void set_dcw( boost::shared_ptr< cuNDArray<REAL> > dcw ){
-      dcw_ = dcw;
-    }
-    
-    inline REAL get_normalization_factor(){
-      return REAL(1)/(((REAL)cycle_length_-REAL(1))*(REAL)sub_cycle_length_);
-    }
     
-    virtual void clear();
+    virtual boost::shared_ptr< cuNDArray< complext<REAL> > > get_combined_coil_image();
 
-    virtual void setup( _uint64d matrix_size, _uint64d matrix_size_os, REAL W, 
-			unsigned int num_coils, unsigned int num_cycles, unsigned int num_sub_cycles );
-
-    // Boolean return value indicates whether the accumulation buffer has changed (i.e. a cycle has been completed)
-    virtual bool add_frame_data( cuNDArray<_complext> *samples, cuNDArray<_reald> *trajectory ); 
-
-    virtual boost::shared_ptr< cuNDArray<_complext> > get_accumulated_coil_images();
-    virtual boost::shared_ptr< cuNDArray<_complext> > get_combined_coil_image();
-    
   protected:
-    _uint64d matrix_size_, matrix_size_os_;
-    REAL W_;
-    unsigned int num_coils_;
-    unsigned int cycle_length_, sub_cycle_length_;
-    unsigned int cur_idx_, cur_sub_idx_;
-    bool acc_buffer_empty_;
-    cuNDArray<_complext> acc_buffer_, cyc_buffer_;
-    boost::shared_ptr< cuNDArray<_complext> > acc_image_;
     boost::shared_ptr< cuNDArray<_complext> > csm_;
-    boost::shared_ptr< cuNDArray<REAL> > dcw_;
-    boost::shared_ptr< cuNonCartesianSenseOperator<REAL,D,ATOMICS> > E_;
-    cuNFFT_plan<REAL,D,ATOMICS> nfft_plan_;
+    boost::shared_ptr< cuNonCartesianSenseOperator<REAL,D,ATOMICS> > E_;    
   };
   
   // To prevent the use of atomics with doubles.
-  template<unsigned int D> class EXPORTGPUPMRI cuSenseBuffer<double,D,true>{};
+  template<unsigned int D> class EXPORTGPUPMRI cuSenseBuffer<double,D,true>{};  
 }
diff --git a/toolboxes/mri/pmri/gpu/cuSenseBufferCg.h b/toolboxes/mri/pmri/gpu/cuSenseBufferCg.h
index 2733815..c24385f 100644
--- a/toolboxes/mri/pmri/gpu/cuSenseBufferCg.h
+++ b/toolboxes/mri/pmri/gpu/cuSenseBufferCg.h
@@ -19,7 +19,7 @@ namespace Gadgetron{
     virtual ~cuSenseBufferCg() {}
 
     inline void set_dcw_for_rhs( boost::shared_ptr< cuNDArray<REAL> > dcw ){
-      this->E_->set_dcw(dcw);
+      this->E_->set_dcw(sqrt(dcw.get()));
     }
     
     virtual void preprocess( cuNDArray<_reald> *traj );
diff --git a/toolboxes/mri/pmri/gpu/cuSpiritBuffer.cpp b/toolboxes/mri/pmri/gpu/cuSpiritBuffer.cpp
new file mode 100644
index 0000000..12bd026
--- /dev/null
+++ b/toolboxes/mri/pmri/gpu/cuSpiritBuffer.cpp
@@ -0,0 +1,89 @@
+#include "cuSpiritBuffer.h"
+#include "cuCgSolver.h"
+#include "cuNDArray_elemwise.h"
+#include "cuNDArray_reductions.h"
+#include "hoNDArray_fileio.h"
+
+namespace Gadgetron {
+
+  template<class REAL, unsigned int D, bool ATOMICS>
+  void cuSpiritBuffer<REAL,D,ATOMICS>::
+  setup( _uint64d matrix_size, _uint64d matrix_size_os, REAL W, 
+	 unsigned int num_coils, unsigned int num_cycles, unsigned int num_sub_cycles )
+  {      
+    cuBuffer<REAL,D,ATOMICS>::setup( matrix_size, matrix_size_os, W, num_coils, num_cycles, num_sub_cycles );
+    // Configure the NFFT operator with the matrix sizes now held by the base class.
+    E_->setup( this->matrix_size_, this->matrix_size_os_, W );
+    // Solver settings are hard-coded: at most 5 iterations, tolerance 1e-8, verbose output.
+    cg_.set_encoding_operator( this->E_ );
+    cg_.set_max_iterations( 5 );
+    cg_.set_tc_tolerance( 1e-8 );
+    cg_.set_output_mode( cuCgSolver<_complext>::OUTPUT_VERBOSE);
+  }
+
+  template<class REAL, unsigned int D, bool ATOMICS>
+  void cuSpiritBuffer<REAL,D,ATOMICS>::preprocess( cuNDArray<_reald> *traj ) { // traj is dereferenced unchecked; must be non-null
+    E_->preprocess(traj);
+    std::vector<size_t> dims = *traj->get_dimensions();
+    dims.push_back(this->num_coils_); // codomain shape: trajectory dimensions followed by the number of coils
+    E_->set_codomain_dimensions(&dims);
+  }
+
+  template<class REAL, unsigned int D, bool ATOMICS>
+  boost::shared_ptr< cuNDArray<complext<REAL> > > cuSpiritBuffer<REAL,D,ATOMICS>::get_accumulated_coil_images()
+  {
+    // Apply adjoint operator to get the rhs
+    // (delegated to the base class implementation)
+
+    boost::shared_ptr< cuNDArray<_complext> > rhs = cuBuffer<REAL,D,ATOMICS>::get_accumulated_coil_images();
+
+    // Invert by cg solver
+    // (the rhs is first scaled in place by the normalization factor; the solution replaces acc_image_)
+
+    *rhs *= this->get_normalization_factor();
+    this->acc_image_ = cg_.solve_from_rhs(rhs.get());
+    // NOTE(review): looks like leftover debug output -- every call writes "_coil_images_<counter>.real" via a relative path.
+    static int counter = 0; // function-local static, incremented without synchronization
+    char filename[256];
+    sprintf((char*)filename, "_coil_images_%d.real", counter);
+    write_nd_array<REAL>( abs(this->acc_image_.get())->to_host().get(), filename ); // magnitude of the solution, copied to the host
+    counter++;
+
+    return this->acc_image_;
+  }
+
+  template<class REAL, unsigned int D, bool ATOMICS>
+  boost::shared_ptr< cuNDArray<complext<REAL> > > cuSpiritBuffer<REAL,D,ATOMICS>::get_combined_coil_image()
+  {
+    // Get the individual coil images
+    // (only computed here when no acc_image_ is cached yet; throws std::runtime_error if that fails)
+
+    if( this->acc_image_.get() == 0x0 ){
+      if( this->get_accumulated_coil_images().get() == 0x0 ){ // This updates acc_image_
+        throw std::runtime_error("cuSpiritBuffer::get_combined_coil_image: unable to acquire accumulated coil images");
+      }
+    }
+    
+    // Compute RSS
+    // Coil images hold the D image dimensions followed by the coil dimension, so the coil axis is index D (2 only when D==2).
+
+    return real_to_complex< complext<REAL> >(sqrt(sum(abs_square(this->acc_image_.get()).get(), D).get()).get());
+  }
+  
+  //
+  // Instantiations
+  //
+
+  template class EXPORTGPUPMRI cuSpiritBuffer<float,2,true>;
+  template class EXPORTGPUPMRI cuSpiritBuffer<float,2,false>;
+
+  template class EXPORTGPUPMRI cuSpiritBuffer<float,3,true>;
+  template class EXPORTGPUPMRI cuSpiritBuffer<float,3,false>;
+
+  template class EXPORTGPUPMRI cuSpiritBuffer<float,4,true>;
+  template class EXPORTGPUPMRI cuSpiritBuffer<float,4,false>;
+
+  template class EXPORTGPUPMRI cuSpiritBuffer<double,2,false>;
+  template class EXPORTGPUPMRI cuSpiritBuffer<double,3,false>;
+  template class EXPORTGPUPMRI cuSpiritBuffer<double,4,false>;
+}
diff --git a/toolboxes/mri/pmri/gpu/cuSpiritBuffer.h b/toolboxes/mri/pmri/gpu/cuSpiritBuffer.h
new file mode 100644
index 0000000..a333691
--- /dev/null
+++ b/toolboxes/mri/pmri/gpu/cuSpiritBuffer.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include "cuBuffer.h"
+#include "cuCgSolver.h"
+#include "cuNFFTOperator.h"
+
+namespace Gadgetron{
+
+  template<class REAL, unsigned int D, bool ATOMICS = false> 
+  class EXPORTGPUPMRI cuSpiritBuffer : public cuBuffer<REAL,D,ATOMICS>
+  { // cuBuffer specialization that refines the accumulated coil images with a conjugate gradient solve.
+  public:
+    // Re-export the base class shorthands so they can be used unqualified in this template.
+    typedef typename cuBuffer<REAL,D,ATOMICS>::_complext _complext;
+    typedef typename cuBuffer<REAL,D,ATOMICS>::_uint64d  _uint64d;
+    typedef typename cuBuffer<REAL,D,ATOMICS>::_reald    _reald;
+
+    cuSpiritBuffer() : cuBuffer<REAL,D,ATOMICS>() {
+      E_ = boost::shared_ptr< cuNFFTOperator<REAL,D> >(new cuNFFTOperator<REAL,D>() ); // created up front so setup()/preprocess() can use it
+    }
+    
+    virtual ~cuSpiritBuffer() {}
+    // Forwards the density compensation weights to the NFFT operator E_ (independent of cuBuffer::set_dcw()).
+    inline void set_dcw_for_rhs( boost::shared_ptr< cuNDArray<REAL> > dcw ){
+      this->E_->set_dcw(dcw);
+    }
+
+    virtual void setup( _uint64d matrix_size, _uint64d matrix_size_os, REAL W, 
+                        unsigned int num_coils, unsigned int num_cycles, unsigned int num_sub_cycles );
+    // Preprocesses E_ for the given trajectory and sets its codomain dimensions.
+    virtual void preprocess( cuNDArray<_reald> *traj );
+
+    virtual boost::shared_ptr< cuNDArray< complext<REAL> > > get_accumulated_coil_images();
+    virtual boost::shared_ptr< cuNDArray< complext<REAL> > > get_combined_coil_image();
+    
+  protected:
+    cuCgSolver<_complext> cg_;                       // solver configured in setup()
+    boost::shared_ptr< cuNFFTOperator<REAL,D> > E_;  // encoding operator handed to cg_ (note: no ATOMICS template argument)
+  };
+  
+  // To prevent the use of atomics with doubles.
+  template<unsigned int D> class EXPORTGPUPMRI cuSpiritBuffer<double,D,true>{};  
+}
diff --git a/toolboxes/mri/pmri/gpu/cuSpiritOperator.h b/toolboxes/mri/pmri/gpu/cuSpiritOperator.h
new file mode 100644
index 0000000..86e9d99
--- /dev/null
+++ b/toolboxes/mri/pmri/gpu/cuSpiritOperator.h
@@ -0,0 +1,130 @@
+/** \file cuSpiritOperator.h
+    \brief Spirit regularization operator.
+
+    The operator domain and codomain sizes are the image size times the number of coils. 
+*/
+
+#pragma once
+
+#include "cuDiagonalSumOperator.h"
+
+namespace Gadgetron {
+
+  template<class REAL> class cuSpirit2DOperator : public linearOperator< cuNDArray< complext<REAL> > >
+  { // Linear operator that applies the calibration kernels and then subtracts the input ("kernels minus identity").
+  public:
+    // Creates the diagonal-sum operator D_ through which the kernels are applied.
+    cuSpirit2DOperator() : linearOperator< cuNDArray< complext<REAL> > >() {
+      D_ = boost::shared_ptr< cuDiagonalSumOperator< complext<REAL> > >(new cuDiagonalSumOperator< complext<REAL> >());
+    }
+    
+    virtual ~cuSpirit2DOperator() {}
+    // Stores the kernels (shared, not copied); throws std::runtime_error unless the array has exactly 3 dimensions.
+    virtual void set_calibration_kernels( boost::shared_ptr< cuNDArray< complext<REAL> > > kernels )
+    { 
+      if( kernels->get_number_of_dimensions() != 3 ){
+        throw std::runtime_error("cuSpirit2DOperator::set_calibration_kernels: kernels array must be three-dimensional (x,y,squared num coils)");
+      }
+      kernels_ = kernels;
+    }
+    // Forward operator. Throws std::runtime_error if the kernels are unset or the array shapes do not match.
+    virtual void mult_M( cuNDArray< complext<REAL> > *in, cuNDArray< complext<REAL> > *out, bool accumulate = false )
+    {
+      if( !kernels_.get() ){
+        throw std::runtime_error("cuSpirit2DOperator::mult_M failed: calibration kernels not set");
+      }
+      
+      if( in->get_number_of_dimensions() != 3 || out->get_number_of_dimensions() != 3 ){
+        throw std::runtime_error("cuSpirit2DOperator::mult_M failed: expected exactly 3 dimensions in input and output images");
+      }
+
+      const unsigned int num_coils_squared = kernels_->get_size(2);
+      const unsigned int num_phases_in = in->get_size(2);
+      const unsigned int num_phases_out = out->get_size(2);
+      
+      if( num_phases_out != num_phases_in ){
+        throw std::runtime_error("cuSpirit2DOperator::mult_M failed: array size mismatch between input/output images");
+      }
+
+      if( num_phases_in*num_phases_out != num_coils_squared ){
+        throw std::runtime_error("cuSpirit2DOperator::mult_M failed: the calibration kernels do not correspond to the squared number of coils");
+      }
+      
+      std::vector<size_t> dim_coils = *in->get_dimensions();
+      std::vector<size_t> dim_image = dim_coils; dim_image.pop_back();
+
+      size_t num_elements_image = dim_coils[0]*dim_coils[1];
+      size_t num_elements_coils = num_elements_image*dim_coils[2];
+
+      // Iterate over the coils
+      // (kernel block i and slice i of out are wrapped as arrays around the existing data pointers)
+      
+      for( unsigned int i=0; i<num_phases_out; i++ ){
+        boost::shared_ptr< cuNDArray< complext<REAL> > > tmp_kernel( new cuNDArray< complext<REAL> >(&dim_coils, kernels_->get_data_ptr()+i*num_elements_coils ));
+        cuNDArray< complext<REAL> > tmp_out( &dim_image, out->get_data_ptr()+i*num_elements_image );
+        D_->set_diagonal(tmp_kernel);
+        D_->mult_M( in, &tmp_out, accumulate );
+      }
+      
+      // Subtract identity
+      // (applied to the whole output, also when accumulating)
+
+      *out -= *in;
+    }
+    // Counterpart of mult_M using D_->mult_MH. Throws std::runtime_error on the same conditions as mult_M.
+    virtual void mult_MH( cuNDArray< complext<REAL> > *in, cuNDArray< complext<REAL> > *out, bool accumulate = false )
+    {
+      if( !kernels_.get() ){
+        throw std::runtime_error("cuSpirit2DOperator::mult_MH failed: calibration kernels not set");
+      }
+      
+      if( in->get_number_of_dimensions() != 3 || out->get_number_of_dimensions() != 3 ){
+        throw std::runtime_error("cuSpirit2DOperator::mult_MH failed: expected exactly 3 dimensions in input and output images");
+      }
+
+      const unsigned int num_coils_squared = kernels_->get_size(2);
+      const unsigned int num_phases_in = in->get_size(2);
+      const unsigned int num_phases_out = out->get_size(2);
+      
+      if( num_phases_out != num_phases_in ){
+        throw std::runtime_error("cuSpirit2DOperator::mult_MH failed: array size mismatch between input/output images");
+      }
+
+      if( num_phases_in*num_phases_out != num_coils_squared ){
+        throw std::runtime_error("cuSpirit2DOperator::mult_MH failed: the calibration kernels do not correspond to the squared number of coils");
+      }
+      
+      std::vector<size_t> dim_coils = *in->get_dimensions();
+      std::vector<size_t> dim_image = dim_coils; dim_image.pop_back();
+
+      size_t num_elements_image = dim_coils[0]*dim_coils[1];
+      size_t num_elements_coils = num_elements_image*dim_coils[2];
+
+      // Iterate over the coils
+      // (every slice i of in contributes to the whole of out, hence the forced accumulation after the first pass)
+      
+      for( unsigned int i=0; i<num_phases_in; i++ ){
+        boost::shared_ptr< cuNDArray< complext<REAL> > > tmp_kernel( new cuNDArray< complext<REAL> >(&dim_coils, kernels_->get_data_ptr()+i*num_elements_coils ));
+        cuNDArray< complext<REAL> > tmp_in( &dim_image, in->get_data_ptr()+i*num_elements_image );
+        D_->set_diagonal(tmp_kernel);
+        if( i==0 && !accumulate )
+          D_->mult_MH( &tmp_in, out, false );
+        else
+          D_->mult_MH( &tmp_in, out, true );
+      }
+
+      // Subtract identity
+      // (applied to the whole output, also when accumulating)
+      
+      *out -= *in;
+    }
+    // Returns a copy of this operator created by the linearOperator clone helper.
+    virtual boost::shared_ptr< linearOperator< cuNDArray< complext<REAL> > > > clone() {
+      return linearOperator< cuNDArray< complext<REAL> > >::clone(this);
+    }
+
+  protected:    
+    boost::shared_ptr< cuNDArray< complext<REAL> > > kernels_;       // calibration kernels; third dimension has num_coils^2 entries
+    boost::shared_ptr< cuDiagonalSumOperator< complext<REAL> > > D_; // helper operator that receives one kernel block at a time
+  };
+}
diff --git a/toolboxes/mri/pmri/gpu/htgrappa.cpp b/toolboxes/mri/pmri/gpu/htgrappa.cpp
new file mode 100644
index 0000000..0910cb1
--- /dev/null
+++ b/toolboxes/mri/pmri/gpu/htgrappa.cpp
@@ -0,0 +1,48 @@
+#include "htgrappa.h"
+#include "hoNDArray.h"
+#include "hoNDArray_fileio.h"
+#include "hoNDArray_utils.h"
+#include "hoNDArray_linalg.h"
+
+#ifdef USE_OMP
+#include <omp.h>
+#endif
+
+
+/*
+  This file is used to hide certain Armadillo calls from the nvcc compiler. If Armadillo functions need to 
+  be called from a *.cu file, it is preferable to wrap the calls in a function and place that function in 
+  a *.cpp file, so that the Armadillo code will not be compiled by nvcc.
+
+  Some error handling may be needed in these functions, but eventually SymmetricHermitianPositiveDefiniteLinearSystem_posv
+  will be renamed and made to throw exceptions and then it should be handled. 
+
+ */
+
+
+
+namespace Gadgetron
+{
+  template <class T> void ht_grappa_solve_spd_system(hoNDArray<T> *A, hoNDArray<T> *B) {
+    /*
+      Calls posv(*A, *B) with OpenMP threading switched off (A and B are dereferenced unchecked). There seems to be a bad interaction
+      between OpenMP, CUDA, and BLAS, so far only observed from *.cu files (or functions called from them), but it may be more general.
+      The previous limit is read with omp_get_max_threads(): omp_get_num_threads() returns 1 outside a parallel region, so
+      restoring from it would leave OpenMP limited to one thread. This is a temporary fix that we should keep an eye on.
+     */
+#ifdef USE_OMP
+    int num_threads = omp_get_max_threads();
+    omp_set_num_threads(1);
+#endif //USE_OMP
+
+    posv(*A, *B);
+
+#ifdef USE_OMP
+    omp_set_num_threads(num_threads);
+#endif //USE_OMP
+
+  }
+
+  template void ht_grappa_solve_spd_system< float_complext >(hoNDArray< float_complext > *A, hoNDArray< float_complext > *B);
+  
+}
diff --git a/toolboxes/mri/pmri/gpu/htgrappa.cu b/toolboxes/mri/pmri/gpu/htgrappa.cu
index a1bd753..f4f6457 100644
--- a/toolboxes/mri/pmri/gpu/htgrappa.cu
+++ b/toolboxes/mri/pmri/gpu/htgrappa.cu
@@ -1,18 +1,20 @@
 #include "htgrappa.h"
-#include "hoNDArray_fileio.h"
+#include "cuNDArray_elemwise.h"
 #include "cuNDFFT.h"
 #include "GadgetronTimer.h"
 #include "GPUTimer.h"
-
+#include "cuNDArray_elemwise.h"
 #include "CUBLASContextProvider.h"
+#include "hoNDArray_fileio.h"
+#include "hoNDArray_utils.h"
 
 #include <cublas_v2.h>
-#include <cula_lapack_device.h>
+//#include <cula_lapack_device.h>
 #include <iostream>
 
 namespace Gadgetron {
 
-  int2 vec_to_int2(std::vector<unsigned int> vec)
+  static int2 vec_to_int2(std::vector<unsigned int> vec)
   {
     int2 ret; ret.x = 0; ret.y = 0;
     if (vec.size() < 2) {
@@ -24,38 +26,14 @@ namespace Gadgetron {
     return ret;
   }
 
-  __global__ void clear_array(complext<float> * in, unsigned long int elements)
-  {
-    unsigned long idx_in = blockIdx.x*blockDim.x+threadIdx.x;
-    if (idx_in < elements) {
-      in[idx_in] = complext<float>(0);
-    }
-  }
-
-  int clear(cuNDArray<complext<float> >* in)
-  {
-    dim3 blockDim(512,1,1);
-    dim3 gridDim((unsigned int) ceil((double)in->get_number_of_elements()/blockDim.x), 1, 1 );
-
-    clear_array<<< gridDim, blockDim >>>( in->get_data_ptr(), in->get_number_of_elements());
-
-    cudaError_t err = cudaGetLastError();
-    if( err != cudaSuccess ){
-      std::cerr << "clear : Error during kernel call: " << cudaGetErrorString(err) << std::endl;
-      return -1;
-    }
-
-    return 0;
-  }
-
-  template <class T> int write_cuNDArray_to_disk(cuNDArray<T>* a, const char* filename)
+  template <class T> static int write_cuNDArray_to_disk(cuNDArray<T>* a, const char* filename)
   {
     boost::shared_ptr< hoNDArray<T> > host = a->to_host();
     write_nd_array<complext<float> >(host.get(), filename);
     return 0;
   }
-
-  template <class T> __global__ void form_grappa_system_matrix_kernel_2d(T* ref_data,
+  
+  template <class T> __global__ void form_grappa_system_matrix_kernel_2d(const T* __restrict__ ref_data,
                                                                          int2 dims,
                                                                          int source_coils,
                                                                          int target_coils,
@@ -64,8 +42,8 @@ namespace Gadgetron {
                                                                          int2 kernel_size,
                                                                          int acceleration_factor,
                                                                          int set_number,
-                                                                         T* out_matrix,
-                                                                         T* b)
+                                                                         T* __restrict__ out_matrix,
+                                                                         T* __restrict__ b)
   {
     long idx_in = blockIdx.x*blockDim.x+threadIdx.x;
     int klocations = ros.x*ros.y;
@@ -103,8 +81,8 @@ namespace Gadgetron {
   }
 
   //TODO: This should take source and target coils into consideration
-  template <class T> __global__ void copy_grappa_coefficients_to_kernel_2d(T* coeffs,
-                                                                           T* kernel,
+  template <class T> __global__ void copy_grappa_coefficients_to_kernel_2d(const T* __restrict__ coeffs,
+                                                                           T* __restrict__ kernel,
                                                                            int source_coils,
                                                                            int target_coils,
                                                                            int2 kernel_size,
@@ -138,8 +116,8 @@ namespace Gadgetron {
     }
   }
 
-  template <class T> __global__ void copy_grappa_kernel_to_kspace_2d(T* kernel,
-                                                                     T* out,
+  template <class T> __global__ void copy_grappa_kernel_to_kspace_2d(const T* __restrict__ kernel,
+                                                                     T* __restrict__ out,
                                                                      int2 dims,
                                                                      int2 kernel_size,
                                                                      int coils)
@@ -162,9 +140,9 @@ namespace Gadgetron {
     }
   }
 
-  __global__ void scale_and_add_unmixing_coeffs(complext<float> * unmixing,
-                                                complext<float> * csm,
-                                                complext<float> * out,
+  __global__ void scale_and_add_unmixing_coeffs(const complext<float> * __restrict__ unmixing,
+                                                const complext<float> * __restrict__ csm,
+                                                complext<float> * __restrict__ out,
                                                 int elements,
                                                 int coils,
                                                 float scale_factor)
@@ -181,8 +159,8 @@ namespace Gadgetron {
     }
   }
 
-  __global__ void scale_and_copy_unmixing_coeffs(complext<float> * unmixing,
-                                                 complext<float> * out,
+  __global__ void scale_and_copy_unmixing_coeffs(const complext<float> * __restrict__ unmixing,
+                                                 complext<float> * __restrict__ out,
                                                  int elements,
                                                  int coils,
                                                  float scale_factor)
@@ -197,8 +175,8 @@ namespace Gadgetron {
     }
   }
 
-  __global__ void conj_csm_coeffs(complext<float> * csm,
-                                  complext<float> * out,
+  __global__ void conj_csm_coeffs(const complext<float> * __restrict__ csm,
+                                  complext<float> * __restrict__ out,
                                   int source_elements,
                                   int target_elements)
   {
@@ -259,7 +237,7 @@ namespace Gadgetron {
 
     if (acceleration_factor == 1) {
       dim3 blockDim(512,1,1);
-      dim3 gridDim((unsigned int) ceil((1.0f*elements_per_coil*source_coils)/blockDim.x), 1, 1 );
+      dim3 gridDim((unsigned int) std::ceil((1.0f*elements_per_coil*source_coils)/blockDim.x), 1, 1 );
 
       conj_csm_coeffs<<< gridDim, blockDim >>>( b1->get_data_ptr(),
                                                 out_mixing_coeff->get_data_ptr(),
@@ -267,7 +245,7 @@ namespace Gadgetron {
                                                 b1->get_number_of_elements());
 
       std::list<unsigned int>::iterator it;
-      gridDim = dim3((unsigned int) ceil((1.0f*(elements_per_coil))/blockDim.x), 1, 1 );
+      gridDim = dim3((unsigned int) std::ceil((1.0f*(elements_per_coil))/blockDim.x), 1, 1 );
       int uncombined_channel_no = 0;
       for ( it = uncombined_channels->begin(); it != uncombined_channels->end(); it++ ) {
         uncombined_channel_no++;
@@ -293,10 +271,9 @@ namespace Gadgetron {
     std::vector<size_t> rosTmp = *ref_data->get_dimensions();
 
     std::vector<unsigned int> ros(rosTmp.size());
-    for ( unsigned int ii=0; ii<rosTmp.size(); ii++ )
-      {
-        ros[ii ] = rosTmp[ii];
-      }
+    for ( unsigned int ii=0; ii<rosTmp.size(); ii++ ){
+      ros[ii] = rosTmp[ii];
+    }
 
     ros.pop_back(); //Remove the number of coils
     std::vector<unsigned int> ros_offset(ref_data->get_number_of_dimensions(),0);
@@ -373,13 +350,6 @@ namespace Gadgetron {
     cuNDArray<T> AHrhs = cuNDArray<T>(&AHrhs_dims);
 
     cublasHandle_t handle = *CUBLASContextProvider::instance()->getCublasHandle();
-    /*
-      if (cublasCreate_v2(&handle) != CUBLAS_STATUS_SUCCESS) {
-      std::cerr << "htgrappa_calculate_grappa_unmixing: unable to create cublas handle" << std::endl;
-      return -1;
-
-      }
-    */
 
     std::vector<size_t> gkernel_dims;
     gkernel_dims.push_back((*kernel_size)[0]);
@@ -402,7 +372,7 @@ namespace Gadgetron {
         std::string appendix = ostr.str();
 
         dim3 blockDim(512,1,1);
-        dim3 gridDim((unsigned int) ceil((1.0f*kspace_locations)/blockDim.x), 1, 1 );
+        dim3 gridDim((unsigned int) std::ceil((1.0f*kspace_locations)/blockDim.x), 1, 1 );
 
         form_grappa_system_matrix_kernel_2d<<< gridDim, blockDim >>>( ref_data->get_data_ptr(), dims,
                                                                       source_coils, target_coils, dros, dros_offset,
@@ -433,119 +403,137 @@ namespace Gadgetron {
         cublasStatus_t stat;
 
         if ( set == 0 )
-        {
+          {
             {
-                //GPUTimer t2("Cgemm call");
-                stat = cublasCgemm(handle, CUBLAS_OP_C, CUBLAS_OP_N,
-                                                n,n,m,(float2*) &alpha,
-                                                (float2*) system_matrix.get_data_ptr(), m,
-                                                (float2*) system_matrix.get_data_ptr(), m,
-                                                (float2*) &beta, (float2*) AHA.get_data_ptr(), n);
-
-                if (stat != CUBLAS_STATUS_SUCCESS) {
-                    std::cerr << "htgrappa_calculate_grappa_unmixing: Failed to form AHA product using cublas gemm" << std::endl;
-                    std::cerr << "---- cublas error code " << stat << std::endl;
-                    return -1;
-                }
+              //GPUTimer t2("Cgemm call");
+              stat = cublasCgemm(handle, CUBLAS_OP_C, CUBLAS_OP_N,
+                                 n,n,m,(float2*) &alpha,
+                                 (float2*) system_matrix.get_data_ptr(), m,
+                                 (float2*) system_matrix.get_data_ptr(), m,
+                                 (float2*) &beta, (float2*) AHA.get_data_ptr(), n);
+                
+              if (stat != CUBLAS_STATUS_SUCCESS) {
+                std::cerr << "htgrappa_calculate_grappa_unmixing: Failed to form AHA product using cublas gemm" << std::endl;
+                std::cerr << "---- cublas error code " << stat << std::endl;
+                return -1;
+              }
             }
-
+            
             {
-                //timer.start("copy AHA to host");
-                if (cudaMemcpy(pAHA, AHA.get_data_ptr(), AHA_host.get_number_of_bytes(), cudaMemcpyDeviceToHost) != cudaSuccess)
+              //timer.start("copy AHA to host");
+              if (cudaMemcpy(pAHA, AHA.get_data_ptr(), AHA_host.get_number_of_bytes(), cudaMemcpyDeviceToHost) != cudaSuccess)
                 {
-                    std::cerr << "htgrappa_calculate_grappa_unmixing: Failed to copy AHA to host" << std::endl;
-                    std::cerr << "---- cublas error code " << stat << std::endl;
-                    return -1;
+                  std::cerr << "htgrappa_calculate_grappa_unmixing: Failed to copy AHA to host" << std::endl;
+                  std::cerr << "---- cublas error code " << stat << std::endl;
+                  return -1;
                 }
-                //timer.stop();
+              //timer.stop();
 
-                //timer.start("apply the regularization");
-                // apply the regularization
-                double lamda = 0.0005;
+              //timer.start("apply the regularization");
+              // apply the regularization
+              double lamda = 0.0005;
 
-                double trA = std::sqrt(pAHA[0].x*pAHA[0].x + pAHA[0].y*pAHA[0].y);
-                size_t c;
-                for ( c=1; c<n; c++ )
+              double trA = std::sqrt(pAHA[0].x*pAHA[0].x + pAHA[0].y*pAHA[0].y);
+              size_t c;
+              for ( c=1; c<n; c++ )
                 {
-                    float x = pAHA[c+c*n].x;
-                    float y = pAHA[c+c*n].y;
-                    trA += std::sqrt(x*x+y*y);
+                  float x = pAHA[c+c*n].x;
+                  float y = pAHA[c+c*n].y;
+                  trA += std::sqrt(x*x+y*y);
                 }
 
-                double value = trA*lamda/n;
-                for ( c=0; c<n; c++ )
+              double value = trA*lamda/n;
+              for ( c=0; c<n; c++ )
                 {
-                    float x = pAHA[c+c*n].x;
-                    float y = pAHA[c+c*n].y;
-                    pAHA[c+c*n].x = std::sqrt(x*x+y*y) + value;
-                    pAHA[c+c*n].y = 0;
+                  float x = pAHA[c+c*n].x;
+                  float y = pAHA[c+c*n].y;
+                  pAHA[c+c*n].x = std::sqrt(x*x+y*y) + value;
+                  pAHA[c+c*n].y = 0;
                 }
-                //timer.stop();
+              //timer.stop();
 
-                //timer.start("copy the AHA to device");
-                if (cudaMemcpy(AHA.get_data_ptr(), pAHA, AHA_host.get_number_of_bytes(), cudaMemcpyHostToDevice) != cudaSuccess)
+              //timer.start("copy the AHA to device");
+              if (cudaMemcpy(AHA.get_data_ptr(), pAHA, AHA_host.get_number_of_bytes(), cudaMemcpyHostToDevice) != cudaSuccess)
                 {
-                    std::cerr << "htgrappa_calculate_grappa_unmixing: Failed to copy regularized AHA to device" << std::endl;
-                    std::cerr << "---- cublas error code " << stat << std::endl;
-                    return -1;
+                  std::cerr << "htgrappa_calculate_grappa_unmixing: Failed to copy regularized AHA to device" << std::endl;
+                  std::cerr << "---- cublas error code " << stat << std::endl;
+                  return -1;
                 }
-                //timer.stop();
+              //timer.stop();
             }
 
             AHA_set0 = AHA;
-        }
+          }
         else
-        {
+          {
             AHA = AHA_set0;
-        }
+          }
 
-      //  {
-      //      std::string filename = debugFolder+appendix+"AHA.cplx";
-            //write_cuNDArray_to_disk(&AHA, filename.c_str());
-      //  }
+        //  {
+        //      std::string filename = debugFolder+appendix+"AHA.cplx";
+        //write_cuNDArray_to_disk(&AHA, filename.c_str());
+        //  }
 
         {
 
-            //GPUTimer timer("GRAPPA cublas gemm");
-            //TODO: Sort out arguments for source and target coils here.
-            stat = cublasCgemm(handle, CUBLAS_OP_C, CUBLAS_OP_N,
-                    n,target_coils,m,(float2*) &alpha,
-                    (float2*) system_matrix.get_data_ptr(), m,
-                    (float2*) b.get_data_ptr(), m,
-                    (float2*) &beta, (float2*)AHrhs.get_data_ptr(), n);
+          //GPUTimer timer("GRAPPA cublas gemm");
+          //TODO: Sort out arguments for source and target coils here.
+          stat = cublasCgemm(handle, CUBLAS_OP_C, CUBLAS_OP_N,
+                             n,target_coils,m,(float2*) &alpha,
+                             (float2*) system_matrix.get_data_ptr(), m,
+                             (float2*) b.get_data_ptr(), m,
+                             (float2*) &beta, (float2*)AHrhs.get_data_ptr(), n);
 
         }
 
-      //  {
-      //      std::string filename = debugFolder+appendix+"AHrhs.cplx";
-            //write_cuNDArray_to_disk(&AHrhs, filename.c_str());
-      //  }
+        //  {
+        //      std::string filename = debugFolder+appendix+"AHrhs.cplx";
+        //write_cuNDArray_to_disk(&AHrhs, filename.c_str());
+        //  }
 
         if (stat != CUBLAS_STATUS_SUCCESS) {
-            std::cerr << "htgrappa_calculate_grappa_unmixing: Failed to form AHrhs product using cublas gemm" << std::endl;
-            std::cerr << "---- cublas error code " << stat << std::endl;
-            return -1;
+          std::cerr << "htgrappa_calculate_grappa_unmixing: Failed to form AHrhs product using cublas gemm" << std::endl;
+          std::cerr << "---- cublas error code " << stat << std::endl;
+          return -1;
         }
 
 
-        culaStatus s;
-        /*
-          s = culaInitialize();
-          if(s != culaNoError) {
-          std::cerr << "htgrappa: failed to initialize CULA" << std::endl;
-          return -1;
-          }
-        */
-
-        s = culaDeviceCgels( 'N', n, n, target_coils,
+	/*
+	{
+	  //This is the OLD GPU code using CULA
+	  GPUTimer gpu_invert_time("GPU Inversion time"); 
+	  culaStatus s;
+	  s = culaDeviceCgels( 'N', n, n, target_coils,
                              (culaDeviceFloatComplex*)AHA.get_data_ptr(), n,
                              (culaDeviceFloatComplex*)AHrhs.get_data_ptr(), n);
 
 
-        if (s != culaNoError) {
-          std::cout << "htgrappa_calculate_grappa_unmixing: linear solve failed" << std::endl;
-          return -1;
-        }
+	  if (s != culaNoError) {
+	    std::cout << "htgrappa_calculate_grappa_unmixing: linear solve failed" << std::endl;
+	    return -1;
+	  }
+	}
+	*/
+      
+
+	{
+	  //It actually turns out to be faster to do this inversion on the CPU. Problem is probably too small for GPU to make sense
+	  //GPUTimer cpu_invert_time("CPU Inversion time");
+	  boost::shared_ptr< hoNDArray<T> > AHA_h = AHA.to_host();
+	  boost::shared_ptr< hoNDArray<T> > AHrhs_h = AHrhs.to_host();
+	  
+	  std::vector<size_t> perm_dim;
+	  perm_dim.push_back(1);
+	  perm_dim.push_back(0);
+	  
+	  permute(AHA_h.get(),&perm_dim);
+	  permute(AHrhs_h.get(),&perm_dim);
+
+	  ht_grappa_solve_spd_system(AHA_h.get(), AHrhs_h.get());	  
+
+	  permute(AHrhs_h.get(),&perm_dim);
+	  AHrhs = cuNDArray<T>(*AHrhs_h);
+	}
 
 #if 0
         size_t free = 0, total = 0;
@@ -566,7 +554,7 @@ namespace Gadgetron {
 		    //write_cuNDArray_to_disk(&AHrhs, filename.c_str());
         //  }
 
-        gridDim = dim3((unsigned int) ceil((1.0f*n*source_coils)/blockDim.x), 1, 1 );
+        gridDim = dim3((unsigned int) std::ceil((1.0f*n*source_coils)/blockDim.x), 1, 1 );
 
         //TODO: This should be target coils used as argument here.
         copy_grappa_coefficients_to_kernel_2d<<< gridDim, blockDim >>>( AHrhs.get_data_ptr(),
@@ -613,7 +601,7 @@ namespace Gadgetron {
         clear(&tmp_mixing);
 
         dim3 blockDim(512,1,1);
-        dim3 gridDim((unsigned int) ceil((1.0f*kernel_elements)/blockDim.x), 1, 1 );
+        dim3 gridDim((unsigned int) std::ceil((1.0f*kernel_elements)/blockDim.x), 1, 1 );
 
         //TODO: Take source and target into consideration
         copy_grappa_kernel_to_kspace_2d<<< gridDim, blockDim >>>((gkernel.get_data_ptr()+(c*kernel_elements)),
@@ -633,7 +621,7 @@ namespace Gadgetron {
 
         float scale_factor = total_elements;
 
-        gridDim = dim3((unsigned int) ceil(1.0f*total_elements/blockDim.x), 1, 1 );
+        gridDim = dim3((unsigned int) std::ceil(1.0f*total_elements/blockDim.x), 1, 1 );
         scale_and_add_unmixing_coeffs<<< gridDim, blockDim >>>(tmp_mixing.get_data_ptr(),
                                                                (b1->get_data_ptr()+ c*total_elements),
                                                                out_mixing_coeff->get_data_ptr(),
@@ -667,11 +655,11 @@ namespace Gadgetron {
     return 0;
   }
 
-template <class T> int inverse_clib_matrix(cuNDArray<T>* A,
-                                cuNDArray<T>* b,
-                                cuNDArray<T>* coeff,
-                                double lamda)
-{
+  template <class T> int inverse_clib_matrix(cuNDArray<T>* A,
+                                             cuNDArray<T>* b,
+                                             cuNDArray<T>* coeff,
+                                             double lamda)
+  {
     // A: M*N
     // b: M*K
     size_t M = A->get_size(0);
@@ -704,18 +692,18 @@ template <class T> int inverse_clib_matrix(cuNDArray<T>* A,
     //}
 
     {
-        //GPUTimer t2("compute AHA ...");
-        cublasStatus_t stat = cublasCgemm(handle, CUBLAS_OP_C, CUBLAS_OP_N,
-                N,N,M,(float2*) &alpha,
-                (float2*) A->get_data_ptr(), M,
-                (float2*) A->get_data_ptr(), M,
-                (float2*) &beta, (float2*) AHA.get_data_ptr(), N);
-
-        if (stat != CUBLAS_STATUS_SUCCESS)
+      //GPUTimer t2("compute AHA ...");
+      cublasStatus_t stat = cublasCgemm(handle, CUBLAS_OP_C, CUBLAS_OP_N,
+                                        N,N,M,(float2*) &alpha,
+                                        (float2*) A->get_data_ptr(), M,
+                                        (float2*) A->get_data_ptr(), M,
+                                        (float2*) &beta, (float2*) AHA.get_data_ptr(), N);
+
+      if (stat != CUBLAS_STATUS_SUCCESS)
         {
-            std::cerr << "inverse_clib_matrix: Failed to form AHA product using cublas gemm" << std::endl;
-            std::cerr << "---- cublas error code " << stat << std::endl;
-            return -1;
+          std::cerr << "inverse_clib_matrix: Failed to form AHA product using cublas gemm" << std::endl;
+          std::cerr << "---- cublas error code " << stat << std::endl;
+          return -1;
         }
     }
 
@@ -725,18 +713,18 @@ template <class T> int inverse_clib_matrix(cuNDArray<T>* A,
     //}
 
     {
-        //GPUTimer t2("compute AHrhs ...");
-        cublasStatus_t stat = cublasCgemm(handle, CUBLAS_OP_C, CUBLAS_OP_N,
-                N,K,M,(float2*) &alpha,
-                (float2*) A->get_data_ptr(), M,
-                (float2*) b->get_data_ptr(), M,
-                (float2*) &beta, (float2*)coeff->get_data_ptr(), N);
-
-        if (stat != CUBLAS_STATUS_SUCCESS)
+      //GPUTimer t2("compute AHrhs ...");
+      cublasStatus_t stat = cublasCgemm(handle, CUBLAS_OP_C, CUBLAS_OP_N,
+                                        N,K,M,(float2*) &alpha,
+                                        (float2*) A->get_data_ptr(), M,
+                                        (float2*) b->get_data_ptr(), M,
+                                        (float2*) &beta, (float2*)coeff->get_data_ptr(), N);
+
+      if (stat != CUBLAS_STATUS_SUCCESS)
         {
-            std::cerr << "inverse_clib_matrix: Failed to form AHrhs product using cublas gemm" << std::endl;
-            std::cerr << "---- cublas error code " << stat << std::endl;
-            return -1;
+          std::cerr << "inverse_clib_matrix: Failed to form AHrhs product using cublas gemm" << std::endl;
+          std::cerr << "---- cublas error code " << stat << std::endl;
+          return -1;
         }
     }
 
@@ -747,7 +735,7 @@ template <class T> int inverse_clib_matrix(cuNDArray<T>* A,
 
     // apply the regularization
     if ( lamda > 0 )
-    {
+      {
         hoNDArray<T> AHA_host(N, N);
         float2* pAHA = (float2*) AHA_host.get_data_ptr();
 
@@ -755,10 +743,10 @@ template <class T> int inverse_clib_matrix(cuNDArray<T>* A,
 
         //timer.start("copy AHA to host");
         if (cudaMemcpy(pAHA, AHA.get_data_ptr(), AHA_host.get_number_of_bytes(), cudaMemcpyDeviceToHost) != cudaSuccess)
-        {
+          {
             std::cerr << "inverse_clib_matrix: Failed to copy AHA to host" << std::endl;
             return -1;
-        }
+          }
         //timer.stop();
 
         //timer.start("apply the regularization");
@@ -766,35 +754,55 @@ template <class T> int inverse_clib_matrix(cuNDArray<T>* A,
         double trA = std::sqrt(pAHA[0].x*pAHA[0].x + pAHA[0].y*pAHA[0].y);
         size_t c;
         for ( c=1; c<N; c++ )
-        {
+          {
             float x = pAHA[c+c*N].x;
             float y = pAHA[c+c*N].y;
             trA += std::sqrt(x*x+y*y);
-        }
+          }
 
         double value = trA*lamda/N;
         for ( c=0; c<N; c++ )
-        {
+          {
             float x = pAHA[c+c*N].x;
             float y = pAHA[c+c*N].y;
             pAHA[c+c*N].x = std::sqrt(x*x+y*y) + value;
             pAHA[c+c*N].y = 0;
-        }
+          }
         //timer.stop();
 
         //timer.start("copy the AHA to device");
         if (cudaMemcpy(AHA.get_data_ptr(), pAHA, AHA_host.get_number_of_bytes(), cudaMemcpyHostToDevice) != cudaSuccess)
-        {
+          {
             std::cerr << "inverse_clib_matrix: Failed to copy regularized AHA to device" << std::endl;
             return -1;
-        }
+          }
         //timer.stop();
-    }
+      }
 
-    culaStatus s;
-    s = culaDeviceCgels( 'N', N, N, K,
-            (culaDeviceFloatComplex*)AHA.get_data_ptr(), N,
-            (culaDeviceFloatComplex*)coeff->get_data_ptr(), N);
+    /*
+      culaStatus s;
+      s = culaDeviceCgels( 'N', N, N, K,
+      (culaDeviceFloatComplex*)AHA.get_data_ptr(), N,
+      (culaDeviceFloatComplex*)coeff->get_data_ptr(), N);
+    */
+    {
+      //It actually turns out to be faster to do this inversion on the CPU. Problem is probably too small for GPU to make sense
+      //GPUTimer cpu_invert_time("CPU Inversion time");
+      boost::shared_ptr< hoNDArray<T> > AHA_h = AHA.to_host();
+      boost::shared_ptr< hoNDArray<T> > AHrhs_h = coeff->to_host();
+      
+      std::vector<size_t> perm_dim;
+      perm_dim.push_back(1);
+      perm_dim.push_back(0);
+      
+      permute(AHA_h.get(),&perm_dim);
+      permute(AHrhs_h.get(),&perm_dim);
+      
+      ht_grappa_solve_spd_system(AHA_h.get(), AHrhs_h.get());	  
+
+      permute(AHrhs_h.get(),&perm_dim);
+      *coeff = cuNDArray<T>(*AHrhs_h);
+    }
 
 
     //{
@@ -802,26 +810,27 @@ template <class T> int inverse_clib_matrix(cuNDArray<T>* A,
     //    write_cuNDArray_to_disk(coeff, filename.c_str());
     //}
 
+    /*
     if (s != culaNoError)
-    {
+      {
         std::cout << "inverse_clib_matrix: linear solve failed" << std::endl;
         return -1;
-    }
-
+      }
+    */
     return 0;
-}
+  }
 
   //Template instanciation
-    template EXPORTGPUPMRI int htgrappa_calculate_grappa_unmixing(cuNDArray<complext<float> >* ref_data,
+  template EXPORTGPUPMRI int htgrappa_calculate_grappa_unmixing(cuNDArray<complext<float> >* ref_data,
                                                                 cuNDArray<complext<float> >* b1,
                                                                 unsigned int acceleration_factor,
                                                                 std::vector<unsigned int> *kernel_size,
                                                                 cuNDArray<complext<float> >* out_mixing_coeff,
                                                                 std::vector< std::pair<unsigned int, unsigned int> >* sampled_region,
                                                                 std::list< unsigned int >* uncombined_channels);
-
-    template EXPORTGPUPMRI int inverse_clib_matrix(cuNDArray<complext<float> >* A,
-                                    cuNDArray<complext<float> >* b,
-                                    cuNDArray<complext<float> >* coeff,
-                                    double lamda);
+  
+  template EXPORTGPUPMRI int inverse_clib_matrix(cuNDArray<complext<float> >* A,
+                                                 cuNDArray<complext<float> >* b,
+                                                 cuNDArray<complext<float> >* coeff,
+                                                 double lamda);
 }
diff --git a/toolboxes/mri/pmri/gpu/htgrappa.h b/toolboxes/mri/pmri/gpu/htgrappa.h
index 7ffce77..06ae26d 100644
--- a/toolboxes/mri/pmri/gpu/htgrappa.h
+++ b/toolboxes/mri/pmri/gpu/htgrappa.h
@@ -1,3 +1,8 @@
+/** \file htgrappa.h
+    \brief Utilities to calibrate grappa weights and corresponding unmixing coefficients - GPU-based.
+*/
+
+#pragma once
 #ifndef HTGRAPPA_H
 #define HTGRAPPA_H
 
@@ -9,20 +14,22 @@
 namespace Gadgetron
 {
 
-template <class T> EXPORTGPUPMRI 
-int htgrappa_calculate_grappa_unmixing(cuNDArray<T>* ref_data, 
-                                    cuNDArray<T>* b1,
-                                    unsigned int acceleration_factor,
-                                    std::vector<unsigned int>* kernel_size,
-                                    cuNDArray<T>* out_mixing_coeff,
-                                    std::vector< std::pair<unsigned int, unsigned int> >* sampled_region = 0, 
-                                    std::list< unsigned int >* uncombined_channels = 0);
-
-template <class T> EXPORTGPUPMRI 
-int inverse_clib_matrix(cuNDArray<T>* A, 
-                            cuNDArray<T>* b,
-                            cuNDArray<T>* out_mixing_coeff, 
-                            double lamda);
+  template <class T> EXPORTGPUPMRI 
+  int htgrappa_calculate_grappa_unmixing(cuNDArray<T>* ref_data, 
+                                         cuNDArray<T>* b1,
+                                         unsigned int acceleration_factor,
+                                         std::vector<unsigned int>* kernel_size,
+                                         cuNDArray<T>* out_mixing_coeff,
+                                         std::vector< std::pair<unsigned int, unsigned int> >* sampled_region = 0, 
+                                         std::list< unsigned int >* uncombined_channels = 0);
+  
+  template <class T> EXPORTGPUPMRI 
+  int inverse_clib_matrix(cuNDArray<T>* A, 
+                          cuNDArray<T>* b,
+                          cuNDArray<T>* out_mixing_coeff, 
+                          double lamda);  
+
+  template <class T> void ht_grappa_solve_spd_system(hoNDArray<T> *A, hoNDArray<T> *B);
 
 }
 
diff --git a/toolboxes/mri/pmri/gpu/sense_utilities.cu b/toolboxes/mri/pmri/gpu/sense_utilities.cu
index 02d00be..0c486bd 100644
--- a/toolboxes/mri/pmri/gpu/sense_utilities.cu
+++ b/toolboxes/mri/pmri/gpu/sense_utilities.cu
@@ -5,7 +5,7 @@
 namespace Gadgetron{
 
   template<class REAL> __global__ void 
-  mult_csm_kernel( complext<REAL> *in, complext<REAL> *out, complext<REAL> *csm,
+  mult_csm_kernel( const complext<REAL> * __restrict__ in, complext<REAL> * __restrict__ out, complext<REAL> *csm,
 		   size_t image_elements, unsigned int nframes, unsigned int ncoils )
   {
     unsigned int idx = blockIdx.x*blockDim.x+threadIdx.x;
@@ -64,7 +64,7 @@ namespace Gadgetron{
   }
 
   template <class REAL> __global__ void 
-  mult_csm_conj_sum_kernel( complext<REAL> *in, complext<REAL> *out, complext<REAL> *csm,
+  mult_csm_conj_sum_kernel(const  complext<REAL> * __restrict__ in, complext<REAL> * __restrict__ out, const complext<REAL> * __restrict__ csm,
 			    size_t image_elements, unsigned int nframes, unsigned int ncoils )
   {
     unsigned int idx = blockIdx.x*blockDim.x+threadIdx.x;
diff --git a/toolboxes/mri/pmri/gpu/spirit_calibration.cu b/toolboxes/mri/pmri/gpu/spirit_calibration.cu
new file mode 100644
index 0000000..f2499a8
--- /dev/null
+++ b/toolboxes/mri/pmri/gpu/spirit_calibration.cu
@@ -0,0 +1,363 @@
+#include "spirit_calibration.h"
+#include "vector_td_operators.h"
+#include "vector_td_utilities.h"
+#include "cuNDArray_elemwise.h"
+#include "cuNDArray_operators.h"
+#include "cuNDArray_reductions.h"
+#include "cuNDArray_utils.h"
+#include "cuNDArray_blas.h"
+#include "cuNDFFT.h"
+#include "cudaDeviceManager.h"
+#include "setup_grid.h"
+#include "complext.h"
+#include "CUBLASContextProvider.h"
+#include "GPUTimer.h"
+#include "hoNDArray_fileio.h"
+#include "hoNDArray_utils.h"
+#include "htgrappa.h"
+
+#include <cublas_v2.h>
+//#include <cula_lapack_device.h>
+
+namespace Gadgetron {
+
+  static __global__ void 
+  compute_system_matrix_kernel( intd2 dims,
+                                int num_coils,
+                                int kernel_size,
+                                float_complext *kspace,
+                                float_complext *A )
+  {
+    // The grid contains one thread per k-space element (all coils). 
+    // Each thread reads its own element of 'kspace' and scatters that value into 
+    // kernel_size*kernel_size-1 entries of A (the central kernel point is skipped).
+    //
+    // A is stored column major (BLAS/LAPACK conventions) with elements_per_coil rows.
+    // This increases the overhead of writes in this kernel (they are non-coalesced and MANY). 
+    // TODO: optimize for performance.
+    //
+    
+    const int idx = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x+threadIdx.x;
+    const int elements_per_coil = prod(dims);
+
+    if( idx < elements_per_coil*num_coils ){
+
+      // Get the k-space value for this thread
+      //
+
+      float_complext val = kspace[idx];
+
+      const int num_kernel_elements = kernel_size*kernel_size-1;
+      const int coil = idx/elements_per_coil;
+      const int idx_in_coil = idx-coil*elements_per_coil;
+
+      // Loop over the number of outputs produced per thread
+      //
+
+      const int half_kernel_size = kernel_size>>1;
+
+      for( int j = -half_kernel_size; j<half_kernel_size+1; j++ ){ // row iterator
+        for( int i = -half_kernel_size; i<half_kernel_size+1; i++ ){ // column iterator
+
+          if( j==0 && i==0 ) continue; // The weight of the central points is set to 0
+
+          int kernel_idx = co_to_idx( intd2(i+half_kernel_size,j+half_kernel_size), intd2(kernel_size,kernel_size) );
+          if( (j==0 && i>0) || j>0 ) kernel_idx--; // positions after the skipped center move down by one
+
+          const int m = 
+            (idx_in_coil+j*dims[0]+i+elements_per_coil)%elements_per_coil; // row idx (linear index, wrapped modulo elements_per_coil)
+
+          const int n = 
+            coil*num_kernel_elements + kernel_idx; // column idx
+          
+          const int A_idx = 
+            n*elements_per_coil + m; // Column major storage
+
+          A[A_idx] = val;
+        }
+      }      
+    }
+  }
+  
+
+  static __global__ void 
+  write_convolution_masks_kernel( intd2 dims,
+                                  int num_coils,
+                                  int kernel_size,
+                                  float_complext *kernels,
+                                  float_complext *kspace )
+  {
+    // Write out convolution masks in the center of kspace
+    // - thus prepare for FFT into image space
+    // One thread per element of 'kspace' (num_coils*num_coils images of size dims); every such element is written.
+
+    const int idx = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x+threadIdx.x;
+    const int elements_per_coil = prod(dims);
+
+    if( idx < elements_per_coil*num_coils*num_coils ){
+      // Split idx into (batch, coil, position inside the image)
+      const int half_kernel_size = kernel_size>>1;
+      const int num_kernel_elements = kernel_size*kernel_size-1;
+      const int batch = idx/(elements_per_coil*num_coils);
+      const int idx_in_batch = idx-batch*elements_per_coil*num_coils;
+      const int coil = idx_in_batch/elements_per_coil;
+      const int idx_in_coil = idx_in_batch-coil*elements_per_coil;
+      const intd2 co = idx_to_co( idx_in_coil, dims ) - (dims>>1); // coordinate relative to dims>>1
+
+      if( co[1] >= -half_kernel_size && co[1] <= half_kernel_size && 
+          co[0] >= -half_kernel_size && co[0] <= half_kernel_size ){
+
+        // Compute kernel index 
+        // - keeping in mind the central elements are missing (forced to 0)
+        //
+        
+        int kernel_idx = co_to_idx( co+intd2(half_kernel_size, half_kernel_size), intd2(kernel_size, kernel_size) );
+        
+        if( co[1] == 0 && co[0] == 0 ) {
+          kspace[idx] = float_complext(0.0f);
+        }
+        else {
+          if( (co[1]==0 && co[0]>0) || co[1]>0 ) kernel_idx--; // positions after the missing center move down by one
+          kspace[idx] = kernels[batch*num_kernel_elements*num_coils + coil*num_kernel_elements + kernel_idx];
+        }
+      }
+      else{ // outside the kernel footprint
+        kspace[idx] = float_complext(0.0f);
+      }          
+    }
+  }
+
+  boost::shared_ptr< cuNDArray<float_complext> > 
+  estimate_spirit_kernels( cuNDArray<float_complext> *_kspace, unsigned int kernel_size )
+  {
+    // Calibration is performed in k-space. 
+    // The result is Fourier transformed and returned as image space kernels.
+    // The convolution is expressed explicitly as a matrix equation and solved using BLAS/LAPACK.
+    //
+    // Throws std::runtime_error on a null input, a non-3D input, an even kernel size or a CUBLAS failure.
+    if( _kspace == 0x0 ){
+      throw std::runtime_error("estimate_spirit_kernels: 0x0 input array");
+    }
+    
+    if( _kspace->get_number_of_dimensions() != 3 ) {
+      throw std::runtime_error("estimate_spirit_kernels: Only 2D spirit is supported currently");
+    }
+
+    if( (kernel_size%2) == 0 ) {
+      throw std::runtime_error("estimate_spirit_kernels: The kernel size should be odd");
+    }
+
+
+    // Normalize input array to an average intensity of one per element
+    // NOTE(review): the division below by (N/sum), sum = nrm2(kspace), scales the data by sum/N -- confirm this is the intended normalization
+    std::vector<size_t> old_dims = *_kspace->get_dimensions();
+    std::vector<size_t> dims= old_dims;
+    /*dims[0] /= 2;
+    dims[1] /= 2;*/
+    //dims[0]=36;
+    //dims[1]=36;
+    //cuNDArray<float_complext> kspace(_kspace);
+    cuNDArray<float_complext> kspace(dims);
+
+    vector_td<size_t,2> offset((old_dims[0]-dims[0])/2,(old_dims[1]-dims[1])/2); // zero offset while dims equals old_dims
+    crop<float_complext,2>(offset,_kspace,&kspace);
+    float sum = nrm2(&kspace);    
+    float_complext in_max = kspace[amax(&kspace)]; // only referenced by the commented-out Tikhonov weight further down
+    kspace /= (float(kspace.get_number_of_elements())/sum);
+    unsigned int num_coils = kspace.get_size(kspace.get_number_of_dimensions()-1);
+    unsigned int elements_per_coil = kspace.get_number_of_elements()/num_coils;
+    
+    std::vector<size_t> out_dims;
+    out_dims.push_back(_kspace->get_size(0)); out_dims.push_back(_kspace->get_size(1));
+    out_dims.push_back(num_coils*num_coils);
+    
+    boost::shared_ptr< cuNDArray<float_complext> > kernel_images
+      ( new cuNDArray<float_complext>(&out_dims) );
+
+    // Fill with 1/num_coils in case we terminate early
+    //
+
+    fill(kernel_images.get(), float_complext(1.0f/num_coils));
+
+    // Form m x n system matrix A
+    //
+
+    unsigned int m = elements_per_coil;
+    unsigned int n = num_coils*(kernel_size*kernel_size-1);
+
+    std::vector<size_t> A_dims; A_dims.push_back(m); A_dims.push_back(n);    
+    cuNDArray<float_complext> A(&A_dims); clear(&A);
+
+    // Fill system matrix
+    //
+
+    dim3 blockDim; dim3 gridDim;
+    setup_grid( kspace.get_number_of_elements(), &blockDim, &gridDim );
+    
+    compute_system_matrix_kernel<<< gridDim, blockDim >>>
+      ( intd2(kspace.get_size(0), kspace.get_size(1)), num_coils, kernel_size,
+        kspace.get_data_ptr(), A.get_data_ptr() );
+
+    CHECK_FOR_CUDA_ERROR();    
+
+    /*
+    static int counter = 0;
+    char filename[256];
+    sprintf((char*)filename, "_A_%d.cplx", counter);
+    write_nd_array<float_complext>( A.to_host().get(), filename );
+    counter++;
+    */
+
+    // Compute A^H A
+    //
+
+    cublasStatus_t stat;
+    cublasHandle_t handle = *CUBLASContextProvider::instance()->getCublasHandle();
+
+    std::vector<size_t> AHA_dims(2,n);
+    cuNDArray<float_complext> AHA(&AHA_dims);
+
+    // Initialize AHA to identity (Tikhonov regularization)
+    // NOTE(review): beta is 0.0f in the gemm below (C = alpha*A^H*A + beta*C), so the identity does not contribute -- confirm intent
+
+    float_complext one(1.0f);
+    clear(&AHA);
+    for( unsigned int i=0; i<n; i++ ){
+      cudaMemcpy( AHA.get_data_ptr()+i*n+i, &one, sizeof(float_complext), cudaMemcpyHostToDevice );
+    }
+    CHECK_FOR_CUDA_ERROR();
+
+    float_complext alpha(1.0f);
+    //float_complext beta(0.1f*in_max); // Tikhonov regularization weight
+    float_complext beta(0.0f); // Tikhonov regularization weight
+    
+    stat = cublasCgemm( handle, CUBLAS_OP_C, CUBLAS_OP_N,
+                        n,n,m,
+                        (cuFloatComplex*) &alpha,
+                        (cuFloatComplex*) A.get_data_ptr(), m,
+                        (cuFloatComplex*) A.get_data_ptr(), m,
+                        (cuFloatComplex*) &beta, 
+                        (cuFloatComplex*) AHA.get_data_ptr(), n );
+    
+    if (stat != CUBLAS_STATUS_SUCCESS) {
+      std::cerr << "CUBLAS error code " << stat << std::endl;
+      throw std::runtime_error("estimate_spirit_kernels: CUBLAS error computing A^HA");
+    }
+
+    /*
+    static int counter = 0;
+    char filename[256];
+    sprintf((char*)filename, "_AHA_%d.cplx", counter);
+    write_nd_array<float_complext>( AHA.to_host().get(), filename );
+    counter++;
+    */
+
+    // Multiply A^H with each coil image (to form the rhs)
+    //
+
+    std::vector<size_t> rhs_dims; rhs_dims.push_back(n); rhs_dims.push_back(num_coils);    
+    cuNDArray<float_complext> rhs(&rhs_dims); clear(&rhs);
+
+    beta = float_complext(0.0f);
+
+    stat = cublasCgemm( handle, CUBLAS_OP_C, CUBLAS_OP_N,
+                        n, num_coils, m,
+                        (cuFloatComplex*) &alpha,
+                        (cuFloatComplex*) A.get_data_ptr(), m,
+                        (cuFloatComplex*) kspace.get_data_ptr(), m,
+                        (cuFloatComplex*) &beta, 
+                        (cuFloatComplex*) rhs.get_data_ptr(), n );
+    
+    if (stat != CUBLAS_STATUS_SUCCESS) {
+      std::cerr << "CUBLAS error code " << stat << std::endl;
+      throw std::runtime_error("estimate_spirit_kernels: CUBLAS error computing rhs");
+    }
+    
+    /*
+    static int counter = 0;
+    char filename[256];
+    sprintf((char*)filename, "_rhs_%d.cplx", counter);
+    write_nd_array<float_complext>( rhs.to_host().get(), filename );
+    counter++;
+    */
+
+
+
+    //CGELS is used rather than a more conventional solver as it is part of CULA free.
+    /*
+    culaStatus s = culaDeviceCgels( 'N', n, n, num_coils,
+                                 (culaDeviceFloatComplex*)AHA.get_data_ptr(), n,
+                                 (culaDeviceFloatComplex*)rhs.get_data_ptr(), n);
+    */
+    {
+      //It actually turns out to be faster to do this inversion on the CPU. Problem is probably too small for GPU to make sense
+      //GPUTimer cpu_invert_time("CPU Inversion time");
+      boost::shared_ptr< hoNDArray<float_complext> > AHA_h = AHA.to_host();
+      boost::shared_ptr< hoNDArray<float_complext> > AHrhs_h = rhs.to_host();
+      
+      std::vector<size_t> perm_dim;
+      perm_dim.push_back(1);
+      perm_dim.push_back(0);
+      
+      permute(AHA_h.get(),&perm_dim);
+      permute(AHrhs_h.get(),&perm_dim);
+      
+      ht_grappa_solve_spd_system(AHA_h.get(), AHrhs_h.get());	  
+
+      permute(AHrhs_h.get(),&perm_dim);
+      rhs = cuNDArray<float_complext>(*AHrhs_h);
+    }
+
+
+    /*
+    if( s != culaNoError ) {
+      if( s == 8 ){
+        std::cerr << "CULA error code " << s << ": " << culaGetStatusString(s) << std::endl;
+        std::cerr << "Assuming that the buffer is not yet filled and return ones" << std::endl;
+        return kernel_images;
+      }
+      std::cerr << "CULA error code " << s << ": " << culaGetStatusString(s) << std::endl;
+      culaInfo i = culaGetErrorInfo();
+      char buf[2048];
+      culaGetErrorInfoString(s, i, buf, sizeof(buf));
+      printf("Error %d: %s\n", (int)i, buf);      
+      throw std::runtime_error("estimate_spirit_kernels: CULA error computing 'getrs'");
+    }
+    */
+
+    //CULA would sometimes return NaN without an explicit error; the test is kept for the CPU solve above. On NaN the default-filled kernel_images are returned.
+    float nan_test = nrm2(&rhs);
+    if (nan_test != nan_test) return kernel_images;
+
+    // Fill k-spaces with the computed kernels at the center
+    //
+
+    setup_grid( kernel_images->get_number_of_elements(), &blockDim, &gridDim );
+    
+    write_convolution_masks_kernel<<< gridDim, blockDim >>>
+      ( intd2(kernel_images->get_size(0), kernel_images->get_size(1)), num_coils, kernel_size,
+        rhs.get_data_ptr(), kernel_images->get_data_ptr() );
+    
+    CHECK_FOR_CUDA_ERROR();
+
+    // Batch FFT into image space
+    //
+    A.clear();
+    AHA.clear();
+    rhs.clear();
+
+    std::vector<size_t> dims_to_xform;
+    dims_to_xform.push_back(0); dims_to_xform.push_back(1);    
+    cuNDFFT<float>::instance()->ifft( kernel_images.get(), &dims_to_xform, false );
+    
+    /*
+    static int counter = 0;
+    char filename[256];
+    sprintf((char*)filename, "_kernels_%d.cplx", counter);
+    write_nd_array<float_complext>( kernel_images->to_host().get(), filename );
+    counter++;
+    */
+
+    return kernel_images;
+  }
+}
diff --git a/toolboxes/mri/pmri/gpu/spirit_calibration.h b/toolboxes/mri/pmri/gpu/spirit_calibration.h
new file mode 100644
index 0000000..d2622d9
--- /dev/null
+++ b/toolboxes/mri/pmri/gpu/spirit_calibration.h
@@ -0,0 +1,22 @@
+/** \file spirit_calibration.h
+    \brief Utility to calibrate spirit convolution kernels, GPU-based.
+*/
+
+#pragma once
+
+#include "gpupmri_export.h"
+#include "cuNDArray.h"
+
+namespace Gadgetron
+{
+  
+  /**
+     @brief Utility to estimate spirit convolution kernels, GPU-based.
+     @param[in] cartesian_kspace_data Array with fully sampled kspace data (Cartesian). E.g. as a result of accumulation of multiple frames. Must be a non-null 3D array with the coils as last dimension (std::runtime_error otherwise).
+     @param[in] kernel_size Size of the convolution kernel to use for k-space calibration. Must be an odd number (std::runtime_error otherwise).
+     @return A set of convolution kernels Fourier transformed into image space. For 'n' coils, n^2 calibration images are estimated, i.e. 'n' kernels for each coil.
+     Currently only 2D Spirit is supported in this function (higher-dimensional Spirit is supported in the gt-plus toolbox).
+  */
+  EXPORTGPUPMRI boost::shared_ptr< cuNDArray<float_complext> > 
+  estimate_spirit_kernels( cuNDArray<float_complext> *cartesian_kspace_data, unsigned int kernel_size );
+}
diff --git a/toolboxes/mri_core/CMakeLists.txt b/toolboxes/mri_core/CMakeLists.txt
new file mode 100644
index 0000000..11aaac4
--- /dev/null
+++ b/toolboxes/mri_core/CMakeLists.txt
@@ -0,0 +1,3 @@
+install(FILES
+  mri_core_data.h
+  DESTINATION include COMPONENT main)
diff --git a/toolboxes/mri_core/mri_core_data.h b/toolboxes/mri_core/mri_core_data.h
new file mode 100644
index 0000000..12842e3
--- /dev/null
+++ b/toolboxes/mri_core/mri_core_data.h
@@ -0,0 +1,262 @@
+#ifndef MRI_CORE_DATA_H
+#define MRI_CORE_DATA_H
+
+#include "GadgetContainerMessage.h"
+#include "ismrmrd/ismrmrd.h"
+#include "Gadgetron.h"
+#include <vector>
+#include <set>
+
+namespace Gadgetron 
+{
+
+    /** 
+      This is a list of labels of the coordinates described in the ISMRMRD acquisition header.
+
+      It is useful for accumulators and triggers and for labeling the storage used in
+      the @IsmrmrdAcquisitionBucket and @IsmrmrdDataBuffered structures. 
+
+   */
+    enum IsmrmrdCONDITION {
+	KSPACE_ENCODE_STEP_1,
+	KSPACE_ENCODE_STEP_2,
+	AVERAGE,
+	SLICE,
+	CONTRAST,
+	PHASE,
+	REPETITION,
+	SET,
+	SEGMENT,
+	USER_0,
+	USER_1,
+	USER_2,
+	USER_3,
+	USER_4,
+	USER_5,
+	USER_6,
+	USER_7,
+	NONE
+      };
+    
+  /** 
+      This class functions as a storage unit for statistics related to
+      the @IsmrmrdAcquisitionData objects.
+      One std::set<uint16_t> is kept per label; nine sets are declared.
+   */
+  class IsmrmrdAcquisitionBucketStats
+  {
+    public:
+      // Set of labels found in the data or ref part of a bucket
+      //11D, fixed order [RO, E1, E2, CHA, SLC, PHS, CON, REP, SET, SEG, AVE] (NOTE(review): only nine of these have a set below, none for RO or CHA -- confirm the "11D" remark)
+      std::set<uint16_t> kspace_encode_step_1;
+      std::set<uint16_t> kspace_encode_step_2;
+      std::set<uint16_t> slice;
+      std::set<uint16_t> phase;
+      std::set<uint16_t> contrast;
+      std::set<uint16_t> repetition;
+      std::set<uint16_t> set;
+      std::set<uint16_t> segment;
+      std::set<uint16_t> average;
+  };
+
+  /** 
+      This class functions as a storage unit for GadgetContainerMessage pointers
+      that point to acquisiton headers, data and trajectories.
+
+      It is the storage used in the @IsmrmrdAcquisitionBucket structure. 
+
+   */
+  class IsmrmrdAcquisitionData
+  {
+  public:
+    /**
+       Default Constructor. All three message pointers start out null.
+    */
+    IsmrmrdAcquisitionData()
+      : head_(0)
+      , data_(0)
+      , traj_(0)
+      {
+
+      }
+    
+    /**
+       Constructor. Stores the result of duplicate() for every non-null message passed in.
+    */
+    IsmrmrdAcquisitionData(GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* head,
+                           GadgetContainerMessage< hoNDArray< std::complex<float> > >* data,
+                           GadgetContainerMessage< hoNDArray< float > >* traj = 0)
+    {
+      if (head) {
+	head_ = head->duplicate();
+      } else {
+	head_ = 0;
+      }
+
+      if (data) {
+	data_ = data->duplicate();
+      } else {
+	data_ = 0;
+      }
+
+      if (traj) {
+	traj_ = traj->duplicate();
+      } else {
+	traj_ = 0;
+      }
+    }
+
+    /** 
+	Assignment operator. Every message currently held is released and
+	replaced by the result of duplicate() on the corresponding message
+	in d; a null pointer in d leaves the member null.
+	@param d object whose messages are duplicated
+	@return reference to this object
+     */
+    IsmrmrdAcquisitionData& operator=(const IsmrmrdAcquisitionData& d)
+      {
+	if (this != &d) { 
+	  // Duplicate before releasing, and release the old message even when
+	  // d holds a null pointer, so assigning an empty object does not leak.
+	  GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* head = 0;
+	  if (d.head_) head = d.head_->duplicate();
+	  if (head_) head_->release();
+	  head_ = head;
+
+	  GadgetContainerMessage< hoNDArray< std::complex<float> > >* data = 0;
+	  if (d.data_) data = d.data_->duplicate();
+	  if (data_) data_->release();
+	  data_ = data;
+
+	  GadgetContainerMessage< hoNDArray< float > >* traj = 0;
+	  if (d.traj_) traj = d.traj_->duplicate();
+	  if (traj_) traj_->release();
+	  traj_ = traj;
+	}
+	return *this;
+      }
+
+    /**
+       Copy constructor. Starts from null pointers and delegates to the assignment operator.
+     */
+    IsmrmrdAcquisitionData(const IsmrmrdAcquisitionData& d)
+      : head_(0)
+      , data_(0)
+      , traj_(0)
+      {
+	*this = d;
+      }
+
+
+    /**
+       Destructor. release() is called on every GadgetContainerMessage
+       that is still held when the object is destroyed. 
+     */
+    ~IsmrmrdAcquisitionData() {
+      if (head_) {
+	head_->release();
+	head_ = 0;
+      }
+
+      if (data_) {
+	data_->release();
+	data_ = 0;
+      }
+
+      if (traj_) {
+	traj_->release();
+	traj_ = 0;
+      }
+    }
+
+
+    GadgetContainerMessage<ISMRMRD::AcquisitionHeader>* head_;
+    GadgetContainerMessage< hoNDArray< std::complex<float> > >* data_;
+    GadgetContainerMessage< hoNDArray< float > > * traj_;
+  };
+
+
+  /**
+
+     This class serves as the storage unit for buffered data. 
+     The @IsmrmrdAcquisitionData structure contains pointers 
+     to the GadgetContainerMessages with the data. 
+
+     Data stored in these buckets will automatically get deleted when the object is
+     destroyed. 
+
+   */ 
+  class IsmrmrdAcquisitionBucket
+  {
+  public:
+    std::vector< IsmrmrdAcquisitionData > data_; // acquisitions of the "data" part of the bucket
+    std::vector< IsmrmrdAcquisitionData > ref_; // acquisitions of the "ref" part of the bucket
+    std::vector< IsmrmrdAcquisitionBucketStats > datastats_; // label statistics, presumably describing data_ -- TODO confirm indexing
+    std::vector< IsmrmrdAcquisitionBucketStats > refstats_; // label statistics, presumably describing ref_ -- TODO confirm indexing
+  };
+  
+  
+  class SamplingLimit // min, max and center of a sampled range (used by SamplingDescription::sampling_limits_)
+  {
+  public:
+    uint16_t min_; // not initialized by this class
+    uint16_t max_; // not initialized by this class
+    uint16_t center_; // not initialized by this class
+  };
+  
+  class SamplingDescription // FOV, matrix sizes and sampled ranges; all members are plain arrays of three entries
+  {
+  public:
+    // encoding FOV
+    float encoded_FOV_[3];
+    // recon FOV
+    float recon_FOV_[3];
+    // encoded and recon matrix sizes (presumably ordered like sampling_limits_ below -- TODO confirm)
+    uint16_t encoded_matrix_[3];
+    uint16_t recon_matrix_[3];
+    
+    // sampled range along RO, E1, E2 (for asymmetric echo and partial fourier)
+    // min, max and center
+    SamplingLimit sampling_limits_[3];
+  };
+  
+  class IsmrmrdDataBuffered // buffered samples with optional trajectory, headers and sampling description
+  {
+  public:
+    //7D, fixed order [RO, E1, E2, CHA, SLC, N, S]
+    hoNDArray< std::complex<float> > data_;
+    
+    //11D, fixed order [RO, E1, E2, CHA, SLC, PHS, CON, REP, SET, SEG, AVE]
+    //This element is optional (length is 0 if not present)
+    hoNDArray< float > trajectory_;
+    
+    //9D, fixed order [E1, E2, SLC, PHS, CON, REP, SET, SEG, AVE]
+    hoNDArray< ISMRMRD::AcquisitionHeader > headers_;
+    
+    SamplingDescription sampling_;
+
+    // TODO(review): a function to check if it's empty appears to be planned here; none is implemented
+  };
+  
+
+  /**
+     This class is used to group a sub-unit of the data that would feed into a reconstruction. 
+   */
+  class IsmrmrdReconBit
+  {
+  public:
+    IsmrmrdDataBuffered data_; // buffered "data" part of this sub-unit
+    IsmrmrdDataBuffered ref_; // buffered "ref" part of this sub-unit
+  };
+
+  /**
+     This class is used to store a unit of data that would feed into a reconstruction. 
+   */
+  class IsmrmrdReconData
+  {
+  public:
+    std::vector<IsmrmrdReconBit> rbit_; // the IsmrmrdReconBit sub-units making up this unit
+  };
+  
+}
+#endif //MRI_CORE_DATA_H
diff --git a/toolboxes/nfft/gpu/CMakeLists.txt b/toolboxes/nfft/gpu/CMakeLists.txt
index 332daed..4f209a3 100644
--- a/toolboxes/nfft/gpu/CMakeLists.txt
+++ b/toolboxes/nfft/gpu/CMakeLists.txt
@@ -1,22 +1,23 @@
 if (WIN32)
-ADD_DEFINITIONS(-D__BUILD_GADGETRON_GPUNFFT__)
-ADD_DEFINITIONS(-D_USE_MATH_DEFINES)
+  ADD_DEFINITIONS(-D__BUILD_GADGETRON_GPUNFFT__)
+  ADD_DEFINITIONS(-D_USE_MATH_DEFINES)
 endif (WIN32)
 
 if(WIN32)
-link_directories(${Boost_LIBRARY_DIRS})
+  link_directories(${Boost_LIBRARY_DIRS})
 endif(WIN32)
 
 include_directories( 
-  ${CUDA_INCLUDE_DIRS}
-  ${Boost_INCLUDE_DIR}
   ${CMAKE_SOURCE_DIR}/toolboxes/core
   ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
   ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
+  ${CMAKE_SOURCE_DIR}/toolboxes/fft/gpu
   ${CMAKE_SOURCE_DIR}/toolboxes/operators
+  ${CUDA_INCLUDE_DIRS}
+  ${Boost_INCLUDE_DIR}
   )
 
-cuda_add_library(gpunfft SHARED 
+cuda_add_library(gadgetron_toolbox_gpunfft SHARED 
     cuNFFT.h
     cuNFFTOperator.h
     gpunfft_export.h
@@ -24,20 +25,21 @@ cuda_add_library(gpunfft SHARED
     cuNFFTOperator.cu
   )
 
-target_link_libraries(
-  gpunfft 
-  gpucore 
+set_target_properties(gadgetron_toolbox_gpunfft PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+
+target_link_libraries(gadgetron_toolbox_gpunfft 
+  gadgetron_toolbox_gpufft
+  gadgetron_toolbox_gpucore 
   ${Boost_LIBRARIES}
-  ${FFTW3_LIBRARIES} 
   ${CUDA_LIBRARIES} 
   ${CUDA_CUFFT_LIBRARIES} 
   ${CUDA_CUBLAS_LIBRARIES}
   )
 
-install(TARGETS gpunfft DESTINATION lib)
+install(TARGETS gadgetron_toolbox_gpunfft DESTINATION lib COMPONENT main)
 
 install(FILES 
   cuNFFT.h 
   cuNFFTOperator.h 
   gpunfft_export.h 
-  DESTINATION include)
+  DESTINATION include COMPONENT main)
diff --git a/toolboxes/nfft/gpu/NFFT_C2NC_conv_kernel.cu b/toolboxes/nfft/gpu/NFFT_C2NC_conv_kernel.cu
index b34655b..85c284e 100644
--- a/toolboxes/nfft/gpu/NFFT_C2NC_conv_kernel.cu
+++ b/toolboxes/nfft/gpu/NFFT_C2NC_conv_kernel.cu
@@ -21,7 +21,7 @@
 //
 
 template<class REAL> __inline__ __device__ void 
-NFFT_output( unsigned int number_of_samples, unsigned int number_of_batches, complext<REAL> *samples,
+NFFT_output( unsigned int number_of_samples, unsigned int number_of_batches, complext<REAL> * __restrict__ samples,
 	     unsigned int double_warp_size_power, unsigned int globalThreadId, unsigned int sharedMemFirstSampleIdx, bool accumulate )
 {
   
@@ -49,7 +49,7 @@ resolve_wrap( vector_td<int,D> &grid_position, vector_td<unsigned int,D> &matrix
 
 template<class REAL, unsigned int D> __inline__ __device__ void
 NFFT_iterate_body( typename reald<REAL,D>::Type alpha, typename reald<REAL,D>::Type beta, REAL W, 
-		   vector_td<unsigned int, D> matrix_size_os, unsigned int number_of_batches, complext<REAL> *image,
+		   vector_td<unsigned int, D> matrix_size_os, unsigned int number_of_batches, complext<REAL> * __restrict__ image,
 		   unsigned int double_warp_size_power, REAL half_W, REAL one_over_W, vector_td<REAL,D> matrix_size_os_real, unsigned int sharedMemFirstSampleIdx,
 		   vector_td<REAL,D> sample_position, vector_td<int,D> grid_position )
 {
@@ -93,7 +93,7 @@ NFFT_iterate_body( typename reald<REAL,D>::Type alpha, typename reald<REAL,D>::T
 
 template<class REAL> __inline__ __device__ void
 NFFT_iterate( typename reald<REAL,1>::Type alpha, typename reald<REAL,1>::Type beta, REAL W, 
-	      vector_td<unsigned int,1> matrix_size_os, unsigned int number_of_batches, complext<REAL> *image,
+	      vector_td<unsigned int,1> matrix_size_os, unsigned int number_of_batches, complext<REAL> * __restrict__ image,
 	      unsigned int double_warp_size_power, REAL half_W, REAL one_over_W, vector_td<REAL,1> matrix_size_os_real, unsigned int sharedMemFirstSampleIdx,
 	      vector_td<REAL,1> sample_position, vector_td<int,1> lower_limit, vector_td<int,1> upper_limit )
 {
@@ -113,7 +113,7 @@ NFFT_iterate( typename reald<REAL,1>::Type alpha, typename reald<REAL,1>::Type b
 
 template<class REAL> __inline__ __device__ void
 NFFT_iterate( typename reald<REAL,2>::Type alpha, typename reald<REAL,2>::Type beta, REAL W, 
-	      vector_td<unsigned int,2> matrix_size_os, unsigned int number_of_batches, complext<REAL> *image,
+	      vector_td<unsigned int,2> matrix_size_os, unsigned int number_of_batches, complext<REAL> * __restrict__ image,
 	      unsigned int double_warp_size_power, REAL half_W, REAL one_over_W, vector_td<REAL,2> matrix_size_os_real, unsigned int sharedMemFirstSampleIdx,
 	      vector_td<REAL,2> sample_position, vector_td<int,2> lower_limit, vector_td<int,2> upper_limit )
 {
@@ -135,7 +135,7 @@ NFFT_iterate( typename reald<REAL,2>::Type alpha, typename reald<REAL,2>::Type b
 
 template<class REAL> __inline__ __device__ void
 NFFT_iterate( typename reald<REAL,3>::Type alpha, typename reald<REAL,3>::Type beta, REAL W, 
-	      vector_td<unsigned int,3> matrix_size_os, unsigned int number_of_batches, complext<REAL> *image,
+	      vector_td<unsigned int,3> matrix_size_os, unsigned int number_of_batches, complext<REAL> * __restrict__ image,
 	      unsigned int double_warp_size_power, REAL half_W, REAL one_over_W, vector_td<REAL,3> matrix_size_os_real, unsigned int sharedMemFirstSampleIdx,
 	      vector_td<REAL,3> sample_position, vector_td<int,3> lower_limit, vector_td<int,3> upper_limit )
 {
@@ -159,7 +159,7 @@ NFFT_iterate( typename reald<REAL,3>::Type alpha, typename reald<REAL,3>::Type b
 
 template<class REAL> __inline__ __device__ void
 NFFT_iterate( typename reald<REAL,4>::Type alpha, typename reald<REAL,4>::Type beta, REAL W, 
-	      vector_td<unsigned int,4> matrix_size_os, unsigned int number_of_batches, complext<REAL> *image,
+	      vector_td<unsigned int,4> matrix_size_os, unsigned int number_of_batches, complext<REAL> * __restrict__ image,
 	      unsigned int double_warp_size_power, REAL half_W, REAL one_over_W, vector_td<REAL,4> matrix_size_os_real, unsigned int sharedMemFirstSampleIdx,
 	      vector_td<REAL,4> sample_position, vector_td<int,4> lower_limit, vector_td<int,4> upper_limit )
 {
@@ -182,7 +182,7 @@ NFFT_iterate( typename reald<REAL,4>::Type alpha, typename reald<REAL,4>::Type b
 template<class REAL, unsigned int D> __inline__ __device__ void
 NFFT_convolve( typename reald<REAL,D>::Type alpha, typename reald<REAL,D>::Type beta, REAL W, 
 	       vector_td<unsigned int, D> matrix_size_os, vector_td<unsigned int, D> matrix_size_wrap, 
-	       unsigned int number_of_samples, unsigned int number_of_batches, vector_td<REAL,D> *traj_positions, complext<REAL> *image,
+	       unsigned int number_of_samples, unsigned int number_of_batches, const vector_td<REAL,D> * __restrict__ traj_positions, complext<REAL> * __restrict__ image,
 	       unsigned int double_warp_size_power, REAL half_W, REAL one_over_W, vector_td<REAL,D> matrix_size_os_real,
 	       unsigned int globalThreadId, unsigned int sharedMemFirstSampleIdx )
 {
@@ -212,7 +212,7 @@ template<class REAL, unsigned int D> __global__ void
 NFFT_convolve_kernel( typename reald<REAL,D>::Type alpha, typename reald<REAL,D>::Type beta, REAL W, 
 		      vector_td<unsigned int, D> matrix_size_os, vector_td<unsigned int, D> matrix_size_wrap,
 		      unsigned int number_of_samples, unsigned int number_of_batches, 
-		      vector_td<REAL,D> *traj_positions, complext<REAL> *image, complext<REAL> *samples,
+		      const vector_td<REAL,D> * __restrict__ traj_positions, complext<REAL> *image,  complext<REAL> * __restrict__ samples,
 		      unsigned int double_warp_size_power, REAL half_W, REAL one_over_W, bool accumulate, vector_td<REAL,D> matrix_size_os_real )
 {
 
diff --git a/toolboxes/nfft/gpu/NFFT_NC2C_atomic_conv_kernel.cu b/toolboxes/nfft/gpu/NFFT_NC2C_atomic_conv_kernel.cu
index d1b49de..09ccc33 100644
--- a/toolboxes/nfft/gpu/NFFT_NC2C_atomic_conv_kernel.cu
+++ b/toolboxes/nfft/gpu/NFFT_NC2C_atomic_conv_kernel.cu
@@ -23,10 +23,10 @@
 // First the implementation of the inner-most loop
 // 
 
-template<class REAL, unsigned int D> __inline__ __device__ void
+template<class REAL, unsigned int D> static __inline__ __device__ void
 NFFT_iterate_body( typename reald<REAL,D>::Type alpha, typename reald<REAL,D>::Type beta, 
 		   REAL W, vector_td<unsigned int, D> matrix_size_os, 
-		   unsigned int number_of_batches, complext<REAL> *samples, complext<REAL> *image,
+		   unsigned int number_of_batches, const complext<REAL> * __restrict__ samples,  complext<REAL> * __restrict__ image,
 		   unsigned int double_warp_size_power, REAL half_W, REAL one_over_W, vector_td<REAL,D> matrix_size_os_real, 
 		   unsigned int frame, unsigned int num_frames,
 		   unsigned int num_samples_per_batch, unsigned int sample_idx_in_batch, 
@@ -73,7 +73,7 @@ NFFT_iterate_body( typename reald<REAL,D>::Type alpha, typename reald<REAL,D>::T
 template<class REAL> __inline__ __device__ void
 NFFT_iterate( typename reald<REAL,1>::Type alpha, typename reald<REAL,1>::Type beta, 
 	      REAL W, vector_td<unsigned int,1> matrix_size_os, 
-	      unsigned int number_of_batches, complext<REAL> *samples, complext<REAL> *image,
+	      unsigned int number_of_batches, const complext<REAL> * __restrict__ samples, complext<REAL> * __restrict__ image,
 	      unsigned int double_warp_size_power, REAL half_W, REAL one_over_W, 
 	      vector_td<REAL,1> matrix_size_os_real, 
 	      unsigned int frame, unsigned int num_frames, 
@@ -98,7 +98,7 @@ NFFT_iterate( typename reald<REAL,1>::Type alpha, typename reald<REAL,1>::Type b
 template<class REAL> __inline__ __device__ void
 NFFT_iterate( typename reald<REAL,2>::Type alpha, typename reald<REAL,2>::Type beta, 
 	      REAL W, vector_td<unsigned int,2> matrix_size_os, 
-	      unsigned int number_of_batches, complext<REAL> *samples, complext<REAL> *image,
+	      unsigned int number_of_batches, const complext<REAL> * __restrict__ samples, complext<REAL> * __restrict__ image,
 	      unsigned int double_warp_size_power, REAL half_W, REAL one_over_W, 
 	      vector_td<REAL,2> matrix_size_os_real, 
 	      unsigned int frame, unsigned int num_frames, 
@@ -125,7 +125,7 @@ NFFT_iterate( typename reald<REAL,2>::Type alpha, typename reald<REAL,2>::Type b
 template<class REAL> __inline__ __device__ void
 NFFT_iterate( typename reald<REAL,3>::Type alpha, typename reald<REAL,3>::Type beta, 
 	      REAL W, vector_td<unsigned int,3> matrix_size_os, 
-	      unsigned int number_of_batches, complext<REAL> *samples, complext<REAL> *image,
+	      unsigned int number_of_batches, const complext<REAL> * __restrict__ samples, complext<REAL> * __restrict__ image,
 	      unsigned int double_warp_size_power, REAL half_W, REAL one_over_W, 
 	      vector_td<REAL,3> matrix_size_os_real, 
 	      unsigned int frame, unsigned int num_frames, 	      
@@ -154,7 +154,7 @@ NFFT_iterate( typename reald<REAL,3>::Type alpha, typename reald<REAL,3>::Type b
 template<class REAL> __inline__ __device__ void
 NFFT_iterate( typename reald<REAL,4>::Type alpha, typename reald<REAL,4>::Type beta, 
 	      REAL W, vector_td<unsigned int,4> matrix_size_os, 
-	      unsigned int number_of_batches, complext<REAL> *samples, complext<REAL> *image,
+	      unsigned int number_of_batches, const complext<REAL> * __restrict__ samples, complext<REAL> * __restrict image,
 	      unsigned int double_warp_size_power, REAL half_W, REAL one_over_W,
 	      vector_td<REAL,4> matrix_size_os_real, 
 	      unsigned int frame, unsigned int num_frames, 
@@ -186,7 +186,7 @@ template<class REAL, unsigned int D> __global__ void
 NFFT_H_atomic_convolve_kernel( typename reald<REAL,D>::Type alpha, typename reald<REAL,D>::Type beta, REAL W, 
 			       vector_td<unsigned int, D> matrix_size_os, vector_td<unsigned int, D> matrix_size_wrap,
 			       unsigned int num_samples_per_frame, unsigned int num_batches, 
-			       vector_td<REAL,D> *traj_positions, complext<REAL> *samples, complext<REAL> *image,
+			       const vector_td<REAL,D> * __restrict__ traj_positions, const complext<REAL> * __restrict__ samples, complext<REAL> * __restrict__ image,
 			       unsigned int double_warp_size_power, REAL half_W, REAL one_over_W,
 			       vector_td<REAL,D> matrix_size_os_real )
 {
diff --git a/toolboxes/nfft/gpu/NFFT_NC2C_conv_kernel.cu b/toolboxes/nfft/gpu/NFFT_NC2C_conv_kernel.cu
index f3164df..ed2ed2c 100644
--- a/toolboxes/nfft/gpu/NFFT_NC2C_conv_kernel.cu
+++ b/toolboxes/nfft/gpu/NFFT_NC2C_conv_kernel.cu
@@ -21,7 +21,7 @@
 //
 
 template<class REAL> __inline__ __device__ void 
-NFFT_H_output( unsigned int number_of_batches, complext<REAL>*image,
+NFFT_H_output( unsigned int number_of_batches, complext<REAL>* __restrict__ image,
 	       unsigned int double_warp_size_power, unsigned int number_of_domains, 
 	       unsigned int globalThreadId, unsigned int sharedMemFirstCellIdx )
 {
@@ -40,7 +40,8 @@ NFFT_H_output( unsigned int number_of_batches, complext<REAL>*image,
 template<class REAL, unsigned int D> __inline__ __device__ void
 NFFT_H_convolve( typename reald<REAL,D>::Type alpha, typename reald<REAL,D>::Type beta, REAL W, 
 		 unsigned int number_of_samples, unsigned int number_of_batches, unsigned int number_of_domains,
-		 vector_td<REAL,D> *traj_positions, complext<REAL>*samples, unsigned int *tuples_last, unsigned int *bucket_begin, unsigned int *bucket_end,
+		 const vector_td<REAL,D> * __restrict__ traj_positions, complext<REAL>*samples, const unsigned int * __restrict__ tuples_last,
+		 const unsigned int * __restrict__ bucket_begin, const unsigned int * __restrict__ bucket_end,
 		 unsigned int double_warp_size_power, REAL half_W, REAL one_over_W, vector_td<REAL,D> matrix_size_os_real, 
 		 unsigned int globalThreadId, vector_td<unsigned int,D> domainPos, unsigned int sharedMemFirstCellIdx )
 {
@@ -94,8 +95,9 @@ NFFT_H_convolve( typename reald<REAL,D>::Type alpha, typename reald<REAL,D>::Typ
 template<class REAL, unsigned int D> __global__ void
 NFFT_H_convolve_kernel( typename reald<REAL,D>::Type alpha, typename reald<REAL,D>::Type beta, REAL W,
 			vector_td<unsigned int,D> domain_count_grid, unsigned int number_of_samples, unsigned int number_of_batches,
-			vector_td<REAL,D> *traj_positions, complext<REAL>*image, complext<REAL>*samples,
-			unsigned int *tuples_last, unsigned int *bucket_begin, unsigned int *bucket_end, unsigned int double_warp_size_power, 
+			const vector_td<REAL,D> * __restrict__ traj_positions, complext<REAL>* __restrict__ image, complext<REAL>* __restrict__ samples,
+			const unsigned int * __restrict__ tuples_last, const unsigned int * __restrict__ bucket_begin, const unsigned int * __restrict__ bucket_end,
+			unsigned int double_warp_size_power,
 			REAL half_W, REAL one_over_W, vector_td<REAL,D> matrix_size_os_real )
 {
   
diff --git a/toolboxes/nfft/gpu/NFFT_preprocess_kernel.cu b/toolboxes/nfft/gpu/NFFT_preprocess_kernel.cu
index 95ecbcc..94c892a 100644
--- a/toolboxes/nfft/gpu/NFFT_preprocess_kernel.cu
+++ b/toolboxes/nfft/gpu/NFFT_preprocess_kernel.cu
@@ -43,7 +43,7 @@ struct compute_num_cells_per_sample
 template<class REAL> __inline__ __device__ void
 output_pairs( unsigned int sample_idx, unsigned int frame, 
 	      typename reald<REAL,1>::Type &p, typename uintd<1>::Type &matrix_size_os, typename uintd<1>::Type &matrix_size_wrap, 
-	      REAL half_W, unsigned int *write_offsets, unsigned int *tuples_first, unsigned int *tuples_last )
+	      REAL half_W, const unsigned int * __restrict__ write_offsets, unsigned int * __restrict__ tuples_first, unsigned int * __restrict__ tuples_last )
 {
   unsigned int lower_limit_x = (unsigned int)ceil(p.vec[0]-half_W);
   unsigned int upper_limit_x = (unsigned int)floor(p.vec[0]+half_W);
@@ -62,7 +62,7 @@ output_pairs( unsigned int sample_idx, unsigned int frame,
 template<class REAL> __inline__ __device__ void
 output_pairs( unsigned int sample_idx, unsigned int frame, 
 	      typename reald<REAL,2>::Type &p, typename uintd<2>::Type &matrix_size_os, typename uintd<2>::Type &matrix_size_wrap, 
-	      REAL half_W, unsigned int *write_offsets, unsigned int *tuples_first, unsigned int *tuples_last )
+	      REAL half_W, const unsigned int * __restrict__ write_offsets, unsigned int * __restrict__ tuples_first, unsigned int * __restrict__ tuples_last )
 {
   unsigned int lower_limit_x = (unsigned int)ceil(p.vec[0]-half_W);
   unsigned int lower_limit_y = (unsigned int)ceil(p.vec[1]-half_W);
@@ -85,7 +85,7 @@ output_pairs( unsigned int sample_idx, unsigned int frame,
 template <class REAL> __inline__ __device__ void
 output_pairs( unsigned int sample_idx, unsigned int frame, 
 	      typename reald<REAL,3>::Type &p, typename uintd<3>::Type &matrix_size_os, typename uintd<3>::Type &matrix_size_wrap, 
-	      REAL half_W, unsigned int *write_offsets, unsigned int *tuples_first, unsigned int *tuples_last )
+	      REAL half_W, const unsigned int * __restrict__ write_offsets, unsigned int * __restrict__ tuples_first, unsigned int * __restrict__ tuples_last )
 {
   unsigned int lower_limit_x = (unsigned int)ceil(p.vec[0]-half_W);
   unsigned int lower_limit_y = (unsigned int)ceil(p.vec[1]-half_W);
@@ -112,7 +112,7 @@ output_pairs( unsigned int sample_idx, unsigned int frame,
 template <class REAL> __inline__ __device__ void
 output_pairs( unsigned int sample_idx, unsigned int frame, 
 	      typename reald<REAL,4>::Type &p, typename uintd<4>::Type &matrix_size_os, typename uintd<4>::Type &matrix_size_wrap, 
-	      REAL half_W, unsigned int *write_offsets, unsigned int *tuples_first, unsigned int *tuples_last )
+	      REAL half_W, const unsigned int * __restrict__ write_offsets, unsigned int * __restrict__ tuples_first, unsigned int * __restrict__ tuples_last )
 {
   unsigned int lower_limit_x = (unsigned int)ceil(p.vec[0]-half_W);
   unsigned int lower_limit_y = (unsigned int)ceil(p.vec[1]-half_W);
@@ -142,7 +142,7 @@ output_pairs( unsigned int sample_idx, unsigned int frame,
 
 template<class REAL, unsigned int D> __global__ void
 write_pairs_kernel( typename uintd<D>::Type matrix_size_os, typename uintd<D>::Type matrix_size_wrap, unsigned int num_samples_per_frame, REAL half_W, 
-		    typename reald<REAL,D>::Type *traj_positions, unsigned int *write_offsets, unsigned int *tuples_first, unsigned int *tuples_last )
+		   const typename reald<REAL,D>::Type * __restrict__ traj_positions, unsigned int * __restrict__ write_offsets, unsigned int * __restrict__ tuples_first, unsigned int * __restrict__ tuples_last )
 {
   // Get sample idx
   unsigned int sample_idx = blockIdx.x*blockDim.x + threadIdx.x;
@@ -158,7 +158,7 @@ write_pairs_kernel( typename uintd<D>::Type matrix_size_os, typename uintd<D>::T
 
 template <class REAL, unsigned int D> void 
 write_pairs( typename uintd<D>::Type matrix_size_os, typename uintd<D>::Type matrix_size_wrap, unsigned int num_samples_per_frame, unsigned int num_frames, REAL W, 
-	     typename reald<REAL,D>::Type *traj_positions, unsigned int *write_offsets, unsigned int *tuples_first, unsigned int *tuples_last )
+	     const typename reald<REAL,D>::Type * __restrict__ traj_positions, unsigned int * __restrict__ write_offsets, unsigned int * __restrict__ tuples_first, unsigned int * __restrict__ tuples_last )
 {  
   dim3 blockDim(256);
   dim3 gridDim((int)ceil((double)num_samples_per_frame/(double)blockDim.x), num_frames);
diff --git a/toolboxes/nfft/gpu/cuNFFT.cu b/toolboxes/nfft/gpu/cuNFFT.cu
index e3d3b14..9bf81a4 100644
--- a/toolboxes/nfft/gpu/cuNFFT.cu
+++ b/toolboxes/nfft/gpu/cuNFFT.cu
@@ -442,6 +442,9 @@ Gadgetron::cuNFFT_plan<REAL,D,ATOMICS>::compute( cuNDArray<complext<REAL> > *in,
     
     compute_NFFT_C2NC( working_image, out_int );
 
+    if( dcw_int )
+        	*out_int *= *dcw_int;
+
     if( !oversampled_image ){
       delete working_image; working_image = 0x0;
     }    
@@ -526,6 +529,11 @@ Gadgetron::cuNFFT_plan<REAL,D,ATOMICS>::compute( cuNDArray<complext<REAL> > *in,
     
     compute_NFFTH_C2NC( working_image, out_int );
     
+    if( dcw_int )
+    	*out_int *= *dcw_int;
+
+
+
     if( !oversampled_image ){
       delete working_image; working_image = 0x0;
     }
@@ -589,6 +597,7 @@ Gadgetron::cuNFFT_plan<REAL,D,ATOMICS>::mult_MH_M( cuNDArray<complext<REAL> > *i
   // Density compensation
   if( dcw ){
     *working_samples *= *dcw_int;
+    *working_samples *= *dcw_int;
   }
     
   compute_NFFTH_NC2C( working_samples, working_image );
@@ -658,6 +667,7 @@ Gadgetron::cuNFFT_plan<REAL,D,ATOMICS>::convolve( cuNDArray<complext<REAL> > *in
 
   case NFFT_CONV_C2NC:
   	convolve_NFFT_C2NC( in_int, out_int, accumulate );
+  	if( dcw_int ) *out_int *= *dcw_int;
     break;
     
   case NFFT_CONV_NC2C:
@@ -772,6 +782,7 @@ Gadgetron::cuNFFT_plan<REAL,D,ATOMICS>::check_consistency( cuNDArray<complext<RE
   
   if( (components & _NFFT_CONV_C2NC ) || (components & _NFFT_CONV_NC2C )){    
     if( (samples->get_number_of_elements() == 0) || (samples->get_number_of_elements() % (number_of_frames*number_of_samples)) ){
+      printf("\ncuNFFT::check_consistency() failed:\n#elements in the samples array: %ld.\n#samples from preprocessing: %d.\n#frames from preprocessing: %d.\n",samples->get_number_of_elements(), number_of_samples, number_of_frames ); fflush(stdout);
       throw std::runtime_error("Error: cuNFFT_plan: The number of samples is not a multiple of #samples/frame x #frames as requested through preprocessing");
     }
     
@@ -794,6 +805,7 @@ Gadgetron::cuNFFT_plan<REAL,D,ATOMICS>::check_consistency( cuNDArray<complext<RE
       if( weights->get_number_of_elements() == 0 ||
           !( weights->get_number_of_elements() == number_of_samples || 
              weights->get_number_of_elements() == number_of_frames*number_of_samples) ){
+        printf("\ncuNFFT::check_consistency() failed:\n#elements in the samples array: %ld.\n#samples from preprocessing: %d.\n#frames from preprocessing: %d.\n#weights: %ld.\n",samples->get_number_of_elements(), number_of_samples, number_of_frames, weights->get_number_of_elements() ); fflush(stdout);
         throw std::runtime_error("Error: cuNFFT_plan: The number of weights should match #samples/frame x #frames as requested through preprocessing");
       }
     }
@@ -870,7 +882,7 @@ void Gadgetron::cuNFFT_plan<REAL,D,ATOMICS>::compute_beta()
 template<class REAL, unsigned int D> __global__ void
 compute_deapodization_filter_kernel( typename uintd<D>::Type matrix_size_os, typename reald<REAL,D>::Type matrix_size_os_real, 
                                      REAL W, REAL half_W, REAL one_over_W, 
-                                     typename reald<REAL,D>::Type beta, complext<REAL> *image_os )
+                                     typename reald<REAL,D>::Type beta, complext<REAL> * __restrict__ image_os )
 {
   const unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
   const unsigned int num_elements = prod(matrix_size_os);
@@ -1053,7 +1065,7 @@ Gadgetron::cuNFFT_plan<REAL,D,ATOMICS>::convolve_NFFT_C2NC( cuNDArray<complext<R
 
   for( unsigned int repetition = 0; repetition<num_repetitions; repetition++ ){
     NFFT_convolve_kernel<REAL,D>
-      <<<dimGrid, dimBlock, (repetition==num_repetitions-1) ? dimBlock.x*bytes_per_thread_tail : dimBlock.x*bytes_per_thread>>>
+      <<<dimGrid, dimBlock, ((repetition==num_repetitions-1) ? dimBlock.x*bytes_per_thread_tail : dimBlock.x*bytes_per_thread)>>>
       ( alpha, beta, W, vector_td<unsigned int,D>(matrix_size_os), vector_td<unsigned int,D>(matrix_size_wrap), number_of_samples,
         (repetition==num_repetitions-1) ? domain_size_coils_tail : domain_size_coils, 
         raw_pointer_cast(&(*trajectory_positions)[0]), 
@@ -1153,7 +1165,7 @@ _convolve_NFFT_NC2C<float,D,true>{ // True: use atomic operations variant
     for( unsigned int repetition = 0; repetition<num_repetitions; repetition++ ){
       
       NFFT_H_atomic_convolve_kernel<float,D>
-        <<<dimGrid, dimBlock, (repetition==num_repetitions-1) ? dimBlock.x*bytes_per_thread_tail : dimBlock.x*bytes_per_thread>>>
+        <<<dimGrid, dimBlock, ((repetition==num_repetitions-1) ? dimBlock.x*bytes_per_thread_tail : dimBlock.x*bytes_per_thread)>>>
         ( alpha, beta, W, vector_td<unsigned int,D>(matrix_size_os), vector_td<unsigned int,D>(matrix_size_wrap), number_of_samples,
           (repetition==num_repetitions-1) ? domain_size_coils_tail : domain_size_coils,
           raw_pointer_cast(&(*trajectory_positions)[0]), 
@@ -1263,7 +1275,7 @@ _convolve_NFFT_NC2C<REAL,D,false>{ // False: use non-atomic operations variant
     for( unsigned int repetition = 0; repetition<num_repetitions; repetition++ ){
       
       NFFT_H_convolve_kernel<REAL,D>
-        <<<dimGrid, dimBlock, (repetition==num_repetitions-1) ? dimBlock.x*bytes_per_thread_tail : dimBlock.x*bytes_per_thread>>>
+        <<<dimGrid, dimBlock, ((repetition==num_repetitions-1) ? dimBlock.x*bytes_per_thread_tail : dimBlock.x*bytes_per_thread)>>>
         ( alpha, beta, W, vector_td<unsigned int,D>(matrix_size_os+matrix_size_wrap), number_of_samples,
           (repetition==num_repetitions-1) ? domain_size_coils_tail : domain_size_coils, 
           raw_pointer_cast(&(*trajectory_positions)[0]), 
@@ -1283,7 +1295,7 @@ _convolve_NFFT_NC2C<REAL,D,false>{ // False: use non-atomic operations variant
 
 template<class REAL, unsigned int D> __global__ void
 image_wrap_kernel( typename uintd<D>::Type matrix_size_os, typename uintd<D>::Type matrix_size_wrap, bool accumulate,
-                   complext<REAL> *in, complext<REAL> *out )
+                   const complext<REAL> * __restrict__ in, complext<REAL> * __restrict__ out )
 {
   unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
   const unsigned int num_elements_per_image_src = prod(matrix_size_os+matrix_size_wrap);
@@ -1411,31 +1423,6 @@ Gadgetron::cuNFFT_plan<REAL,D,ATOMICS>::image_wrap( cuNDArray<complext<REAL> > *
   CHECK_FOR_CUDA_ERROR();
 }	
 
-template<class REAL, unsigned int D, bool ATOMICS> typename uint64d<D>::Type
-Gadgetron::cuNFFT_plan<REAL,D,ATOMICS>::get_matrix_size()
-{
-  return matrix_size;
-}
-
-template<class REAL, unsigned int D, bool ATOMICS> typename uint64d<D>::Type
-Gadgetron::cuNFFT_plan<REAL,D,ATOMICS>::get_matrix_size_os()
-{
-  return matrix_size_os;
-}
-
-template<class REAL, unsigned int D, bool ATOMICS> REAL 
-Gadgetron::cuNFFT_plan<REAL,D,ATOMICS>::get_W()
-{
-  return W;
-}
-
-template<class REAL, unsigned int D, bool ATOMICS> unsigned int 
-Gadgetron::cuNFFT_plan<REAL,D,ATOMICS>::get_device()
-{
-  return device;
-}
-
-
 //
 // Template instantion
 //
diff --git a/toolboxes/nfft/gpu/cuNFFT.h b/toolboxes/nfft/gpu/cuNFFT.h
index 2c400a0..de7c170 100644
--- a/toolboxes/nfft/gpu/cuNFFT.h
+++ b/toolboxes/nfft/gpu/cuNFFT.h
@@ -48,7 +48,7 @@ namespace Gadgetron{
   public: // Main interface
     
     /** 
-	Default constructor
+        Default constructor
     */
     cuNFFT_plan();
 
@@ -63,9 +63,9 @@ namespace Gadgetron{
        The larger W the better quality at the cost of increased runtime.
        \param device the device (GPU id) to use for the NFFT computation. 
        The default value of -1 indicates that the currently active device is used.
-     */
+    */
     cuNFFT_plan( typename uint64d<D>::Type matrix_size, typename uint64d<D>::Type matrix_size_os,
-	       REAL W, int device = -1 );
+                 REAL W, int device = -1 );
 
     /**
        Destructor
@@ -73,7 +73,7 @@ namespace Gadgetron{
     virtual ~cuNFFT_plan();
 
     /** 
-	Enum to specify the desired mode for cleaning up when using the wipe() method.
+        Enum to specify the desired mode for cleaning up when using the wipe() method.
     */
     enum NFFT_wipe_mode { 
       NFFT_WIPE_ALL, /**< delete all internal memory. */
@@ -81,19 +81,19 @@ namespace Gadgetron{
     };
 
     /** 
-	Clear internal storage
-	\param mode enum defining the wipe mode
+        Clear internal storage
+        \param mode enum defining the wipe mode
     */
     void wipe( NFFT_wipe_mode mode );
 
     /** 
-	Setup the plan. Please see the constructor taking similar arguments for a parameter description.
+        Setup the plan. Please see the constructor taking similar arguments for a parameter description.
     */
     void setup( typename uint64d<D>::Type matrix_size, typename uint64d<D>::Type matrix_size_os,
-		REAL W, int device = -1 );
+                REAL W, int device = -1 );
 
     /**
-      Enum to specify the preprocessing mode.
+       Enum to specify the preprocessing mode.
     */
     enum NFFT_prep_mode { 
       NFFT_PREP_C2NC, /**< preprocess to perform a Cartesian to non-Cartesian NFFT. */
@@ -119,31 +119,31 @@ namespace Gadgetron{
     };
 
     /**
-      Execute the NFFT.
-      \param[in] in the input array.
-      \param[out] out the output array.
-      \param[in] dcw optional density compensation weights weighing the input samples according to the sampling density. 
-      If an 0x0-pointer is provided no density compensation is used.
-      \param mode enum specifying the mode of operation.
+       Execute the NFFT.
+       \param[in] in the input array.
+       \param[out] out the output array.
+       \param[in] dcw optional density compensation weights weighing the input samples according to the sampling density. 
+       If an 0x0-pointer is provided no density compensation is used.
+       \param mode enum specifying the mode of operation.
     */
     void compute( cuNDArray<complext<REAL> > *in, cuNDArray<complext<REAL> > *out,
-		  cuNDArray<REAL> *dcw, NFFT_comp_mode mode );
+                  cuNDArray<REAL> *dcw, NFFT_comp_mode mode );
 
     /**
-      Execute an NFFT iteraion (from Cartesian image space to non-Cartesian Fourier space and back to Cartesian image space).
-      \param[in] in the input array.
-      \param[out] out the output array.
-      \param[in] dcw optional density compensation weights weighing the input samples according to the sampling density. 
-      If an 0x0-pointer is provided no density compensation is used.
-      \param[in] halfway_dims specifies the dimensions of the intermediate Fourier space (codomain).
+       Execute an NFFT iteration (from Cartesian image space to non-Cartesian Fourier space and back to Cartesian image space).
+       \param[in] in the input array.
+       \param[out] out the output array.
+       \param[in] dcw optional density compensation weights weighing the input samples according to the sampling density. 
+       If an 0x0-pointer is provided no density compensation is used.
+       \param[in] halfway_dims specifies the dimensions of the intermediate Fourier space (codomain).
     */
     void mult_MH_M( cuNDArray<complext<REAL> > *in, cuNDArray<complext<REAL> > *out,
-		    cuNDArray<REAL> *dcw, std::vector<size_t> halfway_dims );
+                    cuNDArray<REAL> *dcw, std::vector<size_t> halfway_dims );
   
   public: // Utilities
   
     /**
-      Enum specifying the direction of the NFFT standalone convolution
+       Enum specifying the direction of the NFFT standalone convolution
     */
     enum NFFT_conv_mode { 
       NFFT_CONV_C2NC, /**< convolution: Cartesian to non-Cartesian. */
@@ -159,7 +159,7 @@ namespace Gadgetron{
        \param[in] accumulate specifies whether the result is added to the output (accumulation) or if the output is overwritten.
     */
     void convolve( cuNDArray<complext<REAL> > *in, cuNDArray<complext<REAL> > *out, cuNDArray<REAL> *dcw,
-		   NFFT_conv_mode mode, bool accumulate = false );
+                   NFFT_conv_mode mode, bool accumulate = false );
     
     /**
        Enum specifying the direction of the NFFT standalone FFT.
@@ -188,30 +188,38 @@ namespace Gadgetron{
     /**
        Get the matrix size.
     */
-    typename uint64d<D>::Type get_matrix_size();
+    inline typename uint64d<D>::Type get_matrix_size(){
+      return matrix_size;
+    }
 
     /**
        Get the oversampled matrix size.
     */
-    typename uint64d<D>::Type get_matrix_size_os();
+    inline typename uint64d<D>::Type get_matrix_size_os(){
+      return matrix_size_os;
+    }
 
     /**
        Get the convolution kernel size
     */
-    REAL get_W();
+    inline REAL get_W(){
+      return W;
+    }
     
     /**
        Get the assigned device id
     */
-    unsigned int get_device();
+    inline unsigned int get_device(){
+      return device; // NOTE(review): constructor/setup() accept 'int device = -1'; confirm the stored id is already non-negative here
+    }
+    
+    /**
+       Query whether the plan has been set up (returns the 'initialized' member).
+    */
+    inline bool is_setup(){
+      return initialized;
+    }
     
-  public: 
-
-    // Custom operators new/delete for windows memory handling across dll boundaries
-    void* operator new (size_t bytes) { return ::new char[bytes]; }
-    void operator delete (void *ptr) { delete [] static_cast <char *> (ptr); } 
-    void * operator new(size_t s, void * p) { return p; }
-
     friend struct _convolve_NFFT_NC2C<REAL,D,ATOMICS>;
   
   private: // Internal to the implementation
@@ -219,7 +227,7 @@ namespace Gadgetron{
     // Validate setup / arguments
     enum NFFT_components { _NFFT_CONV_C2NC = 1, _NFFT_CONV_NC2C = 2, _NFFT_FFT = 4, _NFFT_DEAPODIZATION = 8 };
     void check_consistency( cuNDArray<complext<REAL> > *samples, cuNDArray<complext<REAL> > *image,
-			    cuNDArray<REAL> *dcw, unsigned char components );
+                            cuNDArray<REAL> *dcw, unsigned char components );
 
     // Shared barebones constructor
     void barebones();
diff --git a/toolboxes/operators/CMakeLists.txt b/toolboxes/operators/CMakeLists.txt
index b90e16b..a6d664c 100644
--- a/toolboxes/operators/CMakeLists.txt
+++ b/toolboxes/operators/CMakeLists.txt
@@ -9,6 +9,7 @@ install(FILES
   linearOperator.h
   identityOperator.h
   diagonalOperator.h
+  diagonalSumOperator.h
   encodingOperatorContainer.h
   multiplicationOperatorContainer.h
   FFTOperator.h
@@ -20,7 +21,7 @@ install(FILES
   downsampleOperator.h
   upsampleOperator.h
   tvPicsOperator.h
-  DESTINATION include)
+  DESTINATION include COMPONENT main)
 
 IF (ARMADILLO_FOUND)
   add_subdirectory(cpu)
diff --git a/toolboxes/operators/FFTOperator.h b/toolboxes/operators/FFTOperator.h
index 4760b72..73a48bf 100644
--- a/toolboxes/operators/FFTOperator.h
+++ b/toolboxes/operators/FFTOperator.h
@@ -5,7 +5,7 @@
     To simplify the actual instantiation we refer to 
     - the class(/file) hoFFTOperator(/.h) for a cpu instantiated operator using the hoNDArray class
     - the class(/file) cuFFTOperator(/.h) for a gpu instantiated operator using the cuNDArray class
-*/
+ */
 
 #pragma once
 
@@ -13,58 +13,64 @@
 
 namespace Gadgetron{
 
-  template <class ARRAY_TYPE, class FFT> class FFTOperator : public linearOperator<ARRAY_TYPE>
-  {
-  public:
-    
-    FFTOperator() : linearOperator<ARRAY_TYPE>() {}
-    virtual ~FFTOperator() {}
-    
-    virtual void mult_M( ARRAY_TYPE *in, ARRAY_TYPE *out, bool accumulate = false )
-    {
-      if( in == 0x0 || out == 0x0 ){
-	throw std::runtime_error("Error: FFTOperator::mult_M(): illegal array pointer provided");
-      }
-      
-      if( accumulate ){
-	ARRAY_TYPE tmp(in);
-	FFT::instance()->fft(&tmp);
-    	*out += tmp;
-      }
-      else{
-	*out = *in;
-	FFT::instance()->fft(out);
-      }
-    }
-    
-    virtual void mult_MH( ARRAY_TYPE *in, ARRAY_TYPE *out, bool accumulate = false )
-    {
-      if( in == 0x0 || out == 0x0 ){
-	throw std::runtime_error("Error: FFTOperator::mult_M(): illegal array pointer provided");
-      }
-      
-      if( accumulate ){
-	ARRAY_TYPE tmp(in);
-	FFT::instance()->ifft(&tmp);
-    	*out += tmp;
-      }
-      else{
-	*out = *in;
-	FFT::instance()->ifft(out);
-      }
-    }
-    
-    virtual void mult_MH_M( ARRAY_TYPE *in, ARRAY_TYPE *out, bool accumulate = false )
-    {
-      if( accumulate )
-    	*out += *in;
-      else 
-	*out = *in;           
-    }
-    
-    virtual boost::shared_ptr< linearOperator< ARRAY_TYPE > > clone()
-    {
-      return linearOperator<ARRAY_TYPE>::clone(this);
-    }
-  };
+template <class ARRAY_TYPE, class FFT> class FFTOperator : public linearOperator<ARRAY_TYPE>
+{
+public:
+	// Linear operator built on the FFT singleton: mult_M/mult_MH scale by sqrt(1/N); mult_MH_M copies (or adds) 'in' to 'out'.
+	FFTOperator() : linearOperator<ARRAY_TYPE>() {}
+	virtual ~FFTOperator() {}
+	// Forward transform of 'in', written to 'out' (added to it when 'accumulate'). Throws std::runtime_error on null pointers.
+	virtual void mult_M( ARRAY_TYPE *in, ARRAY_TYPE *out, bool accumulate = false )
+	{
+		if( in == 0x0 || out == 0x0 ){
+			throw std::runtime_error("Error: FFTOperator::mult_M(): illegal array pointer provided");
+		}
+		// NOTE(review): the explicit sqrt(1/N) scaling assumes FFT::fft() returns unscaled output -- confirm.
+		if( accumulate ){
+			ARRAY_TYPE tmp(in);
+			FFT::instance()->fft(&tmp);
+			// Add the scaled transform exactly once; an extra '*out += tmp' here would accumulate the result twice.
+			axpy(ELEMENT_TYPE(sqrt(1.0/tmp.get_number_of_elements())),&tmp,out);
+		}
+		else{
+			*out = *in;
+			FFT::instance()->fft(out);
+			*out *= ELEMENT_TYPE(sqrt(1.0/out->get_number_of_elements())); // scale the array itself; no 'tmp' exists in this branch
+		}
+	}
+	// Inverse transform of 'in', written to 'out' (added to it when 'accumulate'). Throws std::runtime_error on null pointers.
+	virtual void mult_MH( ARRAY_TYPE *in, ARRAY_TYPE *out, bool accumulate = false )
+	{
+		if( in == 0x0 || out == 0x0 ){
+			throw std::runtime_error("Error: FFTOperator::mult_MH(): illegal array pointer provided");
+		}
+		// NOTE(review): the 'false' passed to ifft() presumably disables its own scaling, applied explicitly below -- confirm.
+		if( accumulate ){
+			ARRAY_TYPE tmp(in);
+			FFT::instance()->ifft(&tmp,false);
+			axpy(ELEMENT_TYPE(sqrt(1.0/tmp.get_number_of_elements())),&tmp,out);
+			// The scaled axpy above is the only accumulation step.
+		}
+		else{
+			*out = *in;
+			FFT::instance()->ifft(out,false);
+			*out *= ELEMENT_TYPE(sqrt(1.0/out->get_number_of_elements())); // scale the array itself; no 'tmp' exists in this branch
+		}
+	}
+	// Combined operator: 'in' is copied to 'out', or added to it when 'accumulate' is set; no transform is executed.
+	virtual void mult_MH_M( ARRAY_TYPE *in, ARRAY_TYPE *out, bool accumulate = false )
+	{
+		if( accumulate )
+			*out += *in;
+		else
+			*out = *in;
+	}
+	// Returns the result of the base class clone helper applied to this operator.
+	virtual boost::shared_ptr< linearOperator< ARRAY_TYPE > > clone()
+	{
+		return linearOperator<ARRAY_TYPE>::clone(this);
+	}
+private:
+	typedef typename ARRAY_TYPE::element_type ELEMENT_TYPE;
+};
 }
diff --git a/toolboxes/operators/cpu/CMakeLists.txt b/toolboxes/operators/cpu/CMakeLists.txt
index e8f00e3..a2ef00a 100644
--- a/toolboxes/operators/cpu/CMakeLists.txt
+++ b/toolboxes/operators/cpu/CMakeLists.txt
@@ -15,8 +15,10 @@ include_directories(
 install(FILES 	
   hoIdentityOperator.h
   hoImageOperator.h
+  hoDiagonalOperator.h
+  hoDiagonalSumOperator.h
   hoFFTOperator.h
   hoPartialDerivativeOperator.h
   hoTvOperator.h
   hoTvPicsOperator.h
-  DESTINATION include)
+  DESTINATION include COMPONENT main)
diff --git a/toolboxes/operators/cpu/hoDiagonalOperator.h b/toolboxes/operators/cpu/hoDiagonalOperator.h
new file mode 100644
index 0000000..e691094
--- /dev/null
+++ b/toolboxes/operators/cpu/hoDiagonalOperator.h
@@ -0,0 +1,20 @@
+/** \file hoDiagonalOperator.h
+    \brief Diagonal matrix operator, CPU instantiation.
+*/
+
+#pragma once
+
+#include "hoNDArray_operators.h"
+#include "hoNDArray_elemwise.h"
+#include "hoNDArray_blas.h"
+#include "diagonalOperator.h"
+
+namespace Gadgetron{
+
+  template <class T> class hoDiagonalOperator : public diagonalOperator< hoNDArray<T> >
+  { // Binds the generic diagonalOperator to hoNDArray<T>; declares no members beyond the constructor and destructor.
+  public:
+    hoDiagonalOperator() : diagonalOperator< hoNDArray<T> >() {} // default-constructs the diagonalOperator base
+    virtual ~hoDiagonalOperator() {} // empty virtual destructor
+  };
+}
diff --git a/toolboxes/operators/cpu/hoDiagonalSumOperator.h b/toolboxes/operators/cpu/hoDiagonalSumOperator.h
new file mode 100644
index 0000000..a24ed3e
--- /dev/null
+++ b/toolboxes/operators/cpu/hoDiagonalSumOperator.h
@@ -0,0 +1,20 @@
+/** \file hoDiagonalSumOperator.h
+    \brief Sum of diagonal matrices operator, CPU instantiation.
+*/
+
+#pragma once
+
+#include "hoNDArray_operators.h"
+#include "hoNDArray_elemwise.h"
+#include "hoNDArray_blas.h"
+#include "diagonalSumOperator.h"
+
+namespace Gadgetron{
+
+  template <class T> class hoDiagonalSumOperator : public diagonalSumOperator< hoNDArray<T> >
+  { // Binds the generic diagonalSumOperator to hoNDArray<T>; declares no members beyond the constructor and destructor.
+  public:
+    hoDiagonalSumOperator() : diagonalSumOperator< hoNDArray<T> >() {} // default-constructs the diagonalSumOperator base
+    virtual ~hoDiagonalSumOperator() {} // empty virtual destructor
+  };
+}
diff --git a/toolboxes/operators/diagonalOperator.h b/toolboxes/operators/diagonalOperator.h
index ffe6361..66a349e 100644
--- a/toolboxes/operators/diagonalOperator.h
+++ b/toolboxes/operators/diagonalOperator.h
@@ -1,4 +1,4 @@
-/** \file laplaceOperator.h
+/** \file diagonalOperator.h
     \brief Base class for the diagonal matrix operators.
 */
 
@@ -25,48 +25,48 @@ namespace Gadgetron {
 
     virtual boost::shared_ptr<ARRAY_TYPE> get_diagonal() { return diagonal_; }
   
-    // Apply diagonal operator (twice)
-    virtual void mult_MH_M( ARRAY_TYPE *in, ARRAY_TYPE *out, bool accumulate )
-    {    
+    virtual void mult_M( ARRAY_TYPE *in, ARRAY_TYPE *out, bool accumulate = false )
+    {
       if( accumulate ) {
-	ARRAY_TYPE tmp(*in);
-	tmp *= *diagonal_;
-	tmp *= *diagonal_conj_;
-	*out += tmp;
+        ARRAY_TYPE tmp(*in);
+        tmp *= *diagonal_;
+        *out += tmp;
       }
       else{
-	*out = *in;
-	*out *= *diagonal_;
-	*out *= *diagonal_conj_;
+        *out = *in;
+        *out *= *diagonal_;
       }
     }
   
-    virtual void mult_M( ARRAY_TYPE *in, ARRAY_TYPE *out, bool accumulate = false )
+    virtual void mult_MH( ARRAY_TYPE *in, ARRAY_TYPE *out, bool accumulate = false )
     {
       if( accumulate ) {
-	ARRAY_TYPE tmp(*in);
-	tmp *= *diagonal_;
-	*out += tmp;
+        ARRAY_TYPE tmp(*in);
+        tmp *= *diagonal_conj_;
+        *out += tmp;
       }
       else{
-	*out = *in;
-	*out *= *diagonal_;
+        *out = *in;
+        *out *= *diagonal_conj_;
       }
     }
-  
-    virtual void mult_MH( ARRAY_TYPE *in, ARRAY_TYPE *out, bool accumulate = false )
-    {
+    
+    // Apply diagonal operator (twice)
+    virtual void mult_MH_M( ARRAY_TYPE *in, ARRAY_TYPE *out, bool accumulate )
+    {    
       if( accumulate ) {
-	ARRAY_TYPE tmp(*in);
-	tmp *= *diagonal_conj_;
-	*out += tmp;
+        ARRAY_TYPE tmp(*in);
+        tmp *= *diagonal_;
+        tmp *= *diagonal_conj_;
+        *out += tmp;
       }
       else{
-	*out = *in;
-	*out *= *diagonal_conj_;
+        *out = *in;
+        *out *= *diagonal_;
+        *out *= *diagonal_conj_;
       }
     }
-    
+  
     virtual boost::shared_ptr< linearOperator<ARRAY_TYPE> > clone() {
       return linearOperator<ARRAY_TYPE>::clone(this);
     }
diff --git a/toolboxes/operators/diagonalSumOperator.h b/toolboxes/operators/diagonalSumOperator.h
new file mode 100644
index 0000000..0432298
--- /dev/null
+++ b/toolboxes/operators/diagonalSumOperator.h
@@ -0,0 +1,95 @@
+/** \file diagonalSumOperator.h
+    \brief Operator to compute the sum over a set of diagonal matrices times a set of corresponding vectors.
+
+    The domain of this operator is a set of images, the codomain a single image. 
+    The sum is computed over the last dimension of the provided diagonal array.
+*/
+
+#pragma once
+
+#include "diagonalOperator.h"
+
+namespace Gadgetron {
+
+  template <class ARRAY_TYPE> class diagonalSumOperator : public diagonalOperator<ARRAY_TYPE>
+  {
+  public:
+  
+    diagonalSumOperator() : diagonalOperator<ARRAY_TYPE>() {}
+    virtual ~diagonalSumOperator() {}
+  
+    virtual void mult_M( ARRAY_TYPE *in, ARRAY_TYPE *out, bool accumulate = false )
+    {
+      if( !this->diagonal_ ){
+        throw std::runtime_error("diagonalSumOperator::mult_M failed: diagonal not set");
+      }       
+
+      const unsigned int num_phases = this->diagonal_->get_size(this->diagonal_->get_number_of_dimensions()-1);
+      const unsigned int elements_per_phase = this->diagonal_->get_number_of_elements()/num_phases;
+      
+      if( in->get_number_of_elements() != this->diagonal_->get_number_of_elements() ){
+        throw std::runtime_error("diagonalSumOperator::mult_M failed: array size mismatch between input image and diagonal");
+      }
+
+      if( out->get_number_of_elements() != elements_per_phase ){
+        throw std::runtime_error("diagonalSumOperator::mult_M failed: the output image domain should only be a single image");
+      }
+
+      if( !accumulate ) 
+        clear(out);
+
+      std::vector<size_t> dims = *out->get_dimensions();
+     
+      // Iterate over the last dimension of the provided diagonal image
+      //
+
+      for( unsigned int i=0; i<num_phases; i++ ){
+
+        ARRAY_TYPE tmp_in( &dims, in->get_data_ptr()+i*elements_per_phase );
+        ARRAY_TYPE tmp_diag( &dims, this->diagonal_->get_data_ptr()+i*elements_per_phase );
+
+        if(i==0 && !accumulate){
+          *out = tmp_in;
+          *out *= tmp_diag;
+        }
+        else{
+          ARRAY_TYPE tmp(&tmp_in);
+          tmp *= tmp_diag;
+          *out += tmp;
+        }
+      }
+    }
+    
+    virtual void mult_MH( ARRAY_TYPE *in, ARRAY_TYPE *out, bool accumulate = false )
+    {
+      if( !this->diagonal_conj_ ){
+        throw std::runtime_error("diagonalSumOperator::mult_MH failed: diagonal not set");
+      }       
+
+      const unsigned int num_phases = this->diagonal_conj_->get_size(this->diagonal_conj_->get_number_of_dimensions()-1);
+      const unsigned int elements_per_phase = this->diagonal_conj_->get_number_of_elements()/num_phases;
+      
+      if( in->get_number_of_elements() != elements_per_phase ){
+        throw std::runtime_error("diagonalSumOperator::mult_MH failed: the input image domain should only be a single image");
+      }
+
+      if( out->get_number_of_elements() != this->diagonal_conj_->get_number_of_elements() ){
+        throw std::runtime_error("diagonalSumOperator::mult_MH failed: array size mismatch between output image and diagonal");
+      }
+
+      if( !accumulate ){
+        *out = *this->diagonal_conj_;
+        *out *= *in; // multiplies all phases with the input
+      }
+      else{
+        ARRAY_TYPE tmp(this->diagonal_conj_.get());
+        tmp *= *in; // multiplies all phases with the input
+        *out += tmp;
+      }
+    }
+    
+    virtual boost::shared_ptr< linearOperator<ARRAY_TYPE> > clone() {
+      return linearOperator<ARRAY_TYPE>::clone(this);
+    }
+  };
+}
diff --git a/toolboxes/operators/encodedImageOperator.h b/toolboxes/operators/encodedImageOperator.h
index bcb48bb..e39fb33 100644
--- a/toolboxes/operators/encodedImageOperator.h
+++ b/toolboxes/operators/encodedImageOperator.h
@@ -28,7 +28,7 @@ namespace Gadgetron{
     virtual void mult_MH_M( ARRAY_TYPE_OPERATOR *in, ARRAY_TYPE_OPERATOR *out, bool accumulate = false )
     {    
       if( !encoding_operator_.get() ){
-	throw std::runtime_error("encodedImageOperator::mult_MH_M failed : encoding operator not set");
+        throw std::runtime_error("encodedImageOperator::mult_MH_M failed : encoding operator not set");
       }
     
       ARRAY_TYPE_OPERATOR tmp(in->get_dimensions());
diff --git a/toolboxes/operators/generalOperator.h b/toolboxes/operators/generalOperator.h
index 4103771..14d8f6c 100644
--- a/toolboxes/operators/generalOperator.h
+++ b/toolboxes/operators/generalOperator.h
@@ -20,13 +20,13 @@ namespace Gadgetron{
     typedef typename ARRAY::element_type ELEMENT_TYPE;
     typedef typename realType<ELEMENT_TYPE>::Type REAL;
 
-    generalOperator() : weight_(REAL(1)){};
+    generalOperator() : weight_(REAL(1)){}
 
     generalOperator(std::vector<size_t> *dims) : weight_(REAL(1)){
       set_domain_dimensions(dims);
     }
-    
-    virtual ~generalOperator(){};
+
+    virtual ~generalOperator();
 
     /**
      * @brief Calculates the gradient of the operator at point "in"
@@ -35,7 +35,6 @@ namespace Gadgetron{
      * @param[in] accumulate If false, overrides the output array. Otherwise adds result.
      */
     virtual void gradient(ARRAY* in, ARRAY* out, bool accumulate = false ) = 0;
-    
 
     /**
      * @brief Calculates the function value of the operator
@@ -44,7 +43,6 @@ namespace Gadgetron{
      */
     virtual REAL magnitude(ARRAY* in)=0;
 
-
     /**
      * Set the domain dimension (image size) of the operator
      * @param[in] dims Domain dimensions
@@ -54,7 +52,7 @@ namespace Gadgetron{
       if( dims == 0x0 ) throw std::runtime_error("Null pointer provided");
       domain_dims_ = *dims;  
     }
-    
+
     /**
      *
      * @return The domain dimensions (image size) of the operator
@@ -65,7 +63,7 @@ namespace Gadgetron{
       *dims = domain_dims_;
       return boost::shared_ptr< std::vector<size_t> >(dims);
     }
-    
+
     /**
      * Sets the weight of the operator
      * @param[in] weight
@@ -77,13 +75,14 @@ namespace Gadgetron{
      * @return Weight of the operator
      */
     virtual REAL get_weight(){ return weight_; }
-    
-    void* operator new (size_t bytes) { return ::new char[bytes]; }
-    void operator delete (void *ptr) { delete [] static_cast <char *> (ptr); } 
-    void * operator new(size_t s, void * p) { return p; }
-    
+
   protected:
     REAL weight_;
     std::vector<size_t> domain_dims_;
-  };  
+  };
+
+  template <class ARRAY> 
+  generalOperator<ARRAY>::~generalOperator()
+  {
+  }
 }
diff --git a/toolboxes/operators/gpu/CMakeLists.txt b/toolboxes/operators/gpu/CMakeLists.txt
index fa7691d..a23323c 100644
--- a/toolboxes/operators/gpu/CMakeLists.txt
+++ b/toolboxes/operators/gpu/CMakeLists.txt
@@ -6,14 +6,23 @@ if(WIN32)
 link_directories(${Boost_LIBRARY_DIRS})
 endif(WIN32)
 
-include_directories(		
+include_directories(
   ${CUDA_INCLUDE_DIRS}
   ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
+  ${CMAKE_SOURCE_DIR}/toolboxes/fft/gpu
   ${CMAKE_SOURCE_DIR}/toolboxes/nfft/gpu
   ${CMAKE_SOURCE_DIR}/toolboxes/operators
+  ${CUDA_INCLUDE_DIRS}
   )
 
-cuda_add_library(gpuoperators SHARED 
+cuda_add_library(gadgetron_toolbox_gpuoperators SHARED 
+  ../generalOperator.h
+  ../linearOperator.h
+  cuPartialDerivativeOperator.h
+  cuLaplaceOperator.h
+  cuTvOperator.h
+  cuTv1dOperator.h
+  cuConvolutionOperator.h
   cuPartialDerivativeOperator.cu
   cuLaplaceOperator.cu
   cuTvOperator.cu
@@ -21,19 +30,22 @@ cuda_add_library(gpuoperators SHARED
   cuConvolutionOperator.cu
   )
 
-target_link_libraries(gpuoperators 
-  gpucore 
-  gpunfft
+set_target_properties(gadgetron_toolbox_gpuoperators PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+
+target_link_libraries(gadgetron_toolbox_gpuoperators 
+  gadgetron_toolbox_gpucore 
+  gadgetron_toolbox_gpunfft
   ${Boost_LIBRARIES}
   ${CUDA_LIBRARIES}
   ${CUDA_CUBLAS_LIBRARIES} 
   )
 
-install(TARGETS gpuoperators DESTINATION lib)
+install(TARGETS gadgetron_toolbox_gpuoperators DESTINATION lib COMPONENT main)
 
-install(FILES 	
+install(FILES 
   cuImageOperator.h
   cuDiagonalOperator.h
+  cuDiagonalSumOperator.h
   cuPartialDerivativeOperator.h
   cuConvolutionOperator.h
   cuLaplaceOperator.h
@@ -43,9 +55,11 @@ install(FILES
   cuDownsampleOperator.h
   cuFFTOperator.h
   cuUpsampleOperator.h
+  hoCuIdentityOperator.h
+  hoCuPartialDerivativeOperator.h
   hoCuTvOperator.h
   hoCuTvPicsOperator.h
   hoCuEncodingOperatorContainer.h
   gpuoperators_export.h
   hoCuOperator.h
-  DESTINATION include)
+  DESTINATION include COMPONENT main)
diff --git a/toolboxes/operators/gpu/cuConvolutionOperator.cu b/toolboxes/operators/gpu/cuConvolutionOperator.cu
index 967243d..65898c6 100644
--- a/toolboxes/operators/gpu/cuConvolutionOperator.cu
+++ b/toolboxes/operators/gpu/cuConvolutionOperator.cu
@@ -7,7 +7,7 @@ namespace Gadgetron {
 
   // Mirror, but keep the origin unchanged
   template<class T, unsigned int D> __global__ void
-  origin_mirror_kernel( vector_td<unsigned int,D> matrix_size, vector_td<unsigned int,D> origin, T *in, T *out, bool zero_fill )
+  origin_mirror_kernel( vector_td<unsigned int,D> matrix_size, vector_td<unsigned int,D> origin, const T * __restrict__ in, T * __restrict__ out, bool zero_fill )
   {
     const unsigned int idx = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x+threadIdx.x;
     
diff --git a/toolboxes/operators/gpu/cuConvolutionOperator.h b/toolboxes/operators/gpu/cuConvolutionOperator.h
index ac23cd4..b2ca106 100644
--- a/toolboxes/operators/gpu/cuConvolutionOperator.h
+++ b/toolboxes/operators/gpu/cuConvolutionOperator.h
@@ -4,12 +4,11 @@
 
 #pragma once
 
-
+#include "gpuoperators_export.h"
 #include "cuNDArray_math.h"
 #include "cuNDFFT.h"
 #include "vector_td_utilities.h"
 #include "convolutionOperator.h"
-#include "gpuoperators_export.h"
 
 namespace Gadgetron{
 
diff --git a/toolboxes/operators/gpu/cuDiagonalOperator.h b/toolboxes/operators/gpu/cuDiagonalOperator.h
index 5f3c038..83dadde 100644
--- a/toolboxes/operators/gpu/cuDiagonalOperator.h
+++ b/toolboxes/operators/gpu/cuDiagonalOperator.h
@@ -1,5 +1,5 @@
 /** \file cuDiagonalOperator.h
-    \brief Diagonal matrix regularization operator, GPU based.
+    \brief Diagonal matrix operator, GPU instantiation.
 */
 
 #pragma once
diff --git a/toolboxes/operators/gpu/cuDiagonalSumOperator.h b/toolboxes/operators/gpu/cuDiagonalSumOperator.h
new file mode 100644
index 0000000..e6900e4
--- /dev/null
+++ b/toolboxes/operators/gpu/cuDiagonalSumOperator.h
@@ -0,0 +1,20 @@
+/** \file cuDiagonalSumOperator.h
+    \brief Sum of diagonal matrices, GPU instantiation.
+*/
+
+#pragma once
+
+#include "cuNDArray_operators.h"
+#include "cuNDArray_elemwise.h"
+#include "cuNDArray_blas.h"
+#include "diagonalSumOperator.h"
+
+namespace Gadgetron{
+
+  template <class T> class cuDiagonalSumOperator : public diagonalSumOperator< cuNDArray<T> >
+  {
+  public:
+    cuDiagonalSumOperator() : diagonalSumOperator< cuNDArray<T> >() {}
+    virtual ~cuDiagonalSumOperator() {}
+  };
+}
diff --git a/toolboxes/operators/gpu/cuLaplaceOperator.cu b/toolboxes/operators/gpu/cuLaplaceOperator.cu
index ded5974..24ff704 100644
--- a/toolboxes/operators/gpu/cuLaplaceOperator.cu
+++ b/toolboxes/operators/gpu/cuLaplaceOperator.cu
@@ -22,7 +22,7 @@ namespace Gadgetron{
 
   template<class T, unsigned int D, unsigned int dim> class inner_laplace_functor{
   public:
-		static __device__ __inline__ void apply(T& val,const T* in, const typename intd<D>::Type dims,const typename intd<D>::Type co, typename intd<D>::Type& stride){
+		static __device__ __inline__ void apply(T& val,const T* __restrict__ in, const typename intd<D>::Type dims,const typename intd<D>::Type co, typename intd<D>::Type& stride){
 			for (int d = -1; d < 2; d++)
 				stride[dim]=d;
 				inner_laplace_functor<T,D,dim-1>::apply(val,in,dims,co,stride);
@@ -30,14 +30,14 @@ namespace Gadgetron{
   };
   template<class T, unsigned int D> class inner_laplace_functor<T,D,0>{
   public:
-  	static __device__ __inline__ void apply(T& val,const T* in, const typename intd<D>::Type dims,const typename intd<D>::Type co, typename intd<D>::Type& stride){
+  	static __device__ __inline__ void apply(T& val,const T* __restrict__ in, const typename intd<D>::Type dims,const typename intd<D>::Type co, typename intd<D>::Type& stride){
   		typename intd<D>::Type coN = (co+dims+stride)%dims;
   		val -= in[co_to_idx<D>(coN,dims)];
   	}
   };
 
   template<class REAL, class T, unsigned int D> __global__ void
-  laplace_kernel( typename intd<D>::Type dims, T *in, T *out )
+  laplace_kernel( typename intd<D>::Type dims, const T * __restrict__ in, T * __restrict__ out )
   {  
     const int idx = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x + threadIdx.x;
     if( idx < prod(dims) ){
diff --git a/toolboxes/operators/gpu/cuLaplaceOperator.h b/toolboxes/operators/gpu/cuLaplaceOperator.h
index 4419007..a4360f3 100644
--- a/toolboxes/operators/gpu/cuLaplaceOperator.h
+++ b/toolboxes/operators/gpu/cuLaplaceOperator.h
@@ -4,11 +4,10 @@
 
 #pragma once
 
+#include "gpuoperators_export.h"
 #include "cuNDArray_math.h"
 #include "laplaceOperator.h"
 
-#include "gpuoperators_export.h"
-
 namespace Gadgetron{
 
   template < class T, unsigned int D> class EXPORTGPUOPERATORS cuLaplaceOperator : public laplaceOperator<D, cuNDArray<T> >
diff --git a/toolboxes/operators/gpu/cuPartialDerivativeOperator.cu b/toolboxes/operators/gpu/cuPartialDerivativeOperator.cu
index 0f5e28c..1f2b7d1 100644
--- a/toolboxes/operators/gpu/cuPartialDerivativeOperator.cu
+++ b/toolboxes/operators/gpu/cuPartialDerivativeOperator.cu
@@ -13,7 +13,7 @@ namespace Gadgetron{
   template<class T, unsigned int D> __global__ void
   first_order_partial_derivative_kernel( typename intd<D>::Type stride, 
                                          typename intd<D>::Type dims, 
-                                         T *in, T *out )
+                                         const T  * __restrict__ in, T * __restrict__ out )
   {
     const int idx = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x + threadIdx.x;
     if( idx < prod(dims) ){
@@ -36,7 +36,7 @@ namespace Gadgetron{
   second_order_partial_derivative_kernel( typename intd<D>::Type forwards_stride, 
                                           typename intd<D>::Type adjoint_stride, 
                                           typename intd<D>::Type dims, 
-                                          T *in, T *out )
+                                          const T  * __restrict__ in, T * __restrict__ out )
   {
     const int idx = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x + threadIdx.x;
     if( idx < prod(dims) ){
@@ -75,7 +75,7 @@ namespace Gadgetron{
     dim3 dimBlock( dims.vec[0] );
     dim3 dimGrid( 1, dims.vec[D-1] );
   
-    for( unsigned int d=1; d<D-1; d++ )
+    for(int d=1; d<D-1; d++ )
       dimGrid.x *= dims.vec[d];
   
     size_t elements = in->get_number_of_elements();
@@ -105,7 +105,7 @@ namespace Gadgetron{
     dim3 dimBlock( dims.vec[0] );
     dim3 dimGrid( 1, dims.vec[D-1] );
   
-    for( unsigned int d=1; d<D-1; d++ )
+    for(int d=1; d<D-1; d++ )
       dimGrid.x *= dims.vec[d];
   
     size_t elements = in->get_number_of_elements();
diff --git a/toolboxes/operators/gpu/cuPartialDerivativeOperator.h b/toolboxes/operators/gpu/cuPartialDerivativeOperator.h
index 3e47e71..d29dc0f 100644
--- a/toolboxes/operators/gpu/cuPartialDerivativeOperator.h
+++ b/toolboxes/operators/gpu/cuPartialDerivativeOperator.h
@@ -4,11 +4,10 @@
 
 #pragma once
 
+#include "gpuoperators_export.h"
 #include "cuNDArray_math.h"
 #include "partialDerivativeOperator.h"
 
-#include "gpuoperators_export.h"
-
 namespace Gadgetron{
 
   template <class T, unsigned int D> class EXPORTGPUOPERATORS cuPartialDerivativeOperator 
diff --git a/toolboxes/operators/gpu/cuTv1dOperator.cu b/toolboxes/operators/gpu/cuTv1dOperator.cu
index 5a58ef6..ec302c8 100644
--- a/toolboxes/operators/gpu/cuTv1dOperator.cu
+++ b/toolboxes/operators/gpu/cuTv1dOperator.cu
@@ -10,7 +10,7 @@
 
 using namespace Gadgetron;
 
-template<class REAL, class T, unsigned int D> static inline	__device__ REAL gradient(const T* in, const vector_td<int,D>& dims, vector_td<int,D>& co){
+template<class REAL, class T, unsigned int D> static inline	__device__ REAL gradient(const T* __restrict__ in, const vector_td<int,D>& dims, vector_td<int,D>& co){
 
 	T xi = in[co_to_idx<D>((co+dims)%dims,dims)];
 
@@ -23,7 +23,7 @@ template<class REAL, class T, unsigned int D> static inline	__device__ REAL grad
 }
 
 
-template<class REAL, class T, unsigned int D> static __global__ void tvGradient_kernel(const T* in, T* out, const vector_td<int,D> dims,REAL limit,REAL weight){
+template<class REAL, class T, unsigned int D> static __global__ void tvGradient_kernel(const T* __restrict__ in, T* __restrict__ out, const vector_td<int,D> dims,REAL limit,REAL weight){
 	const int idx = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x + threadIdx.x;
 	if( idx < prod(dims) ){
 		T xi = in[idx];
diff --git a/toolboxes/operators/gpu/cuTv1dOperator.h b/toolboxes/operators/gpu/cuTv1dOperator.h
index e0c0d68..340c365 100644
--- a/toolboxes/operators/gpu/cuTv1dOperator.h
+++ b/toolboxes/operators/gpu/cuTv1dOperator.h
@@ -4,10 +4,10 @@
 
 #pragma once
 
+#include "gpuoperators_export.h"
 #include "cuNDArray_math.h"
 #include "generalOperator.h"
 #include "complext.h"
-#include "gpuoperators_export.h"
 
 namespace Gadgetron{
   
diff --git a/toolboxes/operators/gpu/cuTvOperator.cu b/toolboxes/operators/gpu/cuTvOperator.cu
index ccc630f..a7c91f5 100644
--- a/toolboxes/operators/gpu/cuTvOperator.cu
+++ b/toolboxes/operators/gpu/cuTvOperator.cu
@@ -6,109 +6,118 @@
 #include <iostream>
 #include "check_CUDA.h"
 #include "cudaDeviceManager.h"
+#include <stdio.h>
 
 using namespace Gadgetron;
 
-template<class REAL, class T, unsigned int D> static inline  __device__ REAL gradient(const T* in, const vector_td<int,D>& dims, vector_td<int,D>& co)
+template<class REAL, class T, unsigned int D> static inline  __device__ REAL gradient(const T* __restrict__ in, const vector_td<int,D>& dims, vector_td<int,D>& co)
 {
-  REAL grad = REAL(0);
-  T xi = in[co_to_idx<D>((co+dims)%dims,dims)];
-  for (int i = 0; i < D; i++){
-    co[i]+=1;
-    T dt = in[co_to_idx<D>((co+dims)%dims,dims)];
-    grad += norm(xi-dt);
-    co[i]-=1;
-  }
-  return sqrt(grad);
+	REAL grad = REAL(0);
+	T xi = in[co_to_idx<D>((co+dims)%dims,dims)];
+	for (int i = 0; i < D; i++){
+		co[i]+=1;
+		T dt = in[co_to_idx<D>((co+dims)%dims,dims)];
+		grad += norm(xi-dt);
+		co[i]-=1;
+	}
+	return sqrt(grad);
 }
 
 
-template<class REAL, class T, unsigned int D> static __global__ void tvGradient_kernel(const T* in, T* out, const vector_td<int,D> dims,REAL limit,REAL weight)
+template<class REAL, class T, unsigned int D> static __global__ void tvGradient_kernel(const T* __restrict__ in, T* __restrict__ out, const vector_td<int,D> dims,REAL limit,REAL weight)
 {
-  const int idx = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x + threadIdx.x;
-  if( idx < prod(dims) ){
-    T xi = in[idx];
-    T result=T(0);
-
-    vector_td<int,D> co = idx_to_co<D>(idx, dims);
-
-    REAL grad = gradient<REAL,T,D>(in,dims,co);
-
-    if (grad > limit) {
-      result += REAL(D)*xi/grad;
-      for (int i = 0; i < D; i++){
-	co[i]+=1;
-	result -= in[co_to_idx<D>((co+dims)%dims,dims)]/grad;
-	co[i]-=1;
-      }
-    }
-
-    for (int i = 0; i < D; i++){
-      co[i]-=1;
-      grad = gradient<REAL,T,D>(in,dims,co);
-      if (grad > limit) {
-	result +=(xi-in[co_to_idx<D>((co+dims)%dims,dims)])/grad;
-      }
-      co[i]+=1;
-    }
-    out[idx] += result*weight;
-  }
+	const int idx = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x + threadIdx.x;
+	if( idx < prod(dims) ){
+		T xi = in[idx];
+		T result=T(0);
+
+		vector_td<int,D> co = idx_to_co<D>(idx, dims);
+
+		REAL grad = ::max(gradient<REAL,T,D>(in,dims,co),limit);
+
+		if (grad > limit) {
+			//result += REAL(D)*xi/grad;
+			for (int i = 0; i < D; i++){
+				co[i]+=1;
+				result += (xi-in[co_to_idx<D>((co+dims)%dims,dims)])/grad;
+				co[i]-=1;
+			}
+		}
+
+		for (int i = 0; i < D; i++){
+			co[i]-=1;
+			grad = ::max(gradient<REAL,T,D>(in,dims,co),limit);
+
+			if (grad > limit) {
+				result +=(xi-in[co_to_idx<D>((co+dims)%dims,dims)])/grad;
+			}
+			co[i]+=1;
+		}
+		out[idx] += result*weight;
+	}
 }
 
 
 
 
 
+
+
+
 template<class T, unsigned int D> void cuTvOperator<T,D>::gradient (cuNDArray<T> * in,cuNDArray<T> * out, bool accumulate)
 {
-  if (!accumulate) 
-    clear(out);
-
-  const typename intd<D>::Type dims = vector_td<int,D>( from_std_vector<size_t,D>(*(in->get_dimensions())));
-  int elements = in->get_number_of_elements();
-
-  int threadsPerBlock =std::min(prod(dims),cudaDeviceManager::Instance()->max_blockdim());
-  dim3 dimBlock( threadsPerBlock);
-  int totalBlocksPerGrid = std::max(1,prod(dims)/cudaDeviceManager::Instance()->max_blockdim());
-  dim3 dimGrid(totalBlocksPerGrid);
-
-  for (int i =0; i < (elements/prod(dims)); i++){
-    tvGradient_kernel<<<dimGrid,dimBlock>>>(in->get_data_ptr()+i*prod(dims),out->get_data_ptr()+i*prod(dims),dims,limit_,this->weight_);
-  }
-  
-  cudaDeviceSynchronize();
-  CHECK_FOR_CUDA_ERROR();
+	if (!accumulate)
+		clear(out);
+
+	const typename intd<D>::Type dims = vector_td<int,D>( from_std_vector<size_t,D>(*(in->get_dimensions())));
+	int elements = in->get_number_of_elements();
+
+	int threadsPerBlock =std::min(prod(dims),256); //Using hardcoded blockSize because we use quite a lot of registers
+
+	dim3 dimBlock( threadsPerBlock);
+	int totalBlocksPerGridx = std::min(std::max(1,prod(dims)/threadsPerBlock),cudaDeviceManager::Instance()->max_griddim());
+	int totalBlocksPerGridy = (prod(dims)-1)/(threadsPerBlock*totalBlocksPerGridx)+1;
+	dim3 dimGrid(totalBlocksPerGridx,totalBlocksPerGridy);
+
+	for (int i =0; i < (elements/prod(dims)); i++){
+		tvGradient_kernel<<<dimGrid,dimBlock>>>(in->get_data_ptr()+i*prod(dims),out->get_data_ptr()+i*prod(dims),dims,limit_,this->weight_);
+	}
+
+	//cudaDeviceSynchronize();
+	//CHECK_FOR_CUDA_ERROR();
 }
 
-template<class REAL, class T, unsigned int D> static __global__ void tvMagnitude_kernel(const T* in,T* out,const vector_td<int,D> dims,REAL limit,REAL weight)
+template<class REAL, class T, unsigned int D> static __global__ void tvMagnitude_kernel(const  T* __restrict__  in,T* __restrict__ out,const vector_td<int,D> dims,REAL limit,REAL weight)
 {
-  const int idx = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x + threadIdx.x;
-  if( idx < prod(dims) ){
-    vector_td<int,D> co = idx_to_co<D>(idx, dims);
-    REAL grad = gradient<REAL,T,D>(in,dims,co);
-    out[idx] = grad*weight;
-  }
+	const int idx = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x + threadIdx.x;
+	if( idx < prod(dims) ){
+		vector_td<int,D> co = idx_to_co<D>(idx, dims);
+		REAL grad = gradient<REAL,T,D>(in,dims,co);
+		//out[idx] =  (grad > limit) ? grad*weight : REAL(0);
+		out[idx] = grad*weight;
+	}
 }
 
 
 template<class T, unsigned int D> typename realType<T>::Type cuTvOperator<T,D>::magnitude (cuNDArray<T> * in)
 {
-  cuNDArray<T> out(in->get_dimensions());
-  const typename intd<D>::Type dims = vector_td<int,D>( from_std_vector<size_t,D>(*(in->get_dimensions())));
-  int elements = in->get_number_of_elements();
-
-  int threadsPerBlock =std::min(prod(dims),cudaDeviceManager::Instance()->max_blockdim());
-  dim3 dimBlock( threadsPerBlock);
-  int totalBlocksPerGrid = std::max(1,prod(dims)/cudaDeviceManager::Instance()->max_blockdim());
-  dim3 dimGrid(totalBlocksPerGrid);
-
-  for (int i =0; i < (elements/prod(dims)); i++){
-    tvMagnitude_kernel<<<dimGrid,dimBlock>>>(in->get_data_ptr()+i*prod(dims),out.get_data_ptr()+i*prod(dims),dims,limit_,this->weight_);
-  }
-
-  cudaDeviceSynchronize();
-  CHECK_FOR_CUDA_ERROR();
-  return asum(&out);
+	cuNDArray<T> out(in->get_dimensions());
+	const typename intd<D>::Type dims = vector_td<int,D>( from_std_vector<size_t,D>(*(in->get_dimensions())));
+	int elements = in->get_number_of_elements();
+
+	int threadsPerBlock =std::min(prod(dims),256); //Using hardcoded blockSize because we use quite a lot of registers
+	dim3 dimBlock( threadsPerBlock);
+	int totalBlocksPerGridx = std::min(std::max(1,prod(dims)/threadsPerBlock),cudaDeviceManager::Instance()->max_griddim());
+	int totalBlocksPerGridy = (prod(dims)-1)/(threadsPerBlock*totalBlocksPerGridx)+1;
+	dim3 dimGrid(totalBlocksPerGridx,totalBlocksPerGridy);
+
+	for (int i =0; i < (elements/prod(dims)); i++){
+		tvMagnitude_kernel<<<dimGrid,dimBlock>>>(in->get_data_ptr()+i*prod(dims),out.get_data_ptr()+i*prod(dims),dims,limit_,this->weight_);
+	}
+
+	//cudaDeviceSynchronize();
+	//CHECK_FOR_CUDA_ERROR();
+	return asum(&out);
 }
 
 template class EXPORTGPUOPERATORS cuTvOperator<float,1>;
diff --git a/toolboxes/operators/gpu/cuTvOperator.h b/toolboxes/operators/gpu/cuTvOperator.h
index 5fa70c0..a5cd2f5 100644
--- a/toolboxes/operators/gpu/cuTvOperator.h
+++ b/toolboxes/operators/gpu/cuTvOperator.h
@@ -4,11 +4,11 @@
 
 #pragma once
 
+#include "gpuoperators_export.h"
 #include "cuNDArray_math.h"
 #include "generalOperator.h"
 
 #include "complext.h"
-#include "gpuoperators_export.h"
 
 namespace Gadgetron{
 
@@ -22,7 +22,7 @@ namespace Gadgetron{
   public:
 
     cuTvOperator() : generalOperator<cuNDArray<T> >(){
-      limit_ = REAL(1e-8);
+      limit_ = REAL(0);
     }
 
     virtual ~cuTvOperator(){};
diff --git a/toolboxes/operators/gpu/gpuoperators_export.h b/toolboxes/operators/gpu/gpuoperators_export.h
index 3cd3385..0a9622b 100644
--- a/toolboxes/operators/gpu/gpuoperators_export.h
+++ b/toolboxes/operators/gpu/gpuoperators_export.h
@@ -6,7 +6,7 @@
 #define GPUOPERATORS_EXPORT_H_
 
 #if defined (WIN32)
-#if defined (__BUILD_GADGETRON_GPUOPERATORS__) || defined (gpusolvers_EXPORTS)
+#if defined (__BUILD_GADGETRON_GPUOPERATORS__)
 #define EXPORTGPUOPERATORS __declspec(dllexport)
 #else
 #define EXPORTGPUOPERATORS __declspec(dllimport)
diff --git a/toolboxes/operators/gpu/hoCuDiagonalOperator.h b/toolboxes/operators/gpu/hoCuDiagonalOperator.h
new file mode 100644
index 0000000..e262cf1
--- /dev/null
+++ b/toolboxes/operators/gpu/hoCuDiagonalOperator.h
@@ -0,0 +1,20 @@
+/** \file hoCuDiagonalOperator.h
+    \brief Diagonal matrix regularization operator for array type hoCuNDArray
+*/
+
+#pragma once
+
+#include "hoCuNDArray_operators.h"
+#include "hoCuNDArray_elemwise.h"
+#include "hoCuNDArray_blas.h"
+#include "diagonalOperator.h"
+
+namespace Gadgetron{
+
+  template <class T> class hoCuDiagonalOperator : public diagonalOperator< hoCuNDArray<T> >
+  {
+  public:
+    hoCuDiagonalOperator() : diagonalOperator< hoCuNDArray<T> >() {}
+    virtual ~hoCuDiagonalOperator() {}
+  };
+}
diff --git a/toolboxes/operators/gpu/hoCuIdentityOperator.h b/toolboxes/operators/gpu/hoCuIdentityOperator.h
new file mode 100644
index 0000000..40d7a0b
--- /dev/null
+++ b/toolboxes/operators/gpu/hoCuIdentityOperator.h
@@ -0,0 +1,28 @@
+/** \file hoCuIdentityOperator.h
+    \brief Instantiation of the identity operator for array type hoCuNDArray
+    
+    The file hoCuIdentityOperator.h is a convenience wrapper for the device independent identityOperator class.
+    The class hoCuIdentityOperator instantiates the identityOperator for the hoCuNDArray
+    and the header furthermore includes additional necessary header files.
+*/
+
+#pragma once
+
+#include "hoCuNDArray_math.h"
+#include "identityOperator.h"
+
+namespace Gadgetron{
+  
+  /** \class hoCuIdentityOperator
+      \brief Instantiation of the identity operator for array type hoCuNDArray
+      
+      The class hoCuIdentityOperator is a convenience wrapper for the device independent identityOperator.
+      hoCuIdentityOperator instantiates the identityOperator for type hoCuNDArray<T>.
+  */
+  template <class T> class hoCuIdentityOperator : public identityOperator< hoCuNDArray<T> >
+  {
+  public:    
+    hoCuIdentityOperator() : identityOperator< hoCuNDArray<T> >() {}
+    virtual ~hoCuIdentityOperator() {}
+  }; 
+}
diff --git a/toolboxes/operators/gpu/hoCuPartialDerivativeOperator.h b/toolboxes/operators/gpu/hoCuPartialDerivativeOperator.h
new file mode 100644
index 0000000..9a74bef
--- /dev/null
+++ b/toolboxes/operators/gpu/hoCuPartialDerivativeOperator.h
@@ -0,0 +1,94 @@
+#pragma once
+
+#include "partialDerivativeOperator.h"
+#include "cuPartialDerivativeOperator.h"
+#include "hoCuNDArray.h"
+#include "cudaDeviceManager.h"
+#include "cuNDArray_operators.h"
+#include "cuNDArray_elemwise.h"
+
+#include "hoPartialDerivativeOperator.h"
+
+namespace Gadgetron{
+
+  template <class T, unsigned int D> class hoCuPartialDerivativeOperator :
+    public linearOperator<hoCuNDArray<T> >
+  {
+  public: 
+  
+    hoCuPartialDerivativeOperator() : 
+      linearOperator<hoCuNDArray<T> >(),dev(),hoDev(),_dimension(0) {}
+  
+    hoCuPartialDerivativeOperator( unsigned int dimension ) : 
+      linearOperator<hoCuNDArray<T> >(),dev(dimension),hoDev(dimension), _dimension(dimension){ }
+
+    virtual ~hoCuPartialDerivativeOperator() {}
+      
+    virtual boost::shared_ptr< linearOperator<hoCuNDArray<T> > > clone() {
+      return linearOperator<hoCuNDArray<T> >::clone(this);
+    }
+
+    //TODO: Generalize to work if we can fit just the 1 single dimension on the gpu
+    virtual void mult_M(hoCuNDArray<T>* in, hoCuNDArray<T>* out, bool accumulate)
+    {
+      size_t free = cudaDeviceManager::Instance()->getFreeMemory();
+
+      if( free/sizeof(T) < in->get_number_of_elements()*2)
+	throw std::runtime_error("hoCuPartialDerivativeOperator: not enough device memory");
+      cuNDArray<T> cuIn(in);
+      cuNDArray<T> cuOut(out->get_dimensions());
+
+      if (accumulate) cuOut =cuNDArray<T>(out);
+
+      dev.mult_M(&cuIn,&cuOut,accumulate);
+
+      cudaMemcpy(out->get_data_ptr(),cuOut.get_data_ptr(),out->get_number_of_elements()*sizeof(T),cudaMemcpyDeviceToHost);
+    	//hoDev.mult_M(in,out,accumulate);
+    }
+
+    //TODO: Generalize to work if we can fit just the 1 single dimension on the gpu
+    virtual void mult_MH(hoCuNDArray<T>* in, hoCuNDArray<T>* out, bool accumulate)
+    {
+
+      size_t free = cudaDeviceManager::Instance()->getFreeMemory();
+
+      if( free/sizeof(T) < in->get_number_of_elements()*2)
+	throw std::runtime_error("hoCuPartialDerivativeOperator: not enough device memory");
+      cuNDArray<T> cuIn(in);
+      cuNDArray<T> cuOut(out->get_dimensions());
+
+      if (accumulate) cuOut =cuNDArray<T>(out);
+
+      dev.mult_MH(&cuIn,&cuOut,accumulate);
+
+      cudaMemcpy(out->get_data_ptr(),cuOut.get_data_ptr(),out->get_number_of_elements()*sizeof(T),cudaMemcpyDeviceToHost);
+
+    	//hoDev.mult_MH(in,out,accumulate);
+    }
+
+    //TODO: Generalize to work if we can fit just the 1 single dimension on the gpu
+    virtual void mult_MH_M(hoCuNDArray<T>* in, hoCuNDArray<T>* out, bool accumulate)
+    {
+
+      size_t free = cudaDeviceManager::Instance()->getFreeMemory();
+
+      if( free/sizeof(T) < in->get_number_of_elements()*2)
+	throw std::runtime_error("hoCuPartialDerivativeOperator: not enough device memory");
+      cuNDArray<T> cuIn(in);
+      cuNDArray<T> cuOut(out->get_dimensions());
+
+      if (accumulate) cuOut =cuNDArray<T>(out);
+
+      dev.mult_MH_M(&cuIn,&cuOut,accumulate);
+
+      cudaMemcpy(out->get_data_ptr(),cuOut.get_data_ptr(),out->get_number_of_elements()*sizeof(T),cudaMemcpyDeviceToHost);
+
+    	//hoDev.mult_MH_M(in,out,accumulate);
+    }
+
+  protected:
+    cuPartialDerivativeOperator<T,D> dev;
+    hoPartialDerivativeOperator<T,D> hoDev;
+    unsigned int _dimension;
+  };
+}
diff --git a/toolboxes/operators/gpu/hoCuTvOperator.h b/toolboxes/operators/gpu/hoCuTvOperator.h
index 600ae09..5dd1b3d 100644
--- a/toolboxes/operators/gpu/hoCuTvOperator.h
+++ b/toolboxes/operators/gpu/hoCuTvOperator.h
@@ -8,7 +8,7 @@
 
 namespace Gadgetron{
 
-template<class T, size_t D> class EXPORTGPUOPERATORS hoCuTvOperator :
+template<class T, size_t D> class hoCuTvOperator :
 public generalOperator< hoCuNDArray<T> >
 {
 
diff --git a/toolboxes/registration/optical_flow/CMakeLists.txt b/toolboxes/registration/optical_flow/CMakeLists.txt
index 6e02a83..7ecdbce 100644
--- a/toolboxes/registration/optical_flow/CMakeLists.txt
+++ b/toolboxes/registration/optical_flow/CMakeLists.txt
@@ -12,17 +12,20 @@ install(FILES
   opticalFlowSolver.h 
   resampleOperator.h
   opticalFlowOperator.h 
-  DESTINATION include)
+  DESTINATION include COMPONENT main)
 
 if(ARMADILLO_FOUND)
   if(${ARMADILLO_VERSION_STRING} VERSION_GREATER "3.819" )
     message("Compiling cpu based optical flow registration toolbox.")
-    add_subdirectory(cpu)
+    set(BUILD_CPU_OPTIMAL_FLOW_REG On)
   else (${ARMADILLO_VERSION_STRING} VERSION_GREATER "3.819" )
     message("Armadillo (at least version 3.820) not found. Not compiling cpu-based optical flow registration toolbox. ")  
+    set(BUILD_CPU_OPTIMAL_FLOW_REG Off)
   endif(${ARMADILLO_VERSION_STRING} VERSION_GREATER "3.819" )
 endif(ARMADILLO_FOUND)
 
+add_subdirectory(cpu)
+
 if (CUDA_FOUND)
   message("Compiling gpu based optical flow registration toolbox.")
   add_subdirectory(gpu)
diff --git a/toolboxes/registration/optical_flow/cpu/CMakeLists.txt b/toolboxes/registration/optical_flow/cpu/CMakeLists.txt
index 219fa2b..865dac1 100644
--- a/toolboxes/registration/optical_flow/cpu/CMakeLists.txt
+++ b/toolboxes/registration/optical_flow/cpu/CMakeLists.txt
@@ -1,35 +1,131 @@
-if (WIN32)
-ADD_DEFINITIONS(-D__BUILD_GADGETRON_CPUREG__)
-endif (WIN32)
-
-include_directories(   
-  ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/arma_math
-  ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow
-  ${ARMADILLO_INCLUDE_DIR}
-)
-
-add_library(cpureg ${LIBTYPE} 
-  hoOpticalFlowSolver.cpp
-  hoHSOpticalFlowSolver.cpp
-  hoCKOpticalFlowSolver.cpp
-  hoLinearResampleOperator.cpp
-  hoRegistration_utils.cpp
-  )
-
-target_link_libraries(cpureg 
-  cpucore
-  cpucore_math 
-  ${ARMADILLO_LIBRARIES}
-  )
-
-install(TARGETS cpureg DESTINATION lib)
-
-install(FILES
-  hoOpticalFlowSolver.h
-  hoHSOpticalFlowSolver.h
-  hoCKOpticalFlowSolver.h
-  hoRegistration_utils.h
-  cpureg_export.h
-  DESTINATION include)
-
-install(FILES hoLinearResampleOperator.h DESTINATION include)
+    if (WIN32)  # define __BUILD_GADGETRON_CPUREG__ for Windows builds only
+        ADD_DEFINITIONS(-D__BUILD_GADGETRON_CPUREG__)
+    endif (WIN32)
+    # On Windows, add the Boost library directories to the linker search path.
+    if(WIN32)
+        link_directories(${Boost_LIBRARY_DIRS})
+    endif(WIN32)
+    # Header search paths. NOTE(review): toolboxes/core and toolboxes/core/gpu each appear twice below.
+    include_directories(
+        ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow
+        ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow/cpu
+        ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow/cpu/transformation
+        ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow/cpu/solver
+        ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow/cpu/warper
+        ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow/cpu/dissimilarity
+        ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow/cpu/register
+        ${CMAKE_SOURCE_DIR}/toolboxes/registration/optical_flow/cpu/application
+        ${CMAKE_SOURCE_DIR}/toolboxes/core
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/image
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/algorithm
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/hostutils
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/cpu/math
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
+        ${CMAKE_SOURCE_DIR}/toolboxes/mri/pmri/gpu
+        ${CMAKE_SOURCE_DIR}/toolboxes/nfft/gpu
+        ${CMAKE_SOURCE_DIR}/toolboxes/core
+        ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
+        ${CMAKE_SOURCE_DIR}/toolboxes/operators
+        ${CMAKE_SOURCE_DIR}/toolboxes/operators/cpu
+        ${CMAKE_SOURCE_DIR}/toolboxes/solvers
+        ${CMAKE_SOURCE_DIR}/toolboxes/solvers/cpu
+        ${CMAKE_SOURCE_DIR}/gadgets/core
+        ${CMAKE_SOURCE_DIR}/toolboxes/gadgettools
+        ${Boost_INCLUDE_DIR}
+        ${ARMADILLO_INCLUDE_DIR} 
+        ${ACE_INCLUDE_DIR}
+        ${ISMRMRD_INCLUDE_DIR} )
+    # Sources and headers of the optical flow solvers and the linear resample operator (the only list with .cpp files).
+    set(opticalflow_files 
+                hoCKOpticalFlowSolver.cpp
+                hoCKOpticalFlowSolver.h
+                hoHSOpticalFlowSolver.cpp
+                hoHSOpticalFlowSolver.h
+                hoLinearResampleOperator.cpp
+                hoLinearResampleOperator.h
+                hoOpticalFlowSolver.cpp
+                hoOpticalFlowSolver.h )
+    # Headers under transformation/.
+    set(transformation_files transformation/hoImageRegTransformation.h
+                             transformation/hoImageRegParametricTransformation.h 
+                             transformation/hoImageRegHomogenousTransformation.h 
+                             transformation/hoImageRegRigid2DTransformation.h 
+                             transformation/hoImageRegRigid3DTransformation.h 
+                             transformation/hoImageRegNonParametricTransformation.h 
+                             transformation/hoImageRegDeformationField.h )
+    # Headers under solver/.
+    set(solver_files solver/hoImageRegSolver.h 
+                     solver/hoImageRegParametricSolver.h
+                     solver/hoImageRegParametricDownHillSolver.h
+                     solver/hoImageRegParametricGradientDescentSolver.h
+                     solver/hoImageRegNonParametricSolver.h
+                     solver/hoImageRegDeformationFieldSolver.h 
+                     solver/hoImageRegDeformationFieldBidirectionalSolver.h )
+    # Header under warper/.
+    set(warper_files warper/hoImageRegWarper.h)
+    # Headers under dissimilarity/ (the variable itself is named similarity_files).
+    set(similarity_files dissimilarity/hoImageRegDissimilarity.h
+                         dissimilarity/hoImageRegDissimilarityHistogramBased.h
+                         dissimilarity/hoImageRegDissimilarityLocalCCR.h
+                         dissimilarity/hoImageRegDissimilarityMutualInformation.h
+                         dissimilarity/hoImageRegDissimilarityNormalizedMutualInformation.h
+                         dissimilarity/hoImageRegDissimilaritySSD.h )
+    # Headers under register/.
+    set(register_files register/hoImageRegRegister.h
+                       register/hoImageRegParametricRegister.h
+                       register/hoImageRegNonParametricRegister.h
+                       register/hoImageRegDeformationFieldRegister.h 
+                       register/hoImageRegDeformationFieldBidirectionalRegister.h )
+    # Header under application/.
+    set(application_files application/hoImageRegContainer2DRegistration.h )
+    # The library target is only created when BUILD_CPU_OPTIMAL_FLOW_REG is set (see the parent CMakeLists.txt).
+    if ( BUILD_CPU_OPTIMAL_FLOW_REG )
+
+        add_library(gadgetron_toolbox_cpureg ${LIBTYPE} 
+                    cpureg_export.h
+                    ${opticalflow_files} 
+                    ${transformation_files} 
+                    ${solver_files} 
+                    ${warper_files}
+                    ${similarity_files} 
+                    ${register_files}
+                    ${application_files} )
+
+        set_target_properties(gadgetron_toolbox_cpureg PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+
+        # Link against the cpu core toolboxes, Armadillo and ACE (separate ACE entries for optimized and debug configurations).
+        target_link_libraries(gadgetron_toolbox_cpureg 
+                gadgetron_toolbox_cpucore
+                gadgetron_toolbox_cpucore_math 
+                ${ARMADILLO_LIBRARIES}
+                optimized ${ACE_LIBRARIES} debug ${ACE_DEBUG_LIBRARY} )
+
+    else ( BUILD_CPU_OPTIMAL_FLOW_REG )
+        message("cpureg library will not be built ... ")
+    endif ( BUILD_CPU_OPTIMAL_FLOW_REG )
+    # The optical flow files are grouped and installed only when the library is built.
+    if ( BUILD_CPU_OPTIMAL_FLOW_REG )
+        source_group(opticalflow FILES ${opticalflow_files})
+        install(FILES ${opticalflow_files} DESTINATION include COMPONENT main)
+    endif ( BUILD_CPU_OPTIMAL_FLOW_REG )
+    # Source groups for the header lists are defined regardless of BUILD_CPU_OPTIMAL_FLOW_REG.
+    source_group(transformation FILES ${transformation_files})
+    source_group(solver FILES ${solver_files})
+    source_group(warper FILES ${warper_files})
+    source_group(similarity FILES ${similarity_files})
+    source_group(register FILES ${register_files})
+    source_group(application FILES ${application_files})
+
+    if ( BUILD_CPU_OPTIMAL_FLOW_REG )
+        install(TARGETS gadgetron_toolbox_cpureg DESTINATION lib COMPONENT main)
+    endif ( BUILD_CPU_OPTIMAL_FLOW_REG )
+    # These headers are always installed. NOTE(review): cpureg_export.h is in no install list here - confirm that is intended.
+    install(FILES
+        ${transformation_files}
+        ${solver_files}
+        ${warper_files}
+        ${similarity_files}
+        ${register_files}
+        ${application_files}
+        DESTINATION include COMPONENT main)
diff --git a/toolboxes/registration/optical_flow/cpu/application/hoImageRegContainer2DRegistration.h b/toolboxes/registration/optical_flow/cpu/application/hoImageRegContainer2DRegistration.h
new file mode 100644
index 0000000..627e455
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/application/hoImageRegContainer2DRegistration.h
@@ -0,0 +1,1449 @@
+/** \file   hoImageRegContainer2DRegistration.h
+    \brief  Define the class to perform image registration over a 2D image container
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include <sstream>
+#include "hoNDArray.h"
+#include "hoNDImage.h"
+#include "hoNDInterpolator.h"
+#include "hoNDBoundaryHandler.h"
+#include "hoMatrix.h"
+#include "hoNDArray_utils.h"
+#include "hoNDArray_elemwise.h"
+#include "hoNDImage_util.h"
+#include "gtPlusISMRMRDReconUtil.h"
+
+// transformation
+#include "hoImageRegTransformation.h"
+#include "hoImageRegParametricTransformation.h"
+#include "hoImageRegDeformationField.h"
+
+// warper
+#include "hoImageRegWarper.h"
+
+// solver
+#include "hoImageRegDeformationFieldSolver.h"
+#include "hoImageRegParametricSolver.h"
+#include "hoImageRegDeformationFieldBidirectionalSolver.h"
+
+// dissimilarity
+#include "hoImageRegDissimilaritySSD.h"
+#include "hoImageRegDissimilarityLocalCCR.h"
+#include "hoImageRegDissimilarityMutualInformation.h"
+#include "hoImageRegDissimilarityNormalizedMutualInformation.h"
+
+// register
+#include "hoImageRegDeformationFieldRegister.h"
+#include "hoImageRegDeformationFieldBidirectionalRegister.h"
+
+// container2D
+#include "hoNDImageContainer2D.h"
+
+namespace Gadgetron
+{
+    /// Log the text that obj.print() writes to a stream, using GADGET_MSG.
+    template <typename ObjType> void printInfo(const ObjType& obj)
+    {
+        std::ostringstream buffer;
+        obj.print(buffer);
+        buffer << std::ends;
+        GADGET_MSG(buffer.str().c_str());
+    }
+    /// Modes for registration over an image container; trailing comments give the names used by the string conversion helpers.
+    enum GT_IMAGE_REG_CONTAINER_MODE
+    {
+        GT_IMAGE_REG_CONTAINER_PAIR_WISE,        // "Pair-wise"
+        GT_IMAGE_REG_CONTAINER_FIXED_REFERENCE,  // "FixedReference"
+        GT_IMAGE_REG_CONTAINER_PROGRESSIVE       // "Progressive"
+    };
+
+    /// Return the display name of a container registration mode:
+    /// "Pair-wise", "FixedReference" or "Progressive".
+    /// Any other value is reported through GADGET_ERROR_MSG and yields an empty string.
+    inline std::string getImageRegContainerModeName(GT_IMAGE_REG_CONTAINER_MODE v)
+    {
+        if ( v == GT_IMAGE_REG_CONTAINER_PAIR_WISE )
+        {
+            return "Pair-wise";
+        }
+
+        if ( v == GT_IMAGE_REG_CONTAINER_FIXED_REFERENCE )
+        {
+            return "FixedReference";
+        }
+
+        if ( v == GT_IMAGE_REG_CONTAINER_PROGRESSIVE )
+        {
+            return "Progressive";
+        }
+
+        // not one of the known modes: log it and hand back an empty name
+        GADGET_ERROR_MSG("Unrecognized image registration container mode type : " << v);
+        return std::string();
+    }
+
+    /// Parse a container registration mode from its name ("Pair-wise", "FixedReference" or "Progressive").
+    /// An unrecognized name is reported through GADGET_ERROR_MSG and falls back to
+    /// GT_IMAGE_REG_CONTAINER_PAIR_WISE, the mode setDefaultParameters() also selects.
+    inline GT_IMAGE_REG_CONTAINER_MODE getImageRegContainerModeType(const std::string& name)
+    {
+        // initialized up front: the previous code returned an indeterminate value for unknown names
+        GT_IMAGE_REG_CONTAINER_MODE v = GT_IMAGE_REG_CONTAINER_PAIR_WISE;
+
+        if ( name == "FixedReference" )
+        {
+            v = GT_IMAGE_REG_CONTAINER_FIXED_REFERENCE;
+        }
+        else if ( name == "Progressive" )
+        {
+            v = GT_IMAGE_REG_CONTAINER_PROGRESSIVE;
+        }
+        else if ( name != "Pair-wise" )
+        {
+            GADGET_ERROR_MSG("Unrecognized image registration container mode name : " << name);
+        }
+
+        return v;
+    }
+
+    /// perform the image registration over an image container2D
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    class hoImageRegContainer2DRegistration
+    {
+    public:
+
+        typedef hoImageRegContainer2DRegistration<ValueType, CoordType, DIn, DOut> Self;
+
+        typedef hoNDImage<ValueType, DOut> TargetType;
+        typedef hoNDImage<ValueType, DIn> SourceType;
+        /// convenience aliases for 2-D and 3-D images of ValueType
+        typedef hoNDImage<ValueType, 2> Target2DType;
+        typedef Target2DType Source2DType;
+
+        typedef hoNDImage<ValueType, 3> Target3DType;
+        typedef Target2DType Source3DType; // NOTE(review): aliases the 2-D image type; possibly meant Target3DType - confirm
+
+        typedef ValueType T;
+        typedef ValueType element_type;
+        typedef ValueType value_type;
+
+        typedef CoordType coord_type;
+
+        /// boundary handler and interpolator for target image
+        typedef hoNDBoundaryHandler<TargetType> BoundaryHandlerTargetType;
+        typedef hoNDBoundaryHandlerFixedValue<TargetType> BoundaryHandlerTargetFixedValueType;
+        typedef hoNDBoundaryHandlerBorderValue<TargetType> BoundaryHandlerTargetBorderValueType;
+        typedef hoNDBoundaryHandlerPeriodic<TargetType> BoundaryHandlerTargetPeriodicType;
+        typedef hoNDBoundaryHandlerMirror<TargetType> BoundaryHandlerTargetMirrorType;
+
+        typedef hoNDInterpolator<TargetType> InterpTargetType;
+        typedef hoNDInterpolatorLinear<TargetType> InterpTargetLinearType;
+        typedef hoNDInterpolatorNearestNeighbor<TargetType> InterpTargetNearestNeighborType;
+        typedef hoNDInterpolatorBSpline<TargetType, DIn> InterpTargetBSplineType;
+
+        /// boundary handler and interpolator for source image
+        typedef hoNDBoundaryHandler<SourceType> BoundaryHandlerSourceType;
+        typedef hoNDBoundaryHandlerFixedValue<SourceType> BoundaryHandlerSourceFixedValueType;
+        typedef hoNDBoundaryHandlerBorderValue<SourceType> BoundaryHandlerSourceBorderValueType;
+        typedef hoNDBoundaryHandlerPeriodic<SourceType> BoundaryHandlerSourcePeriodicType;
+        typedef hoNDBoundaryHandlerMirror<SourceType> BoundaryHandlerSourceMirrorType;
+
+        typedef hoNDInterpolator<SourceType> InterpSourceType;
+        typedef hoNDInterpolatorLinear<SourceType> InterpSourceLinearType;
+        typedef hoNDInterpolatorNearestNeighbor<SourceType> InterpSourceNearestNeighborType;
+        typedef hoNDInterpolatorBSpline<SourceType, DIn> InterpSourceBSplineType;
+
+        /// warper type
+        typedef hoImageRegWarper<ValueType, CoordType, DIn, DOut> WarperType;
+
+        /// image dissimilarity type
+        typedef hoImageRegDissimilarity<ValueType, DOut> DissimilarityType;
+
+        /// transformation
+        typedef hoImageRegParametricTransformation<CoordType, DIn, DOut> TransformationParametricType;
+
+        typedef hoImageRegDeformationField<CoordType, DIn> TransformationDeformationFieldType;
+        typedef typename TransformationDeformationFieldType::input_point_type input_point_type;
+        typedef typename TransformationDeformationFieldType::output_point_type output_point_type;
+        typedef typename TransformationDeformationFieldType::jacobian_position_type jacobian_position_type;
+        typedef typename TransformationDeformationFieldType::DeformationFieldType DeformationFieldType;
+
+        /// container
+        typedef hoNDImageContainer2D<TargetType> TargetContinerType;
+        typedef hoNDImageContainer2D<SourceType> SourceContinerType;
+        typedef hoNDImageContainer2D<DeformationFieldType> DeformationFieldContinerType;
+
+        hoImageRegContainer2DRegistration(unsigned int resolution_pyramid_levels=3, bool use_world_coordinates=false, ValueType bg_value=ValueType(0));
+        virtual ~hoImageRegContainer2DRegistration();
+
+        /// set the default parameters
+        virtual bool setDefaultParameters(unsigned int resolution_pyramid_levels=3, bool use_world_coordinates=false);
+
+        /// register two images
+        /// transform or deform can contain the initial transformation or deformation
+        /// if warped == NULL, warped images will not be computed
+        virtual bool registerTwoImagesParametric(const TargetType& target, const SourceType& source, bool initial, TargetType* warped, TransformationParametricType& transform);
+        virtual bool registerTwoImagesDeformationField(const TargetType& target, const SourceType& source, bool initial, TargetType* warped, DeformationFieldType** deform);
+        virtual bool registerTwoImagesDeformationFieldBidirectional(const TargetType& target, const SourceType& source, bool initial, TargetType* warped, DeformationFieldType** deform, DeformationFieldType** deformInv);
+
+        virtual bool registerOverContainer2DPairWise(TargetContinerType& targetContainer, SourceContinerType& sourceContainer, bool warped);
+        virtual bool registerOverContainer2DFixedReference(TargetContinerType& targetContainer, const std::vector<unsigned int>& referenceFrame, bool warped);
+        virtual bool registerOverContainer2DProgressive(TargetContinerType& targetContainer, const std::vector<unsigned int>& referenceFrame);
+
+        /// warp image containers
+        /// For every (row, col) position, warper.warp() is called with the target and source images at that
+        /// position and with deformation_field[ii](row, col) set as deformation field ii (ii < DIn) of the
+        /// transformation; the result is written to warppedContainer. \p bh selects the boundary handler.
+        /// Returns false when an exception is caught (and presumably when a GADGET_CHECK_RETURN_FALSE fails).
+        template <typename ValueType2> 
+        bool warpContainer2D(const hoNDImageContainer2D< hoNDImage<ValueType2, DOut> >& targetContainer, 
+                             const hoNDImageContainer2D< hoNDImage<ValueType2, DIn> >& sourceContainer, 
+                             DeformationFieldContinerType deformation_field[], 
+                             hoNDImageContainer2D< hoNDImage<ValueType2, DOut> >& warppedContainer,
+                             Gadgetron::GT_BOUNDARY_CONDITION bh=GT_BOUNDARY_CONDITION_FIXEDVALUE)
+        {
+            try
+            {
+                typedef hoNDImage<ValueType2, DOut> ImageTargetType;
+                typedef hoNDImage<ValueType2, DIn> ImageSourceType;
+                size_t R = sourceContainer.rows();
+                std::vector<size_t> cols = sourceContainer.cols();
+                // the target container is compared against the source container and against deformation_field[0] only
+                GADGET_CHECK_RETURN_FALSE(targetContainer.dimensions_equal_container(sourceContainer));
+                GADGET_CHECK_RETURN_FALSE(targetContainer.dimensions_equal_container(deformation_field[0]));
+                // if the output container does not match the target container, it is replaced by a copy of the target
+                if ( !targetContainer.dimensions_equal_container(warppedContainer) )
+                {
+                    GADGET_CHECK_RETURN_FALSE(warppedContainer.copyFrom(targetContainer));
+                }
+                // single row: the omp for loop runs over the images (columns) of row 0
+                if ( R == 1 )
+                {
+                    long long N = (long long)cols[0];
+                    long long c;
+                    #pragma omp parallel private(c) shared(N, targetContainer, sourceContainer, warppedContainer, deformation_field, bh) if ( DIn==2 )
+                    {
+                        // declared inside the parallel region, so each thread works with its own instances
+                        hoImageRegDeformationField<CoordType, DIn> deformTransform;
+                        hoNDBoundaryHandlerFixedValue< ImageSourceType > bhFixedValue;
+                        hoNDBoundaryHandlerBorderValue< ImageSourceType > bhBorderValue;
+                        hoNDBoundaryHandlerPeriodic< ImageSourceType > bhPeriodic;
+                        hoNDBoundaryHandlerMirror< ImageSourceType > bhMirror;
+                        hoNDInterpolatorBSpline<ImageSourceType, DIn> interpBSpline(5);
+                        hoImageRegWarper<ValueType2, CoordType, DIn, DOut> warper;
+                        warper.setBackgroundValue(bg_value_);
+                        warper.setTransformation(deformTransform);
+                        warper.setInterpolator(interpBSpline);
+
+                        #pragma omp for 
+                        for ( c=0; c<N; c++ )
+                        {
+                            const ImageTargetType& target = targetContainer(0, c);
+                            ImageSourceType& source = const_cast<ImageSourceType&>(sourceContainer(0, c));
+                            ImageTargetType& warpped = warppedContainer(0, c);
+                            // NOTE(review): only bhFixedValue gets setArray() here; confirm the other handlers receive the source via setBoundaryHandler()
+                            bhFixedValue.setArray( source );
+                            interpBSpline.setArray( source );
+                            // pick the boundary handler requested by bh; unknown values fall back to the fixed-value handler
+                            if ( bh == GT_BOUNDARY_CONDITION_FIXEDVALUE )
+                                interpBSpline.setBoundaryHandler(bhFixedValue);
+                            else if ( bh == GT_BOUNDARY_CONDITION_BORDERVALUE )
+                                interpBSpline.setBoundaryHandler(bhBorderValue);
+                            else if ( bh == GT_BOUNDARY_CONDITION_PERIODIC )
+                                interpBSpline.setBoundaryHandler(bhPeriodic);
+                            else if ( bh == GT_BOUNDARY_CONDITION_MIRROR )
+                                interpBSpline.setBoundaryHandler(bhMirror);
+                            else
+                                interpBSpline.setBoundaryHandler(bhFixedValue);
+                            // one deformation field per input dimension, taken from the same container position
+                            for ( unsigned int ii=0; ii<DIn; ii++ )
+                            {
+                                deformTransform.setDeformationField( deformation_field[ii](0, c), ii );
+                            }
+
+                            warper.warp(target, source, use_world_coordinates_, warpped);
+                        }
+                    }
+                }
+                else
+                {
+                    // several rows: the omp for loop runs over the rows, each thread walks the columns of its rows
+                    long long r, c;
+                    #pragma omp parallel default(none) private(r, c) shared(targetContainer, sourceContainer, warppedContainer, deformation_field, R, cols, bh) if ( DIn==2 )
+                    {
+                        hoImageRegDeformationField<CoordType, DIn> deformTransform;
+                        hoNDBoundaryHandlerFixedValue< ImageSourceType > bhFixedValue;
+                        hoNDBoundaryHandlerBorderValue< ImageSourceType > bhBorderValue;
+                        hoNDBoundaryHandlerPeriodic< ImageSourceType > bhPeriodic;
+                        hoNDBoundaryHandlerMirror< ImageSourceType > bhMirror;
+                        hoNDInterpolatorBSpline<ImageSourceType, DIn> interpBSpline(5);
+
+                        hoImageRegWarper<ValueType2, CoordType, DIn, DOut> warper;
+                        warper.setBackgroundValue(bg_value_);
+                        warper.setTransformation(deformTransform);
+                        warper.setInterpolator(interpBSpline);
+
+                        #pragma omp for 
+                        for ( r=0; r<(long long)R; r++ )
+                        {
+                            long long N = (long long)cols[r];
+                            for ( c=0; c<N; c++ )
+                            {
+                                const ImageTargetType& target = targetContainer(r, c);
+                                ImageSourceType& source = const_cast<ImageSourceType&>(sourceContainer(r, c));
+                                ImageTargetType& warpped = warppedContainer(r, c);
+
+                                bhFixedValue.setArray( source );
+                                interpBSpline.setArray( source );
+
+                                if ( bh == GT_BOUNDARY_CONDITION_FIXEDVALUE )
+                                    interpBSpline.setBoundaryHandler(bhFixedValue);
+                                else if ( bh == GT_BOUNDARY_CONDITION_BORDERVALUE )
+                                    interpBSpline.setBoundaryHandler(bhBorderValue);
+                                else if ( bh == GT_BOUNDARY_CONDITION_PERIODIC )
+                                    interpBSpline.setBoundaryHandler(bhPeriodic);
+                                else if ( bh == GT_BOUNDARY_CONDITION_MIRROR )
+                                    interpBSpline.setBoundaryHandler(bhMirror);
+                                else
+                                    interpBSpline.setBoundaryHandler(bhFixedValue);
+
+                                for ( unsigned int ii=0; ii<DIn; ii++ )
+                                {
+                                    deformTransform.setDeformationField( deformation_field[ii](r, c), ii );
+                                }
+
+                                warper.warp(target, source, use_world_coordinates_, warpped);
+                            }
+                        }
+                    }
+                }
+            }
+            catch(...)
+            {
+                GADGET_ERROR_MSG("Errors happened in hoImageRegContainer2DRegistration<...>::warpContainer2D(...) ... ");
+                return false;
+            }
+
+            return true;
+        }
+
+        /// print the class information
+        virtual void print(std::ostream& os) const;
+
+        // ----------------------------------
+        // parameters
+        // ----------------------------------
+
+        /// mode for registration over the container
+        GT_IMAGE_REG_CONTAINER_MODE container_reg_mode_;
+
+        /// mode for transformation
+        GT_IMAGE_REG_TRANSFORMATION container_reg_transformation_;
+
+        /// back ground values, used to mark regions in the target image which will not be warped
+        ValueType bg_value_;
+
+        /// whether to perform world coordinate registration
+        bool use_world_coordinates_;
+
+        /// number of resolution pyramid levels
+        unsigned int resolution_pyramid_levels_;
+
+        /// number of iterations for every pyramid level
+        std::vector<unsigned int> max_iter_num_pyramid_level_;
+
+        /// dissimilarity
+        GT_IMAGE_DISSIMILARITY dissimilarity_type_;
+
+        /// threshold for dissimilarity for every pyramid level
+        std::vector<ValueType> dissimilarity_thres_pyramid_level_;
+
+        /// number of search size division for every pyramid level
+        std::vector<unsigned int> div_num_pyramid_level_;
+
+        /// parameters for dissimilarity measures, for every paramid level
+        /// LocalCCR
+        std::vector<std::vector<ValueType> > dissimilarity_LocalCCR_sigmaArg_;
+
+        /// Histogram based
+        /// Mutual information
+        std::vector<ValueType> dissimilarity_MI_betaArg_;
+
+        /// regularization strength for every pyramid level
+        /// if regularization_hilbert_strength_world_coordinate_=true, this strength is in the unit of world coordinate
+        /// if regularization_hilbert_strength_world_coordinate_=false, this strength is in the unit of pixel
+        bool regularization_hilbert_strength_world_coordinate_;
+        std::vector< std::vector<ValueType> > regularization_hilbert_strength_pyramid_level_;
+
+        /// boundary handler type
+        std::vector<GT_BOUNDARY_CONDITION> boundary_handler_type_warper_;
+        std::vector<GT_IMAGE_INTERPOLATOR> interp_type_warper_;
+
+        /// number of iterations to improve the estimation of the inverse transform
+        std::vector<unsigned int> inverse_deform_enforce_iter_pyramid_level_;
+        /// weight to update the estimation of the inverse transform, must be within [0 1]
+        std::vector<CoordType> inverse_deform_enforce_weight_pyramid_level_;
+
+        /// in-FOV constraint
+        bool apply_in_FOV_constraint_;
+
+        /// verbose mode
+        bool verbose_;
+
+        // ----------------------------------
+        // debug and timing
+        // ----------------------------------
+        // clock for timing
+        Gadgetron::GadgetronTimer gt_timer1_;
+        Gadgetron::GadgetronTimer gt_timer2_;
+        Gadgetron::GadgetronTimer gt_timer3_;
+
+        bool performTiming_;
+
+        // exporter
+        Gadgetron::gtPlus::gtPlusIOAnalyze gt_exporter_;
+
+        // debug folder
+        std::string debugFolder_;
+
+        // ----------------------------------
+        // registration results
+        // ----------------------------------
+
+        /// warpped images
+        TargetContinerType warped_container_;
+
+        /// for parametric registration
+        std::vector< std::vector<TransformationParametricType*> > parametric_tranformation_;
+
+        /// deformation field registration
+        DeformationFieldContinerType deformation_field_[DIn];
+        DeformationFieldContinerType deformation_field_inverse_[DIn];
+
+    protected:
+
+        bool initialize(const TargetContinerType& targetContainer, bool warped);
+
+    };
+
+    /// Stores the three arguments, disables timing-in-destruction on the three timers and loads the remaining defaults via setDefaultParameters().
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    hoImageRegContainer2DRegistration<ValueType, CoordType, DIn, DOut>::
+    hoImageRegContainer2DRegistration(unsigned int resolution_pyramid_levels, bool use_world_coordinates, ValueType bg_value) 
+    : bg_value_(bg_value), use_world_coordinates_(use_world_coordinates), resolution_pyramid_levels_(resolution_pyramid_levels), performTiming_(false)
+    {
+        gt_timer1_.set_timing_in_destruction(false);
+        gt_timer2_.set_timing_in_destruction(false);
+        gt_timer3_.set_timing_in_destruction(false);
+        GADGET_CHECK_THROW(this->setDefaultParameters(resolution_pyramid_levels, use_world_coordinates));
+    }
+
+    /// Destructor: deletes every transformation object referenced from parametric_tranformation_
+    /// and resets the corresponding entries to NULL.
+    /// The vectors themselves are left in place.
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    hoImageRegContainer2DRegistration<ValueType, CoordType, DIn, DOut>::
+    ~hoImageRegContainer2DRegistration()
+    {
+        const size_t numRows = parametric_tranformation_.size();
+
+        // an empty outer or inner vector simply gives zero loop iterations
+        for ( size_t row=0; row<numRows; row++ )
+        {
+            std::vector<TransformationParametricType*>& rowEntries = parametric_tranformation_[row];
+            const size_t numCols = rowEntries.size();
+
+            for ( size_t col=0; col<numCols; col++ )
+            {
+                // deleting a null pointer is a no-op, so empty slots need no special handling
+                delete rowEntries[col];
+                rowEntries[col] = NULL;
+            }
+        }
+    }
+
+    /// Reset every registration parameter to its default for the given number of pyramid levels;
+    /// each per-level vector is cleared and rebuilt with one entry per level. Always returns true.
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    bool hoImageRegContainer2DRegistration<ValueType, CoordType, DIn, DOut>::setDefaultParameters(unsigned int resolution_pyramid_levels, bool use_world_coordinates)
+    {
+        use_world_coordinates_ = use_world_coordinates;
+        resolution_pyramid_levels_ = resolution_pyramid_levels;
+
+        container_reg_mode_ = GT_IMAGE_REG_CONTAINER_PAIR_WISE;
+        container_reg_transformation_ = GT_IMAGE_REG_TRANSFORMATION_DEFORMATION_FIELD;
+
+        max_iter_num_pyramid_level_.clear();
+        max_iter_num_pyramid_level_.resize(resolution_pyramid_levels_, 32);
+        // level 0 gets 16 iterations; guarded because indexing an empty vector (zero levels) is undefined behaviour
+        if ( !max_iter_num_pyramid_level_.empty() ) max_iter_num_pyramid_level_[0] = 16;
+
+        dissimilarity_type_ = GT_IMAGE_DISSIMILARITY_LocalCCR;
+        dissimilarity_thres_pyramid_level_.clear();
+        dissimilarity_thres_pyramid_level_.resize(resolution_pyramid_levels_, (ValueType)(1e-5) );
+
+        div_num_pyramid_level_.clear();
+        div_num_pyramid_level_.resize(resolution_pyramid_levels_, 2);
+
+        dissimilarity_LocalCCR_sigmaArg_.clear();
+        dissimilarity_LocalCCR_sigmaArg_.resize(resolution_pyramid_levels_);
+        for ( unsigned int ii=0; ii<resolution_pyramid_levels_; ii++ )
+        {
+            dissimilarity_LocalCCR_sigmaArg_[ii].resize(DIn, 2.0);
+        }
+
+        dissimilarity_MI_betaArg_.clear();
+        dissimilarity_MI_betaArg_.resize(resolution_pyramid_levels_, 2);
+
+        regularization_hilbert_strength_world_coordinate_ = false;
+        regularization_hilbert_strength_pyramid_level_.clear(); // without this, a repeated call would keep earlier strengths
+        regularization_hilbert_strength_pyramid_level_.resize(resolution_pyramid_levels_);
+        for ( unsigned int ii=0; ii<resolution_pyramid_levels_; ii++ )
+        {
+            regularization_hilbert_strength_pyramid_level_[ii].resize(DIn, 12.0);
+        }
+
+        boundary_handler_type_warper_.clear();
+        boundary_handler_type_warper_.resize(resolution_pyramid_levels_, GT_BOUNDARY_CONDITION_BORDERVALUE);
+
+        interp_type_warper_.clear();
+        interp_type_warper_.resize(resolution_pyramid_levels_, GT_IMAGE_INTERPOLATOR_LINEAR);
+
+        inverse_deform_enforce_iter_pyramid_level_.clear();
+        inverse_deform_enforce_iter_pyramid_level_.resize(resolution_pyramid_levels_, 10);
+
+        inverse_deform_enforce_weight_pyramid_level_.clear();
+        inverse_deform_enforce_weight_pyramid_level_.resize(resolution_pyramid_levels_, 0.5);
+
+        apply_in_FOV_constraint_ = false;
+        verbose_ = false;
+
+        return true;
+    }
+
+    /// Placeholder for pair-wise registration with a parametric transformation.
+    ///
+    /// No registration is performed yet: none of the arguments is read or modified
+    /// and the call always reports success.
+    ///
+    /// @param target    target image (currently unused)
+    /// @param source    source image (currently unused)
+    /// @param initial   initialization flag (currently unused)
+    /// @param warped    optional warped output (currently unused, never written)
+    /// @param transform parametric transformation (currently unused, never written)
+    /// @return always true
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    bool hoImageRegContainer2DRegistration<ValueType, CoordType, DIn, DOut>::
+    registerTwoImagesParametric(const TargetType& target, const SourceType& source, bool initial, TargetType* warped, TransformationParametricType& transform)
+    {
+        // The former try block was empty, so its catch(...) handler could never run;
+        // the dead handler has been dropped until a real implementation is added.
+        return true;
+    }
+
+    /// Register one source image to one target image with a deformation-field transformation.
+    ///
+    /// A hoImageRegDeformationFieldRegister is configured from the parameters held by this object,
+    /// run, and its per-dimension deformation fields are copied into deform[0..DIn-1].
+    ///
+    /// @param target  target image; when deform[0] does not match its dimensions, every field takes the
+    ///                target's image info and is cleared before registration
+    /// @param source  source image handed to the register and, if requested, to the warper
+    /// @param initial if true and deform[0] matches the target's dimensions, the incoming fields are set
+    ///                on the transformation before registration
+    /// @param warped  optional output, may be NULL; when given, receives the source warped with the
+    ///                estimated transformation through a BSpline interpolator
+    /// @param deform  array of DIn deformation field pointers (must not be NULL); overwritten with the result
+    /// @return false if any check fails (including the final warp) or an exception is caught, true otherwise
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    bool hoImageRegContainer2DRegistration<ValueType, CoordType, DIn, DOut>::
+    registerTwoImagesDeformationField(const TargetType& target, const SourceType& source, bool initial, TargetType* warped, DeformationFieldType** deform)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(DIn==DOut);
+            GADGET_CHECK_RETURN_FALSE(deform!=NULL);
+
+            hoImageRegDeformationFieldRegister<ValueType, CoordType, DIn> reg(resolution_pyramid_levels_, use_world_coordinates_, bg_value_);
+
+            if ( !debugFolder_.empty() )
+            {
+                reg.debugFolder_ = debugFolder_;
+            }
+
+            // start from the register's defaults, then override with this object's settings
+            GADGET_CHECK_RETURN_FALSE(reg.setDefaultParameters(resolution_pyramid_levels_, use_world_coordinates_));
+
+            reg.max_iter_num_pyramid_level_ = max_iter_num_pyramid_level_;
+            reg.div_num_pyramid_level_ = div_num_pyramid_level_;
+            reg.dissimilarity_MI_betaArg_ = dissimilarity_MI_betaArg_;
+            reg.regularization_hilbert_strength_world_coordinate_ = regularization_hilbert_strength_world_coordinate_;
+            reg.regularization_hilbert_strength_pyramid_level_ = regularization_hilbert_strength_pyramid_level_;
+            reg.dissimilarity_LocalCCR_sigmaArg_ = dissimilarity_LocalCCR_sigmaArg_;
+            reg.boundary_handler_type_warper_ = boundary_handler_type_warper_;
+            reg.interp_type_warper_ = interp_type_warper_;
+            reg.apply_in_FOV_constraint_ = apply_in_FOV_constraint_;
+            reg.verbose_ = verbose_;
+
+            // the same dissimilarity type is used on every pyramid level
+            reg.dissimilarity_type_.clear();
+            reg.dissimilarity_type_.resize(resolution_pyramid_levels_, dissimilarity_type_);
+
+            reg.setTarget( const_cast<TargetType&>(target) );
+            // only constness is cast away; the image keeps its declared SourceType
+            reg.setSource( const_cast<SourceType&>(source) );
+
+            if ( verbose_ )
+            {
+                std::ostringstream outs;
+                reg.print(outs);
+                GADGET_MSG(outs.str());
+            }
+
+            GADGET_CHECK_RETURN_FALSE(reg.initialize());
+
+            unsigned int d;
+
+            if ( target.dimensions_equal( *(deform[0]) ) )
+            {
+                // fields already match the target grid: use them as a starting point only on request
+                if ( initial )
+                {
+                    for ( d=0; d<DIn; d++ )
+                    {
+                        reg.transform_->setDeformationField( *(deform[d]), d);
+                    }
+                }
+            }
+            else
+            {
+                // fields do not match the target: give them the target's image info and zero them
+                for ( d=0; d<DIn; d++ )
+                {
+                    deform[d]->copyImageInfo(target);
+                    Gadgetron::clear( *(deform[d]) );
+                }
+            }
+
+            GADGET_CHECK_RETURN_FALSE(reg.performRegistration());
+
+            for ( d=0; d<DIn; d++ )
+            {
+                *(deform[d]) = reg.transform_->getDeformationField(d);
+            }
+
+            if ( warped != NULL )
+            {
+                /// bspline warp
+                hoNDBoundaryHandlerFixedValue<SourceType> bhFixedValue;
+                bhFixedValue.setArray( const_cast<SourceType&>(source) );
+
+                hoNDInterpolatorBSpline<SourceType, DIn> interpBSpline(5);
+                interpBSpline.setArray( const_cast<SourceType&>(source) );
+                interpBSpline.setBoundaryHandler(bhFixedValue);
+
+                hoImageRegWarper<ValueType, ValueType, DIn, DOut> warper;
+                warper.setBackgroundValue(bg_value_);
+                warper.setTransformation(*reg.transform_);
+                warper.setInterpolator(interpBSpline);
+
+                // propagate a warping failure instead of silently reporting success
+                GADGET_CHECK_RETURN_FALSE(warper.warp(target, source, use_world_coordinates_, *warped));
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in hoImageRegContainer2DRegistration<ValueType, CoordType, DIn, DOut>::registerTwoImagesDeformationField(...) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Register one source image to one target image, estimating a forward and an inverse deformation field.
+    ///
+    /// @param target    target image
+    /// @param source    source image
+    /// @param initial   if true and deform[0] matches the target's dimensions, the incoming forward and
+    ///                  inverse fields are set on the two transformations before registration
+    /// @param warped    optional output, may be NULL; when given, receives the source warped with the
+    ///                  forward transformation through a BSpline interpolator
+    /// @param deform    array of DIn forward deformation field pointers (must not be NULL); overwritten
+    /// @param deformInv array of DIn inverse deformation field pointers (must not be NULL); overwritten
+    /// @return false if any check fails or an exception is caught, true otherwise
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    bool hoImageRegContainer2DRegistration<ValueType, CoordType, DIn, DOut>::
+    registerTwoImagesDeformationFieldBidirectional(const TargetType& target, const SourceType& source, bool initial, TargetType* warped, DeformationFieldType** deform, DeformationFieldType** deformInv)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(DIn==DOut);
+            GADGET_CHECK_RETURN_FALSE(deform!=NULL);
+            GADGET_CHECK_RETURN_FALSE(deformInv!=NULL);
+
+            hoImageRegDeformationFieldBidirectionalRegister<ValueType, coord_type, DIn> reg(resolution_pyramid_levels_, use_world_coordinates_, bg_value_);
+
+            if ( !debugFolder_.empty() ) reg.debugFolder_ = debugFolder_;
+
+            GADGET_CHECK_RETURN_FALSE(reg.setDefaultParameters(resolution_pyramid_levels_, use_world_coordinates_));
+
+            // global switches
+            reg.verbose_ = verbose_;
+            reg.apply_in_FOV_constraint_ = apply_in_FOV_constraint_;
+
+            // iteration control per pyramid level
+            reg.max_iter_num_pyramid_level_ = max_iter_num_pyramid_level_;
+            reg.div_num_pyramid_level_ = div_num_pyramid_level_;
+
+            // dissimilarity settings; one dissimilarity type shared by all levels
+            reg.dissimilarity_LocalCCR_sigmaArg_ = dissimilarity_LocalCCR_sigmaArg_;
+            reg.dissimilarity_MI_betaArg_ = dissimilarity_MI_betaArg_;
+            reg.dissimilarity_type_.clear();
+            reg.dissimilarity_type_.resize(resolution_pyramid_levels_, dissimilarity_type_);
+
+            // regularization
+            reg.regularization_hilbert_strength_pyramid_level_ = regularization_hilbert_strength_pyramid_level_;
+            reg.regularization_hilbert_strength_world_coordinate_ = regularization_hilbert_strength_world_coordinate_;
+
+            // warper configuration
+            reg.interp_type_warper_ = interp_type_warper_;
+            reg.boundary_handler_type_warper_ = boundary_handler_type_warper_;
+
+            // inverse-consistency enforcement
+            reg.inverse_deform_enforce_weight_pyramid_level_ = inverse_deform_enforce_weight_pyramid_level_;
+            reg.inverse_deform_enforce_iter_pyramid_level_ = inverse_deform_enforce_iter_pyramid_level_;
+
+            reg.setTarget( const_cast<TargetType&>(target) );
+            reg.setSource( const_cast<SourceType&>(source) );
+
+            if ( verbose_ ) Gadgetron::printInfo(reg);
+
+            GADGET_CHECK_RETURN_FALSE(reg.initialize());
+
+            if ( !target.dimensions_equal( *(deform[0]) ) )
+            {
+                // fields do not match the target: give them the target's image info and zero them
+                for ( unsigned int dim=0; dim<DIn; dim++ )
+                {
+                    deform[dim]->copyImageInfo(target);
+                    Gadgetron::clear( *(deform[dim]) );
+                    deformInv[dim]->copyImageInfo(target);
+                    Gadgetron::clear( *(deformInv[dim]) );
+                }
+            }
+            else if ( initial )
+            {
+                // matching fields are used as the starting point only when requested
+                for ( unsigned int dim=0; dim<DIn; dim++ )
+                {
+                    reg.transform_->setDeformationField( *(deform[dim]), dim);
+                    reg.transform_inverse_->setDeformationField( *(deformInv[dim]), dim);
+                }
+            }
+
+            GADGET_CHECK_RETURN_FALSE(reg.performRegistration());
+
+            // hand the estimated forward and inverse fields back to the caller
+            for ( unsigned int dim=0; dim<DIn; dim++ )
+            {
+                *(deform[dim]) = reg.transform_->getDeformationField(dim);
+                *(deformInv[dim]) = reg.transform_inverse_->getDeformationField(dim);
+            }
+
+            // no warped output requested: done
+            if ( warped == NULL ) return true;
+
+            /// bspline warp with a border-value boundary handler
+            SourceType& mutableSource = const_cast<SourceType&>(source);
+
+            hoNDBoundaryHandlerBorderValue<SourceType> borderHandler;
+            borderHandler.setArray(mutableSource);
+
+            hoNDInterpolatorBSpline<SourceType, DIn> bsplineInterp(5);
+            bsplineInterp.setArray(mutableSource);
+            bsplineInterp.setBoundaryHandler(borderHandler);
+
+            hoImageRegWarper<ValueType, ValueType, DIn, DOut> warper;
+            warper.setBackgroundValue(bg_value_);
+            warper.setTransformation(*reg.transform_);
+            warper.setInterpolator(bsplineInterp);
+
+            GADGET_CHECK_RETURN_FALSE(warper.warp(target, source, use_world_coordinates_, *warped));
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in hoImageRegContainer2DRegistration<ValueType, CoordType, DIn, DOut>::registerTwoImagesDeformationFieldBidirectional(...) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Prepare the output containers for a registration run over targetContainer.
+    ///
+    /// @param targetContainer container whose column layout sizes the deformation field containers
+    /// @param warped          if true, warped_container_ is first made a copy of targetContainer
+    /// @return false if any check fails or an exception is caught, true otherwise
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    bool hoImageRegContainer2DRegistration<ValueType, CoordType, DIn, DOut>::
+    initialize(const TargetContinerType& targetContainer, bool warped)
+    {
+        try
+        {
+            if ( warped )
+            {
+                GADGET_CHECK_RETURN_FALSE(warped_container_.copyFrom(targetContainer));
+            }
+
+            std::vector<size_t> col = targetContainer.cols();
+
+            const bool forwardOnly   = ( container_reg_transformation_ == GT_IMAGE_REG_TRANSFORMATION_DEFORMATION_FIELD );
+            const bool bidirectional = ( container_reg_transformation_ == GT_IMAGE_REG_TRANSFORMATION_DEFORMATION_FIELD_BIDIRECTIONAL );
+
+            if ( forwardOnly || bidirectional )
+            {
+                // one zero-filled field container per input dimension;
+                // the bidirectional mode additionally needs the inverse fields
+                for ( unsigned int ii=0; ii<DIn; ii++ )
+                {
+                    GADGET_CHECK_RETURN_FALSE(deformation_field_[ii].create(col));
+                    GADGET_CHECK_RETURN_FALSE(deformation_field_[ii].fillWithZeros());
+
+                    if ( bidirectional )
+                    {
+                        GADGET_CHECK_RETURN_FALSE(deformation_field_inverse_[ii].create(col));
+                        GADGET_CHECK_RETURN_FALSE(deformation_field_inverse_[ii].fillWithZeros());
+                    }
+                }
+            }
+            else if ( container_reg_transformation_==GT_IMAGE_REG_TRANSFORMATION_RIGID 
+                        || container_reg_transformation_==GT_IMAGE_REG_TRANSFORMATION_AFFINE )
+            {
+                // parametric transformations have no container set-up yet
+                GADGET_MSG("To be implemented ...");
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in hoImageRegContainer2DRegistration<ValueType, CoordType, DIn, DOut>::initialize(const TargetContinerType& targetContainer) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Register the images of sourceContainer to the images of targetContainer pair by pair.
+    ///
+    /// The n-th image returned by sourceContainer.get_all_images() is registered to the n-th image
+    /// returned by targetContainer.get_all_images(); both lists must have the same length.
+    /// Results are written into deformation_field_ (and deformation_field_inverse_ in bidirectional mode).
+    ///
+    /// @param targetContainer container of target images
+    /// @param sourceContainer container of source images
+    /// @param warped          if true, warped_container_ is initialized as a copy of targetContainer and
+    ///                        its images are passed as the warped outputs; otherwise no warped image is produced
+    /// @return false if a check fails or an exception is caught, true otherwise; the results of the
+    ///         individual pair registrations are not inspected
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    bool hoImageRegContainer2DRegistration<ValueType, CoordType, DIn, DOut>::
+    registerOverContainer2DPairWise(TargetContinerType& targetContainer, SourceContinerType& sourceContainer, bool warped)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(this->initialize(targetContainer, warped));
+
+            std::vector<TargetType*> targetImages;
+            targetContainer.get_all_images(targetImages);
+
+            std::vector<SourceType*> sourceImages;
+            sourceContainer.get_all_images(sourceImages);
+
+            long long numOfImages = targetImages.size();
+
+            GADGET_CHECK_RETURN_FALSE(numOfImages==sourceImages.size());
+
+            // warped outputs stay NULL unless warping was requested
+            std::vector<SourceType*> warpedImages(numOfImages, NULL);
+            if ( warped )
+            {
+                warped_container_.get_all_images(warpedImages);
+            }
+
+            GADGET_MSG("registerOverContainer2DPairWise - threading ... ");
+
+            // Nested OpenMP parallelism is switched on only when there are fewer images than
+            // (processors - 1); the previous setting is remembered and restored at the end.
+            // NOTE(review): the restore below is skipped if an exception reaches the catch block.
+            #ifdef USE_OMP
+                int numOfProcs = omp_get_num_procs();
+                int nested = omp_get_nested();
+                if ( numOfImages < numOfProcs-1 )
+                {
+                    omp_set_nested(1);
+                    GADGET_MSG("registerOverContainer2DPairWise - nested openMP on ... ");
+                }
+                else
+                {
+                    omp_set_nested(0);
+                    GADGET_MSG("registerOverContainer2DPairWise - nested openMP off ... ");
+                }
+            #endif // USE_OMP
+
+            unsigned int ii;
+            long long n;
+
+            if ( container_reg_transformation_ == GT_IMAGE_REG_TRANSFORMATION_DEFORMATION_FIELD )
+            {
+                // deform[ii][n] : field of dimension ii for image pair n
+                std::vector< std::vector<DeformationFieldType*> > deform(DIn);
+
+                for ( ii=0; ii<DIn; ii++ )
+                {
+                    deformation_field_[ii].get_all_images(deform[ii]);
+                }
+
+                // existing fields are never used as a starting point here
+                bool initial = false;
+
+                #pragma omp parallel default(none) private(n, ii) shared(numOfImages, initial, targetImages, sourceImages, deform, warpedImages)
+                {
+                    // declared inside the parallel region, so every thread has its own array
+                    DeformationFieldType* deformCurr[DIn];
+
+                    #pragma omp for 
+                    for ( n=0; n<numOfImages; n++ )
+                    {
+                        TargetType& target = *(targetImages[n]);
+                        SourceType& source = *(sourceImages[n]);
+
+                        if ( &target == &source )
+                        {
+                            // target and source are the same object: store a zero field, no registration
+                            for ( ii=0; ii<DIn; ii++ )
+                            {
+                                deform[ii][n]->create(target.get_dimensions());
+                                Gadgetron::clear( *deform[ii][n] );
+                            }
+                        }
+                        else
+                        {
+                            // gather the per-dimension fields of pair n into one pointer array
+                            for ( ii=0; ii<DIn; ii++ )
+                            {
+                                deformCurr[ii] = deform[ii][n];
+                            }
+
+                            // return value is not checked
+                            registerTwoImagesDeformationField(target, source, initial, warpedImages[n], deformCurr);
+                        }
+                    }
+                }
+            }
+            else if ( container_reg_transformation_ == GT_IMAGE_REG_TRANSFORMATION_DEFORMATION_FIELD_BIDIRECTIONAL )
+            {
+                // same layout as above, with an additional set of inverse fields
+                std::vector< std::vector<DeformationFieldType*> > deform(DIn);
+                std::vector< std::vector<DeformationFieldType*> > deformInv(DIn);
+
+                for ( ii=0; ii<DIn; ii++ )
+                {
+                    deformation_field_[ii].get_all_images(deform[ii]);
+                    deformation_field_inverse_[ii].get_all_images(deformInv[ii]);
+                }
+
+                // existing fields are never used as a starting point here
+                bool initial = false;
+
+                #pragma omp parallel default(none) private(n, ii) shared(numOfImages, initial, targetImages, sourceImages, deform, deformInv, warpedImages)
+                {
+                    // declared inside the parallel region, so every thread has its own arrays
+                    DeformationFieldType* deformCurr[DIn];
+                    DeformationFieldType* deformInvCurr[DIn];
+
+                    #pragma omp for 
+                    for ( n=0; n<numOfImages; n++ )
+                    {
+                        TargetType& target = *(targetImages[n]);
+                        SourceType& source = *(sourceImages[n]);
+
+                        if ( &target == &source )
+                        {
+                            // target and source are the same object: store zero forward and inverse fields
+                            for ( ii=0; ii<DIn; ii++ )
+                            {
+                                deform[ii][n]->create(target.get_dimensions());
+                                Gadgetron::clear( *deform[ii][n] );
+
+                                deformInv[ii][n]->create(source.get_dimensions());
+                                Gadgetron::clear( *deformInv[ii][n] );
+                            }
+                        }
+                        else
+                        {
+                            // gather the per-dimension fields of pair n into pointer arrays
+                            for ( ii=0; ii<DIn; ii++ )
+                            {
+                                deformCurr[ii] = deform[ii][n];
+                                deformInvCurr[ii] = deformInv[ii][n];
+                            }
+
+                            // return value is not checked
+                            registerTwoImagesDeformationFieldBidirectional(target, source, initial, warpedImages[n], deformCurr, deformInvCurr);
+                        }
+                    }
+                }
+            }
+            else if ( container_reg_transformation_==GT_IMAGE_REG_TRANSFORMATION_RIGID 
+                        || container_reg_transformation_==GT_IMAGE_REG_TRANSFORMATION_AFFINE )
+            {
+                // parametric transformations are not handled yet; only a message is printed
+                GADGET_MSG("To be implemented ...");
+            }
+
+            // put the nested-parallelism setting back to what it was on entry
+            #ifdef USE_OMP
+                omp_set_nested(nested);
+            #endif // USE_OMP
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in hoImageRegContainer2DRegistration<ValueType, CoordType, DIn, DOut>::registerOverContainer2DPairWise(...) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Register every image of imageContainer to the reference frame of its own row.
+    ///
+    /// For row r the target is imageContainer(r, referenceFrame[r]); all images of that row are the sources.
+    /// Results are written into deformation_field_ (and deformation_field_inverse_ in bidirectional mode).
+    ///
+    /// @param imageContainer container holding the images; used for both targets and sources
+    /// @param referenceFrame one reference column index per row; its size must equal the number of rows
+    ///                       reported by imageContainer.cols()
+    /// @param warped         if true, warped_container_ is initialized as a copy of imageContainer and
+    ///                       its images are passed as the warped outputs; otherwise no warped image is produced
+    /// @return false if a check fails or an exception is caught, true otherwise; the results of the
+    ///         individual pair registrations are not inspected
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    bool hoImageRegContainer2DRegistration<ValueType, CoordType, DIn, DOut>::
+    registerOverContainer2DFixedReference(TargetContinerType& imageContainer, const std::vector<unsigned int>& referenceFrame, bool warped)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(this->initialize(imageContainer, warped));
+
+            size_t row = imageContainer.rows();
+            std::vector<size_t> col = imageContainer.cols();
+
+            GADGET_CHECK_RETURN_FALSE(referenceFrame.size() == col.size());
+
+            std::vector<SourceType*> sourceImages;
+            imageContainer.get_all_images(sourceImages);
+
+            long long numOfImages = (long long)sourceImages.size();
+
+            // warped images
+            // (entries stay NULL unless warping was requested)
+            std::vector<SourceType*> warpedImages(numOfImages, NULL);
+            if ( warped )
+            {
+                warped_container_.get_all_images(warpedImages);
+            }
+
+            unsigned int ii;
+            long long n;
+            size_t r, c;
+
+            // fill in the reference frames
+            // targetImages[k] points to the reference frame of the row that image k belongs to.
+            // Assumes get_all_images() lists the images row by row in column order - TODO confirm.
+            // NOTE(review): referenceFrame[r] is not range-checked against col[r] here.
+            std::vector<TargetType*> targetImages(numOfImages, NULL);
+
+            size_t ind=0;
+            for ( r=0; r<row; r++ )
+            {
+                TargetType& ref = const_cast<TargetType&>(imageContainer(r, referenceFrame[r]));
+
+                for ( c=0; c<col[r]; c++ )
+                {
+                    targetImages[ind] = &ref;
+                    ind++;
+                }
+            }
+
+            // NOTE(review): targetImages was constructed with numOfImages entries, so this compares
+            // against the constructed size, not against the number of entries filled in (ind).
+            GADGET_CHECK_RETURN_FALSE(numOfImages==targetImages.size());
+
+            // Nested OpenMP parallelism is switched on only when there are fewer images than
+            // (processors - 1); the previous setting is remembered and restored at the end.
+            // NOTE(review): the restore below is skipped if an exception reaches the catch block.
+            #ifdef USE_OMP
+                int numOfProcs = omp_get_num_procs();
+                int nested = omp_get_nested();
+                if ( numOfImages < numOfProcs-1 )
+                {
+                    omp_set_nested(1);
+                    GADGET_MSG("registerOverContainer2DFixedReference - nested openMP on ... ");
+                }
+                else
+                {
+                    omp_set_nested(0);
+                    GADGET_MSG("registerOverContainer2DFixedReference - nested openMP off ... ");
+                }
+            #endif // USE_OMP
+
+            if ( container_reg_transformation_ == GT_IMAGE_REG_TRANSFORMATION_DEFORMATION_FIELD )
+            {
+                // deform[ii][n] : field of dimension ii for image n
+                std::vector< std::vector<DeformationFieldType*> > deform(DIn);
+
+                for ( ii=0; ii<DIn; ii++ )
+                {
+                    deformation_field_[ii].get_all_images(deform[ii]);
+                }
+
+                // existing fields are never used as a starting point here
+                bool initial = false;
+
+                #pragma omp parallel default(none) private(n, ii) shared(numOfImages, initial, targetImages, sourceImages, deform, warpedImages)
+                {
+                    // declared inside the parallel region, so every thread has its own array
+                    DeformationFieldType* deformCurr[DIn];
+
+                    #pragma omp for 
+                    for ( n=0; n<numOfImages; n++ )
+                    {
+                        if ( targetImages[n] == sourceImages[n] )
+                        {
+                            // the source is the reference frame itself: copy it to the warped
+                            // output (if any), store a zero field and skip the registration
+                            if ( warpedImages[n] != NULL )
+                            {
+                                *(warpedImages[n]) = *(targetImages[n]);
+                            }
+
+                            for ( ii=0; ii<DIn; ii++ )
+                            {
+                                deform[ii][n]->create(targetImages[n]->get_dimensions());
+                                Gadgetron::clear(*deform[ii][n]);
+                            }
+
+                            continue;
+                        }
+
+                        TargetType& target = *(targetImages[n]);
+                        SourceType& source = *(sourceImages[n]);
+
+                        // gather the per-dimension fields of image n into one pointer array
+                        for ( ii=0; ii<DIn; ii++ )
+                        {
+                            deformCurr[ii] = deform[ii][n];
+                        }
+
+                        // return value is not checked
+                        registerTwoImagesDeformationField(target, source, initial, warpedImages[n], deformCurr);
+                    }
+                }
+            }
+            else if ( container_reg_transformation_ == GT_IMAGE_REG_TRANSFORMATION_DEFORMATION_FIELD_BIDIRECTIONAL )
+            {
+                // same layout as above, with an additional set of inverse fields
+                std::vector< std::vector<DeformationFieldType*> > deform(DIn);
+                std::vector< std::vector<DeformationFieldType*> > deformInv(DIn);
+
+                for ( ii=0; ii<DIn; ii++ )
+                {
+                    deformation_field_[ii].get_all_images(deform[ii]);
+                    deformation_field_inverse_[ii].get_all_images(deformInv[ii]);
+                }
+
+                // existing fields are never used as a starting point here
+                bool initial = false;
+
+                #pragma omp parallel default(none) private(n, ii) shared(numOfImages, initial, targetImages, sourceImages, deform, deformInv, warpedImages)
+                {
+                    // declared inside the parallel region, so every thread has its own arrays
+                    DeformationFieldType* deformCurr[DIn];
+                    DeformationFieldType* deformInvCurr[DIn];
+
+                    #pragma omp for 
+                    for ( n=0; n<numOfImages; n++ )
+                    {
+                        if ( targetImages[n] == sourceImages[n] )
+                        {
+                            // the source is the reference frame itself: copy it to the warped output
+                            // (if any), store zero forward and inverse fields and skip the registration
+                            if ( warpedImages[n] != NULL )
+                            {
+                                *(warpedImages[n]) = *(targetImages[n]);
+                            }
+
+                            for ( ii=0; ii<DIn; ii++ )
+                            {
+                                deform[ii][n]->create(targetImages[n]->get_dimensions());
+                                Gadgetron::clear(*deform[ii][n]);
+
+                                deformInv[ii][n]->create(targetImages[n]->get_dimensions());
+                                Gadgetron::clear(*deformInv[ii][n]);
+                            }
+
+                            continue;
+                        }
+
+                        TargetType& target = *(targetImages[n]);
+                        SourceType& source = *(sourceImages[n]);
+
+                        // gather the per-dimension fields of image n into pointer arrays
+                        for ( ii=0; ii<DIn; ii++ )
+                        {
+                            deformCurr[ii] = deform[ii][n];
+                            deformInvCurr[ii] = deformInv[ii][n];
+                        }
+
+                        // return value is not checked
+                        registerTwoImagesDeformationFieldBidirectional(target, source, initial, warpedImages[n], deformCurr, deformInvCurr);
+                    }
+                }
+            }
+            else if ( container_reg_transformation_==GT_IMAGE_REG_TRANSFORMATION_RIGID 
+                        || container_reg_transformation_==GT_IMAGE_REG_TRANSFORMATION_AFFINE )
+            {
+                // parametric transformations are not handled yet; only a message is printed
+                GADGET_MSG("To be implemented ...");
+            }
+
+            // put the nested-parallelism setting back to what it was on entry
+            #ifdef USE_OMP
+                omp_set_nested(nested);
+            #endif // USE_OMP
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in hoImageRegContainer2DRegistration<ValueType, CoordType, DIn, DOut>::registerOverContainer2DFixedReference(...) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    bool hoImageRegContainer2DRegistration<ValueType, CoordType, DIn, DOut>::
+    registerOverContainer2DProgressive(TargetContinerType& imageContainer, const std::vector<unsigned int>& referenceFrame)
+    {
+        try
+        {
+            bool warped = true;
+            GADGET_CHECK_RETURN_FALSE(this->initialize(imageContainer, warped));
+
+            long long row = (long long)imageContainer.rows();
+            std::vector<size_t> col = imageContainer.cols();
+
+            GADGET_CHECK_RETURN_FALSE(referenceFrame.size() == col.size());
+
+            unsigned int ii;
+            long long n;
+            long long r, c;
+
+            // for every row, two registration tasks can be formatted
+
+            long long numOfTasks = (long long)(2*row);
+
+            std::vector< std::vector<TargetType*> > regImages(numOfTasks);
+            std::vector< std::vector<TargetType*> > warpedImages(numOfTasks);
+
+            std::vector< std::vector< std::vector<DeformationFieldType*> > > deform(DIn);
+            std::vector< std::vector< std::vector<DeformationFieldType*> > > deformInv(DIn);
+
+            for ( ii=0; ii<DIn; ii++ )
+            {
+                deform[ii].resize(numOfTasks);
+                deformInv[ii].resize(numOfTasks);
+            }
+
+            for ( r=0; r<row; r++ )
+            {
+                unsigned int refFrame = referenceFrame[r];
+
+                regImages[2*r].resize(col[r]-refFrame);
+                regImages[2*r+1].resize(1+refFrame);
+
+                warpedImages[2*r].resize(col[r]-refFrame);
+                warpedImages[2*r+1].resize(1+refFrame);
+
+                // copy over the reference frame
+                warped_container_(r, refFrame) = imageContainer(r, refFrame);
+
+                if ( container_reg_transformation_ == GT_IMAGE_REG_TRANSFORMATION_DEFORMATION_FIELD )
+                {
+                    for ( ii=0; ii<DIn; ii++ )
+                    {
+                        deformation_field_[ii](r, refFrame).create(imageContainer(r, refFrame).get_dimensions());
+                        Gadgetron::clear(deformation_field_[ii](r, refFrame));
+                    }
+                }
+
+                if ( container_reg_transformation_ == GT_IMAGE_REG_TRANSFORMATION_DEFORMATION_FIELD_BIDIRECTIONAL )
+                {
+                    for ( ii=0; ii<DIn; ii++ )
+                    {
+                        deformation_field_[ii](r, refFrame).create(imageContainer(r, refFrame).get_dimensions());
+                        Gadgetron::clear(deformation_field_[ii](r, refFrame));
+
+                        deformation_field_inverse_[ii](r, refFrame).create(imageContainer(r, refFrame).get_dimensions());
+                        Gadgetron::clear(deformation_field_inverse_[ii](r, refFrame));
+                    }
+                }
+
+                // task one
+                for ( c=refFrame; c<(long long)col[r]; c++ )
+                {
+                    regImages[2*r][c-refFrame] = &(imageContainer(r, c));
+                    warpedImages[2*r][c-refFrame] = &(warped_container_(r, c));
+                }
+
+                // task two
+                for ( c=refFrame; c>=0; c-- )
+                {
+                    regImages[2*r+1][refFrame-c] = &(imageContainer(r, c));
+                    warpedImages[2*r+1][refFrame-c] = &(warped_container_(r, c));
+                }
+
+                for ( ii=0; ii<DIn; ii++ )
+                {
+                    if ( container_reg_transformation_ == GT_IMAGE_REG_TRANSFORMATION_DEFORMATION_FIELD )
+                    {
+                        deform[ii][2*r].resize(col[r]-refFrame);
+                        deform[ii][2*r+1].resize(1+refFrame);
+
+                        // task one
+                        for ( c=refFrame; c<(long long)col[r]; c++ )
+                        {
+                            deform[ii][2*r][c-refFrame] = &(deformation_field_[ii](r, c));
+                        }
+
+                        // task two
+                        for ( c=refFrame; c>=0; c-- )
+                        {
+                            deform[ii][2*r+1][refFrame-c] = &(deformation_field_[ii](r, c));
+                        }
+                    }
+
+                    if ( container_reg_transformation_ == GT_IMAGE_REG_TRANSFORMATION_DEFORMATION_FIELD_BIDIRECTIONAL )
+                    {
+                        deform[ii][2*r].resize(col[r]-refFrame);
+                        deform[ii][2*r+1].resize(1+refFrame);
+
+                        deformInv[ii][2*r].resize(col[r]-refFrame);
+                        deformInv[ii][2*r+1].resize(1+refFrame);
+
+                        // task one
+                        for ( c=refFrame; c<(long long)col[r]; c++ )
+                        {
+                            deform[ii][2*r][c-refFrame] = &(deformation_field_[ii](r, c));
+                            deformInv[ii][2*r][c-refFrame] = &(deformation_field_inverse_[ii](r, c));
+                        }
+
+                        // task two
+                        for ( c=refFrame; c>=0; c-- )
+                        {
+                            deform[ii][2*r+1][refFrame-c] = &(deformation_field_[ii](r, c));
+                            deformInv[ii][2*r+1][refFrame-c] = &(deformation_field_inverse_[ii](r, c));
+                        }
+                    }
+                }
+            }
+
+            #ifdef USE_OMP
+                int numOfProcs = omp_get_num_procs();
+                int nested = omp_get_nested();
+                //if ( numOfTasks < numOfProcs-1 )
+                //{
+                //    omp_set_nested(1);
+                //}
+                //else
+                //{
+                    omp_set_nested(0);
+                //}
+            #endif // USE_OMP
+
+            if ( container_reg_transformation_ == GT_IMAGE_REG_TRANSFORMATION_DEFORMATION_FIELD )
+            {
+                bool initial = false;
+
+                #pragma omp parallel default(none) private(n, ii) shared(numOfTasks, initial, regImages, warpedImages, deform) if ( numOfTasks > 6)
+                {
+                    DeformationFieldType* deformCurr[DIn];
+
+                    #pragma omp for 
+                    for ( n=0; n<numOfTasks; n++ )
+                    {
+                        size_t numOfImages = regImages[n].size();
+
+                        // no need to copy the refrence frame to warped
+
+                        size_t k;
+                        for ( k=1; k<numOfImages; k++ )
+                        {
+                            TargetType& target = *(warpedImages[n][k-1]);
+                            SourceType& source = *(regImages[n][k]);
+
+                            for ( ii=0; ii<DIn; ii++ )
+                            {
+                                deformCurr[ii] = deform[ii][n][k];
+                            }
+
+                            registerTwoImagesDeformationField(target, source, initial, warpedImages[n][k], deformCurr);
+                        }
+                    }
+                }
+            }
+            else if ( container_reg_transformation_ == GT_IMAGE_REG_TRANSFORMATION_DEFORMATION_FIELD_BIDIRECTIONAL )
+            {
+                bool initial = false;
+
+                #pragma omp parallel default(none) private(n, ii) shared(numOfTasks, initial, regImages, warpedImages, deform, deformInv) if ( numOfTasks > 6)
+                {
+                    DeformationFieldType* deformCurr[DIn];
+                    DeformationFieldType* deformInvCurr[DIn];
+
+                    #pragma omp for 
+                    for ( n=0; n<numOfTasks; n++ )
+                    {
+                        size_t numOfImages = regImages[n].size();
+
+                        size_t k;
+                        for ( k=1; k<numOfImages; k++ )
+                        {
+                            TargetType& target = *(warpedImages[n][k-1]);
+                            SourceType& source = *(regImages[n][k]);
+
+                            for ( ii=0; ii<DIn; ii++ )
+                            {
+                                deformCurr[ii] = deform[ii][n][k];
+                                deformInvCurr[ii] = deformInv[ii][n][k];
+                            }
+
+                            registerTwoImagesDeformationFieldBidirectional(target, source, initial, warpedImages[n][k], deformCurr, deformInvCurr);
+                        }
+                    }
+                }
+            }
+            else if ( container_reg_transformation_==GT_IMAGE_REG_TRANSFORMATION_RIGID 
+                        || container_reg_transformation_==GT_IMAGE_REG_TRANSFORMATION_AFFINE )
+            {
+                GADGET_MSG("To be implemented ...");
+            }
+
+            #ifdef USE_OMP
+                omp_set_nested(nested);
+            #endif // USE_OMP
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in hoImageRegContainer2DRegistration<ValueType, CoordType, DIn, DOut>::registerOverContainer2DProgressive(...) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    void hoImageRegContainer2DRegistration<ValueType, CoordType, DIn, DOut>::print(std::ostream& os) const
+    {
+        // Writes a human-readable summary of the registration settings to os; sections are separated by "------------".
+
+        // Per-level sections run over pyramid levels 0 .. resolution_pyramid_levels_-1.
+
+        os << "--------------Gagdgetron image registration container 2D -------------" << std::endl;
+
+        os << "Input dimension is : " << DIn << std::endl;
+        os << "Output dimension is : " << DOut << std::endl;
+
+        const std::string valueTypeName(typeid(ValueType).name());
+        os << "Image data type is : " << valueTypeName << std::endl;
+
+        const std::string coordTypeName(typeid(CoordType).name());
+        os << "Transformation coordinate data type is : " << coordTypeName << std::endl;
+
+        os << "Whether to perform world coordinate registration is : " << use_world_coordinates_ << std::endl;
+        os << "Number of resolution pyramid levels is : " << resolution_pyramid_levels_ << std::endl;
+
+        os << "------------" << std::endl;
+        os << "Number of iterations is : " << std::endl;
+        for ( unsigned int level=0; level<resolution_pyramid_levels_; ++level )
+        {
+            os << " Level " << level
+                << " - " << max_iter_num_pyramid_level_[level] << std::endl;
+        }
+
+        os << "------------" << std::endl;
+        os << "Image dissimilarity is : " << std::endl;
+        for ( unsigned int level=0; level<resolution_pyramid_levels_; ++level )
+        {
+            os << " Level " << level
+                << " - " << getDissimilarityName(dissimilarity_type_) << std::endl;
+        }
+
+        os << "------------" << std::endl;
+        os << "Threshold for dissimilarity is : " << std::endl;
+        for ( unsigned int level=0; level<resolution_pyramid_levels_; ++level )
+        {
+            os << " Level " << level
+                << " - " << dissimilarity_thres_pyramid_level_[level] << std::endl;
+        }
+
+        os << "------------" << std::endl;
+        os << "Number of search size division is : " << std::endl;
+        for ( unsigned int level=0; level<resolution_pyramid_levels_; ++level )
+        {
+            os << " Level " << level
+                << " - " << div_num_pyramid_level_[level] << std::endl;
+        }
+
+        os << "------------" << std::endl;
+        // the unit of the regularization strength depends on the world coordinate flag
+        const char* strengthUnitMsg = "Regularization strength  is in the unit of image pixel size ... ";
+        if ( regularization_hilbert_strength_world_coordinate_ )
+        {
+            strengthUnitMsg = "Regularization strength  is in the unit of physical metric, e.g. mm ... ";
+        }
+        os << strengthUnitMsg;
+
+        // one bracketed list of DIn strength values per pyramid level
+        os << "Regularization strength for every pyramid level is : " << std::endl;
+        for ( unsigned int level=0; level<resolution_pyramid_levels_; ++level )
+        {
+            os << " Level " << level << " - [ ";
+            for ( unsigned int dim=0; dim<DIn; ++dim )
+            {
+                os << regularization_hilbert_strength_pyramid_level_[level][dim] << " ";
+            }
+            os << " ] " << std::endl;
+        }
+
+        os << "------------" << std::endl;
+        os << "Boundary handler and interpolator type for warper is : " << std::endl;
+        for ( unsigned int level=0; level<resolution_pyramid_levels_; ++level )
+        {
+            os << " Level " << level
+                << " - " << getBoundaryHandlerName(boundary_handler_type_warper_[level])
+                << " - " << getInterpolatorName(interp_type_warper_[level]) << std::endl;
+        }
+
+        os << "------------" << std::endl;
+        os << "Number of iterations to improve the estimation of the inverse transform is : " << std::endl;
+        for ( unsigned int level=0; level<resolution_pyramid_levels_; ++level )
+        {
+            os << " Level " << level
+                << " - " << inverse_deform_enforce_iter_pyramid_level_[level] << std::endl;
+        }
+
+        os << "------------" << std::endl;
+        os << "Weight to update the estimation of the inverse transform is : " << std::endl;
+        for ( unsigned int level=0; level<resolution_pyramid_levels_; ++level )
+        {
+            os << " Level " << level
+                << " - " << inverse_deform_enforce_weight_pyramid_level_[level] << std::endl;
+        }
+        os << "------------" << std::endl;
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/dissimilarity/hoImageRegDissimilarity.h b/toolboxes/registration/optical_flow/cpu/dissimilarity/hoImageRegDissimilarity.h
new file mode 100644
index 0000000..b8feaf1
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/dissimilarity/hoImageRegDissimilarity.h
@@ -0,0 +1,251 @@
+/** \file   hoImageRegDissimilarity.h
+    \brief  Define the class to compute image dissimilarity in gadgetron registration
+
+            Four different types of image dissimilarity measures are implemented here:
+
+            SSD: sum-of-square difference
+            LocalCCR: localized cross-correlation
+            MI: mutual information
+            NMI: normalized mutual information
+
+            For  SSD, LocalCCR and MI, the analytical derivatives are computed.
+
+            The analytical derivatives are computed by using the formula proposed at:
+
+            [1] Gerardo Hermosillo, Christophe Chefd'Hotel, Olivier Faugeras. Variational Methods for Multimodal Image Matching. 
+            International Journal of Computer Vision. December 2002, Volume 50, Issue 3, pp 329-343.
+            http://link.springer.com/article/10.1023%2FA%3A1020830525823
+
+            [2] Gerardo Hermosillo. Variational Methods for Multimodal Image Matching. PhD Thesis, UNIVERSITÉ DE NICE - SOPHIA ANTIPOLIS. May 2002.
+            http://webdocs.cs.ualberta.ca/~dana/readingMedIm/papers/hermosilloPhD.pdf
+
+            The derivative computation code is based on the listed source code at page 179 - 185 in ref [2] and extended.
+
+            [3] Christophe Chefd'Hotel, Gerardo Hermosillo, Olivier D. Faugeras: Flows of diffeomorphisms for multimodal image registration. ISBI 2002: 753-756.
+            http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=1029367&tag=1
+
+            [4] C. Studholme, D.L.G. Hill, D.J. Hawkes. An overlap invariant entropy measure of 3D medical image alignment. Pattern Recognition, 32, 71-86, 1999.
+            http://eecs.vanderbilt.edu/courses/cs359/other_links/papers/studholme_NMI_1999.pdf
+
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoNDArray.h"
+#include "hoNDImage.h"
+#include "hoNDInterpolator.h"
+#include "hoNDBoundaryHandler.h"
+#include "hoMatrix.h"
+#include "hoNDArray_utils.h"
+#include "hoNDArray_elemwise.h"
+#include "hoNDImage_util.h"
+#include "gtPlusISMRMRDReconUtil.h"
+#include "GtPrepUtil.h"
+
+#ifdef USE_OMP
+    #include <omp.h>
+#endif // USE_OMP
+
+namespace Gadgetron
+{
+    // define the image dissimilarity type (names are converted by getDissimilarityName / getDissimilarityType)
+    enum GT_IMAGE_DISSIMILARITY
+    {
+        GT_IMAGE_DISSIMILARITY_SSD,         // sum-of-square difference
+        GT_IMAGE_DISSIMILARITY_LocalCCR,    // localized cross-correlation
+        GT_IMAGE_DISSIMILARITY_MI,          // mutual information
+        GT_IMAGE_DISSIMILARITY_NMI          // normalized mutual information
+    };
+    /// Convert a dissimilarity type to its name; an unrecognized value is logged and gives an empty string.
+    inline std::string getDissimilarityName(GT_IMAGE_DISSIMILARITY v)
+    {
+        if ( v == GT_IMAGE_DISSIMILARITY_SSD )
+        {
+            return "SSD";
+        }
+
+        if ( v == GT_IMAGE_DISSIMILARITY_LocalCCR )
+        {
+            return "LocalCCR";
+        }
+
+        if ( v == GT_IMAGE_DISSIMILARITY_MI )
+        {
+            return "MutualInformation";
+        }
+
+        if ( v == GT_IMAGE_DISSIMILARITY_NMI )
+        {
+            return "NormalizedMutualInformation";
+        }
+
+        // none of the known enumerators matched
+        GADGET_ERROR_MSG("Unrecognized image dissimilarity type : " << v);
+
+        // an empty name is what the caller receives for an unknown type
+        return std::string();
+    }
+    /// Convert a dissimilarity name (as returned by getDissimilarityName) to its type; unrecognized names are logged and map to SSD.
+    inline GT_IMAGE_DISSIMILARITY getDissimilarityType(const std::string& name)
+    {
+        // default result, so that an unrecognized name never returns an indeterminate value
+        GT_IMAGE_DISSIMILARITY v = GT_IMAGE_DISSIMILARITY_SSD;
+        if ( name == "SSD" )
+        {
+            v = GT_IMAGE_DISSIMILARITY_SSD;
+        }
+        else if ( name == "LocalCCR" )
+        {
+            v = GT_IMAGE_DISSIMILARITY_LocalCCR;
+        }
+        else if ( name == "MutualInformation" )
+        {
+            v = GT_IMAGE_DISSIMILARITY_MI;
+        }
+        else if ( name == "NormalizedMutualInformation" )
+        {
+            v = GT_IMAGE_DISSIMILARITY_NMI;
+        }
+        else
+        {
+            GADGET_ERROR_MSG("Unrecognized image dissimilarity name : " << name);
+        }
+
+        return v;
+    }
+
+    /// Abstract base class of the image dissimilarity measures between a target image and a warped image
+    /// concrete measures implement evaluateDeriv(); if possible, they compute the analytical derivatives
+    template<typename ValueType, unsigned int D> 
+    class hoImageRegDissimilarity
+    {
+    public:
+
+        typedef hoImageRegDissimilarity<ValueType, D> Self;
+
+        typedef hoNDImage<ValueType, D> ImageType;
+
+        typedef hoNDInterpolator<ImageType> InterpolatorType;
+
+        typedef ValueType T;
+        typedef ValueType element_type;
+        typedef ValueType value_type;
+
+        typedef float coord_type;
+        /// bg_value is stored as the background pixel value (bg_value_)
+        hoImageRegDissimilarity(ValueType bg_value=ValueType(0));
+        virtual ~hoImageRegDissimilarity();
+
+        /// initialize the dissimilarity for target image t; only the address of t is stored, and the derivative image is sized like t and zeroed
+        virtual void initialize(ImageType& t);
+        /// read access to the derivative image (deriv_)
+        const ImageType& getDeriv() const { return deriv_; }
+        /// the stored dissimilarity value (dissimilarity_)
+        ValueType getDissimilarity() const { return dissimilarity_; }
+
+        void setBackgroundValue(ValueType bg_value) { bg_value_ = bg_value; }
+
+        /// compute the dissimilarity value; the base implementation only records w as the warped image and returns 0
+        virtual ValueType evaluate(ImageType& w);
+
+        /// compute the derivative and dissimilarity value
+        virtual bool evaluateDeriv(ImageType& w) = 0;
+
+        virtual void print(std::ostream& os) const;
+
+        // ----------------------------------
+        // debug and timing
+        // ----------------------------------
+        // clock for timing
+        Gadgetron::GadgetronTimer gt_timer1_;
+        Gadgetron::GadgetronTimer gt_timer2_;
+        Gadgetron::GadgetronTimer gt_timer3_;
+
+        bool performTiming_;
+
+        // exporter
+        Gadgetron::gtPlus::gtPlusIOAnalyze gt_exporter_;
+
+        // debug folder
+        std::string debugFolder_;
+
+    protected:
+        /// images given to initialize() / evaluate(); only their addresses are kept (the destructor does not delete them)
+        ImageType* target_;
+        ImageType* warpped_;
+        /// dimensions of the target image, filled by initialize()
+        std::vector<size_t> image_dim_;
+
+        /// background pixels
+        ValueType bg_value_;
+
+        /// derivative to spatial locations
+        ImageType deriv_;
+
+        /// dissimilarity value
+        ValueType dissimilarity_;
+        /// arrays created over the buffers of *target_, *warpped_ and deriv_ (create(dims, ptr, false); presumably without taking ownership -- TODO confirm)
+        hoNDArray<ValueType> target;
+        hoNDArray<ValueType> warped;
+        hoNDArray<ValueType> deriv;
+    };
+    /// Store the background value; image pointers start as NULL, the dissimilarity as 0, timing is off and timing-in-destruction is disabled on the three timers.
+    template<typename ValueType, unsigned int D> 
+    hoImageRegDissimilarity<ValueType, D>::hoImageRegDissimilarity(ValueType bg_value) 
+        : performTiming_(false), target_(NULL), warpped_(NULL), bg_value_(bg_value), dissimilarity_(0)
+    {
+        gt_timer3_.set_timing_in_destruction(false);
+        gt_timer2_.set_timing_in_destruction(false);
+        gt_timer1_.set_timing_in_destruction(false);
+    }
+    /// Destructor; the body is empty, so the images behind target_ / warpped_ are not deleted here.
+    template<typename ValueType, unsigned int D> 
+    hoImageRegDissimilarity<ValueType, D>::~hoImageRegDissimilarity()
+    {
+    }
+    /// Bind the measure to target image t: size the derivative image like t, zero it and wrap the image buffers as arrays.
+    template<typename ValueType, unsigned int D> 
+    void hoImageRegDissimilarity<ValueType, D>::initialize(ImageType& t)
+    {
+        target_ = &t;
+
+        const bool derivNeedsAlloc = !deriv_.dimensions_equal(t);
+        if ( derivNeedsAlloc ) deriv_.create(t.get_dimensions());
+
+        // start from an all-zero derivative
+        const size_t derivBytes = deriv_.get_number_of_elements()*sizeof(ValueType);
+        memset( deriv_.get_data_ptr(), 0, derivBytes );
+        t.get_dimensions(image_dim_);
+
+        // these conversions can be removed once hoNDImage gains more utility functions
+        target.create(image_dim_, t.begin(), false);
+        deriv.create(image_dim_, deriv_.begin(), false);
+    }
+    /// Base evaluation: register w as the current warped image (when it differs from the last one) and reset the dissimilarity to 0.
+    template<typename ValueType, unsigned int D> 
+    ValueType hoImageRegDissimilarity<ValueType, D>::evaluate(ImageType& w)
+    {
+        ImageType* const incoming = &w;
+        if ( incoming != warpped_ )
+        {
+            // a new image: it must have the target's dimensions before it is wrapped as an array
+            warpped_ = incoming;
+            GADGET_CHECK_THROW(warpped_->dimensions_equal(*target_));
+            warped.create(image_dim_, incoming->begin(), false);
+        }
+
+        return ( dissimilarity_ = 0 );
+    }
+    /// Print the description of the measure (image dimension and name of the value type) to os.
+    template<typename ValueType, unsigned int D> 
+    void hoImageRegDissimilarity<ValueType, D>::print(std::ostream& os) const
+    {
+        using namespace std;
+        os << "--------------Gagdgetron image dissimilarity measure -------------" << endl;
+        os << "Image dimension is : " << D << endl;
+        // note: the output is not terminated with std::ends, which would insert a '\0' character into the text stream
+        std::string elemTypeName = std::string(typeid(ValueType).name());
+        os << "Transformation data type is : " << elemTypeName << endl;
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/dissimilarity/hoImageRegDissimilarityHistogramBased.h b/toolboxes/registration/optical_flow/cpu/dissimilarity/hoImageRegDissimilarityHistogramBased.h
new file mode 100644
index 0000000..7e84dfc
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/dissimilarity/hoImageRegDissimilarityHistogramBased.h
@@ -0,0 +1,226 @@
+/** \file   hoImageRegDissimilarityHistogramBased.h
+    \brief  Define the class to compute image dissimilarity based on histogram
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include <limits>
+#include "hoMatrix.h"
+#include "hoImageRegDissimilarity.h"
+
+namespace Gadgetron
+{
+    template<typename ValueType, unsigned int D> 
+    class hoImageRegDissimilarityHistogramBased : public hoImageRegDissimilarity<ValueType, D>
+    {
+    public:
+        /// Intermediate base class for histogram based measures: evaluate() builds the joint 2D intensity histogram of target and warped image; evaluateDeriv() stays pure virtual
+        typedef hoImageRegDissimilarityHistogramBased<ValueType, D> Self;
+        typedef hoImageRegDissimilarity<ValueType, D> BaseClass;
+
+        typedef typename BaseClass::ImageType ImageType;
+        typedef typename BaseClass::InterpolatorType InterpolatorType;
+
+        typedef ValueType T;
+        typedef ValueType element_type;
+        typedef ValueType value_type;
+
+        typedef typename BaseClass::coord_type coord_type;
+        /// value type of the histogram entries
+        typedef double hist_value_type;
+        /// num_bin_target / num_bin_warpped give the histogram size; bg_value is handed to the base class
+        hoImageRegDissimilarityHistogramBased(unsigned int num_bin_target=64, unsigned int num_bin_warpped=64, ValueType bg_value=ValueType(0));
+        virtual ~hoImageRegDissimilarityHistogramBased();
+        /// fill hist_ from the target and the warped image w; returns the dissimilarity_ value of the base class
+        virtual ValueType evaluate(ImageType& w);
+
+        virtual bool evaluateDeriv(ImageType& w) = 0;
+
+        virtual void print(std::ostream& os) const;
+
+        /// number of intensity bins
+        unsigned int num_bin_target_;
+        unsigned int num_bin_warpped_;
+
+        /// whether to perform partial interpolation for histogram (true: a sample is spread over neighboring bins with interpolation weights; false: it is counted in the rounded bin)
+        bool pv_interpolation_;
+
+        /// step size to ignore pixels when creating histogram (the pixel loop in evaluate() advances by this step)
+        size_t step_size_ignore_pixel_;
+
+        using BaseClass::gt_timer1_;
+        using BaseClass::gt_timer2_;
+        using BaseClass::gt_timer3_;
+        using BaseClass::performTiming_;
+        using BaseClass::gt_exporter_;
+        using BaseClass::debugFolder_;
+
+    protected:
+
+        using BaseClass::target_;
+        using BaseClass::warpped_;
+        using BaseClass::deriv_;
+        using BaseClass::bg_value_;
+        using BaseClass::dissimilarity_;
+        using BaseClass::target;
+        using BaseClass::warped;
+        using BaseClass::deriv;
+        using BaseClass::image_dim_;
+
+        /// store the 2D histogram (created as num_bin_target_ x num_bin_warpped_, indexed as hist_(target bin, warped bin))
+        hoMatrix<hist_value_type> hist_;
+
+        /// min/max intensities of target and warped, recomputed by every evaluate() call
+        ValueType min_target_;
+        ValueType max_target_;
+
+        ValueType min_warpped_;
+        ValueType max_warpped_;
+        /// number of samples entered into the histogram by the last evaluate() call (pixels where both images are background are skipped)
+        size_t num_samples_in_hist_;
+    };
+    /// Store the histogram size and pass bg_value to the base class; PV interpolation is off, the pixel step is 1, and the intensity range / sample count start at 0.
+    template<typename ValueType, unsigned int D> 
+    hoImageRegDissimilarityHistogramBased<ValueType, D>::
+    hoImageRegDissimilarityHistogramBased(unsigned int num_bin_target, unsigned int num_bin_warpped, ValueType bg_value) 
+        : BaseClass(bg_value), num_bin_target_(num_bin_target), num_bin_warpped_(num_bin_warpped), pv_interpolation_(false), step_size_ignore_pixel_(1),
+          min_target_(0), max_target_(0), min_warpped_(0), max_warpped_(0), num_samples_in_hist_(0)
+    { /* the range and count members used to be left uninitialized; they are recomputed by every evaluate() call */ }
+    /// Destructor; the body is empty.
+    template<typename ValueType, unsigned int D> 
+    hoImageRegDissimilarityHistogramBased<ValueType, D>::~hoImageRegDissimilarityHistogramBased()
+    {
+    }
+    /// Build the joint 2D histogram hist_ of the target image and the warped image w; any exception is caught and logged.
+    template<typename ValueType, unsigned int D> 
+    ValueType hoImageRegDissimilarityHistogramBased<ValueType, D>::evaluate(ImageType& w)
+    {
+        try
+        {
+            BaseClass::evaluate(w);
+
+            // allocate histogram
+            hist_.createMatrix(num_bin_target_, num_bin_warpped_);
+            Gadgetron::clear(hist_);
+
+            // intensity range; the running maxima must start from the lowest representable value
+            min_target_ = std::numeric_limits<ValueType>::max();
+            max_target_ = std::numeric_limits<ValueType>::is_integer ? std::numeric_limits<ValueType>::min() : ValueType(-std::numeric_limits<ValueType>::max());
+            // (for floating point types numeric_limits::min() is the smallest positive value, a wrong start for images without positive pixels)
+            min_warpped_ = min_target_;
+            max_warpped_ = max_target_;
+
+            size_t N = target_->get_number_of_elements();
+
+            long long n;
+            for ( n=0; n<(long long)N; n++ )
+            {
+                ValueType vt = target(n);
+                if ( vt < min_target_ ) min_target_ = vt;
+                if ( vt > max_target_ ) max_target_ = vt;
+
+                ValueType vw = warped(n);
+                if ( vw < min_warpped_ ) min_warpped_ = vw;
+                if ( vw > max_warpped_ ) max_warpped_ = vw;
+            }
+            // note: range_t / range_w hold the reciprocal of the intensity range (epsilon avoids a division by zero)
+            ValueType range_t = ValueType(1.0)/(max_target_ - min_target_ + std::numeric_limits<ValueType>::epsilon());
+            ValueType range_w = ValueType(1.0)/(max_warpped_ - min_warpped_ + std::numeric_limits<ValueType>::epsilon());
+
+            num_samples_in_hist_ = 0;
+            // pv_interpolation_ selects how a sample is entered into the histogram
+            if ( pv_interpolation_ )
+            {
+                #pragma omp parallel for default(none) private(n) shared(N, range_t, range_w)
+                for ( n=0; n<(long long)N; n+=(long long)step_size_ignore_pixel_ )
+                {
+                    ValueType vt = target(n);
+                    ValueType vw = warped(n);
+                    // skip pixels where both images are within FLT_EPSILON of the background value
+                    if ( GT_ABS(vt-bg_value_)<FLT_EPSILON 
+                        && GT_ABS(vw-bg_value_)<FLT_EPSILON )
+                    {
+                        continue;
+                    }
+                    // continuous bin coordinates of both intensities
+                    ValueType xT = range_t*(vt-min_target_)*(num_bin_target_-1);
+                    ValueType xW = range_w*(vw-min_warpped_)*(num_bin_warpped_-1);
+                    // integer bin indices (truncated) ...
+                    size_t indT = static_cast<size_t>(xT);
+                    size_t indW = static_cast<size_t>(xW);
+                    // ... and interpolation weights from the fractional parts
+                    ValueType sT, s1T, sW, s1W;
+
+                    sT = xT - indT; s1T = 1 - sT;
+                    sW = xW - indW; s1W = 1 - sW;
+
+                    #pragma omp critical
+                    {
+                        hist_(indT, indW) += s1T*s1W;
+                        // the three neighbouring bins are only updated when both indices are below the last bin
+                        if ( indT<num_bin_target_-1 && indW<num_bin_warpped_-1 )
+                        {
+                            hist_(indT, indW+1) += s1T*sW;
+                            hist_(indT+1, indW) += sT*s1W;
+                            hist_(indT+1, indW+1) += sT*sW;
+                        }
+                    }
+
+                    #pragma omp atomic
+                    num_samples_in_hist_++;
+                }
+            }
+            else
+            {
+                #pragma omp parallel for default(none) private(n) shared(N, range_t, range_w)
+                for ( n=0; n<(long long)N; n+=(long long)step_size_ignore_pixel_ )
+                {
+                    ValueType vt = target(n);
+                    ValueType vw = warped(n);
+                    // skip pixels where both images are within FLT_EPSILON of the background value
+                    if ( GT_ABS(vt-bg_value_)<FLT_EPSILON 
+                        && GT_ABS(vw-bg_value_)<FLT_EPSILON )
+                    {
+                        continue;
+                    }
+                    // nearest bin (the +0.5 rounds to the closest index)
+                    size_t indT = static_cast<size_t>( range_t*(vt-min_target_)*(num_bin_target_-1) + 0.5 );
+                    size_t indW = static_cast<size_t>( range_w*(vw-min_warpped_)*(num_bin_warpped_-1) + 0.5 );
+
+                    #pragma omp critical
+                    {
+                        hist_(indT, indW)++;
+                    }
+
+                    #pragma omp atomic
+                    num_samples_in_hist_++;
+                }
+            }
+            // export the histogram when a debug folder is set
+            GADGET_CHECK_PERFORM(!debugFolder_.empty(), GADGET_EXPORT_ARRAY(debugFolder_, gt_exporter_, hist_, "hist2D"));
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDissimilarityHistogramBased<ValueType, D>::evaluate(ImageType& t, ImageType& w) ... ");
+        }
+        // dissimilarity_ is not modified here after BaseClass::evaluate()
+        return this->dissimilarity_;
+    }
+    /// Print the description of the measure and the histogram settings (bin numbers, PV interpolation flag, pixel step) to os.
+    template<typename ValueType, unsigned int D> 
+    void hoImageRegDissimilarityHistogramBased<ValueType, D>::print(std::ostream& os) const
+    {
+        using namespace std;
+        os << "--------------Gagdgetron image dissimilarity with histogram -------------" << endl;
+        os << "Image dimension is : " << D << endl;
+
+        std::string elemTypeName = std::string(typeid(ValueType).name());
+        os << "Transformation data type is : " << elemTypeName << std::endl;
+
+        os << "Number of intensity bins for target is : " << num_bin_target_ << endl;
+        os << "Number of intensity bins for warped is : " << num_bin_warpped_ << endl;
+        os << "PV interpolation for histogram is : " << pv_interpolation_ << endl;
+        os << "Step size to ignore pixels when creating histogram is : " << step_size_ignore_pixel_ << endl; // no std::ends: it would write a '\0' character into the stream
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/dissimilarity/hoImageRegDissimilarityLocalCCR.h b/toolboxes/registration/optical_flow/cpu/dissimilarity/hoImageRegDissimilarityLocalCCR.h
new file mode 100644
index 0000000..d7fd20f
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/dissimilarity/hoImageRegDissimilarityLocalCCR.h
@@ -0,0 +1,412 @@
+/** \file   hoImageRegDissimilarityLocalCCR.h
+    \brief  Define the class to compute image Local Cross CorRelation (LocalCCR) in gadgetron registration
+
+            The analytical derivatives are computed by using the formula proposed at:
+
+            [1] Gerardo Hermosillo, Christophe Chefd'Hotel, Olivier Faugeras. Variational Methods for Multimodal Image Matching. 
+            International Journal of Computer Vision. December 2002, Volume 50, Issue 3, pp 329-343.
+            http://link.springer.com/article/10.1023%2FA%3A1020830525823
+
+            [2] Gerardo Hermosillo. Variational Methods for Multimodal Image Matching. PhD Thesis, UNIVERSITÉ DE NICE - SOPHIA ANTIPOLIS. May 2002.
+            http://webdocs.cs.ualberta.ca/~dana/readingMedIm/papers/hermosilloPhD.pdf
+
+            This derivative computation code is based on the listed source code at page 183 - 185 in ref [2].
+
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include <limits>
+#include "hoImageRegDissimilarity.h"
+
+namespace Gadgetron
+{
+    template<typename ValueType, unsigned int D> 
+    class hoImageRegDissimilarityLocalCCR : public hoImageRegDissimilarity<ValueType, D>
+    {
+    public:
+        /// Local cross-correlation (LocalCCR) measure computed from Gaussian-filtered intermediate arrays; declares both evaluate() and evaluateDeriv()
+        typedef hoImageRegDissimilarityLocalCCR<ValueType, D> Self;
+        typedef hoImageRegDissimilarity<ValueType, D> BaseClass;
+
+        typedef typename BaseClass::ImageType ImageType;
+        typedef typename BaseClass::InterpolatorType InterpolatorType;
+
+        typedef ValueType T;
+        typedef ValueType element_type;
+        typedef ValueType value_type;
+        /// type of the intermediate arrays (pixel values are cast to it in evaluate())
+        typedef double computing_value_type;
+
+        typedef typename BaseClass::coord_type coord_type;
+        /// sigmaArg : sigma per dimension (2.0 in every dimension when not given); betaArg is stored in betaArg_
+        hoImageRegDissimilarityLocalCCR(computing_value_type betaArg=std::numeric_limits<ValueType>::epsilon() );
+        hoImageRegDissimilarityLocalCCR(ValueType sigmaArg[D], computing_value_type betaArg=std::numeric_limits<ValueType>::epsilon() );
+        virtual ~hoImageRegDissimilarityLocalCCR();
+        /// calls BaseClass::initialize(t), then allocates the intermediate arrays with the target dimensions
+        void initialize(ImageType& t);
+
+        virtual ValueType evaluate(ImageType& w);
+        virtual bool evaluateDeriv(ImageType& w);
+
+        virtual void print(std::ostream& os) const;
+
+        /// these parameter names are kept same as the source code on page 183 - 185 in ref [2]
+        computing_value_type sigmaArg_[D]; // kernel size of local weighting function
+        /// named after "beta" in the code of ref [2], where it is added to the local variances -- NOTE(review): its use in this class is outside this view, confirm
+        computing_value_type betaArg_;
+
+        using BaseClass::gt_timer1_;
+        using BaseClass::gt_timer2_;
+        using BaseClass::gt_timer3_;
+        using BaseClass::performTiming_;
+        using BaseClass::gt_exporter_;
+        using BaseClass::debugFolder_;
+
+    protected:
+
+        using BaseClass::target_;
+        using BaseClass::warpped_;
+        using BaseClass::deriv_;
+        using BaseClass::bg_value_;
+        using BaseClass::dissimilarity_;
+        using BaseClass::target;
+        using BaseClass::warped;
+        using BaseClass::deriv;
+        using BaseClass::image_dim_;
+
+        /// these parameter names are kept same as the source code on page 183 - 185 in ref [2]; each p_xx caches xx.begin() (set in initialize())
+        hoNDArray<computing_value_type> cc; computing_value_type* p_cc;
+        hoNDArray<computing_value_type> mu1; computing_value_type* p_mu1;
+        hoNDArray<computing_value_type> mu2; computing_value_type* p_mu2;
+        hoNDArray<computing_value_type> v1; computing_value_type* p_v1;
+        hoNDArray<computing_value_type> v2; computing_value_type* p_v2;
+        hoNDArray<computing_value_type> v12; computing_value_type* p_v12;
+
+        //hoNDArray<computing_value_type> vv1; computing_value_type* p_vv1;
+        //hoNDArray<computing_value_type> vv2; computing_value_type* p_vv2;
+        //hoNDArray<computing_value_type> vv12; computing_value_type* p_vv12;
+        /// buffer passed to filterGaussian() in evaluate(); NOTE(review): it is only allocated when WIN32 is defined
+        hoNDArray<computing_value_type> mem_;
+        /// set to std::numeric_limits<computing_value_type>::epsilon() in initialize()
+        computing_value_type eps_;
+    };
+    /// Construct with the given beta and a sigma of 2.0 in every dimension.
+    template<typename ValueType, unsigned int D> 
+    hoImageRegDissimilarityLocalCCR<ValueType, D>::hoImageRegDissimilarityLocalCCR(computing_value_type betaArg) 
+        : BaseClass(), betaArg_(betaArg)
+    {
+        const computing_value_type defaultSigma = 2.0;
+        for ( unsigned int dim=0; dim<D; ++dim )
+        {
+            sigmaArg_[dim] = defaultSigma;
+        }
+    }
+    /// Construct with the given beta and one sigma per dimension, read from sigmaArg[0..D-1] and converted to computing_value_type.
+    template<typename ValueType, unsigned int D> 
+    hoImageRegDissimilarityLocalCCR<ValueType, D>::hoImageRegDissimilarityLocalCCR(ValueType sigmaArg[D], computing_value_type betaArg) 
+        : BaseClass(), betaArg_(betaArg)
+    {
+        computing_value_type* dst = sigmaArg_;
+        for ( const ValueType* src=sigmaArg; src!=sigmaArg+D; ++src, ++dst )
+        {
+            *dst = static_cast<computing_value_type>(*src);
+        }
+    }
+    /// Destructor; the body is empty.
+    template<typename ValueType, unsigned int D> 
+    hoImageRegDissimilarityLocalCCR<ValueType, D>::~hoImageRegDissimilarityLocalCCR()
+    {
+    }
+    /// Prepare for target t: run the base initialization, then allocate all intermediate arrays with the target dimensions and cache their data pointers.
+    template<typename ValueType, unsigned int D> 
+    void hoImageRegDissimilarityLocalCCR<ValueType, D>::initialize(ImageType& t)
+    {
+        BaseClass::initialize(t);
+
+        // allocate arrays for the computation
+        cc.create(image_dim_); p_cc = cc.begin();
+        mu1.create(image_dim_); p_mu1 = mu1.begin();
+        mu2.create(image_dim_); p_mu2 = mu2.begin();
+        v1.create(image_dim_); p_v1 = v1.begin();
+        v2.create(image_dim_); p_v2 = v2.begin();
+        v12.create(image_dim_); p_v12 = v12.begin();
+
+        //vv1.create(image_dim_); p_vv1 = vv1.begin();
+        //vv2.create(image_dim_); p_vv2 = vv2.begin();
+        //vv12.create(image_dim_); p_vv12 = vv12.begin();
+        // buffer of 2*(sum of the image dimensions) elements; NOTE(review): evaluate() passes mem_.begin() to filterGaussian() on every platform, but mem_ is only created under WIN32 -- confirm filterGaussian() copes with an empty buffer
+        #ifdef WIN32
+            size_t v=0;
+            for ( size_t ii=0; ii<image_dim_.size(); ii++ ) v+=image_dim_[ii];
+            mem_.create(2*v);
+        #endif // WIN32
+
+        eps_ = std::numeric_limits<computing_value_type>::epsilon();
+    }
+
+    template<typename ValueType, unsigned int D> 
+    ValueType hoImageRegDissimilarityLocalCCR<ValueType, D>::evaluate(ImageType& w)
+    {
+        try
+        {
+            /// in the ref [2], the code are:
+            /*
+            Image<float>
+                mu1(I1.domain()), mu2(I1.domain()),
+                v1(I1.domain()), v2(I1.domain()),
+                v12(I1.domain()), f1(I1.domain()),
+                f2(I1.domain()), f3(I1.domain());
+                Map(I1,x) {
+                const real i1 = I1[x];
+                const real i2 = I2[x];
+                mu1[x] = i1; v1[x] = i1 * i1;
+                mu2[x] = i2; v12[x] = i1 * i2;
+                v2[x] = i2 * i2;
+                }
+                mu1.SelfRecSmoothZeroBC(sigma); v1.SelfRecSmoothZeroBC(sigma);
+                mu2.SelfRecSmoothZeroBC(sigma); v2.SelfRecSmoothZeroBC(sigma);
+                v12.SelfRecSmoothZeroBC(sigma);
+
+                criter = 0;
+                Map(v1,x) {
+                const real u1 = mu1[x];
+                const real u2 = mu2[x];
+                const real vv1 = v1[x] + beta - u1 * u1;
+                const real vv2 = v2[x] + beta - u2 * u2;
+                const real vv12 = v12[x] - u1 * u2;
+                const real ff1 = vv12 / (vv1 * vv2);
+                const real CC = vv12 * ff1;
+                const real ff2 = - CC / vv2;
+                const real ff3 =  - (ff2 * u2 + ff1 * u1);
+                f1[x] = ff1; f2[x] = ff2; f3[x] = ff3;
+                cc[x] = -CC;
+                criter += -CC;
+                }
+                f1.SelfRecSmoothZeroBC(sigma);
+                f2.SelfRecSmoothZeroBC(sigma);
+                f3.SelfRecSmoothZeroBC(sigma);
+
+                norm = 0;
+                Map(f1,x) {
+                const float val = 2.0 * ( f1[x] * I1[x] + f2[x] * I2[x] + f3[x] ) ;
+                dist[x] = val;
+                norm += val * val;
+                }
+            */
+
+            /// we rewrite these code for gadgetron
+
+            //GADGET_CHECK_PERFORM(performTiming_, gt_timer1_.start("1"));
+            BaseClass::evaluate(w);
+            //GADGET_CHECK_PERFORM(performTiming_, gt_timer1_.stop());
+
+            long long N = (long long)target.get_number_of_elements();
+
+            //GADGET_CHECK_PERFORM(performTiming_, gt_timer1_.start("2"));
+            //mu1.copyFrom(target);
+            //mu2.copyFrom(warped);
+            //Gadgetron::multiply(mu1, mu1, v1);
+            //Gadgetron::multiply(mu2, mu2, v2);
+            //Gadgetron::multiply(mu1, mu2, v12);
+
+            long long n;
+
+            ValueType* pT = target.begin();
+            ValueType* pW = warped.begin();
+
+            for ( n=0; n<N; ++n )
+            {
+                const computing_value_type v1 = (computing_value_type)pT[n];
+                const computing_value_type v2 = (computing_value_type)pW[n];
+
+                p_mu1[n] = v1;
+                p_mu2[n] = v2;
+                p_v1[n] = v1*v1;
+                p_v2[n] = v2*v2;
+                p_v12[n] = v1*v2;
+            }
+
+                //#ifdef WIN32
+                    Gadgetron::filterGaussian(mu1, sigmaArg_, mem_.begin());
+                    Gadgetron::filterGaussian(mu2, sigmaArg_, mem_.begin());
+                    Gadgetron::filterGaussian(v1, sigmaArg_, mem_.begin());
+                    Gadgetron::filterGaussian(v2, sigmaArg_, mem_.begin());
+                    Gadgetron::filterGaussian(v12, sigmaArg_, mem_.begin());
+                //#else
+                //    Gadgetron::filterGaussian(mu1, sigmaArg_);
+                //    Gadgetron::filterGaussian(mu2, sigmaArg_);
+                //    Gadgetron::filterGaussian(v1, sigmaArg_);
+                //    Gadgetron::filterGaussian(v2, sigmaArg_);
+                //    Gadgetron::filterGaussian(v12, sigmaArg_);
+                //#endif // WIN32
+
+            //if ( 0 )
+            //{
+            //    //#pragma omp parallel sections if ( D==2 )
+            //    {
+            //        //#pragma omp section
+            //        {
+            //            Gadgetron::multiply(mu1, mu1, vv1);
+            //            Gadgetron::subtract(v1, vv1, vv1);
+            //            Gadgetron::addEpsilon(vv1);
+            //        }
+
+            //        //#pragma omp section
+            //        {
+            //            Gadgetron::multiply(mu2, mu2, vv2);
+            //            Gadgetron::subtract(v2, vv2, vv2);
+            //            Gadgetron::addEpsilon(vv2);
+            //        }
+
+            //        //#pragma omp section
+            //        {
+            //            Gadgetron::multiply(mu1, mu2, vv12);
+            //            Gadgetron::subtract(v12, vv12, vv12);
+            //        }
+            //    }
+
+            //    Gadgetron::multiply(vv1, vv2, vv1);
+            //    Gadgetron::divide(vv12, vv1, v1); // ff1
+
+            //    Gadgetron::multiply(vv12, v1, cc); // cc
+
+            //    Gadgetron::divide(cc, vv2, v2); // ff2
+            //    Gadgetron::scal( (computing_value_type)(-1), v2);
+
+            //    Gadgetron::multiply(v2, mu2, v12);
+            //    Gadgetron::multiply(v1, mu1, vv12);
+            //    Gadgetron::add(v12, vv12, v12);
+
+            //    computing_value_type v=0;
+            //    Gadgetron::norm1(cc, v);
+
+            //    dissimilarity_ = static_cast<T>(-v/N);
+            //}
+
+            dissimilarity_ = 0;
+            computing_value_type v=0;
+
+            //#pragma omp parallel for private(n)
+            for ( n=0; n<N; ++n )
+            {
+                const computing_value_type u1 = p_mu1[n];
+                const computing_value_type u2 = p_mu2[n];
+
+                const computing_value_type vv1 = p_v1[n] - u1 * u1;
+                const computing_value_type vv2 = p_v2[n] - u2 * u2;
+                const computing_value_type vv12 = p_v12[n] - u1 * u2;
+
+                const computing_value_type ff1 = vv12 / (vv1 * vv2);
+                const computing_value_type lcc = vv12 * ff1;
+
+                const computing_value_type ff2 = - lcc / vv2;
+                const computing_value_type ff3 = ff2 * u2 + ff1 * u1;
+
+                p_v1[n] = ff1; p_v2[n] = ff2; p_v12[n] = ff3;
+
+                p_cc[n] = lcc;
+            }
+
+            computing_value_type lcc = 0;
+
+            // #pragma omp parallel for reduction(+:lcc)
+            for (n=0; n<N; n++)
+            {
+                lcc += cc[n];
+            }
+
+            dissimilarity_ = -lcc/N;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDissimilarityLocalCCR<ValueType, D>::evaluate(w) ... ");
+        }
+
+        return this->dissimilarity_;
+    }
+
+    template<typename ValueType, unsigned int D> 
+    bool hoImageRegDissimilarityLocalCCR<ValueType, D>::evaluateDeriv(ImageType& w)
+    {
+        try
+        {
+            this->evaluate(w);
+
+            size_t N = target.get_number_of_elements();
+
+            long long n;
+
+            //#pragma omp parallel sections if ( D==2 )
+            {
+                //#ifdef WIN32
+                    //#pragma omp section
+                    {
+                        Gadgetron::filterGaussian(v1, sigmaArg_, mem_.begin());
+                    }
+
+                    //#pragma omp section
+                    {
+                        Gadgetron::filterGaussian(v2, sigmaArg_, mem_.begin());
+                    }
+
+                    //#pragma omp section
+                    {
+                        Gadgetron::filterGaussian(v12, sigmaArg_, mem_.begin());
+                    }
+                //#else
+                //    Gadgetron::filterGaussian(v1, sigmaArg_);
+                //    Gadgetron::filterGaussian(v2, sigmaArg_);
+                //    Gadgetron::filterGaussian(v12, sigmaArg_);
+                //#endif // WIN32
+            }
+
+            //if ( !debugFolder_.empty() )
+            //{
+            //    GADGET_EXPORT_ARRAY(debugFolder_, gt_exporter_, v1, "f1_filtered");
+            //    GADGET_EXPORT_ARRAY(debugFolder_, gt_exporter_, v2, "f2_filtered");
+            //    GADGET_EXPORT_ARRAY(debugFolder_, gt_exporter_, v12, "f3_filtered");
+            //}
+
+            // deriv = f1*i1 + f2*i2 + f3, we don't need to multiply this by 2.0
+
+            //if ( typeid(ValueType) == typeid(computing_value_type) )
+            //{
+                //Gadgetron::multiply(v1, target, mu1);
+                //Gadgetron::multiply(v2, warped, mu2);
+                //Gadgetron::add(mu1, mu2, deriv);
+                //Gadgetron::subtract(deriv, v12, deriv);
+            //}
+            //else
+            //{
+                T* pT = target.begin();
+                T* pW = warped.begin();
+
+                // #pragma omp parallel for default(none) shared(N, pT, pW)
+                for ( n=0; n<(long long)N; n++ )
+                {
+                    deriv(n) = static_cast<T>( p_v1[n]* (computing_value_type)pT[n] + ( p_v2[n]*(computing_value_type)pW[n] - p_v12[n] ) );
+                }
+            //}
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDissimilarityLocalCCR<ValueType, D>::evaluateDeriv(w) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template<typename ValueType, unsigned int D> 
+    void hoImageRegDissimilarityLocalCCR<ValueType, D>::print(std::ostream& os) const
+    {   // Writes a banner, the image dimension D and the typeid name of ValueType to os.
+        using namespace std;
+        os << "--------------Gagdgetron image dissimilarity LocalCCR measure -------------" << endl;
+        os << "Image dimension is : " << D << endl;
+
+        std::string elemTypeName = std::string(typeid(ValueType).name());
+        os << "Transformation data type is : " << elemTypeName << endl; // no std::ends here: it would embed a '\0' character in the output
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/dissimilarity/hoImageRegDissimilarityMutualInformation.h b/toolboxes/registration/optical_flow/cpu/dissimilarity/hoImageRegDissimilarityMutualInformation.h
new file mode 100644
index 0000000..6dbef54
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/dissimilarity/hoImageRegDissimilarityMutualInformation.h
@@ -0,0 +1,295 @@
+/** \file   hoImageRegDissimilarityMutualInformation.h
+    \brief  Define the class to compute mutual information.
+
+            The analytical derivatives are computed by using the formula proposed at:
+
+            [1] Gerardo Hermosillo, Christophe Chefd'Hotel, Olivier Faugeras. Variational Methods for Multimodal Image Matching. 
+            International Journal of Computer Vision. December 2002, Volume 50, Issue 3, pp 329-343.
+            http://link.springer.com/article/10.1023%2FA%3A1020830525823
+
+            [2] Gerardo Hermosillo. Variational Methods for Multimodal Image Matching. PhD Thesis, UNIVERSITÉ DE NICE - SOPHIA ANTIPOLIS. May 2002.
+            http://webdocs.cs.ualberta.ca/~dana/readingMedIm/papers/hermosilloPhD.pdf
+
+            This derivative computation code is based on the listed source code at page 172 - 174 in ref [2].
+
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoImageRegDissimilarityHistogramBased.h"
+
+namespace Gadgetron
+{
+    template<typename ValueType, unsigned int D> 
+    class hoImageRegDissimilarityMutualInformation : public hoImageRegDissimilarityHistogramBased<ValueType, D>
+    {
+    public:
+
+        typedef hoImageRegDissimilarityMutualInformation<ValueType, D> Self;
+        typedef hoImageRegDissimilarityHistogramBased<ValueType, D> BaseClass;
+
+        typedef typename BaseClass::ImageType ImageType;
+        typedef typename BaseClass::InterpolatorType InterpolatorType;
+
+        typedef ValueType T;
+        typedef ValueType element_type;
+        typedef ValueType value_type;
+
+        typedef typename BaseClass::coord_type coord_type;
+
+        typedef typename BaseClass::hist_value_type hist_value_type;
+
+        hoImageRegDissimilarityMutualInformation(ValueType betaArg=ValueType(2.0), unsigned int num_bin_target=64, unsigned int num_bin_warpped=64, ValueType bg_value=ValueType(0));
+        virtual ~hoImageRegDissimilarityMutualInformation();
+
+        virtual ValueType evaluate(ImageType& w);
+        virtual bool evaluateDeriv(ImageType& w);
+
+        virtual void print(std::ostream& os) const;
+
+        /// kernel size for density estimation
+        ValueType betaArg_[2];
+
+        using BaseClass::num_bin_target_;
+        using BaseClass::num_bin_warpped_;
+        using BaseClass::pv_interpolation_;
+        using BaseClass::step_size_ignore_pixel_;
+        using BaseClass::gt_timer1_;
+        using BaseClass::gt_timer2_;
+        using BaseClass::gt_timer3_;
+        using BaseClass::performTiming_;
+        using BaseClass::gt_exporter_;
+        using BaseClass::debugFolder_;
+
+    protected:
+
+        using BaseClass::target_;
+        using BaseClass::warpped_;
+        using BaseClass::deriv_;
+        using BaseClass::bg_value_;
+        using BaseClass::dissimilarity_;
+        using BaseClass::target;
+        using BaseClass::warped;
+        using BaseClass::deriv;
+        using BaseClass::image_dim_;
+        using BaseClass::hist_;
+        using BaseClass::min_target_;
+        using BaseClass::max_target_;
+        using BaseClass::min_warpped_;
+        using BaseClass::max_warpped_;
+        using BaseClass::num_samples_in_hist_;
+
+        hoNDArray<hist_value_type> hist_target_;
+        hoNDArray<hist_value_type> hist_warpped_;
+
+        /// these variable names are kept the same as in ref [2].
+        ho2DArray<hist_value_type> Hy;
+        hoNDArray<hist_value_type> hy;
+
+        ho2DArray<hist_value_type> P;
+        hoNDArray<hist_value_type> p;
+
+        ho2DArray<hist_value_type> Dist;
+    };
+
+    template<typename ValueType, unsigned int D> 
+    hoImageRegDissimilarityMutualInformation<ValueType, D>::
+    hoImageRegDissimilarityMutualInformation(ValueType betaArg, unsigned int num_bin_target, unsigned int num_bin_warpped, ValueType bg_value) 
+        : BaseClass(num_bin_target, num_bin_warpped, bg_value) 
+    {
+        betaArg_[0] = betaArg;
+        betaArg_[1] = betaArg;
+    }
+
+    template<typename ValueType, unsigned int D> 
+    hoImageRegDissimilarityMutualInformation<ValueType, D>::~hoImageRegDissimilarityMutualInformation()
+    {
+    }
+
+    template<typename ValueType, unsigned int D> 
+    ValueType hoImageRegDissimilarityMutualInformation<ValueType, D>::evaluate(ImageType& w)
+    {   // Runs BaseClass::evaluate(w), normalizes hist_, stores the negated log2 sum below in dissimilarity_ and returns it.
+        try
+        {
+            BaseClass::evaluate(w);
+
+            /// compute entropy
+            hist_target_.create(num_bin_target_);
+            Gadgetron::clear(hist_target_);
+
+            hist_warpped_.create(num_bin_warpped_);
+            Gadgetron::clear(hist_warpped_);
+
+            if ( betaArg_[0] > 0 ) // smoothing is applied only for a positive kernel size
+            {
+                Gadgetron::filterGaussian(hist_, betaArg_);
+
+                GADGET_CHECK_PERFORM(!debugFolder_.empty(), GADGET_EXPORT_ARRAY(debugFolder_, gt_exporter_, hist_, "hist2D_filtered"));
+            }
+
+            hist_value_type histSum=0;
+            Gadgetron::norm1(hist_, histSum);
+            if ( histSum > 0 ) Gadgetron::scal( hist_value_type(1.0/histSum), hist_); // skipped for a zero sum: 1/histSum would be inf and turn the zero bins into NaN
+
+            hist_.sumOverRow(hist_target_);
+            GADGET_CHECK_PERFORM(!debugFolder_.empty(), GADGET_EXPORT_ARRAY(debugFolder_, gt_exporter_, hist_target_, "hist2D_target"));
+
+            hist_.sumOverCol(hist_warpped_);
+            GADGET_CHECK_PERFORM(!debugFolder_.empty(), GADGET_EXPORT_ARRAY(debugFolder_, gt_exporter_, hist_warpped_, "hist2D_warpped"));
+
+            dissimilarity_ = 0;
+
+            size_t t, w; // note: this w (bin index) shadows the image parameter w inside the try block
+
+            hist_value_type log2R = hist_value_type(1.0)/log( hist_value_type(2.0) ); // 1/ln(2): converts natural log to log base 2
+
+            for ( w=0; w<num_bin_warpped_; w++ )
+            {
+                hist_value_type prob_w = hist_warpped_(w);
+
+                for ( t=0; t<num_bin_target_; t++ )
+                {
+                    hist_value_type prob = hist_(t, w);
+                    if ( prob > 0 ) // empty bins contribute nothing and would give log(0)
+                    {
+                        hist_value_type prob_t = hist_target_(t);
+
+                        dissimilarity_ -= prob * log( prob / (prob_t * prob_w) ) * log2R;
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDissimilarityMutualInformation<ValueType, D>::evaluate(ImageType& t, ImageType& w) ... ");
+        }
+
+        return this->dissimilarity_;
+    }
+
+    template<typename ValueType, unsigned int D> 
+    bool hoImageRegDissimilarityMutualInformation<ValueType, D>::evaluateDeriv(ImageType& w)
+    {   // Calls evaluate(w), builds the table Dist from hist_ and hist_warpped_, then fills deriv_; returns false if anything throws.
+        try
+        {
+            this->evaluate(w);
+
+            Hy.createArray(num_bin_target_, num_bin_warpped_);
+            hy.create(num_bin_warpped_);
+
+            P.createArray(num_bin_target_, num_bin_warpped_);
+            p.create(num_bin_warpped_);
+
+            Dist.createArray(num_bin_target_, num_bin_warpped_);
+
+            // hoNDBoundaryHandlerFixedValue< hoMatrix<hist_value_type> > bh_hist(hist_, 0);
+
+            size_t t, w; // note: this w (bin index) shadows the image parameter w inside the try block
+
+            for ( t=0; t<num_bin_target_; t++ )
+            {
+                Hy(t, 0) = ( hist_(t, 1) - 0 ); // NOTE(review): columns 1 and num_bin_warpped_-2 are read here, so this assumes num_bin_warpped_ >= 2 -- confirm
+                Hy(t, num_bin_warpped_-1) = ( 0 - hist_(t, num_bin_warpped_-2) );
+
+                for ( w=1; w<num_bin_warpped_-1; w++ )
+                {
+                    Hy(t, w) = ( hist_(t, w+1) - hist_(t, w-1) );
+                }
+            }
+
+            Gadgetron::scal( (hist_value_type)(0.5), Hy); // Hy = half the difference of the two neighbouring warped bins, with 0 used outside the histogram
+
+            hoNDBoundaryHandlerFixedValue< hoNDArray<hist_value_type> > bh_hist_warpped(hist_warpped_, 0);
+            for ( w=0; w<num_bin_warpped_; w++ )
+            {
+                hy(w) = (hist_value_type)(0.5) * ( bh_hist_warpped(w+1) - bh_hist_warpped(w-1) );
+            }
+
+            P = Hy; // P and p start as copies; only entries with a positive histogram value are divided below
+            p = hy;
+
+            for ( t=0; t<num_bin_target_; t++ )
+            {
+                for ( w=0; w<num_bin_warpped_; w++ )
+                {
+                    hist_value_type v = hist_(t, w);
+
+                    if ( v > 0 )
+                    {
+                        P(t, w) = Hy(t, w)/v;
+                    }
+                }
+            }
+
+            for ( w=0; w<num_bin_warpped_; w++ )
+            {
+                hist_value_type v = hist_warpped_(w);
+
+                if ( v > 0 )
+                {
+                    p(w) = hy(w)/v;
+                }
+            }
+
+            for ( t=0; t<num_bin_target_; t++ )
+            {
+                for ( w=0; w<num_bin_warpped_; w++ )
+                {
+                    Dist(t, w) = P(t, w) - p(w);
+                }
+            }
+
+            if ( betaArg_[0] > 0 )
+            {
+                Gadgetron::filterGaussian(Dist, betaArg_);
+                GADGET_CHECK_PERFORM(!debugFolder_.empty(), GADGET_EXPORT_ARRAY(debugFolder_, gt_exporter_, Dist, "MI_Dist"));
+            }
+
+            hoNDBoundaryHandlerFixedValue< ho2DArray<hist_value_type> > bh_Dist(Dist, 0);
+            hoNDInterpolatorLinear< ho2DArray<hist_value_type> > interp_Dist(Dist, bh_Dist);
+
+            size_t N = target_->get_number_of_elements();
+
+            ValueType range_t = ValueType(1.0)/(max_target_ - min_target_ + std::numeric_limits<ValueType>::epsilon()); // epsilon keeps the divisor non-zero when max == min
+            ValueType range_w = ValueType(1.0)/(max_warpped_ - min_warpped_ + std::numeric_limits<ValueType>::epsilon());
+
+            long long n;
+
+            ValueType v = (ValueType)(1.0/N);
+            for ( n=0; n<(long long)N; n++ )
+            {
+                coord_type it = (coord_type)(range_t*(target(n)-min_target_)*(num_bin_target_-1)); // intensity mapped to a fractional bin coordinate
+                coord_type iw = (coord_type)(range_w*(warped(n)-min_warpped_)*(num_bin_warpped_-1));
+
+                deriv_(n) = ValueType( interp_Dist(it, iw) ) * v;
+            }
+
+            // Gadgetron::math::scal(deriv_.get_number_of_elements(), ValueType(1.0/N), deriv_.begin());
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDissimilarityMutualInformation<ValueType, D>::evaluateDeriv(w) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template<typename ValueType, unsigned int D> 
+    void hoImageRegDissimilarityMutualInformation<ValueType, D>::print(std::ostream& os) const
+    {
+        using namespace std;
+        os << "--------------Gagdgetron mutual information image dissimilarity meausre -------------" << endl;
+        os << "Image dimension is : " << D << endl;
+
+        std::string elemTypeName = std::string(typeid(ValueType).name());
+        os << "Transformation data type is : " << elemTypeName << std::endl;
+
+        os << "Number of intensity bins for target is : " << num_bin_target_ << endl;
+        os << "Number of intensity bins for warped is : " << num_bin_warpped_ << endl;
+        os << "PV interpolation for histogram is : " << pv_interpolation_ << endl;
+        os << "Step size to ignore pixels when creating histogram is : " << step_size_ignore_pixel_ << endl;
+        os << "Kernel size for probability density estimation is : " << betaArg_[0] << " x " << betaArg_[1] << endl;
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/dissimilarity/hoImageRegDissimilarityNormalizedMutualInformation.h b/toolboxes/registration/optical_flow/cpu/dissimilarity/hoImageRegDissimilarityNormalizedMutualInformation.h
new file mode 100644
index 0000000..e9f178f
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/dissimilarity/hoImageRegDissimilarityNormalizedMutualInformation.h
@@ -0,0 +1,173 @@
+/** \file   hoImageRegDissimilarityNormalizedMutualInformation.h
+    \brief  Define the class to compute normalized mutual information.
+
+            C. Studholme, D.L.G. Hill, D.J. Hawkes. An overlap invariant entropy measure of 3D medical image alignment. Pattern Recognition, 32, 71-86, 1999.
+            http://eecs.vanderbilt.edu/courses/cs359/other_links/papers/studholme_NMI_1999.pdf
+
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoImageRegDissimilarityHistogramBased.h"
+
+namespace Gadgetron
+{
+    template<typename ValueType, unsigned int D> 
+    class hoImageRegDissimilarityNormalizedMutualInformation : public hoImageRegDissimilarityHistogramBased<ValueType, D>
+    {
+    public:
+
+        typedef hoImageRegDissimilarityNormalizedMutualInformation<ValueType, D> Self;
+        typedef hoImageRegDissimilarityHistogramBased<ValueType, D> BaseClass;
+
+        typedef typename BaseClass::ImageType ImageType;
+        typedef typename BaseClass::InterpolatorType InterpolatorType;
+
+        typedef ValueType T;
+        typedef ValueType element_type;
+        typedef ValueType value_type;
+
+        typedef typename BaseClass::coord_type coord_type;
+
+        typedef typename BaseClass::hist_value_type hist_value_type;
+
+        hoImageRegDissimilarityNormalizedMutualInformation(unsigned int num_bin_target=64, unsigned int num_bin_warpped=64, ValueType bg_value=ValueType(0));
+        virtual ~hoImageRegDissimilarityNormalizedMutualInformation();
+
+        virtual ValueType evaluate(ImageType& w);
+
+        virtual bool evaluateDeriv(ImageType& /*w*/) { return true; }
+
+        virtual void print(std::ostream& os) const;
+
+        using BaseClass::num_bin_target_;
+        using BaseClass::num_bin_warpped_;
+        using BaseClass::pv_interpolation_;
+        using BaseClass::step_size_ignore_pixel_;
+        using BaseClass::gt_timer1_;
+        using BaseClass::gt_timer2_;
+        using BaseClass::gt_timer3_;
+        using BaseClass::performTiming_;
+        using BaseClass::gt_exporter_;
+        using BaseClass::debugFolder_;
+
+    protected:
+
+        using BaseClass::target_;
+        using BaseClass::warpped_;
+        using BaseClass::deriv_;
+        using BaseClass::image_dim_;
+        using BaseClass::bg_value_;
+        using BaseClass::dissimilarity_;
+        using BaseClass::target;
+        using BaseClass::warped;
+        using BaseClass::deriv;
+        using BaseClass::hist_;
+        using BaseClass::min_target_;
+        using BaseClass::max_target_;
+        using BaseClass::min_warpped_;
+        using BaseClass::max_warpped_;
+        using BaseClass::num_samples_in_hist_;
+
+        hoNDArray<hist_value_type> hist_target_;
+        hoNDArray<hist_value_type> hist_warpped_;
+    };
+
+    template<typename ValueType, unsigned int D> 
+    hoImageRegDissimilarityNormalizedMutualInformation<ValueType, D>::
+    hoImageRegDissimilarityNormalizedMutualInformation(unsigned int num_bin_target, unsigned int num_bin_warpped, ValueType bg_value) 
+        : BaseClass(num_bin_target, num_bin_warpped, bg_value)
+    {
+    }
+
+    template<typename ValueType, unsigned int D> 
+    hoImageRegDissimilarityNormalizedMutualInformation<ValueType, D>::~hoImageRegDissimilarityNormalizedMutualInformation()
+    {
+    }
+
+    template<typename ValueType, unsigned int D> 
+    ValueType hoImageRegDissimilarityNormalizedMutualInformation<ValueType, D>::evaluate(ImageType& w)
+    {
+        try
+        {
+            BaseClass::evaluate(w);
+
+            // convert to probabilities
+            if ( num_samples_in_hist_ > 0 )
+            {
+                Gadgetron::scal((hist_value_type)(1.0/num_samples_in_hist_), hist_);
+            }
+
+            /// compute entropy
+            hist_target_.create(num_bin_target_);
+            Gadgetron::clear(hist_target_);
+
+            hist_warpped_.create(num_bin_warpped_);
+            Gadgetron::clear(hist_warpped_);
+
+            hist_.sumOverRow(hist_target_);
+            hist_.sumOverCol(hist_warpped_);
+
+            hist_value_type entropy_t(0), entropy_w(0), joint_entropy(0);
+
+            size_t t, w;
+
+            hist_value_type log2 = hist_value_type(1.0)/log( hist_value_type(2.0) );
+
+            for ( t=0; t<(size_t)num_bin_target_; t++ )
+            {
+                hist_value_type prob = hist_target_(t);
+                if ( prob > 0 )
+                {
+                    entropy_t -= prob * log(prob) * log2;
+                }
+            }
+
+            for ( w=0; w<num_bin_warpped_; w++ )
+            {
+                hist_value_type prob = hist_warpped_(w);
+                if ( prob > 0 )
+                {
+                    entropy_w -= prob * log(prob) * log2;
+                }
+            }
+
+            for ( w=0; w<num_bin_warpped_; w++ )
+            {
+                for ( t=0; t<num_bin_target_; t++ )
+                {
+                    hist_value_type prob = hist_(t, w);
+                    if ( prob > 0 )
+                    {
+                        joint_entropy -= prob * log(prob) * log2;
+                    }
+                }
+            }
+
+            dissimilarity_ = - (entropy_t + entropy_w) / joint_entropy;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDissimilarityNormalizedMutualInformation<ValueType, D>::evaluate(ImageType& t, ImageType& w) ... ");
+        }
+
+        return this->dissimilarity_;
+    }
+
+    template<typename ValueType, unsigned int D> 
+    void hoImageRegDissimilarityNormalizedMutualInformation<ValueType, D>::print(std::ostream& os) const
+    {
+        using namespace std;
+        os << "--------------Gagdgetron image dissimilarity with histogram -------------" << endl;
+        os << "Image dimension is : " << D << endl;
+
+        std::string elemTypeName = std::string(typeid(ValueType).name());
+        os << "Transformation data type is : " << elemTypeName << std::endl;
+
+        os << "Number of intensity bins for target is : " << num_bin_target_ << endl;
+        os << "Number of intensity bins for warped is : " << num_bin_warpped_ << endl;
+        os << "PV interpolation for histogram is : " << pv_interpolation_ << endl;
+        os << "Step size to ignore pixels when creating histogram is : " << step_size_ignore_pixel_ << endl;
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/dissimilarity/hoImageRegDissimilaritySSD.h b/toolboxes/registration/optical_flow/cpu/dissimilarity/hoImageRegDissimilaritySSD.h
new file mode 100644
index 0000000..7b2e7cd
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/dissimilarity/hoImageRegDissimilaritySSD.h
@@ -0,0 +1,108 @@
+/** \file   hoImageRegDissimilaritySSD.h
+    \brief  Define the class to compute image sum-of-square difference (SSD ) in gadgetron registration
+
+            The analytical derivatives are computed by using the formula proposed at:
+
+            [1] Gerardo Hermosillo, Christophe Chefd'Hotel, Olivier Faugeras. Variational Methods for Multimodal Image Matching. 
+            International Journal of Computer Vision. December 2002, Volume 50, Issue 3, pp 329-343.
+            http://link.springer.com/article/10.1023%2FA%3A1020830525823
+
+            [2] Gerardo Hermosillo. Variational Methods for Multimodal Image Matching. PhD Thesis, UNIVERSITÉ DE NICE - SOPHIA ANTIPOLIS. May 2002.
+            http://webdocs.cs.ualberta.ca/~dana/readingMedIm/papers/hermosilloPhD.pdf
+
+            The derivative computation code is modified from the listed source code at page 179 - 185 in ref [2].
+
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoImageRegDissimilarity.h"
+
+namespace Gadgetron
+{
+    template<typename ValueType, unsigned int D> 
+    class hoImageRegDissimilaritySSD : public hoImageRegDissimilarity<ValueType, D>
+    {
+    public:
+
+        typedef hoImageRegDissimilaritySSD<ValueType, D> Self;
+        typedef hoImageRegDissimilarity<ValueType, D> BaseClass;
+
+        typedef typename BaseClass::ImageType ImageType;
+        typedef typename BaseClass::InterpolatorType InterpolatorType;
+
+        typedef ValueType T;
+        typedef ValueType element_type;
+        typedef ValueType value_type;
+
+        typedef typename BaseClass::coord_type coord_type;
+
+        hoImageRegDissimilaritySSD();
+        virtual ~hoImageRegDissimilaritySSD();
+
+        virtual ValueType evaluate(ImageType& w);
+        virtual bool evaluateDeriv(ImageType& w) { this->evaluate(w); return true; }
+
+        virtual void print(std::ostream& os) const;
+
+        using BaseClass::gt_timer1_;
+        using BaseClass::gt_timer2_;
+        using BaseClass::gt_timer3_;
+        using BaseClass::performTiming_;
+        using BaseClass::gt_exporter_;
+        using BaseClass::debugFolder_;
+
+    protected:
+
+        using BaseClass::target_;
+        using BaseClass::warpped_;
+        using BaseClass::deriv_;
+        using BaseClass::bg_value_;
+        using BaseClass::dissimilarity_;
+        using BaseClass::target;
+        using BaseClass::warped;
+        using BaseClass::deriv;
+    };
+
+    template<typename ValueType, unsigned int D> 
+    hoImageRegDissimilaritySSD<ValueType, D>::hoImageRegDissimilaritySSD() : BaseClass()
+    {
+    }
+
+    template<typename ValueType, unsigned int D> 
+    hoImageRegDissimilaritySSD<ValueType, D>::~hoImageRegDissimilaritySSD()
+    {
+    }
+
+    template<typename ValueType, unsigned int D> 
+    ValueType hoImageRegDissimilaritySSD<ValueType, D>::evaluate(ImageType& w)
+    {
+        try
+        {
+            BaseClass::evaluate(w);
+
+            Gadgetron::subtract(target, warped, deriv);
+            Gadgetron::norm2(deriv, dissimilarity_);
+
+            dissimilarity_ = (dissimilarity_*dissimilarity_) / (ValueType)(target.get_number_of_elements());
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDissimilaritySSD<ValueType, D>::evaluate(w) ... ");
+        }
+
+        return this->dissimilarity_;
+    }
+
+    template<typename ValueType, unsigned int D> 
+    void hoImageRegDissimilaritySSD<ValueType, D>::print(std::ostream& os) const
+    {
+        using namespace std;
+        os << "--------------Gagdgetron image dissimilarity SSD measure -------------" << endl;
+        os << "Image dimension is : " << D << endl;
+
+        std::string elemTypeName = std::string(typeid(ValueType).name());
+        os << "Transformation data type is : " << elemTypeName << std::endl;
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/hoOpticalFlowSolver.h b/toolboxes/registration/optical_flow/cpu/hoOpticalFlowSolver.h
index 924bb7d..9bc6dd6 100644
--- a/toolboxes/registration/optical_flow/cpu/hoOpticalFlowSolver.h
+++ b/toolboxes/registration/optical_flow/cpu/hoOpticalFlowSolver.h
@@ -9,11 +9,9 @@
 #pragma once
 
 #include "hoNDArray.h"
-#include "hoNDArray_operators.h"
 #include "hoNDArray_elemwise.h"
 #include "hoNDArray_utils.h"
-#include "hoNDArray_blas.h"
-#include "hoRegistration_utils.h"
+#include "hoNDArray_math.h"
 #include "opticalFlowSolver.h"
 #include "cpureg_export.h"
 
diff --git a/toolboxes/registration/optical_flow/cpu/hoRegistration_utils.cpp b/toolboxes/registration/optical_flow/cpu/hoRegistration_utils.cpp
deleted file mode 100644
index bf6b50e..0000000
--- a/toolboxes/registration/optical_flow/cpu/hoRegistration_utils.cpp
+++ /dev/null
@@ -1,233 +0,0 @@
-#include "hoRegistration_utils.h"
-#include "vector_td_utilities.h"
-
-#ifdef USE_OMP
-#include <omp.h>
-#endif 
-
-namespace Gadgetron{
-
-  // Utility to check if all neighbors required for the linear interpolation exists
-  // ... do not include dimensions of size 1
-
-  template<class REAL, unsigned int D> inline bool
-  is_border_pixel( vector_td<size_t,D> co, vector_td<size_t,D> dims )
-  {
-    for( size_t dim=0; dim<D; dim++ ){
-      if( dims[dim] > 1 && ( co[dim] == 0 || co[dim] == (dims[dim]-1) ) )
-	return true;
-    }
-    return false;
-  }
-
-  // Downsample
-  template<class REAL, unsigned int D> 
-  boost::shared_ptr< hoNDArray<REAL> > downsample( hoNDArray<REAL> *_in )
-  {
-    // A few sanity checks 
-
-    if( _in == 0x0 ){
-      throw std::runtime_error( "downsample(): illegal input provided.");
-    }
-    
-    if( _in->get_number_of_dimensions() < D ){
-      throw std::runtime_error( "downsample(): the number of array dimensions should be at least D");
-    }
-    
-    for( size_t d=0; d<D; d++ ){
-      if( (_in->get_size(d)%2) == 1 && _in->get_size(d) != 1 ){
-	throw std::runtime_error( "downsample(): uneven array dimensions larger than one not accepted");
-      }
-    }
-    
-    typename uint64d<D>::Type matrix_size_in = from_std_vector<size_t,D>( *_in->get_dimensions() );
-    typename uint64d<D>::Type matrix_size_out = matrix_size_in >> 1;
-
-    for( size_t d=0; d<D; d++ ){
-      if( matrix_size_out[d] == 0 ) 
-	matrix_size_out[d] = 1;
-    }
-  
-    size_t num_elements = prod(matrix_size_out);
-    size_t num_batches = 1;
-
-    for( size_t d=D; d<_in->get_number_of_dimensions(); d++ ){
-      num_batches *= _in->get_size(d);
-    }
-  
-    std::vector<size_t> dims = to_std_vector(matrix_size_out);
-    for( size_t d=D; d<_in->get_number_of_dimensions(); d++ ){
-      dims.push_back(_in->get_size(d));
-    }
-  
-    REAL *in = _in->get_data_ptr();
-
-    boost::shared_ptr< hoNDArray<REAL> > _out( new hoNDArray<REAL>(&dims) );
-    REAL *out = _out->get_data_ptr();
-    
-    typedef vector_td<size_t,D> uint64d;
-
-#ifdef USE_OMP
-#pragma omp parallel for
-#endif
-    for( long long idx=0; idx < num_elements*num_batches; idx++ ){
-
-      const size_t frame_offset = idx/num_elements;
-      const uint64d co_out = idx_to_co<D>( idx-frame_offset*num_elements, matrix_size_out );
-      const uint64d co_in = co_out << 1;
-      const uint64d twos(2);
-      const size_t num_adds = 1 << D;
-
-      size_t actual_adds = 0;
-      REAL res = REAL(0);
-
-      for( size_t i=0; i<num_adds; i++ ){
-	const uint64d local_co = idx_to_co<D>( i, twos );
-	if( weak_greater_equal( local_co, matrix_size_out ) ) continue; // To allow array dimensions of size 1
-	const size_t in_idx = co_to_idx<D>(co_in+local_co, matrix_size_in)+frame_offset*prod(matrix_size_in);
-	actual_adds++;
-	res += in[in_idx];
-      }    
-      out[idx] = res/REAL(actual_adds);
-    }
-
-    return _out;
-  }
-
-  // Linear interpolation upsampling
-  template<class REAL, unsigned int D> boost::shared_ptr< hoNDArray<REAL> >
-  upsample( hoNDArray<REAL> *_in )
-  {
-    // A few sanity checks 
-
-    if( _in == 0x0 ){
-      throw std::runtime_error("upsample(): illegal input provided.");
-    }
-
-    if( _in->get_number_of_dimensions() < D ){
-      throw std::runtime_error( "upsample(): the number of array dimensions should be at least D");
-    }
-    
-    typename uint64d<D>::Type matrix_size_in = from_std_vector<size_t,D>( *_in->get_dimensions() );
-    typename uint64d<D>::Type matrix_size_out = matrix_size_in << 1;
-
-    for( size_t d=0; d<D; d++ ){
-      if( matrix_size_in[d] == 1 )
-	matrix_size_out[d] = 1;
-    }
-  
-    size_t num_elements = prod(matrix_size_out);
-    size_t num_batches = 1;
-
-    for( size_t d=D; d<_in->get_number_of_dimensions(); d++ ){
-      num_batches *= _in->get_size(d);
-    }
-  
-    std::vector<size_t> dims = to_std_vector(matrix_size_out);
-    for( size_t d=D; d<_in->get_number_of_dimensions(); d++ ){
-      dims.push_back(_in->get_size(d));
-    }
-
-    REAL *in = _in->get_data_ptr();
-
-    boost::shared_ptr< hoNDArray<REAL> > _out( new hoNDArray<REAL>(&dims) );
-    REAL *out = _out->get_data_ptr();
-    
-    typedef vector_td<size_t,D> uint64d;
-
-#ifdef USE_OMP
-#pragma omp parallel for
-#endif
-    for( long long idx=0; idx < num_elements*num_batches; idx++ ){
-      
-      REAL res = REAL(0);
-
-      const size_t num_neighbors = 1 << D;
-      const size_t frame_idx = idx/num_elements;
-      const uint64d co_out = idx_to_co<D>( idx-frame_idx*num_elements, matrix_size_out );
-
-      // We will only proceed if all neighbours exist (this adds a zero-boundary to the upsampled image/vector field)
-      //
-    
-      if( !is_border_pixel<REAL,D>(co_out, matrix_size_out) ){
-      
-	for( size_t i=0; i<num_neighbors; i++ ){
-	
-	  // Determine coordinate of neighbor in input
-	  //
-
-	  const uint64d twos(2);
-	  const uint64d stride = idx_to_co<D>( i, twos );
-
-	  if( weak_greater_equal( stride, matrix_size_out ) ) continue; // To allow array dimensions of 1
-
-	  // Be careful about dimensions of size 1
-	  uint64d ones(1);
-	  for( size_t d=0; d<D; d++ ){
-	    if( matrix_size_out[d] == 1 )
-	      ones[d] = 0;
-	  }
-	  uint64d co_in = ((co_out-ones)>>1)+stride;
-	
-	  // Read corresponding pixel value
-	  //
-	
-	  const size_t in_idx = co_to_idx<D>(co_in, matrix_size_in)+frame_idx*prod(matrix_size_in);
-	  REAL value = in[in_idx];
-	
-	  // Determine weight
-	  //
-	
-	  REAL weight = REAL(1);
-	
-	  for( size_t dim=0; dim<D; dim++ ){	  
-	    if( matrix_size_in[dim] > 1 ){
-	      if( stride.vec[dim] == (co_out.vec[dim]%2) ) {
-		weight *= REAL(0.25);
-	      }
-	      else{
-		weight *= REAL(0.75);
-	      }
-	    }
-	  }
-	
-	  // Accumulate result
-	  //
-	
-	  res += weight*value;
-	}
-      }
-      out[idx] = res;
-    }
-    
-    return _out;
-  }
-
-  //
-  // Instantiation
-  //
-  
-  template EXPORTCPUREG boost::shared_ptr< hoNDArray<float> > downsample<float,1>(hoNDArray<float>*);
-  template EXPORTCPUREG boost::shared_ptr< hoNDArray<float> > upsample<float,1>(hoNDArray<float>*);
-
-  template EXPORTCPUREG boost::shared_ptr< hoNDArray<float> > downsample<float,2>(hoNDArray<float>*);
-  template EXPORTCPUREG boost::shared_ptr< hoNDArray<float> > upsample<float,2>(hoNDArray<float>*);
-
-  template EXPORTCPUREG boost::shared_ptr< hoNDArray<float> > downsample<float,3>(hoNDArray<float>*);
-  template EXPORTCPUREG boost::shared_ptr< hoNDArray<float> > upsample<float,3>(hoNDArray<float>*);
-
-  template EXPORTCPUREG boost::shared_ptr< hoNDArray<float> > downsample<float,4>(hoNDArray<float>*);
-  template EXPORTCPUREG boost::shared_ptr< hoNDArray<float> > upsample<float,4>(hoNDArray<float>*);
-
-  template EXPORTCPUREG boost::shared_ptr< hoNDArray<double> > downsample<double,1>(hoNDArray<double>*);
-  template EXPORTCPUREG boost::shared_ptr< hoNDArray<double> > upsample<double,1>(hoNDArray<double>*);
-
-  template EXPORTCPUREG boost::shared_ptr< hoNDArray<double> > downsample<double,2>(hoNDArray<double>*);
-  template EXPORTCPUREG boost::shared_ptr< hoNDArray<double> > upsample<double,2>(hoNDArray<double>*);
-
-  template EXPORTCPUREG boost::shared_ptr< hoNDArray<double> > downsample<double,3>(hoNDArray<double>*);
-  template EXPORTCPUREG boost::shared_ptr< hoNDArray<double> > upsample<double,3>(hoNDArray<double>*);
-
-  template EXPORTCPUREG boost::shared_ptr< hoNDArray<double> > downsample<double,4>(hoNDArray<double>*);
-  template EXPORTCPUREG boost::shared_ptr< hoNDArray<double> > upsample<double,4>(hoNDArray<double>*);
-}
diff --git a/toolboxes/registration/optical_flow/cpu/hoRegistration_utils.h b/toolboxes/registration/optical_flow/cpu/hoRegistration_utils.h
deleted file mode 100644
index a9afafb..0000000
--- a/toolboxes/registration/optical_flow/cpu/hoRegistration_utils.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#pragma once
-
-#include "hoNDArray.h"
-#include "cpureg_export.h"
-
-namespace Gadgetron{
-  
-  // Downsample array to half size by averaging
-  template<class REAL, unsigned int D> EXPORTCPUREG boost::shared_ptr< hoNDArray<REAL> > downsample( hoNDArray<REAL> *data );
-  
-  // Linear interpolation upsampling to array of doubled dimensions
-  template<class REAL, unsigned int D> EXPORTCPUREG boost::shared_ptr< hoNDArray<REAL> > upsample( hoNDArray<REAL> *data );
-}
diff --git a/toolboxes/registration/optical_flow/cpu/register/hoImageRegDeformationFieldBidirectionalRegister.h b/toolboxes/registration/optical_flow/cpu/register/hoImageRegDeformationFieldBidirectionalRegister.h
new file mode 100644
index 0000000..9a8669a
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/register/hoImageRegDeformationFieldBidirectionalRegister.h
@@ -0,0 +1,501 @@
+/** \file   hoImageRegDeformationFieldBidirectionalRegister.h
+    \brief  Define the class to perform non-rigid image registration to estimate bi-directional variational deformation field
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoImageRegDeformationFieldRegister.h"
+
+namespace Gadgetron
+{
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    class hoImageRegDeformationFieldBidirectionalRegister : public hoImageRegDeformationFieldRegister<ValueType, CoordType, D>
+    {
+    public:
+
+        typedef hoImageRegDeformationFieldBidirectionalRegister<ValueType, CoordType, D> Self;
+        typedef hoImageRegDeformationFieldRegister<ValueType, CoordType, D> BaseClass;
+        typedef hoImageRegNonParametricRegister<ValueType, CoordType, D, D> NonParametricRegisterClass;
+
+        typedef typename BaseClass::TargetType TargetType;
+        typedef typename BaseClass::SourceType SourceType;
+
+        typedef typename BaseClass::Target2DType Target2DType;
+        typedef typename BaseClass::Source2DType Source2DType;
+
+        typedef typename BaseClass::Target3DType Target3DType;
+        typedef typename BaseClass::Source3DType Source3DType;
+
+        typedef ValueType T;
+        typedef ValueType element_type;
+        typedef ValueType value_type;
+
+        /// boundary handler and interpolator for target image
+        typedef typename BaseClass::BoundaryHandlerTargetType BoundaryHandlerTargetType;
+        typedef typename BaseClass::BoundaryHandlerTargetFixedValueType BoundaryHandlerTargetFixedValueType;
+        typedef typename BaseClass::BoundaryHandlerTargetBorderValueType BoundaryHandlerTargetBorderValueType;
+        typedef typename BaseClass::BoundaryHandlerTargetPeriodicType BoundaryHandlerTargetPeriodicType;
+        typedef typename BaseClass::BoundaryHandlerTargetMirrorType BoundaryHandlerTargetMirrorType;
+
+        typedef typename BaseClass::InterpTargetType InterpTargetType;
+        typedef typename BaseClass::InterpTargetLinearType InterpTargetLinearType;
+        typedef typename BaseClass::InterpTargetNearestNeighborType InterpTargetNearestNeighborType;
+        typedef typename BaseClass::InterpTargetBSplineType InterpTargetBSplineType;
+
+        /// boundary handler and interpolator for source image
+        typedef typename BaseClass::BoundaryHandlerSourceType BoundaryHandlerSourceType;
+        typedef typename BaseClass::BoundaryHandlerSourceFixedValueType BoundaryHandlerSourceFixedValueType;
+        typedef typename BaseClass::BoundaryHandlerSourceBorderValueType BoundaryHandlerSourceBorderValueType;
+        typedef typename BaseClass::BoundaryHandlerSourcePeriodicType BoundaryHandlerSourcePeriodicType;
+        typedef typename BaseClass::BoundaryHandlerSourceMirrorType BoundaryHandlerSourceMirrorType;
+
+        typedef typename BaseClass::InterpSourceType InterpSourceType;
+        typedef typename BaseClass::InterpSourceLinearType InterpSourceLinearType;
+        typedef typename BaseClass::InterpSourceNearestNeighborType InterpSourceNearestNeighborType;
+        typedef typename BaseClass::InterpSourceBSplineType InterpSourceBSplineType;
+
+        /// warper type
+        typedef typename BaseClass::WarperType WarperType;
+
+        /// image dissimilarity type
+        typedef typename BaseClass::DissimilarityType DissimilarityType;
+
+        /// transformation type
+        typedef hoImageRegDeformationField<CoordType, D> TransformationType;
+        typedef typename TransformationType::input_point_type input_point_type;
+        typedef typename TransformationType::output_point_type output_point_type;
+        typedef typename TransformationType::jacobian_position_type jacobian_position_type;
+        typedef typename TransformationType::DeformationFieldType DeformationFieldType;
+        typedef typename TransformationType::coord_type coord_type;
+
+        /// solver type
+        typedef hoImageRegDeformationFieldBidirectionalSolver<ValueType, CoordType, D> SolverType;
+        /// constructor; sizes both inverse_deform_enforce_* vectors to resolution_pyramid_levels entries (10 iterations, weight 0.5)
+        hoImageRegDeformationFieldBidirectionalRegister(unsigned int resolution_pyramid_levels=3, bool use_world_coordinates=false, ValueType bg_value=ValueType(0));
+        virtual ~hoImageRegDeformationFieldBidirectionalRegister();
+
+        /// initialize the registration
+        /// should be called after all images and parameters of registration are set
+        virtual bool initialize();
+
+        /// perform the registration
+        virtual bool performRegistration();
+
+        virtual void printContent(std::ostream& os) const;
+        virtual void print(std::ostream& os) const;
+
+        /// parameters
+
+        using BaseClass::use_world_coordinates_;
+        using BaseClass::resolution_pyramid_divided_by_2_;
+        using BaseClass::resolution_pyramid_levels_;
+        using BaseClass::resolution_pyramid_downsample_ratio_;
+        using BaseClass::resolution_pyramid_blurring_sigma_;
+        using BaseClass::boundary_handler_type_warper_;
+        using BaseClass::interp_type_warper_;
+        using BaseClass::boundary_handler_type_pyramid_construction_;
+        using BaseClass::interp_type_pyramid_construction_;
+        using BaseClass::dissimilarity_type_;
+        using BaseClass::apply_in_FOV_constraint_;
+        using BaseClass::solver_type_;
+
+        using BaseClass::gt_timer1_;
+        using BaseClass::gt_timer2_;
+        using BaseClass::gt_timer3_;
+        using BaseClass::performTiming_;
+        using BaseClass::gt_exporter_;
+        using BaseClass::debugFolder_;
+
+        using BaseClass::max_iter_num_pyramid_level_;
+        using BaseClass::dissimilarity_thres_pyramid_level_;
+        using BaseClass::div_num_pyramid_level_;
+        using BaseClass::step_size_para_pyramid_level_;
+        using BaseClass::step_size_div_para_pyramid_level_;
+        using BaseClass::regularization_hilbert_strength_world_coordinate_;
+        using BaseClass::regularization_hilbert_strength_pyramid_level_;
+        using BaseClass::verbose_;
+
+        /// number of iterations to improve the estimation of the inverse transform
+        std::vector<unsigned int> inverse_deform_enforce_iter_pyramid_level_;
+        /// weight to update the estimation of the inverse transform, must be within [0 1]
+        std::vector<CoordType> inverse_deform_enforce_weight_pyramid_level_;
+
+        /// set the default parameters
+        virtual bool setDefaultParameters(unsigned int resolution_pyramid_levels, bool use_world_coordinates);
+
+        /// deformation field transformation, defined in the grid of target image
+        using BaseClass::transform_;
+        /// inverse deformation field transformation; initialize() allocates it from the source image when transform_ is NULL
+        TransformationType* transform_inverse_;
+
+        /// bidirectional solver, resized in initialize() to one instance per resolution pyramid level
+        std::vector<SolverType> solver_pyramid_inverse_;
+
+    protected:
+
+        using BaseClass::target_;
+        using BaseClass::source_;
+        using BaseClass::bg_value_;
+        using BaseClass::target_pyramid_;
+        using BaseClass::source_pyramid_;
+        using BaseClass::target_bh_warper_;
+        using BaseClass::target_interp_warper_;
+        using BaseClass::source_bh_warper_;
+        using BaseClass::source_interp_warper_;
+        using BaseClass::target_bh_pyramid_construction_;
+        using BaseClass::target_interp_pyramid_construction_;
+        using BaseClass::source_bh_pyramid_construction_;
+        using BaseClass::source_interp_pyramid_construction_;
+        using BaseClass::warper_pyramid_;
+        using BaseClass::warper_pyramid_inverse_;
+        using BaseClass::dissimilarity_pyramid_;
+        using BaseClass::dissimilarity_pyramid_inverse_;
+        using BaseClass::preset_transform_;
+    };
+
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    hoImageRegDeformationFieldBidirectionalRegister<ValueType, CoordType, D>::
+    hoImageRegDeformationFieldBidirectionalRegister(unsigned int resolution_pyramid_levels, bool use_world_coordinates, ValueType bg_value) 
+    : BaseClass(resolution_pyramid_levels, bg_value), transform_inverse_(NULL) // NULL keeps the destructor's delete well-defined if initialize() never ran
+    {
+        // NOTE(review): use_world_coordinates is not forwarded to BaseClass here - confirm against the base constructor signature
+        // per-level defaults: 10 iterations to enforce the inverse transform, update weight 0.5 (vectors start out empty)
+        inverse_deform_enforce_iter_pyramid_level_.resize(resolution_pyramid_levels, 10);
+
+        inverse_deform_enforce_weight_pyramid_level_.resize(resolution_pyramid_levels, 0.5);
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    hoImageRegDeformationFieldBidirectionalRegister<ValueType, CoordType, D>::~hoImageRegDeformationFieldBidirectionalRegister()
+    {
+        // nothing is released when the transform was preset; transform_ itself is never deleted at this level
+        if ( preset_transform_ ) return;
+
+        // release the inverse deformation field and reset the pointer
+        delete transform_inverse_;
+        transform_inverse_ = NULL;
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    bool hoImageRegDeformationFieldBidirectionalRegister<ValueType, CoordType, D>::setDefaultParameters(unsigned int resolution_pyramid_levels, bool use_world_coordinates)
+    {
+        // base-class defaults first; its result is not inspected
+        BaseClass::setDefaultParameters(resolution_pyramid_levels, use_world_coordinates);
+
+        // one entry per pyramid level: 10 inverse-enforcement iterations and an update weight of 0.5
+        const size_t numOfLevels = resolution_pyramid_levels;
+        inverse_deform_enforce_iter_pyramid_level_.assign(numOfLevels, 10);
+        inverse_deform_enforce_weight_pyramid_level_.assign(numOfLevels, 0.5);
+
+        return true;
+    }
+
+    /// Configure the per-level warpers and bidirectional solvers, then bring the forward and inverse
+    /// deformation fields to the size of the coarsest pyramid level (index resolution_pyramid_levels_-1).
+    /// Returns false when a checked step fails or an exception is caught, true otherwise.
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    bool hoImageRegDeformationFieldBidirectionalRegister<ValueType, CoordType, D>::initialize()
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(NonParametricRegisterClass::initialize());
+            // no transform supplied: create the forward field from the target and the inverse field from the source
+            if ( transform_ == NULL )
+            {
+                transform_ = new TransformationType(*target_);
+                transform_inverse_ = new TransformationType(*source_);
+
+                preset_transform_ = false;
+            }
+            // NOTE(review): with a preset transform_, transform_inverse_ is dereferenced below without a check - the caller must have set it
+            warper_pyramid_.resize(resolution_pyramid_levels_);
+            warper_pyramid_inverse_.resize(resolution_pyramid_levels_);
+            solver_pyramid_inverse_.resize(resolution_pyramid_levels_);
+            unsigned int ii, jj;
+            for ( ii=0; ii<resolution_pyramid_levels_; ii++ )
+            {
+                warper_pyramid_[ii].setTransformation(*transform_);
+                warper_pyramid_[ii].setInterpolator( *source_interp_warper_[ii] );
+                warper_pyramid_[ii].setBackgroundValue(bg_value_);
+                warper_pyramid_[ii].debugFolder_ = this->debugFolder_;
+                // inverse warper: inverse transform, sampling through the target interpolator
+                warper_pyramid_inverse_[ii].setTransformation(*transform_inverse_);
+                warper_pyramid_inverse_[ii].setInterpolator( *target_interp_warper_[ii] );
+                warper_pyramid_inverse_[ii].setBackgroundValue(bg_value_);
+                warper_pyramid_inverse_[ii].debugFolder_ = this->debugFolder_;
+
+                solver_pyramid_inverse_[ii].setTransform(*transform_);
+                solver_pyramid_inverse_[ii].setTransformInverse(*transform_inverse_);
+                // regularization strength per dimension; divided by the target pixel size when given in world coordinates
+                if ( regularization_hilbert_strength_world_coordinate_ )
+                {
+                    // world to pixel
+                    std::vector<coord_type> pixelSize;
+                    target_->get_pixel_size(pixelSize);
+
+                    for ( jj=0; jj<D; jj++ )
+                    {
+                        solver_pyramid_inverse_[ii].regularization_hilbert_strength_[jj] = (regularization_hilbert_strength_pyramid_level_[ii][jj] / pixelSize[jj]);
+                    }
+                }
+                else
+                {
+                    for ( jj=0; jj<D; jj++ )
+                    {
+                        solver_pyramid_inverse_[ii].regularization_hilbert_strength_[jj] = regularization_hilbert_strength_pyramid_level_[ii][jj];
+                    }
+                }
+                // copy the per-level solver settings
+                solver_pyramid_inverse_[ii].max_iter_num_ = max_iter_num_pyramid_level_[ii];
+                solver_pyramid_inverse_[ii].dissimilarity_thres_ = dissimilarity_thres_pyramid_level_[ii];
+                solver_pyramid_inverse_[ii].div_num_ = div_num_pyramid_level_[ii];
+                solver_pyramid_inverse_[ii].step_size_para_ = step_size_para_pyramid_level_[ii];
+                solver_pyramid_inverse_[ii].step_size_div_para_ = step_size_div_para_pyramid_level_[ii];
+                solver_pyramid_inverse_[ii].verbose_ = verbose_;
+                solver_pyramid_inverse_[ii].debugFolder_ = this->debugFolder_;
+
+                solver_pyramid_inverse_[ii].setTarget(target_pyramid_[ii]);
+                solver_pyramid_inverse_[ii].setSource(source_pyramid_[ii]);
+
+                solver_pyramid_inverse_[ii].setDissimilarity(*dissimilarity_pyramid_[ii]);
+                solver_pyramid_inverse_[ii].setWarper(warper_pyramid_[ii]);
+                solver_pyramid_inverse_[ii].setInterpolator(*source_interp_warper_[ii]);
+
+                solver_pyramid_inverse_[ii].setDissimilarityInverse(*dissimilarity_pyramid_inverse_[ii]);
+                solver_pyramid_inverse_[ii].setWarperInverse(warper_pyramid_inverse_[ii]);
+                solver_pyramid_inverse_[ii].setInterpolatorInverse(*target_interp_warper_[ii]);
+
+                solver_pyramid_inverse_[ii].setBackgroundValue(bg_value_);
+                solver_pyramid_inverse_[ii].setUseWorldCoordinate(use_world_coordinates_);
+
+                solver_pyramid_inverse_[ii].inverse_deform_enforce_iter_ = inverse_deform_enforce_iter_pyramid_level_[ii];
+                solver_pyramid_inverse_[ii].inverse_deform_enforce_weight_ = inverse_deform_enforce_weight_pyramid_level_[ii];
+                solver_pyramid_inverse_[ii].apply_in_FOV_constraint_ = apply_in_FOV_constraint_;
+            }
+
+            // downsample the deformation field if necessary
+            if ( !transform_->getDeformationField(0).dimensions_equal(target_pyramid_[resolution_pyramid_levels_-1]) )
+            {
+                std::vector<size_t> dim;
+                target_pyramid_[resolution_pyramid_levels_-1].get_dimensions(dim);
+
+                std::vector<size_t> dimInv;
+                source_pyramid_[resolution_pyramid_levels_-1].get_dimensions(dimInv);
+
+                for ( jj=0; jj<D; jj++ )
+                {
+                    DeformationFieldType& deField = transform_->getDeformationField(jj);
+                    DeformationFieldType& deField_inverse = transform_inverse_->getDeformationField(jj);
+                    // preset fields are resampled (linear interpolation, border-value boundary); otherwise they are recreated and cleared
+                    if ( preset_transform_ )
+                    {
+                        // forward
+                        DeformationFieldType deFieldResampled;
+
+                        hoNDBoundaryHandlerBorderValue<DeformationFieldType> bhBorderValue(deField);
+                        hoNDInterpolatorLinear<DeformationFieldType> interpLinear(deField, bhBorderValue);
+
+                        GADGET_CHECK_RETURN_FALSE(Gadgetron::resampleImage(deField, interpLinear, dim, deFieldResampled));
+
+                        deField = deFieldResampled;
+                        deField.copyImageInfoWithoutImageSize(target_pyramid_[resolution_pyramid_levels_-1]);
+
+                        // inverse
+                        DeformationFieldType deFieldResampled_inverse;
+
+                        bhBorderValue.setArray(deField_inverse);
+                        interpLinear.setArray(deField_inverse);
+                        GADGET_CHECK_RETURN_FALSE(Gadgetron::resampleImage(deField_inverse, interpLinear, dimInv, deFieldResampled_inverse));
+
+                        deField_inverse = deFieldResampled_inverse;
+                        deField_inverse.copyImageInfoWithoutImageSize(source_pyramid_[resolution_pyramid_levels_-1]);
+                    }
+                    else
+                    {
+                        deField.createFrom(target_pyramid_[resolution_pyramid_levels_-1]);
+                        Gadgetron::clear(deField);
+
+                        deField_inverse.createFrom(source_pyramid_[resolution_pyramid_levels_-1]);
+                        Gadgetron::clear(deField_inverse);
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDeformationFieldBidirectionalRegister<ValueType, CoordType, D>::initialize() ... ");
+            return false; // report the failure instead of falling through to "return true"
+        }
+        return true;
+    }
+
+    /// Run the bidirectional solver from pyramid level resolution_pyramid_levels_-1 down to level 0, expanding both
+    /// deformation fields onto the next level after each solve. Returns false if a solve fails or an exception is caught.
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    bool hoImageRegDeformationFieldBidirectionalRegister<ValueType, CoordType, D>::performRegistration()
+    {
+        try
+        {
+            // starting from the most coarse level
+            int level;
+            for ( level=(int)resolution_pyramid_levels_-1; level>=0; level-- )
+            {
+                // update the transform for multi-resolution pyramid
+                transform_->update();
+                transform_inverse_->update();
+
+                GADGET_CHECK_RETURN_FALSE(solver_pyramid_inverse_[level].solve());
+
+                if ( !debugFolder_.empty() )
+                {
+                    unsigned int jj;
+                    for ( jj=0; jj<D; jj++ )
+                    {
+                        std::ostringstream ostr;
+                        ostr << "deform_" << jj;
+
+                        GADGET_EXPORT_IMAGE(debugFolder_, gt_exporter_, transform_->getDeformationField(jj), ostr.str());
+
+                        std::ostringstream ostr2;
+                        ostr2 << "deform_inverse_" << jj;
+
+                        GADGET_EXPORT_IMAGE(debugFolder_, gt_exporter_, transform_inverse_->getDeformationField(jj), ostr2.str());
+                    }
+                }
+
+                // expand the deformation field for next resolution level
+                if ( level>0 )
+                {
+                    std::vector<float> ratio = resolution_pyramid_downsample_ratio_[level-1];
+                    unsigned int jj;
+                    bool downsampledBy2 = true;
+                    for ( jj=0; jj<D; jj++ )
+                    {
+                        if ( GT_ABS(ratio[jj]-2.0f) > FLT_EPSILON )
+                        {
+                            downsampledBy2 = false;
+                            break;
+                        }
+                    }
+                    // zeroed buffers created from the target/source images of the next level (level-1)
+                    DeformationFieldType deformExpanded;
+                    deformExpanded.createFrom(target_pyramid_[level-1]);
+                    Gadgetron::clear(deformExpanded);
+
+                    DeformationFieldType deformInverseExpanded;
+                    deformInverseExpanded.createFrom(source_pyramid_[level-1]);
+                    Gadgetron::clear(deformInverseExpanded);
+                    // exact factor-2 ratio in every dimension uses expandImageBy2, any other ratio uses upsampleImage
+                    if ( downsampledBy2 )
+                    {
+                        for ( jj=0; jj<D; jj++ )
+                        {
+                            DeformationFieldType& deform = transform_->getDeformationField(jj);
+                            Gadgetron::expandImageBy2(deform, *target_bh_pyramid_construction_, deformExpanded);
+
+                            if ( !use_world_coordinates_ )
+                            {
+                                Gadgetron::scal(ValueType(2.0), deformExpanded); // the deformation vector should be doubled in length
+                            }
+
+                            deform = deformExpanded;
+
+                            DeformationFieldType& deformInv = transform_inverse_->getDeformationField(jj);
+                            Gadgetron::expandImageBy2(deformInv, *source_bh_pyramid_construction_, deformInverseExpanded);
+
+                            if ( !use_world_coordinates_ )
+                            {
+                                Gadgetron::scal(ValueType(2.0), deformInverseExpanded); // the deformation vector should be doubled in length
+                            }
+
+                            deformInv = deformInverseExpanded;
+                        }
+                    }
+                    else
+                    {
+                        for ( jj=0; jj<D; jj++ )
+                        {
+                            DeformationFieldType& deform = transform_->getDeformationField(jj);
+                            Gadgetron::upsampleImage(deform, *target_interp_pyramid_construction_, deformExpanded, &ratio[0]);
+
+                            if ( !use_world_coordinates_ )
+                            {
+                                Gadgetron::scal(ValueType(ratio[jj]), deformExpanded);
+                            }
+
+                            deform = deformExpanded;
+
+                            DeformationFieldType& deformInv = transform_inverse_->getDeformationField(jj);
+                            Gadgetron::upsampleImage(deformInv, *source_interp_pyramid_construction_, deformInverseExpanded, &ratio[0]);
+
+                            if ( !use_world_coordinates_ )
+                            {
+                                Gadgetron::scal(ValueType(ratio[jj]), deformInverseExpanded);
+                            }
+
+                            deformInv = deformInverseExpanded;
+                        }
+                    }
+                }
+
+                if ( !debugFolder_.empty() )
+                {
+                    unsigned int jj;
+                    for ( jj=0; jj<D; jj++ )
+                    {
+                        std::ostringstream ostr;
+                        ostr << "deformExpanded_" << jj;
+
+                        GADGET_EXPORT_IMAGE(debugFolder_, gt_exporter_, transform_->getDeformationField(jj), ostr.str());
+
+                        std::ostringstream ostr2;
+                        ostr2 << "deformExpanded_inverse_" << jj;
+
+                        GADGET_EXPORT_IMAGE(debugFolder_, gt_exporter_, transform_inverse_->getDeformationField(jj), ostr2.str());
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDeformationFieldBidirectionalRegister<ValueType, CoordType, D>::performRegistration() ... ");
+            return false; // report the failure instead of falling through to "return true"
+        }
+        return true;
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    void hoImageRegDeformationFieldBidirectionalRegister<ValueType, CoordType, D>::printContent(std::ostream& os) const
+    {
+        // base-class content first, then the two per-level settings this class adds
+        BaseClass::printContent(os);
+
+        const char* const separator = "------------";
+
+        // iterations used to enforce the inverse transform, one line per pyramid level
+        os << separator << std::endl;
+        os << "Number of iterations to improve the estimation of the inverse transform is : " << std::endl;
+        for ( unsigned int level=0; level<resolution_pyramid_levels_; ++level )
+        {
+            os << " Level " << level << " - " << inverse_deform_enforce_iter_pyramid_level_[level] << std::endl;
+        }
+
+        // update weights for the inverse transform, one line per pyramid level
+        os << separator << std::endl;
+        os << "Weight to update the estimation of the inverse transform is : " << std::endl;
+        for ( unsigned int level=0; level<resolution_pyramid_levels_; ++level )
+        {
+            os << " Level " << level << " - " << inverse_deform_enforce_weight_pyramid_level_[level] << std::endl;
+        }
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    void hoImageRegDeformationFieldBidirectionalRegister<ValueType, CoordType, D>::print(std::ostream& os) const
+    {
+        // banner, printContent() output, closing rule; std::ends additionally writes a '\0' character to the stream
+        os << "--------------Gagdgetron non-parametric bi-directional deformation field image register -------------" << std::endl;
+        this->printContent(os);
+        os << "--------------------------------------------------------------------" << std::endl << std::ends;
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/register/hoImageRegDeformationFieldRegister.h b/toolboxes/registration/optical_flow/cpu/register/hoImageRegDeformationFieldRegister.h
new file mode 100644
index 0000000..8d82a20
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/register/hoImageRegDeformationFieldRegister.h
@@ -0,0 +1,527 @@
+/** \file   hoImageRegDeformationFieldRegister.h
+    \brief  Define the class to perform non-rigid image registration to estimate variational deformation field
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoImageRegNonParametricRegister.h"
+
+namespace Gadgetron
+{
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    class hoImageRegDeformationFieldRegister : public hoImageRegNonParametricRegister<ValueType, CoordType, D, D>
+    {
+    public:
+        // Non-rigid register that estimates one deformation field per dimension over a multi-resolution pyramid.
+        typedef hoImageRegDeformationFieldRegister<ValueType, CoordType, D> Self;
+        typedef hoImageRegNonParametricRegister<ValueType, CoordType, D, D> BaseClass;
+
+        typedef typename BaseClass::TargetType TargetType;
+        typedef typename BaseClass::SourceType SourceType;
+
+        typedef typename BaseClass::Target2DType Target2DType;
+        typedef typename BaseClass::Source2DType Source2DType;
+
+        typedef typename BaseClass::Target3DType Target3DType;
+        typedef typename BaseClass::Source3DType Source3DType;
+
+        typedef ValueType T;
+        typedef ValueType element_type;
+        typedef ValueType value_type;
+
+        /// boundary handler and interpolator for target image
+        typedef typename BaseClass::BoundaryHandlerTargetType BoundaryHandlerTargetType;
+        typedef typename BaseClass::BoundaryHandlerTargetFixedValueType BoundaryHandlerTargetFixedValueType;
+        typedef typename BaseClass::BoundaryHandlerTargetBorderValueType BoundaryHandlerTargetBorderValueType;
+        typedef typename BaseClass::BoundaryHandlerTargetPeriodicType BoundaryHandlerTargetPeriodicType;
+        typedef typename BaseClass::BoundaryHandlerTargetMirrorType BoundaryHandlerTargetMirrorType;
+
+        typedef typename BaseClass::InterpTargetType InterpTargetType;
+        typedef typename BaseClass::InterpTargetLinearType InterpTargetLinearType;
+        typedef typename BaseClass::InterpTargetNearestNeighborType InterpTargetNearestNeighborType;
+        typedef typename BaseClass::InterpTargetBSplineType InterpTargetBSplineType;
+
+        /// boundary handler and interpolator for source image
+        typedef typename BaseClass::BoundaryHandlerSourceType BoundaryHandlerSourceType;
+        typedef typename BaseClass::BoundaryHandlerSourceFixedValueType BoundaryHandlerSourceFixedValueType;
+        typedef typename BaseClass::BoundaryHandlerSourceBorderValueType BoundaryHandlerSourceBorderValueType;
+        typedef typename BaseClass::BoundaryHandlerSourcePeriodicType BoundaryHandlerSourcePeriodicType;
+        typedef typename BaseClass::BoundaryHandlerSourceMirrorType BoundaryHandlerSourceMirrorType;
+
+        typedef typename BaseClass::InterpSourceType InterpSourceType;
+        typedef typename BaseClass::InterpSourceLinearType InterpSourceLinearType;
+        typedef typename BaseClass::InterpSourceNearestNeighborType InterpSourceNearestNeighborType;
+        typedef typename BaseClass::InterpSourceBSplineType InterpSourceBSplineType;
+
+        /// warper type
+        typedef typename BaseClass::WarperType WarperType;
+
+        /// image dissimilarity type
+        typedef typename BaseClass::DissimilarityType DissimilarityType;
+
+        /// transformation type
+        typedef hoImageRegDeformationField<CoordType, D> TransformationType;
+        typedef typename TransformationType::input_point_type input_point_type;
+        typedef typename TransformationType::output_point_type output_point_type;
+        typedef typename TransformationType::jacobian_position_type jacobian_position_type;
+        typedef typename TransformationType::DeformationFieldType DeformationFieldType;
+        typedef typename TransformationType::coord_type coord_type;
+
+        /// solver type
+        typedef hoImageRegDeformationFieldSolver<ValueType, CoordType, D> SolverType;
+
+        hoImageRegDeformationFieldRegister(unsigned int resolution_pyramid_levels=3, bool use_world_coordinates=false, ValueType bg_value=ValueType(0));
+        virtual ~hoImageRegDeformationFieldRegister();
+
+        /// initialize the registration
+        /// should be called after all images and parameters of registration are set
+        virtual bool initialize();
+
+        /// perform the registration
+        virtual bool performRegistration();
+
+        virtual void printContent(std::ostream& os) const;
+        virtual void print(std::ostream& os) const;
+
+        /// parameters
+        // base class members re-declared here so they can be named without qualification inside this template
+        using BaseClass::use_world_coordinates_;
+        using BaseClass::resolution_pyramid_divided_by_2_;
+        using BaseClass::resolution_pyramid_levels_;
+        using BaseClass::resolution_pyramid_downsample_ratio_;
+        using BaseClass::resolution_pyramid_blurring_sigma_;
+        using BaseClass::boundary_handler_type_warper_;
+        using BaseClass::interp_type_warper_;
+        using BaseClass::boundary_handler_type_pyramid_construction_;
+        using BaseClass::interp_type_pyramid_construction_;
+        using BaseClass::dissimilarity_type_;
+        using BaseClass::solver_type_;
+
+        using BaseClass::gt_timer1_;
+        using BaseClass::gt_timer2_;
+        using BaseClass::gt_timer3_;
+        using BaseClass::performTiming_;
+        using BaseClass::gt_exporter_;
+        using BaseClass::debugFolder_;
+
+        /// number of iterations for every pyramid level
+        std::vector<unsigned int> max_iter_num_pyramid_level_;
+        /// threshold for dissimilarity for every pyramid level
+        std::vector<ValueType> dissimilarity_thres_pyramid_level_;
+        /// number of search size division for every pyramid level
+        std::vector<unsigned int> div_num_pyramid_level_;
+        /// solver step size for every pyramid level
+        std::vector<ValueType> step_size_para_pyramid_level_;
+        /// step size division ratio for every pyramid level
+        std::vector<ValueType> step_size_div_para_pyramid_level_;
+        /// regularization strength for every pyramid level
+        /// if regularization_hilbert_strength_world_coordinate_=true, this strength is in the unit of world coordinate
+        /// if regularization_hilbert_strength_world_coordinate_=false, this strength is in the unit of pixel
+        bool regularization_hilbert_strength_world_coordinate_;
+        std::vector< std::vector<ValueType> > regularization_hilbert_strength_pyramid_level_;
+
+        /// in-FOV constraint; initialize() copies this flag to the solver of every pyramid level
+        bool apply_in_FOV_constraint_;
+
+        /// verbose mode; initialize() copies this flag to the solver of every pyramid level
+        bool verbose_;
+
+        /// set the default parameters
+        virtual bool setDefaultParameters(unsigned int resolution_pyramid_levels, bool use_world_coordinates);
+
+        /// deformation field transformation, defined in the world coordinate of target image
+        TransformationType* transform_;
+        // transform_ is allocated by initialize() when it is NULL; the destructor deletes it unless preset_transform_ is true
+        /// solver, one for every pyramid level
+        std::vector<SolverType> solver_pyramid_;
+
+    protected:
+        // base class members re-declared here so they can be named without qualification inside this template
+        using BaseClass::target_;
+        using BaseClass::source_;
+        using BaseClass::bg_value_;
+        using BaseClass::target_pyramid_;
+        using BaseClass::source_pyramid_;
+        using BaseClass::target_bh_warper_;
+        using BaseClass::target_interp_warper_;
+        using BaseClass::source_bh_warper_;
+        using BaseClass::source_interp_warper_;
+        using BaseClass::target_bh_pyramid_construction_;
+        using BaseClass::target_interp_pyramid_construction_;
+        using BaseClass::source_bh_pyramid_construction_;
+        using BaseClass::source_interp_pyramid_construction_;
+        using BaseClass::warper_pyramid_;
+        using BaseClass::dissimilarity_pyramid_;
+        using BaseClass::warper_pyramid_inverse_;
+        using BaseClass::dissimilarity_pyramid_inverse_;
+
+        /// whether the transformation is preset or not
+        /// the preset transformation can be used to pass in an initial deformation field
+        bool preset_transform_;
+    };
+
+    /// build the register with the default parameters; no transform is allocated before initialize()
+    template<typename ValueType, typename CoordType, unsigned int D>
+    hoImageRegDeformationFieldRegister<ValueType, CoordType, D>::hoImageRegDeformationFieldRegister(unsigned int resolution_pyramid_levels, bool use_world_coordinates, ValueType bg_value)
+    : BaseClass(resolution_pyramid_levels, bg_value), regularization_hilbert_strength_world_coordinate_(false), verbose_(false), transform_(NULL), preset_transform_(false)
+    {
+        this->setDefaultParameters(resolution_pyramid_levels, use_world_coordinates);
+    }
+
+    /// release the transform, unless it was preset and is therefore left untouched
+    template<typename ValueType, typename CoordType, unsigned int D>
+    hoImageRegDeformationFieldRegister<ValueType, CoordType, D>::~hoImageRegDeformationFieldRegister()
+    {
+        if ( preset_transform_ ) return;
+        // deleting a NULL pointer is a no-op, so no further check is needed
+        delete transform_;
+        transform_ = NULL;
+    }
+
+    /// reset all registration parameters to their default values for the given number of pyramid levels
+    /// every per-level container is resized to resolution_pyramid_levels entries; always returns true
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    bool hoImageRegDeformationFieldRegister<ValueType, CoordType, D>::setDefaultParameters(unsigned int resolution_pyramid_levels, bool use_world_coordinates)
+    {
+        use_world_coordinates_ = use_world_coordinates;
+        resolution_pyramid_levels_ = resolution_pyramid_levels;
+
+        // one downsample ratio sits between two neighbouring levels, hence levels-1 entries;
+        // the check avoids the unsigned wrap-around of levels-1 when zero levels are requested
+        resolution_pyramid_downsample_ratio_.clear();
+        resolution_pyramid_downsample_ratio_.resize( (resolution_pyramid_levels_>0) ? resolution_pyramid_levels_-1 : 0, std::vector<float>(D, 2.0) );
+
+        resolution_pyramid_blurring_sigma_.clear();
+        resolution_pyramid_blurring_sigma_.resize(resolution_pyramid_levels_, std::vector<float>(D, 0.0) );
+
+        // the warper defaults to the border value boundary condition with linear interpolation
+        boundary_handler_type_warper_.clear();
+        boundary_handler_type_warper_.resize(resolution_pyramid_levels_, GT_BOUNDARY_CONDITION_BORDERVALUE);
+
+        interp_type_warper_.clear();
+        interp_type_warper_.resize(resolution_pyramid_levels_, GT_IMAGE_INTERPOLATOR_LINEAR);
+
+        boundary_handler_type_pyramid_construction_ = GT_BOUNDARY_CONDITION_BORDERVALUE;
+        interp_type_pyramid_construction_ = GT_IMAGE_INTERPOLATOR_LINEAR;
+
+        dissimilarity_type_.clear();
+        dissimilarity_type_.resize(resolution_pyramid_levels_, GT_IMAGE_DISSIMILARITY_LocalCCR);
+
+        solver_type_.clear();
+        solver_type_.resize(resolution_pyramid_levels_, GT_IMAGE_REG_SOLVER_PDE_TIME_INTEGRATION);
+
+        // solver settings, identical for every level by default
+        max_iter_num_pyramid_level_.clear();
+        max_iter_num_pyramid_level_.resize(resolution_pyramid_levels_, 32);
+
+        dissimilarity_thres_pyramid_level_.clear();
+        dissimilarity_thres_pyramid_level_.resize(resolution_pyramid_levels_, 1e-6);
+
+        div_num_pyramid_level_.clear();
+        div_num_pyramid_level_.resize(resolution_pyramid_levels_, 2);
+
+        step_size_para_pyramid_level_.clear();
+        step_size_para_pyramid_level_.resize(resolution_pyramid_levels_, 0.8);
+
+        step_size_div_para_pyramid_level_.clear();
+        step_size_div_para_pyramid_level_.resize(resolution_pyramid_levels_, 0.5);
+
+        // the regularization strength is given in pixel units by default, with the same
+        // value along each of the D dimensions at every level
+        regularization_hilbert_strength_world_coordinate_ = false;
+        regularization_hilbert_strength_pyramid_level_.clear();
+        regularization_hilbert_strength_pyramid_level_.resize(resolution_pyramid_levels_, std::vector<ValueType>(D, 12.0));
+
+        apply_in_FOV_constraint_ = false;
+
+        verbose_ = false;
+
+        return true;
+    }
+
+    /// set up the warper and the solver of every pyramid level and size the deformation field for the coarsest level
+    /// returns false if a checked step fails or an exception is caught, true otherwise
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    bool hoImageRegDeformationFieldRegister<ValueType, CoordType, D>::initialize()
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(BaseClass::initialize());
+            // without a preset transform, create one from the target image; the destructor releases it
+            if ( transform_ == NULL )
+            {
+                transform_ = new TransformationType(*target_);
+                preset_transform_ = false;
+            }
+
+            warper_pyramid_.resize(resolution_pyramid_levels_);
+            solver_pyramid_.resize(resolution_pyramid_levels_);
+
+            unsigned int ii, jj;
+            for ( ii=0; ii<resolution_pyramid_levels_; ii++ )
+            {
+                warper_pyramid_[ii].setTransformation(*transform_);
+                warper_pyramid_[ii].setInterpolator( *source_interp_warper_[ii] );
+                warper_pyramid_[ii].setBackgroundValue(bg_value_);
+                warper_pyramid_[ii].debugFolder_ = this->debugFolder_;
+                solver_pyramid_[ii].setTransform(*transform_);
+
+                if ( regularization_hilbert_strength_world_coordinate_ )
+                {
+                    // world to pixel: the strength is divided by the pixel size of the target image
+                    std::vector<coord_type> pixelSize;
+                    target_->get_pixel_size(pixelSize);
+                    for ( jj=0; jj<D; jj++ )
+                    {
+                        solver_pyramid_[ii].regularization_hilbert_strength_[jj] = (regularization_hilbert_strength_pyramid_level_[ii][jj] / pixelSize[jj]);
+                    }
+                }
+                else
+                {
+                    for ( jj=0; jj<D; jj++ )
+                    {
+                        solver_pyramid_[ii].regularization_hilbert_strength_[jj] = regularization_hilbert_strength_pyramid_level_[ii][jj];
+                    }
+                }
+                // copy the per-level parameters of this register to the solver of this level
+                solver_pyramid_[ii].max_iter_num_ = max_iter_num_pyramid_level_[ii];
+                solver_pyramid_[ii].dissimilarity_thres_ = dissimilarity_thres_pyramid_level_[ii];
+                solver_pyramid_[ii].div_num_ = div_num_pyramid_level_[ii];
+                solver_pyramid_[ii].step_size_para_ = step_size_para_pyramid_level_[ii];
+                solver_pyramid_[ii].step_size_div_para_ = step_size_div_para_pyramid_level_[ii];
+                solver_pyramid_[ii].verbose_ = verbose_;
+                solver_pyramid_[ii].debugFolder_ = this->debugFolder_;
+
+                solver_pyramid_[ii].setTarget(target_pyramid_[ii]);
+                solver_pyramid_[ii].setSource(source_pyramid_[ii]);
+                solver_pyramid_[ii].setDissimilarity(*dissimilarity_pyramid_[ii]);
+                solver_pyramid_[ii].setWarper(warper_pyramid_[ii]);
+                solver_pyramid_[ii].setInterpolator(*source_interp_warper_[ii]);
+                solver_pyramid_[ii].setBackgroundValue(bg_value_);
+                solver_pyramid_[ii].setUseWorldCoordinate(use_world_coordinates_);
+
+                solver_pyramid_[ii].apply_in_FOV_constraint_ = apply_in_FOV_constraint_;
+            }
+
+            // downsample the deformation field if necessary
+            if ( !transform_->getDeformationField(0).dimensions_equal(target_pyramid_[resolution_pyramid_levels_-1]) )
+            {
+                std::vector<size_t> dim;
+                target_pyramid_[resolution_pyramid_levels_-1].get_dimensions(dim);
+
+                for ( jj=0; jj<D; jj++ )
+                {
+                    DeformationFieldType& deField = transform_->getDeformationField(jj);
+
+                    if ( preset_transform_ )
+                    {
+                        // a preset field carries an initial estimate, so it is resampled rather than cleared
+                        DeformationFieldType deFieldResampled;
+                        hoNDBoundaryHandlerBorderValue<DeformationFieldType> bhBorderValue(deField);
+                        hoNDInterpolatorLinear<DeformationFieldType> interpLinear(deField, bhBorderValue);
+
+                        GADGET_CHECK_RETURN_FALSE(Gadgetron::resampleImage(deField, interpLinear, dim, deFieldResampled));
+
+                        deField = deFieldResampled;
+                        deField.copyImageInfoWithoutImageSize(target_pyramid_[resolution_pyramid_levels_-1]);
+                    }
+                    else
+                    {
+                        deField.createFrom(target_pyramid_[resolution_pyramid_levels_-1]);
+                        Gadgetron::clear(deField);
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDeformationFieldRegister<ValueType, CoordType, D>::initialize() ... ");
+            return false; // a caught exception must not be reported to the caller as success
+        }
+
+        return true;
+    }
+
+    /// solve the registration level by level from the coarsest to the finest pyramid level,
+    /// expanding the deformation field in between; returns false if a check fails or an exception is caught
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    bool hoImageRegDeformationFieldRegister<ValueType, CoordType, D>::performRegistration()
+    {
+        try
+        {
+            // initialize() creates the transform and the per-level solvers used below
+            GADGET_CHECK_RETURN_FALSE( transform_!=NULL && solver_pyramid_.size()>=resolution_pyramid_levels_ );
+            // starting from the most coarse level
+            int level;
+            for ( level=(int)resolution_pyramid_levels_-1; level>=0; level-- )
+            {
+                // update the transform for multi-resolution pyramid
+                transform_->update();
+
+                GADGET_CHECK_RETURN_FALSE(solver_pyramid_[level].solve());
+
+                if ( !debugFolder_.empty() )
+                {
+                    unsigned int jj;
+                    for ( jj=0; jj<D; jj++ )
+                    {
+                        std::ostringstream ostr;
+                        ostr << "deform_" << jj;
+                        GADGET_EXPORT_IMAGE(debugFolder_, gt_exporter_, transform_->getDeformationField(jj), ostr.str());
+                    }
+                }
+
+                // expand the deformation field for next resolution level
+                if ( level>0 )
+                {
+                    std::vector<float> ratio = resolution_pyramid_downsample_ratio_[level-1];
+                    unsigned int jj;
+                    bool downsampledBy2 = true;
+                    for ( jj=0; jj<D; jj++ )
+                    {
+                        if ( GT_ABS(ratio[jj]-2.0f) > FLT_EPSILON )
+                        {
+                            downsampledBy2 = false;
+                            break;
+                        }
+                    }
+
+                    DeformationFieldType deformExpanded;
+                    deformExpanded.createFrom(target_pyramid_[level-1]);
+                    // start from an all-zero buffer at the size of the next finer level
+                    memset(deformExpanded.begin(), 0, deformExpanded.get_number_of_bytes());
+
+                    if ( downsampledBy2 || resolution_pyramid_divided_by_2_ )
+                    {
+                        for ( jj=0; jj<D; jj++ )
+                        {
+                            DeformationFieldType& deform = transform_->getDeformationField(jj);
+                            Gadgetron::expandImageBy2(deform, *target_bh_pyramid_construction_, deformExpanded);
+
+                            if ( !use_world_coordinates_ )
+                            {
+                                Gadgetron::scal(ValueType(2.0), deformExpanded); // the deformation vector should be doubled in length
+                            }
+
+                            deform = deformExpanded;
+                        }
+                    }
+                    else
+                    {
+                        for ( jj=0; jj<D; jj++ )
+                        {
+                            DeformationFieldType& deform = transform_->getDeformationField(jj);
+                            Gadgetron::upsampleImage(deform, *target_interp_pyramid_construction_, deformExpanded, &ratio[0]);
+
+                            if ( !use_world_coordinates_ )
+                            {
+                                Gadgetron::scal(ValueType(ratio[jj]), deformExpanded);
+                            }
+
+                            deform = deformExpanded;
+                        }
+                    }
+
+                    if ( !debugFolder_.empty() )
+                    {
+                        for ( jj=0; jj<D; jj++ )
+                        {
+                            std::ostringstream ostr;
+                            ostr << "deformExpanded_" << jj;
+                            GADGET_EXPORT_IMAGE(debugFolder_, gt_exporter_, transform_->getDeformationField(jj), ostr.str());
+                        }
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDeformationFieldRegister<ValueType, CoordType, D>::performRegistration() ... ");
+            return false; // a caught exception must not be reported to the caller as success
+        }
+
+        return true;
+    }
+
+    /// print the base class parameters followed by the solver settings of every pyramid level
+    /// (iteration number, dissimilarity threshold, search division, step sizes, regularization strength)
+    template<typename ValueType, typename CoordType, unsigned int D>
+    void hoImageRegDeformationFieldRegister<ValueType, CoordType, D>::printContent(std::ostream& os) const
+    {
+        // parameters held by the base class go first
+        BaseClass::printContent(os);
+
+        // maximal number of solver iterations
+        os << "------------" << std::endl
+           << "Maximal iteration number for every pyramid level is : " << std::endl;
+        for ( unsigned int level=0; level<resolution_pyramid_levels_; ++level )
+        {
+            os << " Level " << level << " - " << max_iter_num_pyramid_level_[level] << std::endl;
+        }
+
+        // dissimilarity threshold
+        os << "------------" << std::endl
+           << "Threshold for dissimilarity for every pyramid level is : " << std::endl;
+        for ( unsigned int level=0; level<resolution_pyramid_levels_; ++level )
+        {
+            os << " Level " << level << " - " << dissimilarity_thres_pyramid_level_[level] << std::endl;
+        }
+
+        // number of search size divisions
+        os << "------------" << std::endl
+           << "Number of search size division for every pyramid level is : " << std::endl;
+        for ( unsigned int level=0; level<resolution_pyramid_levels_; ++level )
+        {
+            os << " Level " << level << " - " << div_num_pyramid_level_[level] << std::endl;
+        }
+
+        // solver step size
+        os << "------------" << std::endl
+           << "Solver step size for every pyramid level is : " << std::endl;
+        for ( unsigned int level=0; level<resolution_pyramid_levels_; ++level )
+        {
+            os << " Level " << level << " - " << step_size_para_pyramid_level_[level] << std::endl;
+        }
+
+        // ratio by which the step size is divided
+        os << "------------" << std::endl
+           << "Step size division ratio for every pyramid level is : " << std::endl;
+        for ( unsigned int level=0; level<resolution_pyramid_levels_; ++level )
+        {
+            os << " Level " << level << " - " << step_size_div_para_pyramid_level_[level] << std::endl;
+        }
+
+        // the regularization strength is reported together with the unit it is expressed in,
+        // which depends on regularization_hilbert_strength_world_coordinate_
+        const char* const strengthUnit = regularization_hilbert_strength_world_coordinate_
+            ? "Regularization strength  is in the unit of physical metric, e.g. mm ... "
+            : "Regularization strength  is in the unit of image pixel size ... ";
+
+        os << "------------" << std::endl;
+        os << strengthUnit
+           << "Regularization strength for every pyramid level is : " << std::endl;
+        for ( unsigned int level=0; level<resolution_pyramid_levels_; ++level )
+        {
+            // one strength value per dimension
+            os << " Level " << level << " - [ ";
+            for ( unsigned int dim=0; dim<D; ++dim )
+            {
+                os << regularization_hilbert_strength_pyramid_level_[level][dim] << " ";
+            }
+            os << " ] " << std::endl;
+        }
+
+        // verbose flag
+        os << "------------" << std::endl
+           << "Verbose mode is : " << verbose_ << std::endl;
+    }
+
+    /// print the register name, all parameters (through the virtual printContent) and a closing rule
+    template<typename ValueType, typename CoordType, unsigned int D>
+    void hoImageRegDeformationFieldRegister<ValueType, CoordType, D>::print(std::ostream& os) const
+    {
+        os << "--------------Gagdgetron non-parametric deformation field image register -------------" << std::endl;
+        this->printContent(os);
+        os << "--------------------------------------------------------------------" << std::endl << std::ends;
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/register/hoImageRegNonParametricRegister.h b/toolboxes/registration/optical_flow/cpu/register/hoImageRegNonParametricRegister.h
new file mode 100644
index 0000000..426913e
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/register/hoImageRegNonParametricRegister.h
@@ -0,0 +1,148 @@
+/** \file   hoImageRegNonParametricRegister.h
+    \brief  Define the class to perform non-parametric image registration in gadgetron
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoImageRegRegister.h"
+
+namespace Gadgetron
+{
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    class hoImageRegNonParametricRegister : public hoImageRegRegister<ValueType, CoordType, DIn, DOut>
+    {
+    public:
+        // Base class of the non-parametric registers; performRegistration() is pure virtual and left to derived classes.
+        typedef hoImageRegNonParametricRegister<ValueType, CoordType, DIn, DOut> Self;
+        typedef hoImageRegRegister<ValueType, CoordType, DIn, DOut> BaseClass;
+
+        typedef typename BaseClass::TargetType TargetType;
+        typedef typename BaseClass::SourceType SourceType;
+
+        typedef typename BaseClass::Target2DType Target2DType;
+        typedef typename BaseClass::Source2DType Source2DType;
+
+        typedef typename BaseClass::Target3DType Target3DType;
+        typedef typename BaseClass::Source3DType Source3DType;
+
+        typedef ValueType T;
+        typedef ValueType element_type;
+        typedef ValueType value_type;
+
+        typedef CoordType coord_type;
+
+        /// boundary handler and interpolator for target image
+        typedef typename BaseClass::BoundaryHandlerTargetType BoundaryHandlerTargetType;
+        typedef typename BaseClass::BoundaryHandlerTargetFixedValueType BoundaryHandlerTargetFixedValueType;
+        typedef typename BaseClass::BoundaryHandlerTargetBorderValueType BoundaryHandlerTargetBorderValueType;
+        typedef typename BaseClass::BoundaryHandlerTargetPeriodicType BoundaryHandlerTargetPeriodicType;
+        typedef typename BaseClass::BoundaryHandlerTargetMirrorType BoundaryHandlerTargetMirrorType;
+
+        typedef typename BaseClass::InterpTargetType InterpTargetType;
+        typedef typename BaseClass::InterpTargetLinearType InterpTargetLinearType;
+        typedef typename BaseClass::InterpTargetNearestNeighborType InterpTargetNearestNeighborType;
+        typedef typename BaseClass::InterpTargetBSplineType InterpTargetBSplineType;
+
+        /// boundary handler and interpolator for source image
+        typedef typename BaseClass::BoundaryHandlerSourceType BoundaryHandlerSourceType;
+        typedef typename BaseClass::BoundaryHandlerSourceFixedValueType BoundaryHandlerSourceFixedValueType;
+        typedef typename BaseClass::BoundaryHandlerSourceBorderValueType BoundaryHandlerSourceBorderValueType;
+        typedef typename BaseClass::BoundaryHandlerSourcePeriodicType BoundaryHandlerSourcePeriodicType;
+        typedef typename BaseClass::BoundaryHandlerSourceMirrorType BoundaryHandlerSourceMirrorType;
+
+        typedef typename BaseClass::InterpSourceType InterpSourceType;
+        typedef typename BaseClass::InterpSourceLinearType InterpSourceLinearType;
+        typedef typename BaseClass::InterpSourceNearestNeighborType InterpSourceNearestNeighborType;
+        typedef typename BaseClass::InterpSourceBSplineType InterpSourceBSplineType;
+
+        /// warper type
+        typedef typename BaseClass::WarperType WarperType;
+
+        /// image dissimilarity type
+        typedef typename BaseClass::DissimilarityType DissimilarityType;
+
+        hoImageRegNonParametricRegister(unsigned int resolution_pyramid_levels=3, ValueType bg_value=ValueType(0));
+        virtual ~hoImageRegNonParametricRegister();
+
+        /// initialize the registration
+        /// should be called after all images and parameters of registration are set
+        virtual bool initialize() { return BaseClass::initialize(); }
+
+        /// perform the registration; pure virtual, so every concrete register has to implement it
+        virtual bool performRegistration() = 0;
+
+        virtual void print(std::ostream& os) const;
+
+        /// parameters
+        // base class members re-declared here so they can be named without qualification inside this template
+        using BaseClass::use_world_coordinates_;
+        using BaseClass::resolution_pyramid_divided_by_2_;
+        using BaseClass::resolution_pyramid_levels_;
+        using BaseClass::resolution_pyramid_downsample_ratio_;
+        using BaseClass::resolution_pyramid_blurring_sigma_;
+        using BaseClass::boundary_handler_type_warper_;
+        using BaseClass::interp_type_warper_;
+        using BaseClass::boundary_handler_type_pyramid_construction_;
+        using BaseClass::interp_type_pyramid_construction_;
+        using BaseClass::dissimilarity_type_;
+        using BaseClass::solver_type_;
+
+        using BaseClass::dissimilarity_LocalCCR_sigmaArg_;
+        using BaseClass::dissimilarity_hist_num_bin_target_;
+        using BaseClass::dissimilarity_hist_num_bin_warpped_;
+        using BaseClass::dissimilarity_hist_pv_interpolation_;
+        using BaseClass::dissimilarity_hist_step_size_ignore_pixel_;
+
+        using BaseClass::dissimilarity_MI_betaArg_;
+
+        using BaseClass::gt_timer1_;
+        using BaseClass::gt_timer2_;
+        using BaseClass::gt_timer3_;
+        using BaseClass::performTiming_;
+        using BaseClass::gt_exporter_;
+        using BaseClass::debugFolder_;
+
+    protected:
+        // base class members re-declared here so they can be named without qualification inside this template
+        using BaseClass::target_;
+        using BaseClass::source_;
+        using BaseClass::bg_value_;
+        using BaseClass::target_pyramid_;
+        using BaseClass::source_pyramid_;
+        using BaseClass::target_bh_warper_;
+        using BaseClass::target_interp_warper_;
+        using BaseClass::source_bh_warper_;
+        using BaseClass::source_interp_warper_;
+        using BaseClass::target_bh_pyramid_construction_;
+        using BaseClass::target_interp_pyramid_construction_;
+        using BaseClass::source_bh_pyramid_construction_;
+        using BaseClass::source_interp_pyramid_construction_;
+        using BaseClass::warper_pyramid_;
+        using BaseClass::dissimilarity_pyramid_;
+        using BaseClass::warper_pyramid_inverse_;
+        using BaseClass::dissimilarity_pyramid_inverse_;
+    };
+
+    /// forward the number of pyramid levels and the background value to the base register
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut>
+    hoImageRegNonParametricRegister<ValueType, CoordType, DIn, DOut>::hoImageRegNonParametricRegister(unsigned int resolution_pyramid_levels, ValueType bg_value)
+    : BaseClass(resolution_pyramid_levels, bg_value)
+    {
+    }
+
+    /// this class adds no members of its own, so the destructor body is empty
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut>
+    hoImageRegNonParametricRegister<ValueType, CoordType, DIn, DOut>::~hoImageRegNonParametricRegister()
+    {
+    }
+
+    /// print the register name, the parameters held by the base class and a closing rule
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut>
+    void hoImageRegNonParametricRegister<ValueType, CoordType, DIn, DOut>::print(std::ostream& os) const
+    {
+        os << "--------------Gagdgetron non-parametric image register -------------" << std::endl;
+        BaseClass::printContent(os);
+        os << "--------------------------------------------------------------------" << std::endl << std::ends;
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/register/hoImageRegParametricRegister.h b/toolboxes/registration/optical_flow/cpu/register/hoImageRegParametricRegister.h
new file mode 100644
index 0000000..215f589
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/register/hoImageRegParametricRegister.h
@@ -0,0 +1,408 @@
+/** \file   hoImageRegParametricRegister.h
+    \brief  Define the class to perform parametric image registration in gadgetron
+            By default, the multi-level multi-step parametric solver is used
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoImageRegRegister.h"
+
+namespace Gadgetron
+{
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    class hoImageRegParametricRegister : public hoImageRegRegister<ValueType, CoordType, DIn, DOut>
+    {
+    public:
+        // Parametric image registration on a multi-resolution pyramid: initialize() creates one solver per level, performRegistration() runs them.
+        typedef hoImageRegParametricRegister<ValueType, CoordType, DIn, DOut> Self;
+        typedef hoImageRegRegister<ValueType, CoordType, DIn, DOut> BaseClass;
+
+        typedef typename BaseClass::TargetType TargetType;
+        typedef typename BaseClass::SourceType SourceType;
+
+        typedef typename BaseClass::Target2DType Target2DType;
+        typedef typename BaseClass::Source2DType Source2DType;
+
+        typedef typename BaseClass::Target3DType Target3DType;
+        typedef typename BaseClass::Source3DType Source3DType;
+
+        typedef ValueType T;
+        typedef ValueType element_type;
+        typedef ValueType value_type;
+
+        typedef CoordType coord_type;
+
+        /// boundary handler and interpolator for target image
+        typedef typename BaseClass::BoundaryHandlerTargetType BoundaryHandlerTargetType;
+        typedef typename BaseClass::BoundaryHandlerTargetFixedValueType BoundaryHandlerTargetFixedValueType;
+        typedef typename BaseClass::BoundaryHandlerTargetBorderValueType BoundaryHandlerTargetBorderValueType;
+        typedef typename BaseClass::BoundaryHandlerTargetPeriodicType BoundaryHandlerTargetPeriodicType;
+        typedef typename BaseClass::BoundaryHandlerTargetMirrorType BoundaryHandlerTargetMirrorType;
+
+        typedef typename BaseClass::InterpTargetType InterpTargetType;
+        typedef typename BaseClass::InterpTargetLinearType InterpTargetLinearType;
+        typedef typename BaseClass::InterpTargetNearestNeighborType InterpTargetNearestNeighborType;
+        typedef typename BaseClass::InterpTargetBSplineType InterpTargetBSplineType;
+
+        /// boundary handler and interpolator for source image
+        typedef typename BaseClass::BoundaryHandlerSourceType BoundaryHandlerSourceType;
+        typedef typename BaseClass::BoundaryHandlerSourceFixedValueType BoundaryHandlerSourceFixedValueType;
+        typedef typename BaseClass::BoundaryHandlerSourceBorderValueType BoundaryHandlerSourceBorderValueType;
+        typedef typename BaseClass::BoundaryHandlerSourcePeriodicType BoundaryHandlerSourcePeriodicType;
+        typedef typename BaseClass::BoundaryHandlerSourceMirrorType BoundaryHandlerSourceMirrorType;
+
+        typedef typename BaseClass::InterpSourceType InterpSourceType;
+        typedef typename BaseClass::InterpSourceLinearType InterpSourceLinearType;
+        typedef typename BaseClass::InterpSourceNearestNeighborType InterpSourceNearestNeighborType;
+        typedef typename BaseClass::InterpSourceBSplineType InterpSourceBSplineType;
+
+        /// warper type
+        typedef typename BaseClass::WarperType WarperType;
+
+        /// image dissimilarity type
+        typedef typename BaseClass::DissimilarityType DissimilarityType;
+
+        /// transformation type
+        typedef hoImageRegParametricTransformation<CoordType, DIn, DOut> TransformationType;
+        typedef typename TransformationType::input_point_type input_point_type;
+        typedef typename TransformationType::output_point_type output_point_type;
+        typedef typename TransformationType::jacobian_position_type jacobian_position_type;
+
+        /// solver type
+        typedef hoImageRegParametricSolver<ValueType, CoordType, DIn, DOut> SolverType;
+
+        hoImageRegParametricRegister(unsigned int resolution_pyramid_levels=3, bool use_world_coordinates=true, ValueType bg_value=ValueType(0));
+        virtual ~hoImageRegParametricRegister();
+
+        /// initialize the registration
+        /// should be called after all images and parameters of registration are set
+        virtual bool initialize();
+
+        /// create the parametric solver of type v and copy the settings of pyramid level "level" into it
+        SolverType* createParametricSolver(GT_IMAGE_REG_SOLVER v, unsigned int level);
+
+        /// set the default parameters
+        virtual bool setDefaultParameters(unsigned int resolution_pyramid_levels, bool use_world_coordinates);
+
+        /// perform the registration, from the coarsest pyramid level (highest index) down to level 0
+        virtual bool performRegistration();
+
+        virtual void print(std::ostream& os) const;
+
+        /// parameters
+
+        using BaseClass::use_world_coordinates_;
+        using BaseClass::resolution_pyramid_divided_by_2_;
+        using BaseClass::resolution_pyramid_levels_;
+        using BaseClass::resolution_pyramid_downsample_ratio_;
+        using BaseClass::resolution_pyramid_blurring_sigma_;
+        using BaseClass::boundary_handler_type_warper_;
+        using BaseClass::interp_type_warper_;
+        using BaseClass::boundary_handler_type_pyramid_construction_;
+        using BaseClass::interp_type_pyramid_construction_;
+        using BaseClass::dissimilarity_type_;
+        using BaseClass::solver_type_;
+
+        using BaseClass::dissimilarity_LocalCCR_sigmaArg_;
+        using BaseClass::dissimilarity_hist_num_bin_target_;
+        using BaseClass::dissimilarity_hist_num_bin_warpped_;
+        using BaseClass::dissimilarity_hist_pv_interpolation_;
+        using BaseClass::dissimilarity_hist_step_size_ignore_pixel_;
+
+        using BaseClass::dissimilarity_MI_betaArg_;
+
+        using BaseClass::gt_timer1_;
+        using BaseClass::gt_timer2_;
+        using BaseClass::gt_timer3_;
+        using BaseClass::performTiming_;
+        using BaseClass::gt_exporter_;
+        using BaseClass::debugFolder_;
+
+        /// verbose mode; copied to every solver in initialize() and enables the transformation print-outs in performRegistration()
+        bool verbose_;
+
+        /// parametric transformation handed to every warper and solver; initialize() requires it to be non-NULL and nothing in this header deletes it
+        TransformationType* transform_;
+
+        /// one solver per pyramid level, allocated by createParametricSolver() during initialize()
+        std::vector<SolverType*> solver_pyramid_;
+
+        /// the settings below are copied into the solver of each pyramid level by createParametricSolver()
+
+        /// maximal number of iterations, one entry per pyramid level
+        std::vector<unsigned int> max_iter_num_pyramid_level_;
+
+        /// threshold for minimal dissimilarity changes (one value shared by all levels)
+        ValueType dissimilarity_thres_;
+
+        /// threshold for minimal parameter changes (one value shared by all levels)
+        ValueType parameter_thres_;
+
+        /// number of search divisions, one entry per pyramid level
+        std::vector<unsigned int> div_num_pyramid_level_;
+
+        /// step size for every parameter, indexed as [level][parameter]
+        std::vector< std::vector<ValueType> > step_size_para_pyramid_level_;
+
+        /// step size division ratio, indexed as [level][parameter]
+        /// step_size_para_ = step_size_para_ .* step_size_div_para_ to reduce search step size
+        std::vector< std::vector<ValueType> > step_size_div_para_pyramid_level_;
+
+    protected:
+
+        using BaseClass::target_;
+        using BaseClass::source_;
+        using BaseClass::bg_value_;
+        using BaseClass::target_pyramid_;
+        using BaseClass::source_pyramid_;
+        using BaseClass::target_bh_warper_;
+        using BaseClass::target_interp_warper_;
+        using BaseClass::source_bh_warper_;
+        using BaseClass::source_interp_warper_;
+        using BaseClass::target_bh_pyramid_construction_;
+        using BaseClass::target_interp_pyramid_construction_;
+        using BaseClass::source_bh_pyramid_construction_;
+        using BaseClass::source_interp_pyramid_construction_;
+        using BaseClass::warper_pyramid_;
+        using BaseClass::dissimilarity_pyramid_;
+        using BaseClass::warper_pyramid_inverse_;
+        using BaseClass::dissimilarity_pyramid_inverse_;
+        /// NOTE(review): only set to false by the constructor in this header; presumably flags a caller-supplied transform_ -- confirm
+        bool preset_transform_;
+    };
+
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    hoImageRegParametricRegister<ValueType, CoordType, DIn, DOut>::
+    hoImageRegParametricRegister(unsigned int resolution_pyramid_levels, bool use_world_coordinates, ValueType bg_value) : BaseClass(resolution_pyramid_levels, bg_value), verbose_(false), transform_(NULL), preset_transform_(false)
+    {   // transform_ starts out NULL so that the "transform_ != NULL" check in initialize() is meaningful when no transformation was ever set
+        GADGET_CHECK_THROW(this->setDefaultParameters(resolution_pyramid_levels, use_world_coordinates));   // fills the per-level solver defaults
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    hoImageRegParametricRegister<ValueType, CoordType, DIn, DOut>::~hoImageRegParametricRegister()
+    {
+        for ( size_t ii=0; ii<solver_pyramid_.size(); ii++ ) { delete solver_pyramid_[ii]; }   // solvers come from new in createParametricSolver(); transform_ is not allocated in this class, so it is left alone
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    bool hoImageRegParametricRegister<ValueType, CoordType, DIn, DOut>::setDefaultParameters(unsigned int resolution_pyramid_levels, bool use_world_coordinates)
+    {
+        // Reset every per-level setting of this class to its default for the requested number of pyramid levels.
+        // Returns false, leaving the object untouched, when resolution_pyramid_levels is 0: the "levels-1"
+        // expressions below are unsigned and would wrap around instead of going negative.
+        GADGET_CHECK_RETURN_FALSE(resolution_pyramid_levels>0);
+
+        use_world_coordinates_ = use_world_coordinates;
+        resolution_pyramid_levels_ = resolution_pyramid_levels;
+
+        // one downsample ratio set per transition between levels, one blurring sigma set per level
+        resolution_pyramid_downsample_ratio_.assign(resolution_pyramid_levels_-1, std::vector<float>(GT_MAX(DIn, DOut), 2.0) );
+        resolution_pyramid_blurring_sigma_.assign(resolution_pyramid_levels_, std::vector<float>(GT_MAX(DIn, DOut), 0.0) );
+
+        boundary_handler_type_warper_.assign(resolution_pyramid_levels_, GT_BOUNDARY_CONDITION_FIXEDVALUE);
+        interp_type_warper_.assign(resolution_pyramid_levels_, GT_IMAGE_INTERPOLATOR_LINEAR);
+
+        boundary_handler_type_pyramid_construction_ = GT_BOUNDARY_CONDITION_BORDERVALUE;
+        interp_type_pyramid_construction_ = GT_IMAGE_INTERPOLATOR_LINEAR;
+
+        dissimilarity_type_.assign(resolution_pyramid_levels_, GT_IMAGE_DISSIMILARITY_NMI);
+
+        solver_type_.assign(resolution_pyramid_levels_, GT_IMAGE_REG_SOLVER_DOWNHILL);
+
+        max_iter_num_pyramid_level_.assign(resolution_pyramid_levels_, 100);
+
+        div_num_pyramid_level_.assign(resolution_pyramid_levels_, 5);
+
+        dissimilarity_thres_ = 1e-6;
+        // parameter_thres_ is copied into every solver by createParametricSolver(), so it must not stay unassigned.
+        // NOTE(review): 1e-6 mirrors dissimilarity_thres_; confirm against the solver's own default.
+        parameter_thres_ = 1e-6;
+
+        step_size_para_pyramid_level_.assign(resolution_pyramid_levels_, std::vector<ValueType>());
+
+        step_size_div_para_pyramid_level_.assign(resolution_pyramid_levels_, std::vector<ValueType>());
+
+        // number of step-size entries kept for every level
+        size_t maxParaNum = 4096;
+
+        // the coarsest level (highest index) gets the largest step ...
+        step_size_para_pyramid_level_[resolution_pyramid_levels_-1].resize(maxParaNum, 3.2);
+        step_size_div_para_pyramid_level_[resolution_pyramid_levels_-1].resize(maxParaNum, 0.5);
+
+        // ... and every finer level scales the step of the level above it by its own division ratio
+        int ii;
+        size_t jj;
+        for ( ii=(int)resolution_pyramid_levels_-2; ii>=0; ii-- )
+        {
+            step_size_div_para_pyramid_level_[ii].resize(maxParaNum, 0.5);
+            step_size_para_pyramid_level_[ii].resize(maxParaNum);
+
+            for ( jj=0; jj<maxParaNum; jj++ )
+            {
+                step_size_para_pyramid_level_[ii][jj] = step_size_div_para_pyramid_level_[ii][jj]*step_size_para_pyramid_level_[ii+1][jj];
+            }
+        }
+
+        verbose_ = false;
+
+        return true;
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    bool hoImageRegParametricRegister<ValueType, CoordType, DIn, DOut>::initialize()
+    {
+        // Run BaseClass::initialize(), then set up one warper and one new solver per pyramid level; returns false on any failed check or caught exception.
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(BaseClass::initialize());
+            GADGET_CHECK_RETURN_FALSE( transform_ != NULL );
+
+            unsigned int ii;
+            for ( ii=0; ii<solver_pyramid_.size(); ii++ ) { delete solver_pyramid_[ii]; solver_pyramid_[ii] = NULL; }   // free solvers left over from an earlier initialize() call
+            warper_pyramid_.resize(resolution_pyramid_levels_);
+            solver_pyramid_.resize(resolution_pyramid_levels_);
+
+            for ( ii=0; ii<resolution_pyramid_levels_; ii++ )
+            {
+                warper_pyramid_[ii].setTransformation(*transform_);
+                warper_pyramid_[ii].setInterpolator( *source_interp_warper_[ii] );
+                warper_pyramid_[ii].setBackgroundValue(bg_value_);
+                warper_pyramid_[ii].debugFolder_ = this->debugFolder_;
+
+                solver_pyramid_[ii] = this->createParametricSolver(solver_type_[ii], ii);
+                GADGET_CHECK_RETURN_FALSE( solver_pyramid_[ii] != NULL );   // never dereference a solver that could not be created
+                solver_pyramid_[ii]->setTransform(*transform_);
+                solver_pyramid_[ii]->verbose_ = verbose_;
+                solver_pyramid_[ii]->debugFolder_ = this->debugFolder_;
+                solver_pyramid_[ii]->setTarget(target_pyramid_[ii]);
+                solver_pyramid_[ii]->setSource(source_pyramid_[ii]);
+                solver_pyramid_[ii]->setDissimilarity(*dissimilarity_pyramid_[ii]);
+                solver_pyramid_[ii]->setWarper(warper_pyramid_[ii]);
+                solver_pyramid_[ii]->setInterpolator(*source_interp_warper_[ii]);
+                solver_pyramid_[ii]->setBackgroundValue(bg_value_);
+                solver_pyramid_[ii]->setUseWorldCoordinate(use_world_coordinates_);
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegParametricRegister<ValueType, CoordType, DIn, DOut>::initialize() ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    bool hoImageRegParametricRegister<ValueType, CoordType, DIn, DOut>::performRegistration()
+    {
+        // Run the solver of every pyramid level in turn; each solver was bound to the same transform_ by initialize().
+        // Returns false when solver_pyramid_ holds fewer solvers than levels, a solver or adjustment fails, or an exception is caught.
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE( solver_pyramid_.size() >= resolution_pyramid_levels_ );
+
+            if ( verbose_ )
+            {
+                GADGET_MSG("Initial transformation : ");
+                transform_->print(std::cout);
+            }
+
+            // starting from the most coarse level
+            int level;
+            for ( level=(int)resolution_pyramid_levels_-1; level>=0; level-- )
+            {
+                GADGET_CHECK_RETURN_FALSE(solver_pyramid_[level]->solve());
+
+                if ( verbose_ )
+                {
+                    GADGET_MSG("Transformation for level " << level << " : ");
+                    transform_->printTransform(std::cout);
+                }
+
+                // adjust transformation for the next resolution level; only done when not registering in world coordinates
+                if ( level>0 && !use_world_coordinates_ )
+                {
+                    hoMatrix<ValueType> lowResI2W, highResI2W;
+                    source_pyramid_[level].image_to_world_matrix(lowResI2W);
+                    source_pyramid_[level-1].image_to_world_matrix(highResI2W);
+
+                    GADGET_CHECK_RETURN_FALSE(transform_->adjustForResolutionPyramid(lowResI2W, highResI2W));
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegParametricRegister<ValueType, CoordType, DIn, DOut>::performRegistration() ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    hoImageRegParametricSolver<ValueType, CoordType, DIn, DOut>* hoImageRegParametricRegister<ValueType, CoordType, DIn, DOut>::createParametricSolver(GT_IMAGE_REG_SOLVER v, unsigned int level)
+    {
+        // Allocate a solver of type v with new and copy the settings of pyramid level "level" into it; the pointer is not stored or freed here.
+        // Returns NULL, after logging an error, when v is not a recognized solver type.
+        SolverType* res = NULL;
+
+        switch (v)
+        {
+            case GT_IMAGE_REG_SOLVER_DOWNHILL:
+                res = new hoImageRegParametricDownHillSolver<ValueType, CoordType, DIn, DOut>();
+                break;
+
+            case GT_IMAGE_REG_SOLVER_GRADIENT_DESCENT:
+                res = new hoImageRegParametricGradientDescentSolver<ValueType, CoordType, DIn, DOut>();
+                break;
+
+            default:
+                GADGET_ERROR_MSG("Unrecognized parametric solver type : " << v);
+                return NULL;
+        }
+
+        res->max_iter_num_ = max_iter_num_pyramid_level_[level];
+        res->dissimilarity_thres_ = dissimilarity_thres_;
+        res->parameter_thres_ = parameter_thres_;
+        res->div_num_ = div_num_pyramid_level_[level];
+        res->step_size_para_ = step_size_para_pyramid_level_[level];
+        res->step_size_div_para_ = step_size_div_para_pyramid_level_[level];
+        return res;
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    void hoImageRegParametricRegister<ValueType, CoordType, DIn, DOut>::print(std::ostream& os) const
+    {
+        // write a banner, the output of BaseClass::printContent(), then the per-level solver settings of this class
+        os << "--------------Gadgetron parametric image register -------------" << std::endl;
+        BaseClass::printContent(os);
+
+        unsigned int ii;
+
+        os << "------------" << std::endl;
+        os << "Maximal iteration number for every pyramid level is : " << std::endl;
+        for ( ii=0; ii<resolution_pyramid_levels_; ii++ )
+        {
+            os << " Level " << ii << " - " 
+                << max_iter_num_pyramid_level_[ii] << std::endl;
+        }
+
+        os << "------------" << std::endl;
+        os << "Threshold for dissimilarity for every pyramid level is : " << std::endl;
+        for ( ii=0; ii<resolution_pyramid_levels_; ii++ )
+        {
+            os << " Level " << ii << " - " 
+                << dissimilarity_thres_ << std::endl;
+        }
+
+        os << "------------" << std::endl;
+        os << "Number of search size division for every pyramid level is : " << std::endl;
+        for ( ii=0; ii<resolution_pyramid_levels_; ii++ )
+        {
+            os << " Level " << ii << " - " 
+                << div_num_pyramid_level_[ii] << std::endl;
+        }
+        os << "--------------------------------------------------------------------" << std::endl;   // no std::ends: it would put a stray '\0' into the stream
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/register/hoImageRegRegister.h b/toolboxes/registration/optical_flow/cpu/register/hoImageRegRegister.h
new file mode 100644
index 0000000..fd1d70b
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/register/hoImageRegRegister.h
@@ -0,0 +1,651 @@
+/** \file   hoImageRegRegister.h
+    \brief  Define the class to perform image registration in gadgetron
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoNDArray.h"
+#include "hoNDImage.h"
+#include "hoNDInterpolator.h"
+#include "hoNDBoundaryHandler.h"
+#include "hoMatrix.h"
+#include "hoNDArray_utils.h"
+#include "hoNDArray_elemwise.h"
+#include "hoNDImage_util.h"
+#include "gtPlusISMRMRDReconUtil.h"
+
+// transformation
+#include "hoImageRegTransformation.h"
+#include "hoImageRegParametricTransformation.h"
+#include "hoImageRegTransformation.h"
+#include "hoImageRegHomogenousTransformation.h"
+#include "hoImageRegRigid2DTransformation.h"
+#include "hoImageRegRigid3DTransformation.h"
+
+// warper
+#include "hoImageRegWarper.h"
+
+// solver
+#include "hoImageRegDeformationFieldSolver.h"
+#include "hoImageRegParametricSolver.h"
+#include "hoImageRegDeformationFieldBidirectionalSolver.h"
+#include "hoImageRegParametricDownHillSolver.h"
+#include "hoImageRegParametricGradientDescentSolver.h"
+
+// dissimilarity
+#include "hoImageRegDissimilaritySSD.h"
+#include "hoImageRegDissimilarityLocalCCR.h"
+#include "hoImageRegDissimilarityMutualInformation.h"
+#include "hoImageRegDissimilarityNormalizedMutualInformation.h"
+#include "GtPrepUtil.h"
+
+namespace Gadgetron
+{
+    /// base class performing the image registration using a pyramid scheme; derived classes implement performRegistration()
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    class hoImageRegRegister
+    {
+    public:
+
+        typedef hoImageRegRegister<ValueType, CoordType, DIn, DOut> Self;
+
+        typedef hoNDImage<ValueType, DOut> TargetType;
+        typedef hoNDImage<ValueType, DIn> SourceType;
+
+        typedef hoNDImage<ValueType, 2> Target2DType;
+        typedef Target2DType Source2DType;
+
+        typedef hoNDImage<ValueType, 3> Target3DType;
+        typedef Target2DType Source3DType; // NOTE(review): aliases the 2D image type; Target3DType looks intended -- confirm
+
+        typedef ValueType T;
+        typedef ValueType element_type;
+        typedef ValueType value_type;
+
+        typedef CoordType coord_type;
+
+        /// boundary handler and interpolator for target image
+        typedef hoNDBoundaryHandler<TargetType> BoundaryHandlerTargetType;
+        typedef hoNDBoundaryHandlerFixedValue<TargetType> BoundaryHandlerTargetFixedValueType;
+        typedef hoNDBoundaryHandlerBorderValue<TargetType> BoundaryHandlerTargetBorderValueType;
+        typedef hoNDBoundaryHandlerPeriodic<TargetType> BoundaryHandlerTargetPeriodicType;
+        typedef hoNDBoundaryHandlerMirror<TargetType> BoundaryHandlerTargetMirrorType;
+
+        typedef hoNDInterpolator<TargetType> InterpTargetType;
+        typedef hoNDInterpolatorLinear<TargetType> InterpTargetLinearType;
+        typedef hoNDInterpolatorNearestNeighbor<TargetType> InterpTargetNearestNeighborType;
+        typedef hoNDInterpolatorBSpline<TargetType, DIn> InterpTargetBSplineType; // NOTE(review): DIn is paired with the DOut-dimensional TargetType -- confirm
+
+        /// boundary handler and interpolator for source image
+        typedef hoNDBoundaryHandler<SourceType> BoundaryHandlerSourceType;
+        typedef hoNDBoundaryHandlerFixedValue<SourceType> BoundaryHandlerSourceFixedValueType;
+        typedef hoNDBoundaryHandlerBorderValue<SourceType> BoundaryHandlerSourceBorderValueType;
+        typedef hoNDBoundaryHandlerPeriodic<SourceType> BoundaryHandlerSourcePeriodicType;
+        typedef hoNDBoundaryHandlerMirror<SourceType> BoundaryHandlerSourceMirrorType;
+
+        typedef hoNDInterpolator<SourceType> InterpSourceType;
+        typedef hoNDInterpolatorLinear<SourceType> InterpSourceLinearType;
+        typedef hoNDInterpolatorNearestNeighbor<SourceType> InterpSourceNearestNeighborType;
+        typedef hoNDInterpolatorBSpline<SourceType, DIn> InterpSourceBSplineType;
+
+        /// warper type
+        typedef hoImageRegWarper<ValueType, CoordType, DIn, DOut> WarperType;
+
+        /// image dissimilarity type
+        typedef hoImageRegDissimilarity<ValueType, DOut> DissimilarityType;
+
+        hoImageRegRegister(unsigned int resolution_pyramid_levels=3, ValueType bg_value=ValueType(0));
+        virtual ~hoImageRegRegister();
+
+        /// initialize the registration
+        /// should be called after all images and parameters of registration are set
+        virtual bool initialize();
+
+        /// set target and source, create the multi-resolution pyramid and set up the interpolators
+        virtual void setTarget(TargetType& target);
+        virtual void setSource(SourceType& source);
+
+        /// create the dissimilarity measure of type v, configured from the settings of pyramid level "level"
+        DissimilarityType* createDissimilarity(GT_IMAGE_DISSIMILARITY v, unsigned int level);
+
+        /// perform the registration
+        virtual bool performRegistration() = 0;
+
+        /// print the class information
+        virtual void printContent(std::ostream& os) const;
+        virtual void print(std::ostream& os) const;
+
+        /// parameters
+
+        /// whether to perform world coordinate registration
+        bool use_world_coordinates_;
+
+        /// number of resolution pyramid levels
+        unsigned int resolution_pyramid_levels_;
+
+        /// use fast pyramid creation by dividing the image size by 2
+        /// if use_world_coordinates_ == true and resolution_pyramid_divided_by_2_ == true, resolution_pyramid_downsample_ratio_
+        /// and resolution_pyramid_blurring_sigma_ will be ignored
+        bool resolution_pyramid_divided_by_2_;
+
+        /// downsample ratio of the resolution pyramid for every dimension and every level
+        /// e.g. ratio=2, downsample by 100%
+        std::vector< std::vector<float> > resolution_pyramid_downsample_ratio_;
+
+        /// extra gaussian blurring can be applied on every resolution pyramid
+        /// if use_world_coordinates_=true, sigma is in the unit of world coordinate
+        /// otherwise, it is in the unit of image pixel
+        std::vector< std::vector<float> > resolution_pyramid_blurring_sigma_;
+
+        /// boundary handler and interpolator type for warper, for every resolution level, different interpolator can be used
+        std::vector<GT_BOUNDARY_CONDITION> boundary_handler_type_warper_;
+        std::vector<GT_IMAGE_INTERPOLATOR> interp_type_warper_;
+
+        /// boundary handler and interpolator type for pyramid construction
+        GT_BOUNDARY_CONDITION boundary_handler_type_pyramid_construction_;
+        GT_IMAGE_INTERPOLATOR interp_type_pyramid_construction_;
+
+        /// image dissimilarity
+        /// for different pyramid level, different dissimilarity can be used
+        std::vector<GT_IMAGE_DISSIMILARITY> dissimilarity_type_;
+
+        /// solver for every pyramid level
+        std::vector<GT_IMAGE_REG_SOLVER> solver_type_;
+
+        ///// whether to set the origin of target/source to image center
+        //bool orgin_at_image_center_;
+
+        /// parameters for dissimilarity measures, for every pyramid level
+        /// LocalCCR
+        std::vector<std::vector<ValueType> > dissimilarity_LocalCCR_sigmaArg_;
+
+        /// Histogram based
+        std::vector<unsigned int> dissimilarity_hist_num_bin_target_;
+        std::vector<unsigned int> dissimilarity_hist_num_bin_warpped_;
+        bool dissimilarity_hist_pv_interpolation_;
+        std::vector<size_t> dissimilarity_hist_step_size_ignore_pixel_;
+
+        /// Mutual information
+        std::vector<ValueType> dissimilarity_MI_betaArg_;
+
+        // ----------------------------------
+        // debug and timing
+        // ----------------------------------
+        // clock for timing
+        Gadgetron::GadgetronTimer gt_timer1_;
+        Gadgetron::GadgetronTimer gt_timer2_;
+        Gadgetron::GadgetronTimer gt_timer3_;
+
+        bool performTiming_;
+
+        // exporter
+        Gadgetron::gtPlus::gtPlusIOAnalyze gt_exporter_;
+
+        // debug folder
+        std::string debugFolder_;
+
+    protected:
+
+        TargetType* target_;
+        SourceType* source_;
+
+        /// background value, used to mark regions in the target image which will not be warped
+        ValueType bg_value_;
+
+        /// store the multi-resolution images for every pyramid level
+        std::vector<TargetType> target_pyramid_;
+        std::vector<TargetType> source_pyramid_; // NOTE(review): element type is TargetType, not SourceType -- confirm this is intended when DIn != DOut
+
+        /// store the boundary handler and interpolator for warpers; the pointers are deleted by the destructor
+        std::vector<BoundaryHandlerTargetType*> target_bh_warper_;
+        std::vector<InterpTargetType*> target_interp_warper_;
+
+        std::vector<BoundaryHandlerSourceType*> source_bh_warper_;
+        std::vector<InterpSourceType*> source_interp_warper_;
+
+        /// store the boundary handler and interpolator for pyramid construction; the pointers are deleted by the destructor
+        BoundaryHandlerTargetType* target_bh_pyramid_construction_;
+        InterpTargetType* target_interp_pyramid_construction_;
+
+        BoundaryHandlerSourceType* source_bh_pyramid_construction_;
+        InterpSourceType* source_interp_pyramid_construction_;
+
+        /// store warpers for every pyramid level
+        std::vector<WarperType> warper_pyramid_;
+
+        /// store the image dissimilarity for every pyramid level; the pointers are deleted by the destructor
+        std::vector<DissimilarityType*> dissimilarity_pyramid_;
+
+        /// store the "inverse" warpers for every pyramid level
+        std::vector<WarperType> warper_pyramid_inverse_;
+
+        /// store the "inverse" image dissimilarity for every pyramid level; the pointers are deleted by the destructor
+        std::vector<DissimilarityType*> dissimilarity_pyramid_inverse_;
+    };
+
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    hoImageRegRegister<ValueType, CoordType, DIn, DOut>::
+    hoImageRegRegister(unsigned int resolution_pyramid_levels, ValueType bg_value) 
+    : performTiming_(false), target_(NULL), source_(NULL), bg_value_(bg_value)
+    {
+        // Set the defaults shared by all registers and size every per-level container to resolution_pyramid_levels entries.
+        // All owned pointers start out NULL so that the destructor can delete them safely; the initializer list follows declaration order.
+        gt_timer1_.set_timing_in_destruction(false);
+        gt_timer2_.set_timing_in_destruction(false);
+        gt_timer3_.set_timing_in_destruction(false);
+
+        use_world_coordinates_ = true;
+        resolution_pyramid_levels_ = resolution_pyramid_levels;
+        resolution_pyramid_divided_by_2_ = true;
+
+        // one ratio set per transition between levels; guarded because "levels-1" is unsigned and wraps around for 0 levels
+        const unsigned int num_transitions = (resolution_pyramid_levels_>0) ? resolution_pyramid_levels_-1 : 0;
+        resolution_pyramid_downsample_ratio_.resize(num_transitions, std::vector<float>(DIn, 2.0f) );
+        resolution_pyramid_blurring_sigma_.resize(resolution_pyramid_levels_, std::vector<float>(DIn, 0.0f));
+
+        boundary_handler_type_warper_.resize(resolution_pyramid_levels_, GT_BOUNDARY_CONDITION_FIXEDVALUE);
+        interp_type_warper_.resize(resolution_pyramid_levels_, GT_IMAGE_INTERPOLATOR_LINEAR);
+
+        boundary_handler_type_pyramid_construction_ = GT_BOUNDARY_CONDITION_BORDERVALUE;
+        interp_type_pyramid_construction_ = GT_IMAGE_INTERPOLATOR_LINEAR;
+
+        dissimilarity_type_.resize(resolution_pyramid_levels_, GT_IMAGE_DISSIMILARITY_NMI);
+        solver_type_.resize(resolution_pyramid_levels_, GT_IMAGE_REG_SOLVER_DOWNHILL);
+
+        target_bh_warper_.resize(resolution_pyramid_levels_, NULL);
+        target_interp_warper_.resize(resolution_pyramid_levels_, NULL);
+
+        source_bh_warper_.resize(resolution_pyramid_levels_, NULL);
+        source_interp_warper_.resize(resolution_pyramid_levels_, NULL);
+
+        target_bh_pyramid_construction_ = NULL;
+        target_interp_pyramid_construction_ = NULL;
+
+        source_bh_pyramid_construction_ = NULL;
+        source_interp_pyramid_construction_ = NULL;
+
+        dissimilarity_pyramid_.resize(resolution_pyramid_levels_, NULL);
+        dissimilarity_pyramid_inverse_.resize(resolution_pyramid_levels_, NULL);
+
+        dissimilarity_LocalCCR_sigmaArg_.resize(resolution_pyramid_levels_, std::vector<ValueType>(DOut, 2.0) );
+
+        dissimilarity_hist_num_bin_target_.resize(resolution_pyramid_levels_, 64);
+        dissimilarity_hist_num_bin_warpped_.resize(resolution_pyramid_levels_, 64);
+        dissimilarity_hist_pv_interpolation_ = false;
+        dissimilarity_hist_step_size_ignore_pixel_.resize(resolution_pyramid_levels_, 1);
+
+        dissimilarity_MI_betaArg_.resize(resolution_pyramid_levels_, 2.0);
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    hoImageRegRegister<ValueType, CoordType, DIn, DOut>::~hoImageRegRegister()
+    {
+        // Each container is walked by its own size(): resolution_pyramid_levels_ is a public member that
+        // can change after the containers were sized, so it is not a safe loop bound here.
+        size_t ii;
+
+        for ( ii=0; ii<target_bh_warper_.size(); ii++ ) { delete target_bh_warper_[ii]; }
+        for ( ii=0; ii<target_interp_warper_.size(); ii++ ) { delete target_interp_warper_[ii]; }
+
+        for ( ii=0; ii<source_bh_warper_.size(); ii++ ) { delete source_bh_warper_[ii]; }
+        for ( ii=0; ii<source_interp_warper_.size(); ii++ ) { delete source_interp_warper_[ii]; }
+
+        for ( ii=0; ii<dissimilarity_pyramid_.size(); ii++ ) { delete dissimilarity_pyramid_[ii]; }
+        for ( ii=0; ii<dissimilarity_pyramid_inverse_.size(); ii++ ) { delete dissimilarity_pyramid_inverse_[ii]; }
+
+        // handlers and interpolators used for pyramid construction (deleting a NULL pointer is a no-op)
+        delete target_bh_pyramid_construction_;
+        delete target_interp_pyramid_construction_;
+        delete source_bh_pyramid_construction_;
+        delete source_interp_pyramid_construction_;
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    hoImageRegDissimilarity<ValueType, DOut>* hoImageRegRegister<ValueType, CoordType, DIn, DOut>::createDissimilarity(GT_IMAGE_DISSIMILARITY v, unsigned int level)
+    {
+        // Allocate the dissimilarity measure of type v with new, configure it from the settings of pyramid level "level" and from bg_value_.
+        // Returns NULL, after logging an error, when v is not a recognized type; the returned pointer is not stored or freed here.
+        hoImageRegDissimilarity<ValueType, DOut>* res = NULL;
+
+        switch (v)
+        {
+            case GT_IMAGE_DISSIMILARITY_SSD:
+                res = new hoImageRegDissimilaritySSD<ValueType, DOut>();
+                break;
+
+            case GT_IMAGE_DISSIMILARITY_LocalCCR:
+            {
+                hoImageRegDissimilarityLocalCCR<ValueType, DOut>* ptr = new hoImageRegDissimilarityLocalCCR<ValueType, DOut>();
+                for ( unsigned int ii=0; ii<DOut; ii++ )
+                {
+                    ptr->sigmaArg_[ii] = dissimilarity_LocalCCR_sigmaArg_[level][ii];
+                }
+                res = ptr;
+            }
+                break;
+
+            case GT_IMAGE_DISSIMILARITY_MI:
+            {
+                hoImageRegDissimilarityMutualInformation<ValueType, DOut>* ptr = new hoImageRegDissimilarityMutualInformation<ValueType, DOut>();
+
+                ptr->betaArg_[0] = dissimilarity_MI_betaArg_[level];
+                ptr->betaArg_[1] = dissimilarity_MI_betaArg_[level];
+                ptr->num_bin_target_ = dissimilarity_hist_num_bin_target_[level];
+                ptr->num_bin_warpped_ = dissimilarity_hist_num_bin_warpped_[level];
+                ptr->pv_interpolation_ = dissimilarity_hist_pv_interpolation_;
+                ptr->step_size_ignore_pixel_ = dissimilarity_hist_step_size_ignore_pixel_[level];
+
+                res = ptr;
+            }
+                break;
+
+            case GT_IMAGE_DISSIMILARITY_NMI:
+            {
+                hoImageRegDissimilarityNormalizedMutualInformation<ValueType, DOut>* ptr = new hoImageRegDissimilarityNormalizedMutualInformation<ValueType, DOut>();
+
+                ptr->num_bin_target_ = dissimilarity_hist_num_bin_target_[level];
+                ptr->num_bin_warpped_ = dissimilarity_hist_num_bin_warpped_[level];
+                ptr->pv_interpolation_ = dissimilarity_hist_pv_interpolation_;
+                ptr->step_size_ignore_pixel_ = dissimilarity_hist_step_size_ignore_pixel_[level];
+
+                res = ptr;
+            }
+                break;
+
+            default:
+                GADGET_ERROR_MSG("Unrecognized image dissimilarity type : " << v);
+                return NULL;
+        }
+
+        res->setBackgroundValue(bg_value_);
+
+        return res;
+    }
+
+    /// Validate the configuration, build the target and source resolution pyramids, and create
+    /// the per-level boundary handlers, interpolators and dissimilarity objects.
+    ///
+    /// Level 0 of each pyramid is a copy of the input image; every further level is produced by
+    /// downsampling the previous one (by-2 averaging, or ratio-based downsampling followed by
+    /// Gaussian filtering).
+    ///
+    /// \return true on success; false if a precondition check fails, a dissimilarity object could
+    ///         not be created, or an exception was caught
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut>
+    bool hoImageRegRegister<ValueType, CoordType, DIn, DOut>::initialize()
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(target_!=NULL);
+            GADGET_CHECK_RETURN_FALSE(source_!=NULL);
+
+            // every per-level configuration array must match the number of pyramid levels
+            GADGET_CHECK_RETURN_FALSE(resolution_pyramid_downsample_ratio_.size()==resolution_pyramid_levels_-1);
+            GADGET_CHECK_RETURN_FALSE(resolution_pyramid_blurring_sigma_.size()==resolution_pyramid_levels_);
+
+            GADGET_CHECK_RETURN_FALSE(boundary_handler_type_warper_.size()==resolution_pyramid_levels_);
+            GADGET_CHECK_RETURN_FALSE(interp_type_warper_.size()==resolution_pyramid_levels_);
+
+            GADGET_CHECK_RETURN_FALSE(dissimilarity_type_.size()==resolution_pyramid_levels_);
+            GADGET_CHECK_RETURN_FALSE(solver_type_.size()==resolution_pyramid_levels_);
+
+            target_pyramid_.resize(resolution_pyramid_levels_);
+            source_pyramid_.resize(resolution_pyramid_levels_);
+
+            // level 0 is the input image itself
+            target_pyramid_[0] = *target_;
+            source_pyramid_[0] = *source_;
+
+            target_bh_pyramid_construction_ = createBoundaryHandler<TargetType>(boundary_handler_type_pyramid_construction_);
+            target_interp_pyramid_construction_ = createInterpolator<TargetType, DOut>(interp_type_pyramid_construction_);
+            target_interp_pyramid_construction_->setBoundaryHandler(*target_bh_pyramid_construction_);
+
+            source_bh_pyramid_construction_ = createBoundaryHandler<SourceType>(boundary_handler_type_pyramid_construction_);
+            source_interp_pyramid_construction_ = createInterpolator<SourceType, DIn>(interp_type_pyramid_construction_);
+            source_interp_pyramid_construction_->setBoundaryHandler(*source_bh_pyramid_construction_);
+
+            /// allocate all objects
+            unsigned int ii, jj;
+            for ( ii=0; ii<resolution_pyramid_levels_-1; ii++ )
+            {
+                // create pyramid : target level ii+1 from target level ii
+                target_bh_pyramid_construction_->setArray(target_pyramid_[ii]);
+                target_interp_pyramid_construction_->setArray(target_pyramid_[ii]);
+
+                if ( use_world_coordinates_ )
+                {
+                    if ( resolution_pyramid_divided_by_2_ )
+                    {
+                        Gadgetron::downsampleImageBy2WithAveraging(target_pyramid_[ii], *target_bh_pyramid_construction_, target_pyramid_[ii+1]);
+                    }
+                    else
+                    {
+                        std::vector<float> ratio = resolution_pyramid_downsample_ratio_[ii];
+                        Gadgetron::downsampleImage(target_pyramid_[ii], *target_interp_pyramid_construction_, target_pyramid_[ii+1], &ratio[0]);
+
+                        std::vector<float> sigma = resolution_pyramid_blurring_sigma_[ii+1];
+                        for ( jj=0; jj<DOut; jj++ )
+                        {
+                            sigma[jj] /= target_pyramid_[ii+1].get_pixel_size(jj); // world to pixel
+                        }
+
+                        Gadgetron::filterGaussian(target_pyramid_[ii+1], &sigma[0]);
+                    }
+                }
+                else
+                {
+                    std::vector<float> ratio = resolution_pyramid_downsample_ratio_[ii];
+
+                    // use the by-2 averaging path only if every dimension has ratio 2
+                    bool downsampledBy2 = true;
+                    for ( jj=0; jj<DOut; jj++ )
+                    {
+                        if ( GT_ABS(ratio[jj]-2.0f) > FLT_EPSILON )
+                        {
+                            downsampledBy2 = false;
+                            break;
+                        }
+                    }
+
+                    if ( downsampledBy2 )
+                    {
+                        Gadgetron::downsampleImageBy2WithAveraging(target_pyramid_[ii], *target_bh_pyramid_construction_, target_pyramid_[ii+1]);
+                        // Gadgetron::downsampleImage(target_pyramid_[ii], *target_interp_pyramid_construction_, target_pyramid_[ii+1], &ratio[0]);
+                    }
+                    else
+                    {
+                        Gadgetron::downsampleImage(target_pyramid_[ii], *target_interp_pyramid_construction_, target_pyramid_[ii+1], &ratio[0]);
+                        std::vector<float> sigma = resolution_pyramid_blurring_sigma_[ii+1];
+                        Gadgetron::filterGaussian(target_pyramid_[ii+1], &sigma[0]);
+                    }
+                }
+
+                // source : source level ii+1 from source level ii
+
+                source_bh_pyramid_construction_->setArray(source_pyramid_[ii]);
+                source_interp_pyramid_construction_->setArray(source_pyramid_[ii]);
+
+                if ( use_world_coordinates_ )
+                {
+                    if ( resolution_pyramid_divided_by_2_ )
+                    {
+                        Gadgetron::downsampleImageBy2WithAveraging(source_pyramid_[ii], *source_bh_pyramid_construction_, source_pyramid_[ii+1]);
+                    }
+                    else
+                    {
+                        std::vector<float> ratio = resolution_pyramid_downsample_ratio_[ii];
+                        Gadgetron::downsampleImage(source_pyramid_[ii], *source_interp_pyramid_construction_, source_pyramid_[ii+1], &ratio[0]);
+
+                        std::vector<float> sigma = resolution_pyramid_blurring_sigma_[ii+1];
+                        for ( jj=0; jj<DOut; jj++ )
+                        {
+                            sigma[jj] /= source_pyramid_[ii+1].get_pixel_size(jj); // world to pixel
+                        }
+
+                        Gadgetron::filterGaussian(source_pyramid_[ii+1], &sigma[0]);
+                    }
+                }
+                else
+                {
+                    std::vector<float> ratio = resolution_pyramid_downsample_ratio_[ii];
+
+                    bool downsampledBy2 = true;
+                    for ( jj=0; jj<DOut; jj++ )
+                    {
+                        if ( GT_ABS(ratio[jj]-2.0f) > FLT_EPSILON )
+                        {
+                            downsampledBy2 = false;
+                            break;
+                        }
+                    }
+
+                    if ( downsampledBy2 )
+                    {
+                        Gadgetron::downsampleImageBy2WithAveraging(source_pyramid_[ii], *source_bh_pyramid_construction_, source_pyramid_[ii+1]);
+                        //Gadgetron::downsampleImage(source_pyramid_[ii], *source_interp_pyramid_construction_, source_pyramid_[ii+1], &ratio[0]);
+                    }
+                    else
+                    {
+                        Gadgetron::downsampleImage(source_pyramid_[ii], *source_interp_pyramid_construction_, source_pyramid_[ii+1], &ratio[0]);
+                        std::vector<float> sigma = resolution_pyramid_blurring_sigma_[ii+1];
+                        Gadgetron::filterGaussian(source_pyramid_[ii+1], &sigma[0]);
+                    }
+                }
+            }
+
+            // per-level warper helpers and dissimilarity measures
+            for ( ii=0; ii<resolution_pyramid_levels_; ii++ )
+            {
+                target_bh_warper_[ii] = createBoundaryHandler<TargetType>(boundary_handler_type_warper_[ii]);
+                target_bh_warper_[ii]->setArray(target_pyramid_[ii]);
+
+                target_interp_warper_[ii] = createInterpolator<TargetType, DOut>(interp_type_warper_[ii]);
+                target_interp_warper_[ii]->setArray(target_pyramid_[ii]);
+                target_interp_warper_[ii]->setBoundaryHandler(*target_bh_warper_[ii]);
+
+                source_bh_warper_[ii] = createBoundaryHandler<SourceType>(boundary_handler_type_warper_[ii]);
+                source_bh_warper_[ii]->setArray(source_pyramid_[ii]);
+
+                source_interp_warper_[ii] = createInterpolator<SourceType, DIn>(interp_type_warper_[ii]);
+                source_interp_warper_[ii]->setArray(source_pyramid_[ii]);
+                source_interp_warper_[ii]->setBoundaryHandler(*source_bh_warper_[ii]);
+
+                // createDissimilarity yields NULL for an unrecognized type; fail instead of dereferencing it
+                dissimilarity_pyramid_[ii] = createDissimilarity(dissimilarity_type_[ii], ii);
+                GADGET_CHECK_RETURN_FALSE(dissimilarity_pyramid_[ii]!=NULL);
+                dissimilarity_pyramid_[ii]->initialize(target_pyramid_[ii]);
+                dissimilarity_pyramid_[ii]->debugFolder_ = this->debugFolder_;
+
+                dissimilarity_pyramid_inverse_[ii] = createDissimilarity(dissimilarity_type_[ii], ii);
+                GADGET_CHECK_RETURN_FALSE(dissimilarity_pyramid_inverse_[ii]!=NULL);
+                dissimilarity_pyramid_inverse_[ii]->initialize(source_pyramid_[ii]);
+                dissimilarity_pyramid_inverse_[ii]->debugFolder_ = this->debugFolder_;
+            }
+
+            // optionally export every pyramid level for debugging
+            if ( !debugFolder_.empty() )
+            {
+                for ( ii=0; ii<resolution_pyramid_levels_; ii++ )
+                {
+                    std::ostringstream ostr_t;
+                    ostr_t << "target_" << ii;
+
+                    GADGET_EXPORT_IMAGE(debugFolder_, gt_exporter_, target_pyramid_[ii], ostr_t.str());
+
+                    std::ostringstream ostr_s;
+                    ostr_s << "source_" << ii;
+
+                    GADGET_EXPORT_IMAGE(debugFolder_, gt_exporter_, source_pyramid_[ii], ostr_s.str());
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegRegister<ValueType, CoordType, DIn, DOut>::initialize() ... ");
+            // report the failure to the caller instead of falling through to "return true"
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Set the target image; only the address of target is stored (no copy is made here).
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut>
+    inline void hoImageRegRegister<ValueType, CoordType, DIn, DOut>::setTarget(TargetType& target)
+    {
+        target_ = &target;
+    }
+
+    /// Set the source image; only the address of source is stored (no copy is made here).
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut>
+    inline void hoImageRegRegister<ValueType, CoordType, DIn, DOut>::setSource(SourceType& source)
+    {
+        this->source_ = &source;
+    }
+
+    /// Write the register configuration to os: dimensions, value/coordinate type names, the
+    /// world-coordinate flag, the number of pyramid levels, and the per-level downsample ratios,
+    /// blurring sigmas, boundary handler / interpolator names, dissimilarity names and solver names.
+    ///
+    /// \param os  output stream receiving the text
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut>
+    void hoImageRegRegister<ValueType, CoordType, DIn, DOut>::printContent(std::ostream& os) const
+    {
+        os << "Input dimension is : " << DIn << std::endl;
+        os << "Output dimension is : " << DOut << std::endl;
+
+        std::string elemTypeName = std::string(typeid(ValueType).name());
+        os << "Image data type is : " << elemTypeName << std::endl;
+
+        elemTypeName = std::string(typeid(CoordType).name());
+        os << "Transformation coordinate data type is : " << elemTypeName << std::endl;
+
+        os << "Whether to perform world coordinate registration is : " << use_world_coordinates_ << std::endl;
+        os << "Number of resolution pyramid levels is : " << resolution_pyramid_levels_ << std::endl;
+
+        os << "------------" << std::endl;
+        os << "Downsample ratio of the resolution pyramid for every dimension and every level is : " << std::endl;
+
+        unsigned int ii, jj;
+        // "ii+1 < levels" instead of "ii < levels-1": the latter wraps around when levels is 0
+        for ( ii=0; ii+1<resolution_pyramid_levels_; ii++ )
+        {
+            os << "Level " << ii << " [ ";
+            for ( jj=0; jj<resolution_pyramid_downsample_ratio_[ii].size(); jj++ )
+            {
+                os << resolution_pyramid_downsample_ratio_[ii][jj] << " ";
+            }
+            os << " ] " << std::endl;
+        }
+
+        os << "------------" << std::endl;
+        os << "Gaussian blurring sigma for every dimension and every level is : " << std::endl;
+        for ( ii=0; ii<resolution_pyramid_levels_; ii++ )
+        {
+            os << "Level " << ii << " [ ";
+            for ( jj=0; jj<resolution_pyramid_blurring_sigma_[ii].size(); jj++ )
+            {
+                os << resolution_pyramid_blurring_sigma_[ii][jj] << " ";
+            }
+            os << " ] " << std::endl;
+        }
+
+        os << "------------" << std::endl;
+        os << "Boundary handler and interpolator type for warper is : " << std::endl;
+        for ( ii=0; ii<resolution_pyramid_levels_; ii++ )
+        {
+            os << " Level " << ii << " - " 
+                << getBoundaryHandlerName(boundary_handler_type_warper_[ii]) 
+                << " - " << getInterpolatorName(interp_type_warper_[ii]) << std::endl;
+        }
+
+        os << "------------" << std::endl;
+        os << "Boundary handler and interpolator type for pyramid construction is : " << std::endl;
+        // a single handler/interpolator type is used for pyramid construction, so every level prints the same names
+        for ( ii=0; ii<resolution_pyramid_levels_; ii++ )
+        {
+            os << " Level " << ii << " - " 
+                << getBoundaryHandlerName(boundary_handler_type_pyramid_construction_) 
+                << " - " << getInterpolatorName(interp_type_pyramid_construction_) << std::endl;
+        }
+
+        os << "------------" << std::endl;
+        os << "Image dissimilarity is : " << std::endl;
+        for ( ii=0; ii<resolution_pyramid_levels_; ii++ )
+        {
+            os << " Level " << ii << " - " 
+                << getDissimilarityName(dissimilarity_type_[ii]) << std::endl;
+        }
+
+        os << "------------" << std::endl;
+        os << "Image registration solver is : " << std::endl;
+        for ( ii=0; ii<resolution_pyramid_levels_; ii++ )
+        {
+            os << " Level " << ii << " - " 
+                << getImageRegSolverName(solver_type_[ii]) << std::endl;
+        }
+    }
+
+    /// Print a header line, the register configuration (via printContent) and a closing rule to os.
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut>
+    void hoImageRegRegister<ValueType, CoordType, DIn, DOut>::print(std::ostream& os) const
+    {
+        os << "--------------Gagdgetron image register -------------" << std::endl;
+
+        this->printContent(os);
+
+        os << "-----------------------------------------------------" << std::endl;
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/solver/hoImageRegDeformationFieldBidirectionalSolver.h b/toolboxes/registration/optical_flow/cpu/solver/hoImageRegDeformationFieldBidirectionalSolver.h
new file mode 100644
index 0000000..e926d21
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/solver/hoImageRegDeformationFieldBidirectionalSolver.h
@@ -0,0 +1,602 @@
+/** \file   hoImageRegDeformationFieldBidirectionalSolver.h
+    \brief  Implement the PDE solver for bidirectional deformation field non-linear image registration
+
+            The PDE solver is a classical gradient descent method, derived from the calculus of variation:
+
+            [1] Gerardo Hermosillo, Christophe Chefd'Hotel, Olivier Faugeras. Variational Methods for Multimodal Image Matching. 
+            International Journal of Computer Vision. December 2002, Volume 50, Issue 3, pp 329-343.
+            http://link.springer.com/article/10.1023%2FA%3A1020830525823
+
+            [2] Gerardo Hermosillo. Variational Methods for Multimodal Image Matching. PhD Thesis, UNIVERSITÉ DE NICE - SOPHIA ANTIPOLIS. May 2002.
+            http://webdocs.cs.ualberta.ca/~dana/readingMedIm/papers/hermosilloPhD.pdf
+
+            [3] Christophe Chefd'Hotel, Gerardo Hermosillo, Olivier D. Faugeras: Flows of diffeomorphisms for multimodal image registration. ISBI 2002: 753-756.
+            http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=1029367&tag=1
+
+            [4] Christophe Chefd'Hotel, Geometric Methods in Computer Vision and Image Processing : Contributions and Applications. PhD Thesis, April 2005.
+
+            The code is based on the listed source code at page 185 - 187 in ref [2] and extended according to the ref [3] and [4].
+
+            [5] Christoph Guetter, Hui Xue, Christophe Chefd'Hotel, Jens Guehring: Efficient symmetric and inverse-consistent deformable registration through interleaved optimization. ISBI 2011: 590-593.
+
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoImageRegDeformationFieldSolver.h"
+
+#ifdef max
+#undef max
+#endif // max
+
+#ifdef min
+#undef min
+#endif // min
+
+namespace Gadgetron
+{
+    /// Solver that estimates a forward and an inverse deformation field together.
+    /// It extends hoImageRegDeformationFieldSolver with a second ("inverse") set of
+    /// warper / interpolator / dissimilarity / transform objects and working buffers.
+    ///
+    /// ValueType: image pixel value type
+    /// CoordType: transformation data type
+    /// D: image dimension
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    class hoImageRegDeformationFieldBidirectionalSolver : public hoImageRegDeformationFieldSolver<ValueType, CoordType, D>
+    {
+    public:
+
+        typedef hoImageRegDeformationFieldBidirectionalSolver<ValueType, CoordType, D> Self;
+        typedef hoImageRegDeformationFieldSolver<ValueType, CoordType, D> BaseClass;
+
+        typedef hoNDImage<ValueType, D> TargetType;
+        typedef hoNDImage<ValueType, D> SourceType;
+
+        typedef hoNDImage<ValueType, 2> Target2DType;
+        typedef Target2DType Source2DType;
+
+        typedef hoNDImage<ValueType, 3> Target3DType;
+        // the 3D source type aliases the 3D target type (it previously aliased the 2D type)
+        typedef Target3DType Source3DType;
+
+        typedef ValueType T;
+        typedef ValueType element_type;
+        typedef ValueType value_type;
+
+        typedef CoordType coord_type;
+
+        typedef typename BaseClass::InterpolatorType InterpolatorType;
+
+        typedef hoImageRegDeformationField<CoordType, D> TransformationType;
+        typedef typename TransformationType::input_point_type input_point_type;
+        typedef typename TransformationType::output_point_type output_point_type;
+        typedef typename TransformationType::jacobian_position_type jacobian_position_type;
+        typedef typename TransformationType::DeformationFieldType DeformationFieldType;
+
+        typedef typename BaseClass::ImageRegWarperType ImageRegWarperType;
+
+        typedef typename BaseClass::ImageRegDissimilarityType ImageRegDissimilarityType;
+
+        hoImageRegDeformationFieldBidirectionalSolver();
+        virtual ~hoImageRegDeformationFieldBidirectionalSolver();
+
+        /// set the forward / inverse transform; only the address is stored
+        void setTransform(TransformationType& transform) { transform_ = &transform; }
+        void setTransformInverse(TransformationType& transform) { transform_inverse_ = &transform; }
+
+        /// check that the inverse-direction objects are set and prepare the inverse working buffers
+        virtual bool initialize();
+
+        /// run the bidirectional optimization; returns false on failure
+        virtual bool solve();
+
+        virtual void print(std::ostream& os) const;
+
+        /// set the inverse-direction dissimilarity / warper / interpolator; only the address is stored
+        void setDissimilarityInverse(ImageRegDissimilarityType& dissimilarity) { dissimilarity_inverse_ = &dissimilarity; }
+        void setWarperInverse(ImageRegWarperType& warper) { warper_inverse_ = &warper; }
+        void setInterpolatorInverse(InterpolatorType& interp) { interp_inverse_ = &interp; }
+
+        /// update the estimate of transform_inverse from transform, repeating iter_num times;
+        /// deform_delta_inverse is a D-element array of working fields
+        virtual bool enforceInverseTransform(TransformationType* transform, TransformationType* transform_inverse, DeformationFieldType* deform_delta_inverse, unsigned int iter_num=10);
+
+        /// number of iterations to improve the estimation of the inverse transform
+        unsigned int inverse_deform_enforce_iter_;
+        /// weight to update the estimation of the inverse transform, must be within [0 1]
+        CoordType inverse_deform_enforce_weight_;
+
+        using BaseClass::regularization_hilbert_strength_;
+        using BaseClass::apply_in_FOV_constraint_;
+        using BaseClass::iter_num_;
+        using BaseClass::max_iter_num_;
+        using BaseClass::dissimilarity_thres_;
+        using BaseClass::parameter_thres_;
+        using BaseClass::div_num_;
+        using BaseClass::step_size_para_;
+        using BaseClass::step_size_div_para_;
+        using BaseClass::verbose_;
+        using BaseClass::gt_timer1_;
+        using BaseClass::gt_timer2_;
+        using BaseClass::gt_timer3_;
+        using BaseClass::performTiming_;
+        using BaseClass::gt_exporter_;
+        using BaseClass::debugFolder_;
+
+    protected:
+
+        using BaseClass::transform_;
+
+        using BaseClass::curr_dissimilarity_;
+        using BaseClass::prev_dissimilarity_;
+
+        using BaseClass::deform_delta_;
+        using BaseClass::deform_updated_;
+        using BaseClass::deform_norm_;
+        using BaseClass::deform_norm_one_dim_;
+        using BaseClass::gradient_warpped_;
+
+        using BaseClass::target_;
+        using BaseClass::source_;
+        using BaseClass::warpped_;
+        using BaseClass::bg_value_;
+        using BaseClass::interp_;
+        using BaseClass::warper_;
+        using BaseClass::dissimilarity_;
+        using BaseClass::use_world_coordinate_;
+        using BaseClass::deform_delta_scale_factor_;
+
+        /// for the inverse transformation
+
+        /// warped image of the inverse direction; sized from the source image in initialize()
+        SourceType warpped_inverse_;
+
+        /// not owned: set through setInterpolatorInverse()
+        InterpolatorType* interp_inverse_;
+
+        /// not owned: set through setWarperInverse()
+        ImageRegWarperType* warper_inverse_;
+
+        /// not owned: set through setDissimilarityInverse()
+        ImageRegDissimilarityType* dissimilarity_inverse_;
+
+        /// not owned: set through setTransformInverse()
+        TransformationType* transform_inverse_;
+
+        /// current / previous dissimilarity value of the inverse direction
+        ValueType curr_dissimilarity_inverse_;
+        ValueType prev_dissimilarity_inverse_;
+
+        /// per-dimension working fields of the inverse direction
+        DeformationFieldType deform_delta_inverse_[D];
+        DeformationFieldType deform_updated_inverse_[D];
+
+        DeformationFieldType deform_norm_inverse_;
+        DeformationFieldType deform_norm_one_dim_inverse_;
+
+        TargetType gradient_warpped_inverse_[D];
+
+        /// per-dimension ratio pixel_size(0)/pixel_size(ii) of the source image, filled in initialize()
+        coord_type deform_delta_scale_factor_inverse_[D];
+    };
+
+    /// Default constructor: 10 inverse-enforcement iterations, weight 0.5.
+    /// The inverse-direction pointers start as NULL so that the "!=NULL" checks in
+    /// initialize() are meaningful when a setter was never called.
+    template<typename ValueType, typename CoordType, unsigned int D>
+    hoImageRegDeformationFieldBidirectionalSolver<ValueType, CoordType, D>::
+    hoImageRegDeformationFieldBidirectionalSolver() : BaseClass(), inverse_deform_enforce_iter_(10), inverse_deform_enforce_weight_(0.5),
+                                                      interp_inverse_(NULL), warper_inverse_(NULL), dissimilarity_inverse_(NULL), transform_inverse_(NULL)
+    {
+    }
+
+    /// Destructor; the body is empty, so none of the inverse-direction pointers are deleted here.
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    hoImageRegDeformationFieldBidirectionalSolver<ValueType, CoordType, D>::~hoImageRegDeformationFieldBidirectionalSolver()
+    {
+    }
+
+    /// Prepare the solver for the inverse direction.
+    ///
+    /// Checks that the inverse interpolator, warper, dissimilarity and transform have been set,
+    /// runs BaseClass::initialize(), wires the inverse warper and dissimilarity, and sizes the
+    /// inverse working fields from the source image.
+    ///
+    /// \return false if an inverse-direction object is missing or BaseClass::initialize() fails
+    template<typename ValueType, typename CoordType, unsigned int D>
+    bool hoImageRegDeformationFieldBidirectionalSolver<ValueType, CoordType, D>::initialize()
+    {
+        GADGET_CHECK_RETURN_FALSE(interp_inverse_!=NULL);
+        GADGET_CHECK_RETURN_FALSE(warper_inverse_!=NULL);
+        GADGET_CHECK_RETURN_FALSE(dissimilarity_inverse_!=NULL);
+        GADGET_CHECK_RETURN_FALSE(transform_inverse_!=NULL);
+
+        GADGET_CHECK_RETURN_FALSE(BaseClass::initialize());
+
+        warper_inverse_->setInterpolator(*interp_inverse_);
+        warper_inverse_->setBackgroundValue(bg_value_);
+
+        dissimilarity_inverse_->setBackgroundValue(bg_value_);
+
+        // the inverse direction warps onto the source grid
+        if ( !warpped_inverse_.dimensions_equal(*source_) )
+        {
+            warpped_inverse_ = *source_;
+        }
+
+        dissimilarity_inverse_->initialize(*source_);
+
+        warper_inverse_->setTransformation(*transform_inverse_);
+
+        deform_norm_inverse_.copyImageInfo(*source_);
+        deform_norm_one_dim_inverse_.copyImageInfo(*source_);
+
+        unsigned int ii;
+        for ( ii=0; ii<D; ii++ )
+        {
+            // zero the inverse fields that were just sized; previously only the forward
+            // fields were cleared here, leaving the inverse ones uncleared
+            deform_delta_inverse_[ii].copyImageInfo(*source_);
+            Gadgetron::clear(deform_delta_inverse_[ii]);
+
+            deform_updated_inverse_[ii].copyImageInfo(*source_);
+            Gadgetron::clear(deform_updated_inverse_[ii]);
+
+            // the forward fields are still cleared, as before
+            Gadgetron::clear(deform_delta_[ii]);
+            Gadgetron::clear(deform_updated_[ii]);
+
+            gradient_warpped_inverse_[ii].copyImageInfo(*source_);
+        }
+
+        // scale factor of dimension ii relative to dimension 0 of the source image
+        deform_delta_scale_factor_inverse_[0] = 1;
+        for ( ii=0; ii<D; ii++ )
+        {
+            deform_delta_scale_factor_inverse_[ii] = source_->get_pixel_size(0)/source_->get_pixel_size(ii);
+        }
+
+        return true;
+    }
+
+    /// Run the bidirectional optimization.
+    ///
+    /// Iterations alternate between one forward update (target -> source) and one inverse update
+    /// (source -> target). After each update the opposite transform is refreshed with two
+    /// iterations of enforceInverseTransform. The loop ends when solve_once requests a stop or
+    /// max_iter_num_ is reached; a final enforceInverseTransform with
+    /// inverse_deform_enforce_iter_ iterations follows.
+    ///
+    /// \return false if any step fails or an exception is caught, true otherwise
+    template<typename ValueType, typename CoordType, unsigned int D>
+    bool hoImageRegDeformationFieldBidirectionalSolver<ValueType, CoordType, D>::solve()
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(this->initialize());
+
+            // start both directions from the largest representable dissimilarity
+            prev_dissimilarity_ = std::numeric_limits<ValueType>::max();
+            prev_dissimilarity_inverse_ = std::numeric_limits<ValueType>::max();
+
+            unsigned int numDivisions = 0;
+
+            dissimilarity_->initialize(*target_);
+            dissimilarity_inverse_->initialize(*source_);
+
+            bool forwardTurn = true;
+            bool finished = false;
+
+            for ( iter_num_=0; iter_num_<max_iter_num_; iter_num_++ )
+            {
+                if ( !forwardTurn )
+                {
+                    // inverse direction : source -> target
+                    GADGET_CHECK_RETURN_FALSE( this->solve_once(source_, target_, warpped_inverse_, iter_num_, max_iter_num_, 
+                                                                numDivisions, curr_dissimilarity_inverse_, prev_dissimilarity_inverse_, 
+                                                                transform_inverse_, *warper_inverse_, *dissimilarity_inverse_,
+                                                                finished, 
+                                                                gradient_warpped_inverse_, deform_delta_inverse_, 
+                                                                deform_updated_inverse_, deform_norm_inverse_, deform_norm_one_dim_inverse_,
+                                                                deform_delta_scale_factor_inverse_) );
+
+                    if ( finished )
+                    {
+                        break;
+                    }
+
+                    GADGET_CHECK_RETURN_FALSE( this->enforceInverseTransform(transform_inverse_, transform_, deform_delta_, 2) );
+                }
+                else
+                {
+                    // forward direction : target -> source
+                    GADGET_CHECK_RETURN_FALSE( this->solve_once(target_, source_, warpped_, iter_num_, max_iter_num_, 
+                                                                numDivisions, curr_dissimilarity_, prev_dissimilarity_, 
+                                                                transform_, *warper_, *dissimilarity_,
+                                                                finished, 
+                                                                gradient_warpped_, deform_delta_, 
+                                                                deform_updated_, deform_norm_, deform_norm_one_dim_,
+                                                                deform_delta_scale_factor_) );
+
+                    if ( finished )
+                    {
+                        break;
+                    }
+
+                    GADGET_CHECK_RETURN_FALSE( this->enforceInverseTransform(transform_, transform_inverse_, deform_delta_inverse_, 2) );
+                }
+
+                // switch direction for the next iteration
+                forwardTurn = !forwardTurn;
+            }
+
+            GADGET_CHECK_RETURN_FALSE( this->enforceInverseTransform(transform_inverse_, transform_, deform_delta_, inverse_deform_enforce_iter_) );
+
+            GADGET_CHECK_PERFORM(verbose_, GADGET_MSG("----> Total iteration number : " << iter_num_) );
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDeformationFieldBidirectionalSolver<ValueType, CoordType, D>::solve() ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    bool hoImageRegDeformationFieldBidirectionalSolver<ValueType, CoordType, D>::
+    enforceInverseTransform(TransformationType* transform, TransformationType* transform_inverse, DeformationFieldType* deform_delta, unsigned int iter_num)
+    {
+        try
+        {
+
+            std::vector<size_t> dim, dim_inverse;
+
+            DeformationFieldType& deform = transform->getDeformationField(0);
+            deform.get_dimensions(dim);
+
+            DeformationFieldType& deform_inverse = transform_inverse->getDeformationField(0);
+            deform_inverse.get_dimensions(dim_inverse);
+
+            unsigned int iter_enforce;
+            for ( iter_enforce=0; iter_enforce<iter_num; iter_enforce++ )
+            {
+                if ( use_world_coordinate_ )
+                {
+                    if ( D == 2 )
+                    {
+                        long long sx = (long long)dim_inverse[0];
+                        long long sy = (long long)dim_inverse[1];
+
+                        long long y;
+                        #pragma omp parallel default(none) private(y) shared(sx, sy, transform, transform_inverse, deform_delta, deform, deform_inverse) if(sx*sy>64*1024) num_threads(2)
+                        {
+                            CoordType ix, iy, px, py, px_inverse, py_inverse, dx, dy, dx_inverse, dy_inverse;
+                            size_t offset;
+
+                            #pragma omp for 
+                            for ( y=0; y<(long long)sy; y++ )
+                            {
+                                for ( size_t x=0; x<sx; x++ )
+                                {
+                                    transform_inverse->get(x, (size_t)y, dx_inverse, dy_inverse);
+
+                                    deform_inverse.image_to_world(x, y, px_inverse, py_inverse);
+                                    px = px_inverse + dx_inverse;
+                                    py = py_inverse + dy_inverse;
+
+                                    deform.world_to_image(px, py, ix, iy);
+
+                                    transform->get(ix, iy, dx, dy);
+
+                                    offset = x + y*sx;
+
+                                    deform_delta[0](offset) = dx;
+                                    deform_delta[1](offset) = dy;
+                                }
+                            }
+                        }
+                    }
+                    else if ( D == 3 )
+                    {
+                        long long sx = (long long)dim_inverse[0];
+                        long long sy = (long long)dim_inverse[1];
+                        long long sz = (long long)dim_inverse[2];
+
+                        long long z;
+                        #pragma omp parallel default(none) private(z) shared(sx, sy, sz, transform, transform_inverse, deform_delta, deform, deform_inverse)
+                        {
+                            CoordType ix, iy, iz, px, py, pz, px_inverse, py_inverse, pz_inverse, dx, dy, dz, dx_inverse, dy_inverse, dz_inverse;
+
+                            #pragma omp for 
+                            for ( z=0; z<(long long)sz; z++ )
+                            {
+                                for ( size_t y=0; y<sy; y++ )
+                                {
+                                    size_t offset = z*sx*sy + y*sx;
+
+                                    for ( size_t x=0; x<sx; x++ )
+                                    {
+                                        transform_inverse->get(x, y, (size_t)z, dx_inverse, dy_inverse, dz_inverse);
+
+                                        deform_inverse.image_to_world(x, y, z, px_inverse, py_inverse, pz_inverse);
+                                        px = px_inverse + dx_inverse;
+                                        py = py_inverse + dy_inverse;
+                                        pz = pz_inverse + dz_inverse;
+
+                                        deform.world_to_image(px, py, pz, ix, iy, iz);
+
+                                        transform->get(ix, iy, iz, dx, dy, dz);
+
+                                        deform_delta[0](offset+x) = dx;
+                                        deform_delta[1](offset+x) = dy;
+                                        deform_delta[2](offset+x) = dz;
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    else
+                    {
+                        size_t N = deform_inverse.get_number_of_elements();
+
+                        long long n;
+                        #pragma omp parallel default(none) private(n) shared(N, transform, transform_inverse, deform_delta, deform, deform_inverse)
+                        {
+                            size_t ind[D];
+                            CoordType wind[D], wind_inverse[D], d_inverse[D], pt[D], d[D];
+
+                            for ( n=0; n<(long long)N; n++ )
+                            {
+                                deform_inverse.calculate_index( (unsigned long long)(n), ind);
+                                deform_inverse.image_to_world(ind, wind_inverse);
+
+                                transform_inverse->get(ind, d_inverse);
+
+                                unsigned int ii;
+                                for ( ii=0; ii<D; ii++ ) pt[ii] = wind_inverse[ii] + d_inverse[ii];
+
+                                deform.world_to_image(pt, wind);
+
+                                transform->get(wind, d);
+                                for ( ii=0; ii<D; ii++ )
+                                {
+                                    deform_delta[ii](n) = d[ii];
+                                }
+                            }
+                        }
+                    }
+                }
+                else
+                {
+                    if ( D == 2 )
+                    {
+                        long long sx = (long long)dim_inverse[0];
+                        long long sy = (long long)dim_inverse[1];
+
+                        long long y;
+                        #pragma omp parallel default(none) private(y) shared(sx, sy, transform, transform_inverse, deform_delta) if(sx*sy>64*1024) num_threads(2)
+                        {
+                            CoordType px, py, dx, dy, dx_inverse, dy_inverse;
+                            size_t offset;
+
+                            #pragma omp for 
+                            for ( y=0; y<(long long)sy; y++ )
+                            {
+                                for ( size_t x=0; x<sx; x++ )
+                                {
+                                    transform_inverse->get(x, (size_t)y, dx_inverse, dy_inverse);
+
+                                    px = x + dx_inverse;
+                                    py = y + dy_inverse;
+
+                                    transform->get(px, py, dx, dy);
+
+                                    offset = x + y*sx;
+
+                                    deform_delta[0](offset) = dx;
+                                    deform_delta[1](offset) = dy;
+                                }
+                            }
+                        }
+                    }
+                    else if ( D == 3 )
+                    {
+                        long long sx = (long long)dim_inverse[0];
+                        long long sy = (long long)dim_inverse[1];
+                        long long sz = (long long)dim_inverse[2];
+
+                        long long z;
+                        #pragma omp parallel default(none) private(z) shared(sx, sy, sz, transform, transform_inverse, deform_delta)
+                        {
+                            CoordType px, py, pz, dx, dy, dz, dx_inverse, dy_inverse, dz_inverse;
+                            size_t offset;
+
+                            #pragma omp for 
+                            for ( z=0; z<(long long)sz; z++ )
+                            {
+                                for ( size_t y=0; y<sy; y++ )
+                                {
+                                    offset = z*sx*sy + y*sx;
+
+                                    for ( size_t x=0; x<sx; x++ )
+                                    {
+                                        transform_inverse->get(x, y, (size_t)z, dx_inverse, dy_inverse, dz_inverse);
+
+                                        px = x + dx_inverse;
+                                        py = y + dy_inverse;
+                                        pz = z + dz_inverse;
+
+                                        transform->get(px, py, pz, dx, dy, dz);
+
+                                        deform_delta[0](offset+x) = dx;
+                                        deform_delta[1](offset+x) = dy;
+                                        deform_delta[2](offset+x) = dz;
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    else
+                    {
+                        size_t N = deform_inverse.get_number_of_elements();
+
+                        long long n;
+                        #pragma omp parallel default(none) private(n) shared(N, transform, transform_inverse, deform_delta, deform_inverse)
+                        {
+                            size_t ind[D];
+                            CoordType d_inverse[D], pt[D], d[D];
+
+                            for ( n=0; n<(long long)N; n++ )
+                            {
+                                deform_inverse.calculate_index( (unsigned long long)(n), ind);
+
+                                transform_inverse->get(ind, d_inverse);
+
+                                unsigned int ii;
+                                for ( ii=0; ii<D; ii++ ) pt[ii] = ind[ii] + d_inverse[ii];
+
+                                transform->get(pt, d);
+                                for ( ii=0; ii<D; ii++ )
+                                {
+                                    deform_delta[ii](n) = d[ii];
+                                }
+                            }
+                        }
+                    }
+                }
+
+                unsigned int ii;
+                for ( ii=0; ii<D; ii++ )
+                {
+                    DeformationFieldType& deform_inverse = transform_inverse->getDeformationField(ii);
+
+                    Gadgetron::scal( CoordType(1-inverse_deform_enforce_weight_), deform_inverse);
+
+                    Gadgetron::scal( CoordType(-1*inverse_deform_enforce_weight_), deform_delta[ii]);
+
+                    Gadgetron::add(deform_delta[ii], deform_inverse, deform_inverse);
+                }
+
+                if ( apply_in_FOV_constraint_ )
+                {
+                    if ( !use_world_coordinate_ )
+                    {
+                        if ( D == 2 )
+                        {
+                            long long sx = (long long)dim_inverse[0];
+                            long long sy = (long long)dim_inverse[1];
+
+                            DeformationFieldType& dxInv = transform_inverse->getDeformationField(0);
+                            DeformationFieldType& dyInv = transform_inverse->getDeformationField(1);
+
+                            long long x, y;
+                            #pragma omp parallel for default(none) private(y, x) shared(sx, sy, dxInv, dyInv) if(sx*sy>64*1024) num_threads(2)
+                            for ( y=0; y<sy; y++ )
+                            {
+                                for ( x=0; x<sx; x++ )
+                                {
+                                    size_t offset = x + y*sx;
+
+                                    CoordType tx = x + dxInv(offset);
+                                    CoordType ty = y + dyInv(offset);
+
+                                    if ( tx < 0 )
+                                    {
+                                        dxInv(offset) = FLT_EPSILON - x;
+                                    }
+                                    else if (tx > sx-1 )
+                                    {
+                                        dxInv(offset) = sx-1-FLT_EPSILON - x;
+                                    }
+
+                                    if ( ty < 0 )
+                                    {
+                                        dyInv(offset) = FLT_EPSILON - y;
+                                    }
+                                    else if (ty > sy-1 )
+                                    {
+                                        dyInv(offset) = sy-1-FLT_EPSILON - y;
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDeformationFieldBidirectionalSolver<ValueType, CoordType, D>::enforceInverseTransform(...) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Write a human-readable summary of the solver settings to a stream.
+    /// Each setting is emitted on its own line and the stream is flushed after every line.
+    /// \param os output stream receiving the summary
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    void hoImageRegDeformationFieldBidirectionalSolver<ValueType, CoordType, D>::print(std::ostream& os) const
+    {
+        os << "--------------Gagdgetron image registration non-parametric solver for pixel-wise bidirectional deformation field -------------" << std::endl;
+
+        // template parameters
+        os << "Image dimension is : " << D << std::endl;
+        os << "Image data type is : " << std::string(typeid(ValueType).name()) << std::endl;
+        os << "Transformation data type is : " << std::string(typeid(CoordType).name()) << std::endl;
+
+        // iteration control
+        os << "Use world coordinate is : " << use_world_coordinate_ << std::endl;
+        os << "Maximal iteration number is : " << max_iter_num_ << std::endl;
+        os << "Dissimilarity threshold is : " << dissimilarity_thres_ << std::endl;
+        os << "Parameter threshold is : " << parameter_thres_ << std::endl;
+        os << "Number of search size division is : " << div_num_ << std::endl;
+        os << "Solver step size is : " << step_size_para_ << std::endl;
+        // the division ratio is reported once (it used to be printed twice in a row)
+        os << "Step size division ratio is : " << step_size_div_para_ << std::endl;
+
+        // settings specific to the inverse-transform enforcement
+        os << "Number of iterations to improve the estimation of the inverse transform is : " << inverse_deform_enforce_iter_ << std::endl;
+        os << "Weight to update the estimation of the inverse transform is : " << inverse_deform_enforce_weight_ << std::endl;
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/solver/hoImageRegDeformationFieldSolver.h b/toolboxes/registration/optical_flow/cpu/solver/hoImageRegDeformationFieldSolver.h
new file mode 100644
index 0000000..3baadce
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/solver/hoImageRegDeformationFieldSolver.h
@@ -0,0 +1,673 @@
+/** \file   hoImageRegDeformationFieldSolver.h
+    \brief  Implement the PDE solver for deformation field non-linear image registration
+
+            The PDE solver is a classical gradient descent method, derived from the calculus of variation:
+
+            [1] Gerardo Hermosillo, Christophe Chefd'Hotel, Olivier Faugeras. Variational Methods for Multimodal Image Matching. 
+            International Journal of Computer Vision. December 2002, Volume 50, Issue 3, pp 329-343.
+            http://link.springer.com/article/10.1023%2FA%3A1020830525823
+
+            [2] Gerardo Hermosillo. Variational Methods for Multimodal Image Matching. PhD Thesis, UNIVERSITE DE NICE - SOPHIA ANTIPOLIS. May 2002.
+            http://webdocs.cs.ualberta.ca/~dana/readingMedIm/papers/hermosilloPhD.pdf
+
+            [3] Christophe Chefd'Hotel, Gerardo Hermosillo, Olivier D. Faugeras: Flows of diffeomorphisms for multimodal image registration. ISBI 2002: 753-756.
+            http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=1029367&tag=1
+
+            [4] Christophe Chefd'Hotel, Geometric Methods in Computer Vision and Image Processing : Contributions and Applications. PhD Thesis, April 2005.
+
+            The code is based on the listed source code at page 185 - 187 in ref [2] and extended according to the ref [3] and [4].
+
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include <cstddef>
+
+#include "hoImageRegNonParametricSolver.h"
+#include "hoImageRegDeformationField.h"
+
+#ifdef max
+#undef max
+#endif // max
+
+#ifdef min
+#undef min
+#endif // min
+
+namespace Gadgetron
+{
+    /// Solver for deformation field non-linear image registration (see the references in the file header).
+    /// ValueType: image pixel value type
+    /// CoordType: transformation data type
+    /// D: image dimension; the deformation field carries one component per dimension
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    class hoImageRegDeformationFieldSolver : public hoImageRegNonParametricSolver<ValueType, CoordType, D, D>
+    {
+    public:
+
+        typedef hoImageRegDeformationFieldSolver<ValueType, CoordType, D> Self;
+        typedef hoImageRegNonParametricSolver<ValueType, CoordType, D, D> BaseClass;
+
+        /// target and source images share the same pixel type and dimension
+        typedef hoNDImage<ValueType, D> TargetType;
+        typedef hoNDImage<ValueType, D> SourceType;
+
+        /// fixed 2D image types
+        typedef hoNDImage<ValueType, 2> Target2DType;
+        typedef Target2DType Source2DType;
+
+        /// fixed 3D image types
+        /// (Source3DType previously aliased the 2D target type, which was a copy-paste slip)
+        typedef hoNDImage<ValueType, 3> Target3DType;
+        typedef Target3DType Source3DType;
+
+        typedef ValueType T;
+        typedef ValueType element_type;
+        typedef ValueType value_type;
+
+        typedef CoordType coord_type;
+
+        typedef typename BaseClass::InterpolatorType InterpolatorType;
+
+        /// the transformation optimized by this solver and its associated types
+        typedef hoImageRegDeformationField<CoordType, D> TransformationType;
+        typedef typename TransformationType::input_point_type input_point_type;
+        typedef typename TransformationType::output_point_type output_point_type;
+        typedef typename TransformationType::jacobian_position_type jacobian_position_type;
+        typedef typename TransformationType::DeformationFieldType DeformationFieldType;
+
+        typedef typename BaseClass::ImageRegWarperType ImageRegWarperType;
+
+        typedef typename BaseClass::ImageRegDissimilarityType ImageRegDissimilarityType;
+
+        hoImageRegDeformationFieldSolver();
+        virtual ~hoImageRegDeformationFieldSolver();
+
+        /// set the deformation field to be optimized
+        /// only the address is stored; the solver does not take ownership and the
+        /// caller must keep the transform alive while the solver uses it
+        void setTransform(TransformationType& transform) { transform_ = &transform; }
+
+        /// prepare the work buffers from the target image; must be called after setTransform()
+        virtual bool initialize();
+
+        /// run the optimization
+        virtual bool solve();
+
+        /// perform one iteration of optimization
+        /// all working buffers are passed in explicitly by the caller
+        /// stopIteration is set to true when the number of step size divisions (divTimes) exceeds div_num_
+        virtual bool solve_once(TargetType* target, SourceType* source, TargetType& warped, 
+                                unsigned int iter_num, unsigned int max_iter_num, 
+                                unsigned int& divTimes, 
+                                ValueType& curr_dissimilarity, ValueType& prev_dissimilarity, 
+                                TransformationType* transform, ImageRegWarperType& warper, ImageRegDissimilarityType& dissimilarity,
+                                bool& stopIteration, 
+                                TargetType* gradient_warpped, DeformationFieldType* deform_delta, DeformationFieldType* deform_updated, 
+                                DeformationFieldType& deform_norm , DeformationFieldType& deform_norm_one_dim,
+                                CoordType* deform_delta_scale_factor);
+
+        /// print the solver settings
+        virtual void print(std::ostream& os) const;
+
+        /// the regularization method in ref [3] is used
+        /// in the unit of pixel
+        ValueType regularization_hilbert_strength_[D];
+
+        /// whether the deformation can warp a point outside the FOV
+        /// InFOV constraint
+        bool apply_in_FOV_constraint_;
+
+        using BaseClass::iter_num_;
+        using BaseClass::max_iter_num_;
+        using BaseClass::dissimilarity_thres_;
+        using BaseClass::parameter_thres_;
+        using BaseClass::div_num_;
+        using BaseClass::step_size_para_;
+        using BaseClass::step_size_div_para_;
+        using BaseClass::verbose_;
+        using BaseClass::gt_timer1_;
+        using BaseClass::gt_timer2_;
+        using BaseClass::gt_timer3_;
+        using BaseClass::performTiming_;
+        using BaseClass::gt_exporter_;
+        using BaseClass::debugFolder_;
+
+    protected:
+
+        /// transform being optimized; set by setTransform(), not owned
+        TransformationType* transform_;
+
+        /// dissimilarity values of the current and the previous iteration
+        ValueType curr_dissimilarity_;
+        ValueType prev_dissimilarity_;
+
+        /// per-dimension update of the deformation field and the resulting updated field
+        DeformationFieldType deform_delta_[D];
+        DeformationFieldType deform_updated_[D];
+
+        /// buffers used to compute the norm of the deformation update
+        DeformationFieldType deform_norm_;
+        DeformationFieldType deform_norm_one_dim_;
+
+        /// per-dimension gradient of the warped image
+        TargetType gradient_warpped_[D];
+
+        /// compensate for the non-isotropic pixel sizes
+        coord_type deform_delta_scale_factor_[D];
+
+        using BaseClass::target_;
+        using BaseClass::source_;
+        using BaseClass::warpped_;
+        using BaseClass::bg_value_;
+        using BaseClass::interp_;
+        using BaseClass::warper_;
+        using BaseClass::dissimilarity_;
+        using BaseClass::use_world_coordinate_;
+    };
+
+    /// Default constructor.
+    /// Every per-dimension regularization strength is set to 12, every scale factor to 1,
+    /// and the in-FOV constraint is switched off. The transform pointer and the
+    /// dissimilarity values are given defined start values so that a solver used before
+    /// setTransform() can be detected instead of reading indeterminate memory.
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    hoImageRegDeformationFieldSolver<ValueType, CoordType, D>::
+    hoImageRegDeformationFieldSolver() 
+        : BaseClass(), 
+          apply_in_FOV_constraint_(false), 
+          transform_(NULL), 
+          curr_dissimilarity_(0), 
+          prev_dissimilarity_(0)
+    {
+        for ( unsigned int ii=0; ii<D; ii++ )
+        {
+            regularization_hilbert_strength_[ii] = 12;
+            deform_delta_scale_factor_[ii] = 1;
+        }
+    }
+
+    /// Destructor; the body is empty, so nothing beyond member destruction happens here
+    /// (in particular the object behind transform_ is not released).
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    hoImageRegDeformationFieldSolver<ValueType, CoordType, D>::
+    ~hoImageRegDeformationFieldSolver()
+    {
+    }
+
+    /// Prepare the solver for a run.
+    /// Runs the base class initialization, hands the transform to the warper and sets up
+    /// all work buffers with the image info of the target.
+    /// \return false if no transform is available or the base class initialization fails; true otherwise
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    bool hoImageRegDeformationFieldSolver<ValueType, CoordType, D>::initialize()
+    {
+        // the transform is dereferenced below, so refuse to continue without one
+        GADGET_CHECK_RETURN_FALSE(transform_ != NULL);
+
+        GADGET_CHECK_RETURN_FALSE(BaseClass::initialize());
+        warper_->setTransformation(*transform_);
+
+        deform_norm_.copyImageInfo(*target_);
+        deform_norm_one_dim_.copyImageInfo(*target_);
+
+        unsigned int ii;
+        for ( ii=0; ii<D; ii++ )
+        {
+            // the update buffers start from zero
+            deform_delta_[ii].copyImageInfo(*target_);
+            Gadgetron::clear(deform_delta_[ii]);
+
+            deform_updated_[ii].copyImageInfo(*target_);
+            Gadgetron::clear(deform_updated_[ii]);
+
+            gradient_warpped_[ii].copyImageInfo(*target_);
+        }
+
+        // scale factor of every dimension is the pixel size ratio relative to the first dimension
+        for ( ii=0; ii<D; ii++ )
+        {
+            deform_delta_scale_factor_[ii] = target_->get_pixel_size(0)/target_->get_pixel_size(ii);
+        }
+
+        return true;
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    bool hoImageRegDeformationFieldSolver<ValueType, CoordType, D>::
+    solve_once(TargetType* target, SourceType* source, TargetType& warped, 
+                unsigned int iter_num, unsigned int max_iter_num, 
+                unsigned int& divTimes, 
+                ValueType& curr_dissimilarity, ValueType& prev_dissimilarity, 
+                TransformationType* transform, ImageRegWarperType& warper, ImageRegDissimilarityType& dissimilarity,
+                bool& stopIteration, 
+                TargetType* gradient_warpped, DeformationFieldType* deform_delta, DeformationFieldType* deform_updated, 
+                DeformationFieldType& deform_norm , DeformationFieldType& deform_norm_one_dim,
+                CoordType* deform_delta_scale_factor)
+    {
+        try
+        {
+            unsigned int ii;
+
+            long long sx = (long long)(target_->get_size(0));
+            long long sy = (long long)(target_->get_size(1));
+            long long sz = (long long)(target_->get_size(2));
+
+            long long x, y, z;
+
+            if ( !debugFolder_.empty() )
+            {
+                for ( ii=0; ii<D; ii++ )
+                {
+                    std::ostringstream ostr;
+                    ostr << "DeformationFieldSolver_deformfield_" << ii;
+                    const DeformationFieldType& def = transform->getDeformationField(ii);
+                    GADGET_EXPORT_IMAGE(debugFolder_, gt_exporter_, def, ostr.str());
+                }
+            }
+
+            // warp the source
+
+            if ( use_world_coordinate_ )
+            {
+                GADGET_CHECK_RETURN_FALSE(warper.warpWithDeformationFieldWorldCoordinate(*target, *source, warped));
+            }
+            else
+            {
+                GADGET_CHECK_RETURN_FALSE(warper.warp(*target, *source, use_world_coordinate_, warped));
+            }
+
+            GADGET_CHECK_PERFORM(!debugFolder_.empty(), GADGET_EXPORT_IMAGE(debugFolder_, gt_exporter_, warped, "DeformationFieldSolver_warpped"));
+
+            // evaluate the dissimilarity and get the intensity comparison function
+            GADGET_CHECK_RETURN_FALSE(dissimilarity.evaluateDeriv(warped));
+
+            curr_dissimilarity = dissimilarity.getDissimilarity();
+            GADGET_CHECK_PERFORM(verbose_, GADGET_MSG("--> Iteration " << iter_num << " [out of " << max_iter_num << "] : \t" << curr_dissimilarity) );
+
+            if ( prev_dissimilarity < curr_dissimilarity + dissimilarity_thres_ )
+            {
+                if ( ++divTimes > div_num_ )
+                {
+                    stopIteration = true;
+                    return true;
+                }
+
+                step_size_para_ *= step_size_div_para_;
+
+                GADGET_CHECK_PERFORM(verbose_, GADGET_MSG("----> Parameter division " << divTimes << " [out of " << div_num_ << "] ") );
+            }
+
+            prev_dissimilarity = curr_dissimilarity;
+
+            /// gradient is in the 1/pixel unit
+            Gadgetron::gradient(warped, gradient_warpped);
+
+            const TargetType& deriv = dissimilarity.getDeriv();
+
+            for ( ii=0; ii<D; ii++ )
+            {
+                Gadgetron::multiply(gradient_warpped[ii], deriv, deform_delta[ii]);
+            }
+
+            if ( !debugFolder_.empty() )
+            {
+                GADGET_EXPORT_IMAGE(debugFolder_, gt_exporter_, deriv, "DeformationFieldSolver_deriv");
+
+                for ( ii=0; ii<D; ii++ )
+                {
+                    std::ostringstream ostr;
+                    ostr << "DeformationFieldSolver_gradient_warpped_" << ii;
+
+                    GADGET_EXPORT_IMAGE(debugFolder_, gt_exporter_, gradient_warpped[ii], ostr.str());
+
+                    std::ostringstream ostr2;
+                    ostr2 << "DeformationFieldSolver_deform_delta_" << ii;
+
+                    GADGET_EXPORT_IMAGE(debugFolder_, gt_exporter_, deform_delta[ii], ostr2.str());
+                }
+            }
+
+            /// compensate for non-isotropic pixel sizes
+            for ( ii=0; ii<D; ii++ )
+            {
+                if ( GT_ABS(deform_delta_scale_factor[ii]-1) > FLT_EPSILON )
+                {
+                    Gadgetron::scal(deform_delta_scale_factor[ii], deform_delta[ii]);
+                }
+            }
+
+            /// filter sigma is in the unit of pixel size
+            for ( ii=0; ii<D; ii++ )
+            {
+                Gadgetron::filterGaussian(deform_delta[ii], regularization_hilbert_strength_);
+            }
+
+            if ( !debugFolder_.empty() )
+            {
+                for ( ii=0; ii<D; ii++ )
+                {
+                    std::ostringstream ostr;
+                    ostr << "DeformationFieldSolver_deform_delta_filtered_" << ii;
+
+                    GADGET_EXPORT_IMAGE(debugFolder_, gt_exporter_, deform_delta[ii], ostr.str());
+                }
+            }
+
+            // compute the max norm of hilbert derivative
+            Gadgetron::clear(deform_norm);
+            for ( ii=0; ii<D; ii++ )
+            {
+                Gadgetron::multiply(deform_delta[ii], deform_delta[ii], deform_norm_one_dim);
+                Gadgetron::add(deform_norm_one_dim, deform_norm, deform_norm);
+            }
+
+            CoordType* pDeformNorm = deform_norm.begin();
+
+            ValueType max_norm_deform_delta = pDeformNorm[0];
+            // size_t max_ind;
+
+            for ( ii=1; ii<sx*sy; ii++ )
+            {
+                if ( max_norm_deform_delta < pDeformNorm[ii] ) max_norm_deform_delta = pDeformNorm[ii];
+            }
+
+            // Gadgetron::maxAbsolute(deform_norm, max_norm_deform_delta, max_ind);
+
+            ValueType PDE_time_integration_step_size = 0;
+            if ( max_norm_deform_delta > 1e-5 )
+            {
+                PDE_time_integration_step_size = step_size_para_ / std::sqrt(max_norm_deform_delta);
+            }
+
+            if ( PDE_time_integration_step_size > 0 )
+            {
+                for ( ii=0; ii<D; ii++ )
+                {
+                    Gadgetron::scal(PDE_time_integration_step_size, deform_delta[ii]);
+                }
+
+                if ( use_world_coordinate_ )
+                {
+                    // Note: the deform_delta is in the unit of pixel so far, need to convert it to the world coordinate
+
+                    if ( D == 2 )
+                    {
+                        CoordType ix, iy, wx, wy, pX, pY, deltaWX, deltaWY;
+
+                        // #pragma omp parallel for default(none) private(y, x, ix, iy, wx, wy, pX, pY, deltaWX, deltaWY) shared(sx, sy, target, deform_delta, deform_updated, transform)
+                        for ( y=0; y<sy; y++ )
+                        {
+                            for ( x=0; x<sx; x++ )
+                            {
+                                size_t offset = x + y*sx;
+
+                                target->image_to_world( (size_t)x, (size_t)y, wx, wy);
+
+                                CoordType deltaX = deform_delta[0](offset);
+                                CoordType deltaY = deform_delta[1](offset);
+
+                                // because the delta deformation is in the pixel size unit, it needs to be converted to world coordinate
+                                target->image_to_world( deltaX, deltaY, deltaWX, deltaWY);
+
+                                target->world_to_image(wx+deltaWX, wy+deltaWY, ix, iy);
+
+                                transform->get(ix, iy, pX, pY);
+
+                                deform_updated[0](offset) = deltaWX + pX;
+                                deform_updated[1](offset) = deltaWY + pY;
+                            }
+                        }
+                    }
+                    else if ( D == 3 )
+                    {
+                        CoordType ix, iy, iz, wx, wy, wz, pX, pY, pZ, deltaWX, deltaWY, deltaWZ;
+
+                        #pragma omp parallel for default(none) private(y, x, z, ix, iy, iz, wx, wy, wz, pX, pY, pZ, deltaWX, deltaWY, deltaWZ) shared(sx, sy, sz, target, deform_delta, deform_updated, transform)
+                        for ( z=0; z<sz; z++ )
+                        {
+                            for ( y=0; y<sy; y++ )
+                            {
+                                for ( x=0; x<sx; x++ )
+                                {
+                                    size_t offset = x + y*sx + z*sx*sy;
+
+                                    target->image_to_world( (size_t)x, (size_t)y, (size_t)z, wx, wy, wz);
+
+                                    CoordType deltaX = deform_delta[0](offset);
+                                    CoordType deltaY = deform_delta[1](offset);
+                                    CoordType deltaZ = deform_delta[2](offset);
+
+                                    target->image_to_world( deltaX, deltaY, deltaZ, deltaWX, deltaWY, deltaWZ);
+
+                                    target->world_to_image(wx+deltaWX, wy+deltaWY, wz+deltaWZ, ix, iy, iz);
+
+                                    transform->get(ix, iy, iz, pX, pY, pZ);
+
+                                    deform_updated[0](offset) = deltaWX + pX;
+                                    deform_updated[1](offset) = deltaWY + pY;
+                                    deform_updated[2](offset) = deltaWZ + pZ;
+                                }
+                            }
+                        }
+                    }
+                    else
+                    {
+                        size_t N = target_->get_number_of_elements();
+
+                        long long n;
+
+                        #pragma omp parallel default(none) private(n, ii) shared(N, target, deform_delta, deform_updated, transform)
+                        {
+                            size_t ind[D];
+                            CoordType pos[D];
+                            CoordType pDelta[D];
+                            CoordType pDeltaWorld[D];
+                            CoordType indDeform[D];
+                            CoordType pDeform[D];
+
+                            #pragma omp for 
+                            for ( n=0; n<(long long)N; n++ )
+                            {
+                                deform_delta[0].calculate_index(n, ind);
+
+                                target->image_to_world( ind, pos);
+
+                                for ( ii=0; ii<D; ii++ )
+                                {
+                                    pDelta[ii] = deform_delta[ii](n);
+                                }
+
+                                target->image_to_world( pDelta, pDeltaWorld);
+
+                                for ( ii=0; ii<D; ii++ )
+                                {
+                                    pDeltaWorld[ii] += pos[ii];
+                                }
+
+                                target->world_to_image(pDeltaWorld, indDeform);
+                                transform->get(indDeform, pDeform);
+
+                                for ( ii=0; ii<D; ii++ )
+                                {
+                                    deform_updated[ii](n) = pDeltaWorld[ii] + pDeform[ii];
+                                }
+                            }
+                        }
+                    }
+                }
+                else
+                {
+                    if ( D == 2 )
+                    {
+                        CoordType pX, pY;
+
+                        // #pragma omp parallel for default(none) private(y, x, pX, pY) shared(sx, sy, deform_delta, deform_updated, transform)
+                        for ( y=0; y<sy; y++ )
+                        {
+                            for ( x=0; x<sx; x++ )
+                            {
+                                size_t offset = x + y*sx;
+
+                                CoordType deltaX = deform_delta[0](offset);
+                                CoordType deltaY = deform_delta[1](offset);
+
+                                transform->get(x+deltaX, y+deltaY, pX, pY);
+
+                                deform_updated[0](offset) = deltaX + pX;
+                                deform_updated[1](offset) = deltaY + pY;
+                            }
+                        }
+                    }
+                    else if ( D == 3 )
+                    {
+                        CoordType pX, pY, pZ;
+
+                        #pragma omp parallel for default(none) private(y, x, z, pX, pY, pZ) shared(sx, sy, sz, deform_delta, deform_updated, transform)
+                        for ( z=0; z<sz; z++ )
+                        {
+                            for ( y=0; y<sy; y++ )
+                            {
+                                for ( x=0; x<sx; x++ )
+                                {
+                                    size_t offset = x + y*sx + z*sx*sy;
+
+                                    CoordType deltaX = deform_delta[0](offset);
+                                    CoordType deltaY = deform_delta[1](offset);
+                                    CoordType deltaZ = deform_delta[2](offset);
+
+                                    transform->get(x+deltaX, y+deltaY, z+deltaZ, pX, pY, pZ);
+
+                                    deform_updated[0](offset) = deltaX + pX;
+                                    deform_updated[1](offset) = deltaY + pY;
+                                    deform_updated[2](offset) = deltaZ + pZ;
+                                }
+                            }
+                        }
+                    }
+                    else
+                    {
+                        size_t N = target_->get_number_of_elements();
+
+                        long long n;
+
+                        #pragma omp parallel default(none) private(n, ii) shared(N, deform_delta, deform_updated, transform)
+                        {
+                            size_t ind[D];
+                            CoordType pDelta[D];
+                            CoordType indDeform[D];
+                            CoordType pDeform[D];
+
+                            #pragma omp for 
+                            for ( n=0; n<(long long)N; n++ )
+                            {
+                                deform_delta[0].calculate_index(n, ind);
+
+                                for ( ii=0; ii<D; ii++ )
+                                {
+                                    pDelta[ii] = deform_delta[ii](n);
+                                    indDeform[ii] = ind[ii] + pDelta[ii];
+                                }
+
+                                transform->get(indDeform, pDeform);
+
+                                for ( ii=0; ii<D; ii++ )
+                                {
+                                    deform_updated[ii](n) = pDelta[ii] + pDeform[ii];
+                                }
+                            }
+                        }
+                    }
+                }
+
+                if ( !debugFolder_.empty() )
+                {
+                    for ( ii=0; ii<D; ii++ )
+                    {
+                        std::ostringstream ostr;
+                        ostr << "DeformationFieldSolver_deform_updated_" << ii;
+                        GADGET_EXPORT_IMAGE(debugFolder_, gt_exporter_, deform_updated[ii], ostr.str());
+                    }
+                }
+
+                // add the InFOV constraint
+                if ( apply_in_FOV_constraint_ )
+                {
+                    if ( !use_world_coordinate_ )
+                    {
+                        if ( D == 2 )
+                        {
+                            CoordType pX, pY;
+
+                            // #pragma omp parallel for default(none) private(y, x, pX, pY) shared(sx, sy, deform_updated)
+                            for ( y=0; y<sy; y++ )
+                            {
+                                for ( x=0; x<sx; x++ )
+                                {
+                                    size_t offset = x + y*sx;
+
+                                    CoordType tx = x + deform_updated[0](offset);
+                                    CoordType ty = y + deform_updated[1](offset);
+
+                                    if ( tx < 0 )
+                                    {
+                                        deform_updated[0](offset) = FLT_EPSILON - x;
+                                    }
+                                    else if (tx > sx-1 )
+                                    {
+                                        deform_updated[0](offset) = sx-1-FLT_EPSILON - x;
+                                    }
+
+                                    if ( ty < 0 )
+                                    {
+                                        deform_updated[1](offset) = FLT_EPSILON - y;
+                                    }
+                                    else if (ty > sy-1 )
+                                    {
+                                        deform_updated[1](offset) = sy-1-FLT_EPSILON - y;
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+
+                for ( ii=0; ii<D; ii++ )
+                {
+                    transform->setDeformationField(deform_updated[ii], ii);
+                }
+            }
+        }
+        catch(...)
+        {
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Run the iterative optimization of the deformation field.
+    /// After initialize() succeeds, solve_once() is called for iter_num_ = 0 .. max_iter_num_-1, and the loop
+    /// ends early as soon as solve_once() sets its stop flag.
+    /// Returns false if a GADGET_CHECK_RETURN_FALSE check fails or an exception is caught; true otherwise.
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    bool hoImageRegDeformationFieldSolver<ValueType, CoordType, D>::solve()
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(this->initialize());
+
+            // the previous dissimilarity starts at the largest representable value
+            prev_dissimilarity_ = std::numeric_limits<ValueType>::max();
+
+            // counter handed to solve_once() by reference-style argument; starts at zero for every solve
+            unsigned int stepDivisions = 0;
+
+            dissimilarity_->initialize(*target_);
+
+            // optional debug dump of both input images
+            if ( !debugFolder_.empty() )
+            {
+                GADGET_EXPORT_IMAGE(debugFolder_, gt_exporter_, *target_, "DeformationFieldSolver_target");
+                GADGET_EXPORT_IMAGE(debugFolder_, gt_exporter_, *source_, "DeformationFieldSolver_source");
+            }
+
+            // raised by solve_once() when no further iteration is wanted
+            bool finished = false;
+
+            GADGET_CHECK_PERFORM(verbose_, GADGET_MSG("--> DeformationFieldSolver ... ") );
+
+            iter_num_ = 0;
+            while ( iter_num_ < max_iter_num_ )
+            {
+                GADGET_CHECK_RETURN_FALSE( this->solve_once(target_, source_, warpped_, iter_num_, max_iter_num_, 
+                                                            stepDivisions, curr_dissimilarity_, prev_dissimilarity_, 
+                                                            transform_, *warper_, *dissimilarity_,
+                                                            finished, 
+                                                            gradient_warpped_, deform_delta_, deform_updated_, 
+                                                            deform_norm_ , deform_norm_one_dim_, deform_delta_scale_factor_) );
+
+                // leave without advancing the counter, so iter_num_ names the iteration that stopped
+                if ( finished )
+                {
+                    break;
+                }
+
+                iter_num_++;
+            }
+
+            GADGET_CHECK_PERFORM(verbose_, GADGET_MSG("----> Total iteration number : " << iter_num_) );
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDeformationFieldSolver<ValueType, CoordType, D>::solve() ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Write a human-readable summary of the solver settings to os, one setting per line.
+    /// Each line is terminated with std::endl, so the stream is flushed after every line.
+    template<typename ValueType, typename CoordType, unsigned int D> 
+    void hoImageRegDeformationFieldSolver<ValueType, CoordType, D>::print(std::ostream& os) const
+    {
+        // implementation-defined type names as reported by typeid
+        const std::string pixelTypeName(typeid(ValueType).name());
+        const std::string coordTypeName(typeid(CoordType).name());
+
+        os << "--------------Gagdgetron image registration non-parametric solver for pixel-wise deformation field -------------" << std::endl
+           << "Image dimension is : " << D << std::endl
+           << "Image data type is : " << pixelTypeName << std::endl
+           << "Transformation data type is : " << coordTypeName << std::endl
+           << "Use world coordinate is : " << use_world_coordinate_ << std::endl
+           << "Maximal iteration number is : " << max_iter_num_ << std::endl
+           << "Dissimilarity threshold is : " << dissimilarity_thres_ << std::endl
+           << "Parameter threshold is : " << parameter_thres_ << std::endl
+           << "Number of search size division is : " << div_num_ << std::endl
+           << "Solver step size is : " << step_size_para_ << std::endl
+           << "Step size division ratio is : " << step_size_div_para_ << std::endl;
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/solver/hoImageRegNonParametricSolver.h b/toolboxes/registration/optical_flow/cpu/solver/hoImageRegNonParametricSolver.h
new file mode 100644
index 0000000..1d352a8
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/solver/hoImageRegNonParametricSolver.h
@@ -0,0 +1,162 @@
+/** \file   hoImageRegNonParametricSolver.h
+    \brief  Define the base class of image registration solver for non-parametric image transformation
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoImageRegSolver.h"
+
+namespace Gadgetron
+{
+    /// Abstract base class of registration solvers that work on a non-parametric transformation.
+    /// It holds the iteration settings shared by such solvers; solve() is pure virtual and is supplied
+    /// by the concrete solver.
+    /// ValueType: image pixel value type
+    /// CoordType: transformation data type
+    /// DIn: dimension of the source image (see SourceType)
+    /// DOut: dimension of the target image (see TargetType)
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    class hoImageRegNonParametricSolver : public hoImageRegSolver<ValueType, CoordType, DIn, DOut>
+    {
+    public:
+
+        typedef hoImageRegNonParametricSolver<ValueType, CoordType, DIn, DOut> Self;
+        typedef hoImageRegSolver<ValueType, CoordType, DIn, DOut> BaseClass;
+
+        /// image types: the target has DOut dimensions, the source has DIn dimensions
+        typedef hoNDImage<ValueType, DOut> TargetType;
+        typedef hoNDImage<ValueType, DIn> SourceType;
+
+        /// fixed 2D image types
+        typedef hoNDImage<ValueType, 2> Target2DType;
+        typedef Target2DType Source2DType;
+
+        /// fixed 3D image types
+        /// NOTE(review): Source3DType aliases the 2D type (Target2DType), not Target3DType -- this looks like
+        /// a copy/paste slip; confirm before relying on Source3DType
+        typedef hoNDImage<ValueType, 3> Target3DType;
+        typedef Target2DType Source3DType;
+
+        typedef typename BaseClass::InterpolatorType InterpolatorType;
+
+        /// the non-parametric transformation type this solver works on
+        typedef hoImageRegNonParametricTransformation<CoordType, DIn, DOut> TransformationType;
+
+        typedef ValueType T;
+        typedef ValueType element_type;
+        typedef ValueType value_type;
+
+        typedef CoordType coord_type;
+
+        typedef typename TransformationType::input_point_type input_point_type;
+        typedef typename TransformationType::output_point_type output_point_type;
+        typedef typename TransformationType::jacobian_position_type jacobian_position_type;
+
+        typedef typename BaseClass::ImageRegWarperType ImageRegWarperType;
+
+        typedef typename BaseClass::ImageRegDissimilarityType ImageRegDissimilarityType;
+
+        hoImageRegNonParametricSolver();
+        virtual ~hoImageRegNonParametricSolver();
+
+        /// check that target, source, interpolator, warper and dissimilarity are set and prepare them for solving
+        virtual bool initialize();
+
+        /// solve the minimization and find the optimal transformation
+        virtual bool solve() = 0;
+
+        /// print the solver settings to os
+        virtual void print(std::ostream& os) const;
+
+        /// number of performed iterations
+        unsigned int iter_num_;
+
+        /// maximal number of iterations
+        unsigned int max_iter_num_;
+
+        /// threshold for minimal dissimilarity changes
+        ValueType dissimilarity_thres_;
+
+        /// threshold for minimal parameter changes
+        ValueType parameter_thres_;
+
+        /// number of search size division
+        unsigned int div_num_;
+
+        /// solver step size
+        ValueType step_size_para_;
+        /// step size division ratio
+        ValueType step_size_div_para_;
+
+        /// members inherited from the base class, re-exported with public access
+        using BaseClass::verbose_;
+        using BaseClass::gt_timer1_;
+        using BaseClass::gt_timer2_;
+        using BaseClass::gt_timer3_;
+        using BaseClass::performTiming_;
+        using BaseClass::gt_exporter_;
+        using BaseClass::debugFolder_;
+
+    protected:
+
+        /// dissimilarity values of the current and the previous iteration
+        ValueType curr_dissimilarity_;
+        ValueType prev_dissimilarity_;
+
+        /// members inherited from the base class, made visible by name inside this class template
+        using BaseClass::target_;
+        using BaseClass::source_;
+        using BaseClass::warpped_;
+        using BaseClass::bg_value_;
+        using BaseClass::interp_;
+        using BaseClass::warper_;
+        using BaseClass::dissimilarity_;
+        using BaseClass::use_world_coordinate_;
+    };
+
+    /// Construct the solver with default settings.
+    /// Every data member declared by this class is given a defined value, so print() and the solve loop of a
+    /// derived class never read an indeterminate iteration count or dissimilarity.
+    /// max_iter_num_ defaults to 100 as a defensive fallback only; callers are expected to set the iteration
+    /// budget they actually want before calling solve().
+    /// The initializers are listed in member declaration order.
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    hoImageRegNonParametricSolver<ValueType, CoordType, DIn, DOut>::hoImageRegNonParametricSolver() 
+        : BaseClass(), 
+          iter_num_(0), max_iter_num_(100), 
+          dissimilarity_thres_(0), parameter_thres_( (ValueType)1e-8 ), div_num_(3), 
+          step_size_para_( (ValueType)0.8 ), step_size_div_para_( (ValueType)0.5 ), 
+          curr_dissimilarity_(0), prev_dissimilarity_(0)
+    {
+    }
+
+    /// Destructor; this class has nothing of its own to release.
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    hoImageRegNonParametricSolver<ValueType, CoordType, DIn, DOut>::~hoImageRegNonParametricSolver()
+    {
+        // intentionally empty
+    }
+
+    /// Check the solver inputs and prepare the warper and the dissimilarity measure.
+    /// Target, source, interpolator, warper and dissimilarity must all be non-NULL; each is verified with
+    /// GADGET_CHECK_RETURN_FALSE. The warped image is replaced by a copy of the target when their
+    /// dimensions differ.
+    /// Returns false if a check fails or an exception is caught; true otherwise.
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    bool hoImageRegNonParametricSolver<ValueType, CoordType, DIn, DOut>::initialize()
+    {
+        try
+        {
+            // nothing below is safe to dereference unless all five pointers are set
+            GADGET_CHECK_RETURN_FALSE(target_!=NULL);
+            GADGET_CHECK_RETURN_FALSE(source_!=NULL);
+            GADGET_CHECK_RETURN_FALSE(interp_!=NULL);
+            GADGET_CHECK_RETURN_FALSE(warper_!=NULL);
+            GADGET_CHECK_RETURN_FALSE(dissimilarity_!=NULL);
+
+            // hand the interpolator and the background value to the warper
+            warper_->setInterpolator(*interp_);
+            warper_->setBackgroundValue(bg_value_);
+
+            // the dissimilarity measure uses the same background value
+            dissimilarity_->setBackgroundValue(bg_value_);
+
+            // the warped image must have the dimensions of the target
+            const bool sameDimensions = warpped_.dimensions_equal(*target_);
+            if ( !sameDimensions ) warpped_ = *target_;
+
+            dissimilarity_->initialize(*target_);
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegNonParametricSolver<ValueType, CoordType, DIn, DOut>::initialize() ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Write a human-readable summary of the solver settings to os, one setting per line.
+    /// The target image has DOut dimensions and the source image has DIn dimensions (see the TargetType and
+    /// SourceType typedefs of this class), so the two dimension lines report DOut and DIn respectively.
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    void hoImageRegNonParametricSolver<ValueType, CoordType, DIn, DOut>::print(std::ostream& os) const
+    {
+        os << "--------------Gagdgetron image registration non-parametric solver -------------" << std::endl;
+        // TargetType is hoNDImage<ValueType, DOut>, SourceType is hoNDImage<ValueType, DIn>
+        os << "Target image dimension is : " << DOut << std::endl;
+        os << "Source image dimension is : " << DIn << std::endl;
+        os << "Image data type is : " << std::string(typeid(ValueType).name()) << std::endl;
+        os << "Transformation data type is : " << std::string(typeid(CoordType).name()) << std::endl;
+        os << "Use world coordinate is : " << use_world_coordinate_ << std::endl;
+        os << "Maximal iteration number is : " << max_iter_num_ << std::endl;
+        os << "Dissimilarity threshold is : " << dissimilarity_thres_ << std::endl;
+        os << "Parameter threshold is : " << parameter_thres_ << std::endl;
+        os << "Number of search size division is : " << div_num_ << std::endl;
+        os << "Solver step size is : " << step_size_para_ << std::endl;
+        os << "Step size division ratio is : " << step_size_div_para_ << std::endl;
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/solver/hoImageRegParametricDownHillSolver.h b/toolboxes/registration/optical_flow/cpu/solver/hoImageRegParametricDownHillSolver.h
new file mode 100644
index 0000000..a6e6c96
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/solver/hoImageRegParametricDownHillSolver.h
@@ -0,0 +1,166 @@
+/** \file   hoImageRegParametricDownHillSolver.h
+    \brief  Define the class of simple down-hill solver for parametric image transformation
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoImageRegParametricSolver.h"
+
+namespace Gadgetron
+{
+    /// Simple down-hill solver for a parametric image transformation.
+    /// The class adds no data members; it overrides solver_once() and print() and re-exports the settings
+    /// and state of hoImageRegParametricSolver.
+    /// ValueType: image pixel value type
+    /// CoordType: transformation data type
+    /// DIn: dimension of the source image (see SourceType)
+    /// DOut: dimension of the target image (see TargetType)
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    class hoImageRegParametricDownHillSolver : public hoImageRegParametricSolver<ValueType, CoordType, DIn, DOut>
+    {
+    public:
+
+        typedef hoImageRegParametricDownHillSolver<ValueType, CoordType, DIn, DOut> Self;
+        typedef hoImageRegParametricSolver<ValueType, CoordType, DIn, DOut> BaseClass;
+
+        /// image types: the target has DOut dimensions, the source has DIn dimensions
+        typedef hoNDImage<ValueType, DOut> TargetType;
+        typedef hoNDImage<ValueType, DIn> SourceType;
+
+        /// fixed 2D image types
+        typedef hoNDImage<ValueType, 2> Target2DType;
+        typedef Target2DType Source2DType;
+
+        /// fixed 3D image types
+        /// NOTE(review): Source3DType aliases the 2D type (Target2DType), not Target3DType -- this looks like
+        /// a copy/paste slip; confirm before relying on Source3DType
+        typedef hoNDImage<ValueType, 3> Target3DType;
+        typedef Target2DType Source3DType;
+
+        typedef typename BaseClass::InterpolatorType InterpolatorType;
+
+        typedef typename BaseClass::TransformationType TransformationType;
+        typedef typename BaseClass::ParaStatusType ParaStatusType;
+
+        typedef ValueType T;
+        typedef ValueType element_type;
+        typedef ValueType value_type;
+
+        typedef CoordType coord_type;
+
+        typedef typename TransformationType::input_point_type input_point_type;
+        typedef typename TransformationType::output_point_type output_point_type;
+
+        typedef typename TransformationType::jacobian_parameter_type jacobian_parameter_type;
+        typedef typename TransformationType::jacobian_position_type jacobian_position_type;
+
+        typedef typename BaseClass::ImageRegWarperType ImageRegWarperType;
+
+        typedef typename BaseClass::ImageRegDissimilarityType ImageRegDissimilarityType;
+
+        hoImageRegParametricDownHillSolver();
+        virtual ~hoImageRegParametricDownHillSolver();
+
+        /// perform one iteration of optimization
+        /// curr_dissimilarity: dissimilarity value for the current transformation parameters
+        /// returns the dissimilarity value after this iteration
+        virtual ValueType solver_once(ValueType curr_dissimilarity);
+
+        /// print the solver name followed by the base class settings
+        virtual void print(std::ostream& os) const;
+
+        /// solver settings inherited from the base class
+        using BaseClass::iter_num_;
+        using BaseClass::max_iter_num_;
+        using BaseClass::dissimilarity_thres_;
+        using BaseClass::parameter_thres_;
+        using BaseClass::div_num_;
+        using BaseClass::step_size_para_;
+        using BaseClass::step_size_div_para_;
+
+        /// logging, timing and debug-export members inherited from the base class
+        using BaseClass::verbose_;
+        using BaseClass::gt_timer1_;
+        using BaseClass::gt_timer2_;
+        using BaseClass::gt_timer3_;
+        using BaseClass::performTiming_;
+        using BaseClass::gt_exporter_;
+        using BaseClass::debugFolder_;
+
+    protected:
+
+        /// the transformation whose parameters are optimized
+        using BaseClass::transform_;
+
+        using BaseClass::curr_dissimilarity_;
+        using BaseClass::prev_dissimilarity_;
+
+        /// images and helper objects inherited from the base class
+        using BaseClass::target_;
+        using BaseClass::source_;
+        using BaseClass::warpped_;
+        using BaseClass::bg_value_;
+        using BaseClass::interp_;
+        using BaseClass::warper_;
+        using BaseClass::dissimilarity_;
+        using BaseClass::use_world_coordinate_;
+    };
+
+    /// Default constructor; only the base class constructor is invoked, as this class declares no data
+    /// members of its own.
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    hoImageRegParametricDownHillSolver<ValueType, CoordType, DIn, DOut>::hoImageRegParametricDownHillSolver()
+        : BaseClass()
+    {
+        // intentionally empty
+    }
+
+    /// Destructor; this class has nothing of its own to release.
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    hoImageRegParametricDownHillSolver<ValueType, CoordType, DIn, DOut>::~hoImageRegParametricDownHillSolver()
+    {
+        // intentionally empty
+    }
+
+    /// Perform one down-hill iteration.
+    /// Every active parameter is probed one step up and one step down (step_size_para_[i]); after each probe
+    /// the source is warped and the dissimilarity evaluated. A probe is accepted when its value is below the
+    /// running dissimilarity plus dissimilarity_thres_; the parameter is restored after both probes. If the
+    /// running dissimilarity ends up below the value passed in, the most recently accepted probe is applied
+    /// to the transformation.
+    /// curr_dissimilarity: dissimilarity for the current parameters
+    /// Returns the dissimilarity of the last accepted probe, or the input value if none was accepted.
+    /// NOTE(review): GADGET_CHECK_RETURN_FALSE presumably returns false when the warp fails; in this function
+    /// that converts to a ValueType of 0 and leaves the probed parameter modified -- confirm that callers
+    /// can tell this apart from a genuine dissimilarity of zero.
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    ValueType hoImageRegParametricDownHillSolver<ValueType, CoordType, DIn, DOut>::solver_once(ValueType curr_dissimilarity)
+    {
+        const ValueType startValue = curr_dissimilarity;
+
+        // parameter index and direction (+1 or -1) of the most recently accepted probe
+        size_t acceptedIndex = 0;
+        ValueType acceptedDirection = 0;
+
+        const size_t paraCount = transform_->get_number_of_parameters();
+
+        for ( size_t p=0; p<paraCount; p++ )
+        {
+            // only active parameters are probed
+            if ( !(transform_->get_para_status(p) == TransformationType::Active) )
+            {
+                continue;
+            }
+
+            const ValueType restoreValue = transform_->get_parameter(p);
+
+            // probe one step in the positive direction
+            transform_->set_parameter(p, restoreValue + step_size_para_[p]);
+
+            GADGET_CHECK_RETURN_FALSE(warper_->warp(*target_, *source_, use_world_coordinate_, warpped_));
+            const ValueType upValue = dissimilarity_->evaluate(warpped_);
+
+            if ( upValue < curr_dissimilarity + dissimilarity_thres_ )
+            {
+                curr_dissimilarity = upValue;
+                acceptedIndex = p;
+                acceptedDirection = 1;
+            }
+
+            // probe one step in the negative direction
+            transform_->set_parameter(p, restoreValue - step_size_para_[p]);
+
+            GADGET_CHECK_RETURN_FALSE(warper_->warp(*target_, *source_, use_world_coordinate_, warpped_));
+            const ValueType downValue = dissimilarity_->evaluate(warpped_);
+
+            if ( downValue < curr_dissimilarity + dissimilarity_thres_ )
+            {
+                curr_dissimilarity = downValue;
+                acceptedIndex = p;
+                acceptedDirection = -1;
+            }
+
+            // put the parameter back before moving to the next one
+            transform_->set_parameter(p, restoreValue);
+        }
+
+        // commit the accepted step only if the dissimilarity really dropped
+        if ( curr_dissimilarity < startValue )
+        {
+            const ValueType baseValue = transform_->get_parameter(acceptedIndex);
+            transform_->set_parameter(acceptedIndex, baseValue+acceptedDirection*step_size_para_[acceptedIndex]);
+        }
+
+        return curr_dissimilarity;
+    }
+
+    /// Write the solver banner to os, then let the base class print its settings.
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    void hoImageRegParametricDownHillSolver<ValueType, CoordType, DIn, DOut>::print(std::ostream& os) const
+    {
+        // std::endl also flushes the stream before the base class output follows
+        os << "-------------- Gagdgetron DownHill image registration solver -------------" << std::endl;
+
+        BaseClass::print(os);
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/solver/hoImageRegParametricGradientDescentSolver.h b/toolboxes/registration/optical_flow/cpu/solver/hoImageRegParametricGradientDescentSolver.h
new file mode 100644
index 0000000..22e5b8f
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/solver/hoImageRegParametricGradientDescentSolver.h
@@ -0,0 +1,146 @@
+/** \file   hoImageRegParametricGradientDescentSolver.h
+    \brief  Define the class of simple gradient descent solver for parametric image transformation, no linear search is performed in this solver
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoImageRegParametricSolver.h"
+
+namespace Gadgetron
+{
+    /// Simple gradient descent solver for a parametric image transformation.
+    /// The class adds no data members; it overrides solver_once() and print() and re-exports the settings
+    /// and state of hoImageRegParametricSolver.
+    /// ValueType: image pixel value type
+    /// CoordType: transformation data type
+    /// DIn: dimension of the source image (see SourceType)
+    /// DOut: dimension of the target image (see TargetType)
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    class hoImageRegParametricGradientDescentSolver : public hoImageRegParametricSolver<ValueType, CoordType, DIn, DOut>
+    {
+    public:
+
+        typedef hoImageRegParametricGradientDescentSolver<ValueType, CoordType, DIn, DOut> Self;
+        typedef hoImageRegParametricSolver<ValueType, CoordType, DIn, DOut> BaseClass;
+
+        /// image types: the target has DOut dimensions, the source has DIn dimensions
+        typedef hoNDImage<ValueType, DOut> TargetType;
+        typedef hoNDImage<ValueType, DIn> SourceType;
+
+        /// fixed 2D image types
+        typedef hoNDImage<ValueType, 2> Target2DType;
+        typedef Target2DType Source2DType;
+
+        /// fixed 3D image types
+        /// NOTE(review): Source3DType aliases the 2D type (Target2DType), not Target3DType -- this looks like
+        /// a copy/paste slip; confirm before relying on Source3DType
+        typedef hoNDImage<ValueType, 3> Target3DType;
+        typedef Target2DType Source3DType;
+
+        typedef typename BaseClass::InterpolatorType InterpolatorType;
+
+        typedef typename BaseClass::TransformationType TransformationType;
+
+        typedef ValueType T;
+        typedef ValueType element_type;
+        typedef ValueType value_type;
+
+        typedef CoordType coord_type;
+
+        typedef typename TransformationType::input_point_type input_point_type;
+        typedef typename TransformationType::output_point_type output_point_type;
+
+        typedef typename TransformationType::jacobian_parameter_type jacobian_parameter_type;
+        typedef typename TransformationType::jacobian_position_type jacobian_position_type;
+
+        typedef typename BaseClass::ImageRegWarperType ImageRegWarperType;
+
+        typedef typename BaseClass::ImageRegDissimilarityType ImageRegDissimilarityType;
+
+        hoImageRegParametricGradientDescentSolver();
+        virtual ~hoImageRegParametricGradientDescentSolver();
+
+        /// perform one iteration of optimization
+        /// curr_dissimilarity: dissimilarity value for the current transformation parameters
+        /// returns the dissimilarity value after this iteration
+        virtual ValueType solver_once(ValueType curr_dissimilarity);
+
+        /// print the solver name followed by the base class settings
+        virtual void print(std::ostream& os) const;
+
+        /// solver settings inherited from the base class
+        using BaseClass::iter_num_;
+        using BaseClass::max_iter_num_;
+        using BaseClass::dissimilarity_thres_;
+        using BaseClass::parameter_thres_;
+        using BaseClass::div_num_;
+        using BaseClass::step_size_para_;
+        using BaseClass::step_size_div_para_;
+
+        /// logging, timing and debug-export members inherited from the base class
+        using BaseClass::verbose_;
+        using BaseClass::gt_timer1_;
+        using BaseClass::gt_timer2_;
+        using BaseClass::gt_timer3_;
+        using BaseClass::performTiming_;
+        using BaseClass::gt_exporter_;
+        using BaseClass::debugFolder_;
+
+    protected:
+
+        /// the transformation whose parameters are optimized
+        using BaseClass::transform_;
+
+        using BaseClass::curr_dissimilarity_;
+        using BaseClass::prev_dissimilarity_;
+
+        /// images and helper objects inherited from the base class
+        using BaseClass::target_;
+        using BaseClass::source_;
+        using BaseClass::warpped_;
+        using BaseClass::bg_value_;
+        using BaseClass::interp_;
+        using BaseClass::warper_;
+        using BaseClass::dissimilarity_;
+        using BaseClass::use_world_coordinate_;
+    };
+
+    /// Default constructor; only the base class constructor is invoked, as this class declares no data
+    /// members of its own.
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    hoImageRegParametricGradientDescentSolver<ValueType, CoordType, DIn, DOut>::hoImageRegParametricGradientDescentSolver()
+        : BaseClass()
+    {
+        // intentionally empty
+    }
+
+    /// Destructor; this class has nothing of its own to release.
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    hoImageRegParametricGradientDescentSolver<ValueType, CoordType, DIn, DOut>::~hoImageRegParametricGradientDescentSolver()
+    {
+        // intentionally empty
+    }
+
+    /// Perform one gradient descent iteration.
+    /// The derivative is evaluated once; the parameters are then moved repeatedly by
+    /// -step_size_para_[i]*deriv[i]. After every move the source is warped and the dissimilarity evaluated.
+    /// A move is rejected when the new value exceeds the last accepted value by more than
+    /// dissimilarity_thres_; the rejected move is undone and the iteration ends.
+    /// The number of moves is capped at max_iter_num_, so the function also terminates when the
+    /// dissimilarity never rises (e.g. an all-zero derivative or a flat dissimilarity); in that case the
+    /// parameters stay at the last accepted move.
+    /// curr_dissimilarity: dissimilarity for the current parameters
+    /// Returns the dissimilarity of the last accepted parameter set (the input value if no move was accepted).
+    /// NOTE(review): GADGET_CHECK_RETURN_FALSE presumably returns false on failure; in this function that
+    /// converts to a ValueType of 0 -- confirm that callers can tell this apart from a genuine value of zero.
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    ValueType hoImageRegParametricGradientDescentSolver<ValueType, CoordType, DIn, DOut>::solver_once(ValueType curr_dissimilarity)
+    {
+        std::vector<ValueType> deriv;
+        GADGET_CHECK_RETURN_FALSE(this->evaluateDeriv(transform_, dissimilarity_, step_size_para_, deriv));
+
+        const size_t numOfPara = transform_->get_number_of_parameters();
+        size_t i;
+        ValueType currPara(0);
+
+        // dissimilarity of the last accepted parameter set
+        ValueType acceptedValue = curr_dissimilarity;
+
+        // bounded walk along the negative derivative
+        const size_t maxMoves = (size_t)max_iter_num_;
+        for ( size_t move=0; move<maxMoves; move++ )
+        {
+            for ( i=0; i<numOfPara; i++ )
+            {
+                currPara = transform_->get_parameter(i);
+                transform_->set_parameter( i, currPara-step_size_para_[i]*deriv[i] );
+            }
+
+            GADGET_CHECK_RETURN_FALSE(warper_->warp(*target_, *source_, use_world_coordinate_, warpped_));
+            curr_dissimilarity = dissimilarity_->evaluate(warpped_);
+
+            if ( curr_dissimilarity > acceptedValue + dissimilarity_thres_ )
+            {
+                // rewind: undo the rejected move so the parameters match acceptedValue again
+                for ( i=0; i<numOfPara; i++ )
+                {
+                    currPara = transform_->get_parameter(i);
+                    transform_->set_parameter( i, currPara+step_size_para_[i]*deriv[i] );
+                }
+
+                break;
+            }
+
+            acceptedValue = curr_dissimilarity;
+        }
+
+        return acceptedValue;
+    }
+
+    /// Write the solver banner to os, then let the base class print its settings.
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    void hoImageRegParametricGradientDescentSolver<ValueType, CoordType, DIn, DOut>::print(std::ostream& os) const
+    {
+        // std::endl also flushes the stream before the base class output follows
+        os << "-------------- Gagdgetron Gradient descent image registration solver -------------" << std::endl;
+
+        BaseClass::print(os);
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/solver/hoImageRegParametricSolver.h b/toolboxes/registration/optical_flow/cpu/solver/hoImageRegParametricSolver.h
new file mode 100644
index 0000000..3a9db96
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/solver/hoImageRegParametricSolver.h
@@ -0,0 +1,326 @@
+/** \file   hoImageRegParametricSolver.h
+    \brief  Define the base class of image registration solver for parametric image transformation
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoImageRegSolver.h"
+
+namespace Gadgetron
+{
+    /// Base class of the image registration solvers working on a parametric transformation.
+    /// ValueType: image pixel value type
+    /// CoordType: transformation data type
+    /// DIn: dimension of the source image
+    /// DOut: dimension of the target image
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    class hoImageRegParametricSolver : public hoImageRegSolver<ValueType, CoordType, DIn, DOut>
+    {
+    public:
+
+        typedef hoImageRegParametricSolver<ValueType, CoordType, DIn, DOut> Self;
+        typedef hoImageRegSolver<ValueType, CoordType, DIn, DOut> BaseClass;
+
+        typedef hoNDImage<ValueType, DOut> TargetType;
+        typedef hoNDImage<ValueType, DIn> SourceType;
+
+        typedef hoNDImage<ValueType, 2> Target2DType;
+        typedef Target2DType Source2DType;
+
+        typedef hoNDImage<ValueType, 3> Target3DType;
+        /// 3D source image type (previously aliased the 2D type by mistake)
+        typedef Target3DType Source3DType;
+
+        typedef typename BaseClass::InterpolatorType InterpolatorType;
+
+        typedef hoImageRegParametricTransformation<CoordType, DIn, DOut> TransformationType;
+        typedef typename TransformationType::ParaStatus ParaStatusType;
+
+        typedef ValueType T;
+        typedef ValueType element_type;
+        typedef ValueType value_type;
+
+        typedef CoordType coord_type;
+
+        typedef typename TransformationType::input_point_type input_point_type;
+        typedef typename TransformationType::output_point_type output_point_type;
+
+        typedef typename TransformationType::jacobian_parameter_type jacobian_parameter_type;
+        typedef typename TransformationType::jacobian_position_type jacobian_position_type;
+
+        typedef typename BaseClass::ImageRegWarperType ImageRegWarperType;
+
+        typedef typename BaseClass::ImageRegDissimilarityType ImageRegDissimilarityType;
+
+        hoImageRegParametricSolver();
+        virtual ~hoImageRegParametricSolver();
+
+        /// set the transformation to be optimized
+        /// only the address is stored; the solver modifies the transformation in place
+        void setTransform(TransformationType& transform) { transform_ = &transform; }
+
+        /// check that all inputs are set and prepare the warper, the dissimilarity and the step sizes
+        virtual bool initialize();
+
+        /// solve the minimization and find the optimal transformation
+        virtual bool solve();
+
+        /// perform one iteration of optimization
+        /// takes the current dissimilarity and returns the dissimilarity after the iteration
+        virtual ValueType solver_once(ValueType curr_dissimilarity) = 0;
+
+        /// compute the derivatives of dissimilarity measures to the transformation parameters
+        /// if the analytic derivative is hard to compute, the central difference derivative is computed
+        /// deriv_step_size is the step size used to compute central difference derivatives
+        virtual bool evaluateDeriv(TransformationType* transform, ImageRegDissimilarityType* dissimilarity, const std::vector<ValueType>& deriv_step_size, std::vector<ValueType>& deriv);
+
+        virtual void print(std::ostream& os) const;
+
+        /// number of performed iterations
+        unsigned int iter_num_;
+
+        /// maximal number of iterations
+        unsigned int max_iter_num_;
+
+        /// threshold for minimal dissimilarity changes
+        ValueType dissimilarity_thres_;
+
+        /// threshold for minimal parameter changes
+        ValueType parameter_thres_;
+
+        /// number of search division
+        unsigned int div_num_;
+
+        /// step size for every parameters used in optimization
+        /// depending on the optimization algorithm, this variable may not be used
+        std::vector<ValueType> step_size_para_;
+        /// step size division ratio
+        /// step_size_para_ = step_size_para_ .* step_size_div_para_ to reduce search step size
+        std::vector<ValueType> step_size_div_para_;
+
+        using BaseClass::verbose_;
+        using BaseClass::gt_timer1_;
+        using BaseClass::gt_timer2_;
+        using BaseClass::gt_timer3_;
+        using BaseClass::performTiming_;
+        using BaseClass::gt_exporter_;
+        using BaseClass::debugFolder_;
+
+    protected:
+
+        /// transformation being optimized; not owned by the solver
+        TransformationType* transform_;
+
+        /// dissimilarity after and before the latest call of solver_once()
+        ValueType curr_dissimilarity_;
+        ValueType prev_dissimilarity_;
+
+        using BaseClass::target_;
+        using BaseClass::source_;
+        using BaseClass::warpped_;
+        using BaseClass::bg_value_;
+        using BaseClass::interp_;
+        using BaseClass::warper_;
+        using BaseClass::dissimilarity_;
+        using BaseClass::use_world_coordinate_;
+    };
+
+    /// Construct a solver with no transformation attached.
+    /// Every member gets a defined value, so that the NULL check on transform_ in initialize()
+    /// and the loop bounds read by solve() never see indeterminate data.
+    /// max_iter_num_ and div_num_ start at 0: they have to be set before solve() performs any iteration.
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    hoImageRegParametricSolver<ValueType, CoordType, DIn, DOut>::hoImageRegParametricSolver() 
+        : BaseClass(), 
+          iter_num_(0), max_iter_num_(0), 
+          dissimilarity_thres_(1e-8), parameter_thres_(1e-8), 
+          div_num_(0), 
+          transform_(NULL), 
+          curr_dissimilarity_(0), prev_dissimilarity_(0)
+    {
+    }
+
+    /// Destructor. Nothing is released here; transform_ is only a pointer to an
+    /// object handed in through setTransform().
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    hoImageRegParametricSolver<ValueType, CoordType, DIn, DOut>::~hoImageRegParametricSolver()
+    {
+    }
+
+    /// Prepare the solver before the optimization starts.
+    /// Checks that target, source, interpolator, warper, dissimilarity and transformation are set,
+    /// connects the warper to the transformation and the interpolator, forwards the background value,
+    /// sizes the warped image like the target and makes sure that one step size and one division
+    /// ratio exist for every transformation parameter (defaults: 1.0 and 0.5).
+    /// \return false if an input is missing or an exception was thrown
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    bool hoImageRegParametricSolver<ValueType, CoordType, DIn, DOut>::initialize()
+    {
+        try
+        {
+            // all collaborators have to be supplied through the setters
+            GADGET_CHECK_RETURN_FALSE(target_!=NULL);
+            GADGET_CHECK_RETURN_FALSE(source_!=NULL);
+            GADGET_CHECK_RETURN_FALSE(interp_!=NULL);
+            GADGET_CHECK_RETURN_FALSE(warper_!=NULL);
+            GADGET_CHECK_RETURN_FALSE(dissimilarity_!=NULL);
+            GADGET_CHECK_RETURN_FALSE(transform_!=NULL);
+
+            // wire the warper
+            warper_->setTransformation(*transform_);
+            warper_->setInterpolator(*interp_);
+            warper_->setBackgroundValue(bg_value_);
+
+            dissimilarity_->setBackgroundValue(bg_value_);
+
+            // the warped image lives on the grid of the target
+            const bool sameGrid = warpped_.dimensions_equal(*target_);
+            if ( !sameGrid )
+            {
+                warpped_ = *target_;
+            }
+
+            dissimilarity_->initialize(*target_);
+
+            // one step size and one division ratio per parameter
+            const size_t paraCount = transform_->get_number_of_parameters();
+
+            if ( step_size_para_.size() != paraCount )
+            {
+                step_size_para_.resize(paraCount, 1.0);
+            }
+
+            if ( step_size_div_para_.size() != paraCount )
+            {
+                step_size_div_para_.resize(paraCount, 0.5);
+            }
+
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegParametricSolver<ValueType, CoordType, DIn, DOut>::initialize() ... ");
+        }
+
+        return false;
+    }
+
+    /// Run the optimization.
+    /// After initialize(), div_num_ search divisions are performed. Within a division solver_once()
+    /// is called up to max_iter_num_ times and the division ends early when the dissimilarity
+    /// stops decreasing by at least dissimilarity_thres_. After every division each step size is
+    /// multiplied by its division ratio.
+    /// \return false if the initialization or the first warp failed, or an exception was thrown
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    bool hoImageRegParametricSolver<ValueType, CoordType, DIn, DOut>::solve()
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(this->initialize());
+
+            const size_t paraCount = transform_->get_number_of_parameters();
+
+            // dissimilarity of the starting transformation
+            GADGET_CHECK_RETURN_FALSE(warper_->warp(*target_, *source_, use_world_coordinate_, warpped_));
+            curr_dissimilarity_ = dissimilarity_->evaluate(warpped_);
+
+            GADGET_CHECK_PERFORM(verbose_, GADGET_MSG("----> Initial image dissimilarity : " << curr_dissimilarity_) );
+
+            unsigned int iterSum = 0;
+
+            for ( unsigned int division=0; division<div_num_; ++division )
+            {
+                GADGET_CHECK_PERFORM(verbose_, GADGET_MSG("----> Parameter division " << division << " [out of " << div_num_ << "] ") );
+
+                iter_num_ = 0;
+                while ( iter_num_ < max_iter_num_ )
+                {
+                    GADGET_CHECK_PERFORM(verbose_, GADGET_MSG("--> Iteration " << iter_num_ << " [out of " << max_iter_num_ << "] : \t" << curr_dissimilarity_) );
+
+                    prev_dissimilarity_ = curr_dissimilarity_;
+                    curr_dissimilarity_ = this->solver_once(prev_dissimilarity_);
+
+                    // leave the division once the dissimilarity no longer decreases
+                    if ( prev_dissimilarity_ < curr_dissimilarity_ + dissimilarity_thres_ )
+                    {
+                        break;
+                    }
+
+                    ++iter_num_;
+                }
+                GADGET_CHECK_PERFORM(verbose_, transform_->printTransform(std::cout));
+
+                iterSum += iter_num_;
+
+                // shrink every step size for the next division
+                for ( size_t para=0; para<paraCount; ++para )
+                {
+                    step_size_para_[para] *= step_size_div_para_[para];
+                }
+            }
+
+            GADGET_CHECK_PERFORM(verbose_, GADGET_MSG("----> Total iteration number : " << iterSum) );
+
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegParametricSolver<ValueType, CoordType, DIn, DOut>::solve() ... ");
+        }
+
+        return false;
+    }
+
+    /// Compute the derivative of the dissimilarity with respect to every transformation parameter
+    /// by central differences and normalize the result to unit Euclidean length.
+    /// Only parameters whose status is TransformationType::Active are perturbed; all other entries are 0.
+    /// NOTE: the transform and dissimilarity arguments are currently not used. The member objects
+    /// transform_ and dissimilarity_ are perturbed and evaluated, because the warper was connected
+    /// to transform_ in initialize().
+    /// \param deriv_step_size perturbation for every parameter; needs at least one entry per parameter
+    /// \param deriv output, resized to the number of parameters
+    /// \return false if a step size is missing, a warp failed or an exception was thrown
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    bool hoImageRegParametricSolver<ValueType, CoordType, DIn, DOut>::
+    evaluateDeriv(TransformationType* transform, ImageRegDissimilarityType* dissimilarity, const std::vector<ValueType>& deriv_step_size, std::vector<ValueType>& deriv)
+    {
+        try
+        {
+            bool has_analytic_deriv = false;
+
+            /// for some transformation and dissimilarity combination, the analytical derivative is easier to be computed
+
+            /// implement the central difference numerical derivative
+            if ( !has_analytic_deriv )
+            {
+                size_t numOfPara = transform_->get_number_of_parameters();
+                size_t i;
+
+                // every parameter needs its own perturbation
+                GADGET_CHECK_RETURN_FALSE(deriv_step_size.size() >= numOfPara);
+
+                // assign (not resize) so that entries of inactive parameters are 0 even when
+                // the caller passes in a vector that already holds values
+                deriv.assign(numOfPara, ValueType(0));
+
+                ValueType currPara(0), positiveValue(0), negativeValue(0), normDeriv(0);
+                for ( i=0; i<numOfPara; i++ )
+                {
+                    // a zero perturbation would give 0/0; leave that derivative at 0
+                    if ( deriv_step_size[i] == ValueType(0) ) continue;
+
+                    if ( transform_->get_para_status(i) == TransformationType::Active )
+                    {
+                        currPara = transform_->get_parameter(i);
+
+                        // positive
+                        transform_->set_parameter(i, currPara + deriv_step_size[i]);
+
+                        GADGET_CHECK_RETURN_FALSE(warper_->warp(*target_, *source_, use_world_coordinate_, warpped_));
+                        positiveValue = dissimilarity_->evaluate(warpped_);
+
+                        // negative
+                        transform_->set_parameter(i, currPara - deriv_step_size[i]);
+
+                        GADGET_CHECK_RETURN_FALSE(warper_->warp(*target_, *source_, use_world_coordinate_, warpped_));
+                        negativeValue = dissimilarity_->evaluate(warpped_);
+
+                        deriv[i] = (positiveValue - negativeValue)/(2*deriv_step_size[i]);
+                        normDeriv += deriv[i]*deriv[i];
+
+                        // restore the unperturbed parameter
+                        transform_->set_parameter(i, currPara);
+                    }
+                }
+
+                // normalize to unit length; an all-zero derivative is left untouched
+                if ( normDeriv > 0 )
+                {
+                    ValueType distDeriv=std::sqrt(normDeriv);
+
+                    for ( i=0; i<numOfPara; i++ )
+                    {
+                        deriv[i] /= distDeriv;
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in evaluateDeriv(TransformationType* transform, ImageRegDissimilarityType* dissimilarity, const std::vector<ValueType>& deriv_step_size, std::vector<ValueType>& deriv) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Print the settings of the parametric solver to os.
+    /// The target image has DOut dimensions and the source image DIn dimensions
+    /// (see TargetType and SourceType); the two values used to be printed swapped.
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    void hoImageRegParametricSolver<ValueType, CoordType, DIn, DOut>::print(std::ostream& os) const
+    {
+        using namespace std;
+        os << "--------------Gagdgetron parametric image registration solver -------------" << endl;
+        os << "Target image dimension is : " << DOut << endl;
+        os << "Source image dimension is : " << DIn << endl;
+        os << "Image data type is : " << std::string(typeid(ValueType).name()) << std::endl;
+        os << "Transformation data type is : " << std::string(typeid(CoordType).name()) << std::endl;
+        os << "Use world coordinate is : " << use_world_coordinate_ << std::endl;
+        os << "Maximal iteration number is : " << max_iter_num_ << std::endl;
+        os << "Dissimilarity threshold is : " << dissimilarity_thres_ << std::endl;
+        os << "Parameter threshold is : " << parameter_thres_ << std::endl;
+        os << "Number of search division is : " << div_num_ << std::endl;
+
+        // one entry per transformation parameter
+        os << "Step size for every parameters used in optimization is : [ ";
+        size_t ii;
+        for ( ii=0; ii<step_size_para_.size(); ii++ )
+        {
+            os << step_size_para_[ii] << " ";
+        }
+        os << " ] " << endl;
+
+        os << "Step size division ratio is : [ ";
+        for ( ii=0; ii<step_size_div_para_.size(); ii++ )
+        {
+            os << step_size_div_para_[ii] << " ";
+        }
+        os << " ] " << endl;
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/solver/hoImageRegSolver.h b/toolboxes/registration/optical_flow/cpu/solver/hoImageRegSolver.h
new file mode 100644
index 0000000..1534425
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/solver/hoImageRegSolver.h
@@ -0,0 +1,210 @@
+/** \file   hoImageRegSolver.h
+    \brief  Define the base class of image registration solver for gadgetron
+
+            The solver takes in the image warper, similarity, target and source images, and solves
+            for an optimal image transformation.
+
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoNDArray.h"
+#include "hoNDImage.h"
+#include "hoNDInterpolator.h"
+#include "hoNDBoundaryHandler.h"
+#include "hoMatrix.h"
+#include "hoNDArray_utils.h"
+#include "hoNDArray_elemwise.h"
+#include "hoNDImage_util.h"
+#include "hoImageRegTransformation.h"
+#include "hoImageRegWarper.h"
+#include "hoImageRegDissimilarity.h"
+#include "gtPlusISMRMRDReconUtil.h"
+#include "GtPrepUtil.h"
+
+#ifdef USE_OMP
+    #include <omp.h>
+#endif // USE_OMP
+
+namespace Gadgetron
+{
+    // define the solver type
+    // every enumerator has a text name, see getImageRegSolverName() and getImageRegSolverType()
+    enum GT_IMAGE_REG_SOLVER
+    {
+        GT_IMAGE_REG_SOLVER_DOWNHILL,                   // "DownHill"
+        GT_IMAGE_REG_SOLVER_GRADIENT_DESCENT,           // "GradientDescent"
+        GT_IMAGE_REG_SOLVER_PDE_TIME_INTEGRATION,       // "PDE_Time_Integration"
+        GT_IMAGE_REG_SOLVER_PDE_TIME_INTEGRATION_INV    // "PDE_Time_Integration_Inv"
+    };
+
+    /// Convert a solver type to its text name.
+    /// An unrecognized value is reported through GADGET_ERROR_MSG and gives an empty string.
+    inline std::string getImageRegSolverName(GT_IMAGE_REG_SOLVER v)
+    {
+        switch (v)
+        {
+            case GT_IMAGE_REG_SOLVER_DOWNHILL:
+                return std::string("DownHill");
+
+            case GT_IMAGE_REG_SOLVER_GRADIENT_DESCENT:
+                return std::string("GradientDescent");
+
+            case GT_IMAGE_REG_SOLVER_PDE_TIME_INTEGRATION:
+                return std::string("PDE_Time_Integration");
+
+            case GT_IMAGE_REG_SOLVER_PDE_TIME_INTEGRATION_INV:
+                return std::string("PDE_Time_Integration_Inv");
+
+            default:
+                break;
+        }
+
+        // not one of the known solver types
+        GADGET_ERROR_MSG("Unrecognized image registration solver type : " << v);
+        return std::string();
+    }
+
+    /// Convert a text name to the solver type; the inverse of getImageRegSolverName().
+    /// An unrecognized name is reported through GADGET_ERROR_MSG and GT_IMAGE_REG_SOLVER_DOWNHILL
+    /// is returned as a defined fallback (an uninitialized value used to be returned).
+    inline GT_IMAGE_REG_SOLVER getImageRegSolverType(const std::string& name)
+    {
+        // fallback for unrecognized names
+        GT_IMAGE_REG_SOLVER v = GT_IMAGE_REG_SOLVER_DOWNHILL;
+
+        if ( name == "DownHill" )
+        {
+            v = GT_IMAGE_REG_SOLVER_DOWNHILL;
+        }
+        else if ( name == "GradientDescent" )
+        {
+            v = GT_IMAGE_REG_SOLVER_GRADIENT_DESCENT;
+        }
+        else if ( name == "PDE_Time_Integration" )
+        {
+            v = GT_IMAGE_REG_SOLVER_PDE_TIME_INTEGRATION;
+        }
+        else if ( name == "PDE_Time_Integration_Inv" )
+        {
+            v = GT_IMAGE_REG_SOLVER_PDE_TIME_INTEGRATION_INV;
+        }
+        else
+        {
+            GADGET_ERROR_MSG("Unrecognized image registration solver name : " << name);
+        }
+
+        return v;
+    }
+
+    /// Base class of the image registration solvers.
+    /// The solver holds pointers to the target and source images, the interpolator, the warper and
+    /// the dissimilarity measure; none of these objects is owned or copied by the solver.
+    /// ValueType: image pixel value type
+    /// CoordType: transformation data type
+    /// DIn: dimension of the source image
+    /// DOut: dimension of the target image
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    class hoImageRegSolver
+    {
+    public:
+
+        typedef hoImageRegSolver<ValueType, CoordType, DIn, DOut> Self;
+
+        typedef hoNDImage<ValueType, DOut> TargetType;
+        typedef hoNDImage<ValueType, DIn> SourceType;
+
+        typedef hoNDImage<ValueType, 2> Target2DType;
+        typedef Target2DType Source2DType;
+
+        typedef hoNDImage<ValueType, 3> Target3DType;
+        /// 3D source image type (previously aliased the 2D type by mistake)
+        typedef Target3DType Source3DType;
+
+        typedef hoNDInterpolator<SourceType> InterpolatorType;
+
+        typedef ValueType T;
+        typedef ValueType element_type;
+        typedef ValueType value_type;
+
+        typedef CoordType coord_type;
+
+        typedef hoImageRegWarper<ValueType, CoordType, DIn, DOut> ImageRegWarperType;
+
+        typedef hoImageRegDissimilarity<ValueType, DOut> ImageRegDissimilarityType;
+
+        hoImageRegSolver();
+        virtual ~hoImageRegSolver();
+
+        /// set the images; only the addresses are stored
+        void setTarget(TargetType& target) { target_ = &target; }
+        void setSource(SourceType& source) { source_ = &source; }
+
+        /// set the processing objects; only the addresses are stored
+        void setDissimilarity(ImageRegDissimilarityType& dissimilarity) { dissimilarity_ = &dissimilarity; }
+        void setWarper(ImageRegWarperType& warper) { warper_ = &warper; }
+        void setInterpolator(InterpolatorType& interp) { interp_ = &interp; }
+        void setBackgroundValue(ValueType bg_value) { bg_value_ = bg_value; }
+
+        void setUseWorldCoordinate(bool use_world_coordinate) { use_world_coordinate_ = use_world_coordinate; }
+
+        /// solve the registration; implemented by the derived solvers
+        virtual bool solve() = 0;
+
+        virtual void print(std::ostream& os) const;
+
+        /// if true, print out more intermediate information
+        bool verbose_;
+
+        // ----------------------------------
+        // debug and timing
+        // ----------------------------------
+        // clock for timing
+        Gadgetron::GadgetronTimer gt_timer1_;
+        Gadgetron::GadgetronTimer gt_timer2_;
+        Gadgetron::GadgetronTimer gt_timer3_;
+
+        bool performTiming_;
+
+        // exporter
+        Gadgetron::gtPlus::gtPlusIOAnalyze gt_exporter_;
+
+        // debug folder
+        std::string debugFolder_;
+
+    protected:
+
+        TargetType* target_;
+        SourceType* source_;
+
+        // warped image
+        TargetType warpped_;
+
+        ValueType bg_value_;
+
+        InterpolatorType* interp_;
+
+        ImageRegWarperType* warper_;
+
+        ImageRegDissimilarityType* dissimilarity_;
+
+        /// whether to perform registration using the world coordinates
+        bool use_world_coordinate_;
+    };
+
+    /// Construct a solver without images or processing objects attached:
+    /// all pointers are NULL, the background value is 0, world coordinates are used,
+    /// verbose output and timing are switched off.
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    hoImageRegSolver<ValueType, CoordType, DIn, DOut>::hoImageRegSolver() 
+        : verbose_(false), 
+          performTiming_(false), 
+          target_(NULL), 
+          source_(NULL), 
+          bg_value_(0), 
+          interp_(NULL), 
+          warper_(NULL), 
+          dissimilarity_(NULL), 
+          use_world_coordinate_(true)
+    {
+        // the three timers shall not report when they are destroyed
+        gt_timer1_.set_timing_in_destruction(false);
+        gt_timer2_.set_timing_in_destruction(false);
+        gt_timer3_.set_timing_in_destruction(false);
+    }
+
+    /// Destructor. Nothing is released here; the pointer members refer to objects
+    /// handed in through the setters.
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    hoImageRegSolver<ValueType, CoordType, DIn, DOut>::~hoImageRegSolver()
+    {
+    }
+
+    /// Print the general solver settings to os.
+    /// The target image has DOut dimensions and the source image DIn dimensions
+    /// (see TargetType and SourceType); the two values used to be printed swapped.
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    void hoImageRegSolver<ValueType, CoordType, DIn, DOut>::print(std::ostream& os) const
+    {
+        using namespace std;
+        os << "--------------Gagdgetron image registration solver -------------" << endl;
+        os << "Target image dimension is : " << DOut << endl;
+        os << "Source image dimension is : " << DIn << endl;
+        os << "Image data type is : " << std::string(typeid(ValueType).name()) << std::endl;
+        os << "Transformation data type is : " << std::string(typeid(CoordType).name()) << std::endl;
+        os << "Use world coordinate is : " << use_world_coordinate_ << std::endl;
+        os << "verbose flag is : " << verbose_ << std::endl;
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/transformation/hoImageRegDeformationField.h b/toolboxes/registration/optical_flow/cpu/transformation/hoImageRegDeformationField.h
new file mode 100644
index 0000000..89e627a
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/transformation/hoImageRegDeformationField.h
@@ -0,0 +1,964 @@
+/** \file   hoImageRegDeformationField.h
+    \brief  Define the geometry transformation using deformation field
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoImageRegNonParametricTransformation.h"
+
+namespace Gadgetron
+{
+    /// deformation field is defined as hoNDImage
+    /// one deformation image is stored for each of the D output dimensions
+    /// the deformation field can be accessed on image pixels
+    /// if the non-integer image pixels are used to access deformation field, an image interpolator is used
+    /// linear interpolator is used for deformation field
+    /// the unit of stored deformation field is in pixel, not in world coordinates
+    template<typename ValueType, unsigned int D> 
+    class  hoImageRegDeformationField: public hoImageRegNonParametricTransformation<ValueType, D, D>
+    {
+    public:
+
+        typedef hoImageRegTransformation<ValueType, D, D> Self;
+        typedef hoImageRegNonParametricTransformation<ValueType, D, D> BaseClass;
+
+        typedef ValueType T;
+
+        typedef typename BaseClass::input_point_type input_point_type;
+        typedef typename BaseClass::output_point_type output_point_type;
+
+        typedef typename BaseClass::jacobian_position_type jacobian_position_type;
+
+        typedef hoNDImage<T, D> DeformationFieldType;
+
+        typedef typename DeformationFieldType::coord_type coord_type;
+
+        typedef typename DeformationFieldType::axis_type axis_type;
+
+        typedef hoNDBoundaryHandler<DeformationFieldType> BoundHanlderType;
+        typedef hoNDInterpolator<DeformationFieldType> InterpolatorType;
+
+        typedef hoNDInterpolatorLinear<DeformationFieldType> DefaultInterpolatorType;
+        typedef hoNDBoundaryHandlerBorderValue<DeformationFieldType> DefaultBoundHanlderType;
+
+        /// constructors
+        /// apart from the default constructor, all of them create the D deformation images and fill them with zeros
+        hoImageRegDeformationField();
+        hoImageRegDeformationField(const std::vector<size_t>& dimensions);
+        hoImageRegDeformationField(const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin, const axis_type& axis);
+        hoImageRegDeformationField(const hoNDImage<ValueType, D>& im);
+
+        virtual ~hoImageRegDeformationField();
+
+        virtual bool invertTransformation();
+
+        virtual bool setIdentity();
+
+        /// update the internal status after the deformation fields are changed
+        virtual bool update();
+
+        /// transform a point
+        /// the point is in the non-integer image pixel indexes
+        /// image interpolator is used
+        virtual bool transform(const T* pt_in, T* pt_out) const;
+        virtual bool transform(const T& xi, const T& yi, T& xo, T& yo) const;
+        virtual bool transform(const T& xi, const T& yi, const T& zi, T& xo, T& yo, T& zo) const;
+
+        /// transform a point
+        /// the point is in the integer image pixel indexes
+        /// image interpolator is not used
+        /// pt_in, pt_out stores a point as an array
+        virtual bool transform(const size_t* pt_in, T* pt_out) const;
+        virtual bool transform(const size_t* pt_in, size_t N, T* pt_out) const;
+
+        /// for 2D - 2D transformation
+        virtual bool transform(const size_t& xi, const size_t& yi, T& xo, T& yo) const;
+        virtual bool transform(const size_t* xi, const size_t* yi, size_t N, T* xo, T* yo) const;
+
+        /// for 3D - 3D transformation
+        virtual bool transform(const size_t& xi, const size_t& yi, const size_t& zi, T& xo, T& yo, T& zo) const;
+        virtual bool transform(const size_t* xi, const size_t* yi, const size_t* zi, size_t N, T* xo, T* yo, T* zo) const;
+
+        /// compute jacobian matrix to spatial position
+        /// the jacobian matrix is computed with the compensation for non-isotropic pixel sizes
+        /// e.g. dxdy = ( dx(x,y+dh)*sx - dx(x, y-dh)*sx ) / (2*dh*sy); sx, sy: pixel sizes for x and y directions
+        /// DOut*DIn matrix
+        virtual bool jacobianPosition(const input_point_type& /*pos*/, jacobian_position_type& jac);
+
+        /// compute jacobian matrix on the deformation grid
+        /// jac is [DOut Din dimensions] array, storing the jacobian matrix for every point in the deformation field
+        virtual bool jacobianPosition(hoNDArray<T>& jac, DeformationFieldType* deform_field[D], unsigned int borderWidth=1);
+        virtual bool jacobianPosition(hoNDArray<T>& jac, unsigned int borderWidth=1);
+
+        /// compute some parameters from deformation field and jacobian matrix
+        /// in the world coordinate
+        virtual bool analyzeJacobianAndDeformation(const hoNDArray<T>& jac, DeformationFieldType* deform_field[D], T& meanDeform, T& maxDeform, T& meanLogJac, T& maxLogJac, unsigned int borderWidth=1);
+        virtual bool analyzeJacobianAndDeformation(const hoNDArray<T>& jac, T& meanDeform, T& maxDeform, T& meanLogJac, T& maxLogJac, unsigned int borderWidth=1);
+
+        /// get/set the deformation vector on the deformation grid (image coordinate)
+        /// given the index idx[DIn], output the deformation value for outDim
+        T& operator()( size_t idx[D], size_t outDim );
+        const T& operator()( size_t idx[D], size_t outDim ) const;
+
+        void get(size_t idx[D], T deform[D]);
+        void get(size_t x, size_t y, T& dx, T& dy);
+        void get(size_t x, size_t y, size_t z, T& dx, T& dy, T& dz);
+
+        void set(size_t idx[D], T deform[D]);
+        void set(size_t x, size_t y, T dx, T dy);
+        void set(size_t x, size_t y, size_t z, T dx, T dy, T dz);
+
+        /// get/set the deformation vector on the world coordinate
+        /// given the position pos[DIn], output the deformation value for outDim
+        T operator()( coord_type pos[D], size_t outDim );
+
+        void get(coord_type pos[D], T deform[D]);
+        void get(coord_type px, coord_type py, T& dx, T& dy);
+        void get(coord_type px, coord_type py, coord_type pz, T& dx, T& dy, T& dz);
+
+        /// get/set interpolator
+        //void getInterpolator(InterpolatorType*& interp, size_t outDim);
+        //void setInterpolator(InterpolatorType* interp, size_t outDim);
+
+        /// get/set deformation field
+        /// outDim selects one of the D deformation images and has to be smaller than D
+        void getDeformationField(DeformationFieldType*& deform, size_t outDim);
+        DeformationFieldType& getDeformationField(size_t outDim) { GADGET_DEBUG_CHECK_THROW(outDim<D); return this->deform_field_[outDim]; }
+
+        void setDeformationField(const DeformationFieldType& deform, size_t outDim);
+
+        /// serialize/deserialize the transformation
+        virtual bool serialize(char*& buf, size_t& len) const ;
+        virtual bool deserialize(char* buf, size_t& len);
+
+        virtual void print(std::ostream& os) const;
+
+        virtual std::string transformationName() const;
+
+        using BaseClass::gt_timer1_;
+        using BaseClass::gt_timer2_;
+        using BaseClass::gt_timer3_;
+        using BaseClass::performTiming_;
+        using BaseClass::gt_exporter_;
+        using BaseClass::debugFolder_;
+
+    protected:
+
+        /// deformation image for every output dimension
+        DeformationFieldType deform_field_[D];
+
+        //InterpolatorType* interp_[D];
+
+        /// interpolator and boundary handler for every deformation image
+        /// allocated in the constructors and deleted in the destructor
+        /// NOTE(review): no copy constructor or assignment operator is declared, so a copied object
+        /// would hold the same pointers - confirm that objects of this class are never copied
+        DefaultInterpolatorType* interp_default_[D];
+        DefaultBoundHanlderType* bh_default_[D];
+    };
+
+    /// Default constructor: the deformation images are left as default-constructed;
+    /// a boundary handler and an interpolator are allocated for each of them.
+    template <typename ValueType, unsigned int D> 
+    hoImageRegDeformationField<ValueType, D>::hoImageRegDeformationField() : BaseClass()
+    {
+        for ( unsigned int d=0; d<D; ++d )
+        {
+            DeformationFieldType& field = deform_field_[d];
+
+            // the interpolator refers to the boundary handler, so the handler is created first
+            bh_default_[d] = new DefaultBoundHanlderType(field);
+            interp_default_[d] = new DefaultInterpolatorType(field, *(bh_default_[d]));
+        }
+    }
+
+    /// Construct a zero deformation field of the given dimensions.
+    /// Every deformation image is created, filled with zeros and gets its own
+    /// boundary handler and interpolator.
+    template <typename ValueType, unsigned int D> 
+    hoImageRegDeformationField<ValueType, D>::
+    hoImageRegDeformationField(const std::vector<size_t>& dimensions) : BaseClass()
+    {
+        for ( unsigned int d=0; d<D; ++d )
+        {
+            DeformationFieldType& field = deform_field_[d];
+
+            field.create(dimensions);
+            memset(field.get_data_ptr(), 0, field.get_number_of_elements()*sizeof(T));
+
+            // the interpolator refers to the boundary handler, so the handler is created first
+            bh_default_[d] = new DefaultBoundHanlderType(field);
+            interp_default_[d] = new DefaultInterpolatorType(field, *(bh_default_[d]));
+        }
+    }
+
+    /// Construct a zero deformation field from dimensions, pixel sizes, origin and axis.
+    /// Every deformation image is created with this geometry, filled with zeros and gets
+    /// its own boundary handler and interpolator.
+    template <typename ValueType, unsigned int D> 
+    hoImageRegDeformationField<ValueType, D>::
+    hoImageRegDeformationField(const std::vector<size_t>& dimensions, const std::vector<coord_type>& pixelSize, const std::vector<coord_type>& origin, const axis_type& axis) : BaseClass()
+    {
+        for ( unsigned int d=0; d<D; ++d )
+        {
+            DeformationFieldType& field = deform_field_[d];
+
+            field.create(dimensions, pixelSize, origin, axis);
+            memset(field.get_data_ptr(), 0, field.get_number_of_elements()*sizeof(T));
+
+            // the interpolator refers to the boundary handler, so the handler is created first
+            bh_default_[d] = new DefaultBoundHanlderType(field);
+            interp_default_[d] = new DefaultInterpolatorType(field, *(bh_default_[d]));
+        }
+    }
+
+    /// Construct a zero deformation field with the geometry of an image.
+    /// Dimensions, pixel sizes, origin and axis are read from im; the pixel values of im are not used.
+    template <typename ValueType, unsigned int D> 
+    hoImageRegDeformationField<ValueType, D>::hoImageRegDeformationField(const hoNDImage<ValueType, D>& im) : BaseClass()
+    {
+        // read the geometry of the image
+        std::vector<size_t> imDim;
+        im.get_dimensions(imDim);
+
+        std::vector<coord_type> imPixelSize;
+        im.get_pixel_size(imPixelSize);
+
+        std::vector<coord_type> imOrigin;
+        im.get_origin(imOrigin);
+
+        axis_type imAxis;
+        im.get_axis(imAxis);
+
+        for ( unsigned int d=0; d<D; ++d )
+        {
+            DeformationFieldType& field = deform_field_[d];
+
+            field.create(imDim, imPixelSize, imOrigin, imAxis);
+            memset(field.get_data_ptr(), 0, field.get_number_of_elements()*sizeof(T));
+
+            // the interpolator refers to the boundary handler, so the handler is created first
+            bh_default_[d] = new DefaultBoundHanlderType(field);
+            interp_default_[d] = new DefaultInterpolatorType(field, *(bh_default_[d]));
+        }
+    }
+
+    /// Destructor: delete the boundary handler and the interpolator of every deformation image.
+    template <typename ValueType, unsigned int D> 
+    hoImageRegDeformationField<ValueType, D>::
+    ~hoImageRegDeformationField()
+    {
+        for ( unsigned int d=0; d<D; ++d )
+        {
+            delete bh_default_[d];
+            delete interp_default_[d];
+        }
+    }
+
+    /// Invert the deformation field.
+    /// NOTE: not implemented yet. The deformation field is left unchanged and
+    /// true is returned nevertheless.
+    template <typename ValueType, unsigned int D> 
+    inline bool hoImageRegDeformationField<ValueType, D>::invertTransformation()
+    {
+        /// to be implemented ... 
+        return true;
+    }
+
+    /// Reset the transformation to identity by filling all deformation images with zeros.
+    /// \return false if an exception was thrown
+    template <typename ValueType, unsigned int D> 
+    inline bool hoImageRegDeformationField<ValueType, D>::setIdentity()
+    {
+        try
+        {
+            for ( unsigned int d=0; d<D; ++d )
+            {
+                DeformationFieldType& field = deform_field_[d];
+                memset(field.get_data_ptr(), 0, field.get_number_of_elements()*sizeof(T));
+            }
+
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDeformationField<ValueType, D>::setIdentity() ... ");
+        }
+
+        return false;
+    }
+
+    /// Reconnect every interpolator with its deformation image and its boundary handler.
+    /// To be called after the deformation images were changed.
+    /// \return false if an exception was thrown
+    template <typename ValueType, unsigned int D> 
+    inline bool hoImageRegDeformationField<ValueType, D>::update()
+    {
+        try
+        {
+            for ( unsigned int d=0; d<D; ++d )
+            {
+                DefaultInterpolatorType* interp = interp_default_[d];
+
+                interp->setArray(deform_field_[d]);
+                interp->setBoundaryHandler(*bh_default_[d]);
+            }
+
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDeformationField<ValueType, D>::update() ... ");
+        }
+
+        return false;
+    }
+
+    /// Transform one point given in continuous coordinates.
+    /// \param pt_in   D input coordinates
+    /// \param pt_out  D output coordinates; pt_out[d] = pt_in[d] + deformation component d interpolated at pt_in
+    /// \return true on success; false if an exception was caught (an error message is logged)
+    /// pt_in and pt_out may point to the same buffer: the position is copied before anything is written.
+    template <typename ValueType, unsigned int D> 
+    inline bool hoImageRegDeformationField<ValueType, D>::transform(const T* pt_in, T* pt_out) const
+    {
+        try
+        {
+            // copy the input position so that every component is interpolated at the untransformed point
+            std::vector<coord_type> pos(D);
+
+            int ii;
+            for ( ii=0; ii<(int)D; ii++ )
+            {
+                pos[ii] = pt_in[ii];
+            }
+
+            #pragma omp parallel for default(none) private(ii) shared(pos, pt_in, pt_out)
+            for ( ii=0; ii<(int)D; ii++ )
+            {
+                // assign instead of accumulate: the result must not depend on what pt_out held before the call
+                // (for an in-place call pt_out[ii] still equals pt_in[ii] here, so the value is unchanged)
+                pt_out[ii] = pt_in[ii] + this->interp_default_[ii]->operator()(pos);
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDeformationField<ValueType, D>::transform(T* pt_in, T* pt_out) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Transform a 2D point given in continuous coordinates.
+    /// Each output coordinate is the input coordinate plus the matching deformation component
+    /// interpolated at (xi, yi).
+    /// \return true on success; false if an exception was caught (an error message is logged)
+    template <typename ValueType, unsigned int D> 
+    inline bool hoImageRegDeformationField<ValueType, D>::transform(const T& xi, const T& yi, T& xo, T& yo) const
+    {
+        try
+        {
+            // x is written before y is evaluated, exactly as in the statement order below
+            xo = xi + this->interp_default_[0]->operator()(xi, yi);
+            yo = yi + this->interp_default_[1]->operator()(xi, yi);
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDeformationField<ValueType, D>::transform(const T& xi, const T& yi, T& xo, T& yo) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Transform a 3D point given in continuous coordinates.
+    /// Each output coordinate is the input coordinate plus the matching deformation component
+    /// interpolated at (xi, yi, zi).
+    /// \return true on success; false if an exception was caught (an error message is logged)
+    template <typename ValueType, unsigned int D> 
+    inline bool hoImageRegDeformationField<ValueType, D>::transform(const T& xi, const T& yi, const T& zi, T& xo, T& yo, T& zo) const
+    {
+        try
+        {
+            // outputs are written in x, y, z order
+            xo = xi + this->interp_default_[0]->operator()(xi, yi, zi);
+            yo = yi + this->interp_default_[1]->operator()(xi, yi, zi);
+            zo = zi + this->interp_default_[2]->operator()(xi, yi, zi);
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDeformationField<ValueType, D>::transform(const T& xi, const T& yi, const T& zi, T& xo, T& yo, T& zo) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Transform one point given by integer pixel indices; the deformation is read directly from the
+    /// stored field, no interpolation is involved.
+    /// \param pt_in   D pixel indices
+    /// \param pt_out  D output coordinates, pt_out[d] = pt_in[d] + deformation component d at pt_in
+    /// \return true on success; false if an exception was caught (an error message is logged)
+    template <typename ValueType, unsigned int D> 
+    inline bool hoImageRegDeformationField<ValueType, D>::transform(const size_t* pt_in, T* pt_out) const
+    {
+        try
+        {
+            for ( unsigned int d=0; d<D; ++d )
+            {
+                pt_out[d] = pt_in[d] + this->deform_field_[d](pt_in);
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDeformationField<ValueType, D>::transform(size_t* pt_in, T* pt_out) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Transform N points given by integer pixel indices.
+    /// \param pt_in   N*D indices, the D indices of point p start at pt_in + p*D
+    /// \param N       number of points
+    /// \param pt_out  N*D output coordinates, laid out like pt_in
+    /// \return true unless an exception was caught; the result of the per-point call is not inspected
+    template <typename ValueType, unsigned int D> 
+    inline bool hoImageRegDeformationField<ValueType, D>::transform(const size_t* pt_in, size_t N, T* pt_out) const
+    {
+        try
+        {
+            long long p;
+            #pragma omp parallel for default(none) private(p) shared(N, pt_in, pt_out)
+            for( p=0; p<(long long)N; p++ )
+            {
+                // every point is handled by the single-point overload
+                const size_t* src = pt_in+p*D;
+                T* dst = pt_out+p*D;
+                this->transform(src, dst);
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDeformationField<ValueType, D>::transform(size_t* pt_in, size_t N, T* pt_out) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Transform a 2D point given by integer pixel indices; the deformation is read directly from the
+    /// stored field at (xi, yi).
+    /// \return true on success; false if an exception was caught (an error message is logged)
+    template <typename ValueType, unsigned int D> 
+    inline bool hoImageRegDeformationField<ValueType, D>::transform(const size_t& xi, const size_t& yi, T& xo, T& yo) const
+    {
+        try
+        {
+            // component 0 shifts x, component 1 shifts y
+            xo = xi + deform_field_[0](xi, yi);
+            yo = yi + deform_field_[1](xi, yi);
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDeformationField<ValueType, D>::transform(const size_t& xi, const size_t& yi, T& xo, T& yo) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Transform N 2D points given by integer pixel indices, stored as separate x and y arrays.
+    /// \param xi, yi  input indices, N entries each
+    /// \param N       number of points
+    /// \param xo, yo  output coordinates, N entries each
+    /// \return true unless an exception was caught; the result of the per-point call is not inspected
+    template <typename ValueType, unsigned int D> 
+    inline bool hoImageRegDeformationField<ValueType, D>::transform(const size_t* xi, const size_t* yi, size_t N, T* xo, T* yo) const
+    {
+        try
+        {
+            long long p;
+            #pragma omp parallel for default(none) private(p) shared(N, xi, yi, xo, yo)
+            for( p=0; p<(long long)N; p++ )
+            {
+                // every point is handled by the single-point 2D overload
+                this->transform(xi[p], yi[p], xo[p], yo[p]);
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDeformationField<ValueType, D>::transform(size_t* xi, size_t* yi, size_t N, T* xo, T* yo) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Transform a 3D point given by integer pixel indices; the deformation is read directly from the
+    /// stored field at (xi, yi, zi).
+    /// \return true on success; false if an exception was caught (an error message is logged)
+    template <typename ValueType, unsigned int D> 
+    inline bool hoImageRegDeformationField<ValueType, D>::transform(const size_t& xi, const size_t& yi, const size_t& zi, T& xo, T& yo, T& zo) const
+    {
+        try
+        {
+            // components 0, 1, 2 shift x, y, z respectively
+            xo = xi + deform_field_[0](xi, yi, zi);
+            yo = yi + deform_field_[1](xi, yi, zi);
+            zo = zi + deform_field_[2](xi, yi, zi);
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDeformationField<ValueType, D>::transform(const size_t& xi, const size_t& yi, const size_t& zi, T& xo, T& yo, T& zo) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Transform N 3D points given by integer pixel indices, stored as separate x, y and z arrays.
+    /// \param xi, yi, zi  input indices, N entries each
+    /// \param N           number of points
+    /// \param xo, yo, zo  output coordinates, N entries each
+    /// \return true unless an exception was caught; the result of the per-point call is not inspected
+    template <typename ValueType, unsigned int D> 
+    inline bool hoImageRegDeformationField<ValueType, D>::transform(const size_t* xi, const size_t* yi, const size_t* zi, size_t N, T* xo, T* yo, T* zo) const
+    {
+        try
+        {
+            long long p;
+            #pragma omp parallel for default(none) private(p) shared(N, xi, yi, zi, xo, yo, zo)
+            for( p=0; p<(long long)N; p++ )
+            {
+                // every point is handled by the single-point 3D overload
+                this->transform(xi[p], yi[p], zi[p], xo[p], yo[p], zo[p]);
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDeformationField<ValueType, D>::transform(size_t* xi, size_t* yi, size_t* zi, size_t N, T* xo, T* yo, T* zo) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Jacobian of the deformation with respect to the spatial position, at one point.
+    /// Entry (row, col) is a central difference of deformation component 'row' along axis 'col',
+    /// evaluated through the interpolator half a pixel on either side of pos. Off-diagonal entries
+    /// are additionally multiplied by pixelSize[row]/pixelSize[col].
+    /// \param pos  point at which the jacobian is evaluated
+    /// \param jac  output, created as a D*D matrix
+    /// \return true on success; false if an exception was caught (an error message is logged)
+    template <typename ValueType, unsigned int D> 
+    inline bool hoImageRegDeformationField<ValueType, D>::jacobianPosition(const input_point_type& pos, jacobian_position_type& jac)
+    {
+        try
+        {
+            jac.createMatrix(D, D);
+
+            // half-pixel step on both sides, so the difference spans exactly one pixel
+            T halfStep = 0.5;
+            T invSpan = T(1.0)/(T(2.0)*halfStep);
+
+            // pixel sizes are taken from the first component
+            std::vector<coord_type> pixelSize(D);
+            deform_field_[0].get_pixel_size(pixelSize);
+
+            for ( size_t row=0; row<D; row++ )
+            {
+                for ( size_t col=0; col<D; col++ )
+                {
+                    input_point_type posAhead(pos);
+                    input_point_type posBehind(pos);
+
+                    posAhead[col] += halfStep;
+                    posBehind[col] -= halfStep;
+
+                    T valueAhead = (*interp_default_[row])(posAhead.begin());
+                    T valueBehind = (*interp_default_[row])(posBehind.begin());
+
+                    jac(row, col) = (valueAhead-valueBehind)*invSpan;
+
+                    if ( row != col )
+                    {
+                        // scaled for non-isotropic pixel sizes
+                        jac(row, col) *= ( pixelSize[row]/pixelSize[col] );
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDeformationField<ValueType, D>::jacobianPosition(const input_point_type& pos, jacobian_position_type& jac) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Jacobian at every pixel of the deformation field stored in this object.
+    /// Collects pointers to the D stored components and forwards to the overload taking an explicit field array.
+    template <typename ValueType, unsigned int D> 
+    inline bool hoImageRegDeformationField<ValueType, D>::jacobianPosition(hoNDArray<T>& jac, unsigned int borderWidth)
+    {
+        DeformationFieldType* fields[D];
+
+        for ( unsigned int d=0; d<D; ++d )
+        {
+            fields[d] = &deform_field_[d];
+        }
+
+        return this->jacobianPosition(jac, fields, borderWidth);
+    }
+
+    /// Jacobian of a deformation field at every pixel, by central differences on the pixel grid.
+    /// \param jac           output array, created with dimensions [D D dim(0) ... dim(D-1)] and cleared;
+    ///                      the D*D entries of pixel n start at element n*D*D
+    /// \param deform_field  the D deformation components; dimensions, offset factors and pixel sizes
+    ///                      are taken from deform_field[0]
+    /// \param borderWidth   pixels closer than this to any array edge are skipped and keep a zero jacobian;
+    ///                      0 is treated as 1, because the central difference reads both neighbours of a pixel
+    /// \return true on success; false if an exception was caught (an error message is logged)
+    template <typename ValueType, unsigned int D> 
+    inline bool hoImageRegDeformationField<ValueType, D>::jacobianPosition(hoNDArray<T>& jac, DeformationFieldType* deform_field[D], unsigned int borderWidth)
+    {
+        try
+        {
+            std::vector<size_t> dim;
+            deform_field[0]->get_dimensions(dim);
+
+            // jacobian array: D x D followed by the dimensions of the deformation field
+            std::vector<size_t> dimJac(D+2, D);
+            memcpy(&dimJac[0]+2, &dim[0], sizeof(size_t)*D);
+
+            jac.create(&dimJac);
+            Gadgetron::clear(&jac);
+
+            // offset[d] is the linear-offset step between neighbouring pixels along dimension d
+            std::vector<size_t> offset(D);
+            deform_field[0]->get_offset_factor(offset);
+
+            std::vector<coord_type> pixelSize(D);
+            deform_field[0]->get_pixel_size(pixelSize);
+
+            T delta = 1.0;
+            T deltaReciprocal = T(1.0)/(T(2.0)*delta);
+
+            size_t N = deform_field[0]->get_number_of_elements();
+
+            // the difference below reads offsets n-offset[din] and n+offset[din]; for a pixel on the array
+            // edge these lie outside the field, so at least a one-pixel border is always left out
+            size_t border = borderWidth;
+            if ( border < 1 ) border = 1;
+
+            long long n;
+
+            #pragma omp parallel default(none) private(n) shared(N, jac, dim, offset, pixelSize, border, deltaReciprocal, deform_field)
+            {
+
+                std::vector<size_t> ind(D);
+
+                hoNDArray<T> jacCurr(D, D);
+
+                #pragma omp for 
+                for ( n=0; n<(long long)N; n++ )
+                {
+                    ind = deform_field[0]->calculate_index( n );
+
+                    bool inRange = true;
+
+                    size_t din, dout;
+
+                    for ( dout=0; dout<D; dout++ )
+                    {
+                        // written with an addition so that a border wider than the dimension cannot wrap around
+                        if ( ind[dout]<border || ind[dout]+border>=dim[dout] )
+                        {
+                            inRange = false;
+                            break;
+                        }
+                    }
+
+                    if ( inRange )
+                    {
+                        for ( dout=0; dout<D; dout++ )
+                        {
+                            for ( din=0; din<D; din++ )
+                            {
+                                size_t offset_positive = n + offset[din];
+                                size_t offset_negative = n - offset[din];
+
+                                T v_positive = (*deform_field[dout])(offset_positive);
+                                T v_negative = (*deform_field[dout])(offset_negative);
+
+                                jacCurr(dout, din) = (v_positive-v_negative)*deltaReciprocal;
+
+                                if ( dout != din )
+                                {
+                                    // scaled for non-isotropic pixel sizes
+                                    jacCurr(dout, din) *= ( pixelSize[dout]/pixelSize[din] );
+                                }
+                            }
+                        }
+
+                        memcpy(jac.begin()+n*D*D, jacCurr.begin(), sizeof(T)*D*D);
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDeformationField<ValueType, D>::jacobianPosition(hoNDArray<T>& jac, DeformationFieldType* deform_field[D], unsigned int borderWidth) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Deformation and jacobian statistics for the field stored in this object.
+    /// Collects pointers to the D stored components and forwards to the overload taking an explicit field array.
+    template <typename ValueType, unsigned int D> 
+    inline bool hoImageRegDeformationField<ValueType, D>::
+    analyzeJacobianAndDeformation(const hoNDArray<T>& jac, T& meanDeform, T& maxDeform, T& meanLogJac, T& maxLogJac, unsigned int borderWidth)
+    {
+        DeformationFieldType* fields[D];
+
+        for ( unsigned int d=0; d<D; ++d )
+        {
+            fields[d] = &deform_field_[d];
+        }
+
+        return this->analyzeJacobianAndDeformation(jac, fields, meanDeform, maxDeform, meanLogJac, maxLogJac, borderWidth);
+    }
+
+    /// Summarise a deformation field and its jacobian.
+    /// For every pixel outside the border, the deformation magnitude (components scaled by the pixel size)
+    /// and the log of the determinant of (jacobian + identity) are computed; border pixels keep the value 0.
+    /// \param jac           per-pixel jacobian, the D*D entries of pixel n start at element n*D*D
+    /// \param deform_field  the D deformation components; dimensions and pixel sizes are taken from deform_field[0]
+    /// \param meanDeform    out: norm1 of the magnitude array divided by the total number of pixels N
+    /// \param maxDeform     out: result of maxAbsolute on the magnitude array
+    /// \param meanLogJac    out: norm1 of the log-determinant array divided by N
+    /// \param maxLogJac     out: result of maxAbsolute on the log-determinant array
+    /// \param borderWidth   pixels closer than this to any array edge are skipped
+    /// \return true on success; false if an exception was caught (an error message is logged)
+    template <typename ValueType, unsigned int D> 
+    bool hoImageRegDeformationField<ValueType, D>::
+    analyzeJacobianAndDeformation(const hoNDArray<T>& jac, DeformationFieldType* deform_field[D], T& meanDeform, T& maxDeform, T& meanLogJac, T& maxLogJac, unsigned int borderWidth)
+    {
+        try
+        {
+            std::vector<size_t> dim;
+            deform_field[0]->get_dimensions(dim);
+
+            std::vector<coord_type> pixelSize(D);
+            deform_field[0]->get_pixel_size(pixelSize);
+
+            size_t N = deform_field[0]->get_number_of_elements();
+
+            // defaults, overwritten by the reductions after the parallel region
+            meanDeform = 0;
+            maxDeform = -1;
+            meanLogJac = 0;
+            maxLogJac = -1;
+
+            // per-pixel results; entries of skipped border pixels stay at zero
+            hoNDArray<T> deformNorm(dim);
+            Gadgetron::clear(deformNorm);
+
+            hoNDArray<T> logJac(dim);
+            Gadgetron::clear(logJac);
+
+            long long n;
+            #pragma omp parallel default(none) private(n) shared(N, borderWidth, jac, deformNorm, logJac, dim, pixelSize, deform_field)
+            {
+                // declared inside the parallel region, so every thread has its own copy
+                std::vector<size_t> ind(D);
+                hoMatrix<T> jacCurr(D, D);
+                unsigned int ii;
+
+                #pragma omp for 
+                for ( n=0; n<(long long)N; n++ )
+                {
+                    ind = deform_field[0]->calculate_index( n );
+
+                    bool inRange = true;
+
+                    size_t dout;
+
+                    // skip pixels within borderWidth of any edge
+                    // NOTE(review): dim[dout]-borderWidth is unsigned and wraps when borderWidth > dim[dout];
+                    // the first comparison already rejects every index in that case
+                    for ( dout=0; dout<D; dout++ )
+                    {
+                        if ( ind[dout]<borderWidth || ind[dout]>=dim[dout]-borderWidth )
+                        {
+                            inRange = false;
+                            break;
+                        }
+                    }
+
+                    if ( inRange )
+                    {
+                        memcpy(jacCurr.begin(), jac.begin()+n*D*D, sizeof(T)*D*D);
+
+                        T deformMag(0), v, det;
+
+                        for ( ii=0; ii<D; ii++ )
+                        {
+                            // add the identity: the transform is position + deformation
+                            jacCurr(ii, ii) += 1.0;
+
+                            // deformation component scaled by the pixel size
+                            v = (*deform_field[ii])(n)*pixelSize[ii];
+                            deformMag += v*v;
+                        }
+
+                        deformNorm(n) = std::sqrt(deformMag);
+
+                        // closed-form determinant for 2x2 and 3x3
+                        // NOTE(review): det is left uninitialised when D is neither 2 nor 3
+                        if ( D == 2 )
+                        {
+                            det = jacCurr(0, 0)*jacCurr(1, 1) - jacCurr(0, 1)*jacCurr(1, 0);
+                        }
+                        else if ( D == 3 )
+                        {
+                            det = jacCurr(0, 0)*jacCurr(1, 1)*jacCurr(2, 2) 
+                                + jacCurr(0, 1)*jacCurr(1, 2)*jacCurr(2, 0)
+                                + jacCurr(0, 2)*jacCurr(2, 1)*jacCurr(1, 0)
+                                - jacCurr(0, 2)*jacCurr(1, 1)*jacCurr(2, 0) 
+                                - jacCurr(0, 1)*jacCurr(1, 0)*jacCurr(2, 2) 
+                                - jacCurr(0, 0)*jacCurr(2, 1)*jacCurr(1, 2);
+                        }
+
+                        // determinants with magnitude below FLT_EPSILON are replaced by FLT_EPSILON before the log
+                        // NOTE(review): a negative determinant of larger magnitude is passed to std::log unchanged
+                        if ( GT_ABS(det) < FLT_EPSILON ) det = FLT_EPSILON;
+                        logJac(n) = std::log(det);
+                    }
+                }
+            }
+
+            // the index of the maximum is not used
+            size_t ind;
+            Gadgetron::maxAbsolute(deformNorm, maxDeform, ind);
+            Gadgetron::maxAbsolute(logJac, maxLogJac, ind);
+
+            // the divisor is the total pixel count N, which includes the skipped border pixels
+            Gadgetron::norm1(deformNorm, meanDeform);
+            meanDeform /= N;
+
+            Gadgetron::norm1(logJac, meanLogJac);
+            meanLogJac /= N;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in analyzeJacobianAndDeformation(const hoNDArray<T>& jac, DeformationFieldType* deform_field[D], T& meanDeform, T& maxDeform, T& meanLogJac, T& maxLogJac, unsigned int borderWidth) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Access one deformation component at a pixel index.
+    /// \param idx     D pixel indices
+    /// \param outDim  which component to access; valid values are 0 .. D-1
+    /// \return reference to the stored deformation value
+    template <typename ValueType, unsigned int D> 
+    inline ValueType& hoImageRegDeformationField<ValueType, D>::operator()( size_t idx[D], size_t outDim )
+    {
+        // strictly less than D: index D would be one past the last component
+        GADGET_DEBUG_CHECK_THROW(outDim<D);
+        return this->deform_field_[outDim](idx);
+    }
+
+    /// Read-only access to one deformation component at a pixel index.
+    /// \param idx     D pixel indices
+    /// \param outDim  which component to access; valid values are 0 .. D-1
+    /// \return const reference to the stored deformation value
+    template <typename ValueType, unsigned int D> 
+    inline const ValueType& hoImageRegDeformationField<ValueType, D>::operator()( size_t idx[D], size_t outDim ) const 
+    {
+        // strictly less than D: index D would be one past the last component
+        GADGET_DEBUG_CHECK_THROW(outDim<D);
+        return this->deform_field_[outDim](idx);
+    }
+
+    /// Read all D deformation components at one pixel index.
+    /// \param idx     D pixel indices
+    /// \param deform  out: the D deformation values
+    template <typename ValueType, unsigned int D> 
+    inline void hoImageRegDeformationField<ValueType, D>::get(size_t idx[D], T deform[D])
+    {
+        // the linear offset is computed once on component 0 and reused for every component
+        size_t pixel = deform_field_[0].calculate_offset(idx);
+
+        for ( unsigned int d=0; d<D; ++d )
+        {
+            deform[d] = deform_field_[d](pixel);
+        }
+    }
+
+    /// Read the two deformation components at pixel (x, y).
+    template <typename ValueType, unsigned int D> 
+    inline void hoImageRegDeformationField<ValueType, D>::get(size_t x, size_t y, T& dx, T& dy)
+    {
+        // the linear offset is computed once on component 0 and reused for both components
+        size_t pixel = deform_field_[0].calculate_offset(x, y);
+
+        dx = deform_field_[0](pixel);
+        dy = deform_field_[1](pixel);
+    }
+
+    /// Read the three deformation components at pixel (x, y, z).
+    template <typename ValueType, unsigned int D> 
+    inline void hoImageRegDeformationField<ValueType, D>::get(size_t x, size_t y, size_t z, T& dx, T& dy, T& dz)
+    {
+        // the linear offset is computed once on component 0 and reused for all three components
+        size_t pixel = deform_field_[0].calculate_offset(x, y, z);
+
+        dx = deform_field_[0](pixel);
+        dy = deform_field_[1](pixel);
+        dz = deform_field_[2](pixel);
+    }
+
+    /// Write all D deformation components at one pixel index.
+    /// \param idx     D pixel indices
+    /// \param deform  the D deformation values to store
+    template <typename ValueType, unsigned int D> 
+    inline void hoImageRegDeformationField<ValueType, D>::set(size_t idx[D], T deform[D])
+    {
+        // the linear offset is computed once on component 0 and reused for every component
+        size_t offset = this->deform_field_[0].calculate_offset(idx);
+
+        unsigned int ii;
+        // the loop index must start at 0; the former "ii<0" was a comparison that left ii uninitialised
+        for ( ii=0; ii<D; ii++ )
+        {
+            this->deform_field_[ii](offset) = deform[ii];
+        }
+    }
+
+    /// Write the two deformation components at pixel (x, y).
+    template <typename ValueType, unsigned int D> 
+    inline void hoImageRegDeformationField<ValueType, D>::set(size_t x, size_t y, T dx, T dy)
+    {
+        // the linear offset is computed once on component 0 and reused for both components
+        size_t pixel = deform_field_[0].calculate_offset(x, y);
+
+        deform_field_[0](pixel) = dx;
+        deform_field_[1](pixel) = dy;
+    }
+
+    /// Write the three deformation components at pixel (x, y, z).
+    template <typename ValueType, unsigned int D> 
+    inline void hoImageRegDeformationField<ValueType, D>::set(size_t x, size_t y, size_t z, T dx, T dy, T dz)
+    {
+        // the linear offset is computed once on component 0 and reused for all three components
+        size_t pixel = deform_field_[0].calculate_offset(x, y, z);
+
+        deform_field_[0](pixel) = dx;
+        deform_field_[1](pixel) = dy;
+        deform_field_[2](pixel) = dz;
+    }
+
+    /// Interpolated value of one deformation component at a continuous position.
+    /// \param pos     D coordinates
+    /// \param outDim  which component to evaluate; valid values are 0 .. D-1
+    /// \return the value returned by the default interpolator of that component
+    template <typename ValueType, unsigned int D> 
+    inline ValueType hoImageRegDeformationField<ValueType, D>::operator()( coord_type pos[D], size_t outDim )
+    {
+        // strictly less than D: index D would be one past the last interpolator
+        GADGET_DEBUG_CHECK_THROW(outDim<D);
+        return (*interp_default_[outDim])(pos);
+    }
+
+    /// Interpolated values of all D deformation components at a continuous position.
+    /// \param pos     D coordinates
+    /// \param deform  out: the D interpolated deformation values
+    template <typename ValueType, unsigned int D> 
+    inline void hoImageRegDeformationField<ValueType, D>::get(coord_type pos[D], T deform[D])
+    {
+        for ( unsigned int d=0; d<D; ++d )
+        {
+            deform[d] = interp_default_[d]->operator()(pos);
+        }
+    }
+
+    /// Interpolated values of the two deformation components at the continuous position (px, py).
+    template <typename ValueType, unsigned int D> 
+    inline void hoImageRegDeformationField<ValueType, D>::get(coord_type px, coord_type py, T& dx, T& dy)
+    {
+        dx = interp_default_[0]->operator()(px, py);
+        dy = interp_default_[1]->operator()(px, py);
+    }
+
+    /// Interpolated values of the three deformation components at the continuous position (px, py, pz).
+    template <typename ValueType, unsigned int D> 
+    inline void hoImageRegDeformationField<ValueType, D>::get(coord_type px, coord_type py, coord_type pz, T& dx, T& dy, T& dz)
+    {
+        dx = interp_default_[0]->operator()(px, py, pz);
+        dy = interp_default_[1]->operator()(px, py, pz);
+        dz = interp_default_[2]->operator()(px, py, pz);
+    }
+
+    /// Get a pointer to one stored deformation component.
+    /// \param deform  out: points to the component held inside this object (not a copy)
+    /// \param outDim  which component; valid values are 0 .. D-1
+    template <typename ValueType, unsigned int D> 
+    inline void hoImageRegDeformationField<ValueType, D>::  getDeformationField(DeformationFieldType*& deform, size_t outDim)
+    {
+        // strictly less than D: index D would be one past the last component
+        GADGET_DEBUG_CHECK_THROW(outDim<D);
+        deform = &(this->deform_field_[outDim]);
+    }
+
+    /// Replace one stored deformation component by assignment from the given field.
+    /// \param deform  the field to assign from
+    /// \param outDim  which component to replace; valid values are 0 .. D-1
+    template <typename ValueType, unsigned int D> 
+    inline void hoImageRegDeformationField<ValueType, D>::setDeformationField(const DeformationFieldType& deform, size_t outDim)
+    {
+        // strictly less than D: index D would be one past the last component
+        GADGET_DEBUG_CHECK_THROW(outDim<D);
+        this->deform_field_[outDim] = deform;
+    }
+
+    /// Serialize the deformation field into a newly allocated byte buffer.
+    /// Layout: one unsigned int holding D, followed by the serialized form of each of the D
+    /// components, in order.
+    /// \param buf  in: NULL, or a buffer allocated with new[] (it is deleted);
+    ///             out: the new buffer, to be released by the caller with delete[]; NULL on failure
+    /// \param len  out: number of bytes in buf (left untouched on failure)
+    /// \return true on success; false if a component failed to serialize or an exception was caught
+    template <typename ValueType, unsigned int D> 
+    bool hoImageRegDeformationField<ValueType, D>::serialize(char*& buf, size_t& len) const 
+    {
+        // per-component buffers; they start as NULL so that the component serialize() never receives an
+        // indeterminate pointer, in case it inspects the incoming pointer the way this function does
+        char* bufInternal[D];
+        size_t lenInternal[D];
+
+        unsigned int ii;
+        for ( ii=0; ii<D; ii++ )
+        {
+            bufInternal[ii] = NULL;
+            lenInternal[ii] = 0;
+        }
+
+        bool succeeded = false;
+
+        try
+        {
+            if ( buf != NULL )
+            {
+                delete[] buf;
+                buf = NULL; // do not hand a dangling pointer back if anything below fails
+            }
+
+            // serialize every dimension
+            size_t totalLen = 0;
+            bool allSerialized = true;
+
+            for ( ii=0; ii<D; ii++ )
+            {
+                if ( !this->deform_field_[ii].serialize(bufInternal[ii], lenInternal[ii]) )
+                {
+                    allSerialized = false;
+                    break;
+                }
+
+                totalLen += lenInternal[ii];
+            }
+
+            if ( allSerialized )
+            {
+                // number of dimensions + serialized components
+                len = sizeof(unsigned int) + totalLen;
+
+                // new[] reports failure by throwing, which is handled by the catch below
+                buf = new char[len];
+
+                unsigned int NDim=D;
+
+                size_t offset = 0;
+                memcpy(buf, &NDim, sizeof(unsigned int));
+                offset += sizeof(unsigned int);
+
+                for ( ii=0; ii<D; ii++ )
+                {
+                    memcpy(buf+offset, bufInternal[ii], lenInternal[ii]);
+                    offset += lenInternal[ii];
+                }
+
+                succeeded = true;
+            }
+            else
+            {
+                GADGET_ERROR_MSG("hoImageRegDeformationField<ValueType, D>::serialize(...) : failed to serialize a deformation field component ... ");
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDeformationField<ValueType, D>::serialize(...) ... ");
+        }
+
+        // the temporaries are released on every path, success or failure
+        for ( ii=0; ii<D; ii++ )
+        {
+            delete [] bufInternal[ii];
+        }
+
+        return succeeded;
+    }
+
+    /// Restore the deformation field from a buffer written by serialize().
+    /// The leading unsigned int must equal D; the D components are then deserialized one after another.
+    /// \param buf  buffer to read from
+    /// \param len  out: number of bytes consumed from buf (only set on success)
+    /// \return true on success; false on a dimension mismatch, a failed component, or a caught exception
+    template <typename ValueType, unsigned int D> 
+    bool hoImageRegDeformationField<ValueType, D>::deserialize(char* buf, size_t& len)
+    {
+        try
+        {
+            // header: number of dimensions stored in the buffer
+            unsigned int NDim;
+            memcpy(&NDim, buf, sizeof(unsigned int));
+
+            if ( NDim != D )
+            {
+                GADGET_ERROR_MSG("hoImageRegDeformationField<ValueType, D>::deserialize(...) : number of image dimensions does not match ... ");
+                return false;
+            }
+
+            // NDim equals D from here on, so looping over D covers the empty case as well
+            size_t offset = sizeof(unsigned int);
+
+            for ( unsigned int ii=0; ii<D; ++ii )
+            {
+                // every component reports how many bytes it consumed
+                size_t lenInternal;
+                GADGET_CHECK_RETURN_FALSE(this->deform_field_[ii].deserialize(buf+offset, lenInternal));
+                offset += lenInternal;
+            }
+
+            len = offset;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegDeformationField<ValueType, D>::deserialize(...) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Print a short description: a banner line, the dimension D and the typeid name of the value type.
+    template <typename ValueType, unsigned int D> 
+    void hoImageRegDeformationField<ValueType, D>::print(std::ostream& os) const
+    {
+        os << "--------------Gagdgetron deformation field geometry transformation -------------" << std::endl;
+        os << "Deformation field dimension is : " << D << std::endl;
+
+        // the name is implementation defined, as returned by typeid
+        const std::string elemTypeName(typeid(T).name());
+        os << "Transformation data type is : " << elemTypeName << std::endl;
+    }
+
+    /// Name of this transformation class.
+    template <typename ValueType, unsigned int D> 
+    std::string hoImageRegDeformationField<ValueType, D>::transformationName() const
+    {
+        const std::string name("hoImageRegDeformationField");
+        return name;
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/transformation/hoImageRegHomogenousTransformation.h b/toolboxes/registration/optical_flow/cpu/transformation/hoImageRegHomogenousTransformation.h
new file mode 100644
index 0000000..4f19f3b
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/transformation/hoImageRegHomogenousTransformation.h
@@ -0,0 +1,475 @@
+/** \file   hoImageRegHomogenousTransformation.h
+    \brief  Define the class for the homogenous geometry transformation in gadgetron registration
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoImageRegParametricTransformation.h"
+#include "hoMatrix.h"
+
+namespace Gadgetron
+{
+    /// Homogenous transformation
+    /// A parametric transformation of D-dimensional points held as a (D+1)x(D+1) matrix.
+    /// The first D rows of the matrix are exposed as the D*(D+1) parameters, in row-major order:
+    /// parameter i is matrix entry ( i/(D+1), i%(D+1) ).
+    template<typename ValueType, unsigned int D> 
+    class hoImageRegHomogenousTransformation : public hoImageRegParametricTransformation<ValueType, D, D>
+    {
+    public:
+
+        typedef hoImageRegParametricTransformation<ValueType, D, D> BaseClass;
+        typedef hoImageRegHomogenousTransformation<ValueType, D> Self;
+
+        typedef ValueType T;
+
+        typedef typename BaseClass::input_point_type input_point_type;
+        typedef typename BaseClass::output_point_type output_point_type;
+
+        typedef typename BaseClass::jacobian_parameter_type jacobian_parameter_type;
+        typedef typename BaseClass::jacobian_position_type jacobian_position_type;
+
+        typedef typename BaseClass::ParaStatus ParaStatus;
+        typedef typename BaseClass::ParaStatusType ParaStatusType;
+
+        /// creates the matrix and sets it to identity; all parameters are marked Active
+        hoImageRegHomogenousTransformation();
+        virtual ~hoImageRegHomogenousTransformation();
+
+        // get/set the ith parameter
+        virtual ValueType get_parameter(size_t i) const;
+        virtual void set_parameter(size_t i, ValueType v);
+
+        /// replace the matrix by its inverse
+        virtual bool invertTransformation();
+
+        /// reset the matrix to identity
+        virtual bool setIdentity();
+
+        /// transform one point stored as D consecutive values
+        virtual bool transform(const T* pt_in, T* pt_out) const;
+
+        /// transform one 2D point
+        virtual bool transform(const T& xi, const T& yi, T& xo, T& yo) const;
+
+        /// transform one 3D point
+        virtual bool transform(const T& xi, const T& yi, const T& zi, T& xo, T& yo, T& zo) const;
+
+        /// the same transformations for points given as integer indices;
+        /// the overloads taking N transform N points in one call
+        virtual bool transform(const size_t* pt_in, T* pt_out) const;
+        virtual bool transform(const size_t* pt_in, size_t N, T* pt_out) const;
+        virtual bool transform(const size_t& xi, const size_t& yi, T& xo, T& yo) const;
+        virtual bool transform(const size_t* xi, const size_t* yi, size_t N, T* xo, T* yo) const;
+        virtual bool transform(const size_t& xi, const size_t& yi, const size_t& zi, T& xo, T& yo, T& zo) const;
+        virtual bool transform(const size_t* xi, const size_t* yi, const size_t* zi, size_t N, T* xo, T* yo, T* zo) const;
+
+        /// compute jacobian matrix to parameters
+        /// D*num_parameters_ matrix
+        virtual bool jacobianParameter(const input_point_type& pos, jacobian_parameter_type& jac);
+
+        /// compute jacobian matrix to spatial position
+        /// D*D matrix
+        virtual bool jacobianPosition(const input_point_type& pos, jacobian_position_type& jac);
+
+        virtual void print(std::ostream& os) const;
+        virtual void printTransform(std::ostream& os) const;
+
+        /// name of this transformation class
+        virtual std::string transformationName() const
+        {
+            return std::string("hoImageRegHomogenousTransformation"); 
+        }
+
+        // members of the base class template made visible in this class
+        using BaseClass::gt_timer1_;
+        using BaseClass::gt_timer2_;
+        using BaseClass::gt_timer3_;
+        using BaseClass::performTiming_;
+        using BaseClass::gt_exporter_;
+        using BaseClass::debugFolder_;
+
+    protected:
+
+        using BaseClass::num_parameters_;
+        using BaseClass::para_status_;
+
+        /// transformation matrix
+        /// (D+1)x(D+1); column D of the first D rows holds the additive (translation) term
+        hoMatrix<ValueType> matrix_;
+    };
+
+    /// Constructor: D*(D+1) parameters, all marked Active, and a (D+1)x(D+1) identity matrix.
+    /// The checks below throw if the matrix cannot be created or set to identity.
+    template <typename ValueType, unsigned int D> 
+    hoImageRegHomogenousTransformation<ValueType, D>::hoImageRegHomogenousTransformation() : BaseClass()
+    {
+        // one parameter per entry of the first D matrix rows
+        this->num_parameters_ = D*(D+1);
+        this->para_status_.resize(this->num_parameters_, BaseClass::Active);
+
+        // start out as the identity transformation
+        GADGET_CHECK_THROW(matrix_.createMatrix(D+1, D+1));
+        GADGET_CHECK_THROW(matrix_.setIdentity());
+    }
+
+    /// Destructor: nothing to release explicitly, the members clean up after themselves.
+    template <typename ValueType, unsigned int D> 
+    hoImageRegHomogenousTransformation<ValueType, D>::~hoImageRegHomogenousTransformation()
+    {
+    }
+
+    /// Get the i-th parameter, i.e. matrix entry ( i/(D+1), i%(D+1) ).
+    /// \param i  parameter index, expected to be below num_parameters_ (checked by the debug macro)
+    template <typename ValueType, unsigned int D> 
+    inline ValueType hoImageRegHomogenousTransformation<ValueType, D>::get_parameter(size_t i) const
+    {
+        GADGET_DEBUG_CHECK_THROW(i<num_parameters_);
+
+        // parameters run row by row over rows of D+1 entries
+        const size_t row = i/(D+1);
+        const size_t col = i%(D+1);
+        return matrix_( row, col );
+    }
+
+    /// Set the i-th parameter, i.e. matrix entry ( i/(D+1), i%(D+1) ).
+    /// \param i  parameter index, expected to be below num_parameters_ (checked by the debug macro)
+    /// \param v  new value
+    template <typename ValueType, unsigned int D> 
+    inline void hoImageRegHomogenousTransformation<ValueType, D>::set_parameter(size_t i, ValueType v)
+    {
+        GADGET_DEBUG_CHECK_THROW(i<num_parameters_);
+
+        // parameters run row by row over rows of D+1 entries
+        const size_t row = i/(D+1);
+        const size_t col = i%(D+1);
+        matrix_( row, col ) = v;
+    }
+
+    /// Invert the transformation by passing the matrix to Gadgetron::getri.
+    /// \return true when the macro below does not return early
+    /// NOTE(review): presumably getri inverts matrix_ in place and the macro turns an exception
+    /// into a false return - confirm against their definitions.
+    template <typename ValueType, unsigned int D> 
+    bool hoImageRegHomogenousTransformation<ValueType, D>::invertTransformation()
+    {
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE( Gadgetron::getri(matrix_) );
+        return true;
+    }
+
+    /// Reset the transformation matrix to identity.
+    /// \return true when matrix_.setIdentity() succeeds; the macro below returns false otherwise
+    template <typename ValueType, unsigned int D> 
+    bool hoImageRegHomogenousTransformation<ValueType, D>::setIdentity()
+    {
+        GADGET_CHECK_RETURN_FALSE( matrix_.setIdentity() );
+        return true;
+    }
+
+    /// Transform one point: pt_out = (upper-left DxD block of the matrix) * pt_in + (column D of the matrix).
+    /// \param pt_in   D input coordinates
+    /// \param pt_out  D output coordinates; written row by row while pt_in is still being read,
+    ///                so the two buffers should not be the same
+    /// \return true on success; false if an exception was caught (an error message is logged)
+    template <typename ValueType, unsigned int D> 
+    bool hoImageRegHomogenousTransformation<ValueType, D>::transform(const T* pt_in, T* pt_out) const
+    {
+        try
+        {
+            for ( unsigned int row=0; row<D; ++row )
+            {
+                // linear part, accumulated directly in the output
+                pt_out[row] = 0;
+                for ( unsigned int col=0; col<D; ++col )
+                {
+                    pt_out[row] += matrix_(row, col) * pt_in[col];
+                }
+
+                // additive part from the last column
+                pt_out[row] += matrix_(row, D);
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in hoImageRegHomogenousTransformation<ValueType, D>::transform(const T* pt_in, T* pt_out) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Transform one 2D point with the first two rows of the matrix; column 2 is the additive term.
+    /// \return true on success; false if an exception was caught (an error message is logged)
+    template <typename ValueType, unsigned int D> 
+    bool hoImageRegHomogenousTransformation<ValueType, D>::transform(const T& xi, const T& yi, T& xo, T& yo) const
+    {
+        try
+        {
+            // row 0 gives x, row 1 gives y
+            xo = this->matrix_(0, 0)*xi + this->matrix_(0, 1)*yi + this->matrix_(0, 2);
+            yo = this->matrix_(1, 0)*xi + this->matrix_(1, 1)*yi + this->matrix_(1, 2);
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in hoImageRegHomogenousTransformation<ValueType, D>::transform(const T& xi, const T& yi, T& xo, T& yo) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Transform one 3D point with the first three rows of the matrix; column 3 is the additive term.
+    /// \return true on success; false if an exception was caught (an error message is logged)
+    template <typename ValueType, unsigned int D> 
+    bool hoImageRegHomogenousTransformation<ValueType, D>::transform(const T& xi, const T& yi, const T& zi, T& xo, T& yo, T& zo) const
+    {
+        try
+        {
+            // row 0 gives x, row 1 gives y, row 2 gives z
+            xo = this->matrix_(0, 0)*xi + this->matrix_(0, 1)*yi + this->matrix_(0, 2)*zi + this->matrix_(0, 3);
+            yo = this->matrix_(1, 0)*xi + this->matrix_(1, 1)*yi + this->matrix_(1, 2)*zi + this->matrix_(1, 3);
+            zo = this->matrix_(2, 0)*xi + this->matrix_(2, 1)*yi + this->matrix_(2, 2)*zi + this->matrix_(2, 3);
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in hoImageRegHomogenousTransformation<ValueType, D>::transform(const T& xi, const T& yi, const T& zi, T& xo, T& yo, T& zo) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Transform one D-dimensional point given as integer (size_t) coordinates:
+    /// pt_out[r] = sum over c of matrix_(r, c)*pt_in[c], plus matrix_(r, D). Returns false if anything throws.
+    template <typename ValueType, unsigned int D> 
+    bool hoImageRegHomogenousTransformation<ValueType, D>::transform(const size_t* pt_in, T* pt_out) const
+    {
+        try
+        {
+            for ( unsigned int row=0; row<D; row++ )
+            {
+                T& dst = pt_out[row];
+                dst = 0;
+                for ( unsigned int col=0; col<D; col++ )
+                    dst += matrix_(row, col) * pt_in[col];
+                // the last column of matrix_ holds the translation
+                dst += matrix_(row, D);
+            }
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in hoImageRegHomogenousTransformation<ValueType, D>::transform(const size_t* pt_in, T* pt_out) ... ");
+        }
+        return false;
+    }
+
+    template <typename ValueType, unsigned int D> 
+    bool hoImageRegHomogenousTransformation<ValueType, D>::transform(const size_t* pt_in, size_t N, T* pt_out) const
+    {   // Batch form: point ii is read from pt_in+ii*D and written to pt_out+ii*D, for ii in [0, N).
+        try
+        {
+            long long ii;
+            // The per-point calls are distributed by the OpenMP directive below; ii is private to each thread.
+            #pragma omp parallel for default(none) private(ii) shared(pt_in, pt_out, N)
+            for ( ii=0; ii<(long long)N; ii++ )
+            {
+                this->transform(pt_in+ii*D, pt_out+ii*D); // NOTE(review): the bool returned per point is discarded
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happen in hoImageRegHomogenousTransformation<ValueType, D>::transform(const size_t* pt_in, size_t N, T* pt_out) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Map one 2D point with integer (size_t) input coordinates through rows 0-1 of matrix_; false if that throws.
+    template <typename ValueType, unsigned int D> 
+    bool hoImageRegHomogenousTransformation<ValueType, D>::transform(const size_t& xi, const size_t& yi, T& xo, T& yo) const
+    {
+        try
+        {
+            xo = matrix_(0, 0)*xi + matrix_(0, 1)*yi + matrix_(0, 2);
+            yo = matrix_(1, 0)*xi + matrix_(1, 1)*yi + matrix_(1, 2);
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in hoImageRegHomogenousTransformation<ValueType, D>::transform(const size_t& xi, const size_t& yi, T& xo, T& yo) ... ");
+        }
+        return false;
+    }
+
+    template <typename ValueType, unsigned int D> 
+    bool hoImageRegHomogenousTransformation<ValueType, D>::transform(const size_t* xi, const size_t* yi, size_t N, T* xo, T* yo) const
+    {   // Batch 2D form: for each ii in [0, N), (xi[ii], yi[ii]) is mapped to (xo[ii], yo[ii]).
+        try
+        {
+            long long ii;
+            // The per-point calls are distributed by the OpenMP directive below; ii is private to each thread.
+            #pragma omp parallel for default(none) private(ii) shared(xi, yi, xo, yo, N)
+            for ( ii=0; ii<(long long)N; ii++ )
+            {
+                this->transform(xi[ii], yi[ii], xo[ii], yo[ii]); // NOTE(review): the bool returned per point is discarded
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happen in hoImageRegHomogenousTransformation<ValueType, D>::transform(const size_t* xi, const size_t* yi, size_t N, T* xo, T* yo) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Map one 3D point with integer (size_t) input coordinates through rows 0-2 of matrix_; false if that throws.
+    template <typename ValueType, unsigned int D> 
+    bool hoImageRegHomogenousTransformation<ValueType, D>::transform(const size_t& xi, const size_t& yi, const size_t& zi, T& xo, T& yo, T& zo) const
+    {
+        try
+        {
+            xo = matrix_(0, 0)*xi + matrix_(0, 1)*yi + matrix_(0, 2)*zi + matrix_(0, 3);
+            yo = matrix_(1, 0)*xi + matrix_(1, 1)*yi + matrix_(1, 2)*zi + matrix_(1, 3);
+            zo = matrix_(2, 0)*xi + matrix_(2, 1)*yi + matrix_(2, 2)*zi + matrix_(2, 3);
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in hoImageRegHomogenousTransformation<ValueType, D>::transform(const size_t& xi, const size_t& yi, const size_t& zi, T& xo, T& yo, T& zo) ... ");
+        }
+        return false;
+    }
+
+    template <typename ValueType, unsigned int D> 
+    bool hoImageRegHomogenousTransformation<ValueType, D>::transform(const size_t* xi, const size_t* yi, const size_t* zi, size_t N, T* xo, T* yo, T* zo) const
+    {   // Batch 3D form: for each ii in [0, N), (xi[ii], yi[ii], zi[ii]) is mapped to (xo[ii], yo[ii], zo[ii]).
+        try
+        {
+            long long ii;
+            // The per-point calls are distributed by the OpenMP directive below; ii is private to each thread.
+            #pragma omp parallel for default(none) private(ii) shared(xi, yi, zi, xo, yo, zo, N)
+            for ( ii=0; ii<(long long)N; ii++ )
+            {
+                this->transform(xi[ii], yi[ii], zi[ii], xo[ii], yo[ii], zo[ii]); // NOTE(review): the bool returned per point is discarded
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happen in hoImageRegHomogenousTransformation<ValueType, D>::transform(const size_t* xi, const size_t* yi, const size_t* zi, size_t N, T* xo, T* yo, T* zo) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template <typename ValueType, unsigned int D> 
+    bool hoImageRegHomogenousTransformation<ValueType, D>::jacobianParameter(const input_point_type& pos, jacobian_parameter_type& jac)
+    {   // Fills jac (D rows, num_parameters_ columns) from the position pos; returns false if anything throws.
+        try
+        {
+            jac.createMatrix(D, num_parameters_);
+            Gadgetron::clear(jac); // presumably zero-fills jac, so only the entries set below are non-zero — confirm
+            // Row ii gets pos(0..D-1) then 1 in columns ii*(D+1) .. ii*(D+1)+D; D==2 and D==3 are written out by hand.
+            if ( D == 2 )
+            {
+                jac(0, 0) = pos(0);
+                jac(0, 1) = pos(1);
+                jac(0, 2) = 1;
+
+                jac(1, 3) = pos(0);
+                jac(1, 4) = pos(1);
+                jac(1, 5) = 1;
+            }
+            else if ( D == 3 )
+            {
+                jac(0, 0) = pos(0);
+                jac(0, 1) = pos(1);
+                jac(0, 2) = pos(2);
+                jac(0, 3) = 1;
+
+                jac(1, 4) = pos(0);
+                jac(1, 5) = pos(1);
+                jac(1, 6) = pos(2);
+                jac(1, 7) = 1;
+
+                jac(2, 8)  = pos(0);
+                jac(2, 9)  = pos(1);
+                jac(2, 10) = pos(2);
+                jac(2, 11) = 1;
+            }
+            else // generic dimension: same pattern as the two unrolled branches above
+            {
+                unsigned int ii, jj;
+                for ( ii=0; ii<D; ii++ )
+                {
+                    for ( jj=0; jj<D; jj++ )
+                    {
+                        jac(ii, ii*(D+1)+jj) = pos(jj); // assumes parameter ii*(D+1)+jj is matrix_(ii, jj) — confirm
+                    }
+
+                    jac(ii, ii*(D+1)+D) = 1; // column of the translation term of row ii
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happen in hoImageRegHomogenousTransformation<ValueType, D>::jacobianParameter(const input_point_type& pos, jacobian_parameter_type& jac) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template <typename ValueType, unsigned int D> 
+    bool hoImageRegHomogenousTransformation<ValueType, D>::jacobianPosition(const input_point_type& pos, jacobian_position_type& jac)
+    {   // Copies the upper-left D x D part of matrix_ into jac; pos is not read. Returns false if anything throws.
+        try
+        {
+            jac.createMatrix(D, D);
+            Gadgetron::clear(jac); // presumably zero-fills jac; every entry is overwritten below anyway — confirm
+            // D==2 and D==3 are hand-unrolled forms of the generic copy loop in the final branch.
+            if ( D == 2 )
+            {
+                jac(0, 0) = matrix_(0, 0);
+                jac(0, 1) = matrix_(0, 1);
+                jac(1, 0) = matrix_(1, 0);
+                jac(1, 1) = matrix_(1, 1);
+            }
+            else if ( D == 3 )
+            {
+                jac(0, 0) = matrix_(0, 0);
+                jac(0, 1) = matrix_(0, 1);
+                jac(0, 2) = matrix_(0, 2);
+
+                jac(1, 0) = matrix_(1, 0);
+                jac(1, 1) = matrix_(1, 1);
+                jac(1, 2) = matrix_(1, 2);
+
+                jac(2, 0) = matrix_(2, 0);
+                jac(2, 1) = matrix_(2, 1);
+                jac(2, 2) = matrix_(2, 2);
+            }
+            else // generic dimension
+            {
+                unsigned int ii, jj;
+                for ( ii=0; ii<D; ii++ )
+                {
+                    for ( jj=0; jj<D; jj++ )
+                    {
+                        jac(ii, jj) = matrix_(ii, jj); // the translation column (index D) is left out
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happen in hoImageRegHomogenousTransformation<ValueType, D>::jacobianPosition(const input_point_type& pos, jacobian_position_type& jac) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template <typename ValueType, unsigned int D> 
+    void hoImageRegHomogenousTransformation<ValueType, D>::print(std::ostream& os) const
+    {
+        // Header block: banner, dimensions, element type and parameter count.
+        os << "--------------Gagdgetron homogenous transformation -------------" << std::endl;
+        os << "Input dimension is : " << D << std::endl;
+        os << "Output dimension is : " << D << std::endl;
+
+        const std::string typeLabel(typeid(T).name());
+        os << "Transformation data type is : " << typeLabel << std::endl;
+        os << "Number of parameters is : " << num_parameters_ << std::endl;
+
+        // One line per parameter: its index followed by its status.
+        os << "Status of parameters: " << std::endl;
+        for ( size_t p=0; p<this->num_parameters_; p++ )
+        {
+            const char* label = "Unknown";
+            switch ( para_status_[p] )
+            {
+                case BaseClass::Active:
+                    label = "Active";
+                    break;
+                case BaseClass::Inactive:
+                    label = "Inactive";
+                    break;
+                default:
+                    break;
+            }
+            os << "Para " << p << " : \t" << label << std::endl;
+        }
+
+        // Finish with the parameter values themselves.
+        os << "Transformation: " << std::endl;
+        this->printTransform(os);
+    }
+
+    /// Write every parameter value on one line, in index order, enclosed in square brackets.
+    template <typename ValueType, unsigned int D> 
+    void hoImageRegHomogenousTransformation<ValueType, D>::printTransform(std::ostream& os) const
+    {
+        os << "[ ";
+
+        for ( size_t p=0; p<this->num_parameters_; p++ )
+        {
+            // each value is followed by a space and a tab
+            os << this->get_parameter(p) << " \t";
+        }
+        os << " ]" << std::endl;
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/transformation/hoImageRegNonParametricTransformation.h b/toolboxes/registration/optical_flow/cpu/transformation/hoImageRegNonParametricTransformation.h
new file mode 100644
index 0000000..68f36fb
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/transformation/hoImageRegNonParametricTransformation.h
@@ -0,0 +1,82 @@
+/** \file   hoImageRegNonParametricTransformation.h
+    \brief  Define the base class for the non-parametric geometry transformation in gadgetron registration
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoImageRegTransformation.h"
+
+namespace Gadgetron
+{
+    /// non-parametric transformation, e.g. deformation field
+    template<typename ValueType, unsigned int DIn, unsigned int DOut> 
+    class hoImageRegNonParametricTransformation : public hoImageRegTransformation<ValueType, DIn, DOut>
+    {
+    public:
+        // shorthand names for the base class, this class and the coordinate type
+        typedef hoImageRegTransformation<ValueType, DIn, DOut> BaseClass;
+        typedef hoImageRegNonParametricTransformation<ValueType, DIn, DOut> Self;
+
+        typedef ValueType T;
+
+        typedef typename BaseClass::input_point_type input_point_type;
+        typedef typename BaseClass::output_point_type output_point_type;
+
+        typedef typename BaseClass::jacobian_position_type jacobian_position_type;
+
+        hoImageRegNonParametricTransformation() : BaseClass() {}
+        virtual ~hoImageRegNonParametricTransformation() {}
+        /// pure virtual interface: everything down to the last transform overload must be supplied by a subclass
+        virtual bool invertTransformation() = 0;
+
+        virtual bool setIdentity() = 0;
+        /// point transforms taking coordinates of type T: array form, then explicit 2D and 3D forms
+        virtual bool transform(const T* pt_in, T* pt_out) const = 0;
+
+        virtual bool transform(const T& xi, const T& yi, T& xo, T& yo) const = 0;
+
+        virtual bool transform(const T& xi, const T& yi, const T& zi, T& xo, T& yo, T& zo) const = 0;
+        /// point transforms taking size_t input coordinates; the overloads with N presumably handle N points per call
+        virtual bool transform(const size_t* pt_in, T* pt_out) const = 0;
+        virtual bool transform(const size_t* pt_in, size_t N, T* pt_out) const = 0;
+        virtual bool transform(const size_t& xi, const size_t& yi, T& xo, T& yo) const = 0;
+        virtual bool transform(const size_t* xi, const size_t* yi, size_t N, T* xo, T* yo) const = 0;
+        virtual bool transform(const size_t& xi, const size_t& yi, const size_t& zi, T& xo, T& yo, T& zo) const = 0;
+        virtual bool transform(const size_t* xi, const size_t* yi, const size_t* zi, size_t N, T* xo, T* yo, T* zo) const = 0;
+
+        /// compute the jacobian matrix with respect to the spatial position (a DOut*DIn matrix);
+        /// this default ignores the position and returns the result of jac.setIdentity()
+        virtual bool jacobianPosition(const input_point_type& /*pos*/, jacobian_position_type& jac)
+        {
+            jac.createMatrix(DOut, DIn);
+            jac.setIdentity();
+            return true; // always reports success
+        }
+
+        virtual void print(std::ostream& os) const
+        {   // writes a banner, the two dimensions and the typeid name of T, one item per line
+            using namespace std;
+            os << "--------------Gagdgetron non-parametric geometry transformation -------------" << endl;
+            os << "Input dimension is : " << DIn << endl;
+            os << "Output dimension is : " << DOut << endl;
+
+            std::string elemTypeName = std::string(typeid(T).name());
+            os << "Transformation data type is : " << elemTypeName << std::endl;
+        }
+
+        virtual std::string transformationName() const
+        {
+            return std::string("hoImageRegNonParametricTransformation"); 
+        }
+        // names made visible from the (dependent) base class
+        using BaseClass::gt_timer1_;
+        using BaseClass::gt_timer2_;
+        using BaseClass::gt_timer3_;
+        using BaseClass::performTiming_;
+        using BaseClass::gt_exporter_;
+        using BaseClass::debugFolder_;
+
+    protected: // this class adds no data members of its own
+    };
+}
diff --git a/toolboxes/registration/optical_flow/cpu/transformation/hoImageRegParametricTransformation.h b/toolboxes/registration/optical_flow/cpu/transformation/hoImageRegParametricTransformation.h
new file mode 100644
index 0000000..734189c
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/transformation/hoImageRegParametricTransformation.h
@@ -0,0 +1,227 @@
+/** \file   hoImageRegParametricTransformation.h
+    \brief  Define the base class for the parametric geometry transformation in gadgetron registration
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoImageRegTransformation.h"
+
+namespace Gadgetron
+{
+    /// parametric transformation, e.g. rigid and affine transformation or Free-Form Deformation
+    template<typename ValueType, unsigned int DIn, unsigned int DOut> 
+    class hoImageRegParametricTransformation : public hoImageRegTransformation<ValueType, DIn, DOut>
+    {
+    public:
+        // shorthand names for the base class, this class and the coordinate type
+        typedef hoImageRegTransformation<ValueType, DIn, DOut> BaseClass;
+        typedef hoImageRegParametricTransformation<ValueType, DIn, DOut> Self;
+
+        typedef ValueType T;
+
+        typedef typename BaseClass::input_point_type input_point_type;
+        typedef typename BaseClass::output_point_type output_point_type;
+
+        typedef typename BaseClass::jacobian_parameter_type jacobian_parameter_type;
+        typedef typename BaseClass::jacobian_position_type jacobian_position_type;
+
+        /// every parameter can be active or inactive
+        /// if inactive, this parameter will not be changed during optimization
+        typedef enum { Inactive=0, Active, Unknown } ParaStatus;
+        typedef std::vector<ParaStatus> ParaStatusType;
+        // the base class is listed first because it is always initialized before the members
+        hoImageRegParametricTransformation() : BaseClass(), num_parameters_(0) {}
+        virtual ~hoImageRegParametricTransformation() {}
+
+        size_t get_number_of_parameters() const { return num_parameters_; }
+        void set_number_of_parameters(size_t num) { num_parameters_ = num; para_status_.resize(num, Active); } // entries added by the resize start as Active
+
+        // get/set the ith parameter
+        virtual ValueType get_parameter(size_t i) const = 0;
+        virtual void set_parameter(size_t i, ValueType v) = 0;
+        // get/set the status of the ith parameter; the index is checked against num_parameters_
+        ParaStatus get_para_status(size_t i) const { GADGET_CHECK_THROW(i<num_parameters_); return this->para_status_[i]; }
+        void set_para_status(size_t i, ParaStatus status) { GADGET_CHECK_THROW(i<num_parameters_); para_status_[i] = status; }
+
+        virtual bool invertTransformation() = 0;
+
+        virtual bool setIdentity() = 0;
+        // point transforms taking coordinates of type T: array form, then explicit 2D and 3D forms
+        virtual bool transform(const T* pt_in, T* pt_out) const = 0;
+
+        virtual bool transform(const T& xi, const T& yi, T& xo, T& yo) const = 0;
+
+        virtual bool transform(const T& xi, const T& yi, const T& zi, T& xo, T& yo, T& zo) const = 0;
+        // point transforms taking size_t input coordinates
+        virtual bool transform(const size_t* pt_in, T* pt_out) const = 0;
+        virtual bool transform(const size_t* pt_in, size_t N, T* pt_out) const = 0;
+        virtual bool transform(const size_t& xi, const size_t& yi, T& xo, T& yo) const = 0;
+        virtual bool transform(const size_t* xi, const size_t* yi, size_t N, T* xo, T* yo) const = 0;
+        virtual bool transform(const size_t& xi, const size_t& yi, const size_t& zi, T& xo, T& yo, T& zo) const = 0;
+        virtual bool transform(const size_t* xi, const size_t* yi, const size_t* zi, size_t N, T* xo, T* yo, T* zo) const = 0;
+
+        /// adjust transformation for the resolution pyramid, if the image coordinate is used
+        /// sourceI2W and targetI2W: source and target image to world transformation matrix
+        virtual bool adjustForResolutionPyramid(const hoMatrix<ValueType>& /*sourceI2W*/, const hoMatrix<ValueType>& /*targetI2W*/)
+        {
+            /// by default, the transformation is not changed
+            return true;
+        }
+
+        /// compute jacobian matrix to parameters
+        /// DOut*num_parameters_ matrix
+        virtual bool jacobianParameter(const input_point_type& /*pos*/, jacobian_parameter_type& jac)
+        {
+            jac.createMatrix(DOut, num_parameters_);
+            jac.setIdentity();
+            return true;
+        }
+
+        /// compute jacobian matrix to spatial position
+        /// DOut*DIn matrix
+        virtual bool jacobianPosition(const input_point_type& /*pos*/, jacobian_position_type& jac)
+        {
+            jac.createMatrix(DOut, DIn);
+            jac.setIdentity();
+            return true;
+        }
+
+        /// serialize/deserialize the transformation
+        virtual bool serialize(char*& buf, size_t& len) const;
+        virtual bool deserialize(char* buf, size_t& len);
+
+        virtual void print(std::ostream& os) const
+        {   // banner, dimensions, element type, parameter count, then one status line per parameter
+            using namespace std;
+            os << "--------------Gagdgetron parametric geometry transformation -------------" << endl;
+            os << "Input dimension is : " << DIn << endl;
+            os << "Output dimension is : " << DOut << endl;
+
+            std::string elemTypeName = std::string(typeid(T).name());
+            os << "Transformation data type is : " << elemTypeName << std::endl;
+            os << "Number of parameters is : " << num_parameters_ << endl;
+
+            size_t i;
+            os << "Status of parameters: " << endl;
+            for ( i=0; i<this->num_parameters_; i++ )
+            {
+                os << "Para " << i << " : \t";
+                if ( para_status_[i] == Active )
+                {
+                    os << "Active";
+                }
+                else if ( para_status_[i] == Inactive )
+                {
+                    os << "Inactive";
+                }
+                else
+                {
+                    os << "Unknown";
+                }
+                os << endl;
+            }
+        }
+
+        virtual void printTransform(std::ostream& os) const
+        {   // prints at most maxNum parameter values on one line
+            using namespace std;
+
+            size_t i;
+            size_t maxNum = 12;
+            // "<=": with exactly maxNum parameters nothing is cut off, so no ellipsis is printed
+            if ( this->num_parameters_<= maxNum )
+            {
+                os << "[ ";
+                for ( i=0; i<this->num_parameters_; i++ )
+                {
+                    os << this->get_parameter(i) << " \t";
+                }
+                os << " ]" << endl;
+            }
+            else
+            {
+                os << "[ ";
+                for ( i=0; i<maxNum; i++ )
+                {
+                    os << this->get_parameter(i) << " \t";
+                }
+                os << " ... ]" << endl; // the ellipsis marks the parameters that were left out
+            }
+        }
+
+        virtual std::string transformationName() const
+        {
+            return std::string("hoImageRegParametricTransformation"); 
+        }
+        // names made visible from the (dependent) base class
+        using BaseClass::gt_timer1_;
+        using BaseClass::gt_timer2_;
+        using BaseClass::gt_timer3_;
+        using BaseClass::performTiming_;
+        using BaseClass::gt_exporter_;
+        using BaseClass::debugFolder_;
+
+    protected:
+        // number of parameters; the accessors above check indices into para_status_ against it
+        size_t num_parameters_;
+        // status of each parameter
+        ParaStatusType para_status_;
+    };
+
+    /// Pack every parameter, in index order, into a newly allocated byte buffer.
+    /// A non-NULL buf is released with delete[] first; on success len is the buffer size in bytes.
+    template<typename ValueType, unsigned int DIn, unsigned int DOut> 
+    bool hoImageRegParametricTransformation<ValueType, DIn, DOut>::serialize(char*& buf, size_t& len) const 
+    {
+        try
+        {
+            if ( buf != NULL ) delete[] buf;
+            buf = NULL; // keep buf well defined (not dangling) if the allocation below throws
+            size_t numOfPara = this->get_number_of_parameters();
+            size_t totalLen = sizeof(ValueType)*numOfPara;
+
+            buf = new char[totalLen];
+            len = totalLen; // report the size to the caller; this output argument used to be left unset
+
+            size_t offset(0);
+            for ( size_t ii=0; ii<numOfPara; ii++ )
+            {
+                const ValueType currPara = this->get_parameter(ii);
+                memcpy(buf+offset, &currPara, sizeof(ValueType));
+                offset += sizeof(ValueType);
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegParametricTransformation<ValueType, DIn, DOut>::serialize(char*& buf, size_t& len) ... ");
+            return false;
+        }
+        return true;
+    }
+
+    /// Read get_number_of_parameters() consecutive ValueType values from buf and assign them
+    /// as parameters 0, 1, ... in that order. NOTE(review): len is neither read nor written here.
+    template<typename ValueType, unsigned int DIn, unsigned int DOut> 
+    bool hoImageRegParametricTransformation<ValueType, DIn, DOut>::deserialize(char* buf, size_t& len)
+    {
+        try
+        {
+            const size_t numOfPara = this->get_number_of_parameters();
+            const char* cursor = buf;
+            for ( size_t ii=0; ii<numOfPara; ii++ )
+            {
+                ValueType currPara;
+                memcpy(&currPara, cursor, sizeof(ValueType));
+                cursor += sizeof(ValueType);
+                this->set_parameter(ii, currPara);
+            }
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegParametricTransformation<ValueType, DIn, DOut>::deserialize(char* buf, size_t& len) ... ");
+        }
+        return false;
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/transformation/hoImageRegRigid2DTransformation.h b/toolboxes/registration/optical_flow/cpu/transformation/hoImageRegRigid2DTransformation.h
new file mode 100644
index 0000000..96460d5
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/transformation/hoImageRegRigid2DTransformation.h
@@ -0,0 +1,380 @@
+/** \file   hoImageRegRigid2DTransformation.h
+    \brief  Define the class for the rigid 2D transformation in gadgetron registration
+            Three parameters are translation along x and y and rotation about z (tx, ty, rz)
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoImageRegHomogenousTransformation.h"
+#include <cmath>
+
+namespace Gadgetron
+{
+    /// Rigid 2D transformation: translation (tx, ty) plus rotation rz, held as a 3x3 homogeneous matrix
+    template<typename ValueType> 
+    class hoImageRegRigid2DTransformation : public hoImageRegHomogenousTransformation<ValueType, 2>
+    {
+    public:
+        // shorthand names: the parametric base (source of the Active/Inactive status values), the direct base, this class
+        typedef hoImageRegParametricTransformation<ValueType, 2, 2> ParaTransformBaseClass;
+        typedef hoImageRegHomogenousTransformation<ValueType, 2> BaseClass;
+        typedef hoImageRegRigid2DTransformation<ValueType> Self;
+
+        typedef ValueType T;
+
+        typedef typename BaseClass::input_point_type input_point_type;
+        typedef typename BaseClass::output_point_type output_point_type;
+
+        typedef typename BaseClass::jacobian_parameter_type jacobian_parameter_type;
+        typedef typename BaseClass::jacobian_position_type jacobian_position_type;
+
+        typedef typename BaseClass::ParaStatus ParaStatus;
+        typedef typename BaseClass::ParaStatusType ParaStatusType;
+
+        hoImageRegRigid2DTransformation();
+        virtual ~hoImageRegRigid2DTransformation();
+
+        // get/set the ith parameter; index 0 is tx, 1 is ty, any other index is rz
+        virtual ValueType get_parameter(size_t i) const;
+        virtual void set_parameter(size_t i, ValueType v);
+
+        virtual bool invertTransformation();
+
+        virtual bool setIdentity();
+
+        // get/set the translation and rotation; every setter also rebuilds matrix_
+        ValueType get_tx() const;
+        ValueType get_ty() const;
+        ValueType get_rz() const;
+
+        void set_tx(ValueType tx);
+        void set_ty(ValueType ty);
+        void set_rz(ValueType rz);
+
+        void set_tx_ty(ValueType tx, ValueType ty);
+        void set_tx_ty_rz(ValueType tx, ValueType ty, ValueType rz);
+
+        /// build the 3x3 transformation matrix from (tx, ty, rz), and recover (tx, ty, rz) from such a matrix
+        bool updateTransformationMatrix(ValueType tx, ValueType ty, ValueType rz, hoMatrix<T>& matrix);
+        bool extractParametersFromTransformationMatrix(const hoMatrix<T>& matrix, ValueType& tx, ValueType& ty, ValueType& rz);
+
+        virtual bool adjustForResolutionPyramid(const hoMatrix<ValueType>& sourceI2W, const hoMatrix<ValueType>& targetI2W);
+
+        /// compute jacobian matrix to parameters
+        /// 2*num_parameters_ matrix
+        virtual bool jacobianParameter(const input_point_type& pos, jacobian_parameter_type& jac);
+
+        virtual void print(std::ostream& os) const;
+        virtual void printTransform(std::ostream& os) const;
+
+        virtual std::string transformationName() const
+        {
+            return std::string("hoImageRegRigid2DTransformation"); 
+        }
+        // names made visible from the (dependent) base class
+        using BaseClass::gt_timer1_;
+        using BaseClass::gt_timer2_;
+        using BaseClass::gt_timer3_;
+        using BaseClass::performTiming_;
+        using BaseClass::gt_exporter_;
+        using BaseClass::debugFolder_;
+
+    protected:
+
+        using BaseClass::num_parameters_;
+        using BaseClass::para_status_;
+        using BaseClass::matrix_;
+
+        /// translation along x and y
+        ValueType tx_;
+        ValueType ty_;
+        /// rotation about z, in degrees
+        ValueType rz_;
+    };
+
+    /// Start with tx, ty and rz at zero, three parameter-status entries and a 3x3 matrix_ reset via setIdentity().
+    template <typename ValueType> 
+    hoImageRegRigid2DTransformation<ValueType>::hoImageRegRigid2DTransformation()
+        : BaseClass(), tx_(0), ty_(0), rz_(0)
+    {
+        num_parameters_ = 3;
+
+        // status entries created by this resize start out as Active
+        para_status_.resize(num_parameters_, ParaTransformBaseClass::Active);
+
+        GADGET_CHECK_THROW(matrix_.createMatrix(3, 3));
+        GADGET_CHECK_THROW(matrix_.setIdentity());
+    }
+
+    template <typename ValueType> 
+    hoImageRegRigid2DTransformation<ValueType>::~hoImageRegRigid2DTransformation()
+    {   // empty body: this class releases nothing explicitly
+    }
+
+    /// Parameter i in the order (tx, ty, rz); any index other than 0 or 1 yields rz.
+    template <typename ValueType> 
+    inline ValueType hoImageRegRigid2DTransformation<ValueType>::get_parameter(size_t i) const
+    {
+        GADGET_DEBUG_CHECK_THROW(i<num_parameters_);
+
+        switch ( i )
+        {
+            case 0:
+                return tx_;
+            case 1:
+                return ty_;
+            default:
+                // index 2, and any index the check above let through
+                return rz_;
+        }
+    }
+
+    /// Assign parameter i in the order (tx, ty, rz; other indices map to rz), then rebuild matrix_.
+    template <typename ValueType> 
+    inline void hoImageRegRigid2DTransformation<ValueType>::set_parameter(size_t i, ValueType v)
+    {
+        GADGET_DEBUG_CHECK_THROW(i<num_parameters_);
+        switch ( i )
+        {
+            case 0:
+                tx_ = v;
+                break;
+            case 1:
+                ty_ = v;
+                break;
+            default:
+                rz_ = v;
+                break;
+        }
+        GADGET_CHECK_THROW(this->updateTransformationMatrix(tx_, ty_, rz_, matrix_));
+    }
+
+    template <typename ValueType> 
+    inline bool hoImageRegRigid2DTransformation<ValueType>::invertTransformation()
+    {   // updates matrix_ through Gadgetron::getri, then refreshes tx_, ty_, rz_ from the resulting matrix
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE( Gadgetron::getri(matrix_) ); // presumably an in-place matrix inverse — confirm
+        GADGET_CHECK_RETURN_FALSE( this->extractParametersFromTransformationMatrix(matrix_, tx_, ty_, rz_) );
+        return true;
+    }
+
+    template <typename ValueType> 
+    inline bool hoImageRegRigid2DTransformation<ValueType>::setIdentity()
+    {   // resets matrix_ via its own setIdentity() and zeroes the three parameters to match
+        GADGET_CHECK_RETURN_FALSE( matrix_.setIdentity() ); // the parameters are only zeroed when this check passes
+        tx_ = 0;
+        ty_ = 0;
+        rz_ = 0;
+        return true;
+    }
+
+    template <typename ValueType> 
+    inline ValueType hoImageRegRigid2DTransformation<ValueType>::get_tx() const
+    {
+        return tx_; // stored translation along x
+    }
+
+    template <typename ValueType> 
+    inline ValueType hoImageRegRigid2DTransformation<ValueType>::get_ty() const
+    {
+        return ty_; // stored translation along y
+    }
+
+    template <typename ValueType> 
+    inline ValueType hoImageRegRigid2DTransformation<ValueType>::get_rz() const
+    {
+        return rz_; // stored rotation, in the unit it was set with (degrees, as updateTransformationMatrix expects)
+    }
+
+    template <typename ValueType> 
+    inline void hoImageRegRigid2DTransformation<ValueType>::set_tx(ValueType tx)
+    {   // stores the new x translation, then rebuilds matrix_ from (tx_, ty_, rz_)
+        tx_ = tx;
+        GADGET_CHECK_THROW(this->updateTransformationMatrix(tx_, ty_, rz_, matrix_)); // presumably throws when the rebuild returns false
+    }
+
+    template <typename ValueType> 
+    inline void hoImageRegRigid2DTransformation<ValueType>::set_ty(ValueType ty)
+    {   // stores the new y translation, then rebuilds matrix_ from (tx_, ty_, rz_)
+        ty_ = ty;
+        GADGET_CHECK_THROW(this->updateTransformationMatrix(tx_, ty_, rz_, matrix_)); // presumably throws when the rebuild returns false
+    }
+
+    template <typename ValueType> 
+    inline void hoImageRegRigid2DTransformation<ValueType>::set_rz(ValueType rz)
+    {   // stores the new rotation (degrees), then rebuilds matrix_ from (tx_, ty_, rz_)
+        rz_ = rz;
+        GADGET_CHECK_THROW(this->updateTransformationMatrix(tx_, ty_, rz_, matrix_)); // presumably throws when the rebuild returns false
+    }
+
+    template <typename ValueType> 
+    inline void hoImageRegRigid2DTransformation<ValueType>::set_tx_ty(ValueType tx, ValueType ty)
+    {   // stores both translations, then rebuilds matrix_ once
+        tx_ = tx;
+        ty_ = ty;
+        GADGET_CHECK_THROW(this->updateTransformationMatrix(tx_, ty_, rz_, matrix_)); // presumably throws when the rebuild returns false
+    }
+
+    template <typename ValueType> 
+    inline void hoImageRegRigid2DTransformation<ValueType>::set_tx_ty_rz(ValueType tx, ValueType ty, ValueType rz)
+    {   // stores all three parameters, then rebuilds matrix_ once
+        tx_ = tx;
+        ty_ = ty;
+        rz_ = rz;
+        GADGET_CHECK_THROW(this->updateTransformationMatrix(tx_, ty_, rz_, matrix_)); // presumably throws when the rebuild returns false
+    }
+
+    /// Build the 3x3 homogeneous matrix for translation (tx, ty) and rotation rz given in degrees.
+    /// Resulting rows: [ cos sin tx ], [ -sin cos ty ], [ 0 0 1 ]. Returns false on failure.
+    template <typename ValueType> 
+    bool hoImageRegRigid2DTransformation<ValueType>::updateTransformationMatrix(ValueType tx, ValueType ty, ValueType rz, hoMatrix<T>& matrix)
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE( matrix.createMatrix(3, 3) );
+            const ValueType c = std::cos(rz*GT_PI/180.0);
+            const ValueType s = std::sin(rz*GT_PI/180.0);
+
+            matrix(0, 0) = c;  matrix(0, 1) = s; matrix(0, 2) = tx;
+            matrix(1, 0) = -s; matrix(1, 1) = c; matrix(1, 2) = ty;
+            matrix(2, 0) = 0;  matrix(2, 1) = 0; matrix(2, 2) = 1;
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happen in hoImageRegRigid2DTransformation<ValueType>::updateTransformationMatrix(ValueType tx, ValueType ty, ValueType rz, hoMatrix<T>& matrix) ... ");
+        }
+        return false;
+    }
+
+    /// Recover (tx, ty, rz) from a matrix laid out as updateTransformationMatrix writes it:
+    /// matrix(0, 0) = cos(rz), matrix(0, 1) = sin(rz), translation in column 2.
+    /// rz is returned in degrees. Returns false, after logging, if reading the matrix throws.
+    template <typename ValueType> 
+    bool hoImageRegRigid2DTransformation<ValueType>::extractParametersFromTransformationMatrix(const hoMatrix<T>& matrix, ValueType& tx, ValueType& ty, ValueType& rz)
+    {
+        try
+        {
+            const double cosrz = matrix(0, 0);
+            const double sinrz = matrix(0, 1);
+
+            // atan2 resolves the angle over all four quadrants (result in [-PI, PI]), which is what
+            // the former asin/acos case split computed. Unlike asin/acos it stays finite when
+            // rounding (for instance after a matrix inversion) pushes the sine or cosine
+            // slightly outside [-1, 1], where those functions return NaN.
+            const double rzRad = std::atan2(sinrz, cosrz);
+
+            tx = matrix(0, 2);
+            ty = matrix(1, 2);
+            // radians -> degrees, the unit used by rz_ and updateTransformationMatrix
+            rz = static_cast<ValueType>(rzRad*(180.0/GT_PI));
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happen in hoImageRegRigid2DTransformation<ValueType>::extractParametersFromTransformationMatrix(const hoMatrix<T>& matrix, ValueType& tx, ValueType& ty, ValueType& rz) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// Jacobian with respect to (tx, ty, rz) at position pos, written into jac (2 x num_parameters_).
+    /// NOTE(review): rz is kept in degrees, yet column 2 carries no PI/180 factor — confirm this is intended.
+    template <typename ValueType> 
+    bool hoImageRegRigid2DTransformation<ValueType>::jacobianParameter(const input_point_type& pos, jacobian_parameter_type& jac)
+    {
+        try
+        {
+            jac.createMatrix(2, num_parameters_);
+            Gadgetron::clear(jac);
+
+            // cosine and sine of the current rotation, read back from matrix_
+            const double c = matrix_(0, 0);
+            const double s = matrix_(0, 1);
+
+            // row 0: translation columns first, then the rotation column
+            jac(0, 0) = 1; jac(0, 1) = 0;
+            jac(0, 2) = -s*pos(0) + c*pos(1);
+            // row 1
+            jac(1, 0) = 0; jac(1, 1) = 1;
+            jac(1, 2) = -c*pos(0) - s*pos(1);
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happen in hoImageRegRigid2DTransformation<ValueType>::jacobianParameter(const input_point_type& pos, jacobian_parameter_type& jac) ... ");
+        }
+        return false;
+    }
+
+    /// Rescale the translation between pyramid levels: tx_ and ty_ are multiplied by the source/target
+    /// pixel-size ratio along dimension 0 and 1, then matrix_ is rebuilt. rz_ is left unchanged.
+    template <typename ValueType> 
+    bool hoImageRegRigid2DTransformation<ValueType>::adjustForResolutionPyramid(const hoMatrix<ValueType>& sourceI2W, const hoMatrix<ValueType>& targetI2W)
+    {
+        try
+        {
+            // temporary images, used only to obtain pixel sizes after setting the image-to-world matrices
+            hoNDImage<ValueType, 2> src, dst;
+            src.set_image_to_world_matrix(sourceI2W);
+            dst.set_image_to_world_matrix(targetI2W);
+
+            tx_ *= src.get_pixel_size(0)/dst.get_pixel_size(0);
+            ty_ *= src.get_pixel_size(1)/dst.get_pixel_size(1);
+
+            GADGET_CHECK_THROW(this->updateTransformationMatrix(tx_, ty_, rz_, matrix_));
+            return true;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in hoImageRegRigid2DTransformation<ValueType>::adjustForResolutionPyramid(const hoMatrix<ValueType>& sourceI2W, const hoMatrix<ValueType>& targetI2W) ... ");
+        }
+        return false;
+    }
+
+    template <typename ValueType> 
+    void hoImageRegRigid2DTransformation<ValueType>::print(std::ostream& os) const
+    {
+        // Header block: banner, element type and parameter count.
+        os << "--------------Gagdgetron rigid 2D transformation -------------" << std::endl;
+        const std::string typeLabel(typeid(T).name());
+        os << "Transformation data type is : " << typeLabel << std::endl;
+        os << "Number of parameters is : " << num_parameters_ << std::endl;
+
+        // One line per parameter: its index followed by its status.
+        os << "Status of parameters [tx ty rz] : " << std::endl;
+        for ( size_t p=0; p<this->num_parameters_; p++ )
+        {
+            const char* label = "Unknown";
+            switch ( para_status_[p] )
+            {
+                case ParaTransformBaseClass::Active:
+                    label = "Active";
+                    break;
+                case ParaTransformBaseClass::Inactive:
+                    label = "Inactive";
+                    break;
+                default:
+                    break;
+            }
+            os << "Para " << p << " : \t" << label << std::endl;
+        }
+
+        // Finish with the parameter values themselves.
+        os << "Transformation: " << std::endl;
+        this->printTransform(os);
+    }
+
+    /// Write the three parameter values on one line, labelled "[tx ty rz]".
+    template <typename ValueType> 
+    void hoImageRegRigid2DTransformation<ValueType>::printTransform(std::ostream& os) const
+    {
+        os << "[tx ty rz] = [ ";
+
+        for ( size_t p=0; p<this->num_parameters_; p++ )
+        {
+            // each value is followed by a space and a tab
+            os << this->get_parameter(p) << " \t";
+        }
+        os << " ]" << std::endl;
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/transformation/hoImageRegRigid3DTransformation.h b/toolboxes/registration/optical_flow/cpu/transformation/hoImageRegRigid3DTransformation.h
new file mode 100644
index 0000000..8838f72
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/transformation/hoImageRegRigid3DTransformation.h
@@ -0,0 +1,491 @@
+/** \file   hoImageRegRigid3DTransformation.h
+    \brief  Define the class for the rigid 3D transformation in gadgetron registration
+            Six parameters are translation along x, y and z and rotation along x, y and z (tx, ty, tz, rx, ry, rz)
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoImageRegHomogenousTransformation.h"
+#include <cmath>
+
+namespace Gadgetron
+{
+    /// Rigid 3D transformation (translation plus rotation), stored as a 4x4 homogeneous matrix
+    template<typename ValueType> 
+    class hoImageRegRigid3DTransformation : public hoImageRegHomogenousTransformation<ValueType, 3>
+    {
+    public:
+
+        typedef hoImageRegParametricTransformation<ValueType, 3, 3> ParaTransformBaseClass;
+        typedef hoImageRegHomogenousTransformation<ValueType, 3> BaseClass;
+        typedef hoImageRegRigid3DTransformation<ValueType> Self;
+
+        typedef ValueType T;
+
+        typedef typename BaseClass::input_point_type input_point_type;
+        typedef typename BaseClass::output_point_type output_point_type;
+
+        typedef typename BaseClass::jacobian_parameter_type jacobian_parameter_type;
+        typedef typename BaseClass::jacobian_position_type jacobian_position_type;
+
+        typedef typename BaseClass::ParaStatus ParaStatus;
+        typedef typename BaseClass::ParaStatusType ParaStatusType;
+
+        hoImageRegRigid3DTransformation();
+        virtual ~hoImageRegRigid3DTransformation();
+
+        // get/set the ith parameter
+        virtual ValueType get_parameter(size_t i) const;
+        virtual void set_parameter(size_t i, ValueType v);
+
+        virtual bool invertTransformation();
+
+        virtual bool setIdentity();
+
+        // get/set the translation and rotation
+        ValueType get_tx() const;
+        ValueType get_ty() const;
+        ValueType get_tz() const;
+        ValueType get_rx() const;
+        ValueType get_ry() const;
+        ValueType get_rz() const;
+
+        void set_tx(ValueType tx);
+        void set_ty(ValueType ty);
+        void set_tz(ValueType tz);
+        void set_rx(ValueType rx);
+        void set_ry(ValueType ry);
+        void set_rz(ValueType rz);
+
+        void set_tx_ty_tz(ValueType tx, ValueType ty, ValueType tz);
+        void set_rx_ry_rz(ValueType rx, ValueType ry, ValueType rz);
+
+        /// compute the transformation matrix
+        bool updateTransformationMatrix(ValueType tx, ValueType ty, ValueType tz, ValueType rx, ValueType ry, ValueType rz, hoMatrix<T>& matrix);
+        bool extractParametersFromTransformationMatrix(const hoMatrix<T>& matrix, ValueType& tx, ValueType& ty, ValueType& tz, ValueType& rx, ValueType& ry, ValueType& rz);
+
+        virtual bool adjustForResolutionPyramid(const hoMatrix<ValueType>& sourceI2W, const hoMatrix<ValueType>& targetI2W);
+
+        /// compute jacobian matrix to parameters
+        /// D*num_parameters_ matrix
+        virtual bool jacobianParameter(const input_point_type& pos, jacobian_parameter_type& jac);
+
+        virtual void print(std::ostream& os) const;
+        virtual void printTransform(std::ostream& os) const;
+
+        virtual std::string transformationName() const
+        {
+            return std::string("hoImageRegRigid3DTransformation"); 
+        }
+
+        using BaseClass::gt_timer1_;
+        using BaseClass::gt_timer2_;
+        using BaseClass::gt_timer3_;
+        using BaseClass::performTiming_;
+        using BaseClass::gt_exporter_;
+        using BaseClass::debugFolder_;
+
+    protected:
+
+        using BaseClass::num_parameters_;
+        using BaseClass::para_status_;
+        using BaseClass::matrix_;
+
+        /// translation along x, y and z
+        ValueType tx_;
+        ValueType ty_;
+        ValueType tz_;
+        /// rotation along x, y, and z, in degree
+        ValueType rx_;
+        ValueType ry_;
+        ValueType rz_;
+    };
+
+    template <typename ValueType> 
+    hoImageRegRigid3DTransformation<ValueType>::hoImageRegRigid3DTransformation() : BaseClass()
+    {
+        num_parameters_ = 6;
+        para_status_.resize(num_parameters_, BaseClass::Active);
+
+        GADGET_CHECK_THROW(matrix_.createMatrix(4, 4));
+        GADGET_CHECK_THROW(matrix_.setIdentity());
+
+        tx_ = 0;
+        ty_ = 0;
+        tz_ = 0;
+        rx_ = 0;
+        ry_ = 0;
+        rz_ = 0;
+    }
+
+    template <typename ValueType> 
+    hoImageRegRigid3DTransformation<ValueType>::~hoImageRegRigid3DTransformation()
+    {
+    }
+
+    template <typename ValueType> 
+    inline ValueType hoImageRegRigid3DTransformation<ValueType>::get_parameter(size_t i) const
+    {
+        GADGET_DEBUG_CHECK_THROW(i<num_parameters_);
+        if ( i == 0 )
+        {
+            return tx_;
+        }
+        else if ( i == 1 )
+        {
+            return ty_;
+        }
+        else if ( i == 2 )
+        {
+            return tz_;
+        }
+        else if ( i == 3 )
+        {
+            return rx_;
+        }
+        else if ( i == 4 )
+        {
+            return ry_;
+        }
+        else if ( i == 5 )
+        {
+            return rz_;
+        }
+
+        return 0;
+    }
+
+    template <typename ValueType> 
+    inline void hoImageRegRigid3DTransformation<ValueType>::set_parameter(size_t i, ValueType v)
+    {
+        GADGET_DEBUG_CHECK_THROW(i<num_parameters_);
+        if ( i == 0 )
+        {
+            tx_ = v;
+        }
+        else if ( i == 1 )
+        {
+            ty_ = v;
+        }
+        else if ( i == 2 )
+        {
+            tz_ = v;
+        }
+        else if ( i == 3 )
+        {
+            rx_ = v;
+        }
+        else if ( i == 4 )
+        {
+            ry_ = v;
+        }
+        else if ( i == 5 )
+        {
+            rz_ = v;
+        }
+
+        GADGET_CHECK_THROW(this->updateTransformationMatrix(tx_, ty_, tz_, rx_, ry_, rz_, matrix_));
+    }
+
+    template <typename ValueType> 
+    inline bool hoImageRegRigid3DTransformation<ValueType>::invertTransformation() // returns false if either step below fails
+    {
+        GADGET_CHECK_EXCEPTION_RETURN_FALSE( Gadgetron::getri(matrix_) ); // modifies matrix_ in place; presumably computes its inverse (getri is defined elsewhere -- confirm)
+        GADGET_CHECK_RETURN_FALSE( this->extractParametersFromTransformationMatrix(matrix_, tx_, ty_, tz_, rx_, ry_, rz_) ); // re-derive the six parameters from the updated matrix_
+        return true;
+    }
+
+    template <typename ValueType> 
+    inline bool hoImageRegRigid3DTransformation<ValueType>::setIdentity()
+    {
+        GADGET_CHECK_RETURN_FALSE( matrix_.setIdentity() );
+        tx_ = 0;
+        ty_ = 0;
+        tz_ = 0;
+        rx_ = 0;
+        ry_ = 0;
+        rz_ = 0;
+        return true;
+    }
+
+    template <typename ValueType> 
+    inline ValueType hoImageRegRigid3DTransformation<ValueType>::get_tx() const
+    {
+        return tx_;
+    }
+
+    template <typename ValueType> 
+    inline ValueType hoImageRegRigid3DTransformation<ValueType>::get_ty() const
+    {
+        return ty_;
+    }
+
+    template <typename ValueType> 
+    inline ValueType hoImageRegRigid3DTransformation<ValueType>::get_tz() const
+    {
+        return tz_;
+    }
+
+    template <typename ValueType> 
+    inline ValueType hoImageRegRigid3DTransformation<ValueType>::get_rx() const
+    {
+        return rx_;
+    }
+
+    template <typename ValueType> 
+    inline ValueType hoImageRegRigid3DTransformation<ValueType>::get_ry() const
+    {
+        return ry_;
+    }
+
+    template <typename ValueType> 
+    inline ValueType hoImageRegRigid3DTransformation<ValueType>::get_rz() const
+    {
+        return rz_;
+    }
+
+    template <typename ValueType> 
+    inline void hoImageRegRigid3DTransformation<ValueType>::set_tx(ValueType tx)
+    {
+        tx_ = tx;
+        GADGET_CHECK_THROW(this->updateTransformationMatrix(tx_, ty_, tz_, rx_, ry_, rz_, matrix_));
+    }
+
+    template <typename ValueType> 
+    inline void hoImageRegRigid3DTransformation<ValueType>::set_ty(ValueType ty)
+    {
+        ty_ = ty;
+        GADGET_CHECK_THROW(this->updateTransformationMatrix(tx_, ty_, tz_, rx_, ry_, rz_, matrix_));
+    }
+
+    template <typename ValueType> 
+    inline void hoImageRegRigid3DTransformation<ValueType>::set_tz(ValueType tz)
+    {
+        tz_ = tz;
+        GADGET_CHECK_THROW(this->updateTransformationMatrix(tx_, ty_, tz_, rx_, ry_, rz_, matrix_));
+    }
+
+    template <typename ValueType> 
+    inline void hoImageRegRigid3DTransformation<ValueType>::set_rx(ValueType rx)
+    {
+        rx_ = rx;
+        GADGET_CHECK_THROW(this->updateTransformationMatrix(tx_, ty_, tz_, rx_, ry_, rz_, matrix_));
+    }
+
+    template <typename ValueType> 
+    inline void hoImageRegRigid3DTransformation<ValueType>::set_ry(ValueType ry)
+    {
+        ry_ = ry;
+        GADGET_CHECK_THROW(this->updateTransformationMatrix(tx_, ty_, tz_, rx_, ry_, rz_, matrix_));
+    }
+
+    template <typename ValueType> 
+    inline void hoImageRegRigid3DTransformation<ValueType>::set_rz(ValueType rz)
+    {
+        rz_ = rz;
+        GADGET_CHECK_THROW(this->updateTransformationMatrix(tx_, ty_, tz_, rx_, ry_, rz_, matrix_));
+    }
+
+    template <typename ValueType> 
+    inline void hoImageRegRigid3DTransformation<ValueType>::set_tx_ty_tz(ValueType tx, ValueType ty, ValueType tz)
+    {
+        tx_ = tx;
+        ty_ = ty;
+        tz_ = tz;
+        GADGET_CHECK_THROW(this->updateTransformationMatrix(tx_, ty_, tz_, rx_, ry_, rz_, matrix_));
+    }
+
+    template <typename ValueType> 
+    inline void hoImageRegRigid3DTransformation<ValueType>::set_rx_ry_rz(ValueType rx, ValueType ry, ValueType rz)
+    {
+        rx_ = rx;
+        ry_ = ry;
+        rz_ = rz;
+        GADGET_CHECK_THROW(this->updateTransformationMatrix(tx_, ty_, tz_, rx_, ry_, rz_, matrix_));
+    }
+
+    template <typename ValueType> 
+    bool hoImageRegRigid3DTransformation<ValueType>::updateTransformationMatrix(ValueType tx, ValueType ty, ValueType tz, ValueType rx, ValueType ry, ValueType rz, hoMatrix<T>& matrix)
+    {
+        try // fills `matrix` (4x4) from the translations and the rotation angles; returns false on any failure
+        {
+            GADGET_CHECK_RETURN_FALSE( matrix.createMatrix(4, 4) );
+
+            double cosrx = std::cos(rx*GT_PI/180.0); // angles are given in degrees and converted to radians here
+            double sinrx = std::sin(rx*GT_PI/180.0);
+
+            double cosry = std::cos(ry*GT_PI/180.0);
+            double sinry = std::sin(ry*GT_PI/180.0);
+
+            double cosrz = std::cos(rz*GT_PI/180.0);
+            double sinrz = std::sin(rz*GT_PI/180.0);
+
+            matrix(0, 0) = cosry*cosrz;                         matrix(0, 1) = cosry*sinrz;                             matrix(0, 2) = -sinry;           matrix(0, 3) = tx;
+            matrix(1, 0) = sinrx*sinry*cosrz-cosrx*sinrz;       matrix(1, 1) = sinrx*sinry*sinrz+cosrx*cosrz;           matrix(1, 2) = sinrx*cosry;      matrix(1, 3) = ty;
+            matrix(2, 0) = cosrx*sinry*cosrz+sinrx*sinrz;       matrix(2, 1) = cosrx*sinry*sinrz-sinrx*cosrz;           matrix(2, 2) = cosrx*cosry;      matrix(2, 3) = tz;
+            matrix(3, 0) = 0;                                   matrix(3, 1) = 0;                                       matrix(3, 2) = 0;                matrix(3, 3) = 1;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happen in hoImageRegRigid3DTransformation<ValueType>::updateTransformationMatrix(ValueType tx, ValueType ty, ValueType tz, ValueType rx, ValueType ry, ValueType rz, hoMatrix<T>& matrix) ... "); // message now names the actual 6-parameter signature
+            return false;
+        }
+
+        return true;
+    }
+
+    template <typename ValueType> 
+    bool hoImageRegRigid3DTransformation<ValueType>::extractParametersFromTransformationMatrix(const hoMatrix<T>& matrix, ValueType& tx, ValueType& ty, ValueType& tz, ValueType& rx, ValueType& ry, ValueType& rz)
+    {
+        try // reads only `matrix` and writes only the six output references (angles returned in degrees); returns false on exception
+        {
+            ry = asin(-1 * matrix(0, 2)); // matrix(0, 2) holds -sin(ry), see updateTransformationMatrix
+
+            if ( GT_ABS( std::cos(ry) ) > 1e-6 )
+            {
+                rx = atan2(matrix(1, 2), matrix(2, 2)); // sin(rx)*cos(ry) over cos(rx)*cos(ry)
+                rz = atan2(matrix(0, 1), matrix(0, 0)); // cos(ry)*sin(rz) over cos(ry)*cos(rz)
+            } 
+            else 
+            { 
+                rx = atan2(-1.0*matrix(0, 2)*matrix(1, 0), -1.0*matrix(0, 2)*matrix(2, 0)); // cos(ry) ~ 0: rx and rz cannot be separated, so rz is fixed to 0
+                rz = 0;
+            }
+
+            tx = matrix(0, 3);
+            ty = matrix(1, 3);
+            tz = matrix(2, 3);
+            rx *= 180.0/GT_PI; // radians to degrees
+            ry *= 180.0/GT_PI;
+            rz *= 180.0/GT_PI;
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happen in hoImageRegRigid3DTransformation<ValueType>::extractParametersFromTransformationMatrix(const hoMatrix<T>& matrix, ValueType& tx, ValueType& ty, ValueType& tz, ValueType& rx, ValueType& ry, ValueType& rz) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template <typename ValueType> 
+    bool hoImageRegRigid3DTransformation<ValueType>::jacobianParameter(const input_point_type& pos, jacobian_parameter_type& jac)
+    {
+        try // fills jac with the derivative of the transformed point at `pos` with respect to each parameter; returns false on exception
+        {
+            jac.createMatrix(3, num_parameters_); // rows: output x, y, z; columns follow get_parameter order: tx, ty, tz, rx, ry, rz
+            Gadgetron::clear(jac);
+
+            double cosrx = std::cos(rx_*GT_PI/180.0); // NOTE(review): the rotation columns below are derivatives w.r.t. the angle in radians; rx_/ry_/rz_ are stored in degrees and no GT_PI/180 factor is applied -- confirm this is intended
+            double sinrx = std::sin(rx_*GT_PI/180.0);
+
+            double cosry = std::cos(ry_*GT_PI/180.0);
+            double sinry = std::sin(ry_*GT_PI/180.0);
+
+            double cosrz = std::cos(rz_*GT_PI/180.0);
+            double sinrz = std::sin(rz_*GT_PI/180.0);
+
+            jac(0, 0) = 1; // columns 0-2: each translation contributes only to its own output coordinate
+            jac(0, 1) = 0;
+            jac(0, 2) = 0;
+            jac(0, 3) = 0; // row 0 of the matrix built in updateTransformationMatrix has no rx term
+            jac(0, 4) = -sinry*cosrz*pos(0)-sinry*sinrz*pos(1)-cosry*pos(2);
+            jac(0, 5) = -cosry*sinrz*pos(0)+cosry*cosrz*pos(1);
+
+            jac(1, 0) = 0;
+            jac(1, 1) = 1;
+            jac(1, 2) = 0;
+            jac(1, 3) = (cosrx*sinry*cosrz+sinrx*sinrz) *pos(0)             + (cosrx*sinry*sinrz-sinrx*cosrz)   *pos(1)        + cosrx*cosry*pos(2);
+            jac(1, 4) = (sinrx*cosry*cosrz)             *pos(0)             + (sinrx*cosry*sinrz)               *pos(1)        - sinrx*sinry*pos(2);
+            jac(1, 5) = (-sinrx*sinry*sinrz-cosrx*cosrz)*pos(0)             + (sinrx*sinry*cosrz-cosrx*sinrz)   *pos(1);
+
+            jac(2, 0) = 0;
+            jac(2, 1) = 0;
+            jac(2, 2) = 1;
+            jac(2, 3) = (-sinrx*sinry*cosrz+cosrx*sinrz)*pos(0)             + (-sinrx*sinry*sinrz-cosrx*cosrz)  *pos(1)         - sinrx*cosry*pos(2);
+            jac(2, 4) = (cosrx*cosry*cosrz)             *pos(0)             + (cosrx*cosry*sinrz)               *pos(1)         - cosrx*sinry*pos(2);
+            jac(2, 5) = (cosrx*sinry*-sinrz+sinrx*cosrz)*pos(0)             + (cosrx*sinry*cosrz+sinrx*sinrz)   *pos(1);
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happen in hoImageRegRigid3DTransformation<ValueType>::jacobianParameter(const input_point_type& pos, jacobian_parameter_type& jac) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template <typename ValueType> 
+    bool hoImageRegRigid3DTransformation<ValueType>::adjustForResolutionPyramid(const hoMatrix<ValueType>& sourceI2W, const hoMatrix<ValueType>& targetI2W)
+    {
+        try // rescales the stored translations for a change of image-to-world matrix; returns false on exception
+        {
+            hoNDImage<ValueType, 3> source; // temporary image, used only to read pixel sizes back after setting sourceI2W
+            source.set_image_to_world_matrix(sourceI2W);
+
+            hoNDImage<ValueType, 3> target; // same, for targetI2W
+            target.set_image_to_world_matrix(targetI2W);
+
+            tx_ *= source.get_pixel_size(0)/target.get_pixel_size(0); // each translation is scaled by the source/target pixel-size ratio of its axis
+            ty_ *= source.get_pixel_size(1)/target.get_pixel_size(1);
+            tz_ *= source.get_pixel_size(2)/target.get_pixel_size(2);
+
+            GADGET_CHECK_THROW(this->updateTransformationMatrix(tx_, ty_, tz_, rx_, ry_, rz_, matrix_)); // rotations are unchanged; rebuild matrix_ with the rescaled translations
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Error happened in hoImageRegRigid3DTransformation<ValueType>::adjustForResolutionPyramid(const hoMatrix<ValueType>& sourceI2W, const hoMatrix<ValueType>& targetI2W) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template <typename ValueType> 
+    void hoImageRegRigid3DTransformation<ValueType>::print(std::ostream& os) const
+    {
+        using namespace std;
+        os << "--------------Gagdgetron rigid 3D transformation -------------" << endl;
+        std::string elemTypeName = std::string(typeid(T).name());
+        os << "Transformation data type is : " << elemTypeName << std::endl;
+        os << "Number of parameters is : " << num_parameters_ << endl;
+
+        size_t i;
+        os << "Status of parameters [tx ty tz rx ry rz] : " << endl;
+        for ( i=0; i<this->num_parameters_; i++ )
+        {
+            os << "Para " << i << " : \t";
+            if ( para_status_[i] == ParaTransformBaseClass::Active )
+            {
+                os << "Active";
+            }
+            else if ( para_status_[i] == ParaTransformBaseClass::Inactive )
+            {
+                os << "Inactive";
+            }
+            else
+            {
+                os << "Unknown";
+            }
+            os << endl;
+        }
+
+        os << "Transformation: " << endl;
+        this->printTransform(os);
+    }
+
+    template <typename ValueType> 
+    void hoImageRegRigid3DTransformation<ValueType>::printTransform(std::ostream& os) const
+    {
+        using namespace std;
+
+        size_t i;
+        os << "[tx ty tz rx ry rz] = [ ";
+        for ( i=0; i<this->num_parameters_; i++ )
+        {
+            os << this->get_parameter(i) << " \t";
+        }
+        os << " ]" << endl;
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/transformation/hoImageRegTransformation.h b/toolboxes/registration/optical_flow/cpu/transformation/hoImageRegTransformation.h
new file mode 100644
index 0000000..37979be
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/transformation/hoImageRegTransformation.h
@@ -0,0 +1,408 @@
+/** \file   hoImageRegTransformation.h
+    \brief  Define the base class for the geometric transformation in gadgetron registration
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoNDArray.h"
+#include "hoNDImage.h"
+#include "hoMatrix.h"
+#include "hoNDInterpolator.h"
+#include "hoNDBoundaryHandler.h"
+#include "hoMatrix.h"
+#include "hoNDArray_utils.h"
+#include "hoNDArray_elemwise.h"
+#include "hoNDImage_util.h"
+#include "gtPlusISMRMRDReconUtil.h"
+#include "GtPrepUtil.h"
+
+#ifdef USE_OMP
+    #include <omp.h>
+#endif // USE_OMP
+
+namespace Gadgetron
+{
+    enum GT_IMAGE_REG_TRANSFORMATION
+    {
+        GT_IMAGE_REG_TRANSFORMATION_RIGID,
+        GT_IMAGE_REG_TRANSFORMATION_AFFINE,
+        GT_IMAGE_REG_TRANSFORMATION_DEFORMATION_FIELD,
+        GT_IMAGE_REG_TRANSFORMATION_DEFORMATION_FIELD_BIDIRECTIONAL
+    };
+
+    inline std::string getImageRegTransformationName(GT_IMAGE_REG_TRANSFORMATION v) // maps a transformation type to its name string
+    {
+        std::string name; // stays empty if v matches none of the cases below
+
+        switch (v)
+        {
+            case GT_IMAGE_REG_TRANSFORMATION_RIGID:
+                name = "Rigid";
+                break;
+
+            case GT_IMAGE_REG_TRANSFORMATION_AFFINE:
+                name = "Affine";
+                break;
+
+            case GT_IMAGE_REG_TRANSFORMATION_DEFORMATION_FIELD:
+                name = "DeformationField";
+                break;
+
+            case GT_IMAGE_REG_TRANSFORMATION_DEFORMATION_FIELD_BIDIRECTIONAL:
+                name = "DeformationFieldBidirectional";
+                break;
+
+            default:
+                GADGET_ERROR_MSG("Unrecognized image registration transformation type : " << v); // logged only; the empty name is still returned
+        }
+
+        return name;
+    }
+
+    inline GT_IMAGE_REG_TRANSFORMATION getImageRegTransformationType(const std::string& name) // maps a name string to its transformation type; the comparison is exact
+    {
+        GT_IMAGE_REG_TRANSFORMATION v = GT_IMAGE_REG_TRANSFORMATION_RIGID; // defined fallback: v used to be returned uninitialized for an unrecognized name
+
+        if ( name == "Rigid" )
+        {
+            v = GT_IMAGE_REG_TRANSFORMATION_RIGID;
+        }
+        else if ( name == "Affine" )
+        {
+            v = GT_IMAGE_REG_TRANSFORMATION_AFFINE;
+        }
+        else if ( name == "DeformationField" )
+        {
+            v = GT_IMAGE_REG_TRANSFORMATION_DEFORMATION_FIELD;
+        }
+        else if ( name == "DeformationFieldBidirectional" )
+        {
+            v = GT_IMAGE_REG_TRANSFORMATION_DEFORMATION_FIELD_BIDIRECTIONAL;
+        }
+        else
+        {
+            GADGET_ERROR_MSG("Unrecognized image registration transformation name : " << name); // logged only; the rigid fallback set above is returned
+        }
+
+        return v;
+    }
+
+    /// transform a spatial position to another spatial position
+    /// input and output can have different dimensions
+    /// input has DIn dimension and output has DOut dimension
+    /// a transformation is defined as a DOut*1 vector function
+    /// [T1; T2; T3; ...; TDOut] = T( [x1; x2; x3; ...; xDIn], [a1, a2, a3, ..., ak])
+    /// it transforms from DIn dimensions to DOut dimensions with k parameters
+    /// therefore, the jacobian matrix to the parameters (Jac_parameter) is a DOut*k matrix
+    /// the jacobian matrix to the spatial position (Jac_position) is a DOut*DIn matrix
+    template<typename ValueType, unsigned int DIn, unsigned int DOut> 
+    class hoImageRegTransformation
+    {
+    public:
+
+        typedef hoImageRegTransformation<ValueType, DIn, DOut> Self;
+
+        typedef ValueType T;
+        typedef ValueType element_type;
+        typedef ValueType value_type;
+
+        typedef hoNDPoint<T, DIn> input_point_type;
+        typedef hoNDPoint<T, DOut> output_point_type;
+
+        /// there are two types of jacobian for transformations
+        /// one is the jacobian to the transformation parameters
+        /// Jacobian matrix to parameters DOut*k matrix
+        typedef hoMatrix<T> jacobian_parameter_type;
+
+        /// Jacobian matrix to spatial position DOut*DIn matrix
+        typedef hoMatrix<T> jacobian_position_type;
+
+        hoImageRegTransformation() : performTiming_(false) 
+        {
+            gt_timer1_.set_timing_in_destruction(false);
+            gt_timer2_.set_timing_in_destruction(false);
+            gt_timer3_.set_timing_in_destruction(false); 
+        }
+
+        virtual ~hoImageRegTransformation() {}
+
+        /// invert the transformation, after calling this, the transformation is replaced by its inverse transformation
+        virtual bool invertTransformation() = 0;
+
+        /// set the transformation to be the identity transformation
+        virtual bool setIdentity() = 0;
+
+        /// transform a point
+        /// pt_in, pt_out stores a point as an array
+        virtual bool transform(const T* pt_in, T* pt_out) const = 0;
+        /// transform a point
+        virtual bool transform( const input_point_type& in, output_point_type& out ) const;
+        /// transform a group of points
+        virtual bool transform( input_point_type* in, size_t N, output_point_type* out ) const;
+        /// hoNDArray stores input and output points
+        /// pt_in: [DIn N]; pt_out: [DOut N]
+        virtual bool transform(const hoNDArray<T>& pt_in, hoNDArray<T>& pt_out) const;
+        /// pt_in, pt_out stores the points as an array
+        virtual bool transform(const T* pt_in, size_t N, T* pt_out) const;
+        /// in-place transform of N points; requires DIn>=DOut
+        virtual bool transform(T* pt_inout, size_t N) const;
+
+        /// for 2D - 2D transformation
+        virtual bool transform(const T& xi, const T& yi, T& xo, T& yo) const = 0;
+        virtual bool transform(const T* xi, const T* yi, size_t N, T* xo, T* yo) const;
+        virtual bool transform(T* x_inout, T* y_inout, size_t N) const;
+
+        /// for 3D - 3D transformation
+        virtual bool transform(const T& xi, const T& yi, const T& zi, T& xo, T& yo, T& zo) const = 0;
+        virtual bool transform(const T* xi, const T* yi, const T* zi, size_t N, T* xo, T* yo, T* zo) const;
+        virtual bool transform(T* x_inout, T* y_inout, T* z_inout, size_t N) const;
+
+        /// transform a point
+        /// the point is in the integer image pixel indexes
+        /// image interpolator is not used
+        /// pt_in, pt_out stores a point as an array
+        virtual bool transform(const size_t* pt_in, T* pt_out) const = 0;
+        virtual bool transform(const size_t* pt_in, size_t N, T* pt_out) const = 0;
+
+        /// for 2D - 2D transformation
+        virtual bool transform(const size_t& xi, const size_t& yi, T& xo, T& yo) const = 0;
+        virtual bool transform(const size_t* xi, const size_t* yi, size_t N, T* xo, T* yo) const = 0;
+
+        /// for 3D - 3D transformation
+        virtual bool transform(const size_t& xi, const size_t& yi, const size_t& zi, T& xo, T& yo, T& zo) const = 0;
+        virtual bool transform(const size_t* xi, const size_t* yi, const size_t* zi, size_t N, T* xo, T* yo, T* zo) const = 0;
+
+        /// serialize/deserialize the transformation
+        virtual bool serialize(char*& buf, size_t& len) const = 0;
+        virtual bool deserialize(char* buf, size_t& len) = 0;
+
+        virtual void print(std::ostream& os) const
+        {
+            using namespace std;
+            os << "--------------Gagdgetron geometric transformation -------------" << endl;
+            os << "Input dimension is : " << DIn << endl;
+            os << "Output dimension is : " << DOut << endl;
+
+            std::string elemTypeName = std::string(typeid(T).name());
+            os << "Transformation data type is : " << elemTypeName << std::endl;
+        }
+
+        virtual std::string transformationName() const
+        {
+            return std::string("hoImageRegTransformation"); 
+        }
+
+        // ----------------------------------
+        // debug and timing
+        // ----------------------------------
+        // clock for timing
+        Gadgetron::GadgetronTimer gt_timer1_;
+        Gadgetron::GadgetronTimer gt_timer2_;
+        Gadgetron::GadgetronTimer gt_timer3_;
+
+        bool performTiming_;
+
+        // exporter
+        Gadgetron::gtPlus::gtPlusIOAnalyze gt_exporter_;
+
+        // debug folder
+        std::string debugFolder_;
+    };
+
+    template<typename ValueType, unsigned int DIn, unsigned int DOut> 
+    inline bool hoImageRegTransformation<ValueType, DIn, DOut>::
+    transform( const input_point_type& in, output_point_type& out ) const
+    {
+        return this->transform(in.begin(), out.begin());
+    }
+
+    template<typename ValueType, unsigned int DIn, unsigned int DOut> 
+    inline bool hoImageRegTransformation<ValueType, DIn, DOut>::
+    transform( input_point_type* in, size_t N, output_point_type* out ) const // transforms in[0..N-1] into out[0..N-1], one point per call
+    {
+        try
+        {
+            long long ii;
+
+            #pragma omp parallel for default(none) private(ii) shared(in, out, N)
+            for ( ii=0; ii<(long long)N; ii++ )
+            {
+                this->transform(in[ii].begin(), out[ii].begin()); // the bool result of the per-point transform is not checked here
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happen in hoImageRegTransformation<ValueType, DIn, DOut>::transform( input_point_type* in, size_t N, output_point_type* out ) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template<typename ValueType, unsigned int DIn, unsigned int DOut> 
+    inline bool hoImageRegTransformation<ValueType, DIn, DOut>::
+    transform(const hoNDArray<T>& pt_in, hoNDArray<T>& pt_out) const // forwards to the raw-pointer overload transform(const T*, size_t, T*)
+    {
+        const T* pIn = pt_in.begin();
+        T* pOut = pt_out.begin(); // pt_out is used as-is: this function neither resizes it nor checks its size
+        size_t N = pt_in.get_size(1); // number of points is taken from dimension index 1 of pt_in
+
+        return this->transform(pIn, N, pOut);
+    }
+
+    template<typename ValueType, unsigned int DIn, unsigned int DOut> 
+    inline bool hoImageRegTransformation<ValueType, DIn, DOut>::
+    transform(const T* pt_in, size_t N, T* pt_out) const
+    {
+        try
+        {
+            long long ii;
+
+            #pragma omp parallel for default(none) private(ii) shared(pt_in, N, pt_out)
+            for ( ii=0; ii<(long long)N; ii++ )
+            {
+                this->transform(pt_in+ii*DIn, pt_out+ii*DOut);
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happen in hoImageRegTransformation<ValueType, DIn, DOut>::transform(T* pt_in, size_t N, T* pt_out) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template<typename ValueType, unsigned int DIn, unsigned int DOut> 
+    inline bool hoImageRegTransformation<ValueType, DIn, DOut>::
+    transform(T* pt_inout, size_t N) const // in-place transform of N points stored DIn values apart in pt_inout
+    {
+        try
+        {
+            GADGET_CHECK_RETURN_FALSE(DIn>=DOut); // the DOut outputs are written back over a DIn-sized slot, so DOut must not exceed DIn
+
+            long long ii;
+
+            #pragma omp parallel default(none) private(ii) shared(pt_inout, N)
+            {
+                T pt_out[DOut]; // scratch output point, declared inside the parallel region
+
+                #pragma omp for 
+                for ( ii=0; ii<(long long)N; ii++ )
+                {
+                    this->transform(pt_inout+ii*DIn, pt_out); // the bool result of the per-point transform is not checked here
+                    memcpy(pt_inout+ii*DIn, pt_out, sizeof(T)*DOut); // overwrites the first DOut values of point ii
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happen in hoImageRegTransformation<ValueType, DIn, DOut>::transform(T* pt_inout, size_t N) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template<typename ValueType, unsigned int DIn, unsigned int DOut> 
+    inline bool hoImageRegTransformation<ValueType, DIn, DOut>::
+    transform(const T* xi, const T* yi, size_t N, T* xo, T* yo) const
+    {
+        try
+        {
+            long long ii;
+
+            #pragma omp parallel for default(none) private(ii) shared(xi, yi, xo, yo, N)
+            for ( ii=0; ii<(long long)N; ii++ )
+            {
+                this->transform(xi[ii], yi[ii], xo[ii], yo[ii]);
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happen in hoImageRegTransformation<ValueType, DIn, DOut>::transform(T* xi, T* yi, size_t N, T* xo, T* yo) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template<typename ValueType, unsigned int DIn, unsigned int DOut> 
+    inline bool hoImageRegTransformation<ValueType, DIn, DOut>::
+    transform(T* x_inout, T* y_inout, size_t N) const
+    {
+        try
+        {
+            long long ii;
+
+            T xo, yo;
+
+            #pragma omp parallel for default(none) private(ii, xo, yo) shared(x_inout, y_inout, N)
+            for ( ii=0; ii<(long long)N; ii++ )
+            {
+                this->transform(x_inout[ii], y_inout[ii], xo, yo);
+                x_inout[ii] = xo;
+                y_inout[ii] = yo;
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happen in hoImageRegTransformation<ValueType, DIn, DOut>::transform(T* x_inout, T* y_inout, size_t N) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template<typename ValueType, unsigned int DIn, unsigned int DOut> 
+    inline bool hoImageRegTransformation<ValueType, DIn, DOut>::
+    transform(const T* xi, const T* yi, const T* zi, size_t N, T* xo, T* yo, T* zo) const
+    {
+        try
+        {
+            long long ii;
+
+            #pragma omp parallel for default(none) private(ii) shared(xi, yi, zi, xo, yo, zo, N)
+            for ( ii=0; ii<(long long)N; ii++ )
+            {
+                this->transform(xi[ii], yi[ii], zi[ii], xo[ii], yo[ii], zo[ii]);
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happen in hoImageRegTransformation<ValueType, DIn, DOut>::transform(T* xi, T* yi, T* zi, size_t N, T* xo, T* yo, T* zo) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template<typename ValueType, unsigned int DIn, unsigned int DOut> 
+    inline bool hoImageRegTransformation<ValueType, DIn, DOut>::
+    transform(T* x_inout, T* y_inout, T* z_inout, size_t N) const
+    {
+        try
+        {
+            long long ii;
+
+            T xo, yo, zo;
+
+            #pragma omp parallel for default(none) private(ii, xo, yo, zo) shared(x_inout, y_inout, z_inout, N)
+            for ( ii=0; ii<(long long)N; ii++ )
+            {
+                this->transform(x_inout[ii], y_inout[ii], z_inout[ii], xo, yo, zo);
+                x_inout[ii] = xo;
+                y_inout[ii] = yo;
+                z_inout[ii] = zo;
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happen in hoImageRegTransformation<ValueType, DIn, DOut>::transform(T* x_inout, T* y_inout, T* z_inout, size_t N) ... ");
+            return false;
+        }
+
+        return true;
+    }
+}
diff --git a/toolboxes/registration/optical_flow/cpu/warper/hoImageRegWarper.h b/toolboxes/registration/optical_flow/cpu/warper/hoImageRegWarper.h
new file mode 100644
index 0000000..d73a33b
--- /dev/null
+++ b/toolboxes/registration/optical_flow/cpu/warper/hoImageRegWarper.h
@@ -0,0 +1,529 @@
+/** \file   hoImageRegWarper.h
+    \brief  Define the class to perform image warping using the geometric transformation in gadgetron registration
+    \author Hui Xue
+*/
+
+#pragma once
+
+#include "hoNDArray.h"
+#include "hoNDImage.h"
+#include "hoNDInterpolator.h"
+#include "hoNDBoundaryHandler.h"
+#include "hoMatrix.h"
+#include "hoNDArray_utils.h"
+#include "hoNDArray_elemwise.h"
+#include "hoNDImage_util.h"
+
+#include "hoImageRegTransformation.h"
+#include "hoImageRegDeformationField.h"
+#include "GtPrepUtil.h"
+
+#ifdef USE_OMP
+    #include <omp.h>
+#endif // USE_OMP
+
+namespace Gadgetron
+{
+    /// warp the source image to the grid of the target image under a transformation
+    /// both image-domain warping and world-coordinate warping are implemented
+    /// for image-domain warping, the pixel positions are expressed in image grid coordinates
+    /// input and output can have different dimensions
+    /// input has DIn dimensions and output has DOut dimensions
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    class hoImageRegWarper
+    {
+    public:
+        // convenience typedefs for the images, interpolator and transformations handled by this warper
+        typedef hoImageRegWarper<ValueType, CoordType, DIn, DOut> Self;
+
+        typedef hoNDImage<ValueType, DOut> TargetType;
+        typedef hoNDImage<ValueType, DIn> SourceType;
+
+        typedef hoNDImage<ValueType, 2> Target2DType;
+        typedef Target2DType Source2DType;
+
+        typedef hoNDImage<ValueType, 3> Target3DType;
+        typedef Target3DType Source3DType; // 3D alias, mirroring Source2DType above
+
+        typedef hoNDInterpolator<SourceType> InterpolatorType;
+
+        typedef hoImageRegTransformation<CoordType, DIn, DOut> TransformationType;
+        typedef hoImageRegDeformationField<CoordType, DIn> DeformTransformationType;
+
+        typedef ValueType T;
+        typedef ValueType element_type;
+        typedef ValueType value_type;
+
+        typedef CoordType coord_type;
+
+        typedef typename TransformationType::input_point_type input_point_type;
+        typedef typename TransformationType::output_point_type output_point_type;
+
+        typedef typename TransformationType::jacobian_parameter_type jacobian_parameter_type;
+        typedef typename TransformationType::jacobian_position_type jacobian_position_type;
+        /// bg_value : target pixels equal to this value are left unwarped
+        hoImageRegWarper(ValueType bg_value = 0);
+        virtual ~hoImageRegWarper();
+        /// the setters store only the address of the object passed in (or the new background value); nothing is copied or owned
+        void setTransformation(TransformationType& transform);
+        void setInterpolator(InterpolatorType& interp);
+        void setBackgroundValue(ValueType bg_value);
+        /// warp source onto the grid of target and store the result in warped; returns false if an error is caught
+        virtual bool warp(const TargetType& target, const SourceType& source, bool useWorldCoordinate, TargetType& warped);
+        //virtual bool warp(const Target2DType& target, const Source2DType& source, bool useWorldCoordinate, Target2DType& warped);
+        //virtual bool warp(const Target3DType& target, const Source3DType& source, bool useWorldCoordinate, Target3DType& warped);
+
+        /// warp at the target image grid using the DeformationField transformation
+        /// the DeformationField is read at the target pixel indexes; the value is added to the target world position to get the source world position
+        /// the deformation field grid should be the same as the target images
+        virtual bool warpWithDeformationFieldWorldCoordinate(const TargetType& target, const SourceType& source, TargetType& warped);
+        /// write the input/output dimensions and the element type names to os
+        virtual void print(std::ostream& os) const;
+
+        // ----------------------------------
+        // debug and timing
+        // ----------------------------------
+        // clock for timing
+        Gadgetron::GadgetronTimer gt_timer1_;
+        Gadgetron::GadgetronTimer gt_timer2_;
+        Gadgetron::GadgetronTimer gt_timer3_;
+
+        bool performTiming_;
+
+        // exporter
+        Gadgetron::gtPlus::gtPlusIOAnalyze gt_exporter_;
+
+        // debug folder
+        std::string debugFolder_;
+
+    protected:
+        // non-owning pointers, set through setTransformation() / setInterpolator()
+        TransformationType* transform_;
+        InterpolatorType* interp_;
+
+        /// background value, used to mark regions in the target image which will not be warped
+        ValueType bg_value_;
+    };
+
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    hoImageRegWarper<ValueType, CoordType, DIn, DOut>::hoImageRegWarper(ValueType bg_value) : performTiming_(false), transform_(NULL), interp_(NULL), bg_value_(bg_value)
+    { // starts with no transformation/interpolator attached and performTiming_ false; initializers are listed in member declaration order
+        gt_timer1_.set_timing_in_destruction(false);
+        gt_timer2_.set_timing_in_destruction(false);
+        gt_timer3_.set_timing_in_destruction(false);
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    hoImageRegWarper<ValueType, CoordType, DIn, DOut>::~hoImageRegWarper()
+    { // intentionally empty: transform_ and interp_ are not deleted here
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    inline void hoImageRegWarper<ValueType, CoordType, DIn, DOut>::setTransformation(TransformationType& transform)
+    {
+        this->transform_ = &transform; // only the address is kept; the transformation itself is not copied
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    inline void hoImageRegWarper<ValueType, CoordType, DIn, DOut>::setInterpolator(InterpolatorType& interp)
+    {
+        this->interp_ = &interp; // only the address is kept; the interpolator itself is not copied
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    inline void hoImageRegWarper<ValueType, CoordType, DIn, DOut>::setBackgroundValue(ValueType bg_value)
+    {
+        this->bg_value_ = bg_value; // target pixels equal to this value are skipped by the warp routines
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    bool hoImageRegWarper<ValueType, CoordType, DIn, DOut>::
+    warp(const TargetType& target, const SourceType& source, bool useWorldCoordinate, TargetType& warped)
+    { // Resample source at every non-background pixel of target through transform_ and store the result in warped; true on success, false if an exception is caught.
+        try
+        {
+            GADGET_DEBUG_CHECK_RETURN_FALSE(transform_!=NULL);
+            // NOTE(review): the macro name suggests "return false when the condition fails", possibly only in debug builds -- confirm
+            if ( useWorldCoordinate )
+            {
+                // if the transformation is a deformation field, the special version of warp is called instead
+                DeformTransformationType* transformDeformField = dynamic_cast<DeformTransformationType*>(transform_);
+                if( transformDeformField != NULL )
+                {
+                    return this->warpWithDeformationFieldWorldCoordinate(target, source, warped);
+                }
+            }
+
+            GADGET_DEBUG_CHECK_RETURN_FALSE(interp_!=NULL);
+            interp_->setArray( const_cast<SourceType&>(source) );
+            // start from a copy of target: pixels equal to bg_value_ are skipped below and keep the copied target value
+            warped = target;
+
+            if ( DIn==2 && DOut==2 )
+            {
+                size_t sx = target.get_size(0);
+                size_t sy = target.get_size(1);
+
+                long long y;
+                // the OpenMP pragmas in both 2D branches are commented out, so these loops run serially
+                if ( useWorldCoordinate )
+                {
+                    // #pragma omp parallel private(y) shared(sx, sy, target, source, warped)
+                    {
+                        coord_type px, py, px_source, py_source, ix_source, iy_source;
+
+                        // #pragma omp for 
+                        for ( y=0; y<(long long)sy; y++ )
+                        {
+                            for ( size_t x=0; x<sx; x++ )
+                            {
+                                size_t offset = x + y*sx;
+
+                                if ( target( offset ) != bg_value_ )
+                                {
+                                    // target to world
+                                    target.image_to_world(x, size_t(y), px, py);
+
+                                    // transform the point
+                                    transform_->transform(px, py, px_source, py_source);
+
+                                    // world to source
+                                    source.world_to_image(px_source, py_source, ix_source, iy_source);
+
+                                    // interpolate the source
+                                    warped( offset ) = (*interp_)(ix_source, iy_source);
+                                }
+                            }
+                        }
+                    }
+                }
+                else
+                {
+                    // #pragma omp parallel private(y) shared(sx, sy, target, source, warped)
+                    {
+                        coord_type ix_source, iy_source;
+
+                        // #pragma omp for 
+                        for ( y=0; y<(long long)sy; y++ )
+                        {
+                            for ( size_t x=0; x<sx; x++ )
+                            {
+                                size_t offset = x + y*sx;
+
+                                if ( target( offset ) != bg_value_ )
+                                {
+                                    // transform the point
+                                    transform_->transform(x, size_t(y), ix_source, iy_source);
+
+                                    // interpolate the source
+                                    warped( offset ) = (*interp_)(ix_source, iy_source);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            else if ( DIn==3 && DOut==3 )
+            {
+                size_t sx = target.get_size(0);
+                size_t sy = target.get_size(1);
+                size_t sz = target.get_size(2);
+
+                long long z;
+                // 3D case: the outer z loop is split across OpenMP threads; coordinate temporaries are declared inside the parallel region
+                if ( useWorldCoordinate )
+                {
+                    #pragma omp parallel private(z) shared(sx, sy, sz, target, source, warped)
+                    {
+                        coord_type px, py, pz, px_source, py_source, pz_source, ix_source, iy_source, iz_source;
+                        // NOTE(review): transform_ and interp_ are used by all threads at once -- assumes their calls are thread-safe, TODO confirm
+                        #pragma omp for 
+                        for ( z=0; z<(long long)sz; z++ )
+                        {
+                            for ( size_t y=0; y<sy; y++ )
+                            {
+                                size_t offset = y*sx + z*sx*sy;
+
+                                for ( size_t x=0; x<sx; x++ )
+                                {
+                                    if ( target( x+offset ) != bg_value_ )
+                                    {
+                                        // target to world
+                                        target.image_to_world(x, y, size_t(z), px, py, pz);
+
+                                        // transform the point
+                                        transform_->transform(px, py, pz, px_source, py_source, pz_source);
+
+                                        // world to source
+                                        source.world_to_image(px_source, py_source, pz_source, ix_source, iy_source, iz_source);
+
+                                        // interpolate the source
+                                        warped( x+offset ) = (*interp_)(ix_source, iy_source, iz_source);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+                else
+                {
+                    #pragma omp parallel private(z) shared(sx, sy, sz, target, source, warped)
+                    {
+                        coord_type ix_source, iy_source, iz_source;
+
+                        #pragma omp for 
+                        for ( z=0; z<(long long)sz; z++ )
+                        {
+                            for ( size_t y=0; y<sy; y++ )
+                            {
+                                size_t offset = y*sx + z*sx*sy;
+
+                                for ( size_t x=0; x<sx; x++ )
+                                {
+                                    if ( target( x+offset ) != bg_value_ )
+                                    {
+                                        // transform the point
+                                        transform_->transform(x, y, size_t(z), ix_source, iy_source, iz_source);
+
+                                        // interpolate the source
+                                        warped( x+offset ) = (*interp_)(ix_source, iy_source, iz_source);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            else
+            {
+                size_t numOfPixels = target.get_number_of_elements();
+                // generic N-D case: pixels are visited by linear offset and handled through index/point arrays
+                long long n;
+                // NOTE(review): target-side buffers are sized DIn and source-side buffers DOut, while TargetType has DOut and SourceType DIn dimensions -- looks safe only when DIn==DOut, confirm
+                if ( useWorldCoordinate )
+                {
+                    #pragma omp parallel private(n) shared(numOfPixels, target, source, warped)
+                    {
+                        size_t ind_target[DIn];
+                        coord_type pt_target[DIn];
+                        coord_type pt_source[DOut];
+                        coord_type ind_source[DOut];
+
+                        #pragma omp for 
+                        for ( n=0; n<(long long)numOfPixels; n++ )
+                        {
+                            if ( target( size_t(n) ) != bg_value_ )
+                            {
+                                // target to world
+                                target.calculate_index( size_t(n), ind_target );
+
+                                target.image_to_world(ind_target, pt_target);
+
+                                // transform the point
+                                transform_->transform(pt_target, pt_source);
+
+                                // world to source
+                                source.world_to_image(pt_source, ind_source);
+
+                                // interpolate the source
+                                warped( size_t(n) ) = (*interp_)(ind_source);
+                            }
+                        }
+                    }
+                }
+                else
+                {
+                    #pragma omp parallel private(n) shared(numOfPixels, target, source, warped)
+                    {
+                        coord_type pt_target[DIn];
+                        coord_type pt_source[DOut];
+
+                        #pragma omp for 
+                        for ( n=0; n<(long long)numOfPixels; n++ )
+                        {
+                            if ( target( size_t(n) ) != bg_value_ )
+                            {
+                                target.calculate_index( size_t(n), pt_target );
+
+                                // transform the point
+                                this->transform_->transform(pt_target, pt_source);
+
+                                // interpolate the source
+                                warped( size_t(n) ) = (*interp_)(pt_source);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegWarper<ValueType, CoordType, DIn, DOut>::\
+                                    warp(const TargetType& target, const SourceType& source, bool useWorldCoordinate, TargetType& warped) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    bool hoImageRegWarper<ValueType, CoordType, DIn, DOut>::
+    warpWithDeformationFieldWorldCoordinate(const TargetType& target, const SourceType& source, TargetType& warped)
+    { // World-coordinate warp for a deformation-field transform_: the field value read at each target pixel is added to that pixel's world position; true on success, false if an exception is caught.
+        try
+        {
+            GADGET_DEBUG_CHECK_RETURN_FALSE(DIn==DOut);
+            GADGET_DEBUG_CHECK_RETURN_FALSE(transform_!=NULL);
+            // the field is accessed through its concrete type, so the stored base pointer is downcast and checked
+            DeformTransformationType* transformDeformField = dynamic_cast<DeformTransformationType*>(transform_);
+            GADGET_DEBUG_CHECK_RETURN_FALSE(transformDeformField!=NULL);
+
+            GADGET_DEBUG_CHECK_RETURN_FALSE(interp_!=NULL);
+            interp_->setArray( const_cast<SourceType&>(source) );
+            // start from a copy of target: pixels equal to bg_value_ are skipped below and keep the copied target value
+            warped = target;
+
+            if ( DIn==2 && DOut==2 )
+            {
+                size_t sx = target.get_size(0);
+                size_t sy = target.get_size(1);
+
+                long long y;
+                // the OpenMP pragmas of the 2D branch are commented out, so this loop runs serially
+                // #pragma omp parallel private(y) shared(sx, sy, target, source, warped)
+                {
+                    coord_type px, py, dx, dy, ix_source, iy_source;
+
+                    // #pragma omp for 
+                    for ( y=0; y<(long long)sy; y++ )
+                    {
+                        for ( size_t x=0; x<sx; x++ )
+                        {
+                            size_t offset = x + y*sx;
+
+                            if ( target( offset ) != bg_value_ )
+                            {
+                                // target to world
+                                target.image_to_world(x, size_t(y), px, py);
+
+                                // read the deformation (dx, dy) at this target pixel index
+                                transformDeformField->get(x, size_t(y), dx, dy);
+
+                                // world to source, after adding the deformation to the target world position
+                                source.world_to_image(px+dx, py+dy, ix_source, iy_source);
+
+                                // interpolate the source
+                                warped( offset ) = (*interp_)(ix_source, iy_source);
+                            }
+                        }
+                    }
+                }
+            }
+            else if ( DIn==3 && DOut==3 )
+            {
+                size_t sx = target.get_size(0);
+                size_t sy = target.get_size(1);
+                size_t sz = target.get_size(2);
+
+                long long z;
+                // 3D case: the outer z loop is split across OpenMP threads; coordinate temporaries are declared inside the parallel region
+                #pragma omp parallel private(z) shared(sx, sy, sz, target, source, warped)
+                {
+                    coord_type px, py, pz, dx, dy, dz, ix_source, iy_source, iz_source;
+
+                    #pragma omp for 
+                    for ( z=0; z<(long long)sz; z++ )
+                    {
+                        for ( size_t y=0; y<sy; y++ )
+                        {
+                            size_t offset = y*sx + z*sx*sy;
+
+                            for ( size_t x=0; x<sx; x++ )
+                            {
+                                if ( target( x+offset ) != bg_value_ )
+                                {
+                                    // target to world
+                                    target.image_to_world(x, y, size_t(z), px, py, pz);
+
+                                    // read the deformation (dx, dy, dz) at this target pixel index
+                                    transformDeformField->get(x, y, size_t(z), dx, dy, dz);
+
+                                    // world to source, after adding the deformation to the target world position
+                                    source.world_to_image(px+dx, py+dy, pz+dz, ix_source, iy_source, iz_source);
+
+                                    // interpolate the source
+                                    warped( x+offset ) = (*interp_)(ix_source, iy_source, iz_source);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            else
+            {
+                size_t numOfPixels = target.get_number_of_elements();
+                // generic N-D case: pixels are visited by linear offset and handled through index/point arrays
+                long long n;
+
+                #pragma omp parallel private(n) shared(numOfPixels, target, source, warped)
+                {
+                    size_t ind_target[DIn];
+                    coord_type pt_target[DIn];
+                    coord_type pt_source[DOut];
+                    coord_type ind_source[DOut];
+
+                    unsigned int ii;
+
+                    #pragma omp for 
+                    for ( n=0; n<(long long)numOfPixels; n++ )
+                    {
+                        if ( target( size_t(n) ) != bg_value_ )
+                        {
+                            // target to world
+                            target.calculate_index( size_t(n), ind_target );
+
+                            target.image_to_world(ind_target, pt_target);
+
+                            // read the deformation at this target pixel index into pt_source
+                            transformDeformField->get(ind_target, pt_source);
+                            // add the target world position, turning the deformation into the source world position
+                            for ( ii=0; ii<DIn; ii++ )
+                            {
+                                pt_source[ii] += pt_target[ii];
+                            }
+
+                            // world to source
+                            source.world_to_image(pt_source, ind_source);
+
+                            // interpolate the source
+                            warped( size_t(n) ) = (*interp_)(ind_source);
+                        }
+                    }
+                }
+            }
+        }
+        catch(...)
+        {
+            GADGET_ERROR_MSG("Errors happened in hoImageRegWarper<ValueType, CoordType, DIn, DOut>::\
+                                    warpWithDeformationFieldWorldCoordinate(const TargetType& target, const SourceType& source, TargetType& warped) ... ");
+            return false;
+        }
+
+        return true;
+    }
+
+    template<typename ValueType, typename CoordType, unsigned int DIn, unsigned int DOut> 
+    void hoImageRegWarper<ValueType, CoordType, DIn, DOut>::print(std::ostream& os) const
+    {
+        // Writes a short description of this warper (dimensions and element type names) to os.
+        os << "--------------Gadgetron image warper -------------" << std::endl;
+        os << "Input dimension is : " << DIn << std::endl;
+        os << "Output dimension is : " << DOut << std::endl;
+
+        // typeid(...).name() is implementation-defined (often a mangled name), so this text differs between compilers
+        os << "Image data type is : " << typeid(ValueType).name() << std::endl;
+
+        // same caveat for the coordinate type
+        os << "Transformation coordinate data type is : " << typeid(CoordType).name() << std::endl;
+    }
+}
diff --git a/toolboxes/registration/optical_flow/gpu/CMakeLists.txt b/toolboxes/registration/optical_flow/gpu/CMakeLists.txt
index 82c4b7d..88f2e8e 100644
--- a/toolboxes/registration/optical_flow/gpu/CMakeLists.txt
+++ b/toolboxes/registration/optical_flow/gpu/CMakeLists.txt
@@ -8,7 +8,7 @@ include_directories(
   ${CUDA_INCLUDE_DIRS}
 )
 
-cuda_add_library(gpureg SHARED 
+cuda_add_library(gadgetron_toolbox_gpureg SHARED 
   cuOpticalFlowSolver.cu 
   cuHSOpticalFlowSolver.cu 
   cuCKOpticalFlowSolver.cu 
@@ -17,12 +17,14 @@ cuda_add_library(gpureg SHARED
 #  cuRegistration_utils.cu
   )
 
-target_link_libraries(gpureg 
-  gpucore 
+set_target_properties(gadgetron_toolbox_gpureg PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+
+target_link_libraries(gadgetron_toolbox_gpureg 
+  gadgetron_toolbox_gpucore 
   ${CUDA_LIBRARIES} ${CUDA_CUFFT_LIBRARIES} ${CUDA_CUBLAS_LIBRARIES}
   )
 
-install(TARGETS gpureg DESTINATION lib)
+install(TARGETS gadgetron_toolbox_gpureg DESTINATION lib COMPONENT main)
 
 install(FILES
   cuOpticalFlowSolver.h
@@ -33,4 +35,4 @@ install(FILES
   cuLinearResampleOperator.h
 #  cuRegistration_utils.h
   cuCGHSOFSolver.h
-  DESTINATION include)
+  DESTINATION include COMPONENT main)
diff --git a/toolboxes/registration/optical_flow/gpu/cuCKOpticalFlowSolver.cu b/toolboxes/registration/optical_flow/gpu/cuCKOpticalFlowSolver.cu
index 22458e3..3fe2a0e 100644
--- a/toolboxes/registration/optical_flow/gpu/cuCKOpticalFlowSolver.cu
+++ b/toolboxes/registration/optical_flow/gpu/cuCKOpticalFlowSolver.cu
@@ -8,7 +8,7 @@ namespace Gadgetron{
   //
 
   template<class REAL, unsigned int D> __global__ 
-  void CorneliusKanade_kernel(REAL*,REAL*,REAL*,REAL*,typename uintd<D>::Type,unsigned int,REAL,REAL,REAL,unsigned int*);
+  void CorneliusKanade_kernel(const REAL*,const REAL*,const REAL*,REAL*,typename uintd<D>::Type,unsigned int,REAL,REAL,REAL,unsigned int*);
 
   //
   // Reference to shared memory
@@ -176,10 +176,10 @@ namespace Gadgetron{
   //
 
   template<class REAL, unsigned int D> __global__ void
-  CorneliusKanade_kernel( REAL *gradient_image, REAL *stencil_image,
-                          REAL *in_disp, REAL *out_disp, 
+  CorneliusKanade_kernel( const REAL * __restrict__ gradient_image, const REAL * __restrict__ stencil_image,
+                          const REAL * __restrict__ in_disp, REAL * __restrict__ out_disp,
                           typename uintd<D>::Type matrix_size, unsigned int num_batches,
-                          REAL alpha, REAL beta, REAL disp_thresh_sqr, unsigned int *continue_signal )
+                          REAL alpha, REAL beta, REAL disp_thresh_sqr, unsigned int * __restrict__ continue_signal )
   {  
     
     // The overall flow dimension corresponding to this thread
diff --git a/toolboxes/registration/optical_flow/gpu/cuHSOpticalFlowSolver.cu b/toolboxes/registration/optical_flow/gpu/cuHSOpticalFlowSolver.cu
index f7bf0ea..8e02bf7 100644
--- a/toolboxes/registration/optical_flow/gpu/cuHSOpticalFlowSolver.cu
+++ b/toolboxes/registration/optical_flow/gpu/cuHSOpticalFlowSolver.cu
@@ -8,7 +8,7 @@ namespace Gadgetron{
   //
 
   template<class REAL, unsigned int D> __global__ 
-  void HornSchunk_kernel(REAL*,REAL*,REAL*,REAL*,typename uintd<D>::Type,unsigned int,REAL,REAL,unsigned int*);
+  void HornSchunk_kernel(const REAL*,const REAL*,const REAL*,REAL*,typename uintd<D>::Type,unsigned int,REAL,REAL,unsigned int*);
 
   //
   // Reference to shared memory
@@ -177,10 +177,10 @@ namespace Gadgetron{
   //
   
   template<class REAL, unsigned int D> __global__ void
-  HornSchunk_kernel( REAL *gradient_image, REAL *stencil_image,
-                     REAL *in_disp, REAL *out_disp, 
+  HornSchunk_kernel( const REAL * __restrict__ gradient_image, const REAL * __restrict__ stencil_image,
+                     const REAL * __restrict__ in_disp, REAL * __restrict__ out_disp,
                      typename uintd<D>::Type matrix_size, unsigned int num_batches,
-                     REAL alpha, REAL disp_thresh_sqr, unsigned int *continue_signal )
+                     REAL alpha, REAL disp_thresh_sqr, unsigned int * __restrict__ continue_signal )
   {  
     
     // The overall flow dimension corresponding to this thread
diff --git a/toolboxes/registration/optical_flow/gpu/cuLinearResampleOperator.cu b/toolboxes/registration/optical_flow/gpu/cuLinearResampleOperator.cu
index 8d5acd9..0154082 100644
--- a/toolboxes/registration/optical_flow/gpu/cuLinearResampleOperator.cu
+++ b/toolboxes/registration/optical_flow/gpu/cuLinearResampleOperator.cu
@@ -41,7 +41,7 @@ namespace Gadgetron{
   T interpolate( unsigned int batch_no, 
                  typename reald<typename realType<T>::Type,D>::Type co, 
                  typename uintd<D>::Type matrix_size, 
-                 T *image )
+                 const T * __restrict__ image )
   {
     typedef typename realType<T>::Type REAL;
 
@@ -117,8 +117,8 @@ namespace Gadgetron{
   }
 
   template<class REAL, unsigned int D> __global__ void
-  write_sort_arrays_kernel( typename uintd<D>::Type matrix_size, unsigned int extended_size, REAL *displacements,
-                            unsigned int *sort_keys, unsigned int *sort_values_indices, REAL *sort_values_weights )
+  write_sort_arrays_kernel( typename uintd<D>::Type matrix_size, unsigned int extended_size, const REAL * __restrict__ displacements,
+                            unsigned int * __restrict__ sort_keys,  unsigned int * __restrict__ sort_values_indices, REAL * __restrict__ sort_values_weights )
   {
     const unsigned int idx = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x+threadIdx.x;
     const unsigned int num_elements_mat = prod(matrix_size);
diff --git a/toolboxes/registration/optical_flow/gpu/cuOpticalFlowSolver.cu b/toolboxes/registration/optical_flow/gpu/cuOpticalFlowSolver.cu
index da4aebf..a12b10c 100644
--- a/toolboxes/registration/optical_flow/gpu/cuOpticalFlowSolver.cu
+++ b/toolboxes/registration/optical_flow/gpu/cuOpticalFlowSolver.cu
@@ -11,10 +11,10 @@ namespace Gadgetron{
   //
 
   template<class REAL, unsigned int D> __global__ 
-  void spatial_grad_kernel(REAL*,REAL*,REAL*,typename uint64d<D>::Type,unsigned int,unsigned int);
+  void spatial_grad_kernel(const REAL*, const REAL*,REAL*,typename uint64d<D>::Type,unsigned int,unsigned int);
 
   template<class REAL, unsigned int D> __global__ 
-  void temporal_grad_kernel(REAL*,REAL*,REAL*,typename uint64d<D>::Type,unsigned int,unsigned int);
+  void temporal_grad_kernel(const REAL*, const REAL*,REAL*,typename uint64d<D>::Type,unsigned int,unsigned int);
 
   // There is some issue about Cuda defining min/max incompatibly...
   //
@@ -150,7 +150,7 @@ namespace Gadgetron{
   //
 
   template<class REAL, unsigned int D> __global__ void
-  spatial_grad_kernel( REAL *fixed_image, REAL *moving_image, REAL *gradient_image, 
+  spatial_grad_kernel( const REAL * __restrict__ fixed_image, const REAL * __restrict__ moving_image, REAL * __restrict__ gradient_image,
                        typename uint64d<D>::Type matrix_size, 
                        unsigned int num_batches_fixed, unsigned int num_batches_moving )
   {
@@ -241,7 +241,7 @@ namespace Gadgetron{
   //
 
   template<class REAL, unsigned int D> __global__ void
-  temporal_grad_kernel( REAL *fixed_image, REAL *moving_image, REAL *gradient_image, 
+  temporal_grad_kernel( const REAL * __restrict__ fixed_image, const REAL * __restrict__ moving_image, REAL * __restrict__ gradient_image,
                         typename uint64d<D>::Type matrix_size, 
                         unsigned int num_batches_fixed, unsigned int num_batches_moving )
   { 
diff --git a/toolboxes/solvers/CMakeLists.txt b/toolboxes/solvers/CMakeLists.txt
index 92a0a0e..6569136 100644
--- a/toolboxes/solvers/CMakeLists.txt
+++ b/toolboxes/solvers/CMakeLists.txt
@@ -9,14 +9,17 @@ install(FILES
   solver.h
   linearOperatorSolver.h
   cgSolver.h
+  nlcgSolver.h
   sbSolver.h
   sbcSolver.h
   cgCallback.h	
   cgPreconditioner.h
   lwSolver.h
+  lbfgsSolver.h
+  lsqrSolver.h
   gpSolver.h
   gpBbSolver.h
-  DESTINATION include)
+  DESTINATION include COMPONENT main)
 
 IF(ARMADILLO_FOUND)
   add_subdirectory(cpu)
diff --git a/toolboxes/solvers/cgPreconditioner.h b/toolboxes/solvers/cgPreconditioner.h
index afff8c6..04f36bd 100644
--- a/toolboxes/solvers/cgPreconditioner.h
+++ b/toolboxes/solvers/cgPreconditioner.h
@@ -37,11 +37,7 @@ namespace Gadgetron{
       *out = *in;
       *out *= *weights_;
     };
-    
-    void* operator new (size_t bytes) { return ::new char[bytes]; }
-    void operator delete (void *ptr) { delete [] static_cast <char *> (ptr); } 
-    void * operator new(size_t s, void * p) { return p; }    
-    
+
   protected:
     boost::shared_ptr<ARRAY_TYPE> weights_;    
   };
diff --git a/toolboxes/solvers/cpu/CMakeLists.txt b/toolboxes/solvers/cpu/CMakeLists.txt
index 390abee..245b808 100644
--- a/toolboxes/solvers/cpu/CMakeLists.txt
+++ b/toolboxes/solvers/cpu/CMakeLists.txt
@@ -4,7 +4,7 @@ endif (WIN32)
 
 include_directories(
   ${CMAKE_SOURCE_DIR}/toolboxes/cpucore/
-  ${CMAKE_SOURCE_DIR}/toolboxes/cpucore/arma_math
+  ${CMAKE_SOURCE_DIR}/toolboxes/cpucore/math
   )
 
 install(FILES 	
@@ -12,4 +12,5 @@ install(FILES
   hoSbCgSolver.h
   hoGpBbSolver.h
   hoCgPreconditioner.h
-  DESTINATION include)
+  hoSolverUtils.h
+  DESTINATION include COMPONENT main)
diff --git a/toolboxes/solvers/cpu/hoCgPreconditioner.h b/toolboxes/solvers/cpu/hoCgPreconditioner.h
index fea5e38..35ee14b 100644
--- a/toolboxes/solvers/cpu/hoCgPreconditioner.h
+++ b/toolboxes/solvers/cpu/hoCgPreconditioner.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include "hoNDArray_operators.h"
+#include "hoNDArray_math.h"
 #include "cgPreconditioner.h"
 
 namespace Gadgetron{
diff --git a/toolboxes/solvers/cpu/hoCgSolver.h b/toolboxes/solvers/cpu/hoCgSolver.h
index c1ad815..cde6f33 100644
--- a/toolboxes/solvers/cpu/hoCgSolver.h
+++ b/toolboxes/solvers/cpu/hoCgSolver.h
@@ -9,9 +9,7 @@
 #pragma once
 
 #include "cgSolver.h"
-#include "hoNDArray_operators.h"
-#include "hoNDArray_elemwise.h"
-#include "hoNDArray_blas.h"
+#include "hoNDArray_math.h"
 
 namespace Gadgetron{
 
diff --git a/toolboxes/solvers/cpu/hoGpBbSolver.h b/toolboxes/solvers/cpu/hoGpBbSolver.h
index c664b64..5349a8c 100644
--- a/toolboxes/solvers/cpu/hoGpBbSolver.h
+++ b/toolboxes/solvers/cpu/hoGpBbSolver.h
@@ -1,15 +1,11 @@
 #pragma once
 
 #include "gpBbSolver.h"
-#include "hoNDArray_operators.h"
-#include "hoNDArray_elemwise.h"
-#include "hoNDArray_blas.h"
+#include "hoNDArray_math.h"
 #include "real_utilities.h"
 #include "vector_td_utilities.h"
 
-#ifdef USE_OMP
-#include <omp.h>
-#endif
+
 
 namespace Gadgetron{
 
@@ -20,19 +16,6 @@ namespace Gadgetron{
     hoGpBbSolver() : gpBbSolver< hoNDArray<T> >() {};
     virtual ~hoGpBbSolver() {};
         
-    virtual void solver_non_negativity_filter(hoNDArray<T> *xdata, hoNDArray<T> *gdata)
-    {
-      typedef typename realType<T>::Type REAL;
-
-      T* x = xdata->get_data_ptr();
-      T* g = gdata->get_data_ptr();
 
-#ifdef USE_OMP
-#pragma omp parallel for
-#endif
-      for( int i=0; i < xdata->get_number_of_elements(); i++ )
-	if( (real(x[i]) <= REAL(0)) && (real(g[i]) > 0) ) 
-	  g[i]=T(0);
-    }
   };
 }
diff --git a/toolboxes/solvers/cpu/hoSolverUtils.h b/toolboxes/solvers/cpu/hoSolverUtils.h
new file mode 100644
index 0000000..a495078
--- /dev/null
+++ b/toolboxes/solvers/cpu/hoSolverUtils.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include "hoNDArray.h"
+#include "hoNDArray_math.h"
+#include "complext.h"
+
+#ifdef USE_OMP
+#include <omp.h>
+#endif
+
+namespace Gadgetron {
+// Zeroes g[i] wherever real(x[i]) <= 0 while real(g[i]) > 0. Only gdata is written; the loop length comes from xdata.
+template<class T> void solver_non_negativity_filter(hoNDArray<T> *xdata, hoNDArray<T> *gdata)
+{
+	typedef typename realType<T>::Type REAL;
+	T* x = xdata->get_data_ptr();
+	T* g = gdata->get_data_ptr();
+	const long long count = static_cast<long long>(xdata->get_number_of_elements()); // signed 64-bit: an int index overflows past 2^31-1 elements, and OpenMP 2.0 needs a signed loop variable
+#ifdef USE_OMP
+#pragma omp parallel for
+#endif
+	for( long long i=0; i < count; i++ )
+		if( (real(x[i]) <= REAL(0)) && (real(g[i]) > 0) )
+			g[i]=T(0);
+}
+}
diff --git a/toolboxes/solvers/gpBbSolver.h b/toolboxes/solvers/gpBbSolver.h
index c801c1d..ee995c2 100644
--- a/toolboxes/solvers/gpBbSolver.h
+++ b/toolboxes/solvers/gpBbSolver.h
@@ -180,7 +180,6 @@ protected:
 	typedef typename std::vector<boost::shared_ptr<linearOperator<ARRAY_TYPE> > >::iterator  csIterator;
 	typedef typename std::vector< std::vector<boost::shared_ptr<linearOperator<ARRAY_TYPE> > > >::iterator csGroupIterator;
 
-	virtual void solver_non_negativity_filter(ARRAY_TYPE*,ARRAY_TYPE*)=0;
 	virtual void iteration_callback(ARRAY_TYPE*,int i,REAL,REAL){};
 
 protected:
diff --git a/toolboxes/solvers/gpSolver.h b/toolboxes/solvers/gpSolver.h
index 5dee7d5..2031c9a 100644
--- a/toolboxes/solvers/gpSolver.h
+++ b/toolboxes/solvers/gpSolver.h
@@ -247,7 +247,7 @@ namespace Gadgetron{
           op->mult_M(x2,tmp.get());
           data.push_back(tmp);
           ARRAY_TYPE tmp2 = *tmp;
-          tmp2 *= *tmp;
+          tmp2 *= *tmp; //Square data
           gData += tmp2;
         }
         if (this->prior.get()){
diff --git a/toolboxes/solvers/gpu/CMakeLists.txt b/toolboxes/solvers/gpu/CMakeLists.txt
index 0a66de5..7e42a24 100644
--- a/toolboxes/solvers/gpu/CMakeLists.txt
+++ b/toolboxes/solvers/gpu/CMakeLists.txt
@@ -3,7 +3,7 @@ if (WIN32)
 endif (WIN32)
 
 if(WIN32)
-link_directories(${Boost_LIBRARY_DIRS})
+  link_directories(${Boost_LIBRARY_DIRS})
 endif(WIN32)
 
 include_directories(
@@ -11,39 +11,40 @@ include_directories(
   ${CMAKE_SOURCE_DIR}/toolboxes/core/gpu
   ${CMAKE_SOURCE_DIR}/toolboxes/operators/gpu
   ${CMAKE_SOURCE_DIR}/toolboxes/solvers
+  ${CMAKE_SOURCE_DIR}/toolboxes/solvers/cpu
   )
 
-cuda_add_library(gpusolvers SHARED 
-    cuCgPreconditioner.h
-    cuCgSolver.h
-    cuGpBbSolver.h
-    cuLwSolver.h
-    cuSbcCgSolver.h
-    cuSbCgSolver.h
-    cuSbcLwSolver.h
-    cuSbLwSolver.h
+cuda_add_library(gadgetron_toolbox_gpusolvers SHARED 
     gpusolvers_export.h
-    cuGpBbSolver.cu
+    cuSolverUtils.cu
   )
 
-target_link_libraries(gpusolvers 
-  gpucore 
+set_target_properties(gadgetron_toolbox_gpusolvers PROPERTIES VERSION ${GADGETRON_VERSION_STRING} SOVERSION ${GADGETRON_SOVERSION})
+
+target_link_libraries(gadgetron_toolbox_gpusolvers 
+  gadgetron_toolbox_gpucore 
   ${Boost_LIBRARIES}
   ${CUDA_LIBRARIES}
   ${CUDA_CUBLAS_LIBRARIES} 
   )
 
-install(TARGETS gpusolvers DESTINATION lib)
+install(TARGETS gadgetron_toolbox_gpusolvers DESTINATION lib COMPONENT main)
 
 install(FILES 	
   cuSbCgSolver.h
   cuSbcCgSolver.h
   cuCgPreconditioner.h
   cuLwSolver.h
+  cuLbfgsSolver.h
   cuSbLwSolver.h
   cuSbcLwSolver.h
   cuCgSolver.h
+  cuNlcgSolver.h
   cuGpBbSolver.h
+  hoCuCgSolver.h
+  hoCuNlcgSolver.h
+  hoCuSbcCgSolver.h
   hoCuGpBbSolver.h
+  cuSolverUtils.h
   gpusolvers_export.h
-  DESTINATION include)
+  DESTINATION include COMPONENT main)
diff --git a/toolboxes/solvers/gpu/cuGpBbSolver.cu b/toolboxes/solvers/gpu/cuGpBbSolver.cu
deleted file mode 100644
index 6ab3765..0000000
--- a/toolboxes/solvers/gpu/cuGpBbSolver.cu
+++ /dev/null
@@ -1,40 +0,0 @@
-#include "cuGpBbSolver.h"
-#include "complext.h"
-
-#define MAX_THREADS_PER_BLOCK 512
-
-using namespace Gadgetron;
-template <class T> __global__ void filter_kernel(T* x, T* g, int elements){
-  const int idx = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x + threadIdx.x;
-  if (idx < elements){
-    if ( x[idx] <= T(0) && g[idx] > 0) g[idx]=T(0);
-  }
-}
-
-template <class REAL> __global__ void filter_kernel(complext<REAL>* x, complext<REAL>* g, int elements){
-  const int idx = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x + threadIdx.x;
-  if (idx < elements){
-    if ( real(x[idx]) <= REAL(0) && real(g[idx]) > 0) g[idx].vec[0] = REAL(0);
-    g[idx].vec[1]=REAL(0);
-  }
-}
-
-template <class T> void Gadgetron::cuGpBbSolver<T>::
-solver_non_negativity_filter(Gadgetron::cuNDArray<T>* x , Gadgetron::cuNDArray<T>* g)
-{
-  int elements = g->get_number_of_elements();
-
-  int threadsPerBlock = std::min(elements,MAX_THREADS_PER_BLOCK);
-  dim3 dimBlock( threadsPerBlock);
-  int totalBlocksPerGrid = std::max(1,elements/MAX_THREADS_PER_BLOCK);
-  dim3 dimGrid(totalBlocksPerGrid);
-
-  filter_kernel<typename realType<T>::Type><<<dimGrid,dimBlock>>>(x->get_data_ptr(),g->get_data_ptr(),elements);
-}
-
-
-template class EXPORTGPUSOLVERS Gadgetron::cuGpBbSolver<float>;
-template class EXPORTGPUSOLVERS Gadgetron::cuGpBbSolver<double>;
-
-template class EXPORTGPUSOLVERS Gadgetron::cuGpBbSolver< complext<float> >;
-template class EXPORTGPUSOLVERS Gadgetron::cuGpBbSolver< complext<double> >;
diff --git a/toolboxes/solvers/gpu/cuGpBbSolver.h b/toolboxes/solvers/gpu/cuGpBbSolver.h
index 27c57ec..3def796 100644
--- a/toolboxes/solvers/gpu/cuGpBbSolver.h
+++ b/toolboxes/solvers/gpu/cuGpBbSolver.h
@@ -6,21 +6,17 @@
 #include "cuNDArray_blas.h"
 #include "real_utilities.h"
 #include "vector_td_utilities.h"
-#include "gpusolvers_export.h"
 
-#include <thrust/device_vector.h>
-#include <thrust/transform.h>
-#include <thrust/functional.h>
+
+#include "cuSolverUtils.h"
 
 namespace Gadgetron{
-  
-  template <class T> class EXPORTGPUSOLVERS cuGpBbSolver : public gpBbSolver<cuNDArray<T> >
+
+  template <class T> class cuGpBbSolver : public gpBbSolver<cuNDArray<T> >
   {
   public:
-    
+
     cuGpBbSolver() : gpBbSolver<cuNDArray<T> >() {}
     virtual ~cuGpBbSolver() {}
-    
-    virtual void solver_non_negativity_filter(cuNDArray<T> *x,cuNDArray<T> *g);    
   };
 }
diff --git a/toolboxes/solvers/gpu/cuLbfgsSolver.h b/toolboxes/solvers/gpu/cuLbfgsSolver.h
new file mode 100644
index 0000000..f7803a9
--- /dev/null
+++ b/toolboxes/solvers/gpu/cuLbfgsSolver.h
@@ -0,0 +1,47 @@
+#pragma once
+
+#include "lbfgsSolver.h"
+#include "cuNDArray_operators.h"
+#include "cuNDArray_elemwise.h"
+#include "cuNDArray_blas.h"
+#include "real_utilities.h"
+#include "vector_td_utilities.h"
+#include "gpusolvers_export.h"
+
+
+#include <fstream>
+#include "cuSolverUtils.h"
+
+namespace Gadgetron{
+  
+  /** \class cuLbfgsSolver
+      \brief Instantiation of lbfgsSolver for cuNDArray<T>.
+  */
+  template <class T> class cuLbfgsSolver : public lbfgsSolver<cuNDArray<T> >
+  {
+  public:
+    
+    cuLbfgsSolver() : lbfgsSolver<cuNDArray<T> >() {}
+    virtual ~cuLbfgsSolver() {}
+/*
+    virtual void iteration_callback(cuNDArray<T>* x ,int iteration,typename realType<T>::Type value){
+  	  if (iteration == 0){
+  		  std::ofstream textFile("residual.txt",std::ios::trunc);
+  	  	  textFile << value << std::endl;
+  	  } else{
+  		  std::ofstream textFile("residual.txt",std::ios::app);
+  		  textFile << value << std::endl;
+  	  }
+
+    };
+    */
+
+  protected:
+    // lbfgsSolver declares solver_non_negativity_filter pure virtual, so it has to be overridden
+    // here for this class to be instantiable. The call is namespace-qualified so that it reaches
+    // the free function declared in cuSolverUtils.h instead of recursing into this member.
+    virtual void solver_non_negativity_filter(cuNDArray<T>* x, cuNDArray<T>* g){
+      Gadgetron::solver_non_negativity_filter(x,g);
+    }
+  };
+}
diff --git a/toolboxes/solvers/gpu/cuNlcgSolver.h b/toolboxes/solvers/gpu/cuNlcgSolver.h
new file mode 100644
index 0000000..16670f5
--- /dev/null
+++ b/toolboxes/solvers/gpu/cuNlcgSolver.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include "nlcgSolver.h"
+#include "cuNDArray_operators.h"
+#include "cuNDArray_elemwise.h"
+#include "cuNDArray_blas.h"
+#include "real_utilities.h"
+#include "vector_td_utilities.h"
+#include "gpusolvers_export.h"
+
+#include <thrust/device_vector.h>
+#include <thrust/transform.h>
+#include <thrust/functional.h>
+#include "cuSolverUtils.h"
+
+namespace Gadgetron{
+  
+  template <class T> class cuNlcgSolver : public nlcgSolver<cuNDArray<T> >
+  {
+  public:
+    cuNlcgSolver() : nlcgSolver<cuNDArray<T> >() {}
+    virtual ~cuNlcgSolver() {}
+  };
+}
diff --git a/toolboxes/solvers/gpu/cuSolverUtils.cu b/toolboxes/solvers/gpu/cuSolverUtils.cu
new file mode 100644
index 0000000..0428d8a
--- /dev/null
+++ b/toolboxes/solvers/gpu/cuSolverUtils.cu
@@ -0,0 +1,50 @@
+#include "complext.h"
+#include "cuSolverUtils.h"
+#include <algorithm>
+#define MAX_THREADS_PER_BLOCK 512
+#define MAX_BLOCKS_PER_GRID_DIM 65535
+
+using namespace Gadgetron;
+
+// Real-valued filter: g[idx] is zeroed where x[idx] <= 0 and g[idx] > 0.
+template <class T> __global__ static void filter_kernel(T* x, T* g, int elements){
+	const int idx = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x + threadIdx.x;
+	if (idx < elements){
+		if ( x[idx] <= T(0) && g[idx] > 0) g[idx]=T(0);
+	}
+}
+
+// Complex-valued filter: the real part of g[idx] is zeroed under the same test applied to the real parts.
+// NOTE(review): the imaginary part of g is cleared for every element, unlike the host version in hoSolverUtils.h - confirm this is intended.
+template <class REAL> __global__ static void filter_kernel(complext<REAL>* x, complext<REAL>* g, int elements){
+	const int idx = blockIdx.y*gridDim.x*blockDim.x + blockIdx.x*blockDim.x + threadIdx.x;
+	if (idx < elements){
+		if ( real(x[idx]) <= REAL(0) && real(g[idx]) > 0) g[idx].vec[0] = REAL(0);
+		g[idx].vec[1]=REAL(0);
+	}
+}
+
+// Host entry point: launches filter_kernel over all elements of g.
+// The block count is rounded up so that a trailing partial block is covered, and it is folded into
+// the y dimension when more than MAX_BLOCKS_PER_GRID_DIM blocks are needed (the kernels index with blockIdx.y).
+template <class T> void EXPORTGPUSOLVERS Gadgetron::solver_non_negativity_filter(cuNDArray<T>* x , cuNDArray<T>* g)
+{
+	int elements = g->get_number_of_elements();
+	if (elements <= 0) return; // a zero-sized launch configuration is invalid
+
+	int threadsPerBlock = std::min(elements,MAX_THREADS_PER_BLOCK);
+	dim3 dimBlock( threadsPerBlock);
+	int totalBlocksPerGrid = (elements+MAX_THREADS_PER_BLOCK-1)/MAX_THREADS_PER_BLOCK;
+	int blocksX = std::min(totalBlocksPerGrid,MAX_BLOCKS_PER_GRID_DIM);
+	int blocksY = (totalBlocksPerGrid+blocksX-1)/blocksX;
+	dim3 dimGrid(blocksX,blocksY);
+
+	filter_kernel<typename realType<T>::Type><<<dimGrid,dimBlock>>>(x->get_data_ptr(),g->get_data_ptr(),elements);
+}
+
+
+template void EXPORTGPUSOLVERS Gadgetron::solver_non_negativity_filter<float>(cuNDArray<float>*, cuNDArray<float>*);
+template void EXPORTGPUSOLVERS Gadgetron::solver_non_negativity_filter<double>(cuNDArray<double>*, cuNDArray<double>*);
+template void EXPORTGPUSOLVERS Gadgetron::solver_non_negativity_filter<float_complext>(cuNDArray<float_complext>*, cuNDArray<float_complext>*);
+template void EXPORTGPUSOLVERS Gadgetron::solver_non_negativity_filter<double_complext>(cuNDArray<double_complext>*, cuNDArray<double_complext>*);
+
diff --git a/toolboxes/solvers/gpu/cuSolverUtils.h b/toolboxes/solvers/gpu/cuSolverUtils.h
new file mode 100644
index 0000000..a351242
--- /dev/null
+++ b/toolboxes/solvers/gpu/cuSolverUtils.h
@@ -0,0 +1,11 @@
+#pragma once
+#include "complext.h"
+#include "hoCuNDArray.h"
+#include "cuNDArray.h"
+#include "gpusolvers_export.h"
+
+namespace Gadgetron{
+// Device-array non-negativity filter: clears entries of g where x <= 0 and g > 0 (tested on the real parts for complext).
+template<class T> void EXPORTGPUSOLVERS solver_non_negativity_filter(cuNDArray<T>* x , cuNDArray<T>* g);
+// Defined and explicitly instantiated in cuSolverUtils.cu for float, double, float_complext and double_complext.
+}
diff --git a/toolboxes/solvers/gpu/hoCuCgSolver.h b/toolboxes/solvers/gpu/hoCuCgSolver.h
new file mode 100644
index 0000000..1995b3c
--- /dev/null
+++ b/toolboxes/solvers/gpu/hoCuCgSolver.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include "cgSolver.h"
+
+#include "cgSolver.h"
+#include "hoNDArray_math.h"
+#include "hoCuNDArray_math.h"
+
+namespace Gadgetron{
+
+  /** \class hoCuCgSolver
+      \brief Instantiation of the conjugate gradient solver for hoCuNDArray<T>.
+
+      The class hoCuCgSolver is a convenience wrapper for the device independent cgSolver class.
+      hoCuCgSolver instantiates the cgSolver for type hoCuNDArray<T>.
+  */
+  template <class T> class hoCuCgSolver : public cgSolver< hoCuNDArray<T> >
+  {
+  public:
+    hoCuCgSolver() : cgSolver<hoCuNDArray<T> >(), _it(0) {}
+    virtual ~hoCuCgSolver() {}
+
+    /* TSS: This is too expensive to do in general. Move responsibility of dumping to the apps.
+    virtual void solver_dump(hoCuNDArray<T>* x){
+    	std::stringstream ss;
+			ss << "iteration-" << _it << ".real";
+			write_nd_array(x,ss.str().c_str());
+			_it++;
+      }*/
+
+  private:
+    int _it;
+  };
+}
diff --git a/toolboxes/solvers/gpu/hoCuGpBbSolver.h b/toolboxes/solvers/gpu/hoCuGpBbSolver.h
index 75a7db9..4db1aee 100644
--- a/toolboxes/solvers/gpu/hoCuGpBbSolver.h
+++ b/toolboxes/solvers/gpu/hoCuGpBbSolver.h
@@ -1,11 +1,10 @@
 #pragma once
 
 #include "gpBbSolver.h"
-#include "hoNDArray_operators.h"
-#include "hoNDArray_elemwise.h"
-#include "hoNDArray_blas.h"
+#include "hoNDArray_math.h"
 #include "real_utilities.h"
 #include "vector_td_utilities.h"
+#include "hoSolverUtils.h"
 
 #ifdef USE_OMP
 #include <omp.h>
@@ -19,20 +18,6 @@ namespace Gadgetron{
 
     hoCuGpBbSolver() : gpBbSolver< hoCuNDArray<T> >() {};
     virtual ~hoCuGpBbSolver() {};
-        
-    virtual void solver_non_negativity_filter(hoCuNDArray<T> *xdata, hoCuNDArray<T> *gdata)
-    {
-      typedef typename realType<T>::Type REAL;
 
-      T* x = xdata->get_data_ptr();
-      T* g = gdata->get_data_ptr();
-
-#ifdef USE_OMP
-#pragma omp parallel for
-#endif
-      for( int i=0; i < xdata->get_number_of_elements(); i++ )
-        if( (real(x[i]) <= REAL(0)) && (real(g[i]) > 0) ) 
-          g[i]=T(0);
-    }
   };
 }
diff --git a/toolboxes/solvers/gpu/hoCuNlcgSolver.h b/toolboxes/solvers/gpu/hoCuNlcgSolver.h
new file mode 100644
index 0000000..9ab5f7d
--- /dev/null
+++ b/toolboxes/solvers/gpu/hoCuNlcgSolver.h
@@ -0,0 +1,35 @@
+#pragma once
+
+#include "hoNDArray_math.h"
+#include "hoCuNDArray_math.h"
+#include "hoNDArray_fileio.h"
+#include "complext.h"
+#include "nlcgSolver.h"
+#include "hoSolverUtils.h"
+
+namespace Gadgetron{
+
+template<class T> class hoCuNlcgSolver: public nlcgSolver<hoCuNDArray<T> >{
+	typedef typename realType<T>::Type REAL;
+public:
+	hoCuNlcgSolver():nlcgSolver<hoCuNDArray<T> >(){
+
+	}
+
+	virtual ~hoCuNlcgSolver(){};
+
+  virtual void iteration_callback(hoCuNDArray<T>* x,int i,REAL data_res,REAL reg_res){
+	  /*
+	  if (i == 0){
+		  std::ofstream textFile("residual.txt",std::ios::trunc);
+	  	  textFile << data_res << std::endl;
+	  } else{
+		  std::ofstream textFile("residual.txt",std::ios::app);
+		  textFile << data_res << std::endl;
+	  }
+	  std::stringstream ss;
+	  ss << "iteration-" << i << ".real";
+	  write_nd_array(x,ss.str().c_str());*/
+  };
+};
+}
diff --git a/toolboxes/solvers/gpu/hoCuSbcCgSolver.h b/toolboxes/solvers/gpu/hoCuSbcCgSolver.h
new file mode 100644
index 0000000..ff29e6f
--- /dev/null
+++ b/toolboxes/solvers/gpu/hoCuSbcCgSolver.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "hoCuCgSolver.h"
+#include "sbcSolver.h"
+
+#include "complext.h"
+
+namespace Gadgetron{
+
+  template <class T> class hoCuSbcCgSolver : public sbcSolver< hoCuNDArray<typename realType<T>::Type >, hoCuNDArray<T>, hoCuCgSolver<T> >
+  {
+  public:
+    hoCuSbcCgSolver() : sbcSolver<hoCuNDArray<typename realType<T>::Type >, hoCuNDArray<T>, hoCuCgSolver<T> >() {}
+    virtual ~hoCuSbcCgSolver() {}
+  };
+}
diff --git a/toolboxes/solvers/lbfgsSolver.h b/toolboxes/solvers/lbfgsSolver.h
new file mode 100644
index 0000000..f111250
--- /dev/null
+++ b/toolboxes/solvers/lbfgsSolver.h
@@ -0,0 +1,825 @@
+#pragma once
+
+#include "gpSolver.h"
+#include "linearOperatorSolver.h"
+#include "real_utilities.h"
+#include "complext.h"
+#include "cgPreconditioner.h"
+
+#include <vector>
+#include <iostream>
+#include <numeric>
+#include <list>
+
+namespace Gadgetron{
+/** Memory Limited BFGS Solver Adapted from Numerical Optimization (Wright and Nocedal 1999).
+ *
+ */
+
+template <class ARRAY_TYPE> class lbfgsSolver : public gpSolver<ARRAY_TYPE>
+{
+
+
+protected:
+	typedef typename ARRAY_TYPE::element_type ELEMENT_TYPE;
+	typedef typename realType<ELEMENT_TYPE>::Type REAL;
+	typedef ARRAY_TYPE ARRAY_CLASS;
+	typedef gpSolver<ARRAY_TYPE> GP;
+	typedef typename gpSolver<ARRAY_TYPE>::l1GPRegularizationOperator l1GPRegularizationOperator;
+
+public:
+
+	lbfgsSolver(): gpSolver<ARRAY_TYPE>() {
+		iterations_ = 10;
+		tc_tolerance_ = (REAL)1e-7;
+		non_negativity_constraint_=false;
+		dump_residual = false;
+		threshold= REAL(1e-7);
+		m_ = 3;
+		rho = 0.5f;
+	}
+
+	virtual ~lbfgsSolver(){}
+
+
+	virtual void set_rho(REAL _rho){
+		rho = _rho;
+	}
+
+	/***
+	 * @brief Sets the number of iterations to use for estimating the Hessian. Memory usage increases linearly with m_;
+	 * @param m
+	 */
+	virtual void set_m(unsigned int m){
+		m_ = m;
+	}
+
+	virtual boost::shared_ptr<ARRAY_TYPE> solve(ARRAY_TYPE* in)
+																																	{
+		if( this->encoding_operator_.get() == 0 ){
+			throw std::runtime_error("Error: lbfgsSolver::compute_rhs : no encoding operator is set" );
+		}
+
+		// Get image space dimensions from the encoding operator
+		//
+
+		boost::shared_ptr< std::vector<size_t> > image_dims = this->encoding_operator_->get_domain_dimensions();
+		if( image_dims->size() == 0 ){
+			throw std::runtime_error("Error: lbfgsSolver::compute_rhs : encoding operator has not set domain dimension" );
+		}
+
+		ARRAY_TYPE * x = new ARRAY_TYPE(image_dims.get()); //The image. Will be returned inside a shared_ptr
+
+		ARRAY_TYPE g(image_dims.get()); //Contains the gradient of the current step
+		ARRAY_TYPE g_old(image_dims.get()); //Contains the gradient of the previous step
+
+
+		ARRAY_TYPE g_linear(image_dims.get()); //Contains the linear part of the gradient;
+
+		//If a prior image was given, use it for the initial guess.
+		if (this->x0_.get()){
+			*x = *(this->x0_.get());
+		} else  {
+			clear(x);
+		}
+
+		// Contains the encoding space of the linear regularization operators
+		std::vector<ARRAY_TYPE> regEnc;
+
+		//Initialize encoding space
+		for (int i = 0; i < this->regularization_operators_.size(); i++){
+			regEnc.push_back(ARRAY_TYPE(this->regularization_operators_[i]->get_codomain_dimensions()));
+			if (reg_priors[i].get()){
+				regEnc.back() = *reg_priors[i];
+				regEnc.back() *= -std::sqrt(this->regularization_operators_[i]->get_weight());
+			}
+
+		}
+		std::vector<ARRAY_TYPE> regEnc2 = regEnc;
+
+		ARRAY_TYPE d(image_dims.get()); //Search direction.
+		clear(&d);
+
+		ARRAY_TYPE encoding_space(in->get_dimensions().get()); //Contains the encoding space, or, equivalently, the residual vector
+
+		ARRAY_TYPE g_step(image_dims.get()); //Linear part of the gradient of the step d will be stored here
+
+		ARRAY_TYPE encoding_space2(in->get_dimensions().get());
+		REAL reg_res,data_res;
+
+
+		std::list<bfgsPair> subspace;
+
+		if( this->output_mode_ >= solver<ARRAY_TYPE,ARRAY_TYPE>::OUTPUT_VERBOSE ){
+			std::cout << "Iterating..." << std::endl;
+		}
+		REAL grad_norm0;
+
+		for (int i = 0; i < iterations_; i++){
+			if (i==0){
+				if (this->x0_.get()){
+					this->encoding_operator_->mult_M(x,&encoding_space);
+
+				} else clear(&encoding_space);
+				encoding_space -= *in;
+				this->encoding_operator_->mult_MH(&encoding_space,&g_linear);
+
+				g_linear *=  this->encoding_operator_->get_weight();
+				data_res = std::sqrt(this->encoding_operator_->get_weight())*real(dot(&encoding_space,&encoding_space));
+
+				calc_regMultM(x,regEnc);
+				for (int n = 0; n < regEnc.size(); n++)
+					if (reg_priors[n].get())
+						axpy(-std::sqrt(this->regularization_operators_[n]->get_weight()),reg_priors[n].get(),&regEnc[n]);
+				add_linear_gradient(regEnc,&g_linear);
+				g = g_linear;
+				this->add_gradient(x,&g);
+
+				reg_res=REAL(0);
+
+			}else {
+				data_res = real(dot(&encoding_space,&encoding_space));
+			}
+
+
+
+			if (non_negativity_constraint_) solver_non_negativity_filter(x,&g);
+			if (i==0) grad_norm0=nrm2(&g);
+			REAL grad_norm = nrm2(&g);
+			if( this->output_mode_ >= solver<ARRAY_TYPE,ARRAY_TYPE>::OUTPUT_VERBOSE ){
+
+				std::cout << "Iteration " <<i << ". Relative gradient norm: " <<  grad_norm/grad_norm0 << std::endl;
+			}
+
+			lbfgs_update(&g,&d,subspace);
+
+			if (this->precond_.get()){
+				this->precond_->apply(&d,&d);
+				this->precond_->apply(&d,&d);
+			}
+
+
+			this->encoding_operator_->mult_M(&d,&encoding_space2);
+
+			calc_regMultM(&d,regEnc2);
+
+
+
+			this->encoding_operator_->mult_MH(&encoding_space2,&g_step);
+			g_step *= this->encoding_operator_->get_weight();
+
+
+			add_linear_gradient(regEnc2,&g_step);
+
+			REAL gd = real(dot(&g,&d));
+
+			REAL alpha0=REAL(1);
+
+			//In the linear or semi-linear case, we can calculate the ideal step size.
+			if (this->operators.size() == 0) alpha0 = -real(dot(&encoding_space,&encoding_space2)+calc_dot(regEnc,regEnc2))/real(dot(&encoding_space2,&encoding_space2)+calc_dot(regEnc2,regEnc2));
+
+			REAL alpha;
+			REAL old_norm = functionValue(&encoding_space,regEnc,x);
+
+
+
+			g_old = g;
+
+
+
+
+			{
+				FunctionEstimator f(&encoding_space,&encoding_space2,&regEnc,&regEnc2,x,&d,&g_linear,&g_step,this);
+				//alpha=backtracking(f,alpha0,gd,rho,old_norm);
+					alpha=cg_linesearch(f,alpha0,gd,old_norm);
+				if (alpha == 0) {
+					std::cerr << "Linesearch failed, returning current iteration" << std::endl;
+					return boost::shared_ptr<ARRAY_TYPE>(x);
+				}
+			}
+
+			std::cout << "Alpha : " << alpha << std::endl;
+
+
+
+			if (non_negativity_constraint_){
+				//Restore encoding space and gradient. Why not keep a copy? Memory!
+				axpy(-alpha,&encoding_space2,&encoding_space);
+				reg_axpy(-alpha,regEnc2,regEnc);
+				axpy(-alpha,&g_step,&g_linear);
+
+				ARRAY_TYPE x2 = *x;
+				axpy(alpha,&d,&x2);
+
+				clamp_min(&x2,REAL(0));
+
+				d = x2;
+				d -= *x;
+				gd = real(dot(&g,&d));
+				x2 = *x;
+				alpha0 = 1;
+				this->encoding_operator_->mult_M(&d,&encoding_space2);
+				calc_regMultM(&d,regEnc2);
+
+
+				this->encoding_operator_->mult_MH(&encoding_space2,&g_step);
+				g_step *= this->encoding_operator_->get_weight();
+				add_linear_gradient(regEnc2,&g_step);
+
+				FunctionEstimator f(&encoding_space,&encoding_space2,&regEnc,&regEnc2,x,&d,&g_linear,&g_step,this);
+				//alpha=gold(f,0,alpha0*1.5);
+				//alpha = wolfesearch(f,alpha0,gd,rho,old_norm);
+				//alpha = backtracking(f,alpha0,gd,rho,old_norm);
+
+				alpha = cg_linesearch(f,alpha0,gd,old_norm);
+				axpy(alpha,&d,x);
+				if (alpha == 0){
+					std::cerr << "Linesearch failed, returning current iteration" << std::endl;
+					return boost::shared_ptr<ARRAY_TYPE>(x);
+				}
+			} else {
+				axpy(alpha,&d,x);
+
+			}
+
+
+
+
+
+
+			REAL f = functionValue(&encoding_space,regEnc,x);
+			std::cout << "Function value: " << f << std::endl;
+
+			g = g_linear;
+
+			this->add_gradient(x,&g);
+
+
+			//Expand current BFGS subspace with new pair
+			bfgsPair pair;
+			if (subspace.size() == m_){
+				pair=subspace.back();
+				subspace.pop_back();
+				*(pair.s) = d;
+				*(pair.y) = g;
+			} else {
+				pair.s = boost::shared_ptr<ARRAY_TYPE>(new ARRAY_TYPE(d));
+				pair.y = boost::shared_ptr<ARRAY_TYPE>(new ARRAY_TYPE(g));
+			}
+			*(pair.s) *= alpha;
+			*(pair.y) -= g_old;
+
+			pair.rho = dot(pair.s.get(),pair.y.get());
+
+			subspace.push_front(pair);
+
+
+			iteration_callback(x,i,f);
+
+
+			if (grad_norm/grad_norm0 < tc_tolerance_)  break;
+
+		}
+
+		return boost::shared_ptr<ARRAY_TYPE>(x);
+																																	}
+
+
+
+	// Set preconditioner
+	//
+	/*virtual void set_preconditioner( boost::shared_ptr< cgPreconditioner<ARRAY_TYPE> > precond ) {
+      precond_ = precond;
+      }*/
+
+	// Set/get maximally allowed number of iterations
+	//
+	virtual void set_max_iterations( unsigned int iterations ) { iterations_ = iterations; }
+	virtual unsigned int get_max_iterations() { return iterations_; }
+
+	// Set/get tolerance threshold for termination criterium
+	//
+	virtual void set_tc_tolerance( REAL tolerance ) { tc_tolerance_ = tolerance; }
+	virtual REAL get_tc_tolerance() { return tc_tolerance_; }
+
+	virtual void set_non_negativity_constraint(bool non_negativity_constraint){
+		non_negativity_constraint_=non_negativity_constraint;
+	}
+
+	virtual void set_dump_residual(bool dump_res){
+		dump_residual = dump_res;
+	}
+	// Set preconditioner
+	//
+
+	virtual void set_preconditioner( boost::shared_ptr< cgPreconditioner<ARRAY_TYPE> > precond ) {
+		precond_ = precond;
+	}
+
+	virtual void add_regularization_operator( boost::shared_ptr< linearOperator< ARRAY_TYPE> > op)
+	{
+		if( !op.get() ){
+			throw std::runtime_error( "Error: linearOperatorSolver::add_regularization_operator : NULL operator provided" );
+		}
+		this->regularization_operators_.push_back(op);
+		reg_priors.push_back(boost::shared_ptr<ARRAY_TYPE>((ARRAY_TYPE*)0));
+	}
+
+	virtual void add_regularization_operator( boost::shared_ptr< linearOperator< ARRAY_TYPE> > op,boost::shared_ptr<ARRAY_TYPE> prior)
+	{
+		if( !op.get() ){
+			throw std::runtime_error( "Error: linearOperatorSolver::add_regularization_operator : NULL operator provided" );
+		}
+
+		this->regularization_operators_.push_back(op);
+		reg_priors.push_back(prior);
+	}
+
+	virtual void add_regularization_operator(boost::shared_ptr< linearOperator<ARRAY_TYPE> > op, int L_norm ){
+		if (L_norm==1){
+
+			this->operators.push_back(boost::shared_ptr< l1GPRegularizationOperator>(new l1GPRegularizationOperator(op)));
+		}else{
+			add_regularization_operator(op);
+		}
+	}
+
+
+	virtual void add_regularization_operator(boost::shared_ptr< linearOperator<ARRAY_TYPE> > op, boost::shared_ptr<ARRAY_TYPE> prior, int L_norm ){
+		if (L_norm==1){
+			this->operators.push_back(boost::shared_ptr<l1GPRegularizationOperator>(new l1GPRegularizationOperator(op,prior)));
+		}else{
+			add_regularization_operator(op,prior);
+		}
+	}
+
+
+protected:
+	typedef typename std::vector<boost::shared_ptr<linearOperator<ARRAY_TYPE> > >::iterator  csIterator;
+	typedef typename std::vector< std::vector<boost::shared_ptr<linearOperator<ARRAY_TYPE> > > >::iterator csGroupIterator;
+
+	virtual void solver_non_negativity_filter(ARRAY_TYPE*,ARRAY_TYPE*)=0;
+	virtual void iteration_callback(ARRAY_TYPE* x ,int iteration,REAL value){};
+
+
+
+
+	// Accumulates dot(&x[k],&y[k]) over every array in x; y is indexed in lockstep with x.
+	ELEMENT_TYPE calc_dot(std::vector<ARRAY_TYPE>& x,std::vector<ARRAY_TYPE>& y){
+		ELEMENT_TYPE total(0);
+		for (size_t k = 0; k != x.size(); ++k) total += dot(&x[k],&y[k]);
+		return total;
+	}
+
+	void add_linear_gradient(std::vector<ARRAY_TYPE>& elems, ARRAY_TYPE* g){ // adds sqrt(weight) * mult_MH(elems[k]) of every regularization operator k to g
+		ARRAY_TYPE scratch(g->get_dimensions());
+		for (size_t k = 0; k != elems.size(); ++k){
+			this->regularization_operators_[k]->mult_MH(&elems[k],&scratch);
+			axpy(std::sqrt(this->regularization_operators_[k]->get_weight()),&scratch,g);
+		}
+	}
+
+	void calc_regMultM(ARRAY_TYPE* x,std::vector<ARRAY_TYPE>& elems){ // elems[k] = mult_M of operator k applied to x, scaled by sqrt(weight)
+		for (size_t k = 0; k != elems.size(); ++k){
+			this->regularization_operators_[k]->mult_M(x,&elems[k]);
+			elems[k] *= std::sqrt(this->regularization_operators_[k]->get_weight());
+		}
+	}
+
+	// Applies axpy(alpha,&x[k],&y[k]) to every pair of arrays, walking both lists in lockstep.
+	void reg_axpy(REAL alpha, std::vector<ARRAY_TYPE>& x, std::vector<ARRAY_TYPE>& y){
+		typename std::vector<ARRAY_TYPE>::iterator src = x.begin(), dst = y.begin();
+		for (; src != x.end(); ++src, ++dst)
+			axpy(alpha,&*src,&*dst);
+	}
+	struct bfgsPair{
+		boost::shared_ptr<ARRAY_TYPE> s;
+		boost::shared_ptr<ARRAY_TYPE> y;
+		ELEMENT_TYPE rho;
+	};
+
+	/***
+	 * @brief L-BFGS update, following algorithm 9.2 in Numerical Optimization
+	 * @param[in] g gradient
+	 * @param[out] d search direction; overwritten with the negated result of the two loops (with no stored pairs: -g)
+	 * @param[in] pairs stored (s,y,rho) entries, walked front to back and then back to front; solve() pushes new pairs at the front with rho = dot(s,y)
+	 */
+	void lbfgs_update(ARRAY_TYPE* g, ARRAY_TYPE* d, std::list<bfgsPair>& pairs){
+		*d = *g;
+
+		if (pairs.size() > 0){
+			std::list<ELEMENT_TYPE> alpha_list;
+			for (typename std::list<bfgsPair>::iterator it = pairs.begin(); it != pairs.end(); ++it){
+				ELEMENT_TYPE alpha = dot(it->s.get(),d)/it->rho;
+				axpy(-alpha,it->y.get(),d);
+				alpha_list.push_back(alpha);
+			}
+			// Scale by gamma = rho/dot(y,y), taken from the pair at the front of the list
+			bfgsPair front = pairs.front();
+			ELEMENT_TYPE gamma = front.rho/dot(front.y.get(),front.y.get());
+			*d *= gamma;
+
+			typename std::list<ELEMENT_TYPE>::reverse_iterator alpha_it = alpha_list.rbegin();
+			//Reverse iteration: pairs and the alphas recorded above are consumed back to front
+			for (typename std::list<bfgsPair>::reverse_iterator it = pairs.rbegin(); it != pairs.rend(); ++it, ++alpha_it){
+				ELEMENT_TYPE beta = dot(it->y.get(),d)/it->rho;
+				ELEMENT_TYPE alpha = *alpha_it;
+				axpy(alpha-beta,it->s.get(),d);
+			}
+		}
+		*d *= REAL(-1);
+
+	}
+
+
+
+
+
+	/** Line-search helper: evaluates the objective at x + alpha*d for a trial step length alpha.
+	 *  Every array handed to the constructor is borrowed and updated in place; xtmp is the only copy.
+	 *  Each evaluation moves that state by (alpha - previously requested alpha) along the step arrays. */
+	class FunctionEstimator{
+	public:
+
+		FunctionEstimator(ARRAY_TYPE* _encoding_space,ARRAY_TYPE* _encoding_step,std::vector<ARRAY_TYPE>* _regEnc,std::vector<ARRAY_TYPE>* _regEnc_step, ARRAY_TYPE * _x, ARRAY_TYPE * _d, ARRAY_TYPE * _g, ARRAY_TYPE * _g_step, lbfgsSolver<ARRAY_TYPE> * _parent)
+			: alpha_old(0)
+			, encoding_step(_encoding_step)
+			, encoding_space(_encoding_space)
+			, regEnc(_regEnc)
+			, regEnc_step(_regEnc_step)
+			, x(_x)
+			, d(_d)
+			, g(_g)
+			, g_step(_g_step)
+			, parent(_parent)
+		{
+			xtmp = *x; // private working copy that follows the trial step
+		}
+
+		// Moves the tracked state to step length alpha and returns parent->functionValue at that point.
+		REAL operator () (REAL alpha){
+			const REAL delta = alpha-alpha_old;
+
+			axpy(delta,encoding_step,encoding_space);
+			axpy(delta,g_step,g);
+			parent->reg_axpy(delta,*regEnc_step,*regEnc);
+			axpy(delta,d,&xtmp);
+			alpha_old = alpha;
+
+			return parent->functionValue(encoding_space,*regEnc,&xtmp);
+		}
+
+		// dot(d, gradient) at the last evaluated step: a copy of *g plus what parent->add_gradient adds at xtmp.
+		ELEMENT_TYPE dir_deriv(){
+			ARRAY_TYPE gradient = *g;
+			parent->add_gradient(&xtmp,&gradient);
+			return dot(d,&gradient);
+		}
+
+	private:
+
+		// Step length of the most recent evaluation (0 before the first one)
+		REAL alpha_old;
+		// Borrowed state and the matching per-unit-step increments
+		ARRAY_TYPE* encoding_step;
+		ARRAY_TYPE * encoding_space;
+		std::vector<ARRAY_TYPE>* regEnc;
+		std::vector<ARRAY_TYPE>* regEnc_step;
+		ARRAY_TYPE* x, *d;
+		ARRAY_TYPE* g, *g_step;
+
+		// Solver providing functionValue, reg_axpy and add_gradient
+		lbfgsSolver<ARRAY_TYPE>* parent;
+		// Copy of *x advanced along d
+		ARRAY_TYPE xtmp;
+
+	};
+	friend class FunctionEstimator;
+
+	/***
+	 * @brief Golden section search algorithm. Only works with unimodal functions, which we assume we're dealing with, at least locally
+	 * @param f Functor to calculate the function to minimize; its last evaluation is at the returned value
+	 * @param a Start of the bracketing
+	 * @param d End of bracketing
+	 * @return Value minimizing the function f.
+	 */
+	REAL gold(FunctionEstimator& f, REAL a, REAL d){
+		using std::abs; // floating point overloads, as in wolfesearch
+		// Inverse golden ratio 2/(1+sqrt(5)) ~ 0.618. Mind the grouping: 1/(1+sqrt(5))/2 would be ~0.155.
+		const REAL gold = REAL(2.0/(1.0+std::sqrt(5.0)));
+
+		// Interior probes with a < b < c < d, placed symmetrically inside the bracket
+		REAL b = d-(d-a)*gold;
+		REAL c = a+(d-a)*gold;
+
+		REAL fb = f(b);
+		REAL fc = f(c);
+		REAL tol = 1e-6;
+		while (abs(a-d) > tol*(abs(b)+abs(c))){
+			if (fb > fc){
+				// Minimum lies in [b,d]: drop a and reuse c as the new lower probe
+				a = b;
+				b = c;
+				fb = fc;
+				c= b*gold+(1.0-gold)*d;
+				fc = f(c);
+			} else {
+				// Minimum lies in [a,c]: drop d and reuse b as the new upper probe
+				d = c;
+				c = b;
+				fc = fb;
+				b = c*gold+(1-gold)*a;
+				fb = f(b);
+			}
+		}
+		if (fb < fc){
+			f(b); // re-evaluate so the estimator state matches the returned step
+			return b;
+		}else {
+			f(c);
+			return c;
+		}
+	}
+
+	/***
+	 * Backtracking line search: tries alpha0*rho^k for k = 0,1,2,... until a step passes the tests in the loop
+	 * @param f functor evaluating the objective at a step length; f.dir_deriv() is queried after each evaluation
+	 * @param alpha0 first step length tried
+	 * @param gd directional derivative at step 0 (solve() computes it as real(dot(&g,&d)))
+	 * @param rho factor applied once more to the step on every rejected trial
+	 * @param old_norm objective value before the step (solve() passes functionValue at x)
+	 * @return the accepted step length, or 0 as soon as the trial step itself has become 0
+	 */
+	REAL backtracking(FunctionEstimator& f, const REAL alpha0, const REAL gd, const REAL rho, const REAL old_norm){
+		REAL alpha;
+		REAL delta=1e-4;
+		REAL sigma=0.9;
+		//REAL precision = 0.0003; //Estimated precision of function evaluation
+		REAL precision = 1e-4f; //Estimated precision of function evaluation
+		bool wolfe = false;
+		int  k=0;
+
+		while (not wolfe){
+			alpha=alpha0*std::pow(rho,k);
+			//if (f(alpha) <= old_norm+alpha*delta*gd) wolfe = true;//Strong Wolfe condition..
+			REAL fa = f(alpha);
+			ELEMENT_TYPE dir_deriv = f.dir_deriv();
+			if (((2*delta-1.0)*real(gd) >= real(dir_deriv)) && (fa < (old_norm+precision))) wolfe=true; //Approx Wolfe condition from Hager, W. and Zhang, H.SIAM Journal on Optimization 2005 16:1, 170-192
+			if (abs(dir_deriv) > sigma*abs(gd)) wolfe = false;//Strong Wolfe condition..
+			k++;
+			if (alpha == 0){
+				//std::cout << "Backtracking search failed, switching to slow wolfe-search" << std::endl;
+				//return wolfesearch(f,alpha0,gd,rho,old_norm);
+				return 0;
+			}
+		}
+
+		return alpha;
+
+	}
+
+	/***
+	 * Line search taken from Numerical Optimization (Wright and Nocedal 1999).
+	 * Adapted from the scipy optimize algorithm.
+	 * Like the gold-section method it works quite poorly in practice.
+	 * @param f functor evaluating the objective at a step length
+	 * @param alpha_init first step length tried
+	 * @param gd directional derivative at step 0 (solve() computes it as real(dot(&g,&d)))
+	 * @param rho not referenced by the body
+	 * @param old_norm objective value before the step
+	 * @return first trial step passing the sufficient decrease test; throws std::runtime_error once a trial step is <= 0
+	 */
+	REAL wolfesearch(FunctionEstimator& f, const REAL alpha_init, const REAL gd, const REAL rho, const REAL old_norm){
+		using std::sqrt;
+		using std::abs;
+		REAL delta=0.01;
+		unsigned int k=0;
+		REAL alpha0 = alpha_init;
+		REAL f0 = f(alpha0);
+
+		if (f0 <= old_norm+alpha0*delta*gd){//Sufficient decrease (Armijo) test
+			return alpha0;
+		}
+
+		// Second trial step: closed form built from old_norm, gd and f0
+		REAL alpha1 = -gd*alpha0*alpha0/2.0/(f0-old_norm-gd*alpha0);
+		//std::cout << "F0 " <<f0 << " old " << old_norm << " gd " << gd <<std::endl;
+		std::cout << "Alpha0: "  << alpha0 << std::endl;
+		//std::cout << "Alpha1: "  << alpha1 << std::endl;
+		REAL f1 = f(alpha1);
+
+
+		if (f1 <= old_norm+alpha1*delta*gd){//Sufficient decrease (Armijo) test
+			return alpha1;
+		}
+
+		// Further trial steps are computed from the two most recent (step, value) pairs
+		while (alpha1 > 0){
+			double factor = alpha0*alpha0*alpha1*alpha1*(alpha1-alpha0);
+			double a = alpha0*alpha0*(f1-old_norm-gd*alpha1) - alpha1*alpha1*(f0-old_norm-gd*alpha0);
+			a /= factor;
+
+			double b = -alpha0*alpha0*alpha0*(f1-old_norm-gd*alpha1) + alpha1*alpha1*alpha1*(f0-old_norm-gd*alpha0);
+			b /= factor;
+
+			double alpha2 = (-b+std::sqrt(std::abs(b*b-3*a*gd)))/(3*a);
+			REAL f2 = f(alpha2);
+			//std::cout << "a " << a << "b " << b << std::endl;
+			std::cout << "Alpha1: "  << alpha1 << std::endl;
+			std::cout << "Alpha2: "  << alpha2 << std::endl;
+			if (f2 < old_norm+alpha2*delta*gd){//Sufficient decrease (Armijo) test
+				return alpha2;
+			}
+			// NOTE(review): f2 is not re-evaluated when alpha2 is replaced by alpha1/2 below
+			if (((alpha1-alpha2) > (alpha1/2.0)) || ((1.0-alpha2/alpha1) < 0.96)){
+				alpha2 = alpha1 / 2.0;
+			}
+
+			alpha0 = alpha1;
+			alpha1 = alpha2;
+			f0 = f1;
+			f1 = f2;
+			k++;
+
+
+		}
+
+		throw std::runtime_error("Wolfe line search failed");
+
+
+	}
+
+
+
+	/***
+	 * CG linesearch adapted from  Hager, W. and Zhang, H.SIAM Journal on Optimization 2005 16:1, 170-192
+	 *
+	 * Works on the bracket [a,b] = [0,alpha0]. Each pass first tests b, then a, against the
+	 * approximate Wolfe conditions plus the curvature bound |f'| <= sigma*|gd|; if neither passes
+	 * the bracket is narrowed with secant2() and, when that did not shrink it enough, by an extra
+	 * bisection through interval_update().
+	 * @param f Functor returning the objective at a step length; f.dir_deriv() is read right after each evaluation
+	 * @param alpha0 Initial upper end of the bracket
+	 * @param gd Slope at step length 0 (used as the derivative at the lower end of the initial bracket)
+	 * @param old_norm Objective value at step length 0 (used as the value at the lower end of the initial bracket)
+	 * @return The accepted step length (f is evaluated at it once more before returning), or 0 when the bracket collapsed to a single point
+	 */
+	REAL cg_linesearch(FunctionEstimator& f, const REAL alpha0, const REAL gd, const REAL old_norm){
+		REAL delta=0.1;
+		REAL sigma=0.9;
+		REAL nabla=0.66; // a secant2 pass must shrink the bracket to at most this fraction, else bisect
+		//REAL precision = 0.0003; //Estimated precision of function evaluation
+		REAL precision = 1e-4f; //Estimated precision of function evaluation
+
+
+
+
+		REAL a=0;
+		REAL b = alpha0;
+
+		// ak/bk remember the bracket of the previous pass
+		REAL ak = a;
+		REAL bk = b;
+		REAL fa = old_norm;
+		ELEMENT_TYPE a_deriv = gd;
+		REAL fb = f(alpha0);
+		ELEMENT_TYPE b_deriv = f.dir_deriv();
+
+		while (abs(a-b) > 0){
+			if ((((2*delta-1.0)*real(gd) >= real(b_deriv)) && (fb < old_norm+precision)) && //Check Approximate Wolfe conditions
+					(abs(b_deriv) <= sigma*abs(gd))){
+				f(b); // re-evaluate so that f is left at the returned step
+				return b;
+			}
+
+			if ((((2*delta-1.0)*real(gd) >= real(a_deriv)) && (fa < old_norm+precision)) && //Check Approximate Wolfe conditions
+					(abs(a_deriv) <= sigma*abs(gd))){
+				f(a); // re-evaluate so that f is left at the returned step
+				return a;
+			}
+
+			// NOTE(review): secant2 receives old_norm+precision as its threshold while the
+			// bisection below receives plain old_norm - confirm this asymmetry is intended.
+			secant2(a,b,f,old_norm+precision);
+			if ((b-a) > nabla*(bk-ak)) {
+				REAL c = (a+b)/2;
+				interval_update(a,b,c,f,old_norm);
+			}
+			// Refresh value and slope only for an end point that actually moved
+			if (a != ak){
+				fa = f(a);
+				a_deriv = f.dir_deriv();
+			}
+
+			if (b != bk){
+				fb = f(b);
+				b_deriv = f.dir_deriv();
+			}
+
+			ak = a;
+			bk = b;
+
+			std::cout << "a: " << a << " b: " << b << std::endl;
+		}
+		// Bracket collapsed without an acceptable step
+		return 0;
+		//throw std::runtime_error("CG_linesearch failed");
+
+	}
+
+
+	/***
+	 * Double secant step of the Hager and Zhang (2005) line search.
+	 * Takes a secant step c between a and b, narrows the bracket with interval_update() and, if c
+	 * became one of the new end points, takes ONE further secant step from the matching old end
+	 * point. The two follow-up cases are mutually exclusive; the earlier revision chained them as
+	 * two independent ifs, so the second could fire on the already replaced c together with the
+	 * derivative of the old c.
+	 * @param a Lower end of the bracket, updated in place
+	 * @param b Upper end of the bracket, updated in place
+	 * @param f Functor for the objective; f.dir_deriv() is read right after each evaluation
+	 * @param old_norm Threshold forwarded unchanged to interval_update()
+	 */
+	void secant2(REAL& a, REAL& b,FunctionEstimator& f,REAL old_norm){
+		// The values themselves are not needed; f is evaluated because dir_deriv() is
+		// presumably taken at the most recently evaluated point.
+		f(a);
+		const ELEMENT_TYPE dfa = f.dir_deriv();
+		f(b);
+		const ELEMENT_TYPE dfb = f.dir_deriv();
+
+		REAL c= real((a*dfb-b*dfa)/(dfb-dfa));
+
+		f(c);
+		const ELEMENT_TYPE dfc = f.dir_deriv();
+
+		REAL A=a;
+		REAL B = b;
+
+		interval_update(A,B,c,f,old_norm);
+
+		if (c == B){
+			// c replaced the upper end: secant step between the old b and c
+			c= real((b*dfc-c*dfb)/(dfc-dfb));
+			interval_update(A,B,c,f,old_norm);
+		} else if (c == A){
+			// c replaced the lower end: secant step between the old a and c
+			c= real((a*dfc-c*dfa)/(dfc-dfa));
+			interval_update(A,B,c,f,old_norm);
+		}
+
+		a= A;
+		b = B;
+	}
+
+	// Narrows the bracket [a,b] (both updated in place) using the trial point c.
+	//  - c outside [a,b]: nothing happens.
+	//  - non-negative slope at c: c becomes the new upper end.
+	//  - negative slope and f(c) below old_norm: c becomes the new lower end.
+	//  - otherwise c becomes the upper end and the bracket is bisected repeatedly until a
+	//    point with non-negative slope is found.
+	// f.dir_deriv() is always read right after evaluating f at the point of interest.
+	// NOTE(review): the bisection loop has no iteration cap; it only ends once a point with
+	// non-negative slope is hit.
+	void interval_update(REAL & a, REAL & b, REAL c,FunctionEstimator& f,REAL old_norm){
+		REAL theta = 0.5; // split fraction; 0.5 is plain bisection
+		if (c < a || c > b) return; // C not in interval
+		REAL fc = f(c);
+		ELEMENT_TYPE dfc = f.dir_deriv();
+
+		if (real(dfc) >= 0){
+			b =c;
+			return;
+		}
+		if (fc < old_norm){
+			a = c;
+			return;
+		}
+		// Negative slope but value not below the threshold: search between a and c
+		b =c;
+		while(true){
+			REAL d = (1-theta)*a+theta*b;
+			REAL fd = f(d);
+			ELEMENT_TYPE dfd = f.dir_deriv();
+
+			if (real(dfd) >= 0){
+				b = d;
+				return;
+			}
+			if (fd < old_norm){
+				a = d;
+			} else 	b = d;
+
+			std::cout << "Interval a: " << a << " b: " << b << std::endl;
+
+		}
+
+
+
+
+	}
+
+	// Objective value assembled from three parts, summed in this order:
+	//   sqrt(encoding weight) * |<encoding_space,encoding_space>|,
+	//   the magnitude(x) of every entry of this->operators,
+	//   |calc_dot(regEnc,regEnc)|.
+	REAL functionValue(ARRAY_TYPE* encoding_space,std::vector<ARRAY_TYPE>& regEnc, ARRAY_TYPE * x){
+		const REAL data_term = std::sqrt(this->encoding_operator_->get_weight())*abs(dot(encoding_space,encoding_space));
+
+		REAL total = data_term;
+		const unsigned int op_count = this->operators.size();
+		for (unsigned int op = 0; op < op_count; ++op)
+			total += this->operators[op]->magnitude(x);
+
+		total += abs(calc_dot(regEnc,regEnc));
+		return total;
+	}
+
+
+
+
+
+
+protected:
+
+	// Preconditioner
+	//boost::shared_ptr< cgPreconditioner<ARRAY_TYPE> > precond_;
+	// Maximum number of iterations
+	unsigned int iterations_;
+	bool non_negativity_constraint_;
+	REAL tc_tolerance_;
+	REAL threshold;
+	bool dump_residual;
+	REAL rho;
+
+	unsigned int m_; // Number of copies to use.
+
+	// Preconditioner
+
+	std::vector<boost::shared_ptr<ARRAY_TYPE> > reg_priors;
+	boost::shared_ptr< cgPreconditioner<ARRAY_TYPE> > precond_;
+
+};
+}
diff --git a/toolboxes/solvers/lsqrSolver.h b/toolboxes/solvers/lsqrSolver.h
new file mode 100644
index 0000000..cf8bfc5
--- /dev/null
+++ b/toolboxes/solvers/lsqrSolver.h
@@ -0,0 +1,173 @@
+#pragma once
+
+
+#include "linearOperator.h"
+#include "linearOperatorSolver.h"
+#include "cgPreconditioner.h"
+#include "real_utilities.h"
+
+#include <vector>
+#include <iostream>
+#include "encodingOperatorContainer.h"
+
+namespace Gadgetron {
+/**
+ * LSQR-type iterative solver for the stacked system made of the encoding operator and all
+ * regularization operators (Golub-Kahan bidiagonalisation followed by plane rotations).
+ *
+ * NOTE(review): tc_tolerance_ can be set and read but solve() never consults it; the loop always
+ * runs for iterations_ passes unless the bidiagonalisation breaks down.
+ */
+template <class ARRAY_TYPE> class lsqrSolver: public linearOperatorSolver<ARRAY_TYPE>
+{
+protected:
+	typedef typename ARRAY_TYPE::element_type ELEMENT_TYPE;
+	typedef typename realType<ELEMENT_TYPE>::Type REAL;
+public:
+
+	/** Defaults: 10 iterations, tolerance 1e-3. */
+	lsqrSolver()  {
+		iterations_ = 10;
+		tc_tolerance_ = (REAL)1e-3;
+	}
+
+	virtual ~lsqrSolver() {}
+/*
+	virtual int set_preconditioner( boost::shared_ptr< cgPreconditioner<ARRAY_TYPE> > precond ) {
+		precond_ = precond;
+		return 0;
+	}
+*/
+	virtual void set_tc_tolerance( REAL tolerance ) { tc_tolerance_ = tolerance; }
+	virtual REAL get_tc_tolerance() { return tc_tolerance_; }
+
+	virtual void set_max_iterations( unsigned int iterations ) { iterations_ = iterations; }
+	virtual unsigned int get_max_iterations() { return iterations_; }
+
+	/**
+	 * Runs the iteration on the right-hand side b.
+	 * @param b Data for the encoding operator; the codomain parts of the regularization operators are left to create_codomain()
+	 * @return The image-space estimate; all zeros when b or its back-projection has zero norm
+	 * @throws std::runtime_error when no encoding operator is set or it reports no domain dimensions
+	 */
+	virtual boost::shared_ptr<ARRAY_TYPE> solve( ARRAY_TYPE *b )
+	{
+		if( this->encoding_operator_.get() == 0 ){
+			throw std::runtime_error( "Error: lsqrSolver::solve : no encoding operator is set" );
+		}
+
+		boost::shared_ptr< std::vector<size_t> > image_dims = this->encoding_operator_->get_domain_dimensions();
+		if( image_dims->size() == 0 ){
+			throw std::runtime_error( "Error: lsqrSolver::solve : encoding operator has not set domain dimension" );
+		}
+
+		// Owned by a shared_ptr from the start so that nothing leaks if an operator throws
+		boost::shared_ptr<ARRAY_TYPE> x(new ARRAY_TYPE(image_dims));
+		clear(x.get());
+
+		encodingOperatorContainer<ARRAY_TYPE> enc_op;
+		boost::shared_ptr<ARRAY_TYPE> u;
+
+		{
+			enc_op.add_operator(this->encoding_operator_);
+			for (unsigned int i =0; i < this->regularization_operators_.size(); i++)
+				enc_op.add_operator(this->regularization_operators_[i]);
+			std::vector<ARRAY_TYPE*> encspace(this->regularization_operators_.size()+1,NULL);
+			encspace[0] = b;
+			u = enc_op.create_codomain(encspace);
+		}
+
+		//Initialise u vector
+		REAL beta = nrm2(u.get());
+		if (beta == REAL(0)) return x; // zero right-hand side: nothing to normalise
+		*u *= REAL(1)/beta;
+
+		//Initialise v vector
+		ARRAY_TYPE v(*x); //v vector is in image space
+		clear(&v);
+		enc_op.mult_MH(u.get(),&v);
+
+		REAL alpha = nrm2(&v);
+		if (alpha == REAL(0)) return x; // back-projection vanishes: nothing to normalise
+		v *= REAL(1)/alpha;
+
+		//Initialise w vector
+		ARRAY_TYPE w(v);
+
+		//Initialise phibar
+		REAL phibar = beta;
+
+		//Initialise rhobar
+		REAL rhobar = alpha;
+		const REAL rhobar0 = alpha;
+
+		for (unsigned int it = 0; it < iterations_; it++){
+			// u <- M v - alpha u (third argument presumably requests accumulation)
+			*u *= -alpha;
+			enc_op.mult_M(&v,u.get(),true);
+			beta = nrm2(u.get());
+
+			if (beta > REAL(0)){
+				*u *= REAL(1)/beta;
+
+				// v <- M^H u - beta v
+				v *= -beta;
+				enc_op.mult_MH(u.get(),&v,true);
+				alpha = nrm2(&v);
+				if (alpha > REAL(0)) v *= REAL(1)/alpha;
+			} else {
+				// Breakdown: skip the normalisations that would divide by zero
+				alpha = REAL(0);
+			}
+
+			//Construct and apply next orthogonal transformation
+			REAL rho = std::sqrt(norm(rhobar)+norm(beta));
+			REAL c = rhobar/rho;
+			REAL s = beta/rho;
+			REAL theta = s*alpha;
+			rhobar = -c*alpha;
+			REAL phi = c*phibar;
+			phibar *= s;
+
+			//Update x, w
+			axpy(phi/rho,&w,x.get());  //x = x + phi/rho * w
+
+			w *= -theta/rho;
+			w += v;
+
+			if( this->output_mode_ >= solver<ARRAY_TYPE,ARRAY_TYPE>::OUTPUT_VERBOSE ){
+				std::cout << "Iteration " <<it << ". Relative residual: " <<  rhobar/rhobar0 << std::endl;
+			}
+
+			// After a breakdown the next pass would divide by zero, so stop here
+			if (beta == REAL(0) || alpha == REAL(0)) break;
+		}
+
+		return x;
+	}
+
+protected:
+
+	//boost::shared_ptr< cgPreconditioner<ARRAY_TYPE> > precond_;
+	unsigned int iterations_; // number of passes of the main loop
+	REAL tc_tolerance_;       // stored only; see the class note
+
+};
+
+}
diff --git a/toolboxes/solvers/nlcgSolver.h b/toolboxes/solvers/nlcgSolver.h
new file mode 100644
index 0000000..3751e16
--- /dev/null
+++ b/toolboxes/solvers/nlcgSolver.h
@@ -0,0 +1,776 @@
+#pragma once
+
+#include "gpSolver.h"
+#include "linearOperatorSolver.h"
+#include "real_utilities.h"
+#include "complext.h"
+#include "cgPreconditioner.h"
+
+#include <vector>
+#include <iostream>
+#include <numeric>
+
+namespace Gadgetron{
+/** Nonlinear conjugate gradient solver.
+ * Adapted from Y.H. Dai & Y. Yuan 2001 "An Efficient Hybrid Conjugate Gradient Method for Unconstrained Optimization"
+ * Annals of Operations Research, March 2001, Volume 103, Issue 1-4, pp 33-47
+ *
+ */
+
+template <class ARRAY_TYPE> class nlcgSolver : public gpSolver<ARRAY_TYPE>
+{
+
+
+protected:
+	typedef typename ARRAY_TYPE::element_type ELEMENT_TYPE;
+	typedef typename realType<ELEMENT_TYPE>::Type REAL;
+	typedef ARRAY_TYPE ARRAY_CLASS;
+	typedef gpSolver<ARRAY_TYPE> GP;
+	typedef typename gpSolver<ARRAY_TYPE>::l1GPRegularizationOperator l1GPRegularizationOperator;
+
+public:
+
+	/** Defaults: 10 iterations, tolerance and threshold 1e-7, no non-negativity constraint, no residual dump, rho 0.5. */
+	nlcgSolver()
+		: gpSolver<ARRAY_TYPE>()
+		, iterations_(10)
+		, non_negativity_constraint_(false)
+		, tc_tolerance_((REAL)1e-7)
+		, threshold(REAL(1e-7))
+		, dump_residual(false)
+		, rho(0.5f)
+	{
+	}
+
+	virtual ~nlcgSolver(){}
+
+
+	/** Stores the factor that solve() hands to the backtracking line search as its rho argument. */
+	virtual void set_rho(REAL _rho){
+		this->rho = _rho;
+	}
+
+	/**
+	 * Runs up to iterations_ nonlinear conjugate gradient steps on the data 'in'.
+	 *
+	 * The starting image is x0_ when one is set, otherwise zero. Every iteration builds a search
+	 * direction d (hybrid Dai-Yuan beta), finds a step length with backtracking() and moves x
+	 * along d. With the non-negativity constraint enabled the trial point is clamped at zero and
+	 * a second line search is run along the clamped displacement.
+	 * @param in Measured data; the residual is formed as (encoding operator applied to x) - in
+	 * @return The image; also returned early, as is, when a line search reports failure (step 0)
+	 * @throws std::runtime_error when no encoding operator is set or it reports no domain dimensions
+	 *
+	 * NOTE(review): x is a raw owning pointer until one of the return statements wraps it, so an
+	 * exception thrown in between would leak it.
+	 */
+	virtual boost::shared_ptr<ARRAY_TYPE> solve(ARRAY_TYPE* in)
+																															{
+		if( this->encoding_operator_.get() == 0 ){
+			throw std::runtime_error("Error: nlcgSolver::compute_rhs : no encoding operator is set" );
+		}
+
+		// Get image space dimensions from the encoding operator
+		//
+
+		boost::shared_ptr< std::vector<size_t> > image_dims = this->encoding_operator_->get_domain_dimensions();
+		if( image_dims->size() == 0 ){
+			throw std::runtime_error("Error: nlcgSolver::compute_rhs : encoding operator has not set domain dimension" );
+		}
+
+		ARRAY_TYPE * x = new ARRAY_TYPE(image_dims.get()); //The image. Will be returned inside a shared_ptr
+
+		ARRAY_TYPE g(image_dims.get()); //Contains the gradient of the current step
+		ARRAY_TYPE g_old(image_dims.get()); //Contains the gradient of the previous step
+
+
+		ARRAY_TYPE g_linear(image_dims.get()); //Contains the linear part of the gradient;
+
+		//If a prior image was given, use it for the initial guess.
+		if (this->x0_.get()){
+			*x = *(this->x0_.get());
+		} else  {
+			clear(x);
+		}
+
+		// Contains the encoding space of the linear regularization operators
+		std::vector<ARRAY_TYPE> regEnc;
+
+		//Initialize encoding space
+		for (int i = 0; i < this->regularization_operators_.size(); i++){
+			regEnc.push_back(ARRAY_TYPE(this->regularization_operators_[i]->get_codomain_dimensions()));
+			if (reg_priors[i].get()){
+				regEnc.back() = *reg_priors[i];
+				regEnc.back() *= -std::sqrt(this->regularization_operators_[i]->get_weight());
+			}
+
+		}
+		std::vector<ARRAY_TYPE> regEnc2 = regEnc;
+
+		ARRAY_TYPE d(image_dims.get()); //Search direction.
+		clear(&d);
+
+		ARRAY_TYPE encoding_space(in->get_dimensions().get()); //Contains the encoding space, or, equivalently, the residual vector
+
+		ARRAY_TYPE g_step(image_dims.get()); //Linear part of the gradient of the step d will be stored here
+
+		ARRAY_TYPE encoding_space2(in->get_dimensions().get());
+		REAL reg_res,data_res;
+
+		if( this->output_mode_ >= solver<ARRAY_TYPE,ARRAY_TYPE>::OUTPUT_VERBOSE ){
+			std::cout << "Iterating..." << std::endl;
+		}
+		REAL grad_norm0;
+
+		for (int i = 0; i < iterations_; i++){
+			if (i==0){
+				// First pass: build the residual, its gradient and the regularization terms from scratch
+				if (this->x0_.get()){
+					this->encoding_operator_->mult_M(x,&encoding_space);
+
+				} else clear(&encoding_space);
+				encoding_space -= *in;
+				this->encoding_operator_->mult_MH(&encoding_space,&g_linear);
+
+				g_linear *=  this->encoding_operator_->get_weight();
+				data_res = std::sqrt(this->encoding_operator_->get_weight())*real(dot(&encoding_space,&encoding_space));
+
+				calc_regMultM(x,regEnc);
+				for (int n = 0; n < regEnc.size(); n++)
+					if (reg_priors[n].get())
+						axpy(-std::sqrt(this->regularization_operators_[n]->get_weight()),reg_priors[n].get(),&regEnc[n]);
+				add_linear_gradient(regEnc,&g_linear);
+				g = g_linear;
+				this->add_gradient(x,&g);
+
+				reg_res=REAL(0);
+
+			}else {
+				// Later passes: encoding_space was already advanced in place by the line search
+				data_res = real(dot(&encoding_space,&encoding_space));
+			}
+
+
+
+			if (non_negativity_constraint_) solver_non_negativity_filter(x,&g);
+			if (i==0) grad_norm0=nrm2(&g);
+			REAL grad_norm = nrm2(&g);
+			if( this->output_mode_ >= solver<ARRAY_TYPE,ARRAY_TYPE>::OUTPUT_VERBOSE ){
+
+				std::cout << "Iteration " <<i << ". Relative gradient norm: " <<  grad_norm/grad_norm0 << std::endl;
+			}
+
+			if (i == 0){
+				// Steepest descent start; the preconditioner, if any, is applied twice
+				d -= g;
+				if (this->precond_.get()){
+					this->precond_->apply(&d,&d);
+					this->precond_->apply(&d,&d);
+				}
+
+			} else {
+
+				g_step = g; //Not using g_step for anything right now, so let's use it for our beta calculation
+				if (this->precond_.get()){
+					this->precond_->apply(&g_step,&g_step); //Perform first half of the preconditioning
+					this->precond_->apply(&g_old,&g_old);
+				}
+
+				ELEMENT_TYPE g_old_norm = dot(&g_old,&g_old);
+				ELEMENT_TYPE ggold = dot(&g_step,&g_old);
+				g_old -= g_step; // from here on g_old holds (previous gradient - current gradient)
+				REAL gg = real(dot(&g_step,&g_step));
+				ELEMENT_TYPE gy = -dot(&d,&g_old);
+				//ELEMENT_TYPE beta = -dot(g,g_old)/g_old_norm; //PRP ste[
+				//ELEMENT_TYPE theta = gy/g_old_norm;
+
+				REAL betaDy = -gg/real(dot(&d,&g_old));
+				REAL betaHS = real(dot(&g_step,&g_old))/real(dot(&d,&g_old));
+				REAL beta = std::max(REAL(0),std::min(betaDy,betaHS)); //Hybrid step size from Dai and Yuan 2001
+
+				d *= beta;
+
+				if (this->precond_.get()) this->precond_->apply(&g_step,&g_step); //Perform the rest of the preconditioning
+
+				d -= g_step;
+				std::cout << "Beta " << beta << std::endl;
+			}
+
+			// Images of the direction d under the encoding and regularization operators
+			this->encoding_operator_->mult_M(&d,&encoding_space2);
+
+			calc_regMultM(&d,regEnc2);
+
+
+
+			this->encoding_operator_->mult_MH(&encoding_space2,&g_step);
+			g_step *= this->encoding_operator_->get_weight();
+
+
+			add_linear_gradient(regEnc2,&g_step);
+
+			REAL gd = real(dot(&g,&d));
+
+			REAL alpha0=REAL(1);
+
+			//In the linear or semi-linear case, we can calculate the ideal step size.
+			if (this->operators.size() == 0) alpha0 = -real(dot(&encoding_space,&encoding_space2)+calc_dot(regEnc,regEnc2))/real(dot(&encoding_space2,&encoding_space2)+calc_dot(regEnc2,regEnc2));
+
+			REAL alpha;
+			REAL old_norm = functionValue(&encoding_space,regEnc,x);
+
+
+
+			g_old = g;
+
+
+
+
+			{
+				// The estimator advances encoding_space, regEnc and g_linear in place while searching
+				FunctionEstimator f(&encoding_space,&encoding_space2,&regEnc,&regEnc2,x,&d,&g_linear,&g_step,this);
+				alpha=backtracking(f,alpha0,gd,rho,old_norm);
+				//alpha=cg_linesearch(f,alpha0,gd,old_norm);
+				if (alpha == 0) {
+					std::cerr << "Linesearch failed, returning current iteration" << std::endl;
+					return boost::shared_ptr<ARRAY_TYPE>(x);
+				}
+			}
+
+			std::cout << "Alpha: " << alpha << std::endl;
+
+
+
+			if (non_negativity_constraint_){
+				//Restore encoding space and gradient. Why not keep a copy? Memory!
+				axpy(-alpha,&encoding_space2,&encoding_space);
+				reg_axpy(-alpha,regEnc2,regEnc);
+				axpy(-alpha,&g_step,&g_linear);
+
+				ARRAY_TYPE x2 = *x;
+				axpy(alpha,&d,&x2);
+
+				clamp_min(&x2,REAL(0));
+
+				// Replace d by the displacement to the clamped point and search again along it
+				d = x2;
+				d -= *x;
+				gd = real(dot(&g,&d));
+				x2 = *x;
+				alpha0 = 1;
+				this->encoding_operator_->mult_M(&d,&encoding_space2);
+				calc_regMultM(&d,regEnc2);
+
+
+				this->encoding_operator_->mult_MH(&encoding_space2,&g_step);
+				g_step *= this->encoding_operator_->get_weight();
+				add_linear_gradient(regEnc2,&g_step);
+
+				FunctionEstimator f(&encoding_space,&encoding_space2,&regEnc,&regEnc2,x,&d,&g_linear,&g_step,this);
+				//alpha=gold(f,0,alpha0*1.5);
+				//alpha = wolfesearch(f,alpha0,gd,rho,old_norm);
+				alpha = backtracking(f,alpha0,gd,rho,old_norm);
+
+				//alpha = cg_linesearch(f,alpha0,gd,old_norm);
+				axpy(alpha,&d,x);
+				if (alpha == 0){
+					std::cerr << "Linesearch failed, returning current iteration" << std::endl;
+					return boost::shared_ptr<ARRAY_TYPE>(x);
+				}
+			} else {
+				axpy(alpha,&d,x);
+
+			}
+
+
+
+			std::cout << "Function value: " << functionValue(&encoding_space,regEnc,x) << std::endl;
+
+			g = g_linear;
+
+			this->add_gradient(x,&g);
+
+
+			iteration_callback(x,i,data_res,reg_res);
+
+
+			// grad_norm was measured before this iteration's step was taken
+			if (grad_norm/grad_norm0 < tc_tolerance_)  break;
+
+		}
+
+		return boost::shared_ptr<ARRAY_TYPE>(x);
+																															}
+
+
+
+	// Set preconditioner
+	//
+	/*virtual void set_preconditioner( boost::shared_ptr< cgPreconditioner<ARRAY_TYPE> > precond ) {
+      precond_ = precond;
+      }*/
+
+	// Upper bound on the number of iterations solve() performs
+	//
+	virtual void set_max_iterations( unsigned int iterations ) {
+		this->iterations_ = iterations;
+	}
+	virtual unsigned int get_max_iterations() {
+		return this->iterations_;
+	}
+
+	// Tolerance of the termination criterion (compared with the relative gradient norm in solve())
+	//
+	virtual void set_tc_tolerance( REAL tolerance ) {
+		this->tc_tolerance_ = tolerance;
+	}
+	virtual REAL get_tc_tolerance() {
+		return this->tc_tolerance_;
+	}
+
+	// Switches the non-negativity handling in solve() on or off
+	virtual void set_non_negativity_constraint(bool non_negativity_constraint){
+		this->non_negativity_constraint_ = non_negativity_constraint;
+	}
+
+	// Stores the residual-dump flag
+	virtual void set_dump_residual(bool dump_res){
+		this->dump_residual = dump_res;
+	}
+
+	// Preconditioner applied to the gradients in solve(); an empty pointer disables it
+	//
+	virtual void set_preconditioner( boost::shared_ptr< cgPreconditioner<ARRAY_TYPE> > precond ) {
+		this->precond_ = precond;
+	}
+
+	/** Registers a linear regularization operator without a prior (an empty prior slot is stored). Throws std::runtime_error on a NULL operator. */
+	virtual void add_regularization_operator( boost::shared_ptr< linearOperator< ARRAY_TYPE> > op)
+	{
+		if( op.get() == 0 )
+			throw std::runtime_error( "Error: linearOperatorSolver::add_regularization_operator : NULL operator provided" );
+
+		this->regularization_operators_.push_back(op);
+		reg_priors.push_back(boost::shared_ptr<ARRAY_TYPE>(static_cast<ARRAY_TYPE*>(0)));
+	}
+
+	/** Registers a linear regularization operator together with its prior. Throws std::runtime_error on a NULL operator. */
+	virtual void add_regularization_operator( boost::shared_ptr< linearOperator< ARRAY_TYPE> > op,boost::shared_ptr<ARRAY_TYPE> prior)
+	{
+		if( op.get() == 0 )
+			throw std::runtime_error( "Error: linearOperatorSolver::add_regularization_operator : NULL operator provided" );
+
+		this->regularization_operators_.push_back(op);
+		reg_priors.push_back(prior);
+	}
+
+	/** L_norm == 1 wraps op in an l1GPRegularizationOperator stored in this->operators; any other value takes the linear path. */
+	virtual void add_regularization_operator(boost::shared_ptr< linearOperator<ARRAY_TYPE> > op, int L_norm ){
+		if (L_norm != 1){
+			add_regularization_operator(op);
+			return;
+		}
+		this->operators.push_back(boost::shared_ptr< l1GPRegularizationOperator>(new l1GPRegularizationOperator(op)));
+	}
+
+
+	/** Same as the (op, L_norm) overload, with a prior passed along on either path. */
+	virtual void add_regularization_operator(boost::shared_ptr< linearOperator<ARRAY_TYPE> > op, boost::shared_ptr<ARRAY_TYPE> prior, int L_norm ){
+		if (L_norm != 1){
+			add_regularization_operator(op,prior);
+			return;
+		}
+		this->operators.push_back(boost::shared_ptr<l1GPRegularizationOperator>(new l1GPRegularizationOperator(op,prior)));
+	}
+
+
+protected:
+	typedef typename std::vector<boost::shared_ptr<linearOperator<ARRAY_TYPE> > >::iterator  csIterator;
+	typedef typename std::vector< std::vector<boost::shared_ptr<linearOperator<ARRAY_TYPE> > > >::iterator csGroupIterator;
+
+	// Hook called by solve() at the end of every iteration with the image, the iteration index
+	// and the two residual values; the default implementation does nothing.
+	virtual void iteration_callback(ARRAY_TYPE*,int i,REAL,REAL){
+	}
+
+
+
+
+	// Sum of dot(&x[i],&y[i]) over all entries of x; y must hold at least as many arrays as x.
+	ELEMENT_TYPE calc_dot(std::vector<ARRAY_TYPE>& x,std::vector<ARRAY_TYPE>& y){
+		ELEMENT_TYPE sum(0);
+		typename std::vector<ARRAY_TYPE>::iterator xi = x.begin();
+		typename std::vector<ARRAY_TYPE>::iterator yi = y.begin();
+		for (; xi != x.end(); ++xi, ++yi){
+			sum += dot(&(*xi),&(*yi));
+		}
+		return sum;
+	}
+
+	// For every entry k of elems: applies mult_MH of regularization operator k to it and adds the
+	// result, scaled by the square root of that operator's weight, onto g.
+	void add_linear_gradient(std::vector<ARRAY_TYPE>& elems, ARRAY_TYPE* g){
+		ARRAY_TYPE scratch(g->get_dimensions());
+		unsigned int k = 0;
+		while (k < elems.size()){
+			this->regularization_operators_[k]->mult_MH(&elems[k],&scratch);
+			axpy(std::sqrt(this->regularization_operators_[k]->get_weight()),&scratch,g);
+			++k;
+		}
+	}
+
+	// Fills elems[k] with mult_M of regularization operator k applied to x, then scales it by the
+	// square root of that operator's weight.
+	void calc_regMultM(ARRAY_TYPE* x,std::vector<ARRAY_TYPE>& elems){
+		unsigned int k = 0;
+		while (k < elems.size()){
+			this->regularization_operators_[k]->mult_M(x,&elems[k]);
+			elems[k] *= std::sqrt(this->regularization_operators_[k]->get_weight());
+			++k;
+		}
+	}
+
+	// Element-wise axpy over two lists of arrays: axpy(alpha,&x[k],&y[k]) for every entry of x.
+	void reg_axpy(REAL alpha, std::vector<ARRAY_TYPE>& x, std::vector<ARRAY_TYPE>& y){
+		typename std::vector<ARRAY_TYPE>::iterator src = x.begin();
+		typename std::vector<ARRAY_TYPE>::iterator dst = y.begin();
+		for (; src != x.end(); ++src, ++dst){
+			axpy(alpha,&(*src),&(*dst));
+		}
+	}
+
+
+	/**
+	 * Evaluates the objective along a fixed direction d without re-applying the operators.
+	 * The object keeps the step length of the last evaluation and, on every call, shifts the
+	 * caller's encoding space, regularization encoding spaces and linear gradient IN PLACE by the
+	 * difference to the newly requested step length. It also keeps a private copy of the image
+	 * that is shifted the same way.
+	 */
+	class FunctionEstimator{
+	public:
+
+		// All pointers are borrowed; the arrays behind them are modified by operator().
+		FunctionEstimator(ARRAY_TYPE* _encoding_space,ARRAY_TYPE* _encoding_step,std::vector<ARRAY_TYPE>* _regEnc,std::vector<ARRAY_TYPE>* _regEnc_step, ARRAY_TYPE * _x, ARRAY_TYPE * _d, ARRAY_TYPE * _g, ARRAY_TYPE * _g_step, nlcgSolver<ARRAY_TYPE> * _parent)
+			: alpha_old(0)
+			, encoding_step(_encoding_step)
+			, encoding_space(_encoding_space)
+			, regEnc(_regEnc)
+			, regEnc_step(_regEnc_step)
+			, x(_x)
+			, d(_d)
+			, g(_g)
+			, g_step(_g_step)
+			, parent(_parent)
+		{
+			// Private working copy of the image; the caller's x is never written here
+			xtmp = *x;
+		}
+
+		// Moves every tracked quantity from the previous step length to alpha and returns the
+		// parent's functionValue at the new position.
+		REAL operator () (REAL alpha){
+			const REAL shift = alpha-alpha_old;
+
+			axpy(shift,encoding_step,encoding_space);
+			axpy(shift,g_step,g);
+			parent->reg_axpy(shift,*regEnc_step,*regEnc);
+			axpy(shift,d,&xtmp);
+
+			alpha_old = alpha;
+			return parent->functionValue(encoding_space,*regEnc,&xtmp);
+		}
+
+		// dot(d, gradient) at the position of the last evaluation: the linear gradient plus
+		// whatever the parent's add_gradient contributes for the working image.
+		ELEMENT_TYPE dir_deriv(){
+			ARRAY_TYPE gradient(*g);
+			parent->add_gradient(&xtmp,&gradient);
+			return dot(d,&gradient);
+		}
+
+	private:
+
+		REAL alpha_old;                       // step length of the last evaluation
+		ARRAY_TYPE* encoding_step;
+		ARRAY_TYPE * encoding_space;
+		std::vector<ARRAY_TYPE>* regEnc;
+		std::vector<ARRAY_TYPE>* regEnc_step;
+		ARRAY_TYPE* x;
+		ARRAY_TYPE* d;
+		ARRAY_TYPE* g;
+		ARRAY_TYPE* g_step;
+
+		nlcgSolver<ARRAY_TYPE>* parent;
+		ARRAY_TYPE xtmp;                      // working copy of the image
+
+	};
+	friend class FunctionEstimator;
+
+	/***
+	 * @brief Gold section search algorithm. Only works with unimodal functions, which we assume we're dealing with, at least locally
+	 *
+	 * Fixes relative to the previous revision: the section ratio is now the inverse golden ratio
+	 * 2/(1+sqrt(5)) ~ 0.618 (the old expression 1.0/(1.0+sqrt(5.0))/2 evaluated to ~0.155 and put
+	 * the two probes in swapped positions), and the upper probe is a+(d-a)*ratio instead of
+	 * (d-a)*ratio-a, which was only right for a == 0.
+	 * @param f Functor to calculate the function to minimize; it is left evaluated at the returned value
+	 * @param a Start of the bracketing
+	 * @param d End of bracketing
+	 * @return Value minimizing the function f.
+	 */
+	REAL gold(FunctionEstimator& f, REAL a, REAL d){
+		const REAL ratio = REAL(2)/(REAL(1)+std::sqrt(REAL(5)));
+
+		REAL b = d-(d-a)*ratio; // lower interior probe
+		REAL c = a+(d-a)*ratio; // upper interior probe
+
+		// f shifts the solver state incrementally on every call, so the sequence of
+		// evaluations is kept as before even where the value itself is not needed.
+		f(a);
+		REAL fb = f(b);
+		REAL fc = f(c);
+		f(d);
+		const REAL tol = REAL(1e-6);
+
+		while (abs(a-d) > tol*(abs(b)+abs(c))){
+			if (fb > fc){
+				// Minimum lies in [b,d]: the old upper probe becomes the new lower probe.
+				a = b;
+				b = c;
+				fb = fc;
+				c = b*ratio+(REAL(1)-ratio)*d;
+				fc = f(c);
+			} else {
+				// Minimum lies in [a,c]: the old lower probe becomes the new upper probe.
+				d = c;
+				c = b;
+				fc = fb;
+				b = c*ratio+(REAL(1)-ratio)*a;
+				fb = f(b);
+			}
+		}
+
+		// Evaluate once more at the winner so that f is left at the returned abscissa.
+		if (fb < fc){
+			f(b);
+			return b;
+		}
+		f(c);
+		return c;
+	}
+
+	/***
+	 * Backtracking line search.
+	 * Tries the step lengths alpha0*rho^k for k = 0,1,2,... and accepts the first one that passes
+	 * the approximate Wolfe test of Hager and Zhang (SIAM Journal on Optimization 2005 16:1, 170-192)
+	 * without violating the curvature bound |f'(alpha)| <= sigma*|gd|.
+	 * @param f Functor returning the objective at a step length; it is left at the last step tried
+	 * @param alpha0 First step length to try
+	 * @param gd Directional derivative at step length 0
+	 * @param rho Factor the trial step is multiplied by after every rejected trial
+	 * @param old_norm Objective value at step length 0
+	 * @return The accepted step length, or 0 when the trial step has shrunk to exactly zero
+	 */
+	REAL backtracking(FunctionEstimator& f, const REAL alpha0, const REAL gd, const REAL rho, const REAL old_norm){
+		const REAL delta=0.1;
+		const REAL sigma=0.9;
+		//REAL precision = 0.0003; //Estimated precision of function evaluation
+		const REAL precision = 1e-4f; //Estimated precision of function evaluation
+
+		for (int k = 0; ; ++k){
+			const REAL alpha = alpha0*std::pow(rho,k);
+			const REAL value = f(alpha);
+			const ELEMENT_TYPE slope = f.dir_deriv();
+
+			// Approximate Wolfe condition (Hager and Zhang 2005)
+			const bool approx_wolfe = ((2*delta-1.0)*real(gd) >= real(slope)) && (value < (old_norm+precision));
+			// A slope that is still too steep vetoes the step
+			const bool too_steep = abs(slope) > sigma*abs(gd);
+
+			// A step of exactly zero means the search has failed, whatever the tests say
+			if (alpha == 0) return 0;
+			if (approx_wolfe && !too_steep) return alpha;
+		}
+	}
+
+	/***
+	 * Interpolating backtracking line search (sufficient-decrease test only).
+	 * Taken from Numerical Optimization (Nocedal and Wright 1999), adapted from the scipy optimize
+	 * algorithm. Like the gold-section method it works quite poorly in practice.
+	 *
+	 * The first retry minimises a quadratic model, later retries a cubic model through the two most
+	 * recent trial points. A trial that moves too far from or too close to the previous one is
+	 * replaced by half the previous step; the objective is then re-evaluated at that replacement so
+	 * that the next cubic fit is built from a matching (step, value) pair. The earlier revision kept
+	 * the value of the discarded trial instead.
+	 * @param f Functor returning the objective at a step length
+	 * @param alpha_init First step length to try
+	 * @param gd Directional derivative at step length 0
+	 * @param rho Unused
+	 * @param old_norm Objective value at step length 0
+	 * @return The first trial step that passes the sufficient-decrease test
+	 * @throws std::runtime_error when the trial step becomes non-positive before a step is accepted
+	 */
+	REAL wolfesearch(FunctionEstimator& f, const REAL alpha_init, const REAL gd, const REAL rho, const REAL old_norm){
+		using std::sqrt;
+		using std::abs;
+		REAL delta=0.01;
+		REAL alpha0 = alpha_init;
+		REAL f0 = f(alpha0);
+
+		if (f0 <= old_norm+alpha0*delta*gd){//Sufficient decrease reached
+			return alpha0;
+		}
+
+		// Minimiser of the quadratic through old_norm, gd and f0
+		REAL alpha1 = -gd*alpha0*alpha0/2.0/(f0-old_norm-gd*alpha0);
+		//std::cout << "F0 " <<f0 << " old " << old_norm << " gd " << gd <<std::endl;
+		std::cout << "Alpha0: "  << alpha0 << std::endl;
+		//std::cout << "Alpha1: "  << alpha1 << std::endl;
+		REAL f1 = f(alpha1);
+
+		if (f1 <= old_norm+alpha1*delta*gd){//Sufficient decrease reached
+			return alpha1;
+		}
+
+		while (alpha1 > 0){
+			// Coefficients of the cubic through (alpha0,f0) and (alpha1,f1)
+			double factor = alpha0*alpha0*alpha1*alpha1*(alpha1-alpha0);
+			double a = alpha0*alpha0*(f1-old_norm-gd*alpha1) - alpha1*alpha1*(f0-old_norm-gd*alpha0);
+			a /= factor;
+
+			double b = -alpha0*alpha0*alpha0*(f1-old_norm-gd*alpha1) + alpha1*alpha1*alpha1*(f0-old_norm-gd*alpha0);
+			b /= factor;
+
+			double alpha2 = (-b+std::sqrt(std::abs(b*b-3*a*gd)))/(3*a);
+			REAL f2 = f(alpha2);
+			//std::cout << "a " << a << "b " << b << std::endl;
+			std::cout << "Alpha1: "  << alpha1 << std::endl;
+			std::cout << "Alpha2: "  << alpha2 << std::endl;
+			if (f2 < old_norm+alpha2*delta*gd){//Sufficient decrease reached
+				return alpha2;
+			}
+
+			if (((alpha1-alpha2) > (alpha1/2.0)) || ((1.0-alpha2/alpha1) < 0.96)){
+				// Safeguard: fall back to bisection and refresh the value for the new step
+				alpha2 = alpha1 / 2.0;
+				f2 = f(alpha2);
+			}
+
+			alpha0 = alpha1;
+			alpha1 = alpha2;
+			f0 = f1;
+			f1 = f2;
+		}
+
+		throw std::runtime_error("Wolfe line search failed");
+	}
+
+
+
+	/***
+	 * CG linesearch adapted from  Hager, W. and Zhang, H.SIAM Journal on Optimization 2005 16:1, 170-192
+	 *
+	 * Works on the bracket [a,b] = [0,alpha0]. Each pass first tests b, then a, against the
+	 * approximate Wolfe conditions plus the curvature bound |f'| <= sigma*|gd|; if neither passes
+	 * the bracket is narrowed with secant2() and, when that did not shrink it enough, by an extra
+	 * bisection through interval_update().
+	 * @param f Functor returning the objective at a step length; f.dir_deriv() is read right after each evaluation
+	 * @param alpha0 Initial upper end of the bracket
+	 * @param gd Slope at step length 0 (used as the derivative at the lower end of the initial bracket)
+	 * @param old_norm Objective value at step length 0 (used as the value at the lower end of the initial bracket)
+	 * @return The accepted step length (f is evaluated at it once more before returning), or 0 when the bracket collapsed to a single point
+	 */
+	REAL cg_linesearch(FunctionEstimator& f, const REAL alpha0, const REAL gd, const REAL old_norm){
+		REAL delta=0.1;
+		REAL sigma=0.9;
+		REAL nabla=0.66; // a secant2 pass must shrink the bracket to at most this fraction, else bisect
+		//REAL precision = 0.0003; //Estimated precision of function evaluation
+		REAL precision = 1e-4f; //Estimated precision of function evaluation
+
+
+
+
+		REAL a=0;
+		REAL b = alpha0;
+
+		// ak/bk remember the bracket of the previous pass
+		REAL ak = a;
+		REAL bk = b;
+		REAL fa = old_norm;
+		ELEMENT_TYPE a_deriv = gd;
+		REAL fb = f(alpha0);
+		ELEMENT_TYPE b_deriv = f.dir_deriv();
+
+		while (abs(a-b) > 0){
+			if ((((2*delta-1.0)*real(gd) >= real(b_deriv)) && (fb < old_norm+precision)) && //Check Approximate Wolfe conditions
+					(abs(b_deriv) <= sigma*abs(gd))){
+				f(b); // re-evaluate so that f is left at the returned step
+				return b;
+			}
+
+			if ((((2*delta-1.0)*real(gd) >= real(a_deriv)) && (fa < old_norm+precision)) && //Check Approximate Wolfe conditions
+					(abs(a_deriv) <= sigma*abs(gd))){
+				f(a); // re-evaluate so that f is left at the returned step
+				return a;
+			}
+
+			// NOTE(review): secant2 receives old_norm+precision as its threshold while the
+			// bisection below receives plain old_norm - confirm this asymmetry is intended.
+			secant2(a,b,f,old_norm+precision);
+			if ((b-a) > nabla*(bk-ak)) {
+				REAL c = (a+b)/2;
+				interval_update(a,b,c,f,old_norm);
+			}
+			// Refresh value and slope only for an end point that actually moved
+			if (a != ak){
+				fa = f(a);
+				a_deriv = f.dir_deriv();
+			}
+
+			if (b != bk){
+				fb = f(b);
+				b_deriv = f.dir_deriv();
+			}
+
+			ak = a;
+			bk = b;
+
+			std::cout << "a: " << a << " b: " << b << std::endl;
+		}
+		// Bracket collapsed without an acceptable step
+		return 0;
+		//throw std::runtime_error("CG_linesearch failed");
+
+	}
+
+
+	/***
+	 * Double secant step of the Hager and Zhang (2005) line search.
+	 * Takes a secant step c between a and b, narrows the bracket with interval_update() and, if c
+	 * became one of the new end points, takes ONE further secant step from the matching old end
+	 * point. The two follow-up cases are mutually exclusive; the earlier revision chained them as
+	 * two independent ifs, so the second could fire on the already replaced c together with the
+	 * derivative of the old c.
+	 * @param a Lower end of the bracket, updated in place
+	 * @param b Upper end of the bracket, updated in place
+	 * @param f Functor for the objective; f.dir_deriv() is read right after each evaluation
+	 * @param old_norm Threshold forwarded unchanged to interval_update()
+	 */
+	void secant2(REAL& a, REAL& b,FunctionEstimator& f,REAL old_norm){
+		// The values themselves are not needed; f is evaluated because dir_deriv() refers to
+		// the position of the most recent evaluation.
+		f(a);
+		const ELEMENT_TYPE dfa = f.dir_deriv();
+		f(b);
+		const ELEMENT_TYPE dfb = f.dir_deriv();
+
+		REAL c= real((a*dfb-b*dfa)/(dfb-dfa));
+
+		f(c);
+		const ELEMENT_TYPE dfc = f.dir_deriv();
+
+		REAL A=a;
+		REAL B = b;
+
+		interval_update(A,B,c,f,old_norm);
+
+		if (c == B){
+			// c replaced the upper end: secant step between the old b and c
+			c= real((b*dfc-c*dfb)/(dfc-dfb));
+			interval_update(A,B,c,f,old_norm);
+		} else if (c == A){
+			// c replaced the lower end: secant step between the old a and c
+			c= real((a*dfc-c*dfa)/(dfc-dfa));
+			interval_update(A,B,c,f,old_norm);
+		}
+
+		a= A;
+		b = B;
+	}
+
+	// Narrows the bracket [a,b] (both updated in place) using the trial point c.
+	//  - c outside [a,b]: nothing happens.
+	//  - non-negative slope at c: c becomes the new upper end.
+	//  - negative slope and f(c) below old_norm: c becomes the new lower end.
+	//  - otherwise c becomes the upper end and the bracket is bisected repeatedly until a
+	//    point with non-negative slope is found.
+	// f.dir_deriv() is always read right after evaluating f at the point of interest.
+	// NOTE(review): the bisection loop has no iteration cap; it only ends once a point with
+	// non-negative slope is hit.
+	void interval_update(REAL & a, REAL & b, REAL c,FunctionEstimator& f,REAL old_norm){
+		REAL theta = 0.5; // split fraction; 0.5 is plain bisection
+		if (c < a || c > b) return; // C not in interval
+		REAL fc = f(c);
+		ELEMENT_TYPE dfc = f.dir_deriv();
+
+		if (real(dfc) >= 0){
+			b =c;
+			return;
+		}
+		if (fc < old_norm){
+			a = c;
+			return;
+		}
+		// Negative slope but value not below the threshold: search between a and c
+		b =c;
+		while(true){
+			REAL d = (1-theta)*a+theta*b;
+			REAL fd = f(d);
+			ELEMENT_TYPE dfd = f.dir_deriv();
+
+			if (real(dfd) >= 0){
+				b = d;
+				return;
+			}
+			if (fd < old_norm){
+				a = d;
+			} else 	b = d;
+
+			std::cout << "Interval a: " << a << " b: " << b << std::endl;
+
+		}
+
+
+
+
+	}
+
+	// Objective value assembled from three parts, summed in this order:
+	//   sqrt(encoding weight) * |<encoding_space,encoding_space>|,
+	//   the magnitude(x) of every entry of this->operators,
+	//   |calc_dot(regEnc,regEnc)|.
+	// NOTE(review): solve() scales the data gradient by the weight itself, not its square
+	// root - confirm the sqrt here is intended.
+	REAL functionValue(ARRAY_TYPE* encoding_space,std::vector<ARRAY_TYPE>& regEnc, ARRAY_TYPE * x){
+		const REAL data_term = std::sqrt(this->encoding_operator_->get_weight())*abs(dot(encoding_space,encoding_space));
+
+		REAL total = data_term;
+		const unsigned int op_count = this->operators.size();
+		for (unsigned int op = 0; op < op_count; ++op)
+			total += this->operators[op]->magnitude(x);
+
+		total += abs(calc_dot(regEnc,regEnc));
+		return total;
+	}
+
+
+
+
+
+
+protected:
+
+	// Preconditioner
+	//boost::shared_ptr< cgPreconditioner<ARRAY_TYPE> > precond_;
+	// Maximum number of iterations
+	unsigned int iterations_;
+	bool non_negativity_constraint_;
+	REAL tc_tolerance_;
+	REAL threshold;
+	bool dump_residual;
+	REAL rho;
+
+	// Preconditioner
+
+	std::vector<boost::shared_ptr<ARRAY_TYPE> > reg_priors;
+	boost::shared_ptr< cgPreconditioner<ARRAY_TYPE> > precond_;
+
+};
+}
diff --git a/toolboxes/solvers/sbSolver.h b/toolboxes/solvers/sbSolver.h
index a502f38..9ab2045 100644
--- a/toolboxes/solvers/sbSolver.h
+++ b/toolboxes/solvers/sbSolver.h
@@ -37,7 +37,7 @@ protected:
 		sbRegularizationOperator(boost::shared_ptr< linearOperator<ARRAY_TYPE_ELEMENT> > op) { reg_op=op; }
 		virtual ~sbRegularizationOperator(){}
 
-		virtual void initialize(REAL normalization_factor = REAL(1))
+		virtual void initialize(boost::shared_ptr< std::vector<size_t> > image_dims, REAL normalization_factor = REAL(1))
 		{
 			d_k = boost::shared_ptr<ARRAY_TYPE_ELEMENT>(new ARRAY_TYPE_ELEMENT(reg_op->get_codomain_dimensions()));
 			b_k = boost::shared_ptr<ARRAY_TYPE_ELEMENT>(new ARRAY_TYPE_ELEMENT(reg_op->get_codomain_dimensions()));
@@ -161,7 +161,7 @@ protected:
 			}
 		}
 
-		virtual void initialize(REAL normalization_factor = REAL(1))
+		virtual void initialize(boost::shared_ptr< std::vector<size_t> > image_dims, REAL normalization_factor = REAL(1))
 		{
 			codom_dims = reg_ops.front()->get_codomain_dimensions();
 			d_ks = std::vector< boost::shared_ptr<ARRAY_TYPE_ELEMENT> >(reg_ops.size());
@@ -261,7 +261,7 @@ protected:
 			}
 		}
 
-		virtual void initialize(REAL normalization_factor = REAL(1))
+		virtual void initialize(boost::shared_ptr< std::vector<size_t> > image_dims, REAL normalization_factor = REAL(1))
 		{
 			codom_dims = reg_ops.front()->get_codomain_dimensions();
 			d_ks = std::vector< boost::shared_ptr<ARRAY_TYPE_ELEMENT> >(reg_ops.size());
@@ -369,9 +369,9 @@ protected:
 		virtual void initialize(boost::shared_ptr< std::vector<size_t> > image_dims,
 				REAL normalization_factor = REAL(1))
 		{
-			sbRegularizationOperator::initialize( normalization_factor);
 			this->reg_op->set_domain_dimensions(image_dims.get());
 			this->reg_op->set_codomain_dimensions(image_dims.get());
+			sbRegularizationOperator::initialize( image_dims, normalization_factor);
 		}
 
 		virtual void update_encoding_space(ARRAY_TYPE_ELEMENT* encoding_space){
@@ -626,6 +626,10 @@ protected:
 
 			boost::shared_ptr< linearOperator<ARRAY_TYPE_ELEMENT> > op = regularization_operators_[i]->reg_op;
 			boost::shared_ptr< std::vector<size_t> > op_dims = op->get_domain_dimensions();
+			boost::shared_ptr< std::vector<size_t> > op_codims = op->get_codomain_dimensions();
+			if (!op_codims.get()){
+				throw std::runtime_error("Error: sbSolver::validate_regularization_operators : operator codomain dimension not set");
+			}
 
 			if( !op.get() ){
 				throw std::runtime_error( "Error: sbSolver::validate_regularization_operators : invalid operator provided" );
@@ -671,7 +675,7 @@ protected:
 		//
 
 		for (int i=0; i < regularization_operators_.size(); i++){
-			regularization_operators_[i]->initialize(normalization_factor);
+			regularization_operators_[i]->initialize(image_dims, normalization_factor);
 			enc_op_container_->add_operator( regularization_operators_[i]->reg_op );
 		}
 	}
diff --git a/toolboxes/solvers/solver.h b/toolboxes/solvers/solver.h
index a7df7f5..3382c43 100644
--- a/toolboxes/solvers/solver.h
+++ b/toolboxes/solvers/solver.h
@@ -39,10 +39,6 @@ namespace Gadgetron
 
     // Invoke solver
     virtual boost::shared_ptr<ARRAY_TYPE_OUT> solve( ARRAY_TYPE_IN* ) = 0;
- 
-    void* operator new(size_t bytes) { return ::new char[bytes]; }
-    void* operator new(size_t s, void * p) { return p; }
-    void operator delete(void *ptr) { delete[] static_cast<char*> (ptr); }
 
   protected:
     int output_mode_;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gadgetron.git